1 /*
2 * *****************************************************************************
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * *****************************************************************************
31 *
32 * The parser for bc.
33 *
34 */
35
36 #if BC_ENABLED
37
38 #include <assert.h>
39 #include <stdbool.h>
40 #include <stdlib.h>
41 #include <string.h>
42
43 #include <setjmp.h>
44
45 #include <bc.h>
46 #include <num.h>
47 #include <vm.h>
48
49 // Before you embark on trying to understand this code, have you read the
50 // Development manual (manuals/development.md) and the comment in include/bc.h
51 // yet? No? Do that first. I'm serious.
52 //
53 // The reason is because this file holds the most sensitive and finicky code in
54 // the entire codebase. Even getting history to work on Windows was nothing
55 // compared to this. This is where dreams go to die, where dragons live, and
56 // from which Ken Thompson himself would flee.
57
58 static void bc_parse_else(BcParse *p);
59 static void bc_parse_stmt(BcParse *p);
60 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags,
61 BcParseNext next);
62 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next);
63
64 /**
65 * Returns true if an instruction could only have come from a "leaf" expression.
66 * For more on what leaf expressions are, read the comment for BC_PARSE_LEAF().
67 * @param t The instruction to test.
68 */
bc_parse_inst_isLeaf(BcInst t)69 static bool bc_parse_inst_isLeaf(BcInst t) {
70 return (t >= BC_INST_NUM && t <= BC_INST_MAXSCALE) ||
71 #if BC_ENABLE_EXTRA_MATH
72 t == BC_INST_TRUNC ||
73 #endif // BC_ENABLE_EXTRA_MATH
74 t <= BC_INST_DEC;
75 }
76
77 /**
78 * Returns true if the *previous* token was a delimiter. A delimiter is anything
79 * that can legally end a statement. In bc's case, it could be a newline, a
80 * semicolon, and a brace in certain cases.
81 * @param p The parser.
82 * @return True if the token is a legal delimiter.
83 */
bc_parse_isDelimiter(const BcParse * p)84 static bool bc_parse_isDelimiter(const BcParse *p) {
85
86 BcLexType t = p->l.t;
87 bool good;
88
89 // If it's an obvious delimiter, say so.
90 if (BC_PARSE_DELIMITER(t)) return true;
91
92 good = false;
93
94 // If the current token is a keyword, then...beware. That means that we need
95 // to check for a "dangling" else, where there was no brace-delimited block
96 // on the previous if.
97 if (t == BC_LEX_KW_ELSE) {
98
99 size_t i;
100 uint16_t *fptr = NULL, flags = BC_PARSE_FLAG_ELSE;
101
102 // As long as going up the stack is valid for a dangling else, keep on.
103 for (i = 0; i < p->flags.len && BC_PARSE_BLOCK_STMT(flags); ++i) {
104
105 fptr = bc_vec_item_rev(&p->flags, i);
106 flags = *fptr;
107
108 // If we need a brace and don't have one, then we don't have a
109 // delimiter.
110 if ((flags & BC_PARSE_FLAG_BRACE) && p->l.last != BC_LEX_RBRACE)
111 return false;
112 }
113
114 // Oh, and we had also better have an if statement somewhere.
115 good = ((flags & BC_PARSE_FLAG_IF) != 0);
116 }
117 else if (t == BC_LEX_RBRACE) {
118
119 size_t i;
120
121 // Since we have a brace, we need to just check if a brace was needed.
122 for (i = 0; !good && i < p->flags.len; ++i) {
123 uint16_t *fptr = bc_vec_item_rev(&p->flags, i);
124 good = (((*fptr) & BC_PARSE_FLAG_BRACE) != 0);
125 }
126 }
127
128 return good;
129 }
130
131 /**
132 * Returns true if we are in top level of a function body. The POSIX grammar
133 * is defined such that anything is allowed after a function body, so we must
134 * use this function to detect that case when ending a function body.
135 * @param p The parser.
136 * @return True if we are in the top level of parsing a function body.
137 */
bc_parse_TopFunc(const BcParse * p)138 static bool bc_parse_TopFunc(const BcParse *p) {
139
140 bool good = p->flags.len == 2;
141
142 uint16_t val = BC_PARSE_FLAG_BRACE | BC_PARSE_FLAG_FUNC_INNER;
143 val |= BC_PARSE_FLAG_FUNC;
144
145 return good && BC_PARSE_TOP_FLAG(p) == val;
146 }
147
148 /**
149 * Sets a previously defined exit label. What are labels? See the bc Parsing
150 * section of the Development manual (manuals/development.md).
151 * @param p The parser.
152 */
bc_parse_setLabel(BcParse * p)153 static void bc_parse_setLabel(BcParse *p) {
154
155 BcFunc *func = p->func;
156 BcInstPtr *ip = bc_vec_top(&p->exits);
157 size_t *label;
158
159 assert(func == bc_vec_item(&p->prog->fns, p->fidx));
160
161 // Set the preallocated label to the correct index.
162 label = bc_vec_item(&func->labels, ip->idx);
163 *label = func->code.len;
164
165 // Now, we don't need the exit label; it is done.
166 bc_vec_pop(&p->exits);
167 }
168
169 /**
170 * Creates a label and sets it to idx. If this is an exit label, then idx is
171 * actually invalid, but it doesn't matter because it will be fixed by
172 * bc_parse_setLabel() later.
173 * @param p The parser.
174 * @param idx The index of the label.
175 */
bc_parse_createLabel(BcParse * p,size_t idx)176 static void bc_parse_createLabel(BcParse *p, size_t idx) {
177 bc_vec_push(&p->func->labels, &idx);
178 }
179
180 /**
181 * Creates a conditional label. Unlike an exit label, this label is set at
182 * creation time because it comes *before* the code that will target it.
183 * @param p The parser.
184 * @param idx The index of the label.
185 */
bc_parse_createCondLabel(BcParse * p,size_t idx)186 static void bc_parse_createCondLabel(BcParse *p, size_t idx) {
187 bc_parse_createLabel(p, p->func->code.len);
188 bc_vec_push(&p->conds, &idx);
189 }
190
191 /*
192 * Creates an exit label to be filled in later by bc_parse_setLabel(). Also, why
193 * create a label to be filled in later? Because exit labels are meant to be
194 * targeted by code that comes *before* the label. Since we have to parse that
195 * code first, and don't know how long it will be, we need to just make sure to
196 * reserve a slot to be filled in later when we know.
197 *
198 * By the way, this uses BcInstPtr because it was convenient. The field idx
199 * holds the index, and the field func holds the loop boolean.
200 *
201 * @param p The parser.
202 * @param idx The index of the label's position.
203 * @param loop True if the exit label is for a loop or not.
204 */
bc_parse_createExitLabel(BcParse * p,size_t idx,bool loop)205 static void bc_parse_createExitLabel(BcParse *p, size_t idx, bool loop) {
206
207 BcInstPtr ip;
208
209 assert(p->func == bc_vec_item(&p->prog->fns, p->fidx));
210
211 ip.func = loop;
212 ip.idx = idx;
213 ip.len = 0;
214
215 bc_vec_push(&p->exits, &ip);
216 bc_parse_createLabel(p, SIZE_MAX);
217 }
218
219 /**
220 * Pops the correct operators off of the operator stack based on the current
221 * operator. This is because of the Shunting-Yard algorithm. Lower prec means
222 * higher precedence.
223 * @param p The parser.
224 * @param type The operator.
225 * @param start The previous start of the operator stack. For more
226 * information, see the bc Parsing section of the Development
227 * manual (manuals/development.md).
228 * @param nexprs A pointer to the current number of expressions that have not
229 * been consumed yet. This is an IN and OUT parameter.
230 */
bc_parse_operator(BcParse * p,BcLexType type,size_t start,size_t * nexprs)231 static void bc_parse_operator(BcParse *p, BcLexType type,
232 size_t start, size_t *nexprs)
233 {
234 BcLexType t;
235 uchar l, r = BC_PARSE_OP_PREC(type);
236 uchar left = BC_PARSE_OP_LEFT(type);
237
238 // While we haven't hit the stop point yet.
239 while (p->ops.len > start) {
240
241 // Get the top operator.
242 t = BC_PARSE_TOP_OP(p);
243
244 // If it's a right paren, we have reached the end of whatever expression
245 // this is no matter what.
246 if (t == BC_LEX_LPAREN) break;
247
248 // Break for precedence. Precedence operates differently on left and
249 // right associativity, by the way. A left associative operator that
250 // matches the current precedence should take priority, but a right
251 // associative operator should not.
252 l = BC_PARSE_OP_PREC(t);
253 if (l >= r && (l != r || !left)) break;
254
255 // Do the housekeeping. In particular, make sure to note that one
256 // expression was consumed. (Two were, but another was added.)
257 bc_parse_push(p, BC_PARSE_TOKEN_INST(t));
258 bc_vec_pop(&p->ops);
259 *nexprs -= !BC_PARSE_OP_PREFIX(t);
260 }
261
262 bc_vec_push(&p->ops, &type);
263 }
264
265 /**
266 * Parses a right paren. In the Shunting-Yard algorithm, it needs to be put on
267 * the operator stack. But before that, it needs to consume whatever operators
268 * there are until it hits a left paren.
269 * @param p The parser.
270 * @param nexprs A pointer to the current number of expressions that have not
271 * been consumed yet. This is an IN and OUT parameter.
272 */
bc_parse_rightParen(BcParse * p,size_t * nexprs)273 static void bc_parse_rightParen(BcParse *p, size_t *nexprs) {
274
275 BcLexType top;
276
277 // Consume operators until a left paren.
278 while ((top = BC_PARSE_TOP_OP(p)) != BC_LEX_LPAREN) {
279 bc_parse_push(p, BC_PARSE_TOKEN_INST(top));
280 bc_vec_pop(&p->ops);
281 *nexprs -= !BC_PARSE_OP_PREFIX(top);
282 }
283
284 // We need to pop the left paren as well.
285 bc_vec_pop(&p->ops);
286
287 // Oh, and we also want the next token.
288 bc_lex_next(&p->l);
289 }
290
291 /**
292 * Parses function arguments.
293 * @param p The parser.
294 * @param flags Flags restricting what kind of expressions the arguments can
295 * be.
296 */
bc_parse_args(BcParse * p,uint8_t flags)297 static void bc_parse_args(BcParse *p, uint8_t flags) {
298
299 bool comma = false;
300 size_t nargs;
301
302 bc_lex_next(&p->l);
303
304 // Print and comparison operators not allowed. Well, comparison operators
305 // only for POSIX. But we do allow arrays, and we *must* get a value.
306 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
307 flags |= (BC_PARSE_ARRAY | BC_PARSE_NEEDVAL);
308
309 // Count the arguments and parse them.
310 for (nargs = 0; p->l.t != BC_LEX_RPAREN; ++nargs) {
311
312 bc_parse_expr_status(p, flags, bc_parse_next_arg);
313
314 comma = (p->l.t == BC_LEX_COMMA);
315 if (comma) bc_lex_next(&p->l);
316 }
317
318 // An ending comma is FAIL.
319 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
320
321 // Now do the call with the number of arguments.
322 bc_parse_push(p, BC_INST_CALL);
323 bc_parse_pushIndex(p, nargs);
324 }
325
326 /**
327 * Parses a function call.
328 * @param p The parser.
329 * @param flags Flags restricting what kind of expressions the arguments can
330 * be.
331 */
bc_parse_call(BcParse * p,const char * name,uint8_t flags)332 static void bc_parse_call(BcParse *p, const char *name, uint8_t flags) {
333
334 size_t idx;
335
336 bc_parse_args(p, flags);
337
338 // We just assert this because bc_parse_args() should
339 // ensure that the next token is what it should be.
340 assert(p->l.t == BC_LEX_RPAREN);
341
342 // We cannot use bc_program_insertFunc() here
343 // because it will overwrite an existing function.
344 idx = bc_map_index(&p->prog->fn_map, name);
345
346 // The function does not exist yet. Create a space for it. If the user does
347 // not define it, it's a *runtime* error, not a parse error.
348 if (idx == BC_VEC_INVALID_IDX) {
349
350 idx = bc_program_insertFunc(p->prog, name);
351
352 assert(idx != BC_VEC_INVALID_IDX);
353
354 // Make sure that this pointer was not invalidated.
355 p->func = bc_vec_item(&p->prog->fns, p->fidx);
356 }
357 // The function exists, so set the right function index.
358 else idx = ((BcId*) bc_vec_item(&p->prog->fn_map, idx))->idx;
359
360 bc_parse_pushIndex(p, idx);
361
362 // Make sure to get the next token.
363 bc_lex_next(&p->l);
364 }
365
366 /**
367 * Parses a name/identifier-based expression. It could be a variable, an array
368 * element, an array itself (for function arguments), a function call, etc.
369 *
370 */
bc_parse_name(BcParse * p,BcInst * type,bool * can_assign,uint8_t flags)371 static void bc_parse_name(BcParse *p, BcInst *type,
372 bool *can_assign, uint8_t flags)
373 {
374 char *name;
375
376 BC_SIG_ASSERT_LOCKED;
377
378 // We want a copy of the name since the lexer might overwrite its copy.
379 name = bc_vm_strdup(p->l.str.v);
380
381 BC_SETJMP_LOCKED(err);
382
383 // We need the next token to see if it's just a variable or something more.
384 bc_lex_next(&p->l);
385
386 // Array element or array.
387 if (p->l.t == BC_LEX_LBRACKET) {
388
389 bc_lex_next(&p->l);
390
391 // Array only. This has to be a function parameter.
392 if (p->l.t == BC_LEX_RBRACKET) {
393
394 // Error if arrays are not allowed.
395 if (BC_ERR(!(flags & BC_PARSE_ARRAY)))
396 bc_parse_err(p, BC_ERR_PARSE_EXPR);
397
398 *type = BC_INST_ARRAY;
399 *can_assign = false;
400 }
401 else {
402
403 // If we are here, we have an array element. We need to set the
404 // expression parsing flags.
405 uint8_t flags2 = (flags & ~(BC_PARSE_PRINT | BC_PARSE_REL)) |
406 BC_PARSE_NEEDVAL;
407
408 bc_parse_expr_status(p, flags2, bc_parse_next_elem);
409
410 // The next token *must* be a right bracket.
411 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
412 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
413
414 *type = BC_INST_ARRAY_ELEM;
415 *can_assign = true;
416 }
417
418 // Make sure to get the next token.
419 bc_lex_next(&p->l);
420
421 // Push the instruction and the name of the identifier.
422 bc_parse_push(p, *type);
423 bc_parse_pushName(p, name, false);
424 }
425 else if (p->l.t == BC_LEX_LPAREN) {
426
427 // We are parsing a function call; error if not allowed.
428 if (BC_ERR(flags & BC_PARSE_NOCALL))
429 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
430
431 *type = BC_INST_CALL;
432 *can_assign = false;
433
434 bc_parse_call(p, name, flags);
435 }
436 else {
437 // Just a variable.
438 *type = BC_INST_VAR;
439 *can_assign = true;
440 bc_parse_push(p, BC_INST_VAR);
441 bc_parse_pushName(p, name, true);
442 }
443
444 err:
445 // Need to make sure to unallocate the name.
446 free(name);
447 BC_LONGJMP_CONT;
448 BC_SIG_MAYLOCK;
449 }
450
451 /**
452 * Parses a builtin function that takes no arguments. This includes read(),
453 * rand(), maxibase(), maxobase(), maxscale(), and maxrand().
454 * @param p The parser.
455 * @param inst The instruction corresponding to the builtin.
456 */
bc_parse_noArgBuiltin(BcParse * p,BcInst inst)457 static void bc_parse_noArgBuiltin(BcParse *p, BcInst inst) {
458
459 // Must have a left paren.
460 bc_lex_next(&p->l);
461 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
462
463 // Must have a right paren.
464 bc_lex_next(&p->l);
465 if ((p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
466
467 bc_parse_push(p, inst);
468
469 bc_lex_next(&p->l);
470 }
471
472 /**
473 * Parses a builtin function that takes 1 argument. This includes length(),
474 * sqrt(), abs(), scale(), and irand().
475 * @param p The parser.
476 * @param type The lex token.
477 * @param flags The expression parsing flags for parsing the argument.
478 * @param prev An out parameter; the previous instruction pointer.
479 */
bc_parse_builtin(BcParse * p,BcLexType type,uint8_t flags,BcInst * prev)480 static void bc_parse_builtin(BcParse *p, BcLexType type,
481 uint8_t flags, BcInst *prev)
482 {
483 // Must have a left paren.
484 bc_lex_next(&p->l);
485 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
486 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
487
488 bc_lex_next(&p->l);
489
490 // Change the flags as needed for parsing the argument.
491 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
492 flags |= BC_PARSE_NEEDVAL;
493
494 // Since length can take arrays, we need to specially add that flag.
495 if (type == BC_LEX_KW_LENGTH) flags |= BC_PARSE_ARRAY;
496
497 bc_parse_expr_status(p, flags, bc_parse_next_rel);
498
499 // Must have a right paren.
500 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
501 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
502
503 // Adjust previous based on the token and push it.
504 *prev = type - BC_LEX_KW_LENGTH + BC_INST_LENGTH;
505 bc_parse_push(p, *prev);
506
507 bc_lex_next(&p->l);
508 }
509
510 /**
511 * Parses a builtin function that takes 3 arguments. This includes modexp() and
512 * divmod().
513 */
bc_parse_builtin3(BcParse * p,BcLexType type,uint8_t flags,BcInst * prev)514 static void bc_parse_builtin3(BcParse *p, BcLexType type,
515 uint8_t flags, BcInst *prev)
516 {
517 assert(type == BC_LEX_KW_MODEXP || type == BC_LEX_KW_DIVMOD);
518
519 // Must have a left paren.
520 bc_lex_next(&p->l);
521 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
522 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
523
524 bc_lex_next(&p->l);
525
526 // Change the flags as needed for parsing the argument.
527 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
528 flags |= BC_PARSE_NEEDVAL;
529
530 bc_parse_expr_status(p, flags, bc_parse_next_builtin);
531
532 // Must have a comma.
533 if (BC_ERR(p->l.t != BC_LEX_COMMA))
534 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
535
536 bc_lex_next(&p->l);
537
538 bc_parse_expr_status(p, flags, bc_parse_next_builtin);
539
540 // Must have a comma.
541 if (BC_ERR(p->l.t != BC_LEX_COMMA))
542 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
543
544 bc_lex_next(&p->l);
545
546 // If it is a divmod, parse an array name. Otherwise, just parse another
547 // expression.
548 if (type == BC_LEX_KW_DIVMOD) {
549
550 // Must have a name.
551 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
552
553 // This is safe because the next token should not overwrite the name.
554 bc_lex_next(&p->l);
555
556 // Must have a left bracket.
557 if (BC_ERR(p->l.t != BC_LEX_LBRACKET))
558 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
559
560 // This is safe because the next token should not overwrite the name.
561 bc_lex_next(&p->l);
562
563 // Must have a right bracket.
564 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
565 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
566
567 // This is safe because the next token should not overwrite the name.
568 bc_lex_next(&p->l);
569 }
570 else bc_parse_expr_status(p, flags, bc_parse_next_rel);
571
572 // Must have a right paren.
573 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
574 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
575
576 // Adjust previous based on the token and push it.
577 *prev = type - BC_LEX_KW_MODEXP + BC_INST_MODEXP;
578 bc_parse_push(p, *prev);
579
580 // If we have divmod, we need to assign the modulus to the array element, so
581 // we need to push the instructions for doing so.
582 if (type == BC_LEX_KW_DIVMOD) {
583
584 // The zeroth element.
585 bc_parse_push(p, BC_INST_ZERO);
586 bc_parse_push(p, BC_INST_ARRAY_ELEM);
587
588 // Push the array.
589 bc_parse_pushName(p, p->l.str.v, false);
590
591 // Swap them and assign. After this, the top item on the stack should
592 // be the quotient.
593 bc_parse_push(p, BC_INST_SWAP);
594 bc_parse_push(p, BC_INST_ASSIGN_NO_VAL);
595 }
596
597 bc_lex_next(&p->l);
598 }
599
600 /**
601 * Parses the scale keyword. This is special because scale can be a value or a
602 * builtin function.
603 * @param p The parser.
604 * @param type An out parameter; the instruction for the parse.
605 * @param can_assign An out parameter; whether the expression can be assigned
606 * to.
607 * @param flags The expression parsing flags for parsing a scale() arg.
608 */
bc_parse_scale(BcParse * p,BcInst * type,bool * can_assign,uint8_t flags)609 static void bc_parse_scale(BcParse *p, BcInst *type,
610 bool *can_assign, uint8_t flags)
611 {
612 bc_lex_next(&p->l);
613
614 // Without the left paren, it's just the keyword.
615 if (p->l.t != BC_LEX_LPAREN) {
616
617 // Set, push, and return.
618 *type = BC_INST_SCALE;
619 *can_assign = true;
620 bc_parse_push(p, BC_INST_SCALE);
621 return;
622 }
623
624 // Handle the scale function.
625 *type = BC_INST_SCALE_FUNC;
626 *can_assign = false;
627
628 // Once again, adjust the flags.
629 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
630 flags |= BC_PARSE_NEEDVAL;
631
632 bc_lex_next(&p->l);
633
634 bc_parse_expr_status(p, flags, bc_parse_next_rel);
635
636 // Must have a right paren.
637 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
638 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
639
640 bc_parse_push(p, BC_INST_SCALE_FUNC);
641
642 bc_lex_next(&p->l);
643 }
644
645 /**
646 * Parses and increment or decrement operator. This is a bit complex.
647 * @param p The parser.
648 * @param prev An out parameter; the previous instruction pointer.
649 * @param can_assign An out parameter; whether the expression can be assigned
650 * to.
651 * @param nexs An in/out parameter; the number of expressions in the
652 * parse tree that are not used.
653 * @param flags The expression parsing flags for parsing a scale() arg.
654 */
bc_parse_incdec(BcParse * p,BcInst * prev,bool * can_assign,size_t * nexs,uint8_t flags)655 static void bc_parse_incdec(BcParse *p, BcInst *prev, bool *can_assign,
656 size_t *nexs, uint8_t flags)
657 {
658 BcLexType type;
659 uchar inst;
660 BcInst etype = *prev;
661 BcLexType last = p->l.last;
662
663 assert(prev != NULL && can_assign != NULL);
664
665 // If we can't assign to the previous token, then we have an error.
666 if (BC_ERR(last == BC_LEX_OP_INC || last == BC_LEX_OP_DEC ||
667 last == BC_LEX_RPAREN))
668 {
669 bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
670 }
671
672 // Is the previous instruction for a variable?
673 if (BC_PARSE_INST_VAR(etype)) {
674
675 // If so, this is a postfix operator.
676 if (!*can_assign) bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
677
678 // Only postfix uses BC_INST_INC and BC_INST_DEC.
679 *prev = inst = BC_INST_INC + (p->l.t != BC_LEX_OP_INC);
680 bc_parse_push(p, inst);
681 bc_lex_next(&p->l);
682 *can_assign = false;
683 }
684 else {
685
686 // This is a prefix operator. In that case, we just convert it to
687 // an assignment instruction.
688 *prev = inst = BC_INST_ASSIGN_PLUS + (p->l.t != BC_LEX_OP_INC);
689
690 bc_lex_next(&p->l);
691 type = p->l.t;
692
693 // Because we parse the next part of the expression
694 // right here, we need to increment this.
695 *nexs = *nexs + 1;
696
697 // Is the next token a normal identifier?
698 if (type == BC_LEX_NAME) {
699
700 // Parse the name.
701 uint8_t flags2 = flags & ~BC_PARSE_ARRAY;
702 bc_parse_name(p, prev, can_assign, flags2 | BC_PARSE_NOCALL);
703 }
704 // Is the next token a global?
705 else if (type >= BC_LEX_KW_LAST && type <= BC_LEX_KW_OBASE) {
706 bc_parse_push(p, type - BC_LEX_KW_LAST + BC_INST_LAST);
707 bc_lex_next(&p->l);
708 }
709 // Is the next token specifically scale, which needs special treatment?
710 else if (BC_NO_ERR(type == BC_LEX_KW_SCALE)) {
711
712 bc_lex_next(&p->l);
713
714 // Check that scale() was not used.
715 if (BC_ERR(p->l.t == BC_LEX_LPAREN))
716 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
717 else bc_parse_push(p, BC_INST_SCALE);
718 }
719 // Now we know we have an error.
720 else bc_parse_err(p, BC_ERR_PARSE_TOKEN);
721
722 *can_assign = false;
723
724 bc_parse_push(p, BC_INST_ONE);
725 bc_parse_push(p, inst);
726 }
727 }
728
729 /**
730 * Parses the minus operator. This needs special treatment because it is either
731 * subtract or negation.
732 * @param p The parser.
733 * @param prev An in/out parameter; the previous instruction.
734 * @param ops_bgn The size of the operator stack.
735 * @param rparen True if the last token was a right paren.
736 * @param binlast True if the last token was a binary operator.
737 * @param nexprs An in/out parameter; the number of unused expressions.
738 */
bc_parse_minus(BcParse * p,BcInst * prev,size_t ops_bgn,bool rparen,bool binlast,size_t * nexprs)739 static void bc_parse_minus(BcParse *p, BcInst *prev, size_t ops_bgn,
740 bool rparen, bool binlast, size_t *nexprs)
741 {
742 BcLexType type;
743
744 bc_lex_next(&p->l);
745
746 // Figure out if it's a minus or a negation.
747 type = BC_PARSE_LEAF(*prev, binlast, rparen) ? BC_LEX_OP_MINUS : BC_LEX_NEG;
748 *prev = BC_PARSE_TOKEN_INST(type);
749
750 // We can just push onto the op stack because this is the largest
751 // precedence operator that gets pushed. Inc/dec does not.
752 if (type != BC_LEX_OP_MINUS) bc_vec_push(&p->ops, &type);
753 else bc_parse_operator(p, type, ops_bgn, nexprs);
754 }
755
756 /**
757 * Parses a string.
758 * @param p The parser.
759 * @param inst The instruction corresponding to how the string was found and
760 * how it should be printed.
761 */
bc_parse_str(BcParse * p,BcInst inst)762 static void bc_parse_str(BcParse *p, BcInst inst) {
763 bc_parse_addString(p);
764 bc_parse_push(p, inst);
765 bc_lex_next(&p->l);
766 }
767
768 /**
769 * Parses a print statement.
770 * @param p The parser.
771 */
bc_parse_print(BcParse * p,BcLexType type)772 static void bc_parse_print(BcParse *p, BcLexType type) {
773
774 BcLexType t;
775 bool comma = false;
776 BcInst inst = type == BC_LEX_KW_STREAM ?
777 BC_INST_PRINT_STREAM : BC_INST_PRINT_POP;
778
779 bc_lex_next(&p->l);
780
781 t = p->l.t;
782
783 // A print or stream statement has to have *something*.
784 if (bc_parse_isDelimiter(p)) bc_parse_err(p, BC_ERR_PARSE_PRINT);
785
786 do {
787
788 // If the token is a string, then print it with escapes.
789 // BC_INST_PRINT_POP plays that role for bc.
790 if (t == BC_LEX_STR) bc_parse_str(p, inst);
791 else {
792 // We have an actual number; parse and add a print instruction.
793 bc_parse_expr_status(p, BC_PARSE_NEEDVAL, bc_parse_next_print);
794 bc_parse_push(p, inst);
795 }
796
797 // Is the next token a comma?
798 comma = (p->l.t == BC_LEX_COMMA);
799
800 // Get the next token if we have a comma.
801 if (comma) bc_lex_next(&p->l);
802 else {
803
804 // If we don't have a comma, the statement needs to end.
805 if (!bc_parse_isDelimiter(p))
806 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
807 else break;
808 }
809
810 t = p->l.t;
811
812 } while (true);
813
814 // If we have a comma but no token, that's bad.
815 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
816 }
817
818 /**
819 * Parses a return statement.
820 * @param p The parser.
821 */
bc_parse_return(BcParse * p)822 static void bc_parse_return(BcParse *p) {
823
824 BcLexType t;
825 bool paren;
826 uchar inst = BC_INST_RET0;
827
828 // If we are not in a function, that's an error.
829 if (BC_ERR(!BC_PARSE_FUNC(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
830
831 // If we are in a void function, make sure to return void.
832 if (p->func->voidfn) inst = BC_INST_RET_VOID;
833
834 bc_lex_next(&p->l);
835
836 t = p->l.t;
837 paren = (t == BC_LEX_LPAREN);
838
839 // An empty return statement just needs to push the selected instruction.
840 if (bc_parse_isDelimiter(p)) bc_parse_push(p, inst);
841 else {
842
843 BcParseStatus s;
844
845 // Need to parse the expression whose value will be returned.
846 s = bc_parse_expr_err(p, BC_PARSE_NEEDVAL, bc_parse_next_expr);
847
848 // If the expression was empty, just push the selected instruction.
849 if (s == BC_PARSE_STATUS_EMPTY_EXPR) {
850 bc_parse_push(p, inst);
851 bc_lex_next(&p->l);
852 }
853
854 // POSIX requires parentheses.
855 if (!paren || p->l.last != BC_LEX_RPAREN) {
856 bc_parse_err(p, BC_ERR_POSIX_RET);
857 }
858
859 // Void functions require an empty expression.
860 if (BC_ERR(p->func->voidfn)) {
861 if (s != BC_PARSE_STATUS_EMPTY_EXPR)
862 bc_parse_verr(p, BC_ERR_PARSE_RET_VOID, p->func->name);
863 }
864 // If we got here, we want to be sure to end the function with a real
865 // return instruction, just in case.
866 else bc_parse_push(p, BC_INST_RET);
867 }
868 }
869
870 /**
871 * Clears flags that indicate the end of an if statement and its block and sets
872 * the jump location.
873 * @param p The parser.
874 */
bc_parse_noElse(BcParse * p)875 static void bc_parse_noElse(BcParse *p) {
876 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p);
877 *flag_ptr = (*flag_ptr & ~(BC_PARSE_FLAG_IF_END));
878 bc_parse_setLabel(p);
879 }
880
881 /**
882 * Ends (finishes parsing) the body of a control statement or a function.
883 * @param p The parser.
884 * @param brace True if the body was ended by a brace, false otherwise.
885 */
bc_parse_endBody(BcParse * p,bool brace)886 static void bc_parse_endBody(BcParse *p, bool brace) {
887
888 bool has_brace, new_else = false;
889
890 // We cannot be ending a body if there are no bodies to end.
891 if (BC_ERR(p->flags.len <= 1)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
892
893 if (brace) {
894
895 // The brace was already gotten; make sure that the caller did not lie.
896 // We check for the requirement of braces later.
897 assert(p->l.t == BC_LEX_RBRACE);
898
899 bc_lex_next(&p->l);
900
901 // If the next token is not a delimiter, that is a problem.
902 if (BC_ERR(!bc_parse_isDelimiter(p) && !bc_parse_TopFunc(p)))
903 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
904 }
905
906 // Do we have a brace flag?
907 has_brace = (BC_PARSE_BRACE(p) != 0);
908
909 do {
910 size_t len = p->flags.len;
911 bool loop;
912
913 // If we have a brace flag but not a brace, that's a problem.
914 if (has_brace && !brace) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
915
916 // Are we inside a loop?
917 loop = (BC_PARSE_LOOP_INNER(p) != 0);
918
919 // If we are ending a loop or an else...
920 if (loop || BC_PARSE_ELSE(p)) {
921
922 // Loops have condition labels that we have to take care of as well.
923 if (loop) {
924
925 size_t *label = bc_vec_top(&p->conds);
926
927 bc_parse_push(p, BC_INST_JUMP);
928 bc_parse_pushIndex(p, *label);
929
930 bc_vec_pop(&p->conds);
931 }
932
933 bc_parse_setLabel(p);
934 bc_vec_pop(&p->flags);
935 }
936 // If we are ending a function...
937 else if (BC_PARSE_FUNC_INNER(p)) {
938 BcInst inst = (p->func->voidfn ? BC_INST_RET_VOID : BC_INST_RET0);
939 bc_parse_push(p, inst);
940 bc_parse_updateFunc(p, BC_PROG_MAIN);
941 bc_vec_pop(&p->flags);
942 }
943 // If we have a brace flag and not an if statement, we can pop the top
944 // of the flags stack because they have been taken care of above.
945 else if (has_brace && !BC_PARSE_IF(p)) bc_vec_pop(&p->flags);
946
947 // This needs to be last to parse nested if's properly.
948 if (BC_PARSE_IF(p) && (len == p->flags.len || !BC_PARSE_BRACE(p))) {
949
950 // Eat newlines.
951 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l);
952
953 // *Now* we can pop the flags.
954 bc_vec_pop(&p->flags);
955
956 // If we are allowed non-POSIX stuff...
957 if (!BC_S) {
958
959 // Have we found yet another dangling else?
960 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_IF_END;
961 new_else = (p->l.t == BC_LEX_KW_ELSE);
962
963 // Parse the else or end the if statement body.
964 if (new_else) bc_parse_else(p);
965 else if (!has_brace && (!BC_PARSE_IF_END(p) || brace))
966 bc_parse_noElse(p);
967 }
968 // POSIX requires us to do the bare minimum only.
969 else bc_parse_noElse(p);
970 }
971
972 // If these are both true, we have "used" the braces that we found.
973 if (brace && has_brace) brace = false;
974
975 // This condition was perhaps the hardest single part of the parser. If the
976 // flags stack does not have enough, we should stop. If we have a new else
977 // statement, we should stop. If we do have the end of an if statement and
978 // we have eaten the brace, we should stop. If we do have a brace flag, we
979 // should stop.
980 } while (p->flags.len > 1 && !new_else && (!BC_PARSE_IF_END(p) || brace) &&
981 !(has_brace = (BC_PARSE_BRACE(p) != 0)));
982
983 // If we have a brace, yet no body for it, that's a problem.
984 if (BC_ERR(p->flags.len == 1 && brace))
985 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
986 else if (brace && BC_PARSE_BRACE(p)) {
987
988 // If we make it here, we have a brace and a flag for it.
989 uint16_t flags = BC_PARSE_TOP_FLAG(p);
990
991 // This condition ensure that the *last* body is correctly finished by
992 // popping its flags.
993 if (!(flags & (BC_PARSE_FLAG_FUNC_INNER | BC_PARSE_FLAG_LOOP_INNER)) &&
994 !(flags & (BC_PARSE_FLAG_IF | BC_PARSE_FLAG_ELSE)) &&
995 !(flags & (BC_PARSE_FLAG_IF_END)))
996 {
997 bc_vec_pop(&p->flags);
998 }
999 }
1000 }
1001
1002 /**
1003 * Starts the body of a control statement or function.
1004 * @param p The parser.
1005 * @param flags The current flags (will be edited).
1006 */
bc_parse_startBody(BcParse * p,uint16_t flags)1007 static void bc_parse_startBody(BcParse *p, uint16_t flags) {
1008 assert(flags);
1009 flags |= (BC_PARSE_TOP_FLAG(p) & (BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_LOOP));
1010 flags |= BC_PARSE_FLAG_BODY;
1011 bc_vec_push(&p->flags, &flags);
1012 }
1013
bc_parse_endif(BcParse * p)1014 void bc_parse_endif(BcParse *p) {
1015
1016 size_t i;
1017 bool good;
1018
1019 // Not a problem if this is true.
1020 if (BC_NO_ERR(!BC_PARSE_NO_EXEC(p))) return;
1021
1022 good = true;
1023
1024 // Find an instance of a body that needs closing, i.e., a statement that did
1025 // not have a right brace when it should have.
1026 for (i = 0; good && i < p->flags.len; ++i) {
1027 uint16_t flag = *((uint16_t*) bc_vec_item(&p->flags, i));
1028 good = ((flag & BC_PARSE_FLAG_BRACE) != BC_PARSE_FLAG_BRACE);
1029 }
1030
1031 // If we did not find such an instance...
1032 if (good) {
1033
1034 // We set this to restore it later. We don't want the parser thinking
1035 // that we are on stdin for this one because it will want more.
1036 bool is_stdin = vm.is_stdin;
1037
1038 vm.is_stdin = false;
1039
1040 // End all of the if statements and loops.
1041 while (p->flags.len > 1 || BC_PARSE_IF_END(p)) {
1042 if (BC_PARSE_IF_END(p)) bc_parse_noElse(p);
1043 if (p->flags.len > 1) bc_parse_endBody(p, false);
1044 }
1045
1046 vm.is_stdin = is_stdin;
1047 }
1048 // If we reach here, a block was not properly closed, and we should error.
1049 else bc_parse_err(&vm.prs, BC_ERR_PARSE_BLOCK);
1050 }
1051
1052 /**
1053 * Parses an if statement.
1054 * @param p The parser.
1055 */
bc_parse_if(BcParse * p)1056 static void bc_parse_if(BcParse *p) {
1057
1058 // We are allowed relational operators, and we must have a value.
1059 size_t idx;
1060 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL);
1061
1062 // Get the left paren and barf if necessary.
1063 bc_lex_next(&p->l);
1064 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1065 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1066
1067 // Parse the condition.
1068 bc_lex_next(&p->l);
1069 bc_parse_expr_status(p, flags, bc_parse_next_rel);
1070
1071 // Must have a right paren.
1072 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
1073 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1074
1075 bc_lex_next(&p->l);
1076
1077 // Insert the conditional jump instruction.
1078 bc_parse_push(p, BC_INST_JUMP_ZERO);
1079
1080 idx = p->func->labels.len;
1081
1082 // Push the index for the instruction and create an exit label for an else
1083 // statement.
1084 bc_parse_pushIndex(p, idx);
1085 bc_parse_createExitLabel(p, idx, false);
1086
1087 bc_parse_startBody(p, BC_PARSE_FLAG_IF);
1088 }
1089
1090 /**
1091 * Parses an else statement.
1092 * @param p The parser.
1093 */
bc_parse_else(BcParse * p)1094 static void bc_parse_else(BcParse *p) {
1095
1096 size_t idx = p->func->labels.len;
1097
1098 // We must be at the end of an if statement.
1099 if (BC_ERR(!BC_PARSE_IF_END(p)))
1100 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1101
1102 // Push an unconditional jump to make bc jump over the else statement if it
1103 // executed the original if statement.
1104 bc_parse_push(p, BC_INST_JUMP);
1105 bc_parse_pushIndex(p, idx);
1106
1107 // Clear the else stuff. Yes, that function is misnamed for its use here,
1108 // but deal with it.
1109 bc_parse_noElse(p);
1110
1111 // Create the exit label and parse the body.
1112 bc_parse_createExitLabel(p, idx, false);
1113 bc_parse_startBody(p, BC_PARSE_FLAG_ELSE);
1114
1115 bc_lex_next(&p->l);
1116 }
1117
1118 /**
1119 * Parse a while loop.
1120 * @param p The parser.
1121 */
bc_parse_while(BcParse * p)1122 static void bc_parse_while(BcParse *p) {
1123
1124 // We are allowed relational operators, and we must have a value.
1125 size_t idx;
1126 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL);
1127
1128 // Get the left paren and barf if necessary.
1129 bc_lex_next(&p->l);
1130 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1131 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1132 bc_lex_next(&p->l);
1133
1134 // Create the labels. Loops need both.
1135 bc_parse_createCondLabel(p, p->func->labels.len);
1136 idx = p->func->labels.len;
1137 bc_parse_createExitLabel(p, idx, true);
1138
1139 // Parse the actual condition and barf on non-right paren.
1140 bc_parse_expr_status(p, flags, bc_parse_next_rel);
1141 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
1142 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1143 bc_lex_next(&p->l);
1144
1145 // Now we can push the conditional jump and start the body.
1146 bc_parse_push(p, BC_INST_JUMP_ZERO);
1147 bc_parse_pushIndex(p, idx);
1148 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER);
1149 }
1150
1151 /**
1152 * Parse a for loop.
1153 * @param p The parser.
1154 */
bc_parse_for(BcParse * p)1155 static void bc_parse_for(BcParse *p) {
1156
1157 size_t cond_idx, exit_idx, body_idx, update_idx;
1158
1159 // Barf on the missing left paren.
1160 bc_lex_next(&p->l);
1161 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1162 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1163 bc_lex_next(&p->l);
1164
1165 // The first statement can be empty, but if it is, check for error in POSIX
1166 // mode. Otherwise, parse it.
1167 if (p->l.t != BC_LEX_SCOLON)
1168 bc_parse_expr_status(p, 0, bc_parse_next_for);
1169 else bc_parse_err(p, BC_ERR_POSIX_FOR);
1170
1171 // Must have a semicolon.
1172 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1173 bc_lex_next(&p->l);
1174
1175 // These are indices for labels. There are so many of them because the end
1176 // of the loop must unconditionally jump to the update code. Then the update
1177 // code must unconditionally jump to the condition code. Then the condition
1178 // code must *conditionally* jump to the exit.
1179 cond_idx = p->func->labels.len;
1180 update_idx = cond_idx + 1;
1181 body_idx = update_idx + 1;
1182 exit_idx = body_idx + 1;
1183
1184 // This creates the condition label.
1185 bc_parse_createLabel(p, p->func->code.len);
1186
1187 // Parse an expression if it exists.
1188 if (p->l.t != BC_LEX_SCOLON) {
1189 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL);
1190 bc_parse_expr_status(p, flags, bc_parse_next_for);
1191 }
1192 else {
1193
1194 // Set this for the next call to bc_parse_number because an empty
1195 // condition means that it is an infinite loop, so the condition must be
1196 // non-zero. This is safe to set because the current token is a
1197 // semicolon, which has no string requirement.
1198 bc_vec_string(&p->l.str, sizeof(bc_parse_one) - 1, bc_parse_one);
1199 bc_parse_number(p);
1200
1201 // An empty condition makes POSIX mad.
1202 bc_parse_err(p, BC_ERR_POSIX_FOR);
1203 }
1204
1205 // Must have a semicolon.
1206 if (BC_ERR(p->l.t != BC_LEX_SCOLON))
1207 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1208 bc_lex_next(&p->l);
1209
1210 // Now we can set up the conditional jump to the exit and an unconditional
1211 // jump to the body right after. The unconditional jump to the body is
1212 // because there is update code coming right after the condition, so we need
1213 // to skip it to get to the body.
1214 bc_parse_push(p, BC_INST_JUMP_ZERO);
1215 bc_parse_pushIndex(p, exit_idx);
1216 bc_parse_push(p, BC_INST_JUMP);
1217 bc_parse_pushIndex(p, body_idx);
1218
1219 // Now create the label for the update code.
1220 bc_parse_createCondLabel(p, update_idx);
1221
1222 // Parse if not empty, and if it is, let POSIX yell if necessary.
1223 if (p->l.t != BC_LEX_RPAREN)
1224 bc_parse_expr_status(p, 0, bc_parse_next_rel);
1225 else bc_parse_err(p, BC_ERR_POSIX_FOR);
1226
1227 // Must have a right paren.
1228 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
1229 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1230
1231 // Set up a jump to the condition right after the update code.
1232 bc_parse_push(p, BC_INST_JUMP);
1233 bc_parse_pushIndex(p, cond_idx);
1234 bc_parse_createLabel(p, p->func->code.len);
1235
1236 // Create an exit label for the body and start the body.
1237 bc_parse_createExitLabel(p, exit_idx, true);
1238 bc_lex_next(&p->l);
1239 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER);
1240 }
1241
1242 /**
1243 * Parse a statement or token that indicates a loop exit. This includes an
1244 * actual loop exit, the break keyword, or the continue keyword.
1245 * @param p The parser.
1246 * @param type The type of exit.
1247 */
bc_parse_loopExit(BcParse * p,BcLexType type)1248 static void bc_parse_loopExit(BcParse *p, BcLexType type) {
1249
1250 size_t i;
1251 BcInstPtr *ip;
1252
1253 // Must have a loop. If we don't, that's an error.
1254 if (BC_ERR(!BC_PARSE_LOOP(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1255
1256 // If we have a break statement...
1257 if (type == BC_LEX_KW_BREAK) {
1258
1259 // If there are no exits, something went wrong somewhere.
1260 if (BC_ERR(!p->exits.len)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1261
1262 // Get the exit.
1263 i = p->exits.len - 1;
1264 ip = bc_vec_item(&p->exits, i);
1265
1266 // The condition !ip->func is true if the exit is not for a loop, so we
1267 // need to find the first actual loop exit.
1268 while (!ip->func && i < p->exits.len) ip = bc_vec_item(&p->exits, i--);
1269
1270 // Make sure everything is hunky dory.
1271 assert(ip != NULL && (i < p->exits.len || ip->func));
1272
1273 // Set the index for the exit.
1274 i = ip->idx;
1275 }
1276 // If we have a continue statement or just the loop end, jump to the
1277 // condition (or update for a foor loop).
1278 else i = *((size_t*) bc_vec_top(&p->conds));
1279
1280 // Add the unconditional jump.
1281 bc_parse_push(p, BC_INST_JUMP);
1282 bc_parse_pushIndex(p, i);
1283
1284 bc_lex_next(&p->l);
1285 }
1286
1287 /**
1288 * Parse a function (header).
1289 * @param p The parser.
1290 */
bc_parse_func(BcParse * p)1291 static void bc_parse_func(BcParse *p) {
1292
1293 bool comma = false, voidfn;
1294 uint16_t flags;
1295 size_t idx;
1296
1297 bc_lex_next(&p->l);
1298
1299 // Must have a name.
1300 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_FUNC);
1301
1302 // If the name is "void", and POSIX is not on, mark as void.
1303 voidfn = (!BC_IS_POSIX && p->l.t == BC_LEX_NAME &&
1304 !strcmp(p->l.str.v, "void"));
1305
1306 // We can safely do this because the expected token should not overwrite the
1307 // function name.
1308 bc_lex_next(&p->l);
1309
1310 // If we *don't* have another name, then void is the name of the function.
1311 voidfn = (voidfn && p->l.t == BC_LEX_NAME);
1312
1313 // With a void function, allow POSIX to complain and get a new token.
1314 if (voidfn) {
1315
1316 bc_parse_err(p, BC_ERR_POSIX_VOID);
1317
1318 // We can safely do this because the expected token should not overwrite
1319 // the function name.
1320 bc_lex_next(&p->l);
1321 }
1322
1323 // Must have a left paren.
1324 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1325 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1326
1327 // Make sure the functions map and vector are synchronized.
1328 assert(p->prog->fns.len == p->prog->fn_map.len);
1329
1330 // Insert the function by name into the map and vector.
1331 idx = bc_program_insertFunc(p->prog, p->l.str.v);
1332
1333 // Make sure the insert worked.
1334 assert(idx);
1335
1336 // Update the function pointer and stuff in the parser and set its void.
1337 bc_parse_updateFunc(p, idx);
1338 p->func->voidfn = voidfn;
1339
1340 bc_lex_next(&p->l);
1341
1342 // While we do not have a right paren, we are still parsing arguments.
1343 while (p->l.t != BC_LEX_RPAREN) {
1344
1345 BcType t = BC_TYPE_VAR;
1346
1347 // If we have an asterisk, we are parsing a reference argument.
1348 if (p->l.t == BC_LEX_OP_MULTIPLY) {
1349
1350 t = BC_TYPE_REF;
1351 bc_lex_next(&p->l);
1352
1353 // Let POSIX complain if necessary.
1354 bc_parse_err(p, BC_ERR_POSIX_REF);
1355 }
1356
1357 // If we don't have a name, the argument will not have a name. Barf.
1358 if (BC_ERR(p->l.t != BC_LEX_NAME))
1359 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1360
1361 // Increment the number of parameters.
1362 p->func->nparams += 1;
1363
1364 // Copy the string in the lexer so that we can use the lexer again.
1365 bc_vec_string(&p->buf, p->l.str.len, p->l.str.v);
1366
1367 bc_lex_next(&p->l);
1368
1369 // We are parsing an array parameter if this is true.
1370 if (p->l.t == BC_LEX_LBRACKET) {
1371
1372 // Set the array type, unless we are already parsing a reference.
1373 if (t == BC_TYPE_VAR) t = BC_TYPE_ARRAY;
1374
1375 bc_lex_next(&p->l);
1376
1377 // The brackets *must* be empty.
1378 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
1379 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1380
1381 bc_lex_next(&p->l);
1382 }
1383 // If we did *not* get a bracket, but we are expecting a reference, we
1384 // have a problem.
1385 else if (BC_ERR(t == BC_TYPE_REF))
1386 bc_parse_verr(p, BC_ERR_PARSE_REF_VAR, p->buf.v);
1387
1388 // Test for comma and get the next token if it exists.
1389 comma = (p->l.t == BC_LEX_COMMA);
1390 if (comma) bc_lex_next(&p->l);
1391
1392 // Insert the parameter into the function.
1393 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line);
1394 }
1395
1396 // If we have a comma, but no parameter, barf.
1397 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC);
1398
1399 // Start the body.
1400 flags = BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_FUNC_INNER;
1401 bc_parse_startBody(p, flags);
1402
1403 bc_lex_next(&p->l);
1404
1405 // POSIX requires that a brace be on the same line as the function header.
1406 // If we don't have a brace, let POSIX throw an error.
1407 if (p->l.t != BC_LEX_LBRACE) bc_parse_err(p, BC_ERR_POSIX_BRACE);
1408 }
1409
1410 /**
1411 * Parse an auto list.
1412 * @param p The parser.
1413 */
bc_parse_auto(BcParse * p)1414 static void bc_parse_auto(BcParse *p) {
1415
1416 bool comma, one;
1417
1418 // Error if the auto keyword appeared in the wrong place.
1419 if (BC_ERR(!p->auto_part)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1420 bc_lex_next(&p->l);
1421
1422 p->auto_part = comma = false;
1423
1424 // We need at least one variable or array.
1425 one = (p->l.t == BC_LEX_NAME);
1426
1427 // While we have a variable or array.
1428 while (p->l.t == BC_LEX_NAME) {
1429
1430 BcType t;
1431
1432 // Copy the name from the lexer, so we can use it again.
1433 bc_vec_string(&p->buf, p->l.str.len - 1, p->l.str.v);
1434
1435 bc_lex_next(&p->l);
1436
1437 // If we are parsing an array...
1438 if (p->l.t == BC_LEX_LBRACKET) {
1439
1440 t = BC_TYPE_ARRAY;
1441
1442 bc_lex_next(&p->l);
1443
1444 // The brackets *must* be empty.
1445 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
1446 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1447
1448 bc_lex_next(&p->l);
1449 }
1450 else t = BC_TYPE_VAR;
1451
1452 // Test for comma and get the next token if it exists.
1453 comma = (p->l.t == BC_LEX_COMMA);
1454 if (comma) bc_lex_next(&p->l);
1455
1456 // Insert the auto into the function.
1457 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line);
1458 }
1459
1460 // If we have a comma, but no auto, barf.
1461 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC);
1462
1463 // If we don't have any variables or arrays, barf.
1464 if (BC_ERR(!one)) bc_parse_err(p, BC_ERR_PARSE_NO_AUTO);
1465
1466 // The auto statement should be all that's in the statement.
1467 if (BC_ERR(!bc_parse_isDelimiter(p)))
1468 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1469 }
1470
1471 /**
1472 * Parses a body.
1473 * @param p The parser.
1474 * @param brace True if a brace was encountered, false otherwise.
1475 */
bc_parse_body(BcParse * p,bool brace)1476 static void bc_parse_body(BcParse *p, bool brace) {
1477
1478 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p);
1479
1480 assert(flag_ptr != NULL);
1481 assert(p->flags.len >= 2);
1482
1483 // The body flag is for when we expect a body. We got a body, so clear the
1484 // flag.
1485 *flag_ptr &= ~(BC_PARSE_FLAG_BODY);
1486
1487 // If we are inside a function, that means we just barely entered it, and
1488 // we can expect an auto list.
1489 if (*flag_ptr & BC_PARSE_FLAG_FUNC_INNER) {
1490
1491 // We *must* have a brace in this case.
1492 if (BC_ERR(!brace)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1493
1494 p->auto_part = (p->l.t != BC_LEX_KW_AUTO);
1495
1496 if (!p->auto_part) {
1497
1498 // Make sure this is true to not get a parse error.
1499 p->auto_part = true;
1500
1501 // Since we already have the auto keyword, parse.
1502 bc_parse_auto(p);
1503 }
1504
1505 // Eat a newline.
1506 if (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l);
1507 }
1508 else {
1509
1510 // This is the easy part.
1511 size_t len = p->flags.len;
1512
1513 assert(*flag_ptr);
1514
1515 // Parse a statement.
1516 bc_parse_stmt(p);
1517
1518 // This is a very important condition to get right. If there is no
1519 // brace, and no body flag, and the flags len hasn't shrunk, then we
1520 // have a body that was not delimited by braces, so we need to end it
1521 // now, after just one statement.
1522 if (!brace && !BC_PARSE_BODY(p) && len <= p->flags.len)
1523 bc_parse_endBody(p, false);
1524 }
1525 }
1526
1527 /**
1528 * Parses a statement. This is the entry point for just about everything, except
1529 * function definitions.
1530 * @param p The parser.
1531 */
bc_parse_stmt(BcParse * p)1532 static void bc_parse_stmt(BcParse *p) {
1533
1534 size_t len;
1535 uint16_t flags;
1536 BcLexType type = p->l.t;
1537
1538 // Eat newline.
1539 if (type == BC_LEX_NLINE) {
1540 bc_lex_next(&p->l);
1541 return;
1542 }
1543
1544 // Eat auto list.
1545 if (type == BC_LEX_KW_AUTO) {
1546 bc_parse_auto(p);
1547 return;
1548 }
1549
1550 // If we reach this point, no auto list is allowed.
1551 p->auto_part = false;
1552
1553 // Everything but an else needs to be taken care of here, but else is
1554 // special.
1555 if (type != BC_LEX_KW_ELSE) {
1556
1557 // After an if, no else found.
1558 if (BC_PARSE_IF_END(p)) {
1559
1560 // Clear the expectation for else, end body, and return. Returning
1561 // gives us a clean slate for parsing again.
1562 bc_parse_noElse(p);
1563 if (p->flags.len > 1 && !BC_PARSE_BRACE(p))
1564 bc_parse_endBody(p, false);
1565 return;
1566 }
1567 // With a left brace, we are parsing a body.
1568 else if (type == BC_LEX_LBRACE) {
1569
1570 // We need to start a body if we are not expecting one yet.
1571 if (!BC_PARSE_BODY(p)) {
1572 bc_parse_startBody(p, BC_PARSE_FLAG_BRACE);
1573 bc_lex_next(&p->l);
1574 }
1575 // If we *are* expecting a body, that body should get a brace. This
1576 // takes care of braces being on a different line than if and loop
1577 // headers.
1578 else {
1579 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_BRACE;
1580 bc_lex_next(&p->l);
1581 bc_parse_body(p, true);
1582 }
1583
1584 // If we have reached this point, we need to return for a clean
1585 // slate.
1586 return;
1587 }
1588 // This happens when we are expecting a body and get a single statement,
1589 // i.e., a body with no braces surrounding it. Returns after for a clean
1590 // slate.
1591 else if (BC_PARSE_BODY(p) && !BC_PARSE_BRACE(p)) {
1592 bc_parse_body(p, false);
1593 return;
1594 }
1595 }
1596
1597 len = p->flags.len;
1598 flags = BC_PARSE_TOP_FLAG(p);
1599
1600 switch (type) {
1601
1602 // All of these are valid for expressions.
1603 case BC_LEX_OP_INC:
1604 case BC_LEX_OP_DEC:
1605 case BC_LEX_OP_MINUS:
1606 case BC_LEX_OP_BOOL_NOT:
1607 case BC_LEX_LPAREN:
1608 case BC_LEX_NAME:
1609 case BC_LEX_NUMBER:
1610 case BC_LEX_KW_IBASE:
1611 case BC_LEX_KW_LAST:
1612 case BC_LEX_KW_LENGTH:
1613 case BC_LEX_KW_OBASE:
1614 case BC_LEX_KW_SCALE:
1615 #if BC_ENABLE_EXTRA_MATH
1616 case BC_LEX_KW_SEED:
1617 #endif // BC_ENABLE_EXTRA_MATH
1618 case BC_LEX_KW_SQRT:
1619 case BC_LEX_KW_ABS:
1620 #if BC_ENABLE_EXTRA_MATH
1621 case BC_LEX_KW_IRAND:
1622 #endif // BC_ENABLE_EXTRA_MATH
1623 case BC_LEX_KW_ASCIIFY:
1624 case BC_LEX_KW_MODEXP:
1625 case BC_LEX_KW_DIVMOD:
1626 case BC_LEX_KW_READ:
1627 #if BC_ENABLE_EXTRA_MATH
1628 case BC_LEX_KW_RAND:
1629 #endif // BC_ENABLE_EXTRA_MATH
1630 case BC_LEX_KW_MAXIBASE:
1631 case BC_LEX_KW_MAXOBASE:
1632 case BC_LEX_KW_MAXSCALE:
1633 #if BC_ENABLE_EXTRA_MATH
1634 case BC_LEX_KW_MAXRAND:
1635 #endif // BC_ENABLE_EXTRA_MATH
1636 case BC_LEX_KW_LINE_LENGTH:
1637 case BC_LEX_KW_GLOBAL_STACKS:
1638 case BC_LEX_KW_LEADING_ZERO:
1639 {
1640 bc_parse_expr_status(p, BC_PARSE_PRINT, bc_parse_next_expr);
1641 break;
1642 }
1643
1644 case BC_LEX_KW_ELSE:
1645 {
1646 bc_parse_else(p);
1647 break;
1648 }
1649
1650 // Just eat.
1651 case BC_LEX_SCOLON:
1652 {
1653 // Do nothing.
1654 break;
1655 }
1656
1657 case BC_LEX_RBRACE:
1658 {
1659 bc_parse_endBody(p, true);
1660 break;
1661 }
1662
1663 case BC_LEX_STR:
1664 {
1665 bc_parse_str(p, BC_INST_PRINT_STR);
1666 break;
1667 }
1668
1669 case BC_LEX_KW_BREAK:
1670 case BC_LEX_KW_CONTINUE:
1671 {
1672 bc_parse_loopExit(p, p->l.t);
1673 break;
1674 }
1675
1676 case BC_LEX_KW_FOR:
1677 {
1678 bc_parse_for(p);
1679 break;
1680 }
1681
1682 case BC_LEX_KW_HALT:
1683 {
1684 bc_parse_push(p, BC_INST_HALT);
1685 bc_lex_next(&p->l);
1686 break;
1687 }
1688
1689 case BC_LEX_KW_IF:
1690 {
1691 bc_parse_if(p);
1692 break;
1693 }
1694
1695 case BC_LEX_KW_LIMITS:
1696 {
1697 // `limits` is a compile-time command, so execute it right away.
1698 bc_vm_printf("BC_LONG_BIT = %lu\n", (ulong) BC_LONG_BIT);
1699 bc_vm_printf("BC_BASE_DIGS = %lu\n", (ulong) BC_BASE_DIGS);
1700 bc_vm_printf("BC_BASE_POW = %lu\n", (ulong) BC_BASE_POW);
1701 bc_vm_printf("BC_OVERFLOW_MAX = %lu\n", (ulong) BC_NUM_BIGDIG_MAX);
1702 bc_vm_printf("\n");
1703 bc_vm_printf("BC_BASE_MAX = %lu\n", BC_MAX_OBASE);
1704 bc_vm_printf("BC_DIM_MAX = %lu\n", BC_MAX_DIM);
1705 bc_vm_printf("BC_SCALE_MAX = %lu\n", BC_MAX_SCALE);
1706 bc_vm_printf("BC_STRING_MAX = %lu\n", BC_MAX_STRING);
1707 bc_vm_printf("BC_NAME_MAX = %lu\n", BC_MAX_NAME);
1708 bc_vm_printf("BC_NUM_MAX = %lu\n", BC_MAX_NUM);
1709 #if BC_ENABLE_EXTRA_MATH
1710 bc_vm_printf("BC_RAND_MAX = %lu\n", BC_MAX_RAND);
1711 #endif // BC_ENABLE_EXTRA_MATH
1712 bc_vm_printf("MAX Exponent = %lu\n", BC_MAX_EXP);
1713 bc_vm_printf("Number of vars = %lu\n", BC_MAX_VARS);
1714
1715 bc_lex_next(&p->l);
1716
1717 break;
1718 }
1719
1720 case BC_LEX_KW_STREAM:
1721 case BC_LEX_KW_PRINT:
1722 {
1723 bc_parse_print(p, type);
1724 break;
1725 }
1726
1727 case BC_LEX_KW_QUIT:
1728 {
1729 // Quit is a compile-time command. We don't exit directly, so the vm
1730 // can clean up.
1731 vm.status = BC_STATUS_QUIT;
1732 BC_JMP;
1733 break;
1734 }
1735
1736 case BC_LEX_KW_RETURN:
1737 {
1738 bc_parse_return(p);
1739 break;
1740 }
1741
1742 case BC_LEX_KW_WHILE:
1743 {
1744 bc_parse_while(p);
1745 break;
1746 }
1747
1748 default:
1749 {
1750 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1751 }
1752 }
1753
1754 // If the flags did not change, we expect a delimiter.
1755 if (len == p->flags.len && flags == BC_PARSE_TOP_FLAG(p)) {
1756 if (BC_ERR(!bc_parse_isDelimiter(p)))
1757 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1758 }
1759
1760 // Make sure semicolons are eaten.
1761 while (p->l.t == BC_LEX_SCOLON) bc_lex_next(&p->l);
1762
1763 // POSIX's grammar does not allow a function definition after a semicolon
1764 // without a newline, so check specifically for that case and error if
1765 // the POSIX standard flag is set.
1766 if (p->l.last == BC_LEX_SCOLON && p->l.t == BC_LEX_KW_DEFINE && BC_IS_POSIX)
1767 {
1768 bc_parse_err(p, BC_ERR_POSIX_FUNC_AFTER_SEMICOLON);
1769 }
1770 }
1771
bc_parse_parse(BcParse * p)1772 void bc_parse_parse(BcParse *p) {
1773
1774 assert(p);
1775
1776 BC_SETJMP_LOCKED(exit);
1777
1778 // We should not let an EOF get here unless some partial parse was not
1779 // completed, in which case, it's the user's fault.
1780 if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF);
1781
1782 // Functions need special parsing.
1783 else if (p->l.t == BC_LEX_KW_DEFINE) {
1784 if (BC_ERR(BC_PARSE_NO_EXEC(p))) {
1785 bc_parse_endif(p);
1786 if (BC_ERR(BC_PARSE_NO_EXEC(p)))
1787 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1788 }
1789 bc_parse_func(p);
1790 }
1791
1792 // Otherwise, parse a normal statement.
1793 else bc_parse_stmt(p);
1794
1795 exit:
1796
1797 // We need to reset on error.
1798 if (BC_ERR(((vm.status && vm.status != BC_STATUS_QUIT) || vm.sig)))
1799 bc_parse_reset(p);
1800
1801 BC_LONGJMP_CONT;
1802 BC_SIG_MAYLOCK;
1803 }
1804
1805 /**
1806 * Parse an expression. This is the actual implementation of the Shunting-Yard
1807 * Algorithm.
1808 * @param p The parser.
1809 * @param flags The flags for what is valid in the expression.
1810 * @param next A set of tokens for what is valid *after* the expression.
1811 * @return A parse status. In some places, an empty expression is an
1812 * error, and sometimes, it is required. This allows this function
1813 * to tell the caller if the expression was empty and let the
1814 * caller handle it.
1815 */
bc_parse_expr_err(BcParse * p,uint8_t flags,BcParseNext next)1816 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags,
1817 BcParseNext next)
1818 {
1819 BcInst prev = BC_INST_PRINT;
1820 uchar inst = BC_INST_INVALID;
1821 BcLexType top, t;
1822 size_t nexprs, ops_bgn;
1823 uint32_t i, nparens, nrelops;
1824 bool pfirst, rprn, done, get_token, assign, bin_last, incdec, can_assign;
1825
1826 // One of these *must* be true.
1827 assert(!(flags & BC_PARSE_PRINT) || !(flags & BC_PARSE_NEEDVAL));
1828
1829 // These are set very carefully. In fact, controlling the values of these
1830 // locals is the biggest part of making this work. ops_bgn especially is
1831 // important because it marks where the operator stack begins for *this*
1832 // invocation of this function. That's because bc_parse_expr_err() is
1833 // recursive (the Shunting-Yard Algorithm is most easily expressed
1834 // recursively when parsing subexpressions), and each invocation needs to
1835 // know where to stop.
1836 //
1837 // - nparens is the number of left parens without matches.
1838 // - nrelops is the number of relational operators that appear in the expr.
1839 // - nexprs is the number of unused expressions.
1840 // - rprn is a right paren encountered last.
1841 // - done means the expression has been fully parsed.
1842 // - get_token is true when a token is needed at the end of an iteration.
1843 // - assign is true when an assignment statement was parsed last.
1844 // - incdec is true when the previous operator was an inc or dec operator.
1845 // - can_assign is true when an assignemnt is valid.
1846 // - bin_last is true when the previous instruction was a binary operator.
1847 t = p->l.t;
1848 pfirst = (p->l.t == BC_LEX_LPAREN);
1849 nparens = nrelops = 0;
1850 nexprs = 0;
1851 ops_bgn = p->ops.len;
1852 rprn = done = get_token = assign = incdec = can_assign = false;
1853 bin_last = true;
1854
1855 // We want to eat newlines if newlines are not a valid ending token.
1856 // This is for spacing in things like for loop headers.
1857 if (!(flags & BC_PARSE_NOREAD)) {
1858 while ((t = p->l.t) == BC_LEX_NLINE) bc_lex_next(&p->l);
1859 }
1860
1861 // This is the Shunting-Yard algorithm loop.
1862 for (; !done && BC_PARSE_EXPR(t); t = p->l.t)
1863 {
1864 switch (t) {
1865
1866 case BC_LEX_OP_INC:
1867 case BC_LEX_OP_DEC:
1868 {
1869 // These operators can only be used with items that can be
1870 // assigned to.
1871 if (BC_ERR(incdec)) bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
1872
1873 bc_parse_incdec(p, &prev, &can_assign, &nexprs, flags);
1874
1875 rprn = get_token = bin_last = false;
1876 incdec = true;
1877 flags &= ~(BC_PARSE_ARRAY);
1878
1879 break;
1880 }
1881
1882 #if BC_ENABLE_EXTRA_MATH
1883 case BC_LEX_OP_TRUNC:
1884 {
1885 // The previous token must have been a leaf expression, or the
1886 // operator is in the wrong place.
1887 if (BC_ERR(!BC_PARSE_LEAF(prev, bin_last, rprn)))
1888 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1889
1890 // I can just add the instruction because
1891 // negative will already be taken care of.
1892 bc_parse_push(p, BC_INST_TRUNC);
1893
1894 rprn = can_assign = incdec = false;
1895 get_token = true;
1896 flags &= ~(BC_PARSE_ARRAY);
1897
1898 break;
1899 }
1900 #endif // BC_ENABLE_EXTRA_MATH
1901
1902 case BC_LEX_OP_MINUS:
1903 {
1904 bc_parse_minus(p, &prev, ops_bgn, rprn, bin_last, &nexprs);
1905
1906 rprn = get_token = can_assign = false;
1907
1908 // This is true if it was a binary operator last.
1909 bin_last = (prev == BC_INST_MINUS);
1910 if (bin_last) incdec = false;
1911
1912 flags &= ~(BC_PARSE_ARRAY);
1913
1914 break;
1915 }
1916
1917 // All of this group, including the fallthrough, is to parse binary
1918 // operators.
1919 case BC_LEX_OP_ASSIGN_POWER:
1920 case BC_LEX_OP_ASSIGN_MULTIPLY:
1921 case BC_LEX_OP_ASSIGN_DIVIDE:
1922 case BC_LEX_OP_ASSIGN_MODULUS:
1923 case BC_LEX_OP_ASSIGN_PLUS:
1924 case BC_LEX_OP_ASSIGN_MINUS:
1925 #if BC_ENABLE_EXTRA_MATH
1926 case BC_LEX_OP_ASSIGN_PLACES:
1927 case BC_LEX_OP_ASSIGN_LSHIFT:
1928 case BC_LEX_OP_ASSIGN_RSHIFT:
1929 #endif // BC_ENABLE_EXTRA_MATH
1930 case BC_LEX_OP_ASSIGN:
1931 {
1932 // We need to make sure the assignment is valid.
1933 if (!BC_PARSE_INST_VAR(prev))
1934 bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
1935 }
1936 // Fallthrough.
1937 BC_FALLTHROUGH
1938
1939 case BC_LEX_OP_POWER:
1940 case BC_LEX_OP_MULTIPLY:
1941 case BC_LEX_OP_DIVIDE:
1942 case BC_LEX_OP_MODULUS:
1943 case BC_LEX_OP_PLUS:
1944 #if BC_ENABLE_EXTRA_MATH
1945 case BC_LEX_OP_PLACES:
1946 case BC_LEX_OP_LSHIFT:
1947 case BC_LEX_OP_RSHIFT:
1948 #endif // BC_ENABLE_EXTRA_MATH
1949 case BC_LEX_OP_REL_EQ:
1950 case BC_LEX_OP_REL_LE:
1951 case BC_LEX_OP_REL_GE:
1952 case BC_LEX_OP_REL_NE:
1953 case BC_LEX_OP_REL_LT:
1954 case BC_LEX_OP_REL_GT:
1955 case BC_LEX_OP_BOOL_NOT:
1956 case BC_LEX_OP_BOOL_OR:
1957 case BC_LEX_OP_BOOL_AND:
1958 {
1959 // This is true if the operator if the token is a prefix
1960 // operator. This is only for boolean not.
1961 if (BC_PARSE_OP_PREFIX(t)) {
1962
1963 // Prefix operators are only allowed after binary operators
1964 // or prefix operators.
1965 if (BC_ERR(!bin_last && !BC_PARSE_OP_PREFIX(p->l.last)))
1966 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1967 }
1968 // If we execute the else, that means we have a binary operator.
1969 // If the previous operator was a prefix or a binary operator,
1970 // then a binary operator is not allowed.
1971 else if (BC_ERR(BC_PARSE_PREV_PREFIX(prev) || bin_last))
1972 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1973
1974 nrelops += (t >= BC_LEX_OP_REL_EQ && t <= BC_LEX_OP_REL_GT);
1975 prev = BC_PARSE_TOKEN_INST(t);
1976
1977 bc_parse_operator(p, t, ops_bgn, &nexprs);
1978
1979 rprn = incdec = can_assign = false;
1980 get_token = true;
1981 bin_last = !BC_PARSE_OP_PREFIX(t);
1982 flags &= ~(BC_PARSE_ARRAY);
1983
1984 break;
1985 }
1986
1987 case BC_LEX_LPAREN:
1988 {
1989 // A left paren is *not* allowed right after a leaf expr.
1990 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
1991 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1992
1993 nparens += 1;
1994 rprn = incdec = can_assign = false;
1995 get_token = true;
1996
1997 // Push the paren onto the operator stack.
1998 bc_vec_push(&p->ops, &t);
1999
2000 break;
2001 }
2002
2003 case BC_LEX_RPAREN:
2004 {
2005 // This needs to be a status. The error is handled in
2006 // bc_parse_expr_status().
2007 if (BC_ERR(p->l.last == BC_LEX_LPAREN))
2008 return BC_PARSE_STATUS_EMPTY_EXPR;
2009
2010 // The right paren must not come after a prefix or binary
2011 // operator.
2012 if (BC_ERR(bin_last || BC_PARSE_PREV_PREFIX(prev)))
2013 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2014
2015 // If there are no parens left, we are done, but we need another
2016 // token.
2017 if (!nparens) {
2018 done = true;
2019 get_token = false;
2020 break;
2021 }
2022
2023 nparens -= 1;
2024 rprn = true;
2025 get_token = bin_last = incdec = false;
2026
2027 bc_parse_rightParen(p, &nexprs);
2028
2029 break;
2030 }
2031
2032 case BC_LEX_STR:
2033 {
2034 // POSIX only allows strings alone.
2035 if (BC_IS_POSIX) bc_parse_err(p, BC_ERR_POSIX_EXPR_STRING);
2036
2037 // A string is a leaf and cannot come right after a leaf.
2038 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2039 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2040
2041 bc_parse_addString(p);
2042
2043 get_token = true;
2044 bin_last = rprn = false;
2045 nexprs += 1;
2046
2047 break;
2048 }
2049
2050 case BC_LEX_NAME:
2051 {
2052 // A name is a leaf and cannot come right after a leaf.
2053 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2054 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2055
2056 get_token = bin_last = false;
2057
2058 bc_parse_name(p, &prev, &can_assign, flags & ~BC_PARSE_NOCALL);
2059
2060 rprn = (prev == BC_INST_CALL);
2061 nexprs += 1;
2062 flags &= ~(BC_PARSE_ARRAY);
2063
2064 break;
2065 }
2066
2067 case BC_LEX_NUMBER:
2068 {
2069 // A number is a leaf and cannot come right after a leaf.
2070 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2071 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2072
2073 // The number instruction is pushed in here.
2074 bc_parse_number(p);
2075
2076 nexprs += 1;
2077 prev = BC_INST_NUM;
2078 get_token = true;
2079 rprn = bin_last = can_assign = false;
2080 flags &= ~(BC_PARSE_ARRAY);
2081
2082 break;
2083 }
2084
2085 case BC_LEX_KW_IBASE:
2086 case BC_LEX_KW_LAST:
2087 case BC_LEX_KW_OBASE:
2088 #if BC_ENABLE_EXTRA_MATH
2089 case BC_LEX_KW_SEED:
2090 #endif // BC_ENABLE_EXTRA_MATH
2091 {
2092 // All of these are leaves and cannot come right after a leaf.
2093 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2094 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2095
2096 prev = t - BC_LEX_KW_LAST + BC_INST_LAST;
2097 bc_parse_push(p, prev);
2098
2099 get_token = can_assign = true;
2100 rprn = bin_last = false;
2101 nexprs += 1;
2102 flags &= ~(BC_PARSE_ARRAY);
2103
2104 break;
2105 }
2106
2107 case BC_LEX_KW_LENGTH:
2108 case BC_LEX_KW_SQRT:
2109 case BC_LEX_KW_ABS:
2110 #if BC_ENABLE_EXTRA_MATH
2111 case BC_LEX_KW_IRAND:
2112 #endif // BC_ENABLE_EXTRA_MATH
2113 case BC_LEX_KW_ASCIIFY:
2114 {
2115 // All of these are leaves and cannot come right after a leaf.
2116 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2117 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2118
2119 bc_parse_builtin(p, t, flags, &prev);
2120
2121 rprn = get_token = bin_last = incdec = can_assign = false;
2122 nexprs += 1;
2123 flags &= ~(BC_PARSE_ARRAY);
2124
2125 break;
2126 }
2127
2128 case BC_LEX_KW_READ:
2129 #if BC_ENABLE_EXTRA_MATH
2130 case BC_LEX_KW_RAND:
2131 #endif // BC_ENABLE_EXTRA_MATH
2132 case BC_LEX_KW_MAXIBASE:
2133 case BC_LEX_KW_MAXOBASE:
2134 case BC_LEX_KW_MAXSCALE:
2135 #if BC_ENABLE_EXTRA_MATH
2136 case BC_LEX_KW_MAXRAND:
2137 #endif // BC_ENABLE_EXTRA_MATH
2138 case BC_LEX_KW_LINE_LENGTH:
2139 case BC_LEX_KW_GLOBAL_STACKS:
2140 case BC_LEX_KW_LEADING_ZERO:
2141 {
2142 // All of these are leaves and cannot come right after a leaf.
2143 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2144 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2145
2146 // Error if we have read and it's not allowed.
2147 else if (t == BC_LEX_KW_READ && BC_ERR(flags & BC_PARSE_NOREAD))
2148 bc_parse_err(p, BC_ERR_EXEC_REC_READ);
2149
2150 prev = t - BC_LEX_KW_READ + BC_INST_READ;
2151 bc_parse_noArgBuiltin(p, prev);
2152
2153 rprn = get_token = bin_last = incdec = can_assign = false;
2154 nexprs += 1;
2155 flags &= ~(BC_PARSE_ARRAY);
2156
2157 break;
2158 }
2159
2160 case BC_LEX_KW_SCALE:
2161 {
2162 // This is a leaf and cannot come right after a leaf.
2163 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2164 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2165
2166 // Scale needs special work because it can be a variable *or* a
2167 // function.
2168 bc_parse_scale(p, &prev, &can_assign, flags);
2169
2170 rprn = get_token = bin_last = false;
2171 nexprs += 1;
2172 flags &= ~(BC_PARSE_ARRAY);
2173
2174 break;
2175 }
2176
2177 case BC_LEX_KW_MODEXP:
2178 case BC_LEX_KW_DIVMOD:
2179 {
2180 // This is a leaf and cannot come right after a leaf.
2181 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2182 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2183
2184 bc_parse_builtin3(p, t, flags, &prev);
2185
2186 rprn = get_token = bin_last = incdec = can_assign = false;
2187 nexprs += 1;
2188 flags &= ~(BC_PARSE_ARRAY);
2189
2190 break;
2191 }
2192
2193 default:
2194 {
2195 #ifndef NDEBUG
2196 // We should never get here, even in debug builds.
2197 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
2198 break;
2199 #endif // NDEBUG
2200 }
2201 }
2202
2203 if (get_token) bc_lex_next(&p->l);
2204 }
2205
2206 // Now that we have parsed the expression, we need to empty the operator
2207 // stack.
2208 while (p->ops.len > ops_bgn) {
2209
2210 top = BC_PARSE_TOP_OP(p);
2211 assign = top >= BC_LEX_OP_ASSIGN_POWER && top <= BC_LEX_OP_ASSIGN;
2212
2213 // There should not be *any* parens on the stack anymore.
2214 if (BC_ERR(top == BC_LEX_LPAREN || top == BC_LEX_RPAREN))
2215 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2216
2217 bc_parse_push(p, BC_PARSE_TOKEN_INST(top));
2218
2219 // Adjust the number of unused expressions.
2220 nexprs -= !BC_PARSE_OP_PREFIX(top);
2221 bc_vec_pop(&p->ops);
2222
2223 incdec = false;
2224 }
2225
2226 // There must be only one expression at the top.
2227 if (BC_ERR(nexprs != 1)) bc_parse_err(p, BC_ERR_PARSE_EXPR);
2228
2229 // Check that the next token is correct.
2230 for (i = 0; i < next.len && t != next.tokens[i]; ++i);
2231 if (BC_ERR(i == next.len && !bc_parse_isDelimiter(p)))
2232 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2233
2234 // Check that POSIX would be happy with the number of relational operators.
2235 if (!(flags & BC_PARSE_REL) && nrelops)
2236 bc_parse_err(p, BC_ERR_POSIX_REL_POS);
2237 else if ((flags & BC_PARSE_REL) && nrelops > 1)
2238 bc_parse_err(p, BC_ERR_POSIX_MULTIREL);
2239
2240 // If this is true, then we might be in a situation where we don't print.
2241 // We would want to have the increment/decrement operator not make an extra
2242 // copy if it's not necessary.
2243 if (!(flags & BC_PARSE_NEEDVAL) && !pfirst) {
2244
2245 // We have the easy case if the last operator was an assignment
2246 // operator.
2247 if (assign) {
2248 inst = *((uchar*) bc_vec_top(&p->func->code));
2249 inst += (BC_INST_ASSIGN_POWER_NO_VAL - BC_INST_ASSIGN_POWER);
2250 incdec = false;
2251 }
2252 // If we have an inc/dec operator and we are *not* printing, implement
2253 // the optimization to get rid of the extra copy.
2254 else if (incdec && !(flags & BC_PARSE_PRINT)) {
2255 inst = *((uchar*) bc_vec_top(&p->func->code));
2256 incdec = (inst <= BC_INST_DEC);
2257 inst = BC_INST_ASSIGN_PLUS_NO_VAL + (inst != BC_INST_INC &&
2258 inst != BC_INST_ASSIGN_PLUS);
2259 }
2260
2261 // This condition allows us to change the previous assignment
2262 // instruction (which does a copy) for a NO_VAL version, which does not.
2263 // This condition is set if either of the above if statements ends up
2264 // being true.
2265 if (inst >= BC_INST_ASSIGN_POWER_NO_VAL &&
2266 inst <= BC_INST_ASSIGN_NO_VAL)
2267 {
2268 // Pop the previous assignment instruction and push a new one.
2269 // Inc/dec needs the extra instruction because it is now a binary
2270 // operator and needs a second operand.
2271 bc_vec_pop(&p->func->code);
2272 if (incdec) bc_parse_push(p, BC_INST_ONE);
2273 bc_parse_push(p, inst);
2274 }
2275 }
2276
2277 // If we might have to print...
2278 if ((flags & BC_PARSE_PRINT)) {
2279
2280 // With a paren first or the last operator not being an assignment, we
2281 // *do* want to print.
2282 if (pfirst || !assign) bc_parse_push(p, BC_INST_PRINT);
2283 }
2284 // We need to make sure to push a pop instruction for assignment statements
2285 // that will not print. The print will pop, but without it, we need to pop.
2286 else if (!(flags & BC_PARSE_NEEDVAL) &&
2287 (inst < BC_INST_ASSIGN_POWER_NO_VAL ||
2288 inst > BC_INST_ASSIGN_NO_VAL))
2289 {
2290 bc_parse_push(p, BC_INST_POP);
2291 }
2292
2293 // We want to eat newlines if newlines are not a valid ending token.
2294 // This is for spacing in things like for loop headers.
2295 //
2296 // Yes, this is one case where I reuse a variable for a different purpose;
2297 // in this case, incdec being true now means that newlines are not valid.
2298 for (incdec = true, i = 0; i < next.len && incdec; ++i)
2299 incdec = (next.tokens[i] != BC_LEX_NLINE);
2300 if (incdec) {
2301 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l);
2302 }
2303
2304 return BC_PARSE_STATUS_SUCCESS;
2305 }
2306
2307 /**
2308 * Parses an expression with bc_parse_expr_err(), but throws an error if it gets
2309 * an empty expression.
2310 * @param p The parser.
2311 * @param flags The flags for what is valid in the expression.
2312 * @param next A set of tokens for what is valid *after* the expression.
2313 */
bc_parse_expr_status(BcParse * p,uint8_t flags,BcParseNext next)2314 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next) {
2315
2316 BcParseStatus s = bc_parse_expr_err(p, flags, next);
2317
2318 if (BC_ERR(s == BC_PARSE_STATUS_EMPTY_EXPR))
2319 bc_parse_err(p, BC_ERR_PARSE_EMPTY_EXPR);
2320 }
2321
bc_parse_expr(BcParse * p,uint8_t flags)2322 void bc_parse_expr(BcParse *p, uint8_t flags) {
2323 assert(p);
2324 bc_parse_expr_status(p, flags, bc_parse_next_read);
2325 }
2326 #endif // BC_ENABLED
2327