1 //! Lexical analysis for .clif files.
2 
3 use crate::error::Location;
4 use cranelift_codegen::ir::types;
5 use cranelift_codegen::ir::{Block, Value};
6 use std::str::CharIndices;
7 use std::u16;
8 
/// A Token returned from the `Lexer`.
///
/// Some variants may contain references to the original source text, so the `Token` has the same
/// lifetime as the source.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Token<'a> {
    // Line comment; the text starts at the ';' and runs to the end of the line.
    Comment(&'a str),
    LPar,                   // '('
    RPar,                   // ')'
    LBrace,                 // '{'
    RBrace,                 // '}'
    LBracket,               // '['
    RBracket,               // ']'
    LAngle,                 // '<'
    RAngle,                 // '>'
    Minus,                  // '-'
    Plus,                   // '+'
    Multiply,               // '*'
    Comma,                  // ','
    Dot,                    // '.'
    Colon,                  // ':'
    Equal,                  // '='
    Bang,                   // '!'
    At,                     // '@'
    Arrow,                  // '->'
    Float(&'a str),         // Floating point immediate (raw source text)
    Integer(&'a str),       // Integer immediate (raw source text)
    Type(types::Type),      // i32, f32, i32x4, ...
    DynamicType(u32),       // dt5
    Value(Value),           // v12, v7
    Block(Block),           // block3
    Cold,                   // cold (flag on block)
    StackSlot(u32),         // ss3
    DynamicStackSlot(u32),  // dss4
    GlobalValue(u32),       // gv3
    MemoryType(u32),        // mt0
    Constant(u32),          // const2
    FuncRef(u32),           // fn2
    SigRef(u32),            // sig2
    UserRef(u32),           // u345
    UserNameRef(u32),       // userextname345
    ExceptionTableRef(u32), // extable123
    ExceptionTag(u32),      // tag123
    TryCallRet(u32),        // ret123
    TryCallExn(u32),        // exn123
    Name(&'a str),          // %9arbitrary_alphanum, %x3, %0, %function ... (without the '%')
    String(&'a str),        // "arbitrary quoted string with no escape" ... (without the quotes)
    HexSequence(&'a str),   // #89AF (without the '#')
    Identifier(&'a str),    // Unrecognized identifier (opcode, enumerator, ...)
    SourceLoc(&'a str),     // @00c7 (without the '@')
}
60 
/// A `Token` with an associated location.
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedToken<'a> {
    /// The token itself.
    pub token: Token<'a>,
    /// Where the token starts in the source.
    pub location: Location,
}
67 
68 /// Wrap up a `Token` with the given location.
token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError>69 fn token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError> {
70     Ok(LocatedToken {
71         token,
72         location: loc,
73     })
74 }
75 
/// An error from the lexical analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexError {
    /// Reported for a character that cannot start any token, and for a string literal that is
    /// still open when the source ends.
    InvalidChar,
}
81 
/// A `LexError` with an associated Location.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocatedError {
    /// The kind of lexical error.
    pub error: LexError,
    /// Where in the source the error was detected.
    pub location: Location,
}
88 
89 /// Wrap up a `LexError` with the given location.
error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError>90 fn error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
91     Err(LocatedError {
92         error,
93         location: loc,
94     })
95 }
96 
/// Count the ASCII decimal digits that form the tail of `s`.
fn trailing_digits(s: &str) -> usize {
    // Only ASCII digits matter, so walking the raw bytes from the back is enough and avoids
    // decoding UTF-8.
    s.bytes().rev().take_while(u8::is_ascii_digit).count()
}
106 
107 /// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII
108 /// letters and numeric tail.
split_entity_name(name: &str) -> Option<(&str, u32)>109 pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
110     let (head, tail) = name.split_at(name.len() - trailing_digits(name));
111     if tail.len() > 1 && tail.starts_with('0') {
112         None
113     } else {
114         tail.parse().ok().map(|n| (head, n))
115     }
116 }
117 
/// Lexical analysis.
///
/// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
///
/// It also keeps track of the current line number for error reporting.
pub struct Lexer<'a> {
    // Complete source being processed.
    source: &'a str,

    // Iterator over the characters of `source` together with their byte offsets.
    chars: CharIndices<'a>,

    // Next character to be processed, or `None` at the end.
    lookahead: Option<char>,

    // Byte index into `source` of the lookahead character; `source.len()` at the end.
    pos: usize,

    // Current line number, starting at 1.
    line_number: usize,
}
140 
141 impl<'a> Lexer<'a> {
new(s: &'a str) -> Self142     pub fn new(s: &'a str) -> Self {
143         let mut lex = Self {
144             source: s,
145             chars: s.char_indices(),
146             lookahead: None,
147             pos: 0,
148             line_number: 1,
149         };
150         // Advance to the first char.
151         lex.next_ch();
152         lex
153     }
154 
155     // Advance to the next character.
156     // Return the next lookahead character, or None when the end is encountered.
157     // Always update cur_ch to reflect
next_ch(&mut self) -> Option<char>158     fn next_ch(&mut self) -> Option<char> {
159         if self.lookahead == Some('\n') {
160             self.line_number += 1;
161         }
162         match self.chars.next() {
163             Some((idx, ch)) => {
164                 self.pos = idx;
165                 self.lookahead = Some(ch);
166             }
167             None => {
168                 self.pos = self.source.len();
169                 self.lookahead = None;
170             }
171         }
172         self.lookahead
173     }
174 
175     // Get the location corresponding to `lookahead`.
loc(&self) -> Location176     fn loc(&self) -> Location {
177         Location {
178             line_number: self.line_number,
179         }
180     }
181 
182     // Starting from `lookahead`, are we looking at `prefix`?
looking_at(&self, prefix: &str) -> bool183     fn looking_at(&self, prefix: &str) -> bool {
184         self.source[self.pos..].starts_with(prefix)
185     }
186 
187     // Starting from `lookahead`, are we looking at a number?
looking_at_numeric(&self) -> bool188     fn looking_at_numeric(&self) -> bool {
189         if let Some(c) = self.lookahead {
190             match c {
191                 '0'..='9' => return true,
192                 '-' => return true,
193                 '+' => return true,
194                 '.' => return true,
195                 _ => {}
196             }
197             if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") {
198                 return true;
199             }
200         }
201         false
202     }
203 
204     // Scan a single-char token.
scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError>205     fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
206         assert_ne!(self.lookahead, None);
207         let loc = self.loc();
208         self.next_ch();
209         token(tok, loc)
210     }
211 
212     // Scan a multi-char token.
scan_chars( &mut self, count: usize, tok: Token<'a>, ) -> Result<LocatedToken<'a>, LocatedError>213     fn scan_chars(
214         &mut self,
215         count: usize,
216         tok: Token<'a>,
217     ) -> Result<LocatedToken<'a>, LocatedError> {
218         let loc = self.loc();
219         for _ in 0..count {
220             assert_ne!(self.lookahead, None);
221             self.next_ch();
222         }
223         token(tok, loc)
224     }
225 
226     /// Get the rest of the current line.
227     /// The next token returned by `next()` will be from the following lines.
rest_of_line(&mut self) -> &'a str228     pub fn rest_of_line(&mut self) -> &'a str {
229         let begin = self.pos;
230         loop {
231             match self.next_ch() {
232                 None | Some('\n') => return &self.source[begin..self.pos],
233                 _ => {}
234             }
235         }
236     }
237 
238     // Scan a comment extending to the end of the current line.
scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError>239     fn scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
240         let loc = self.loc();
241         let text = self.rest_of_line();
242         token(Token::Comment(text), loc)
243     }
244 
245     // Scan a number token which can represent either an integer or floating point number.
246     //
247     // Accept the following forms:
248     //
249     // - `10`: Integer
250     // - `-10`: Integer
251     // - `0xff_00`: Integer
252     // - `0.0`: Float
253     // - `0x1.f`: Float
254     // - `-0x2.4`: Float
255     // - `0x0.4p-34`: Float
256     //
257     // This function does not filter out all invalid numbers. It depends in the context-sensitive
258     // decoding of the text for that. For example, the number of allowed digits in an `Ieee32` and
259     // an `Ieee64` constant are different.
scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError>260     fn scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
261         let begin = self.pos;
262         let loc = self.loc();
263         let mut is_float = false;
264 
265         // Skip a leading sign.
266         match self.lookahead {
267             Some('-') => {
268                 self.next_ch();
269                 if !self.looking_at_numeric() {
270                     // If the next characters won't parse as a number, we return Token::Minus
271                     return token(Token::Minus, loc);
272                 }
273             }
274             Some('+') => {
275                 self.next_ch();
276                 if !self.looking_at_numeric() {
277                     // If the next characters won't parse as a number, we return Token::Plus
278                     return token(Token::Plus, loc);
279                 }
280             }
281             _ => {}
282         }
283 
284         // Check for NaNs with payloads.
285         if self.looking_at("NaN:") || self.looking_at("sNaN:") {
286             // Skip the `NaN:` prefix, the loop below won't accept it.
287             // We expect a hexadecimal number to follow the colon.
288             while self.next_ch() != Some(':') {}
289             is_float = true;
290         } else if self.looking_at("NaN") || self.looking_at("Inf") {
291             // This is Inf or a default quiet NaN.
292             is_float = true;
293         }
294 
295         // Look for the end of this number. Detect the radix point if there is one.
296         loop {
297             match self.next_ch() {
298                 Some('-') | Some('_') => {}
299                 Some('.') => is_float = true,
300                 Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
301                 _ => break,
302             }
303         }
304         let text = &self.source[begin..self.pos];
305         if is_float {
306             token(Token::Float(text), loc)
307         } else {
308             token(Token::Integer(text), loc)
309         }
310     }
311 
312     // Scan a 'word', which is an identifier-like sequence of characters beginning with '_' or an
313     // alphabetic char, followed by zero or more alphanumeric or '_' characters.
scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError>314     fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
315         let begin = self.pos;
316         let loc = self.loc();
317 
318         assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
319         loop {
320             match self.next_ch() {
321                 Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
322                 _ => break,
323             }
324         }
325         let text = &self.source[begin..self.pos];
326 
327         // Look for numbered well-known entities like block15, v45, ...
328         token(
329             split_entity_name(text)
330                 .and_then(|(prefix, number)| {
331                     Self::numbered_entity(prefix, number)
332                         .or_else(|| Self::value_type(text, prefix, number))
333                 })
334                 .unwrap_or_else(|| match text {
335                     "cold" => Token::Cold,
336                     _ => Token::Identifier(text),
337                 }),
338             loc,
339         )
340     }
341 
342     // If prefix is a well-known entity prefix and suffix is a valid entity number, return the
343     // decoded token.
numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>>344     fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
345         match prefix {
346             "v" => Value::with_number(number).map(Token::Value),
347             "block" => Block::with_number(number).map(Token::Block),
348             "ss" => Some(Token::StackSlot(number)),
349             "dss" => Some(Token::DynamicStackSlot(number)),
350             "dt" => Some(Token::DynamicType(number)),
351             "gv" => Some(Token::GlobalValue(number)),
352             "mt" => Some(Token::MemoryType(number)),
353             "const" => Some(Token::Constant(number)),
354             "fn" => Some(Token::FuncRef(number)),
355             "sig" => Some(Token::SigRef(number)),
356             "u" => Some(Token::UserRef(number)),
357             "userextname" => Some(Token::UserNameRef(number)),
358             "extable" => Some(Token::ExceptionTableRef(number)),
359             "tag" => Some(Token::ExceptionTag(number)),
360             "ret" => Some(Token::TryCallRet(number)),
361             "exn" => Some(Token::TryCallExn(number)),
362             _ => None,
363         }
364     }
365 
366     // Recognize a scalar or vector type.
value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>>367     fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
368         let is_vector = prefix.ends_with('x');
369         let scalar = if is_vector {
370             &prefix[0..prefix.len() - 1]
371         } else {
372             text
373         };
374         let base_type = match scalar {
375             "i8" => types::I8,
376             "i16" => types::I16,
377             "i32" => types::I32,
378             "i64" => types::I64,
379             "i128" => types::I128,
380             "f16" => types::F16,
381             "f32" => types::F32,
382             "f64" => types::F64,
383             "f128" => types::F128,
384             _ => return None,
385         };
386         if is_vector {
387             if number <= u32::from(u16::MAX) {
388                 base_type.by(number).map(Token::Type)
389             } else {
390                 None
391             }
392         } else {
393             Some(Token::Type(base_type))
394         }
395     }
396 
scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError>397     fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
398         let loc = self.loc();
399         let begin = self.pos + 1;
400 
401         assert_eq!(self.lookahead, Some('%'));
402 
403         loop {
404             match self.next_ch() {
405                 Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
406                 _ => break,
407             }
408         }
409 
410         let end = self.pos;
411         token(Token::Name(&self.source[begin..end]), loc)
412     }
413 
414     /// Scan for a multi-line quoted string with no escape character.
scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError>415     fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
416         let loc = self.loc();
417         let begin = self.pos + 1;
418 
419         assert_eq!(self.lookahead, Some('"'));
420 
421         while let Some(c) = self.next_ch() {
422             if c == '"' {
423                 break;
424             }
425         }
426 
427         let end = self.pos;
428         if self.lookahead != Some('"') {
429             return error(LexError::InvalidChar, self.loc());
430         }
431         self.next_ch();
432         token(Token::String(&self.source[begin..end]), loc)
433     }
434 
scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError>435     fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
436         let loc = self.loc();
437         let begin = self.pos + 1;
438 
439         assert_eq!(self.lookahead, Some('#'));
440 
441         while let Some(c) = self.next_ch() {
442             if !char::is_digit(c, 16) {
443                 break;
444             }
445         }
446 
447         let end = self.pos;
448         token(Token::HexSequence(&self.source[begin..end]), loc)
449     }
450 
451     /// Given that we've consumed an `@` character, are we looking at a source
452     /// location?
looking_at_srcloc(&self) -> bool453     fn looking_at_srcloc(&self) -> bool {
454         match self.lookahead {
455             Some(c) => char::is_digit(c, 16),
456             _ => false,
457         }
458     }
459 
scan_srcloc(&mut self, pos: usize, loc: Location) -> Result<LocatedToken<'a>, LocatedError>460     fn scan_srcloc(&mut self, pos: usize, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
461         let begin = pos + 1;
462         while let Some(c) = self.next_ch() {
463             if !char::is_digit(c, 16) {
464                 break;
465             }
466         }
467 
468         let end = self.pos;
469         token(Token::SourceLoc(&self.source[begin..end]), loc)
470     }
471 
472     /// Get the next token or a lexical error.
473     ///
474     /// Return None when the end of the source is encountered.
next(&mut self) -> Option<Result<LocatedToken<'a>, LocatedError>>475     pub fn next(&mut self) -> Option<Result<LocatedToken<'a>, LocatedError>> {
476         loop {
477             let loc = self.loc();
478             return match self.lookahead {
479                 None => None,
480                 Some(';') => Some(self.scan_comment()),
481                 Some('(') => Some(self.scan_char(Token::LPar)),
482                 Some(')') => Some(self.scan_char(Token::RPar)),
483                 Some('{') => Some(self.scan_char(Token::LBrace)),
484                 Some('}') => Some(self.scan_char(Token::RBrace)),
485                 Some('[') => Some(self.scan_char(Token::LBracket)),
486                 Some(']') => Some(self.scan_char(Token::RBracket)),
487                 Some('<') => Some(self.scan_char(Token::LAngle)),
488                 Some('>') => Some(self.scan_char(Token::RAngle)),
489                 Some(',') => Some(self.scan_char(Token::Comma)),
490                 Some('.') => Some(self.scan_char(Token::Dot)),
491                 Some(':') => Some(self.scan_char(Token::Colon)),
492                 Some('=') => Some(self.scan_char(Token::Equal)),
493                 Some('!') => Some(self.scan_char(Token::Bang)),
494                 Some('+') => Some(self.scan_number()),
495                 Some('*') => Some(self.scan_char(Token::Multiply)),
496                 Some('-') => {
497                     if self.looking_at("->") {
498                         Some(self.scan_chars(2, Token::Arrow))
499                     } else {
500                         Some(self.scan_number())
501                     }
502                 }
503                 Some('0'..='9') => Some(self.scan_number()),
504                 Some('a'..='z') | Some('A'..='Z') => {
505                     if self.looking_at("NaN") || self.looking_at("Inf") {
506                         Some(self.scan_number())
507                     } else {
508                         Some(self.scan_word())
509                     }
510                 }
511                 Some('%') => Some(self.scan_name()),
512                 Some('"') => Some(self.scan_string()),
513                 Some('#') => Some(self.scan_hex_sequence()),
514                 Some('@') => {
515                     let pos = self.pos;
516                     let loc = self.loc();
517                     self.next_ch();
518                     if self.looking_at_srcloc() {
519                         Some(self.scan_srcloc(pos, loc))
520                     } else {
521                         Some(token(Token::At, loc))
522                     }
523                 }
524                 // all ascii whitespace
525                 Some(' ') | Some('\x09'..='\x0d') => {
526                     self.next_ch();
527                     continue;
528                 }
529                 _ => {
530                     // Skip invalid char, return error.
531                     self.next_ch();
532                     Some(error(LexError::InvalidChar, loc))
533                 }
534             };
535         }
536     }
537 }
538 
539 #[cfg(test)]
540 mod tests {
541     use super::*;
542 
543     #[test]
digits()544     fn digits() {
545         assert_eq!(trailing_digits(""), 0);
546         assert_eq!(trailing_digits("x"), 0);
547         assert_eq!(trailing_digits("0x"), 0);
548         assert_eq!(trailing_digits("x1"), 1);
549         assert_eq!(trailing_digits("1x1"), 1);
550         assert_eq!(trailing_digits("1x01"), 2);
551     }
552 
553     #[test]
entity_name()554     fn entity_name() {
555         assert_eq!(split_entity_name(""), None);
556         assert_eq!(split_entity_name("x"), None);
557         assert_eq!(split_entity_name("x+"), None);
558         assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
559         assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
560         assert_eq!(split_entity_name("1"), Some(("", 1)));
561         assert_eq!(split_entity_name("x1"), Some(("x", 1)));
562         assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
563         // Reject this non-canonical form.
564         assert_eq!(split_entity_name("inst01"), None);
565     }
566 
token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>>567     fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
568         Some(super::token(token, Location { line_number: line }))
569     }
570 
error<'a>(error: LexError, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>>571     fn error<'a>(error: LexError, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
572         Some(super::error(error, Location { line_number: line }))
573     }
574 
575     #[test]
make_lexer()576     fn make_lexer() {
577         let mut l1 = Lexer::new("");
578         let mut l2 = Lexer::new(" ");
579         let mut l3 = Lexer::new("\n ");
580 
581         assert_eq!(l1.next(), None);
582         assert_eq!(l2.next(), None);
583         assert_eq!(l3.next(), None);
584     }
585 
586     #[test]
lex_comment()587     fn lex_comment() {
588         let mut lex = Lexer::new("; hello");
589         assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
590         assert_eq!(lex.next(), None);
591 
592         lex = Lexer::new("\n  ;hello\n;foo");
593         assert_eq!(lex.next(), token(Token::Comment(";hello"), 2));
594         assert_eq!(lex.next(), token(Token::Comment(";foo"), 3));
595         assert_eq!(lex.next(), None);
596 
597         // Scan a comment after an invalid char.
598         let mut lex = Lexer::new("$; hello");
599         assert_eq!(lex.next(), error(LexError::InvalidChar, 1));
600         assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
601         assert_eq!(lex.next(), None);
602     }
603 
604     #[test]
lex_chars()605     fn lex_chars() {
606         let mut lex = Lexer::new("(); hello\n = :{, }.");
607         assert_eq!(lex.next(), token(Token::LPar, 1));
608         assert_eq!(lex.next(), token(Token::RPar, 1));
609         assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
610         assert_eq!(lex.next(), token(Token::Equal, 2));
611         assert_eq!(lex.next(), token(Token::Colon, 2));
612         assert_eq!(lex.next(), token(Token::LBrace, 2));
613         assert_eq!(lex.next(), token(Token::Comma, 2));
614         assert_eq!(lex.next(), token(Token::RBrace, 2));
615         assert_eq!(lex.next(), token(Token::Dot, 2));
616         assert_eq!(lex.next(), None);
617     }
618 
619     #[test]
lex_numbers()620     fn lex_numbers() {
621         let mut lex = Lexer::new(" 0 2_000 -1,0xf -0x0 0.0 0x0.4p-34 NaN +5");
622         assert_eq!(lex.next(), token(Token::Integer("0"), 1));
623         assert_eq!(lex.next(), token(Token::Integer("2_000"), 1));
624         assert_eq!(lex.next(), token(Token::Integer("-1"), 1));
625         assert_eq!(lex.next(), token(Token::Comma, 1));
626         assert_eq!(lex.next(), token(Token::Integer("0xf"), 1));
627         assert_eq!(lex.next(), token(Token::Integer("-0x0"), 1));
628         assert_eq!(lex.next(), token(Token::Float("0.0"), 1));
629         assert_eq!(lex.next(), token(Token::Float("0x0.4p-34"), 1));
630         assert_eq!(lex.next(), token(Token::Float("NaN"), 1));
631         assert_eq!(lex.next(), token(Token::Integer("+5"), 1));
632         assert_eq!(lex.next(), None);
633     }
634 
635     #[test]
lex_identifiers()636     fn lex_identifiers() {
637         let mut lex = Lexer::new(
638             "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \
639              function0 function i8 i32x4 f32x5 f16 f128",
640         );
641         assert_eq!(
642             lex.next(),
643             token(Token::Value(Value::with_number(0).unwrap()), 1)
644         );
645         assert_eq!(lex.next(), token(Token::Identifier("v00"), 1));
646         assert_eq!(lex.next(), token(Token::Identifier("vx01"), 1));
647         assert_eq!(
648             lex.next(),
649             token(Token::Block(Block::with_number(1234567890).unwrap()), 1)
650         );
651         assert_eq!(lex.next(), token(Token::Identifier("block5234567890"), 1));
652         assert_eq!(lex.next(), token(Token::Identifier("v1x"), 1));
653         assert_eq!(lex.next(), token(Token::Identifier("vx1"), 1));
654         assert_eq!(lex.next(), token(Token::Identifier("vxvx4"), 1));
655         assert_eq!(lex.next(), token(Token::Identifier("function0"), 1));
656         assert_eq!(lex.next(), token(Token::Identifier("function"), 1));
657         assert_eq!(lex.next(), token(Token::Type(types::I8), 1));
658         assert_eq!(lex.next(), token(Token::Type(types::I32X4), 1));
659         assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1));
660         assert_eq!(lex.next(), token(Token::Type(types::F16), 1));
661         assert_eq!(lex.next(), token(Token::Type(types::F128), 1));
662         assert_eq!(lex.next(), None);
663     }
664 
665     #[test]
lex_hex_sequences()666     fn lex_hex_sequences() {
667         let mut lex = Lexer::new("#0 #DEADbeef123 #789");
668 
669         assert_eq!(lex.next(), token(Token::HexSequence("0"), 1));
670         assert_eq!(lex.next(), token(Token::HexSequence("DEADbeef123"), 1));
671         assert_eq!(lex.next(), token(Token::HexSequence("789"), 1));
672     }
673 
674     #[test]
lex_names()675     fn lex_names() {
676         let mut lex = Lexer::new("%0 %x3 %function %123_abc %ss0 %v3 %block11 %const42 %_");
677 
678         assert_eq!(lex.next(), token(Token::Name("0"), 1));
679         assert_eq!(lex.next(), token(Token::Name("x3"), 1));
680         assert_eq!(lex.next(), token(Token::Name("function"), 1));
681         assert_eq!(lex.next(), token(Token::Name("123_abc"), 1));
682         assert_eq!(lex.next(), token(Token::Name("ss0"), 1));
683         assert_eq!(lex.next(), token(Token::Name("v3"), 1));
684         assert_eq!(lex.next(), token(Token::Name("block11"), 1));
685         assert_eq!(lex.next(), token(Token::Name("const42"), 1));
686         assert_eq!(lex.next(), token(Token::Name("_"), 1));
687     }
688 
    // Strings are returned without their quotes. A backslash is an ordinary character, and a
    // string may span several lines; its token is located on the line where it starts.
    #[test]
    fn lex_strings() {
        let mut lex = Lexer::new(
            r#"""  "0" "x3""function" "123 abc" "\" "start
                    and end on
                    different lines" "#,
        );

        assert_eq!(lex.next(), token(Token::String(""), 1));
        assert_eq!(lex.next(), token(Token::String("0"), 1));
        assert_eq!(lex.next(), token(Token::String("x3"), 1));
        assert_eq!(lex.next(), token(Token::String("function"), 1));
        assert_eq!(lex.next(), token(Token::String("123 abc"), 1));
        assert_eq!(lex.next(), token(Token::String(r#"\"#), 1));
        // The expected text must reproduce the input's line breaks and indentation exactly.
        assert_eq!(
            lex.next(),
            token(
                Token::String(
                    r#"start
                    and end on
                    different lines"#
                ),
                1
            )
        );
    }
715 
716     #[test]
lex_userrefs()717     fn lex_userrefs() {
718         let mut lex = Lexer::new("u0 u1 u234567890 u9:8765");
719 
720         assert_eq!(lex.next(), token(Token::UserRef(0), 1));
721         assert_eq!(lex.next(), token(Token::UserRef(1), 1));
722         assert_eq!(lex.next(), token(Token::UserRef(234567890), 1));
723         assert_eq!(lex.next(), token(Token::UserRef(9), 1));
724         assert_eq!(lex.next(), token(Token::Colon, 1));
725         assert_eq!(lex.next(), token(Token::Integer("8765"), 1));
726         assert_eq!(lex.next(), None);
727     }
728 }
729