// cl_parser/parser.rs
//! The parser takes a stream of [`Token`]s from the [`Lexer`], and turns them into [`cl_ast::ast`]
//! nodes.

pub mod expr;
pub mod pat;

pub mod error;

use cl_ast::{types::*, *};
use cl_lexer::{LexError, LexFailure, Lexer};
use cl_structures::span::Span;
use cl_token::{Lexeme, TKind, Token};
pub use error::{EOF, PResult, PResultExt, ParseError, no_eof};
/// Parse an expression from a [Parser]'s token stream at a given precedence level
pub trait Parse<'t> {
    /// The possible precedence `level`s for this parser implementation.
    /// Implementations with no notion of precedence use `()`.
    type Prec: Copy + Default;

    /// Parses `Self` from the tokens (and extra data) held in a [Parser]
    ///
    /// The `_level` argument selects the precedence level to parse at.
    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self>
    where Self: Sized;
}

/// Handles stateful extraction from a [Lexer], with single-[Token] lookahead.
#[derive(Debug)]
pub struct Parser<'t> {
    /// A stream of tokens produced from some source text
    pub lexer: Lexer<'t>,

    /// The currently-peeked output from the [Lexer]
    ///
    /// `None` means nothing is buffered; `Some(Err(_))` caches a lexer
    /// failure so it resurfaces until it is handled.
    pub next_tok: Option<PResult<Token>>,

    /// The span of the last-consumed [Token]
    pub last_loc: Span,

    /// Whether the last-consumed [Token] can stand in for a semicolon in a `do` sequence
    pub elide_do: bool,
}

impl<'t> Parser<'t> {
    /// Constructs a new Parser
    pub fn new(lexer: Lexer<'t>) -> Self {
        // Seed last_loc from the lexer, so span() is meaningful before any token is consumed
        Self { last_loc: lexer.span(), lexer, next_tok: None, elide_do: false }
    }

    /// The identity function. This exists to make production chaining easier.
    pub const fn then<T>(&self, t: T) -> T {
        t
    }

    /// Gets the [struct@Span] of the last-consumed [Token]
    pub const fn span(&self) -> Span {
        self.last_loc
    }

    /// Parses a value that implements the [Parse] trait.
    pub fn parse<T: Parse<'t>>(&mut self, level: T::Prec) -> PResult<T> {
        Parse::parse(self, level)
    }

    /// Parses a value that implements the [Parse] trait, and asserts the entire input
    /// has been consumed.
    pub fn parse_entire<T: Parse<'t>>(&mut self, level: T::Prec) -> PResult<T> {
        let out = Parse::parse(self, level);
        // A leftover token is an error even if parsing itself succeeded
        match self.peek().allow_eof()? {
            Some(t) => Err(ParseError::ExpectedEOF(t.kind, t.span)),
            None => out,
        }
    }

    /// Peeks the next [`Token`]. Returns [`ParseError::FromLexer`] on lexer error.
    pub fn peek(&mut self) -> PResult<&Token> {
        let next_tok = match self.next_tok.take() {
            Some(tok) => tok,
            None => loop {
                match self.lexer.scan() {
                    // Comments are skipped transparently
                    Ok(Token { kind: TKind::Comment, .. }) => {}
                    Ok(tok) => break Ok(tok),
                    // End-of-input returns immediately via `?`, and is NOT cached in next_tok
                    Err(LexError { pos, res: LexFailure::EOF }) => Err(ParseError::EOF(pos))?,
                    Err(e) => break Err(ParseError::FromLexer(e)),
                }
            },
        };
        // Cache the scanned result so repeated peeks don't re-scan the lexer
        let next_tok = self.next_tok.insert(next_tok);
        next_tok.as_ref().map_err(|e| *e)
    }

    /// Peeks the next token if it matches the `expected` [`TKind`]
    ///
    /// Returns `Ok(None)` (without consuming) when the next token does not match.
    pub fn peek_if(&mut self, expected: TKind) -> PResult<Option<&Token>> {
        match self.peek() {
            Ok(tok) if tok.kind == expected => Ok(Some(tok)),
            Ok(_) => Ok(None),
            Err(e) => Err(e),
        }
    }

    /// Consumes and returns the currently-peeked [Token].
    ///
    /// Errors with [`ParseError::UnexpectedEOF`] when nothing has been peeked.
    /// On success, records the token's span and whether it may elide a
    /// following semicolon in a `do` sequence.
    pub fn take(&mut self) -> PResult<Token> {
        let tok = self
            .next_tok
            .take()
            .unwrap_or(Err(ParseError::UnexpectedEOF(self.last_loc)));

        if let Ok(tok) = &tok {
            self.last_loc = tok.span;
            self.elide_do = matches!(
                tok.kind,
                TKind::RCurly | TKind::Semi | TKind::DotDot | TKind::DotDotEq
            )
        }

        tok
    }

    /// Consumes the currently-peeked [Token], returning its lexeme without cloning.
    pub fn take_lexeme(&mut self) -> PResult<Lexeme> {
        self.take().map(|tok| tok.lexeme)
    }

    /// Peeks, then consumes the next [Token], treating end-of-input as an error.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> PResult<Token> {
        self.peek().no_eof()?;
        self.take() // peek succeeded, so there is a cached token for take()
    }

    /// Consumes and returns the next [`Token`] if it matches the `expected` [`TKind`]
    ///
    /// Returns `Ok(Err(actual_kind))` (without consuming) when it does not match.
    pub fn next_if(&mut self, expected: TKind) -> PResult<Result<Token, TKind>> {
        match self.peek() {
            Ok(t) if t.kind == expected => self.take().map(Ok),
            Ok(t) => Ok(Err(t.kind)),
            Err(e) => Err(e),
        }
    }

    /// Parses a list of P separated by `sep` tokens, ending in an `end` token.
    /// ```ignore
    /// List<T> = (T sep)* T? end ;
    /// ```
    pub fn list<P: Parse<'t>>(
        &mut self,
        mut elems: Vec<P>,
        level: P::Prec,
        sep: TKind,
        end: TKind,
    ) -> PResult<Vec<P>> {
        // TODO: This loses lexer errors
        while self.peek_if(end).no_eof()?.is_none() {
            elems.push(self.parse(level).no_eof()?);
            // A missing separator ends the element list; `end` is still required below
            match self.peek_if(sep)? {
                Some(_) => self.consume(),
                None => break,
            };
        }
        let kind = self.peek().map(Token::kind)?;
        if kind == end {
            self.consume();
        } else if let Ok((first, _)) = kind.split()
            && first == end
        {
            // The terminator is the first component of a compound token:
            // split it and consume only that component, leaving the rest peeked.
            self.split()?;
        } else {
            return Err(ParseError::Expected(end, kind, self.span()));
        }
        Ok(elems)
    }

    /// Parses a list of one or more P at level `level`, separated by `sep` tokens
    /// ```ignore
    /// UnterminatedList<P> = P (sep P)*
    /// ```
    pub fn list_bare<P: Parse<'t>>(
        &mut self,
        mut elems: Vec<P>,
        level: P::Prec,
        sep: TKind,
    ) -> PResult<Vec<P>> {
        loop {
            let elem = self.parse(level).no_eof()?;
            elems.push(elem);
            // End-of-input after an element is a valid end of the list
            match self.peek_if(sep) {
                Ok(Some(_)) => self.consume(),
                Ok(None) | Err(ParseError::EOF(_)) => break Ok(elems),
                Err(e) => Err(e)?,
            };
        }
    }

    /// Parses into an [`Option<P>`] if the next token is `next`
    ///
    /// The `next` token is consumed when present.
    pub fn opt_if<P: Parse<'t>>(&mut self, level: P::Prec, next: TKind) -> PResult<Option<P>> {
        Ok(match self.next_if(next)? {
            Ok(_) => Some(self.parse(level).no_eof()?),
            Err(_) => None,
        })
    }

    /// Parses a P unless the next [Token]'s [TKind] is `end`
    ///
    /// The `end` token is required (and consumed) in either case.
    pub fn opt<P: Parse<'t>>(&mut self, level: P::Prec, end: TKind) -> PResult<Option<P>> {
        let out = match self.peek_if(end)? {
            None => Some(self.parse(level).no_eof()?),
            Some(_) => None,
        };
        self.expect(end)?;
        Ok(out)
    }

    /// Ensures the next [Token]'s [TKind] is `next`
    pub fn expect(&mut self, next: TKind) -> PResult<&mut Self> {
        self.next_if(next)?
            .map_err(|tk| ParseError::Expected(next, tk, self.span()))?;
        Ok(self)
    }

    /// Consumes the currently peeked token without returning it.
    ///
    /// A peeked *error* is deliberately left in place, so it resurfaces
    /// on the next peek/take instead of being silently dropped.
    pub fn consume(&mut self) -> &mut Self {
        if self.next_tok.as_ref().is_some_and(|tok| tok.is_ok()) {
            let _ = self.take();
        }
        self
    }

    /// Consumes the next token, and attempts to split it into multiple.
    ///
    /// If the next token cannot be split, it will be returned.
    pub fn split(&mut self) -> PResult<Token> {
        let Token { lexeme, kind, span } = self.next()?;
        let kind = match kind.split() {
            Err(_) => kind,
            Ok((out, next)) => {
                // Stash the remainder as the new lookahead;
                // both halves share the same lexeme and span.
                self.next_tok = Some(Ok(Token { lexeme: lexeme.clone(), kind: next, span }));
                out
            }
        };
        Ok(Token { lexeme, kind, span })
    }
}

238impl<'t> Parse<'t> for Path {
239    type Prec = ();
240
241    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
242        let mut parts = vec![];
243        if p.next_if(TKind::ColonColon)?.is_ok() {
244            parts.push("".into()); // the "root"
245        }
246        while let Some(Ok(id)) = p.next_if(TKind::Identifier).allow_eof()? {
247            parts.push(
248                id.lexeme
249                    .str()
250                    .expect("Identifier should have String")
251                    .into(),
252            );
253            if let None | Some(Err(_)) = p.next_if(TKind::ColonColon).allow_eof()? {
254                break;
255            }
256        }
257
258        Ok(Path { parts })
259    }
260}
261
262impl<'t> Parse<'t> for Literal {
263    type Prec = ();
264    fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
265        let tok = p.peek()?;
266        Ok(match tok.kind {
267            TKind::True => p.consume().then(Literal::Bool(true)),
268            TKind::False => p.consume().then(Literal::Bool(false)),
269            TKind::Character | TKind::Integer | TKind::String => {
270                match p.take().expect("should have Token after peek").lexeme {
271                    Lexeme::String(str) => Literal::Str(str),
272                    Lexeme::Integer(int, base) => Literal::Int(int, base),
273                    Lexeme::Char(chr) => Literal::Char(chr),
274                }
275            }
276            other => Err(ParseError::NotLiteral(other, tok.span))?,
277        })
278    }
279}
280
281impl<'t> Parse<'t> for Use {
282    type Prec = ();
283
284    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
285        let tok = p.next()?;
286        Ok(match tok.kind {
287            TKind::Star => p.then(Use::Glob),
288            TKind::Identifier => {
289                let name = tok.lexeme.str().expect("should have String").into();
290                match p.peek().map(Token::kind).allow_eof()? {
291                    Some(TKind::ColonColon) => Use::Path(name, p.consume().parse(())?),
292                    Some(TKind::As) => Use::Alias(
293                        name,
294                        p.consume()
295                            .next_if(TKind::Identifier)?
296                            .map_err(|e| ParseError::Expected(TKind::Identifier, e, p.span()))?
297                            .lexeme
298                            .str()
299                            .expect("Identifier should have string")
300                            .into(),
301                    ),
302                    _ => Use::Name(name),
303                }
304            }
305            TKind::LCurly => Use::Tree(p.list(vec![], (), TKind::Comma, TKind::RCurly)?),
306            _ => Err(ParseError::NotUse(tok.kind, tok.span))?,
307        })
308    }
309}
310
311impl<'t, P: Parse<'t> + AstNode> Parse<'t> for At<P> {
312    type Prec = P::Prec;
313    fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>
314    where Self: Sized {
315        let start = p.peek().map(|t| t.span).unwrap_or_else(|_| p.span());
316        Ok(At(p.parse(level)?, start.merge(p.span())))
317    }
318}
319
320impl<'t, P: Parse<'t>> Parse<'t> for Box<P> {
321    type Prec = P::Prec;
322    fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>
323    where Self: Sized {
324        Ok(Box::new(p.parse(level)?))
325    }
326}