time/format_description/parse/
lexer.rs

1//! Lexer for parsing format descriptions.
2
3use core::iter;
4
5use super::{Error, Location, Spanned, SpannedValue, attach_location, unused};
6
7/// An iterator over the lexed tokens.
8pub(super) struct Lexed<I>
9where
10    I: Iterator,
11{
12    /// The internal iterator.
13    iter: iter::Peekable<I>,
14}
15
16impl<I> Iterator for Lexed<I>
17where
18    I: Iterator,
19{
20    type Item = I::Item;
21
22    fn next(&mut self) -> Option<Self::Item> {
23        self.iter.next()
24    }
25}
26
27impl<'iter, 'token, I> Lexed<I>
28where
29    'token: 'iter,
30    I: Iterator<Item = Result<Token<'token>, Error>> + 'iter,
31{
32    /// Peek at the next item in the iterator.
33    #[inline]
34    pub(super) fn peek(&mut self) -> Option<&I::Item> {
35        self.iter.peek()
36    }
37
38    /// Consume the next token if it is whitespace.
39    #[inline]
40    pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
41        if let Some(&Ok(Token::ComponentPart {
42            kind: ComponentKind::Whitespace,
43            value,
44        })) = self.peek()
45        {
46            self.next(); // consume
47            Some(value)
48        } else {
49            None
50        }
51    }
52
53    /// Consume the next token if it is a component item that is not whitespace.
54    #[inline]
55    pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
56        if let Some(&Ok(Token::ComponentPart {
57            kind: ComponentKind::NotWhitespace,
58            value,
59        })) = self.peek()
60        {
61            self.next(); // consume
62            Some(value)
63        } else {
64            None
65        }
66    }
67
68    /// Consume the next token if it is an opening bracket.
69    #[inline]
70    pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> {
71        if let Some(&Ok(Token::Bracket {
72            kind: BracketKind::Opening,
73            location,
74        })) = self.peek()
75        {
76            self.next(); // consume
77            Some(location)
78        } else {
79            None
80        }
81    }
82
83    /// Peek at the next token if it is a closing bracket.
84    #[inline]
85    pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> {
86        if let Some(Ok(Token::Bracket {
87            kind: BracketKind::Closing,
88            location,
89        })) = self.peek()
90        {
91            Some(location)
92        } else {
93            None
94        }
95    }
96
97    /// Consume the next token if it is a closing bracket.
98    #[inline]
99    pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> {
100        if let Some(&Ok(Token::Bracket {
101            kind: BracketKind::Closing,
102            location,
103        })) = self.peek()
104        {
105            self.next(); // consume
106            Some(location)
107        } else {
108            None
109        }
110    }
111}
112
113/// A token emitted by the lexer. There is no semantic meaning at this stage.
114pub(super) enum Token<'a> {
115    /// A literal string, formatted and parsed as-is.
116    Literal(Spanned<&'a [u8]>),
117    /// An opening or closing bracket. May or may not be the start or end of a component.
118    Bracket {
119        /// Whether the bracket is opening or closing.
120        kind: BracketKind,
121        /// Where the bracket was in the format string.
122        location: Location,
123    },
124    /// One part of a component. This could be its name, a modifier, or whitespace.
125    ComponentPart {
126        /// Whether the part is whitespace or not.
127        kind: ComponentKind,
128        /// The part itself.
129        value: Spanned<&'a [u8]>,
130    },
131}
132
133/// What type of bracket is present.
134pub(super) enum BracketKind {
135    /// An opening bracket: `[`
136    Opening,
137    /// A closing bracket: `]`
138    Closing,
139}
140
141/// Indicates whether the component is whitespace or not.
142pub(super) enum ComponentKind {
143    Whitespace,
144    NotWhitespace,
145}
146
147/// Parse the string into a series of [`Token`]s.
148///
149/// `VERSION` controls the version of the format description that is being parsed. Currently, this
150/// must be 1 or 2.
151///
152/// - When `VERSION` is 1, `[[` is the only escape sequence, resulting in a literal `[`.
153/// - When `VERSION` is 2, all escape sequences begin with `\`. The only characters that may
154///   currently follow are `\`, `[`, and `]`, all of which result in the literal character. All
155///   other characters result in a lex error.
156#[inline]
157pub(super) fn lex<const VERSION: usize>(
158    mut input: &[u8],
159) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> {
160    validate_version!(VERSION);
161
162    let mut depth: u8 = 0;
163    let mut iter = attach_location(input.iter()).peekable();
164    let mut second_bracket_location = None;
165
166    let iter = iter::from_fn(move || {
167        // The flag is only set when version is zero.
168        if version!(..=1) {
169            // There is a flag set to emit the second half of an escaped bracket pair.
170            if let Some(location) = second_bracket_location.take() {
171                return Some(Ok(Token::Bracket {
172                    kind: BracketKind::Opening,
173                    location,
174                }));
175            }
176        }
177
178        Some(Ok(match iter.next()? {
179            // possible escape sequence
180            (b'\\', backslash_loc) if version!(2..) => {
181                match iter.next() {
182                    Some((b'\\' | b'[' | b']', char_loc)) => {
183                        // The escaped character is emitted as-is.
184                        let char = &input[1..2];
185                        input = &input[2..];
186                        if depth == 0 {
187                            Token::Literal(char.spanned(backslash_loc.to(char_loc)))
188                        } else {
189                            Token::ComponentPart {
190                                kind: ComponentKind::NotWhitespace,
191                                value: char.spanned(backslash_loc.to(char_loc)),
192                            }
193                        }
194                    }
195                    Some((_, loc)) => {
196                        return Some(Err(Error {
197                            _inner: unused(loc.error("invalid escape sequence")),
198                            public: crate::error::InvalidFormatDescription::Expected {
199                                what: "valid escape sequence",
200                                index: loc.byte as usize,
201                            },
202                        }));
203                    }
204                    None => {
205                        return Some(Err(Error {
206                            _inner: unused(backslash_loc.error("unexpected end of input")),
207                            public: crate::error::InvalidFormatDescription::Expected {
208                                what: "valid escape sequence",
209                                index: backslash_loc.byte as usize,
210                            },
211                        }));
212                    }
213                }
214            }
215            // potentially escaped opening bracket
216            (b'[', location) if version!(..=1) => {
217                if let Some((_, second_location)) = iter.next_if(|&(&byte, _)| byte == b'[') {
218                    // Escaped bracket. Store the location of the second so we can emit it later.
219                    second_bracket_location = Some(second_location);
220                    input = &input[2..];
221                } else {
222                    // opening bracket
223                    depth += 1;
224                    input = &input[1..];
225                }
226
227                Token::Bracket {
228                    kind: BracketKind::Opening,
229                    location,
230                }
231            }
232            // opening bracket
233            (b'[', location) => {
234                depth += 1;
235                input = &input[1..];
236
237                Token::Bracket {
238                    kind: BracketKind::Opening,
239                    location,
240                }
241            }
242            // closing bracket
243            (b']', location) if depth > 0 => {
244                depth -= 1;
245                input = &input[1..];
246
247                Token::Bracket {
248                    kind: BracketKind::Closing,
249                    location,
250                }
251            }
252            // literal
253            (_, start_location) if depth == 0 => {
254                let mut bytes = 1;
255                let mut end_location = start_location;
256
257                while let Some((_, location)) =
258                    iter.next_if(|&(&byte, _)| !((version!(2..) && byte == b'\\') || byte == b'['))
259                {
260                    end_location = location;
261                    bytes += 1;
262                }
263
264                let value = &input[..bytes];
265                input = &input[bytes..];
266
267                Token::Literal(value.spanned(start_location.to(end_location)))
268            }
269            // component part
270            (byte, start_location) => {
271                let mut bytes = 1;
272                let mut end_location = start_location;
273                let is_whitespace = byte.is_ascii_whitespace();
274
275                while let Some((_, location)) = iter.next_if(|&(byte, _)| {
276                    !matches!(byte, b'\\' | b'[' | b']')
277                        && is_whitespace == byte.is_ascii_whitespace()
278                }) {
279                    end_location = location;
280                    bytes += 1;
281                }
282
283                let value = &input[..bytes];
284                input = &input[bytes..];
285
286                Token::ComponentPart {
287                    kind: if is_whitespace {
288                        ComponentKind::Whitespace
289                    } else {
290                        ComponentKind::NotWhitespace
291                    },
292                    value: value.spanned(start_location.to(end_location)),
293                }
294            }
295        }))
296    });
297
298    Lexed {
299        iter: iter.peekable(),
300    }
301}