// time_macros/format_description/lexer.rs

use core::iter;

use super::{Error, Location, Spanned, SpannedValue};
4
5pub(super) struct Lexed<I: Iterator> {
6    iter: iter::Peekable<I>,
7}
8
9impl<I: Iterator> Iterator for Lexed<I> {
10    type Item = I::Item;
11
12    fn next(&mut self) -> Option<Self::Item> {
13        self.iter.next()
14    }
15}
16
17impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> {
18    pub(super) fn peek(&mut self) -> Option<&I::Item> {
19        self.iter.peek()
20    }
21
22    pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
23        if let Some(&Ok(Token::ComponentPart {
24            kind: ComponentKind::Whitespace,
25            value,
26        })) = self.peek()
27        {
28            self.next(); // consume
29            Some(value)
30        } else {
31            None
32        }
33    }
34
35    pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
36        if let Some(&Ok(Token::ComponentPart {
37            kind: ComponentKind::NotWhitespace,
38            value,
39        })) = self.peek()
40        {
41            self.next();
42            Some(value)
43        } else {
44            None
45        }
46    }
47
48    pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> {
49        if let Some(&Ok(Token::Bracket {
50            kind: BracketKind::Opening,
51            location,
52        })) = self.peek()
53        {
54            self.next();
55            Some(location)
56        } else {
57            None
58        }
59    }
60
61    pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> {
62        if let Some(Ok(Token::Bracket {
63            kind: BracketKind::Closing,
64            location,
65        })) = self.peek()
66        {
67            Some(location)
68        } else {
69            None
70        }
71    }
72
73    pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> {
74        if let Some(&Ok(Token::Bracket {
75            kind: BracketKind::Closing,
76            location,
77        })) = self.peek()
78        {
79            self.next();
80            Some(location)
81        } else {
82            None
83        }
84    }
85}
86
87pub(super) enum Token<'a> {
88    Literal(Spanned<&'a [u8]>),
89    Bracket {
90        kind: BracketKind,
91        location: Location,
92    },
93    ComponentPart {
94        kind: ComponentKind,
95        value: Spanned<&'a [u8]>,
96    },
97}
98
99pub(super) enum BracketKind {
100    Opening,
101    Closing,
102}
103
104pub(super) enum ComponentKind {
105    Whitespace,
106    NotWhitespace,
107}
108
109fn attach_location<'item>(
110    iter: impl Iterator<Item = &'item u8>,
111    proc_span: proc_macro::Span,
112) -> impl Iterator<Item = (&'item u8, Location)> {
113    let mut byte_pos = 0;
114
115    iter.map(move |byte| {
116        let location = Location {
117            byte: byte_pos,
118            proc_span,
119        };
120        byte_pos += 1;
121        (byte, location)
122    })
123}
124
125pub(super) fn lex<const VERSION: u8>(
126    mut input: &[u8],
127    proc_span: proc_macro::Span,
128) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> {
129    assert!(version!(1..=2));
130
131    let mut depth: u8 = 0;
132    let mut iter = attach_location(input.iter(), proc_span).peekable();
133    let mut second_bracket_location = None;
134
135    let iter = iter::from_fn(move || {
136        if version!(..=1) {
137            if let Some(location) = second_bracket_location.take() {
138                return Some(Ok(Token::Bracket {
139                    kind: BracketKind::Opening,
140                    location,
141                }));
142            }
143        }
144
145        Some(Ok(match iter.next()? {
146            (b'\\', backslash_loc) if version!(2..) => match iter.next() {
147                Some((b'\\' | b'[' | b']', char_loc)) => {
148                    let char = &input[1..2];
149                    input = &input[2..];
150                    if depth == 0 {
151                        Token::Literal(char.spanned(backslash_loc.to(char_loc)))
152                    } else {
153                        Token::ComponentPart {
154                            kind: ComponentKind::NotWhitespace,
155                            value: char.spanned(backslash_loc.to(char_loc)),
156                        }
157                    }
158                }
159                Some((_, loc)) => {
160                    return Some(Err(loc.error("invalid escape sequence")));
161                }
162                None => {
163                    return Some(Err(backslash_loc.error("unexpected end of input")));
164                }
165            },
166            (b'[', location) if version!(..=1) => {
167                if let Some((_, second_location)) = iter.next_if(|&(&byte, _)| byte == b'[') {
168                    second_bracket_location = Some(second_location);
169                    input = &input[2..];
170                } else {
171                    depth += 1;
172                    input = &input[1..];
173                }
174
175                Token::Bracket {
176                    kind: BracketKind::Opening,
177                    location,
178                }
179            }
180            (b'[', location) => {
181                depth += 1;
182                input = &input[1..];
183
184                Token::Bracket {
185                    kind: BracketKind::Opening,
186                    location,
187                }
188            }
189            (b']', location) if depth > 0 => {
190                depth -= 1;
191                input = &input[1..];
192
193                Token::Bracket {
194                    kind: BracketKind::Closing,
195                    location,
196                }
197            }
198            (_, start_location) if depth == 0 => {
199                let mut bytes = 1;
200                let mut end_location = start_location;
201
202                while let Some((_, location)) =
203                    iter.next_if(|&(&byte, _)| !((version!(2..) && byte == b'\\') || byte == b'['))
204                {
205                    end_location = location;
206                    bytes += 1;
207                }
208
209                let value = &input[..bytes];
210                input = &input[bytes..];
211
212                Token::Literal(value.spanned(start_location.to(end_location)))
213            }
214            (byte, start_location) => {
215                let mut bytes = 1;
216                let mut end_location = start_location;
217                let is_whitespace = byte.is_ascii_whitespace();
218
219                while let Some((_, location)) = iter.next_if(|&(byte, _)| {
220                    !matches!(byte, b'\\' | b'[' | b']')
221                        && is_whitespace == byte.is_ascii_whitespace()
222                }) {
223                    end_location = location;
224                    bytes += 1;
225                }
226
227                let value = &input[..bytes];
228                input = &input[bytes..];
229
230                Token::ComponentPart {
231                    kind: if is_whitespace {
232                        ComponentKind::Whitespace
233                    } else {
234                        ComponentKind::NotWhitespace
235                    },
236                    value: value.spanned(start_location.to(end_location)),
237                }
238            }
239        }))
240    });
241
242    Lexed {
243        iter: iter.peekable(),
244    }
245}