Skip to main content

time_macros/format_description/
lexer.rs

1use core::iter;
2
3use super::{Error, Location, Spanned, SpannedValue};
4use crate::FormatDescriptionVersion;
5
6pub(super) struct Lexed<I: Iterator> {
7    iter: iter::Peekable<I>,
8}
9
10impl<I: Iterator> Iterator for Lexed<I> {
11    type Item = I::Item;
12
13    fn next(&mut self) -> Option<Self::Item> {
14        self.iter.next()
15    }
16}
17
18impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> {
19    pub(super) fn peek(&mut self) -> Option<&I::Item> {
20        self.iter.peek()
21    }
22
23    pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
24        if let Some(&Ok(Token::ComponentPart {
25            kind: ComponentKind::Whitespace,
26            value,
27        })) = self.peek()
28        {
29            self.next(); // consume
30            Some(value)
31        } else {
32            None
33        }
34    }
35
36    pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
37        if let Some(&Ok(Token::ComponentPart {
38            kind: ComponentKind::NotWhitespace,
39            value,
40        })) = self.peek()
41        {
42            self.next();
43            Some(value)
44        } else {
45            None
46        }
47    }
48
49    pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> {
50        if let Some(&Ok(Token::Bracket {
51            kind: BracketKind::Opening,
52            location,
53        })) = self.peek()
54        {
55            self.next();
56            Some(location)
57        } else {
58            None
59        }
60    }
61
62    pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> {
63        if let Some(Ok(Token::Bracket {
64            kind: BracketKind::Closing,
65            location,
66        })) = self.peek()
67        {
68            Some(location)
69        } else {
70            None
71        }
72    }
73
74    pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> {
75        if let Some(&Ok(Token::Bracket {
76            kind: BracketKind::Closing,
77            location,
78        })) = self.peek()
79        {
80            self.next();
81            Some(location)
82        } else {
83            None
84        }
85    }
86}
87
88pub(super) enum Token<'a> {
89    Literal(Spanned<&'a [u8]>),
90    Bracket {
91        kind: BracketKind,
92        location: Location,
93    },
94    ComponentPart {
95        kind: ComponentKind,
96        value: Spanned<&'a [u8]>,
97    },
98}
99
100impl std::fmt::Debug for Token<'_> {
101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102        match self {
103            Self::Literal(arg0) => f
104                .debug_tuple("Literal")
105                .field(&String::from_utf8_lossy(arg0))
106                .finish(),
107            Self::Bracket { kind, location } => f
108                .debug_struct("Bracket")
109                .field("kind", kind)
110                .field("location", location)
111                .finish(),
112            Self::ComponentPart { kind, value } => f
113                .debug_struct("ComponentPart")
114                .field("kind", kind)
115                .field("value", &String::from_utf8_lossy(value))
116                .finish(),
117        }
118    }
119}
120
121#[derive(Debug)]
122pub(super) enum BracketKind {
123    Opening,
124    Closing,
125}
126
127#[derive(Debug)]
128pub(super) enum ComponentKind {
129    Whitespace,
130    NotWhitespace,
131}
132
133fn attach_location<'item>(
134    iter: impl Iterator<Item = &'item u8>,
135    proc_span: proc_macro::Span,
136) -> impl Iterator<Item = (&'item u8, Location)> {
137    let mut byte_pos = 0;
138
139    iter.map(move |byte| {
140        let location = Location {
141            byte: byte_pos,
142            proc_span,
143        };
144        byte_pos += 1;
145        (byte, location)
146    })
147}
148
149pub(super) fn lex(
150    version: FormatDescriptionVersion,
151    mut input: &[u8],
152    proc_span: proc_macro::Span,
153) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> {
154    let mut depth: u32 = 0;
155    let mut nested_component_name_seen = false;
156    let mut iter = attach_location(input.iter(), proc_span).peekable();
157    let mut second_bracket_location = None;
158
159    let iter = iter::from_fn(move || {
160        if version.is_v1()
161            && let Some(location) = second_bracket_location.take()
162        {
163            return Some(Ok(Token::Bracket {
164                kind: BracketKind::Opening,
165                location,
166            }));
167        }
168
169        Some(Ok(match iter.next()? {
170            (b'\\', backslash_loc) if version.is_at_least_v2() => match iter.next() {
171                Some((b'\\' | b'[' | b']', char_loc)) => {
172                    let char = &input[1..2];
173                    input = &input[2..];
174                    if depth == 0 {
175                        Token::Literal(char.spanned(backslash_loc.to(char_loc)))
176                    } else {
177                        Token::ComponentPart {
178                            kind: ComponentKind::NotWhitespace,
179                            value: char.spanned(backslash_loc.to(char_loc)),
180                        }
181                    }
182                }
183                Some((_, loc)) => {
184                    return Some(Err(loc.error("invalid escape sequence")));
185                }
186                None => {
187                    return Some(Err(backslash_loc.error("unexpected end of input")));
188                }
189            },
190            (b'[', location) if version.is_v1() && !nested_component_name_seen => {
191                if let Some((_, second_location)) = iter.next_if(|&(&byte, _)| byte == b'[') {
192                    second_bracket_location = Some(second_location);
193                    input = &input[2..];
194                } else {
195                    depth += 1;
196                    input = &input[1..];
197                }
198
199                Token::Bracket {
200                    kind: BracketKind::Opening,
201                    location,
202                }
203            }
204            (b'[', location) => {
205                depth += 1;
206                input = &input[1..];
207
208                Token::Bracket {
209                    kind: BracketKind::Opening,
210                    location,
211                }
212            }
213            (b']', location) if depth > 0 => {
214                depth -= 1;
215                if version.is_v1() {
216                    nested_component_name_seen = depth != 0;
217                }
218                input = &input[1..];
219
220                Token::Bracket {
221                    kind: BracketKind::Closing,
222                    location,
223                }
224            }
225            (_, start_location) if depth == 0 => {
226                let mut bytes = 1;
227                let mut end_location = start_location;
228
229                while let Some((_, location)) = iter.next_if(|&(&byte, _)| {
230                    !(version.is_at_least_v2() && byte == b'\\' || byte == b'[')
231                }) {
232                    end_location = location;
233                    bytes += 1;
234                }
235
236                let value = &input[..bytes];
237                input = &input[bytes..];
238
239                Token::Literal(value.spanned(start_location.to(end_location)))
240            }
241            (byte, start_location) => {
242                let mut bytes = 1;
243                let mut end_location = start_location;
244                let is_whitespace = byte.is_ascii_whitespace();
245
246                while let Some((_, location)) = iter.next_if(|&(byte, _)| {
247                    !matches!(byte, b'\\' | b'[' | b']')
248                        && is_whitespace == byte.is_ascii_whitespace()
249                }) {
250                    end_location = location;
251                    bytes += 1;
252                }
253
254                let value = &input[..bytes];
255                input = &input[bytes..];
256
257                if version.is_v1() && !is_whitespace {
258                    nested_component_name_seen = true;
259                }
260
261                Token::ComponentPart {
262                    kind: if is_whitespace {
263                        ComponentKind::Whitespace
264                    } else {
265                        ComponentKind::NotWhitespace
266                    },
267                    value: value.spanned(start_location.to(end_location)),
268                }
269            }
270        }))
271    });
272
273    Lexed {
274        iter: iter.peekable(),
275    }
276}