// time_macros/format_description/lexer.rs

use core::fmt;
use core::iter;

use super::{Error, Location, Spanned, SpannedValue};
use crate::FormatDescriptionVersion;

6pub(super) struct Lexed<I: Iterator> {
7    iter: iter::Peekable<I>,
8}
9
10impl<I: Iterator> Iterator for Lexed<I> {
11    type Item = I::Item;
12
13    fn next(&mut self) -> Option<Self::Item> {
14        self.iter.next()
15    }
16}
17
18impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> {
19    pub(super) fn peek(&mut self) -> Option<&I::Item> {
20        self.iter.peek()
21    }
22
23    pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token str>> {
24        if let Some(&Ok(Token::ComponentPart {
25            kind: ComponentKind::Whitespace,
26            value,
27        })) = self.peek()
28        {
29            self.next(); // consume
30            Some(value)
31        } else {
32            None
33        }
34    }
35
36    pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token str>> {
37        if let Some(&Ok(Token::ComponentPart {
38            kind: ComponentKind::NotWhitespace,
39            value,
40        })) = self.peek()
41        {
42            self.next();
43            Some(value)
44        } else {
45            None
46        }
47    }
48
49    pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> {
50        if let Some(&Ok(Token::Bracket {
51            kind: BracketKind::Opening,
52            location,
53        })) = self.peek()
54        {
55            self.next();
56            Some(location)
57        } else {
58            None
59        }
60    }
61
62    pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> {
63        if let Some(Ok(Token::Bracket {
64            kind: BracketKind::Closing,
65            location,
66        })) = self.peek()
67        {
68            Some(location)
69        } else {
70            None
71        }
72    }
73
74    pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> {
75        if let Some(&Ok(Token::Bracket {
76            kind: BracketKind::Closing,
77            location,
78        })) = self.peek()
79        {
80            self.next();
81            Some(location)
82        } else {
83            None
84        }
85    }
86}
87
88pub(super) enum Token<'a> {
89    Literal(Spanned<&'a [u8]>),
90    Bracket {
91        kind: BracketKind,
92        location: Location,
93    },
94    ComponentPart {
95        kind: ComponentKind,
96        value: Spanned<&'a str>,
97    },
98}
99
100impl std::fmt::Debug for Token<'_> {
101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102        match self {
103            Self::Literal(arg0) => f
104                .debug_tuple("Literal")
105                .field(&String::from_utf8_lossy(arg0))
106                .finish(),
107            Self::Bracket { kind, location } => f
108                .debug_struct("Bracket")
109                .field("kind", kind)
110                .field("location", location)
111                .finish(),
112            Self::ComponentPart { kind, value } => f
113                .debug_struct("ComponentPart")
114                .field("kind", kind)
115                .field("value", value)
116                .finish(),
117        }
118    }
119}
120
121#[derive(Debug)]
122pub(super) enum BracketKind {
123    Opening,
124    Closing,
125}
126
127#[derive(Debug)]
128pub(super) enum ComponentKind {
129    Whitespace,
130    NotWhitespace,
131}
132
133pub(super) fn lex(
134    version: FormatDescriptionVersion,
135    mut input: &[u8],
136    proc_span: proc_macro::Span,
137) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> {
138    let mut depth: u32 = 0;
139    let mut byte_pos: u32 = 0;
140    let mut nested_component_name_seen = false;
141    let mut second_bracket_location = None;
142
143    let iter = iter::from_fn(move || {
144        if version.is_v1()
145            && let Some(location) = second_bracket_location.take()
146        {
147            return Some(Ok(Token::Bracket {
148                kind: BracketKind::Opening,
149                location,
150            }));
151        }
152
153        let byte = *input.first()?;
154        let location = Location {
155            byte: byte_pos,
156            proc_span,
157        };
158
159        Some(Ok(match byte {
160            b'\\' if version.is_at_least_v2() => {
161                let backslash_loc = location;
162                match input.get(1) {
163                    Some(b'\\' | b'[' | b']') => {
164                        let char_loc = Location {
165                            byte: byte_pos + 1,
166                            proc_span,
167                        };
168                        // Safety: We know that the character is either a left bracket, a right
169                        // bracket, or a backslash.
170                        let char = unsafe { str::from_utf8_unchecked(&input[1..2]) };
171                        input = &input[2..];
172                        byte_pos += 2;
173                        if depth == 0 {
174                            Token::Literal(char.as_bytes().spanned(backslash_loc.to(char_loc)))
175                        } else {
176                            Token::ComponentPart {
177                                kind: ComponentKind::NotWhitespace,
178                                value: char.spanned(backslash_loc.to(char_loc)),
179                            }
180                        }
181                    }
182                    Some(_) => {
183                        let loc = Location {
184                            byte: byte_pos + 1,
185                            proc_span,
186                        };
187                        return Some(Err(loc.error("invalid escape sequence")));
188                    }
189                    None => {
190                        return Some(Err(backslash_loc.error("unexpected end of input")));
191                    }
192                }
193            }
194            b'[' if version.is_v1() && !nested_component_name_seen => {
195                if input.get(1) == Some(&b'[') {
196                    let second_location = Location {
197                        byte: byte_pos + 1,
198                        proc_span,
199                    };
200                    second_bracket_location = Some(second_location);
201                    input = &input[2..];
202                    byte_pos += 2;
203                } else {
204                    depth += 1;
205                    input = &input[1..];
206                    byte_pos += 1;
207                }
208
209                Token::Bracket {
210                    kind: BracketKind::Opening,
211                    location,
212                }
213            }
214            b'[' => {
215                depth += 1;
216                input = &input[1..];
217                byte_pos += 1;
218
219                Token::Bracket {
220                    kind: BracketKind::Opening,
221                    location,
222                }
223            }
224            b']' if depth > 0 => {
225                depth -= 1;
226                if version.is_v1() {
227                    nested_component_name_seen = depth != 0;
228                }
229                input = &input[1..];
230                byte_pos += 1;
231
232                Token::Bracket {
233                    kind: BracketKind::Closing,
234                    location,
235                }
236            }
237            _ if depth == 0 => {
238                let mut bytes: u32 = 1;
239                let mut end_location = location;
240
241                while let Some(&next_byte) = input.get(bytes as usize) {
242                    if (version.is_at_least_v2() && next_byte == b'\\') || next_byte == b'[' {
243                        break;
244                    }
245                    end_location = Location {
246                        byte: byte_pos + bytes,
247                        proc_span,
248                    };
249                    bytes += 1;
250                }
251
252                let value = &input[..bytes as usize];
253                input = &input[bytes as usize..];
254                byte_pos += bytes;
255
256                Token::Literal(value.spanned(location.to(end_location)))
257            }
258            byte => {
259                let mut bytes: u32 = 1;
260                let mut end_location = location;
261                let is_whitespace = byte.is_ascii_whitespace();
262
263                while let Some(&next_byte) = input.get(bytes as usize) {
264                    if matches!(next_byte, b'\\' | b'[' | b']')
265                        || is_whitespace != next_byte.is_ascii_whitespace()
266                    {
267                        break;
268                    }
269                    end_location = Location {
270                        byte: byte_pos + bytes,
271                        proc_span,
272                    };
273                    bytes += 1;
274                }
275
276                let Ok(value) = str::from_utf8(&input[..bytes as usize]) else {
277                    return Some(Err(location.error("components must be valid UTF-8")));
278                };
279                input = &input[bytes as usize..];
280                byte_pos += bytes;
281
282                if version.is_v1() && !is_whitespace {
283                    nested_component_name_seen = true;
284                }
285
286                Token::ComponentPart {
287                    kind: if is_whitespace {
288                        ComponentKind::Whitespace
289                    } else {
290                        ComponentKind::NotWhitespace
291                    },
292                    value: value.spanned(location.to(end_location)),
293                }
294            }
295        }))
296    });
297
298    Lexed {
299        iter: iter.peekable(),
300    }
301}