time_macros/format_description/
lexer.rs1use core::iter;
2
3use super::{Error, Location, Spanned, SpannedValue};
4use crate::FormatDescriptionVersion;
5
6pub(super) struct Lexed<I: Iterator> {
7 iter: iter::Peekable<I>,
8}
9
10impl<I: Iterator> Iterator for Lexed<I> {
11 type Item = I::Item;
12
13 fn next(&mut self) -> Option<Self::Item> {
14 self.iter.next()
15 }
16}
17
18impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> {
19 pub(super) fn peek(&mut self) -> Option<&I::Item> {
20 self.iter.peek()
21 }
22
23 pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token str>> {
24 if let Some(&Ok(Token::ComponentPart {
25 kind: ComponentKind::Whitespace,
26 value,
27 })) = self.peek()
28 {
29 self.next(); Some(value)
31 } else {
32 None
33 }
34 }
35
36 pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token str>> {
37 if let Some(&Ok(Token::ComponentPart {
38 kind: ComponentKind::NotWhitespace,
39 value,
40 })) = self.peek()
41 {
42 self.next();
43 Some(value)
44 } else {
45 None
46 }
47 }
48
49 pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> {
50 if let Some(&Ok(Token::Bracket {
51 kind: BracketKind::Opening,
52 location,
53 })) = self.peek()
54 {
55 self.next();
56 Some(location)
57 } else {
58 None
59 }
60 }
61
62 pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> {
63 if let Some(Ok(Token::Bracket {
64 kind: BracketKind::Closing,
65 location,
66 })) = self.peek()
67 {
68 Some(location)
69 } else {
70 None
71 }
72 }
73
74 pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> {
75 if let Some(&Ok(Token::Bracket {
76 kind: BracketKind::Closing,
77 location,
78 })) = self.peek()
79 {
80 self.next();
81 Some(location)
82 } else {
83 None
84 }
85 }
86}
87
88pub(super) enum Token<'a> {
89 Literal(Spanned<&'a [u8]>),
90 Bracket {
91 kind: BracketKind,
92 location: Location,
93 },
94 ComponentPart {
95 kind: ComponentKind,
96 value: Spanned<&'a str>,
97 },
98}
99
100impl std::fmt::Debug for Token<'_> {
101 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102 match self {
103 Self::Literal(arg0) => f
104 .debug_tuple("Literal")
105 .field(&String::from_utf8_lossy(arg0))
106 .finish(),
107 Self::Bracket { kind, location } => f
108 .debug_struct("Bracket")
109 .field("kind", kind)
110 .field("location", location)
111 .finish(),
112 Self::ComponentPart { kind, value } => f
113 .debug_struct("ComponentPart")
114 .field("kind", kind)
115 .field("value", value)
116 .finish(),
117 }
118 }
119}
120
121#[derive(Debug)]
122pub(super) enum BracketKind {
123 Opening,
124 Closing,
125}
126
127#[derive(Debug)]
128pub(super) enum ComponentKind {
129 Whitespace,
130 NotWhitespace,
131}
132
133pub(super) fn lex(
134 version: FormatDescriptionVersion,
135 mut input: &[u8],
136 proc_span: proc_macro::Span,
137) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> {
138 let mut depth: u32 = 0;
139 let mut byte_pos: u32 = 0;
140 let mut nested_component_name_seen = false;
141 let mut second_bracket_location = None;
142
143 let iter = iter::from_fn(move || {
144 if version.is_v1()
145 && let Some(location) = second_bracket_location.take()
146 {
147 return Some(Ok(Token::Bracket {
148 kind: BracketKind::Opening,
149 location,
150 }));
151 }
152
153 let byte = *input.first()?;
154 let location = Location {
155 byte: byte_pos,
156 proc_span,
157 };
158
159 Some(Ok(match byte {
160 b'\\' if version.is_at_least_v2() => {
161 let backslash_loc = location;
162 match input.get(1) {
163 Some(b'\\' | b'[' | b']') => {
164 let char_loc = Location {
165 byte: byte_pos + 1,
166 proc_span,
167 };
168 let char = unsafe { str::from_utf8_unchecked(&input[1..2]) };
171 input = &input[2..];
172 byte_pos += 2;
173 if depth == 0 {
174 Token::Literal(char.as_bytes().spanned(backslash_loc.to(char_loc)))
175 } else {
176 Token::ComponentPart {
177 kind: ComponentKind::NotWhitespace,
178 value: char.spanned(backslash_loc.to(char_loc)),
179 }
180 }
181 }
182 Some(_) => {
183 let loc = Location {
184 byte: byte_pos + 1,
185 proc_span,
186 };
187 return Some(Err(loc.error("invalid escape sequence")));
188 }
189 None => {
190 return Some(Err(backslash_loc.error("unexpected end of input")));
191 }
192 }
193 }
194 b'[' if version.is_v1() && !nested_component_name_seen => {
195 if input.get(1) == Some(&b'[') {
196 let second_location = Location {
197 byte: byte_pos + 1,
198 proc_span,
199 };
200 second_bracket_location = Some(second_location);
201 input = &input[2..];
202 byte_pos += 2;
203 } else {
204 depth += 1;
205 input = &input[1..];
206 byte_pos += 1;
207 }
208
209 Token::Bracket {
210 kind: BracketKind::Opening,
211 location,
212 }
213 }
214 b'[' => {
215 depth += 1;
216 input = &input[1..];
217 byte_pos += 1;
218
219 Token::Bracket {
220 kind: BracketKind::Opening,
221 location,
222 }
223 }
224 b']' if depth > 0 => {
225 depth -= 1;
226 if version.is_v1() {
227 nested_component_name_seen = depth != 0;
228 }
229 input = &input[1..];
230 byte_pos += 1;
231
232 Token::Bracket {
233 kind: BracketKind::Closing,
234 location,
235 }
236 }
237 _ if depth == 0 => {
238 let mut bytes: u32 = 1;
239 let mut end_location = location;
240
241 while let Some(&next_byte) = input.get(bytes as usize) {
242 if (version.is_at_least_v2() && next_byte == b'\\') || next_byte == b'[' {
243 break;
244 }
245 end_location = Location {
246 byte: byte_pos + bytes,
247 proc_span,
248 };
249 bytes += 1;
250 }
251
252 let value = &input[..bytes as usize];
253 input = &input[bytes as usize..];
254 byte_pos += bytes;
255
256 Token::Literal(value.spanned(location.to(end_location)))
257 }
258 byte => {
259 let mut bytes: u32 = 1;
260 let mut end_location = location;
261 let is_whitespace = byte.is_ascii_whitespace();
262
263 while let Some(&next_byte) = input.get(bytes as usize) {
264 if matches!(next_byte, b'\\' | b'[' | b']')
265 || is_whitespace != next_byte.is_ascii_whitespace()
266 {
267 break;
268 }
269 end_location = Location {
270 byte: byte_pos + bytes,
271 proc_span,
272 };
273 bytes += 1;
274 }
275
276 let Ok(value) = str::from_utf8(&input[..bytes as usize]) else {
277 return Some(Err(location.error("components must be valid UTF-8")));
278 };
279 input = &input[bytes as usize..];
280 byte_pos += bytes;
281
282 if version.is_v1() && !is_whitespace {
283 nested_component_name_seen = true;
284 }
285
286 Token::ComponentPart {
287 kind: if is_whitespace {
288 ComponentKind::Whitespace
289 } else {
290 ComponentKind::NotWhitespace
291 },
292 value: value.spanned(location.to(end_location)),
293 }
294 }
295 }))
296 });
297
298 Lexed {
299 iter: iter.peekable(),
300 }
301}