time_macros/format_description/
lexer.rs1use core::iter;
2
3use super::{Error, Location, Spanned, SpannedValue};
4use crate::FormatDescriptionVersion;
5
6pub(super) struct Lexed<I: Iterator> {
7 iter: iter::Peekable<I>,
8}
9
10impl<I: Iterator> Iterator for Lexed<I> {
11 type Item = I::Item;
12
13 fn next(&mut self) -> Option<Self::Item> {
14 self.iter.next()
15 }
16}
17
18impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> {
19 pub(super) fn peek(&mut self) -> Option<&I::Item> {
20 self.iter.peek()
21 }
22
23 pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token str>> {
24 if let Some(&Ok(Token::ComponentPart {
25 kind: ComponentKind::Whitespace,
26 value,
27 })) = self.peek()
28 {
29 self.next(); Some(value)
31 } else {
32 None
33 }
34 }
35
36 pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token str>> {
37 if let Some(&Ok(Token::ComponentPart {
38 kind: ComponentKind::NotWhitespace,
39 value,
40 })) = self.peek()
41 {
42 self.next();
43 Some(value)
44 } else {
45 None
46 }
47 }
48
49 pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> {
50 if let Some(&Ok(Token::Bracket {
51 kind: BracketKind::Opening,
52 location,
53 })) = self.peek()
54 {
55 self.next();
56 Some(location)
57 } else {
58 None
59 }
60 }
61
62 pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> {
63 if let Some(Ok(Token::Bracket {
64 kind: BracketKind::Closing,
65 location,
66 })) = self.peek()
67 {
68 Some(location)
69 } else {
70 None
71 }
72 }
73
74 pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> {
75 if let Some(&Ok(Token::Bracket {
76 kind: BracketKind::Closing,
77 location,
78 })) = self.peek()
79 {
80 self.next();
81 Some(location)
82 } else {
83 None
84 }
85 }
86}
87
88pub(super) enum Token<'a> {
89 Literal(Spanned<&'a [u8]>),
90 Bracket {
91 kind: BracketKind,
92 location: Location,
93 },
94 ComponentPart {
95 kind: ComponentKind,
96 value: Spanned<&'a str>,
97 },
98}
99
100impl std::fmt::Debug for Token<'_> {
101 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102 match self {
103 Self::Literal(arg0) => f
104 .debug_tuple("Literal")
105 .field(&String::from_utf8_lossy(arg0))
106 .finish(),
107 Self::Bracket { kind, location } => f
108 .debug_struct("Bracket")
109 .field("kind", kind)
110 .field("location", location)
111 .finish(),
112 Self::ComponentPart { kind, value } => f
113 .debug_struct("ComponentPart")
114 .field("kind", kind)
115 .field("value", value)
116 .finish(),
117 }
118 }
119}
120
121#[derive(Debug)]
122pub(super) enum BracketKind {
123 Opening,
124 Closing,
125}
126
127#[derive(Debug)]
128pub(super) enum ComponentKind {
129 Whitespace,
130 NotWhitespace,
131}
132
133fn attach_location<'item>(
134 iter: impl Iterator<Item = &'item u8>,
135 proc_span: proc_macro::Span,
136) -> impl Iterator<Item = (&'item u8, Location)> {
137 let mut byte_pos = 0;
138
139 iter.map(move |byte| {
140 let location = Location {
141 byte: byte_pos,
142 proc_span,
143 };
144 byte_pos += 1;
145 (byte, location)
146 })
147}
148
149pub(super) fn lex(
150 version: FormatDescriptionVersion,
151 mut input: &[u8],
152 proc_span: proc_macro::Span,
153) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> {
154 let mut depth: u32 = 0;
155 let mut nested_component_name_seen = false;
156 let mut iter = attach_location(input.iter(), proc_span).peekable();
157 let mut second_bracket_location = None;
158
159 let iter = iter::from_fn(move || {
160 if version.is_v1()
161 && let Some(location) = second_bracket_location.take()
162 {
163 return Some(Ok(Token::Bracket {
164 kind: BracketKind::Opening,
165 location,
166 }));
167 }
168
169 Some(Ok(match iter.next()? {
170 (b'\\', backslash_loc) if version.is_at_least_v2() => match iter.next() {
171 Some((b'\\' | b'[' | b']', char_loc)) => {
172 let char = unsafe { str::from_utf8_unchecked(&input[1..2]) };
175 input = &input[2..];
176 if depth == 0 {
177 Token::Literal(char.as_bytes().spanned(backslash_loc.to(char_loc)))
178 } else {
179 Token::ComponentPart {
180 kind: ComponentKind::NotWhitespace,
181 value: char.spanned(backslash_loc.to(char_loc)),
182 }
183 }
184 }
185 Some((_, loc)) => {
186 return Some(Err(loc.error("invalid escape sequence")));
187 }
188 None => {
189 return Some(Err(backslash_loc.error("unexpected end of input")));
190 }
191 },
192 (b'[', location) if version.is_v1() && !nested_component_name_seen => {
193 if let Some((_, second_location)) = iter.next_if(|&(&byte, _)| byte == b'[') {
194 second_bracket_location = Some(second_location);
195 input = &input[2..];
196 } else {
197 depth += 1;
198 input = &input[1..];
199 }
200
201 Token::Bracket {
202 kind: BracketKind::Opening,
203 location,
204 }
205 }
206 (b'[', location) => {
207 depth += 1;
208 input = &input[1..];
209
210 Token::Bracket {
211 kind: BracketKind::Opening,
212 location,
213 }
214 }
215 (b']', location) if depth > 0 => {
216 depth -= 1;
217 if version.is_v1() {
218 nested_component_name_seen = depth != 0;
219 }
220 input = &input[1..];
221
222 Token::Bracket {
223 kind: BracketKind::Closing,
224 location,
225 }
226 }
227 (_, start_location) if depth == 0 => {
228 let mut bytes = 1;
229 let mut end_location = start_location;
230
231 while let Some((_, location)) = iter.next_if(|&(&byte, _)| {
232 !(version.is_at_least_v2() && byte == b'\\' || byte == b'[')
233 }) {
234 end_location = location;
235 bytes += 1;
236 }
237
238 let value = &input[..bytes];
239 input = &input[bytes..];
240
241 Token::Literal(value.spanned(start_location.to(end_location)))
242 }
243 (byte, start_location) => {
244 let mut bytes = 1;
245 let mut end_location = start_location;
246 let is_whitespace = byte.is_ascii_whitespace();
247
248 while let Some((_, location)) = iter.next_if(|&(byte, _)| {
249 !matches!(byte, b'\\' | b'[' | b']')
250 && is_whitespace == byte.is_ascii_whitespace()
251 }) {
252 end_location = location;
253 bytes += 1;
254 }
255
256 let Ok(value) = str::from_utf8(&input[..bytes]) else {
257 return Some(Err(start_location.error("components must be valid UTF-8")));
258 };
259 input = &input[bytes..];
260
261 if version.is_v1() && !is_whitespace {
262 nested_component_name_seen = true;
263 }
264
265 Token::ComponentPart {
266 kind: if is_whitespace {
267 ComponentKind::Whitespace
268 } else {
269 ComponentKind::NotWhitespace
270 },
271 value: value.spanned(start_location.to(end_location)),
272 }
273 }
274 }))
275 });
276
277 Lexed {
278 iter: iter.peekable(),
279 }
280}