time/format_description/parse/
lexer.rs1use core::iter;
4
5use super::{attach_location, unused, Error, Location, Spanned, SpannedValue};
6
7pub(super) struct Lexed<I: Iterator> {
9 iter: iter::Peekable<I>,
11}
12
13impl<I: Iterator> Iterator for Lexed<I> {
14 type Item = I::Item;
15
16 fn next(&mut self) -> Option<Self::Item> {
17 self.iter.next()
18 }
19}
20
21impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> {
22 #[inline]
24 pub(super) fn peek(&mut self) -> Option<&I::Item> {
25 self.iter.peek()
26 }
27
28 #[inline]
30 pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
31 if let Some(&Ok(Token::ComponentPart {
32 kind: ComponentKind::Whitespace,
33 value,
34 })) = self.peek()
35 {
36 self.next(); Some(value)
38 } else {
39 None
40 }
41 }
42
43 #[inline]
45 pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
46 if let Some(&Ok(Token::ComponentPart {
47 kind: ComponentKind::NotWhitespace,
48 value,
49 })) = self.peek()
50 {
51 self.next(); Some(value)
53 } else {
54 None
55 }
56 }
57
58 #[inline]
60 pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> {
61 if let Some(&Ok(Token::Bracket {
62 kind: BracketKind::Opening,
63 location,
64 })) = self.peek()
65 {
66 self.next(); Some(location)
68 } else {
69 None
70 }
71 }
72
73 #[inline]
75 pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> {
76 if let Some(Ok(Token::Bracket {
77 kind: BracketKind::Closing,
78 location,
79 })) = self.peek()
80 {
81 Some(location)
82 } else {
83 None
84 }
85 }
86
87 #[inline]
89 pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> {
90 if let Some(&Ok(Token::Bracket {
91 kind: BracketKind::Closing,
92 location,
93 })) = self.peek()
94 {
95 self.next(); Some(location)
97 } else {
98 None
99 }
100 }
101}
102
103pub(super) enum Token<'a> {
105 Literal(Spanned<&'a [u8]>),
107 Bracket {
109 kind: BracketKind,
111 location: Location,
113 },
114 ComponentPart {
116 kind: ComponentKind,
118 value: Spanned<&'a [u8]>,
120 },
121}
122
123pub(super) enum BracketKind {
125 Opening,
127 Closing,
129}
130
131pub(super) enum ComponentKind {
133 Whitespace,
134 NotWhitespace,
135}
136
137#[inline]
147pub(super) fn lex<const VERSION: usize>(
148 mut input: &[u8],
149) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> {
150 validate_version!(VERSION);
151
152 let mut depth: u8 = 0;
153 let mut iter = attach_location(input.iter()).peekable();
154 let mut second_bracket_location = None;
155
156 let iter = iter::from_fn(move || {
157 if version!(..=1) {
159 if let Some(location) = second_bracket_location.take() {
161 return Some(Ok(Token::Bracket {
162 kind: BracketKind::Opening,
163 location,
164 }));
165 }
166 }
167
168 Some(Ok(match iter.next()? {
169 (b'\\', backslash_loc) if version!(2..) => {
171 match iter.next() {
172 Some((b'\\' | b'[' | b']', char_loc)) => {
173 let char = &input[1..2];
175 input = &input[2..];
176 if depth == 0 {
177 Token::Literal(char.spanned(backslash_loc.to(char_loc)))
178 } else {
179 Token::ComponentPart {
180 kind: ComponentKind::NotWhitespace,
181 value: char.spanned(backslash_loc.to(char_loc)),
182 }
183 }
184 }
185 Some((_, loc)) => {
186 return Some(Err(Error {
187 _inner: unused(loc.error("invalid escape sequence")),
188 public: crate::error::InvalidFormatDescription::Expected {
189 what: "valid escape sequence",
190 index: loc.byte as usize,
191 },
192 }));
193 }
194 None => {
195 return Some(Err(Error {
196 _inner: unused(backslash_loc.error("unexpected end of input")),
197 public: crate::error::InvalidFormatDescription::Expected {
198 what: "valid escape sequence",
199 index: backslash_loc.byte as usize,
200 },
201 }));
202 }
203 }
204 }
205 (b'[', location) if version!(..=1) => {
207 if let Some((_, second_location)) = iter.next_if(|&(&byte, _)| byte == b'[') {
208 second_bracket_location = Some(second_location);
210 input = &input[2..];
211 } else {
212 depth += 1;
214 input = &input[1..];
215 }
216
217 Token::Bracket {
218 kind: BracketKind::Opening,
219 location,
220 }
221 }
222 (b'[', location) => {
224 depth += 1;
225 input = &input[1..];
226
227 Token::Bracket {
228 kind: BracketKind::Opening,
229 location,
230 }
231 }
232 (b']', location) if depth > 0 => {
234 depth -= 1;
235 input = &input[1..];
236
237 Token::Bracket {
238 kind: BracketKind::Closing,
239 location,
240 }
241 }
242 (_, start_location) if depth == 0 => {
244 let mut bytes = 1;
245 let mut end_location = start_location;
246
247 while let Some((_, location)) =
248 iter.next_if(|&(&byte, _)| !((version!(2..) && byte == b'\\') || byte == b'['))
249 {
250 end_location = location;
251 bytes += 1;
252 }
253
254 let value = &input[..bytes];
255 input = &input[bytes..];
256
257 Token::Literal(value.spanned(start_location.to(end_location)))
258 }
259 (byte, start_location) => {
261 let mut bytes = 1;
262 let mut end_location = start_location;
263 let is_whitespace = byte.is_ascii_whitespace();
264
265 while let Some((_, location)) = iter.next_if(|&(byte, _)| {
266 !matches!(byte, b'\\' | b'[' | b']')
267 && is_whitespace == byte.is_ascii_whitespace()
268 }) {
269 end_location = location;
270 bytes += 1;
271 }
272
273 let value = &input[..bytes];
274 input = &input[bytes..];
275
276 Token::ComponentPart {
277 kind: if is_whitespace {
278 ComponentKind::Whitespace
279 } else {
280 ComponentKind::NotWhitespace
281 },
282 value: value.spanned(start_location.to(end_location)),
283 }
284 }
285 }))
286 });
287
288 Lexed {
289 iter: iter.peekable(),
290 }
291}