// time/format_description/parse/lexer.rs
use core::iter;
4
5use super::{attach_location, unused, Error, Location, Spanned, SpannedValue};
6
7pub(super) struct Lexed<I: Iterator> {
9 iter: iter::Peekable<I>,
11}
12
13impl<I: Iterator> Iterator for Lexed<I> {
14 type Item = I::Item;
15
16 fn next(&mut self) -> Option<Self::Item> {
17 self.iter.next()
18 }
19}
20
21impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> {
22 pub(super) fn peek(&mut self) -> Option<&I::Item> {
24 self.iter.peek()
25 }
26
27 pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
29 if let Some(&Ok(Token::ComponentPart {
30 kind: ComponentKind::Whitespace,
31 value,
32 })) = self.peek()
33 {
34 self.next(); Some(value)
36 } else {
37 None
38 }
39 }
40
41 pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
43 if let Some(&Ok(Token::ComponentPart {
44 kind: ComponentKind::NotWhitespace,
45 value,
46 })) = self.peek()
47 {
48 self.next(); Some(value)
50 } else {
51 None
52 }
53 }
54
55 pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> {
57 if let Some(&Ok(Token::Bracket {
58 kind: BracketKind::Opening,
59 location,
60 })) = self.peek()
61 {
62 self.next(); Some(location)
64 } else {
65 None
66 }
67 }
68
69 pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> {
71 if let Some(Ok(Token::Bracket {
72 kind: BracketKind::Closing,
73 location,
74 })) = self.peek()
75 {
76 Some(location)
77 } else {
78 None
79 }
80 }
81
82 pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> {
84 if let Some(&Ok(Token::Bracket {
85 kind: BracketKind::Closing,
86 location,
87 })) = self.peek()
88 {
89 self.next(); Some(location)
91 } else {
92 None
93 }
94 }
95}
96
97pub(super) enum Token<'a> {
99 Literal(Spanned<&'a [u8]>),
101 Bracket {
103 kind: BracketKind,
105 location: Location,
107 },
108 ComponentPart {
110 kind: ComponentKind,
112 value: Spanned<&'a [u8]>,
114 },
115}
116
117pub(super) enum BracketKind {
119 Opening,
121 Closing,
123}
124
125pub(super) enum ComponentKind {
127 Whitespace,
128 NotWhitespace,
129}
130
131pub(super) fn lex<const VERSION: usize>(
141 mut input: &[u8],
142) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> {
143 validate_version!(VERSION);
144
145 let mut depth: u8 = 0;
146 let mut iter = attach_location(input.iter()).peekable();
147 let mut second_bracket_location = None;
148
149 let iter = iter::from_fn(move || {
150 if version!(..=1) {
152 if let Some(location) = second_bracket_location.take() {
154 return Some(Ok(Token::Bracket {
155 kind: BracketKind::Opening,
156 location,
157 }));
158 }
159 }
160
161 Some(Ok(match iter.next()? {
162 (b'\\', backslash_loc) if version!(2..) => {
164 match iter.next() {
165 Some((b'\\' | b'[' | b']', char_loc)) => {
166 let char = &input[1..2];
168 input = &input[2..];
169 if depth == 0 {
170 Token::Literal(char.spanned(backslash_loc.to(char_loc)))
171 } else {
172 Token::ComponentPart {
173 kind: ComponentKind::NotWhitespace,
174 value: char.spanned(backslash_loc.to(char_loc)),
175 }
176 }
177 }
178 Some((_, loc)) => {
179 return Some(Err(Error {
180 _inner: unused(loc.error("invalid escape sequence")),
181 public: crate::error::InvalidFormatDescription::Expected {
182 what: "valid escape sequence",
183 index: loc.byte as _,
184 },
185 }));
186 }
187 None => {
188 return Some(Err(Error {
189 _inner: unused(backslash_loc.error("unexpected end of input")),
190 public: crate::error::InvalidFormatDescription::Expected {
191 what: "valid escape sequence",
192 index: backslash_loc.byte as _,
193 },
194 }));
195 }
196 }
197 }
198 (b'[', location) if version!(..=1) => {
200 if let Some((_, second_location)) = iter.next_if(|&(&byte, _)| byte == b'[') {
201 second_bracket_location = Some(second_location);
203 input = &input[2..];
204 } else {
205 depth += 1;
207 input = &input[1..];
208 }
209
210 Token::Bracket {
211 kind: BracketKind::Opening,
212 location,
213 }
214 }
215 (b'[', location) => {
217 depth += 1;
218 input = &input[1..];
219
220 Token::Bracket {
221 kind: BracketKind::Opening,
222 location,
223 }
224 }
225 (b']', location) if depth > 0 => {
227 depth -= 1;
228 input = &input[1..];
229
230 Token::Bracket {
231 kind: BracketKind::Closing,
232 location,
233 }
234 }
235 (_, start_location) if depth == 0 => {
237 let mut bytes = 1;
238 let mut end_location = start_location;
239
240 while let Some((_, location)) =
241 iter.next_if(|&(&byte, _)| !((version!(2..) && byte == b'\\') || byte == b'['))
242 {
243 end_location = location;
244 bytes += 1;
245 }
246
247 let value = &input[..bytes];
248 input = &input[bytes..];
249
250 Token::Literal(value.spanned(start_location.to(end_location)))
251 }
252 (byte, start_location) => {
254 let mut bytes = 1;
255 let mut end_location = start_location;
256 let is_whitespace = byte.is_ascii_whitespace();
257
258 while let Some((_, location)) = iter.next_if(|&(byte, _)| {
259 !matches!(byte, b'\\' | b'[' | b']')
260 && is_whitespace == byte.is_ascii_whitespace()
261 }) {
262 end_location = location;
263 bytes += 1;
264 }
265
266 let value = &input[..bytes];
267 input = &input[bytes..];
268
269 Token::ComponentPart {
270 kind: if is_whitespace {
271 ComponentKind::Whitespace
272 } else {
273 ComponentKind::NotWhitespace
274 },
275 value: value.spanned(start_location.to(end_location)),
276 }
277 }
278 }))
279 });
280
281 Lexed {
282 iter: iter.peekable(),
283 }
284}