time/format_description/parse/
lexer_ast.rs1use alloc::borrow::ToOwned as _;
4use alloc::string::String;
5use alloc::vec::Vec;
6
7use super::format_item::Item;
8use super::{Error, Location, Span, Spanned, SpannedValue, Unused, unused};
9use crate::error::InvalidFormatDescription;
10use crate::internal_macros::{const_try_opt, try_likely_ok};
11
/// The kind of text the lexer is currently looking at.
#[must_use]
enum Context {
    /// The lexer is inside a bracketed component.
    Component,
    /// The lexer is in literal text.
    Literal,
}

impl Context {
    /// Returns `true` if and only if this is [`Context::Component`].
    #[inline]
    const fn is_component(&self) -> bool {
        match self {
            Self::Component => true,
            Self::Literal => false,
        }
    }

    /// Returns `true` if and only if this is [`Context::Literal`].
    #[inline]
    const fn is_literal(&self) -> bool {
        match self {
            Self::Literal => true,
            Self::Component => false,
        }
    }
}
29
30enum NextModifier<'a> {
31 Modifier(Modifier<'a>),
32 TrailingWhitespace(Spanned<&'a str>),
33 None,
34}
35
36pub(super) struct Lexer<'input, const VERSION: u8> {
38 input: &'input [u8],
39 depth: u8,
40 byte_pos: u32,
41}
42
43impl<'input, const VERSION: u8> Lexer<'input, VERSION> {
44 #[inline]
56 pub(super) const fn new(input: &'input str) -> Self {
57 Self {
58 input: input.as_bytes(),
59 depth: 0,
60 byte_pos: 0,
61 }
62 }
63
64 #[inline]
66 fn advance(&mut self, bytes: u32) {
67 self.input = &self.input[bytes as usize..];
68 self.byte_pos += bytes;
69 }
70
71 #[inline]
73 const fn context(&self) -> Context {
74 if self.depth.is_multiple_of(2) {
75 Context::Literal
76 } else {
77 Context::Component
78 }
79 }
80
81 #[inline]
83 fn consume_whitespace(&mut self) -> Option<Spanned<&'input str>> {
84 debug_assert!(self.context().is_component());
85
86 let bytes = self
87 .input
88 .iter()
89 .take_while(|byte| byte.is_ascii_whitespace())
90 .count() as u32;
91
92 if bytes == 0 {
93 return None;
94 }
95
96 let start_loc = Location {
97 byte: self.byte_pos,
98 };
99 let end_loc = Location {
100 byte: self.byte_pos + bytes,
101 };
102
103 let value = unsafe { str::from_utf8_unchecked(&self.input[..bytes as usize]) };
106 self.advance(bytes);
107
108 Some(value.spanned(start_loc.to(end_loc)))
109 }
110
111 #[inline]
113 fn consume_component_part(&mut self) -> Option<Spanned<&'input str>> {
114 debug_assert!(self.context().is_component());
115
116 let bytes = self
117 .input
118 .iter()
119 .take_while(|byte| !byte.is_ascii_whitespace() && !matches!(byte, b'\\' | b'[' | b']'))
120 .count() as u32;
121
122 if bytes == 0 {
123 return None;
124 }
125
126 let start_loc = Location {
127 byte: self.byte_pos,
128 };
129 let end_loc = Location {
130 byte: self.byte_pos + bytes,
131 };
132
133 let value = unsafe { str::from_utf8_unchecked(&self.input[..bytes as usize]) };
136 self.advance(bytes);
137
138 Some(value.spanned(start_loc.to(end_loc)))
139 }
140
141 #[inline]
143 fn consume_closing_bracket(&mut self) -> Option<Location> {
144 if self.input.first() != Some(&b']') {
145 return None;
146 }
147
148 self.depth -= 1;
149
150 let location = Location {
151 byte: self.byte_pos,
152 };
153 self.advance(1);
154 Some(location)
155 }
156
157 #[inline]
160 fn consume_component_name(
161 &mut self,
162 opening_bracket: Location,
163 ) -> Result<(Option<Spanned<&'input str>>, Spanned<&'input str>), Error> {
164 let leading_whitespace = self.consume_whitespace();
165
166 let Some(name) = self.consume_component_part() else {
167 let span = match leading_whitespace {
168 Some(Spanned { value: _, span }) => span,
169 None => opening_bracket.to_self(),
170 };
171 return Err(Error {
172 _inner: unused(span.error("expected component name")),
173 public: InvalidFormatDescription::MissingComponentName {
174 index: span.start.byte as usize,
175 },
176 });
177 };
178
179 Ok((leading_whitespace, name))
180 }
181
182 #[inline]
183 fn consume_modifier(&mut self) -> Result<NextModifier<'input>, Error> {
184 let Some(whitespace) = self.consume_whitespace() else {
185 return Ok(NextModifier::None);
186 };
187
188 let Some(token) = self.consume_component_part() else {
189 return Ok(NextModifier::TrailingWhitespace(whitespace));
190 };
191
192 let modifier =
193 try_likely_ok!(self.modifier_from_leading_whitespace_and_token(whitespace, token));
194 Ok(NextModifier::Modifier(modifier))
195 }
196
197 fn consume_component(
199 &mut self,
200 opening_bracket: Location,
201 ) -> Result<Item<'input, VERSION>, Error> {
202 match self.depth.checked_add(1) {
203 Some(depth) => self.depth = depth,
204 None => {
205 return Err(Error {
206 _inner: unused(opening_bracket.error("too much nesting")),
207 public: InvalidFormatDescription::NotSupported {
208 what: "highly-nested format description",
209 context: "",
210 index: opening_bracket.byte as usize,
211 },
212 });
213 }
214 };
215 self.advance(1);
217
218 let (_leading_whitespace, name) =
219 try_likely_ok!(self.consume_component_name(opening_bracket));
220 let modifiers = try_likely_ok!(Modifiers::parse(self));
221
222 let mut nested_format_descriptions = Vec::new();
223 while self.is_nested_description_start() {
224 if let Ok(description) = self.consume_nested(modifiers.span().end) {
225 nested_format_descriptions.push(description);
226 } else {
227 break;
228 }
229 }
230
231 if modifiers.trailing_whitespace.is_some()
232 && let Some(first_nested) = nested_format_descriptions.first_mut()
233 {
234 first_nested.leading_whitespace = modifiers.trailing_whitespace;
235 }
236
237 let _nested_fds_trailing_whitespace =
238 if modifiers.trailing_whitespace.is_some() && nested_format_descriptions.is_empty() {
239 modifiers.trailing_whitespace
240 } else {
241 self.consume_whitespace()
242 };
243
244 let Some(closing_bracket) = self.consume_closing_bracket() else {
245 return Err(Error {
246 _inner: unused(opening_bracket.error("unclosed bracket")),
247 public: InvalidFormatDescription::UnclosedOpeningBracket {
248 index: opening_bracket.byte as usize,
249 },
250 });
251 };
252
253 if let Some(first_nested_fd) = nested_format_descriptions.first()
254 && first_nested_fd.leading_whitespace.is_none()
255 {
256 return Err(Error {
257 _inner: unused(
258 opening_bracket
259 .to(closing_bracket)
260 .error("missing leading whitespace before nested format description"),
261 ),
262 public: InvalidFormatDescription::Expected {
263 what: "whitespace before nested format description",
264 index: first_nested_fd.opening_bracket.byte as usize,
265 },
266 });
267 }
268
269 if super::format_item::ident_eq::<VERSION>(*name, "optional") {
270 return Item::optional_from_parts(
271 opening_bracket,
272 &modifiers.modifiers,
273 nested_format_descriptions,
274 closing_bracket,
275 );
276 }
277
278 if super::format_item::ident_eq::<VERSION>(*name, "first") {
279 if !modifiers.modifiers.is_empty() {
280 let modifier = &modifiers.modifiers[0];
281 return Err(Error {
282 _inner: unused(modifier.key.span.error("invalid modifier key")),
283 public: InvalidFormatDescription::InvalidModifier {
284 value: (**modifier.key).to_owned(),
285 index: modifier.key.span.start.byte as usize,
286 },
287 });
288 }
289
290 if version!(3..) && nested_format_descriptions.is_empty() {
291 return Err(Error {
292 _inner: unused(opening_bracket.to(closing_bracket).error(
293 "the `first` component requires at least one nested format description",
294 )),
295 public: InvalidFormatDescription::Expected {
296 what: "at least one nested format description",
297 index: closing_bracket.byte as usize,
298 },
299 });
300 }
301
302 let items = nested_format_descriptions
303 .into_iter()
304 .map(|nested_format_description| nested_format_description.items)
305 .collect();
306
307 return Ok(Item::First {
308 value: items,
309 span: opening_bracket.to(closing_bracket),
310 });
311 }
312
313 if !nested_format_descriptions.is_empty() {
314 return Err(Error {
315 _inner: unused(
316 opening_bracket
317 .to(closing_bracket)
318 .error("this component does not support nested format descriptions"),
319 ),
320 public: InvalidFormatDescription::NotSupported {
321 what: "nested format descriptions",
322 context: "on this component",
323 index: opening_bracket.byte as usize,
324 },
325 });
326 }
327
328 let component = try_likely_ok!(super::format_item::component_from_ast::<VERSION>(
329 &name,
330 &modifiers.modifiers
331 ));
332
333 Ok(Item::Component(component))
334 }
335
336 #[inline]
338 fn consume_nested(
339 &mut self,
340 last_location: Location,
341 ) -> Result<NestedFormatDescription<'input, VERSION>, Error> {
342 let leading_whitespace = self.consume_whitespace();
343
344 let opening_bracket = {
345 match self.depth.checked_add(1) {
346 Some(depth) => self.depth = depth,
347 None => {
348 return Err(Error {
349 _inner: unused(last_location.error("too much nesting")),
350 public: InvalidFormatDescription::NotSupported {
351 what: "highly-nested format description",
352 context: "",
353 index: last_location.byte as usize,
354 },
355 });
356 }
357 }
358 let location = Location {
359 byte: self.byte_pos,
360 };
361 self.advance(1);
362 location
363 };
364
365 let mut items = Vec::new();
366 loop {
367 if self.context().is_literal() && self.input.first() == Some(&b']') {
370 break;
371 }
372
373 let Some(token) = self.next() else {
374 break;
375 };
376 items.push(try_likely_ok!(token));
377 }
378
379 let Some(closing_bracket) = self.consume_closing_bracket() else {
380 return Err(Error {
381 _inner: unused(opening_bracket.error("unclosed bracket")),
382 public: InvalidFormatDescription::UnclosedOpeningBracket {
383 index: opening_bracket.byte as usize,
384 },
385 });
386 };
387
388 Ok(NestedFormatDescription {
389 leading_whitespace,
390 opening_bracket,
391 items,
392 closing_bracket,
393 })
394 }
395
396 fn modifier_from_leading_whitespace_and_token(
397 &self,
398 leading_whitespace: Spanned<&'input str>,
399 token: Spanned<&'input str>,
400 ) -> Result<Modifier<'input>, Error> {
401 let Some(colon_index) = token.bytes().position(|b| b == b':') else {
402 return Err(Error {
403 _inner: unused(token.span.error("modifier must be of the form `key:value`")),
404 public: InvalidFormatDescription::InvalidModifier {
405 value: (*token).to_owned(),
406 index: token.span.start.byte as usize,
407 },
408 });
409 };
410 let key = &token[..colon_index];
411 let value = &token[colon_index + 1..];
412
413 if key.is_empty() {
414 return Err(Error {
415 _inner: unused(token.span.shrink_to_start().error("expected modifier key")),
416 public: InvalidFormatDescription::InvalidModifier {
417 value: String::new(),
418 index: token.span.start.byte as usize,
419 },
420 });
421 }
422 if value.is_empty() {
423 return Err(Error {
424 _inner: unused(token.span.shrink_to_end().error("expected modifier value")),
425 public: InvalidFormatDescription::InvalidModifier {
426 value: String::new(),
427 index: token.span.start.byte as usize + colon_index,
428 },
429 });
430 }
431
432 Ok(Modifier {
433 _leading_whitespace: unused(leading_whitespace),
434 key: key.spanned(
435 token
436 .span
437 .start
438 .to(token.span.start.offset(colon_index as u32)),
439 ),
440 _colon: unused(token.span.start.offset(colon_index as u32)),
441 value: value.spanned(
442 token
443 .span
444 .start
445 .offset(colon_index as u32 + 1)
446 .to(token.span.end),
447 ),
448 })
449 }
450
451 #[inline]
458 fn is_nested_description_start(&self) -> bool {
459 debug_assert!(self.context().is_component());
460
461 let Some(index) = self
462 .input
463 .iter()
464 .position(|&byte| !byte.is_ascii_whitespace())
465 else {
466 return false;
467 };
468
469 self.input[index] == b'['
470 && (version!(2..)
471 || self.context().is_component()
472 || self.input.get(index + 1) != Some(&b'['))
473 }
474
475 #[inline]
476 fn consume_literal(&mut self) -> &'input str {
477 let bytes = self
478 .input
479 .iter()
480 .take_while(|&&byte| byte != b'[' && byte != b']' && (version!(1) || byte != b'\\'))
481 .count() as u32;
482
483 let value = unsafe { str::from_utf8_unchecked(&self.input[..bytes as usize]) };
486 self.advance(bytes);
487
488 value
489 }
490
491 #[inline]
492 fn consume_backslash_escape_sequence(
493 &mut self,
494 location: Location,
495 ) -> Result<&'input str, Error> {
496 let backslash_loc = location;
497
498 Ok(match self.input.get(1) {
499 Some(b'\\' | b'[' | b']') => {
500 let char = unsafe { str::from_utf8_unchecked(&self.input[1..2]) };
504 self.advance(2);
505 if self.context().is_literal() {
506 char
507 } else {
508 return Err(Error {
510 _inner: unused(
511 backslash_loc.error("escape sequences are not allowed in components"),
512 ),
513 public: InvalidFormatDescription::NotSupported {
514 what: "escape sequence",
515 context: "components",
516 index: backslash_loc.byte as usize,
517 },
518 });
519 }
520 }
521 Some(_) => {
522 let loc = Location {
523 byte: self.byte_pos + 1,
524 };
525 return Err(Error {
526 _inner: unused(loc.error("invalid escape sequence")),
527 public: InvalidFormatDescription::Expected {
528 what: "valid escape sequence",
529 index: loc.byte as usize,
530 },
531 });
532 }
533 None => {
534 return Err(Error {
535 _inner: unused(backslash_loc.error("unexpected end of input")),
536 public: InvalidFormatDescription::Expected {
537 what: "valid escape sequence",
538 index: backslash_loc.byte as usize,
539 },
540 });
541 }
542 })
543 }
544}
545
546impl<'input, const VERSION: u8> Iterator for Lexer<'input, VERSION> {
547 type Item = Result<Item<'input, VERSION>, Error>;
548
549 #[inline]
550 fn next(&mut self) -> Option<Self::Item> {
551 let byte = *const_try_opt!(self.input.first());
552
553 let location = Location {
554 byte: self.byte_pos,
555 };
556
557 match byte {
558 b'[' if version!(1) && self.input.get(1) == Some(&b'[') => {
559 self.advance(2);
560 Some(Ok(Item::Literal("[")))
561 }
562 b'[' => Some(self.consume_component(location)),
563 b']' if version!(3..) => Some(Err(Error {
564 _inner: unused(location.error("right brackets must be escaped")),
565 public: InvalidFormatDescription::Expected {
566 what: "right bracket to be escaped",
567 index: location.byte as usize,
568 },
569 })),
570 b']' => {
571 self.advance(1);
572 Some(Ok(Item::Literal("]")))
573 }
574 b'\\' if version!(2..) => Some(
575 self.consume_backslash_escape_sequence(location)
576 .map(Item::Literal),
577 ),
578 _ => Some(Ok(Item::Literal(self.consume_literal()))),
579 }
580 }
581}
582
583pub(super) struct NestedFormatDescription<'a, const VERSION: u8> {
585 pub(super) leading_whitespace: Option<Spanned<&'a str>>,
587 pub(super) opening_bracket: Location,
589 pub(super) items: Vec<Item<'a, VERSION>>,
591 pub(super) closing_bracket: Location,
593}
594
595pub(super) struct Modifier<'a> {
597 pub(super) _leading_whitespace: Unused<Spanned<&'a str>>,
599 pub(super) key: Spanned<&'a str>,
601 pub(super) _colon: Unused<Location>,
603 pub(super) value: Spanned<&'a str>,
605}
606
607impl Modifier<'_> {
608 #[inline]
609 pub(super) const fn key_value_span(&self) -> Span {
610 self.key.span.start.to(self.value.span.end)
611 }
612}
613
614pub(super) struct Modifiers<'a> {
615 pub(super) modifiers: Vec<Modifier<'a>>,
616 pub(super) trailing_whitespace: Option<Spanned<&'a str>>,
617}
618
619impl<'a> Modifiers<'a> {
620 #[inline]
623 pub(super) fn parse<const VERSION: u8>(tokens: &mut Lexer<'a, VERSION>) -> Result<Self, Error> {
624 let mut modifiers = Vec::new();
625 loop {
626 match try_likely_ok!(tokens.consume_modifier()) {
627 NextModifier::Modifier(modifier) => modifiers.push(modifier),
628 NextModifier::TrailingWhitespace(whitespace) => {
629 return Ok(Self {
630 modifiers,
631 trailing_whitespace: Some(whitespace),
632 });
633 }
634 NextModifier::None => {
635 return Ok(Self {
636 modifiers,
637 trailing_whitespace: None,
638 });
639 }
640 }
641 }
642 }
643
644 #[inline]
645 pub(super) fn span(&self) -> Span {
646 match &*self.modifiers {
647 [] => Span::DUMMY,
648 [modifier] => modifier.key.span.start.to(modifier.value.span.end),
649 [first, .., last] => first.key.span.start.to(last.value.span.end),
650 }
651 }
652}