// time/format_description/parse/lexer_ast.rs
use alloc::borrow::ToOwned as _;
4use alloc::string::String;
5use alloc::vec::Vec;
6
7use super::format_item::{Item, ident_eq};
8use super::{
9 Error, Location, Span, Spanned, SpannedValue, WithLocation, WithLocationValue as _, unused,
10};
11use crate::error::InvalidFormatDescription;
12use crate::hint;
13use crate::internal_macros::{const_try_opt, try_likely_ok};
14
/// The mode the lexer is operating in, as derived from the bracket depth.
#[must_use]
enum Context {
    /// The lexer is inside a bracketed component.
    Component,
    /// The lexer is reading literal text.
    Literal,
}

impl Context {
    /// Returns `true` only for [`Context::Component`].
    #[inline]
    const fn is_component(&self) -> bool {
        match self {
            Self::Component => true,
            Self::Literal => false,
        }
    }

    /// Returns `true` only for [`Context::Literal`].
    #[inline]
    const fn is_literal(&self) -> bool {
        match self {
            Self::Literal => true,
            Self::Component => false,
        }
    }
}
32
33enum NextModifier<'a> {
34 Modifier(Modifier<'a>),
35 TrailingWhitespace(Spanned<&'a str>),
36 None,
37}
38
39pub(super) struct Lexer<'input, const VERSION: u8> {
41 input: &'input [u8],
42 depth: u8,
43 byte_pos: u32,
44}
45
46impl<'input, const VERSION: u8> Lexer<'input, VERSION> {
47 #[inline]
59 pub(super) const fn new(input: &'input str) -> Self {
60 Self {
61 input: input.as_bytes(),
62 depth: 0,
63 byte_pos: 0,
64 }
65 }
66
67 #[inline]
69 fn advance(&mut self, bytes: u32) {
70 self.input = &self.input[bytes as usize..];
71 self.byte_pos += bytes;
72 }
73
74 #[inline]
76 const fn context(&self) -> Context {
77 if self.depth.is_multiple_of(2) {
78 Context::Literal
79 } else {
80 Context::Component
81 }
82 }
83
84 #[inline]
86 fn consume_whitespace(&mut self) -> Option<Spanned<&'input str>> {
87 debug_assert!(self.context().is_component());
88
89 let bytes = self
90 .input
91 .iter()
92 .take_while(|byte| byte.is_ascii_whitespace())
93 .count() as u32;
94
95 if bytes == 0 {
96 return None;
97 }
98
99 let start_loc = Location {
100 byte: self.byte_pos,
101 };
102 let end_loc = Location {
103 byte: self.byte_pos + bytes,
104 };
105
106 let value = unsafe { str::from_utf8_unchecked(&self.input[..bytes as usize]) };
109 self.advance(bytes);
110
111 Some(value.spanned(start_loc.to(end_loc)))
112 }
113
114 #[inline]
116 fn consume_component_part(&mut self) -> Option<Spanned<&'input str>> {
117 debug_assert!(self.context().is_component());
118
119 let bytes = self
120 .input
121 .iter()
122 .take_while(|byte| !byte.is_ascii_whitespace() && !matches!(byte, b'\\' | b'[' | b']'))
123 .count() as u32;
124
125 if bytes == 0 {
126 hint::cold_path();
127 return None;
128 }
129
130 let start_loc = Location {
131 byte: self.byte_pos,
132 };
133 let end_loc = Location {
134 byte: self.byte_pos + bytes,
135 };
136
137 let value = unsafe { str::from_utf8_unchecked(&self.input[..bytes as usize]) };
140 self.advance(bytes);
141
142 Some(value.spanned(start_loc.to(end_loc)))
143 }
144
145 #[inline]
147 fn consume_closing_bracket(&mut self) -> Option<Location> {
148 if self.input.first() != Some(&b']') {
149 hint::cold_path();
150 return None;
151 }
152
153 self.depth -= 1;
154
155 let location = Location {
156 byte: self.byte_pos,
157 };
158 self.advance(1);
159 Some(location)
160 }
161
162 #[inline]
165 fn consume_component_name(
166 &mut self,
167 opening_bracket: Location,
168 ) -> Result<Spanned<&'input str>, Error> {
169 let leading_whitespace = self.consume_whitespace().is_some();
170
171 let Some(name) = self.consume_component_part() else {
172 hint::cold_path();
173 let location = if leading_whitespace {
174 opening_bracket.offset(1)
175 } else {
176 opening_bracket
177 };
178 return Err(Error {
179 _inner: unused(location.error("expected component name")),
180 public: InvalidFormatDescription::MissingComponentName {
181 index: location.byte as usize,
182 },
183 });
184 };
185
186 Ok(name)
187 }
188
189 #[inline]
190 fn consume_modifier(&mut self) -> Result<NextModifier<'input>, Error> {
191 let Some(whitespace) = self.consume_whitespace() else {
192 hint::cold_path();
193 return Ok(NextModifier::None);
194 };
195
196 let Some(token) = self.consume_component_part() else {
197 hint::cold_path();
198 return Ok(NextModifier::TrailingWhitespace(whitespace));
199 };
200
201 let modifier = try_likely_ok!(self.modifier_from_token(token));
202 Ok(NextModifier::Modifier(modifier))
203 }
204
205 fn consume_component(
207 &mut self,
208 opening_bracket: Location,
209 ) -> Result<Item<'input, VERSION>, Error> {
210 match self.depth.checked_add(1) {
211 Some(depth) => self.depth = depth,
212 None => {
213 hint::cold_path();
214 return Err(Error {
215 _inner: unused(opening_bracket.error("too much nesting")),
216 public: InvalidFormatDescription::NotSupported {
217 what: "highly-nested format description",
218 context: "",
219 index: opening_bracket.byte as usize,
220 },
221 });
222 }
223 };
224 self.advance(1);
226
227 let name = try_likely_ok!(self.consume_component_name(opening_bracket));
228 let modifiers = try_likely_ok!(Modifiers::parse(self));
229
230 let mut nested_format_descriptions = Vec::new();
231 while self.is_nested_description_start()
232 && let Ok(description) = self.consume_nested(modifiers.end_location())
233 {
234 nested_format_descriptions.push(description);
235 }
236
237 if modifiers.trailing_whitespace.is_some()
238 && let Some(first_nested) = nested_format_descriptions.first_mut()
239 {
240 first_nested.leading_whitespace = modifiers.trailing_whitespace;
241 }
242
243 if modifiers.trailing_whitespace.is_none() || !nested_format_descriptions.is_empty() {
244 self.consume_whitespace();
245 }
246
247 let Some(closing_bracket) = self.consume_closing_bracket() else {
248 hint::cold_path();
249 return Err(Error {
250 _inner: unused(opening_bracket.error("unclosed bracket")),
251 public: InvalidFormatDescription::UnclosedOpeningBracket {
252 index: opening_bracket.byte as usize,
253 },
254 });
255 };
256
257 if let Some(first_nested_fd) = nested_format_descriptions.first()
258 && first_nested_fd.leading_whitespace.is_none()
259 {
260 hint::cold_path();
261 return Err(Error {
262 _inner: unused(
263 opening_bracket
264 .to(closing_bracket)
265 .error("missing leading whitespace before nested format description"),
266 ),
267 public: InvalidFormatDescription::Expected {
268 what: "whitespace before nested format description",
269 index: first_nested_fd.opening_bracket.byte as usize,
270 },
271 });
272 }
273
274 if ident_eq::<VERSION>(*name, "optional") {
275 hint::cold_path();
276 return Item::optional_from_parts(
277 opening_bracket,
278 &modifiers.modifiers,
279 nested_format_descriptions,
280 closing_bracket,
281 );
282 }
283
284 if ident_eq::<VERSION>(*name, "first") {
285 hint::cold_path();
286 if !modifiers.modifiers.is_empty() {
287 hint::cold_path();
288 let modifier = &modifiers.modifiers[0];
289 return Err(Error {
290 _inner: unused(modifier.key_span().error("invalid modifier key")),
291 public: InvalidFormatDescription::InvalidModifier {
292 value: (*modifier.key).to_owned(),
293 index: modifier.key.location.byte as usize,
294 },
295 });
296 }
297
298 if version!(3..) && nested_format_descriptions.is_empty() {
299 hint::cold_path();
300 return Err(Error {
301 _inner: unused(opening_bracket.to(closing_bracket).error(
302 "the `first` component requires at least one nested format description",
303 )),
304 public: InvalidFormatDescription::Expected {
305 what: "at least one nested format description",
306 index: closing_bracket.byte as usize,
307 },
308 });
309 }
310
311 let items = nested_format_descriptions
312 .into_iter()
313 .map(|nested_format_description| nested_format_description.items)
314 .collect();
315
316 return Ok(Item::First {
317 value: items,
318 span: opening_bracket.to(closing_bracket),
319 });
320 }
321
322 if !nested_format_descriptions.is_empty() {
323 hint::cold_path();
324 return Err(Error {
325 _inner: unused(
326 opening_bracket
327 .to(closing_bracket)
328 .error("this component does not support nested format descriptions"),
329 ),
330 public: InvalidFormatDescription::NotSupported {
331 what: "nested format descriptions",
332 context: "on this component",
333 index: opening_bracket.byte as usize,
334 },
335 });
336 }
337
338 let component = try_likely_ok!(super::format_item::component_from_ast::<VERSION>(
339 &name,
340 &modifiers.modifiers
341 ));
342
343 Ok(Item::Component(component))
344 }
345
346 #[inline]
348 fn consume_nested(
349 &mut self,
350 last_location: Location,
351 ) -> Result<NestedFormatDescription<'input, VERSION>, Error> {
352 let leading_whitespace = self.consume_whitespace();
353
354 let opening_bracket = {
355 match self.depth.checked_add(1) {
356 Some(depth) => self.depth = depth,
357 None => {
358 hint::cold_path();
359 return Err(Error {
360 _inner: unused(last_location.error("too much nesting")),
361 public: InvalidFormatDescription::NotSupported {
362 what: "highly-nested format description",
363 context: "",
364 index: last_location.byte as usize,
365 },
366 });
367 }
368 }
369 let location = Location {
370 byte: self.byte_pos,
371 };
372 self.advance(1);
373 location
374 };
375
376 let mut items = Vec::new();
377 loop {
378 if self.context().is_literal() && self.input.first() == Some(&b']') {
381 break;
382 }
383
384 let Some(token) = self.next() else {
385 break;
386 };
387 items.push(try_likely_ok!(token));
388 }
389
390 let Some(closing_bracket) = self.consume_closing_bracket() else {
391 hint::cold_path();
392 return Err(Error {
393 _inner: unused(opening_bracket.error("unclosed bracket")),
394 public: InvalidFormatDescription::UnclosedOpeningBracket {
395 index: opening_bracket.byte as usize,
396 },
397 });
398 };
399
400 Ok(NestedFormatDescription {
401 leading_whitespace,
402 opening_bracket,
403 items,
404 closing_bracket,
405 })
406 }
407
408 fn modifier_from_token(&self, token: Spanned<&'input str>) -> Result<Modifier<'input>, Error> {
409 let Some(colon_index) = token.bytes().position(|b| b == b':') else {
410 hint::cold_path();
411 return Err(Error {
412 _inner: unused(token.span.error("modifier must be of the form `key:value`")),
413 public: InvalidFormatDescription::InvalidModifier {
414 value: (*token).to_owned(),
415 index: token.span.start.byte as usize,
416 },
417 });
418 };
419 let key = &token[..colon_index];
420 let value = &token[colon_index + 1..];
421
422 if key.is_empty() {
423 hint::cold_path();
424 return Err(Error {
425 _inner: unused(token.span.shrink_to_start().error("expected modifier key")),
426 public: InvalidFormatDescription::InvalidModifier {
427 value: String::new(),
428 index: token.span.start.byte as usize,
429 },
430 });
431 }
432 if value.is_empty() {
433 hint::cold_path();
434 return Err(Error {
435 _inner: unused(token.span.shrink_to_end().error("expected modifier value")),
436 public: InvalidFormatDescription::InvalidModifier {
437 value: String::new(),
438 index: token.span.start.byte as usize + colon_index,
439 },
440 });
441 }
442
443 Ok(Modifier {
444 key: key.with_location(token.span.start),
445 value,
446 })
447 }
448
449 #[inline]
456 fn is_nested_description_start(&self) -> bool {
457 debug_assert!(self.context().is_component());
458
459 let Some(index) = self
460 .input
461 .iter()
462 .position(|&byte| !byte.is_ascii_whitespace())
463 else {
464 return false;
465 };
466
467 self.input[index] == b'['
468 && (version!(2..)
469 || self.context().is_component()
470 || self.input.get(index + 1) != Some(&b'['))
471 }
472
473 #[inline]
474 fn consume_literal(&mut self) -> &'input str {
475 let bytes = self
476 .input
477 .iter()
478 .take_while(|&&byte| byte != b'[' && byte != b']' && (version!(1) || byte != b'\\'))
479 .count() as u32;
480
481 let value = unsafe { str::from_utf8_unchecked(&self.input[..bytes as usize]) };
484 self.advance(bytes);
485
486 value
487 }
488
489 #[inline]
490 fn consume_backslash_escape_sequence(
491 &mut self,
492 location: Location,
493 ) -> Result<&'input str, Error> {
494 let backslash_loc = location;
495
496 Ok(match self.input.get(1) {
497 Some(b'\\' | b'[' | b']') => {
498 let char = unsafe { str::from_utf8_unchecked(&self.input[1..2]) };
502 self.advance(2);
503 char
504 }
505 Some(_) => {
506 hint::cold_path();
507 let loc = Location {
508 byte: self.byte_pos + 1,
509 };
510 return Err(Error {
511 _inner: unused(loc.error("invalid escape sequence")),
512 public: InvalidFormatDescription::Expected {
513 what: "valid escape sequence",
514 index: loc.byte as usize,
515 },
516 });
517 }
518 None => {
519 hint::cold_path();
520 return Err(Error {
521 _inner: unused(backslash_loc.error("unexpected end of input")),
522 public: InvalidFormatDescription::Expected {
523 what: "valid escape sequence",
524 index: backslash_loc.byte as usize,
525 },
526 });
527 }
528 })
529 }
530}
531
532impl<'input, const VERSION: u8> Iterator for Lexer<'input, VERSION> {
533 type Item = Result<Item<'input, VERSION>, Error>;
534
535 #[inline]
536 fn next(&mut self) -> Option<Self::Item> {
537 let byte = *const_try_opt!(self.input.first());
538
539 let location = Location {
540 byte: self.byte_pos,
541 };
542
543 match byte {
544 b'[' if version!(1) && self.input.get(1) == Some(&b'[') => {
545 self.advance(2);
546 Some(Ok(Item::Literal("[")))
547 }
548 b'[' => Some(self.consume_component(location)),
549 b']' if version!(3..) => {
550 hint::cold_path();
551 Some(Err(Error {
552 _inner: unused(location.error("right brackets must be escaped")),
553 public: InvalidFormatDescription::Expected {
554 what: "right bracket to be escaped",
555 index: location.byte as usize,
556 },
557 }))
558 }
559 b']' => {
560 self.advance(1);
561 Some(Ok(Item::Literal("]")))
562 }
563 b'\\' if version!(2..) => Some(
564 self.consume_backslash_escape_sequence(location)
565 .map(Item::Literal),
566 ),
567 _ => Some(Ok(Item::Literal(self.consume_literal()))),
568 }
569 }
570}
571
572pub(super) struct NestedFormatDescription<'a, const VERSION: u8> {
574 pub(super) leading_whitespace: Option<Spanned<&'a str>>,
576 pub(super) opening_bracket: Location,
578 pub(super) items: Vec<Item<'a, VERSION>>,
580 pub(super) closing_bracket: Location,
582}
583
584pub(super) struct Modifier<'a> {
586 pub(super) key: WithLocation<&'a str>,
588 pub(super) value: &'a str,
590}
591
592impl Modifier<'_> {
593 #[inline]
594 pub(super) fn key_value_span(&self) -> Span {
595 self.key
596 .location
597 .with_length(self.key.len() + self.value.len() + 1)
598 }
599
600 #[inline]
601 pub(super) fn key_span(&self) -> Span {
602 self.key.location.with_length(self.key.len())
603 }
604
605 #[inline]
606 pub(super) fn value_span(&self) -> Span {
607 self.key
608 .location
609 .offset(self.key.len() as u32 + 1)
610 .with_length(self.value.len())
611 }
612}
613
614pub(super) struct Modifiers<'a> {
615 pub(super) modifiers: Vec<Modifier<'a>>,
616 pub(super) trailing_whitespace: Option<Spanned<&'a str>>,
617}
618
619impl<'a> Modifiers<'a> {
620 #[inline]
623 pub(super) fn parse<const VERSION: u8>(tokens: &mut Lexer<'a, VERSION>) -> Result<Self, Error> {
624 let mut modifiers = Vec::new();
625 loop {
626 match try_likely_ok!(tokens.consume_modifier()) {
627 NextModifier::Modifier(modifier) => modifiers.push(modifier),
628 NextModifier::TrailingWhitespace(whitespace) => {
629 return Ok(Self {
630 modifiers,
631 trailing_whitespace: Some(whitespace),
632 });
633 }
634 NextModifier::None => {
635 return Ok(Self {
636 modifiers,
637 trailing_whitespace: None,
638 });
639 }
640 }
641 }
642 }
643
644 #[inline]
645 pub(super) fn end_location(&self) -> Location {
646 match &*self.modifiers {
647 [] => Location::DUMMY,
648 [.., modifier] => modifier.value_span().end,
649 }
650 }
651}