Skip to main content

time/format_description/parse/
ast.rs

1//! AST for parsing format descriptions.
2
3use alloc::boxed::Box;
4use alloc::string::String;
5use alloc::vec::Vec;
6use core::iter;
7
8use super::{Error, Location, Span, Spanned, SpannedValue, Unused, lexer, unused};
9use crate::error;
10use crate::format_description::FormatDescriptionVersion;
11use crate::internal_macros::bug;
12
/// One part of a complete format description.
pub(super) enum Item<'a> {
    /// A literal string, formatted and parsed as-is.
    ///
    /// A literal *token* from the lexer is never expected inside a nested format description.
    /// When a nested description is parsed, component parts (whitespace included) are emitted as
    /// this variant instead.
    Literal(Spanned<&'a [u8]>),
    /// A sequence of brackets. The first acts as the escape character.
    ///
    /// This should never be present if the lexer has `BACKSLASH_ESCAPE` set to `true`.
    /// The parser in this module only emits this variant when `version.is_v1()` holds.
    EscapedBracket {
        /// The first bracket.
        _first: Unused<Location>,
        /// The second bracket.
        _second: Unused<Location>,
    },
    /// Part of a type, along with its modifiers.
    Component {
        /// The format description version the component was parsed under.
        version: FormatDescriptionVersion,
        /// Where the opening bracket was in the format string.
        _opening_bracket: Unused<Location>,
        /// Whitespace between the opening bracket and name.
        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// The name of the component.
        name: Spanned<&'a [u8]>,
        /// The modifiers for the component.
        modifiers: Box<[Modifier<'a>]>,
        /// Whitespace between the modifiers and closing bracket.
        _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// Where the closing bracket was in the format string.
        _closing_bracket: Unused<Location>,
    },
    /// An optional sequence of items.
    Optional {
        /// Where the opening bracket was in the format string.
        opening_bracket: Location,
        /// Whitespace between the opening bracket and "optional".
        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// The "optional" keyword.
        _optional_kw: Unused<Spanned<&'a [u8]>>,
        /// The modifiers for the optional description.
        modifiers: Box<[Modifier<'a>]>,
        /// Whitespace between either the "optional" keyword or modifiers and the opening bracket
        /// of the nested description.
        _whitespace_after_modifiers: Unused<Option<Spanned<&'a [u8]>>>,
        /// The items within the optional sequence.
        nested_format_description: NestedFormatDescription<'a>,
        /// Where the closing bracket was in the format string.
        closing_bracket: Location,
    },
    /// The first matching parse of a sequence of items.
    First {
        /// Where the opening bracket was in the format string.
        opening_bracket: Location,
        /// Whitespace between the opening bracket and "first".
        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// The "first" keyword.
        _first_kw: Unused<Spanned<&'a [u8]>>,
        /// The modifiers for the `first` item.
        modifiers: Box<[Modifier<'a>]>,
        /// Whitespace between either the "first" keyword or modifiers and the opening bracket of
        /// the nested description.
        _whitespace_after_modifiers: Unused<Option<Spanned<&'a [u8]>>>,
        /// The sequences of items to try.
        nested_format_descriptions: Box<[NestedFormatDescription<'a>]>,
        /// Where the closing bracket was in the format string.
        closing_bracket: Location,
    },
}
81
/// A format description that is nested within another format description.
///
/// This is the bracketed payload of an `optional` or `first` item: an opening bracket, zero or
/// more items, and a closing bracket, optionally followed by whitespace.
pub(super) struct NestedFormatDescription<'a> {
    /// Where the opening bracket was in the format string.
    pub(super) _opening_bracket: Unused<Location>,
    /// The items within the nested format description.
    pub(super) items: Box<[Item<'a>]>,
    /// Where the closing bracket was in the format string.
    pub(super) _closing_bracket: Unused<Location>,
    /// Whitespace between the closing bracket and the next item.
    pub(super) _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
}
93
/// A modifier for a component, written as `key:value`.
///
/// The same representation is used for the modifiers of `optional` and `first` items.
pub(super) struct Modifier<'a> {
    /// Whitespace preceding the modifier.
    pub(super) _leading_whitespace: Unused<Spanned<&'a [u8]>>,
    /// The key of the modifier: the bytes before the first colon.
    pub(super) key: Spanned<&'a [u8]>,
    /// Where the colon of the modifier was in the format string.
    pub(super) _colon: Unused<Location>,
    /// The value of the modifier: the bytes after the first colon.
    pub(super) value: Spanned<&'a [u8]>,
}
105
106impl<'a> Modifier<'a> {
107    fn from_leading_whitespace_and_token(
108        leading_whitespace: Spanned<&'a [u8]>,
109        token: Spanned<&'a [u8]>,
110    ) -> Result<Self, Error> {
111        let Some(colon_index) = token.iter().position(|&b| b == b':') else {
112            return Err(Error {
113                _inner: unused(token.span.error("modifier must be of the form `key:value`")),
114                public: error::InvalidFormatDescription::InvalidModifier {
115                    value: String::from_utf8_lossy(*token).into_owned(),
116                    index: token.span.start.byte as usize,
117                },
118            });
119        };
120        let key = &token[..colon_index];
121        let value = &token[colon_index + 1..];
122
123        if key.is_empty() {
124            return Err(Error {
125                _inner: unused(token.span.shrink_to_start().error("expected modifier key")),
126                public: error::InvalidFormatDescription::InvalidModifier {
127                    value: String::new(),
128                    index: token.span.start.byte as usize,
129                },
130            });
131        }
132        if value.is_empty() {
133            return Err(Error {
134                _inner: unused(token.span.shrink_to_end().error("expected modifier value")),
135                public: error::InvalidFormatDescription::InvalidModifier {
136                    value: String::new(),
137                    index: token.span.start.byte as usize + colon_index,
138                },
139            });
140        }
141
142        Ok(Self {
143            _leading_whitespace: unused(leading_whitespace),
144            key: key.spanned(
145                token
146                    .span
147                    .start
148                    .to(token.span.start.offset(colon_index as u32)),
149            ),
150            _colon: unused(token.span.start.offset(colon_index as u32)),
151            value: value.spanned(
152                token
153                    .span
154                    .start
155                    .offset(colon_index as u32 + 1)
156                    .to(token.span.end),
157            ),
158        })
159    }
160
161    pub(super) const fn key_value_span(&self) -> Span {
162        self.key.span.start.to(self.value.span.end)
163    }
164}
165
/// Parse the provided tokens into an AST.
///
/// The returned iterator is lazy: each call to `next` pulls tokens from `tokens` and yields
/// either the next top-level [`Item`] or the [`Error`] hit while producing it. This is the
/// non-nested entry point; it delegates to `parse_inner` with `nested` set to `false`.
#[inline]
pub(super) fn parse<'item, 'iter, I>(
    version: FormatDescriptionVersion,
    tokens: &'iter mut lexer::Lexed<I>,
) -> impl Iterator<Item = Result<Item<'item>, Error>> + use<'item, 'iter, I>
where
    'item: 'iter,
    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
{
    parse_inner(version, false, tokens)
}
178
179/// Parse the provided tokens into an AST. The const generic indicates whether the resulting
180/// [`Item`] will be used directly or as part of a [`NestedFormatDescription`].
181#[inline]
182fn parse_inner<'item, I>(
183    version: FormatDescriptionVersion,
184    nested: bool,
185    tokens: &mut lexer::Lexed<I>,
186) -> impl Iterator<Item = Result<Item<'item>, Error>> + use<'_, 'item, I>
187where
188    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
189{
190    iter::from_fn(move || {
191        if nested && tokens.peek_closing_bracket().is_some() {
192            return None;
193        }
194
195        let next = match tokens.next()? {
196            Ok(token) => token,
197            Err(err) => return Some(Err(err)),
198        };
199
200        Some(match next {
201            lexer::Token::Literal(Spanned { value: _, span: _ }) if nested => {
202                bug!("literal should not be present in nested description")
203            }
204            lexer::Token::Literal(value) => Ok(Item::Literal(value)),
205            lexer::Token::Bracket {
206                kind: lexer::BracketKind::Opening,
207                location,
208            } => {
209                if version.is_v1()
210                    && let Some(second_location) = tokens.next_if_opening_bracket()
211                {
212                    Ok(Item::EscapedBracket {
213                        _first: unused(location),
214                        _second: unused(second_location),
215                    })
216                } else {
217                    parse_component(version, location, tokens)
218                }
219            }
220            lexer::Token::Bracket {
221                kind: lexer::BracketKind::Closing,
222                location: _,
223            } if nested => {
224                bug!("closing bracket should be caught by the `if` statement")
225            }
226            lexer::Token::Bracket {
227                kind: lexer::BracketKind::Closing,
228                location: _,
229            } => {
230                bug!("closing bracket should have been consumed by `parse_component`")
231            }
232            lexer::Token::ComponentPart {
233                kind: _, // whitespace is significant in nested components
234                value,
235            } if nested => Ok(Item::Literal(value)),
236            lexer::Token::ComponentPart { kind: _, value: _ } => {
237                bug!("component part should have been consumed by `parse_component`")
238            }
239        })
240    })
241}
242
243/// Parse a component. This assumes that the opening bracket has already been consumed.
244fn parse_component<'a, I>(
245    version: FormatDescriptionVersion,
246    opening_bracket: Location,
247    tokens: &mut lexer::Lexed<I>,
248) -> Result<Item<'a>, Error>
249where
250    I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
251{
252    let leading_whitespace = tokens.next_if_whitespace();
253
254    let Some(name) = tokens.next_if_not_whitespace() else {
255        let span = match leading_whitespace {
256            Some(Spanned { value: _, span }) => span,
257            None => opening_bracket.to_self(),
258        };
259        return Err(Error {
260            _inner: unused(span.error("expected component name")),
261            public: error::InvalidFormatDescription::MissingComponentName {
262                index: span.start.byte as usize,
263            },
264        });
265    };
266
267    if *name == b"optional" {
268        let modifiers = Modifiers::parse(true, tokens)?;
269        let nested = parse_nested(version, modifiers.span().end, tokens)?;
270
271        let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
272            return Err(Error {
273                _inner: unused(opening_bracket.error("unclosed bracket")),
274                public: error::InvalidFormatDescription::UnclosedOpeningBracket {
275                    index: opening_bracket.byte as usize,
276                },
277            });
278        };
279
280        if modifiers.trailing_whitespace.is_none() {
281            if let Some(modifier) = modifiers.modifiers.last() {
282                return Err(Error {
283                    _inner: unused(
284                        modifier
285                            .value
286                            .span
287                            .shrink_to_end()
288                            .error("expected whitespace between modifiers and nested description"),
289                    ),
290                    public: error::InvalidFormatDescription::Expected {
291                        what: "whitespace between modifiers and nested description",
292                        index: modifier.value.span.end.byte as usize,
293                    },
294                });
295            } else {
296                return Err(Error {
297                    _inner: unused(
298                        name.span
299                            .shrink_to_end()
300                            .error("expected whitespace between `optional` and nested description"),
301                    ),
302                    public: error::InvalidFormatDescription::Expected {
303                        what: "whitespace between `optional` and nested description",
304                        index: name.span.end.byte as usize,
305                    },
306                });
307            }
308        }
309
310        return Ok(Item::Optional {
311            opening_bracket,
312            _leading_whitespace: unused(leading_whitespace),
313            _optional_kw: unused(name),
314            modifiers: modifiers.modifiers,
315            _whitespace_after_modifiers: unused(modifiers.trailing_whitespace),
316            nested_format_description: nested,
317            closing_bracket,
318        });
319    }
320
321    if *name == b"first" {
322        let modifiers = Modifiers::parse(true, tokens)?;
323
324        let mut nested_format_descriptions = Vec::new();
325        while let Ok(description) = parse_nested(version, modifiers.span().end, tokens) {
326            nested_format_descriptions.push(description);
327        }
328
329        if version.is_at_least_v3() && nested_format_descriptions.is_empty() {
330            return Err(Error {
331                _inner: unused(
332                    modifiers
333                        .span()
334                        .shrink_to_end()
335                        .error("expected at least one nested description"),
336                ),
337                public: error::InvalidFormatDescription::Expected {
338                    what: "at least one nested description",
339                    index: modifiers.span().end.byte as usize,
340                },
341            });
342        }
343
344        let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
345            return Err(Error {
346                _inner: unused(opening_bracket.error("unclosed bracket")),
347                public: error::InvalidFormatDescription::UnclosedOpeningBracket {
348                    index: opening_bracket.byte as usize,
349                },
350            });
351        };
352
353        if modifiers.trailing_whitespace.is_none() {
354            if let Some(modifier) = modifiers.modifiers.last() {
355                return Err(Error {
356                    _inner: unused(
357                        modifier
358                            .value
359                            .span
360                            .shrink_to_end()
361                            .error("expected whitespace between modifiers and nested descriptions"),
362                    ),
363                    public: error::InvalidFormatDescription::Expected {
364                        what: "whitespace between modifiers and nested descriptions",
365                        index: modifier.value.span.end.byte as usize,
366                    },
367                });
368            } else {
369                return Err(Error {
370                    _inner: unused(
371                        name.span
372                            .shrink_to_end()
373                            .error("expected whitespace between `first` and nested descriptions"),
374                    ),
375                    public: error::InvalidFormatDescription::Expected {
376                        what: "whitespace between `first` and nested descriptions",
377                        index: name.span.end.byte as usize,
378                    },
379                });
380            }
381        }
382
383        return Ok(Item::First {
384            opening_bracket,
385            _leading_whitespace: unused(leading_whitespace),
386            _first_kw: unused(name),
387            modifiers: modifiers.modifiers,
388            _whitespace_after_modifiers: unused(modifiers.trailing_whitespace),
389            nested_format_descriptions: nested_format_descriptions.into_boxed_slice(),
390            closing_bracket,
391        });
392    }
393
394    let Modifiers {
395        modifiers,
396        trailing_whitespace,
397    } = Modifiers::parse(false, tokens)?;
398
399    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
400        return Err(Error {
401            _inner: unused(opening_bracket.error("unclosed bracket")),
402            public: error::InvalidFormatDescription::UnclosedOpeningBracket {
403                index: opening_bracket.byte as usize,
404            },
405        });
406    };
407
408    Ok(Item::Component {
409        version,
410        _opening_bracket: unused(opening_bracket),
411        _leading_whitespace: unused(leading_whitespace),
412        name,
413        modifiers,
414        _trailing_whitespace: unused(trailing_whitespace),
415        _closing_bracket: unused(closing_bracket),
416    })
417}
418
/// The modifiers that follow a component name or keyword, plus any whitespace left over after
/// them.
struct Modifiers<'a> {
    /// The parsed `key:value` modifiers, in the order they were encountered.
    modifiers: Box<[Modifier<'a>]>,
    /// Whitespace that was consumed but not followed by another modifier token, if any.
    trailing_whitespace: Option<Spanned<&'a [u8]>>,
}
423
424impl<'a> Modifiers<'a> {
425    fn parse<I>(nested_is_allowed: bool, tokens: &mut lexer::Lexed<I>) -> Result<Self, Error>
426    where
427        I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
428    {
429        let mut modifiers = Vec::new();
430        loop {
431            let Some(whitespace) = tokens.next_if_whitespace() else {
432                return Ok(Self {
433                    modifiers: modifiers.into_boxed_slice(),
434                    trailing_whitespace: None,
435                });
436            };
437
438            // This is not necessary for proper parsing, but provides a much better error when a
439            // nested description is used where it's not allowed.
440            if !nested_is_allowed && let Some(location) = tokens.next_if_opening_bracket() {
441                return Err(Error {
442                    _inner: unused(
443                        location
444                            .to_self()
445                            .error("modifier must be of the form `key:value`"),
446                    ),
447                    public: error::InvalidFormatDescription::InvalidModifier {
448                        value: String::from("["),
449                        index: location.byte as usize,
450                    },
451                });
452            }
453
454            let Some(token) = tokens.next_if_not_whitespace() else {
455                return Ok(Self {
456                    modifiers: modifiers.into_boxed_slice(),
457                    trailing_whitespace: Some(whitespace),
458                });
459            };
460
461            let modifier = Modifier::from_leading_whitespace_and_token(whitespace, token)?;
462            modifiers.push(modifier);
463        }
464    }
465
466    fn span(&self) -> Span {
467        match &*self.modifiers {
468            [] => self
469                .trailing_whitespace
470                .map(|whitespace| whitespace.span)
471                .unwrap_or(Span::DUMMY),
472            [modifier] => modifier.key.span.start.to(modifier.value.span.end),
473            [first, .., last] => first.key.span.start.to(last.value.span.end),
474        }
475    }
476}
477
478/// Parse a nested format description. The location provided is the most recent one consumed.
479#[inline]
480fn parse_nested<'a, I>(
481    version: FormatDescriptionVersion,
482    last_location: Location,
483    tokens: &mut lexer::Lexed<I>,
484) -> Result<NestedFormatDescription<'a>, Error>
485where
486    I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
487{
488    let Some(opening_bracket) = tokens.next_if_opening_bracket() else {
489        return Err(Error {
490            _inner: unused(last_location.error("expected opening bracket")),
491            public: error::InvalidFormatDescription::Expected {
492                what: "opening bracket",
493                index: last_location.byte as usize,
494            },
495        });
496    };
497    let items = parse_inner(version, true, tokens).collect::<Result<_, _>>()?;
498    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
499        return Err(Error {
500            _inner: unused(opening_bracket.error("unclosed bracket")),
501            public: error::InvalidFormatDescription::UnclosedOpeningBracket {
502                index: opening_bracket.byte as usize,
503            },
504        });
505    };
506    let trailing_whitespace = tokens.next_if_whitespace();
507
508    Ok(NestedFormatDescription {
509        _opening_bracket: unused(opening_bracket),
510        items,
511        _closing_bracket: unused(closing_bracket),
512        _trailing_whitespace: unused(trailing_whitespace),
513    })
514}