Skip to main content

time/format_description/parse/
ast.rs

1//! AST for parsing format descriptions.
2
3use alloc::boxed::Box;
4use alloc::string::String;
5use alloc::vec::Vec;
6use core::iter;
7
8use super::{Error, Location, Span, Spanned, SpannedValue, Unused, lexer, unused};
9use crate::error;
10use crate::format_description::FormatDescriptionVersion;
11use crate::internal_macros::bug;
12
13/// One part of a complete format description.
14pub(super) enum Item<'a> {
15    /// A literal string, formatted and parsed as-is.
16    ///
17    /// This should never be present inside a nested format description.
18    Literal(Spanned<&'a [u8]>),
19    /// A sequence of brackets. The first acts as the escape character.
20    ///
21    /// This should never be present if the lexer has `BACKSLASH_ESCAPE` set to `true`.
22    EscapedBracket {
23        /// The first bracket.
24        _first: Unused<Location>,
25        /// The second bracket.
26        _second: Unused<Location>,
27    },
28    /// Part of a type, along with its modifiers.
29    Component {
30        /// Where the opening bracket was in the format string.
31        _opening_bracket: Unused<Location>,
32        /// Whitespace between the opening bracket and name.
33        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
34        /// The name of the component.
35        name: Spanned<&'a [u8]>,
36        /// The modifiers for the component.
37        modifiers: Box<[Modifier<'a>]>,
38        /// Whitespace between the modifiers and closing bracket.
39        _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
40        /// Where the closing bracket was in the format string.
41        _closing_bracket: Unused<Location>,
42    },
43    /// An optional sequence of items.
44    Optional {
45        /// Where the opening bracket was in the format string.
46        opening_bracket: Location,
47        /// Whitespace between the opening bracket and "optional".
48        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
49        /// The "optional" keyword.
50        _optional_kw: Unused<Spanned<&'a [u8]>>,
51        /// The modifiers for the optional description.
52        modifiers: Box<[Modifier<'a>]>,
53        /// Whitespace between either the "optional" keyword or modifiers and the opening bracket
54        /// of the nested description.
55        _whitespace_after_modifiers: Unused<Option<Spanned<&'a [u8]>>>,
56        /// The items within the optional sequence.
57        nested_format_description: NestedFormatDescription<'a>,
58        /// Where the closing bracket was in the format string.
59        closing_bracket: Location,
60    },
61    /// The first matching parse of a sequence of items.
62    First {
63        /// Where the opening bracket was in the format string.
64        opening_bracket: Location,
65        /// Whitespace between the opening bracket and "first".
66        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
67        /// The "first" keyword.
68        _first_kw: Unused<Spanned<&'a [u8]>>,
69        /// The modifiers for the optional description.
70        modifiers: Box<[Modifier<'a>]>,
71        /// Whitespace between either the "first" keyword or modifiers and the opening bracket of
72        /// the nested description.
73        _whitespace_after_modifiers: Unused<Option<Spanned<&'a [u8]>>>,
74        /// The sequences of items to try.
75        nested_format_descriptions: Box<[NestedFormatDescription<'a>]>,
76        /// Where the closing bracket was in the format string.
77        closing_bracket: Location,
78    },
79}
80
81/// A format description that is nested within another format description.
82pub(super) struct NestedFormatDescription<'a> {
83    /// Where the opening bracket was in the format string.
84    pub(super) _opening_bracket: Unused<Location>,
85    /// The items within the nested format description.
86    pub(super) items: Box<[Item<'a>]>,
87    /// Where the closing bracket was in the format string.
88    pub(super) _closing_bracket: Unused<Location>,
89    /// Whitespace between the closing bracket and the next item.
90    pub(super) _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
91}
92
93/// A modifier for a component.
94pub(super) struct Modifier<'a> {
95    /// Whitespace preceding the modifier.
96    pub(super) _leading_whitespace: Unused<Spanned<&'a [u8]>>,
97    /// The key of the modifier.
98    pub(super) key: Spanned<&'a [u8]>,
99    /// Where the colon of the modifier was in the format string.
100    pub(super) _colon: Unused<Location>,
101    /// The value of the modifier.
102    pub(super) value: Spanned<&'a [u8]>,
103}
104
105impl<'a> Modifier<'a> {
106    fn from_leading_whitespace_and_token(
107        leading_whitespace: Spanned<&'a [u8]>,
108        token: Spanned<&'a [u8]>,
109    ) -> Result<Self, Error> {
110        let Some(colon_index) = token.iter().position(|&b| b == b':') else {
111            return Err(Error {
112                _inner: unused(token.span.error("modifier must be of the form `key:value`")),
113                public: error::InvalidFormatDescription::InvalidModifier {
114                    value: String::from_utf8_lossy(*token).into_owned(),
115                    index: token.span.start.byte as usize,
116                },
117            });
118        };
119        let key = &token[..colon_index];
120        let value = &token[colon_index + 1..];
121
122        if key.is_empty() {
123            return Err(Error {
124                _inner: unused(token.span.shrink_to_start().error("expected modifier key")),
125                public: error::InvalidFormatDescription::InvalidModifier {
126                    value: String::new(),
127                    index: token.span.start.byte as usize,
128                },
129            });
130        }
131        if value.is_empty() {
132            return Err(Error {
133                _inner: unused(token.span.shrink_to_end().error("expected modifier value")),
134                public: error::InvalidFormatDescription::InvalidModifier {
135                    value: String::new(),
136                    index: token.span.start.byte as usize + colon_index,
137                },
138            });
139        }
140
141        Ok(Self {
142            _leading_whitespace: unused(leading_whitespace),
143            key: key.spanned(
144                token
145                    .span
146                    .start
147                    .to(token.span.start.offset(colon_index as u32)),
148            ),
149            _colon: unused(token.span.start.offset(colon_index as u32)),
150            value: value.spanned(
151                token
152                    .span
153                    .start
154                    .offset(colon_index as u32 + 1)
155                    .to(token.span.end),
156            ),
157        })
158    }
159}
160
161/// Parse the provided tokens into an AST.
162#[inline]
163pub(super) fn parse<'item, 'iter, I>(
164    version: FormatDescriptionVersion,
165    tokens: &'iter mut lexer::Lexed<I>,
166) -> impl Iterator<Item = Result<Item<'item>, Error>> + use<'item, 'iter, I>
167where
168    'item: 'iter,
169    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
170{
171    parse_inner(version, false, tokens)
172}
173
174/// Parse the provided tokens into an AST. The const generic indicates whether the resulting
175/// [`Item`] will be used directly or as part of a [`NestedFormatDescription`].
176#[inline]
177fn parse_inner<'item, I>(
178    version: FormatDescriptionVersion,
179    nested: bool,
180    tokens: &mut lexer::Lexed<I>,
181) -> impl Iterator<Item = Result<Item<'item>, Error>> + use<'_, 'item, I>
182where
183    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
184{
185    iter::from_fn(move || {
186        if nested && tokens.peek_closing_bracket().is_some() {
187            return None;
188        }
189
190        let next = match tokens.next()? {
191            Ok(token) => token,
192            Err(err) => return Some(Err(err)),
193        };
194
195        Some(match next {
196            lexer::Token::Literal(Spanned { value: _, span: _ }) if nested => {
197                bug!("literal should not be present in nested description")
198            }
199            lexer::Token::Literal(value) => Ok(Item::Literal(value)),
200            lexer::Token::Bracket {
201                kind: lexer::BracketKind::Opening,
202                location,
203            } => {
204                if version.is_v1()
205                    && let Some(second_location) = tokens.next_if_opening_bracket()
206                {
207                    Ok(Item::EscapedBracket {
208                        _first: unused(location),
209                        _second: unused(second_location),
210                    })
211                } else {
212                    parse_component(version, location, tokens)
213                }
214            }
215            lexer::Token::Bracket {
216                kind: lexer::BracketKind::Closing,
217                location: _,
218            } if nested => {
219                bug!("closing bracket should be caught by the `if` statement")
220            }
221            lexer::Token::Bracket {
222                kind: lexer::BracketKind::Closing,
223                location: _,
224            } => {
225                bug!("closing bracket should have been consumed by `parse_component`")
226            }
227            lexer::Token::ComponentPart {
228                kind: _, // whitespace is significant in nested components
229                value,
230            } if nested => Ok(Item::Literal(value)),
231            lexer::Token::ComponentPart { kind: _, value: _ } => {
232                bug!("component part should have been consumed by `parse_component`")
233            }
234        })
235    })
236}
237
238/// Parse a component. This assumes that the opening bracket has already been consumed.
239fn parse_component<'a, I>(
240    version: FormatDescriptionVersion,
241    opening_bracket: Location,
242    tokens: &mut lexer::Lexed<I>,
243) -> Result<Item<'a>, Error>
244where
245    I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
246{
247    let leading_whitespace = tokens.next_if_whitespace();
248
249    let Some(name) = tokens.next_if_not_whitespace() else {
250        let span = match leading_whitespace {
251            Some(Spanned { value: _, span }) => span,
252            None => opening_bracket.to_self(),
253        };
254        return Err(Error {
255            _inner: unused(span.error("expected component name")),
256            public: error::InvalidFormatDescription::MissingComponentName {
257                index: span.start.byte as usize,
258            },
259        });
260    };
261
262    if *name == b"optional" {
263        let modifiers = Modifiers::parse(true, tokens)?;
264        let nested = parse_nested(version, modifiers.span().end, tokens)?;
265
266        let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
267            return Err(Error {
268                _inner: unused(opening_bracket.error("unclosed bracket")),
269                public: error::InvalidFormatDescription::UnclosedOpeningBracket {
270                    index: opening_bracket.byte as usize,
271                },
272            });
273        };
274
275        if modifiers.trailing_whitespace.is_none() {
276            if let Some(modifier) = modifiers.modifiers.last() {
277                return Err(Error {
278                    _inner: unused(
279                        modifier
280                            .value
281                            .span
282                            .shrink_to_end()
283                            .error("expected whitespace between modifiers and nested description"),
284                    ),
285                    public: error::InvalidFormatDescription::Expected {
286                        what: "whitespace between modifiers and nested description",
287                        index: modifier.value.span.end.byte as usize,
288                    },
289                });
290            } else {
291                return Err(Error {
292                    _inner: unused(
293                        name.span
294                            .shrink_to_end()
295                            .error("expected whitespace between `optional` and nested description"),
296                    ),
297                    public: error::InvalidFormatDescription::Expected {
298                        what: "whitespace between `optional` and nested description",
299                        index: name.span.end.byte as usize,
300                    },
301                });
302            }
303        }
304
305        return Ok(Item::Optional {
306            opening_bracket,
307            _leading_whitespace: unused(leading_whitespace),
308            _optional_kw: unused(name),
309            modifiers: modifiers.modifiers,
310            _whitespace_after_modifiers: unused(modifiers.trailing_whitespace),
311            nested_format_description: nested,
312            closing_bracket,
313        });
314    }
315
316    if *name == b"first" {
317        let modifiers = Modifiers::parse(true, tokens)?;
318
319        let mut nested_format_descriptions = Vec::new();
320        while let Ok(description) = parse_nested(version, modifiers.span().end, tokens) {
321            nested_format_descriptions.push(description);
322        }
323
324        if version.is_at_least_v3() && nested_format_descriptions.is_empty() {
325            return Err(Error {
326                _inner: unused(
327                    modifiers
328                        .span()
329                        .shrink_to_end()
330                        .error("expected at least one nested description"),
331                ),
332                public: error::InvalidFormatDescription::Expected {
333                    what: "at least one nested description",
334                    index: modifiers.span().end.byte as usize,
335                },
336            });
337        }
338
339        let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
340            return Err(Error {
341                _inner: unused(opening_bracket.error("unclosed bracket")),
342                public: error::InvalidFormatDescription::UnclosedOpeningBracket {
343                    index: opening_bracket.byte as usize,
344                },
345            });
346        };
347
348        if modifiers.trailing_whitespace.is_none() {
349            if let Some(modifier) = modifiers.modifiers.last() {
350                return Err(Error {
351                    _inner: unused(
352                        modifier
353                            .value
354                            .span
355                            .shrink_to_end()
356                            .error("expected whitespace between modifiers and nested descriptions"),
357                    ),
358                    public: error::InvalidFormatDescription::Expected {
359                        what: "whitespace between modifiers and nested descriptions",
360                        index: modifier.value.span.end.byte as usize,
361                    },
362                });
363            } else {
364                return Err(Error {
365                    _inner: unused(
366                        name.span
367                            .shrink_to_end()
368                            .error("expected whitespace between `first` and nested descriptions"),
369                    ),
370                    public: error::InvalidFormatDescription::Expected {
371                        what: "whitespace between `first` and nested descriptions",
372                        index: name.span.end.byte as usize,
373                    },
374                });
375            }
376        }
377
378        return Ok(Item::First {
379            opening_bracket,
380            _leading_whitespace: unused(leading_whitespace),
381            _first_kw: unused(name),
382            modifiers: modifiers.modifiers,
383            _whitespace_after_modifiers: unused(modifiers.trailing_whitespace),
384            nested_format_descriptions: nested_format_descriptions.into_boxed_slice(),
385            closing_bracket,
386        });
387    }
388
389    let Modifiers {
390        modifiers,
391        trailing_whitespace,
392    } = Modifiers::parse(false, tokens)?;
393
394    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
395        return Err(Error {
396            _inner: unused(opening_bracket.error("unclosed bracket")),
397            public: error::InvalidFormatDescription::UnclosedOpeningBracket {
398                index: opening_bracket.byte as usize,
399            },
400        });
401    };
402
403    Ok(Item::Component {
404        _opening_bracket: unused(opening_bracket),
405        _leading_whitespace: unused(leading_whitespace),
406        name,
407        modifiers,
408        _trailing_whitespace: unused(trailing_whitespace),
409        _closing_bracket: unused(closing_bracket),
410    })
411}
412
413struct Modifiers<'a> {
414    modifiers: Box<[Modifier<'a>]>,
415    trailing_whitespace: Option<Spanned<&'a [u8]>>,
416}
417
418impl<'a> Modifiers<'a> {
419    fn parse<I>(nested_is_allowed: bool, tokens: &mut lexer::Lexed<I>) -> Result<Self, Error>
420    where
421        I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
422    {
423        let mut modifiers = Vec::new();
424        loop {
425            let Some(whitespace) = tokens.next_if_whitespace() else {
426                return Ok(Self {
427                    modifiers: modifiers.into_boxed_slice(),
428                    trailing_whitespace: None,
429                });
430            };
431
432            // This is not necessary for proper parsing, but provides a much better error when a
433            // nested description is used where it's not allowed.
434            if !nested_is_allowed && let Some(location) = tokens.next_if_opening_bracket() {
435                return Err(Error {
436                    _inner: unused(
437                        location
438                            .to_self()
439                            .error("modifier must be of the form `key:value`"),
440                    ),
441                    public: error::InvalidFormatDescription::InvalidModifier {
442                        value: String::from("["),
443                        index: location.byte as usize,
444                    },
445                });
446            }
447
448            let Some(token) = tokens.next_if_not_whitespace() else {
449                return Ok(Self {
450                    modifiers: modifiers.into_boxed_slice(),
451                    trailing_whitespace: Some(whitespace),
452                });
453            };
454
455            let modifier = Modifier::from_leading_whitespace_and_token(whitespace, token)?;
456            modifiers.push(modifier);
457        }
458    }
459
460    fn span(&self) -> Span {
461        match &*self.modifiers {
462            [] => self
463                .trailing_whitespace
464                .map(|whitespace| whitespace.span)
465                .unwrap_or(Span::DUMMY),
466            [modifier] => modifier.key.span.start.to(modifier.value.span.end),
467            [first, .., last] => first.key.span.start.to(last.value.span.end),
468        }
469    }
470}
471
472/// Parse a nested format description. The location provided is the most recent one consumed.
473#[inline]
474fn parse_nested<'a, I>(
475    version: FormatDescriptionVersion,
476    last_location: Location,
477    tokens: &mut lexer::Lexed<I>,
478) -> Result<NestedFormatDescription<'a>, Error>
479where
480    I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
481{
482    let Some(opening_bracket) = tokens.next_if_opening_bracket() else {
483        return Err(Error {
484            _inner: unused(last_location.error("expected opening bracket")),
485            public: error::InvalidFormatDescription::Expected {
486                what: "opening bracket",
487                index: last_location.byte as usize,
488            },
489        });
490    };
491    let items = parse_inner(version, true, tokens).collect::<Result<_, _>>()?;
492    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
493        return Err(Error {
494            _inner: unused(opening_bracket.error("unclosed bracket")),
495            public: error::InvalidFormatDescription::UnclosedOpeningBracket {
496                index: opening_bracket.byte as usize,
497            },
498        });
499    };
500    let trailing_whitespace = tokens.next_if_whitespace();
501
502    Ok(NestedFormatDescription {
503        _opening_bracket: unused(opening_bracket),
504        items,
505        _closing_bracket: unused(closing_bracket),
506        _trailing_whitespace: unused(trailing_whitespace),
507    })
508}