time/format_description/parse/
ast.rs

1//! AST for parsing format descriptions.
2
3use alloc::boxed::Box;
4use alloc::string::String;
5use alloc::vec::Vec;
6use core::iter;
7
8use super::{Error, Location, Spanned, SpannedValue, Unused, lexer, unused};
9use crate::internal_macros::bug;
10
/// One part of a complete format description.
///
/// Values of this type are what the parser in this module yields. Fields whose names start
/// with an underscore are wrapped in `Unused`: they are recorded while parsing but are not read
/// anywhere in this module.
pub(super) enum Item<'a> {
    /// A literal string, formatted and parsed as-is.
    ///
    /// The lexer's literal tokens should never occur inside a nested format description; the
    /// parser treats that situation as an internal bug. Within a nested description this variant
    /// is instead built from component-part tokens, whitespace included.
    Literal(Spanned<&'a [u8]>),
    /// A sequence of brackets. The first acts as the escape character.
    ///
    /// The parser only emits this for format description versions `..=1`, when an opening
    /// bracket token is immediately followed by a second opening bracket token.
    EscapedBracket {
        /// The first bracket.
        _first: Unused<Location>,
        /// The second bracket.
        _second: Unused<Location>,
    },
    /// Part of a type, along with its modifiers.
    Component {
        /// Where the opening bracket was in the format string.
        _opening_bracket: Unused<Location>,
        /// Whitespace between the opening bracket and name, if any.
        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// The name of the component.
        name: Spanned<&'a [u8]>,
        /// The modifiers for the component, in the order they were written.
        modifiers: Box<[Modifier<'a>]>,
        /// Whitespace between the modifiers and closing bracket, if any.
        _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// Where the closing bracket was in the format string.
        _closing_bracket: Unused<Location>,
    },
    /// An optional sequence of items.
    Optional {
        /// Where the opening bracket was in the format string.
        opening_bracket: Location,
        /// Whitespace between the opening bracket and "optional", if any.
        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// The "optional" keyword.
        _optional_kw: Unused<Spanned<&'a [u8]>>,
        /// Whitespace between the "optional" keyword and the opening bracket. This whitespace is
        /// mandatory, which is why it is not wrapped in an `Option`.
        _whitespace: Unused<Spanned<&'a [u8]>>,
        /// The items within the optional sequence.
        nested_format_description: NestedFormatDescription<'a>,
        /// Where the closing bracket was in the format string.
        closing_bracket: Location,
    },
    /// The first matching parse of a sequence of items.
    First {
        /// Where the opening bracket was in the format string.
        opening_bracket: Location,
        /// Whitespace between the opening bracket and "first", if any.
        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// The "first" keyword.
        _first_kw: Unused<Spanned<&'a [u8]>>,
        /// Whitespace between the "first" keyword and the opening bracket. This whitespace is
        /// mandatory, which is why it is not wrapped in an `Option`.
        _whitespace: Unused<Spanned<&'a [u8]>>,
        /// The sequences of items to try, in the order they were written. May be empty.
        nested_format_descriptions: Box<[NestedFormatDescription<'a>]>,
        /// Where the closing bracket was in the format string.
        closing_bracket: Location,
    },
}
72
/// A format description that is nested within another format description.
///
/// This is one bracketed group of items, as used by the `optional` and `first` constructs,
/// together with any whitespace that directly follows the group's closing bracket.
pub(super) struct NestedFormatDescription<'a> {
    /// Where the opening bracket was in the format string.
    pub(super) _opening_bracket: Unused<Location>,
    /// The items within the nested format description, in source order.
    pub(super) items: Box<[Item<'a>]>,
    /// Where the closing bracket was in the format string.
    pub(super) _closing_bracket: Unused<Location>,
    /// Whitespace between the closing bracket and the next item, if any.
    pub(super) _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
}
84
/// A modifier for a component.
///
/// A modifier is written as `key:value` and is always preceded by whitespace. The parser
/// splits on the first colon and rejects modifiers whose key or value is empty, so both `key`
/// and `value` are non-empty when produced by this module.
pub(super) struct Modifier<'a> {
    /// Whitespace preceding the modifier.
    pub(super) _leading_whitespace: Unused<Spanned<&'a [u8]>>,
    /// The key of the modifier (the bytes before the first colon).
    pub(super) key: Spanned<&'a [u8]>,
    /// Where the colon of the modifier was in the format string.
    pub(super) _colon: Unused<Location>,
    /// The value of the modifier (the bytes after the first colon).
    pub(super) value: Spanned<&'a [u8]>,
}
96
/// Parse the provided tokens into an AST.
///
/// Returns a lazy iterator: each call to `next` pulls as many tokens from `tokens` as are needed
/// and yields one top-level [`Item`], or the error encountered while parsing that item. The
/// iterator holds the mutable borrow of `tokens` for `'iter`.
///
/// `VERSION` is the format description version. It is passed to `validate_version!` and
/// forwarded to the inner parser, where it decides whether two consecutive opening brackets are
/// treated as an escaped bracket.
#[inline]
pub(super) fn parse<'item, 'iter, I, const VERSION: usize>(
    tokens: &'iter mut lexer::Lexed<I>,
) -> impl Iterator<Item = Result<Item<'item>, Error>> + use<'item, 'iter, I, VERSION>
where
    'item: 'iter,
    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
{
    validate_version!(VERSION);
    // `false`: the items are used directly, not as part of a nested format description.
    parse_inner::<_, false, VERSION>(tokens)
}
109
110/// Parse the provided tokens into an AST. The const generic indicates whether the resulting
111/// [`Item`] will be used directly or as part of a [`NestedFormatDescription`].
112#[inline]
113fn parse_inner<'item, I, const NESTED: bool, const VERSION: usize>(
114    tokens: &mut lexer::Lexed<I>,
115) -> impl Iterator<Item = Result<Item<'item>, Error>> + use<'_, 'item, I, NESTED, VERSION>
116where
117    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
118{
119    validate_version!(VERSION);
120    iter::from_fn(move || {
121        if NESTED && tokens.peek_closing_bracket().is_some() {
122            return None;
123        }
124
125        let next = match tokens.next()? {
126            Ok(token) => token,
127            Err(err) => return Some(Err(err)),
128        };
129
130        Some(match next {
131            lexer::Token::Literal(Spanned { value: _, span: _ }) if NESTED => {
132                bug!("literal should not be present in nested description")
133            }
134            lexer::Token::Literal(value) => Ok(Item::Literal(value)),
135            lexer::Token::Bracket {
136                kind: lexer::BracketKind::Opening,
137                location,
138            } => {
139                if version!(..=1) {
140                    if let Some(second_location) = tokens.next_if_opening_bracket() {
141                        Ok(Item::EscapedBracket {
142                            _first: unused(location),
143                            _second: unused(second_location),
144                        })
145                    } else {
146                        parse_component::<_, VERSION>(location, tokens)
147                    }
148                } else {
149                    parse_component::<_, VERSION>(location, tokens)
150                }
151            }
152            lexer::Token::Bracket {
153                kind: lexer::BracketKind::Closing,
154                location: _,
155            } if NESTED => {
156                bug!("closing bracket should be caught by the `if` statement")
157            }
158            lexer::Token::Bracket {
159                kind: lexer::BracketKind::Closing,
160                location: _,
161            } => {
162                bug!("closing bracket should have been consumed by `parse_component`")
163            }
164            lexer::Token::ComponentPart {
165                kind: _, // whitespace is significant in nested components
166                value,
167            } if NESTED => Ok(Item::Literal(value)),
168            lexer::Token::ComponentPart { kind: _, value: _ } => {
169                bug!("component part should have been consumed by `parse_component`")
170            }
171        })
172    })
173}
174
175/// Parse a component. This assumes that the opening bracket has already been consumed.
176fn parse_component<'a, I, const VERSION: usize>(
177    opening_bracket: Location,
178    tokens: &mut lexer::Lexed<I>,
179) -> Result<Item<'a>, Error>
180where
181    I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
182{
183    validate_version!(VERSION);
184    let leading_whitespace = tokens.next_if_whitespace();
185
186    let Some(name) = tokens.next_if_not_whitespace() else {
187        let span = match leading_whitespace {
188            Some(Spanned { value: _, span }) => span,
189            None => opening_bracket.to_self(),
190        };
191        return Err(Error {
192            _inner: unused(span.error("expected component name")),
193            public: crate::error::InvalidFormatDescription::MissingComponentName {
194                index: span.start.byte as usize,
195            },
196        });
197    };
198
199    if *name == b"optional" {
200        let Some(whitespace) = tokens.next_if_whitespace() else {
201            return Err(Error {
202                _inner: unused(name.span.error("expected whitespace after `optional`")),
203                public: crate::error::InvalidFormatDescription::Expected {
204                    what: "whitespace after `optional`",
205                    index: name.span.end.byte as usize,
206                },
207            });
208        };
209
210        let nested = parse_nested::<_, VERSION>(whitespace.span.end, tokens)?;
211
212        let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
213            return Err(Error {
214                _inner: unused(opening_bracket.error("unclosed bracket")),
215                public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
216                    index: opening_bracket.byte as usize,
217                },
218            });
219        };
220
221        return Ok(Item::Optional {
222            opening_bracket,
223            _leading_whitespace: unused(leading_whitespace),
224            _optional_kw: unused(name),
225            _whitespace: unused(whitespace),
226            nested_format_description: nested,
227            closing_bracket,
228        });
229    }
230
231    if *name == b"first" {
232        let Some(whitespace) = tokens.next_if_whitespace() else {
233            return Err(Error {
234                _inner: unused(name.span.error("expected whitespace after `first`")),
235                public: crate::error::InvalidFormatDescription::Expected {
236                    what: "whitespace after `first`",
237                    index: name.span.end.byte as usize,
238                },
239            });
240        };
241
242        let mut nested_format_descriptions = Vec::new();
243        while let Ok(description) = parse_nested::<_, VERSION>(whitespace.span.end, tokens) {
244            nested_format_descriptions.push(description);
245        }
246
247        let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
248            return Err(Error {
249                _inner: unused(opening_bracket.error("unclosed bracket")),
250                public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
251                    index: opening_bracket.byte as usize,
252                },
253            });
254        };
255
256        return Ok(Item::First {
257            opening_bracket,
258            _leading_whitespace: unused(leading_whitespace),
259            _first_kw: unused(name),
260            _whitespace: unused(whitespace),
261            nested_format_descriptions: nested_format_descriptions.into_boxed_slice(),
262            closing_bracket,
263        });
264    }
265
266    let mut modifiers = Vec::new();
267    let trailing_whitespace = loop {
268        let Some(whitespace) = tokens.next_if_whitespace() else {
269            break None;
270        };
271
272        // This is not necessary for proper parsing, but provides a much better error when a nested
273        // description is used where it's not allowed.
274        if let Some(location) = tokens.next_if_opening_bracket() {
275            return Err(Error {
276                _inner: unused(
277                    location
278                        .to_self()
279                        .error("modifier must be of the form `key:value`"),
280                ),
281                public: crate::error::InvalidFormatDescription::InvalidModifier {
282                    value: String::from("["),
283                    index: location.byte as usize,
284                },
285            });
286        }
287
288        let Some(Spanned { value, span }) = tokens.next_if_not_whitespace() else {
289            break Some(whitespace);
290        };
291
292        let Some(colon_index) = value.iter().position(|&b| b == b':') else {
293            return Err(Error {
294                _inner: unused(span.error("modifier must be of the form `key:value`")),
295                public: crate::error::InvalidFormatDescription::InvalidModifier {
296                    value: String::from_utf8_lossy(value).into_owned(),
297                    index: span.start.byte as usize,
298                },
299            });
300        };
301        let key = &value[..colon_index];
302        let value = &value[colon_index + 1..];
303
304        if key.is_empty() {
305            return Err(Error {
306                _inner: unused(span.shrink_to_start().error("expected modifier key")),
307                public: crate::error::InvalidFormatDescription::InvalidModifier {
308                    value: String::new(),
309                    index: span.start.byte as usize,
310                },
311            });
312        }
313        if value.is_empty() {
314            return Err(Error {
315                _inner: unused(span.shrink_to_end().error("expected modifier value")),
316                public: crate::error::InvalidFormatDescription::InvalidModifier {
317                    value: String::new(),
318                    index: span.shrink_to_end().start.byte as usize,
319                },
320            });
321        }
322
323        modifiers.push(Modifier {
324            _leading_whitespace: unused(whitespace),
325            key: key.spanned(span.shrink_to_before(colon_index as u32)),
326            _colon: unused(span.start.offset(colon_index as u32)),
327            value: value.spanned(span.shrink_to_after(colon_index as u32)),
328        });
329    };
330
331    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
332        return Err(Error {
333            _inner: unused(opening_bracket.error("unclosed bracket")),
334            public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
335                index: opening_bracket.byte as usize,
336            },
337        });
338    };
339
340    Ok(Item::Component {
341        _opening_bracket: unused(opening_bracket),
342        _leading_whitespace: unused(leading_whitespace),
343        name,
344        modifiers: modifiers.into_boxed_slice(),
345        _trailing_whitespace: unused(trailing_whitespace),
346        _closing_bracket: unused(closing_bracket),
347    })
348}
349
350/// Parse a nested format description. The location provided is the most recent one consumed.
351#[inline]
352fn parse_nested<'a, I, const VERSION: usize>(
353    last_location: Location,
354    tokens: &mut lexer::Lexed<I>,
355) -> Result<NestedFormatDescription<'a>, Error>
356where
357    I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
358{
359    validate_version!(VERSION);
360    let Some(opening_bracket) = tokens.next_if_opening_bracket() else {
361        return Err(Error {
362            _inner: unused(last_location.error("expected opening bracket")),
363            public: crate::error::InvalidFormatDescription::Expected {
364                what: "opening bracket",
365                index: last_location.byte as usize,
366            },
367        });
368    };
369    let items = parse_inner::<_, true, VERSION>(tokens).collect::<Result<_, _>>()?;
370    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
371        return Err(Error {
372            _inner: unused(opening_bracket.error("unclosed bracket")),
373            public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
374                index: opening_bracket.byte as usize,
375            },
376        });
377    };
378    let trailing_whitespace = tokens.next_if_whitespace();
379
380    Ok(NestedFormatDescription {
381        _opening_bracket: unused(opening_bracket),
382        items,
383        _closing_bracket: unused(closing_bracket),
384        _trailing_whitespace: unused(trailing_whitespace),
385    })
386}