time/format_description/parse/
ast.rs

1//! AST for parsing format descriptions.
2
3use alloc::boxed::Box;
4use alloc::string::String;
5use alloc::vec::Vec;
6use core::iter;
7
8use super::{lexer, unused, Error, Location, Spanned, SpannedValue, Unused};
9use crate::internal_macros::bug;
10
/// One part of a complete format description.
///
/// Fields wrapped in [`Unused`] (their names start with an underscore) record where a piece of
/// syntax was found. Nothing in this file reads them back after construction.
pub(super) enum Item<'a> {
    /// A literal string, formatted and parsed as-is.
    ///
    /// At the top level this is built from a literal token. Inside a nested format description
    /// the lexer is expected to never emit literal tokens; there, component parts that are not
    /// enclosed in their own brackets (whitespace included) become this variant instead.
    Literal(Spanned<&'a [u8]>),
    /// A sequence of brackets. The first acts as the escape character.
    ///
    /// The parser only produces this when `version!(..=1)` holds and an opening bracket is
    /// immediately followed by another opening bracket.
    EscapedBracket {
        /// The first bracket.
        _first: Unused<Location>,
        /// The second bracket.
        _second: Unused<Location>,
    },
    /// Part of a type, along with its modifiers.
    Component {
        /// Where the opening bracket was in the format string.
        _opening_bracket: Unused<Location>,
        /// Whitespace between the opening bracket and name.
        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// The name of the component.
        name: Spanned<&'a [u8]>,
        /// The modifiers for the component, in the order they were written.
        modifiers: Box<[Modifier<'a>]>,
        /// Whitespace between the modifiers and closing bracket.
        _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// Where the closing bracket was in the format string.
        _closing_bracket: Unused<Location>,
    },
    /// An optional sequence of items.
    Optional {
        /// Where the opening bracket was in the format string.
        opening_bracket: Location,
        /// Whitespace between the opening bracket and "optional".
        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// The "optional" keyword.
        _optional_kw: Unused<Spanned<&'a [u8]>>,
        /// Whitespace between the "optional" keyword and the opening bracket. This whitespace
        /// is mandatory, hence no `Option`.
        _whitespace: Unused<Spanned<&'a [u8]>>,
        /// The items within the optional sequence.
        nested_format_description: NestedFormatDescription<'a>,
        /// Where the closing bracket was in the format string.
        closing_bracket: Location,
    },
    /// The first matching parse of a sequence of items.
    First {
        /// Where the opening bracket was in the format string.
        opening_bracket: Location,
        /// Whitespace between the opening bracket and "first".
        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
        /// The "first" keyword.
        _first_kw: Unused<Spanned<&'a [u8]>>,
        /// Whitespace between the "first" keyword and the opening bracket. This whitespace is
        /// mandatory, hence no `Option`.
        _whitespace: Unused<Spanned<&'a [u8]>>,
        /// The sequences of items to try, in the order they were written. The parser does not
        /// require at least one, so this may be empty.
        nested_format_descriptions: Box<[NestedFormatDescription<'a>]>,
        /// Where the closing bracket was in the format string.
        closing_bracket: Location,
    },
}
72
/// A format description that is nested within another format description.
///
/// This is the bracketed body that follows the `optional` or `first` keyword.
pub(super) struct NestedFormatDescription<'a> {
    /// Where the opening bracket was in the format string.
    pub(super) _opening_bracket: Unused<Location>,
    /// The items within the nested format description, in source order.
    pub(super) items: Box<[Item<'a>]>,
    /// Where the closing bracket was in the format string.
    pub(super) _closing_bracket: Unused<Location>,
    /// Whitespace between the closing bracket and the next item.
    pub(super) _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
}
84
/// A modifier for a component, written as `key:value`.
///
/// The parser splits the text at its first colon and rejects an empty key or an empty value, so
/// both are non-empty in any `Modifier` it produces.
pub(super) struct Modifier<'a> {
    /// Whitespace preceding the modifier.
    pub(super) _leading_whitespace: Unused<Spanned<&'a [u8]>>,
    /// The key of the modifier (the bytes before the first colon).
    pub(super) key: Spanned<&'a [u8]>,
    /// Where the colon of the modifier was in the format string.
    pub(super) _colon: Unused<Location>,
    /// The value of the modifier (the bytes after the first colon).
    pub(super) value: Spanned<&'a [u8]>,
}
96
97/// Parse the provided tokens into an AST.
98#[inline]
99pub(super) fn parse<
100    'item: 'iter,
101    'iter,
102    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
103    const VERSION: usize,
104>(
105    tokens: &'iter mut lexer::Lexed<I>,
106) -> impl Iterator<Item = Result<Item<'item>, Error>> + 'iter {
107    validate_version!(VERSION);
108    parse_inner::<_, false, VERSION>(tokens)
109}
110
111/// Parse the provided tokens into an AST. The const generic indicates whether the resulting
112/// [`Item`] will be used directly or as part of a [`NestedFormatDescription`].
113#[inline]
114fn parse_inner<
115    'item,
116    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
117    const NESTED: bool,
118    const VERSION: usize,
119>(
120    tokens: &mut lexer::Lexed<I>,
121) -> impl Iterator<Item = Result<Item<'item>, Error>> + '_ {
122    validate_version!(VERSION);
123    iter::from_fn(move || {
124        if NESTED && tokens.peek_closing_bracket().is_some() {
125            return None;
126        }
127
128        let next = match tokens.next()? {
129            Ok(token) => token,
130            Err(err) => return Some(Err(err)),
131        };
132
133        Some(match next {
134            lexer::Token::Literal(Spanned { value: _, span: _ }) if NESTED => {
135                bug!("literal should not be present in nested description")
136            }
137            lexer::Token::Literal(value) => Ok(Item::Literal(value)),
138            lexer::Token::Bracket {
139                kind: lexer::BracketKind::Opening,
140                location,
141            } => {
142                if version!(..=1) {
143                    if let Some(second_location) = tokens.next_if_opening_bracket() {
144                        Ok(Item::EscapedBracket {
145                            _first: unused(location),
146                            _second: unused(second_location),
147                        })
148                    } else {
149                        parse_component::<_, VERSION>(location, tokens)
150                    }
151                } else {
152                    parse_component::<_, VERSION>(location, tokens)
153                }
154            }
155            lexer::Token::Bracket {
156                kind: lexer::BracketKind::Closing,
157                location: _,
158            } if NESTED => {
159                bug!("closing bracket should be caught by the `if` statement")
160            }
161            lexer::Token::Bracket {
162                kind: lexer::BracketKind::Closing,
163                location: _,
164            } => {
165                bug!("closing bracket should have been consumed by `parse_component`")
166            }
167            lexer::Token::ComponentPart {
168                kind: _, // whitespace is significant in nested components
169                value,
170            } if NESTED => Ok(Item::Literal(value)),
171            lexer::Token::ComponentPart { kind: _, value: _ } => {
172                bug!("component part should have been consumed by `parse_component`")
173            }
174        })
175    })
176}
177
178/// Parse a component. This assumes that the opening bracket has already been consumed.
179fn parse_component<
180    'a,
181    I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
182    const VERSION: usize,
183>(
184    opening_bracket: Location,
185    tokens: &mut lexer::Lexed<I>,
186) -> Result<Item<'a>, Error> {
187    validate_version!(VERSION);
188    let leading_whitespace = tokens.next_if_whitespace();
189
190    let Some(name) = tokens.next_if_not_whitespace() else {
191        let span = match leading_whitespace {
192            Some(Spanned { value: _, span }) => span,
193            None => opening_bracket.to_self(),
194        };
195        return Err(Error {
196            _inner: unused(span.error("expected component name")),
197            public: crate::error::InvalidFormatDescription::MissingComponentName {
198                index: span.start.byte as usize,
199            },
200        });
201    };
202
203    if *name == b"optional" {
204        let Some(whitespace) = tokens.next_if_whitespace() else {
205            return Err(Error {
206                _inner: unused(name.span.error("expected whitespace after `optional`")),
207                public: crate::error::InvalidFormatDescription::Expected {
208                    what: "whitespace after `optional`",
209                    index: name.span.end.byte as usize,
210                },
211            });
212        };
213
214        let nested = parse_nested::<_, VERSION>(whitespace.span.end, tokens)?;
215
216        let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
217            return Err(Error {
218                _inner: unused(opening_bracket.error("unclosed bracket")),
219                public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
220                    index: opening_bracket.byte as usize,
221                },
222            });
223        };
224
225        return Ok(Item::Optional {
226            opening_bracket,
227            _leading_whitespace: unused(leading_whitespace),
228            _optional_kw: unused(name),
229            _whitespace: unused(whitespace),
230            nested_format_description: nested,
231            closing_bracket,
232        });
233    }
234
235    if *name == b"first" {
236        let Some(whitespace) = tokens.next_if_whitespace() else {
237            return Err(Error {
238                _inner: unused(name.span.error("expected whitespace after `first`")),
239                public: crate::error::InvalidFormatDescription::Expected {
240                    what: "whitespace after `first`",
241                    index: name.span.end.byte as usize,
242                },
243            });
244        };
245
246        let mut nested_format_descriptions = Vec::new();
247        while let Ok(description) = parse_nested::<_, VERSION>(whitespace.span.end, tokens) {
248            nested_format_descriptions.push(description);
249        }
250
251        let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
252            return Err(Error {
253                _inner: unused(opening_bracket.error("unclosed bracket")),
254                public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
255                    index: opening_bracket.byte as usize,
256                },
257            });
258        };
259
260        return Ok(Item::First {
261            opening_bracket,
262            _leading_whitespace: unused(leading_whitespace),
263            _first_kw: unused(name),
264            _whitespace: unused(whitespace),
265            nested_format_descriptions: nested_format_descriptions.into_boxed_slice(),
266            closing_bracket,
267        });
268    }
269
270    let mut modifiers = Vec::new();
271    let trailing_whitespace = loop {
272        let Some(whitespace) = tokens.next_if_whitespace() else {
273            break None;
274        };
275
276        // This is not necessary for proper parsing, but provides a much better error when a nested
277        // description is used where it's not allowed.
278        if let Some(location) = tokens.next_if_opening_bracket() {
279            return Err(Error {
280                _inner: unused(
281                    location
282                        .to_self()
283                        .error("modifier must be of the form `key:value`"),
284                ),
285                public: crate::error::InvalidFormatDescription::InvalidModifier {
286                    value: String::from("["),
287                    index: location.byte as usize,
288                },
289            });
290        }
291
292        let Some(Spanned { value, span }) = tokens.next_if_not_whitespace() else {
293            break Some(whitespace);
294        };
295
296        let Some(colon_index) = value.iter().position(|&b| b == b':') else {
297            return Err(Error {
298                _inner: unused(span.error("modifier must be of the form `key:value`")),
299                public: crate::error::InvalidFormatDescription::InvalidModifier {
300                    value: String::from_utf8_lossy(value).into_owned(),
301                    index: span.start.byte as usize,
302                },
303            });
304        };
305        let key = &value[..colon_index];
306        let value = &value[colon_index + 1..];
307
308        if key.is_empty() {
309            return Err(Error {
310                _inner: unused(span.shrink_to_start().error("expected modifier key")),
311                public: crate::error::InvalidFormatDescription::InvalidModifier {
312                    value: String::new(),
313                    index: span.start.byte as usize,
314                },
315            });
316        }
317        if value.is_empty() {
318            return Err(Error {
319                _inner: unused(span.shrink_to_end().error("expected modifier value")),
320                public: crate::error::InvalidFormatDescription::InvalidModifier {
321                    value: String::new(),
322                    index: span.shrink_to_end().start.byte as usize,
323                },
324            });
325        }
326
327        modifiers.push(Modifier {
328            _leading_whitespace: unused(whitespace),
329            key: key.spanned(span.shrink_to_before(colon_index as u32)),
330            _colon: unused(span.start.offset(colon_index as u32)),
331            value: value.spanned(span.shrink_to_after(colon_index as u32)),
332        });
333    };
334
335    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
336        return Err(Error {
337            _inner: unused(opening_bracket.error("unclosed bracket")),
338            public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
339                index: opening_bracket.byte as usize,
340            },
341        });
342    };
343
344    Ok(Item::Component {
345        _opening_bracket: unused(opening_bracket),
346        _leading_whitespace: unused(leading_whitespace),
347        name,
348        modifiers: modifiers.into_boxed_slice(),
349        _trailing_whitespace: unused(trailing_whitespace),
350        _closing_bracket: unused(closing_bracket),
351    })
352}
353
354/// Parse a nested format description. The location provided is the most recent one consumed.
355#[inline]
356fn parse_nested<'a, I: Iterator<Item = Result<lexer::Token<'a>, Error>>, const VERSION: usize>(
357    last_location: Location,
358    tokens: &mut lexer::Lexed<I>,
359) -> Result<NestedFormatDescription<'a>, Error> {
360    validate_version!(VERSION);
361    let Some(opening_bracket) = tokens.next_if_opening_bracket() else {
362        return Err(Error {
363            _inner: unused(last_location.error("expected opening bracket")),
364            public: crate::error::InvalidFormatDescription::Expected {
365                what: "opening bracket",
366                index: last_location.byte as usize,
367            },
368        });
369    };
370    let items = parse_inner::<_, true, VERSION>(tokens).collect::<Result<_, _>>()?;
371    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
372        return Err(Error {
373            _inner: unused(opening_bracket.error("unclosed bracket")),
374            public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
375                index: opening_bracket.byte as usize,
376            },
377        });
378    };
379    let trailing_whitespace = tokens.next_if_whitespace();
380
381    Ok(NestedFormatDescription {
382        _opening_bracket: unused(opening_bracket),
383        items,
384        _closing_bracket: unused(closing_bracket),
385        _trailing_whitespace: unused(trailing_whitespace),
386    })
387}