// time/format_description/parse/ast.rs

1//! AST for parsing format descriptions.
2
3use alloc::boxed::Box;
4use alloc::string::String;
5use alloc::vec::Vec;
6use core::iter;
7
8use super::{Error, Location, Span, Spanned, SpannedValue, Unused, lexer, unused};
9use crate::error;
10use crate::format_description::FormatDescriptionVersion;
11use crate::internal_macros::bug;
12
13/// One part of a complete format description.
14pub(super) enum Item<'a> {
15    /// A literal string, formatted and parsed as-is.
16    ///
17    /// This should never be present inside a nested format description.
18    Literal(Spanned<&'a [u8]>),
19    /// Part of a type, along with its modifiers and nested format descriptions.
20    Component {
21        /// The version of the format description, which may affect how the component is parsed.
22        version: FormatDescriptionVersion,
23        /// Where the opening bracket was in the format string.
24        opening_bracket: Location,
25        /// Whitespace between the opening bracket and name.
26        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
27        /// The name of the component.
28        name: Spanned<&'a [u8]>,
29        /// The modifiers for the component.
30        modifiers: Box<[Modifier<'a>]>,
31        /// The nested format descriptions within the component.
32        nested_format_descriptions: Box<[NestedFormatDescription<'a>]>,
33        /// Whitespace between the modifiers/nested format descriptions and closing bracket.
34        _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
35        /// Where the closing bracket was in the format string.
36        closing_bracket: Location,
37    },
38}
39
40/// A format description that is nested within another format description.
41pub(super) struct NestedFormatDescription<'a> {
42    /// Whitespace between the end of the previous item and the opening bracket.
43    pub(super) leading_whitespace: Option<Spanned<&'a [u8]>>,
44    /// Where the opening bracket was in the format string.
45    pub(super) opening_bracket: Location,
46    /// The items within the nested format description.
47    pub(super) items: Box<[Item<'a>]>,
48    /// Where the closing bracket was in the format string.
49    pub(super) closing_bracket: Location,
50}
51
52/// A modifier for a component.
53pub(super) struct Modifier<'a> {
54    /// Whitespace preceding the modifier.
55    pub(super) _leading_whitespace: Unused<Spanned<&'a [u8]>>,
56    /// The key of the modifier.
57    pub(super) key: Spanned<&'a [u8]>,
58    /// Where the colon of the modifier was in the format string.
59    pub(super) _colon: Unused<Location>,
60    /// The value of the modifier.
61    pub(super) value: Spanned<&'a [u8]>,
62}
63
64impl<'a> Modifier<'a> {
65    fn from_leading_whitespace_and_token(
66        leading_whitespace: Spanned<&'a [u8]>,
67        token: Spanned<&'a [u8]>,
68    ) -> Result<Self, Error> {
69        let Some(colon_index) = token.iter().position(|&b| b == b':') else {
70            return Err(Error {
71                _inner: unused(token.span.error("modifier must be of the form `key:value`")),
72                public: error::InvalidFormatDescription::InvalidModifier {
73                    value: String::from_utf8_lossy(*token).into_owned(),
74                    index: token.span.start.byte as usize,
75                },
76            });
77        };
78        let key = &token[..colon_index];
79        let value = &token[colon_index + 1..];
80
81        if key.is_empty() {
82            return Err(Error {
83                _inner: unused(token.span.shrink_to_start().error("expected modifier key")),
84                public: error::InvalidFormatDescription::InvalidModifier {
85                    value: String::new(),
86                    index: token.span.start.byte as usize,
87                },
88            });
89        }
90        if value.is_empty() {
91            return Err(Error {
92                _inner: unused(token.span.shrink_to_end().error("expected modifier value")),
93                public: error::InvalidFormatDescription::InvalidModifier {
94                    value: String::new(),
95                    index: token.span.start.byte as usize + colon_index,
96                },
97            });
98        }
99
100        Ok(Self {
101            _leading_whitespace: unused(leading_whitespace),
102            key: key.spanned(
103                token
104                    .span
105                    .start
106                    .to(token.span.start.offset(colon_index as u32)),
107            ),
108            _colon: unused(token.span.start.offset(colon_index as u32)),
109            value: value.spanned(
110                token
111                    .span
112                    .start
113                    .offset(colon_index as u32 + 1)
114                    .to(token.span.end),
115            ),
116        })
117    }
118
119    pub(super) const fn key_value_span(&self) -> Span {
120        self.key.span.start.to(self.value.span.end)
121    }
122}
123
124/// Parse the provided tokens into an AST.
125#[inline]
126pub(super) fn parse<'item, 'iter, I>(
127    version: FormatDescriptionVersion,
128    tokens: &'iter mut lexer::Lexed<I>,
129) -> impl Iterator<Item = Result<Item<'item>, Error>> + use<'item, 'iter, I>
130where
131    'item: 'iter,
132    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
133{
134    parse_inner(version, false, tokens)
135}
136
137/// Parse the provided tokens into an AST. The const generic indicates whether the resulting
138/// [`Item`] will be used directly or as part of a [`NestedFormatDescription`].
139#[inline]
140fn parse_inner<'item, I>(
141    version: FormatDescriptionVersion,
142    nested: bool,
143    tokens: &mut lexer::Lexed<I>,
144) -> impl Iterator<Item = Result<Item<'item>, Error>> + use<'_, 'item, I>
145where
146    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
147{
148    iter::from_fn(move || {
149        if nested && tokens.peek_closing_bracket().is_some() {
150            return None;
151        }
152
153        let next = match tokens.next()? {
154            Ok(token) => token,
155            Err(err) => return Some(Err(err)),
156        };
157
158        Some(match next {
159            lexer::Token::Literal(Spanned { value: _, span: _ }) if nested => {
160                bug!("literal should not be present in nested description")
161            }
162            lexer::Token::Literal(value) => Ok(Item::Literal(value)),
163            lexer::Token::Bracket {
164                kind: lexer::BracketKind::Opening,
165                location,
166            } => {
167                if version.is_v1()
168                    && let Some(second_location) = tokens.next_if_opening_bracket()
169                {
170                    Ok(Item::Literal(
171                        b"[".as_slice().spanned(location.to(second_location)),
172                    ))
173                } else {
174                    parse_component(version, location, tokens)
175                }
176            }
177            lexer::Token::Bracket {
178                kind: lexer::BracketKind::Closing,
179                location: _,
180            } if nested => {
181                bug!("closing bracket should be caught by the `if` statement")
182            }
183            lexer::Token::Bracket {
184                kind: lexer::BracketKind::Closing,
185                location: _,
186            } => {
187                bug!("closing bracket should have been consumed by `parse_component`")
188            }
189            lexer::Token::ComponentPart {
190                kind: _, // whitespace is significant in nested components
191                value,
192            } if nested => Ok(Item::Literal(value)),
193            lexer::Token::ComponentPart { kind: _, value: _ } => {
194                bug!("component part should have been consumed by `parse_component`")
195            }
196        })
197    })
198}
199
200/// Parse a component. This assumes that the opening bracket has already been consumed.
201fn parse_component<'a, I>(
202    version: FormatDescriptionVersion,
203    opening_bracket: Location,
204    tokens: &mut lexer::Lexed<I>,
205) -> Result<Item<'a>, Error>
206where
207    I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
208{
209    let leading_whitespace = tokens.next_if_whitespace();
210
211    let Some(name) = tokens.next_if_not_whitespace() else {
212        let span = match leading_whitespace {
213            Some(Spanned { value: _, span }) => span,
214            None => opening_bracket.to_self(),
215        };
216        return Err(Error {
217            _inner: unused(span.error("expected component name")),
218            public: error::InvalidFormatDescription::MissingComponentName {
219                index: span.start.byte as usize,
220            },
221        });
222    };
223
224    let modifiers = Modifiers::parse(tokens)?;
225
226    let mut nested_format_descriptions = Vec::new();
227    while let Ok(description) = parse_nested(version, modifiers.span().end, tokens) {
228        nested_format_descriptions.push(description);
229    }
230
231    if modifiers.trailing_whitespace.is_some()
232        && let Some(first_nested) = nested_format_descriptions.first_mut()
233    {
234        first_nested.leading_whitespace = modifiers.trailing_whitespace;
235    }
236
237    let nested_fds_trailing_whitespace =
238        if modifiers.trailing_whitespace.is_some() && nested_format_descriptions.is_empty() {
239            modifiers.trailing_whitespace
240        } else {
241            tokens.next_if_whitespace()
242        };
243
244    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
245        return Err(Error {
246            _inner: unused(opening_bracket.error("unclosed bracket")),
247            public: error::InvalidFormatDescription::UnclosedOpeningBracket {
248                index: opening_bracket.byte as usize,
249            },
250        });
251    };
252
253    Ok(Item::Component {
254        version,
255        opening_bracket,
256        _leading_whitespace: unused(leading_whitespace),
257        name,
258        modifiers: modifiers.modifiers,
259        nested_format_descriptions: nested_format_descriptions.into_boxed_slice(),
260        _trailing_whitespace: unused(nested_fds_trailing_whitespace),
261        closing_bracket,
262    })
263}
264
265struct Modifiers<'a> {
266    modifiers: Box<[Modifier<'a>]>,
267    trailing_whitespace: Option<Spanned<&'a [u8]>>,
268}
269
270impl<'a> Modifiers<'a> {
271    /// Parse modifiers until there are none left. Returns the modifiers along with any trailing
272    /// whitespace after the last modifier.
273    fn parse<I>(tokens: &mut lexer::Lexed<I>) -> Result<Self, Error>
274    where
275        I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
276    {
277        let mut modifiers = Vec::new();
278        loop {
279            let Some(whitespace) = tokens.next_if_whitespace() else {
280                return Ok(Self {
281                    modifiers: modifiers.into_boxed_slice(),
282                    trailing_whitespace: None,
283                });
284            };
285            let Some(token) = tokens.next_if_not_whitespace() else {
286                return Ok(Self {
287                    modifiers: modifiers.into_boxed_slice(),
288                    trailing_whitespace: Some(whitespace),
289                });
290            };
291            let modifier = Modifier::from_leading_whitespace_and_token(whitespace, token)?;
292            modifiers.push(modifier);
293        }
294    }
295
296    fn span(&self) -> Span {
297        match &*self.modifiers {
298            [] => Span::DUMMY,
299            [modifier] => modifier.key.span.start.to(modifier.value.span.end),
300            [first, .., last] => first.key.span.start.to(last.value.span.end),
301        }
302    }
303}
304
305/// Parse a nested format description. The location provided is the most recent one consumed.
306#[inline]
307fn parse_nested<'a, I>(
308    version: FormatDescriptionVersion,
309    last_location: Location,
310    tokens: &mut lexer::Lexed<I>,
311) -> Result<NestedFormatDescription<'a>, Error>
312where
313    I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
314{
315    let leading_whitespace = tokens.next_if_whitespace();
316    let Some(opening_bracket) = tokens.next_if_opening_bracket() else {
317        return Err(Error {
318            _inner: unused(last_location.error("expected opening bracket")),
319            public: error::InvalidFormatDescription::Expected {
320                what: "opening bracket",
321                index: last_location.byte as usize,
322            },
323        });
324    };
325    let items = parse_inner(version, true, tokens).collect::<Result<_, _>>()?;
326    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
327        return Err(Error {
328            _inner: unused(opening_bracket.error("unclosed bracket")),
329            public: error::InvalidFormatDescription::UnclosedOpeningBracket {
330                index: opening_bracket.byte as usize,
331            },
332        });
333    };
334
335    Ok(NestedFormatDescription {
336        leading_whitespace,
337        opening_bracket,
338        items,
339        closing_bracket,
340    })
341}