Skip to main content

time/parsing/combinator/rfc/
rfc2822.rs

1//! Rules defined in [RFC 2822].
2//!
3//! [RFC 2822]: https://datatracker.ietf.org/doc/html/rfc2822
4
5use num_conv::prelude::*;
6
7use crate::parsing::ParsedItem;
8use crate::parsing::combinator::rfc::rfc2234::wsp;
9use crate::parsing::combinator::{ascii_char, one_or_more, zero_or_more};
10
/// Nesting depth at which `comment` gives up and returns `None`. `cfws` starts counting at 1 and
/// every nested comment increases the depth by one.
11const DEPTH_LIMIT: u8 = 32;
12
13/// Consume the `fws` rule.
14// The full rule is equivalent to /\r\n[ \t]+|[ \t]+(?:\r\n[ \t]+)*/
15#[inline]
16pub(crate) fn fws(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
17    if !matches!(input.first(), Some(b'\r' | b' ' | b'\t')) {
18        return None;
19    }
20
21    if !matches!(input.get(1), Some(b' ' | b'\r' | b'\n' | b'\t')) {
22        return Some(ParsedItem(&input[1..], ()));
23    }
24
25    #[inline(never)]
26    fn fws_uncommon(mut input: &[u8]) -> Option<ParsedItem<'_, ()>> {
27        if let [b'\r', b'\n', rest @ ..] = input {
28            one_or_more(wsp)(rest)
29        } else {
30            input = one_or_more(wsp)(input)?.into_inner();
31            while let [b'\r', b'\n', rest @ ..] = input {
32                input = one_or_more(wsp)(rest)?.into_inner();
33            }
34            Some(ParsedItem(input, ()))
35        }
36    }
37    fws_uncommon(input)
38}
39
40/// Consume the `cfws` rule.
41// The full rule is equivalent to any combination of `fws` and `comment` so long as it is not empty.
42#[inline]
43pub(crate) fn cfws(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
44    if input.first() != Some(&b'(') {
45        if !matches!(input.first(), Some(b'\r' | b' ' | b'\t')) {
46            return None;
47        }
48
49        if !matches!(
50            input.get(1),
51            Some(b'(' | b' ' | b'\r' | b'\n' | b'\t' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')
52        ) {
53            return Some(ParsedItem(&input[1..], ()));
54        }
55    }
56
57    #[inline(never)]
58    fn cfws_uncommon(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
59        one_or_more(|input| fws(input).or_else(|| comment(input, 1)))(input)
60    }
61    cfws_uncommon(input)
62}
63
64/// Consume the `comment` rule.
65#[inline]
66fn comment(mut input: &[u8], depth: u8) -> Option<ParsedItem<'_, ()>> {
67    // Avoid stack exhaustion DoS by limiting recursion depth. This will cause highly-nested
68    // comments to fail parsing, but comments *at all* are incredibly rare in practice.
69    //
70    // The error from this will not be descriptive, but the rarity and near-certain maliciousness of
71    // such inputs makes this an acceptable trade-off.
72    if depth == DEPTH_LIMIT {
73        return None;
74    }
75
76    input = ascii_char::<b'('>(input)?.into_inner();
77    input = zero_or_more(fws)(input).into_inner();
78    while let Some(rest) = ccontent(input, depth + 1) {
79        input = rest.into_inner();
80        input = zero_or_more(fws)(input).into_inner();
81    }
82    input = ascii_char::<b')'>(input)?.into_inner();
83
84    Some(ParsedItem(input, ()))
85}
86
87/// Consume the `ccontent` rule.
88#[inline]
89fn ccontent(input: &[u8], depth: u8) -> Option<ParsedItem<'_, ()>> {
90    ctext(input)
91        .or_else(|| quoted_pair(input))
92        .or_else(|| comment(input, depth))
93}
94
95/// Consume the `ctext` rule.
96#[expect(
97    clippy::unnecessary_lazy_evaluations,
98    reason = "rust-lang/rust-clippy#8522"
99)]
100#[inline]
101fn ctext(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
102    no_ws_ctl(input).or_else(|| match input {
103        [33..=39 | 42..=91 | 93..=126, rest @ ..] => Some(ParsedItem(rest, ())),
104        _ => None,
105    })
106}
107
108/// Consume the `quoted_pair` rule.
109#[inline]
110fn quoted_pair(mut input: &[u8]) -> Option<ParsedItem<'_, ()>> {
111    input = ascii_char::<b'\\'>(input)?.into_inner();
112    input = text(input).into_inner();
113
114    // If nothing is parsed by `text`, this means by hit the `obs-text` rule and nothing matched.
115    // This is technically a success, and we used to check the `obs-qp` rule to ensure everything
116    // possible was consumed. After further analysis, it was determined that this check was
117    // unnecessary due to `obs-text` wholly subsuming `obs-qp` in this context. For this reason, if
118    // `text` fails to parse anything, we consider it a success without further consideration.
119
120    Some(ParsedItem(input, ()))
121}
122
123/// Consume the `no_ws_ctl` rule.
124#[inline]
125const fn no_ws_ctl(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
126    match input {
127        [1..=8 | 11..=12 | 14..=31 | 127, rest @ ..] => Some(ParsedItem(rest, ())),
128        _ => None,
129    }
130}
131
132/// Consume the `text` rule.
133#[inline]
134fn text<'a>(input: &'a [u8]) -> ParsedItem<'a, ()> {
135    let new_text = |input: &'a [u8]| match input {
136        [1..=9 | 11..=12 | 14..=127, rest @ ..] => Some(ParsedItem(rest, ())),
137        _ => None,
138    };
139
140    let obs_char = |input: &'a [u8]| match input {
141        // This is technically allowed, but consuming this would mean the rest of the string is
142        // eagerly consumed without consideration for where the comment actually ends.
143        [b')', ..] => None,
144        [0..=9 | 11..=12 | 14..=127, rest @ ..] => Some(rest),
145        _ => None,
146    };
147
148    let obs_text = |mut input| {
149        input = zero_or_more(ascii_char::<b'\n'>)(input).into_inner();
150        input = zero_or_more(ascii_char::<b'\r'>)(input).into_inner();
151        while let Some(rest) = obs_char(input) {
152            input = rest;
153            input = zero_or_more(ascii_char::<b'\n'>)(input).into_inner();
154            input = zero_or_more(ascii_char::<b'\r'>)(input).into_inner();
155        }
156
157        ParsedItem(input, ())
158    };
159
160    new_text(input).unwrap_or_else(|| obs_text(input))
161}
162
163/// Consume an old zone literal, returning the offset in hours.
164#[inline]
165pub(crate) fn zone_literal(input: &[u8]) -> Option<ParsedItem<'_, i8>> {
166    if matches!(input.first(), Some(b'-' | b'+')) {
167        return None;
168    }
169
170    crate::hint::cold_path();
171
172    let [first, second, third, rest @ ..] = input else {
173        const UT_VARIANTS: [u16; 4] = [
174            u16::from_ne_bytes([b'u', b't']),
175            u16::from_ne_bytes([b'u', b'T']),
176            u16::from_ne_bytes([b'U', b't']),
177            u16::from_ne_bytes([b'U', b'T']),
178        ];
179
180        let [first, rest @ ..] = input else {
181            return None;
182        };
183        if let [second, rest @ ..] = rest
184            && UT_VARIANTS.contains(&u16::from_ne_bytes([*first, *second]))
185        {
186            return Some(ParsedItem(rest, 0));
187        }
188        return (*first != b'j' && *first != b'J' && first.is_ascii_alphabetic())
189            .then_some(ParsedItem(rest, 0));
190    };
191    let byte = u32::from_ne_bytes([
192        0,
193        first.to_ascii_lowercase(),
194        second.to_ascii_lowercase(),
195        third.to_ascii_lowercase(),
196    ]);
197    const ZONES: [u32; 8] = [
198        u32::from_ne_bytes([0, b'e', b's', b't']),
199        u32::from_ne_bytes([0, b'e', b'd', b't']),
200        u32::from_ne_bytes([0, b'c', b's', b't']),
201        u32::from_ne_bytes([0, b'c', b'd', b't']),
202        u32::from_ne_bytes([0, b'm', b's', b't']),
203        u32::from_ne_bytes([0, b'm', b'd', b't']),
204        u32::from_ne_bytes([0, b'p', b's', b't']),
205        u32::from_ne_bytes([0, b'p', b'd', b't']),
206    ];
207
208    let eq = [
209        if ZONES[0] == byte { i32::MAX } else { 0 },
210        if ZONES[1] == byte { i32::MAX } else { 0 },
211        if ZONES[2] == byte { i32::MAX } else { 0 },
212        if ZONES[3] == byte { i32::MAX } else { 0 },
213        if ZONES[4] == byte { i32::MAX } else { 0 },
214        if ZONES[5] == byte { i32::MAX } else { 0 },
215        if ZONES[6] == byte { i32::MAX } else { 0 },
216        if ZONES[7] == byte { i32::MAX } else { 0 },
217    ];
218    if eq == [0; 8] && byte != const { u32::from_ne_bytes([0, b'g', b'm', b't']) } {
219        return None;
220    }
221
222    let nonzero_zones = [
223        eq[0] & -5,
224        eq[1] & -4,
225        eq[2] & -6,
226        eq[3] & -5,
227        eq[4] & -7,
228        eq[5] & -6,
229        eq[6] & -8,
230        eq[7] & -7,
231    ];
232    let zone = nonzero_zones.iter().sum::<i32>().truncate();
233    Some(ParsedItem(rest, zone))
234}