Skip to main content

time/parsing/combinator/rfc/
rfc2822.rs

1//! Rules defined in [RFC 2822].
2//!
3//! [RFC 2822]: https://datatracker.ietf.org/doc/html/rfc2822
4
5use num_conv::prelude::*;
6
7use crate::parsing::ParsedItem;
8use crate::parsing::combinator::rfc::rfc2234::wsp;
9use crate::parsing::combinator::{ascii_char, one_or_more, zero_or_more};
10
/// Recursion depth at which `comment` gives up and reports a parse failure. `cfws` starts
/// counting at 1 and every nested comment adds one.
const DEPTH_LIMIT: u8 = 32;
12
13/// Consume the `fws` rule.
14// The full rule is equivalent to /\r\n[ \t]+|[ \t]+(?:\r\n[ \t]+)*/
15#[inline]
16pub(crate) fn fws(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
17    if !matches!(input.first(), Some(b'\r' | b' ' | b'\t')) {
18        return None;
19    }
20
21    if !matches!(input.get(1), Some(b' ' | b'\r' | b'\n' | b'\t')) {
22        return Some(ParsedItem(&input[1..], ()));
23    }
24
25    #[inline(never)]
26    fn fws_uncommon(mut input: &[u8]) -> Option<ParsedItem<'_, ()>> {
27        if let [b'\r', b'\n', rest @ ..] = input {
28            one_or_more(wsp)(rest)
29        } else {
30            input = one_or_more(wsp)(input)?.into_inner();
31            while let [b'\r', b'\n', rest @ ..] = input {
32                input = one_or_more(wsp)(rest)?.into_inner();
33            }
34            Some(ParsedItem(input, ()))
35        }
36    }
37    fws_uncommon(input)
38}
39
40/// Consume the `cfws` rule.
41// The full rule is equivalent to any combination of `fws` and `comment` so long as it is not empty.
42#[inline]
43pub(crate) fn cfws(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
44    if input.first() != Some(&b'(') {
45        if !matches!(input.first(), Some(b'\r' | b' ' | b'\t')) {
46            return None;
47        }
48
49        if !matches!(input.get(1), Some(b'(' | b' ' | b'\r' | b'\n' | b'\t')) {
50            return Some(ParsedItem(&input[1..], ()));
51        }
52    }
53
54    #[inline(never)]
55    fn cfws_uncommon(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
56        one_or_more(|input| fws(input).or_else(|| comment(input, 1)))(input)
57    }
58    cfws_uncommon(input)
59}
60
61/// Consume the `comment` rule.
62#[inline]
63fn comment(mut input: &[u8], depth: u8) -> Option<ParsedItem<'_, ()>> {
64    // Avoid stack exhaustion DoS by limiting recursion depth. This will cause highly-nested
65    // comments to fail parsing, but comments *at all* are incredibly rare in practice.
66    //
67    // The error from this will not be descriptive, but the rarity and near-certain maliciousness of
68    // such inputs makes this an acceptable trade-off.
69    if depth == DEPTH_LIMIT {
70        return None;
71    }
72
73    input = ascii_char::<b'('>(input)?.into_inner();
74    input = zero_or_more(fws)(input).into_inner();
75    while let Some(rest) = ccontent(input, depth + 1) {
76        input = rest.into_inner();
77        input = zero_or_more(fws)(input).into_inner();
78    }
79    input = ascii_char::<b')'>(input)?.into_inner();
80
81    Some(ParsedItem(input, ()))
82}
83
84/// Consume the `ccontent` rule.
85#[inline]
86fn ccontent(input: &[u8], depth: u8) -> Option<ParsedItem<'_, ()>> {
87    ctext(input)
88        .or_else(|| quoted_pair(input))
89        .or_else(|| comment(input, depth))
90}
91
92/// Consume the `ctext` rule.
93#[expect(
94    clippy::unnecessary_lazy_evaluations,
95    reason = "rust-lang/rust-clippy#8522"
96)]
97#[inline]
98fn ctext(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
99    no_ws_ctl(input).or_else(|| match input {
100        [33..=39 | 42..=91 | 93..=126, rest @ ..] => Some(ParsedItem(rest, ())),
101        _ => None,
102    })
103}
104
105/// Consume the `quoted_pair` rule.
106#[inline]
107fn quoted_pair(mut input: &[u8]) -> Option<ParsedItem<'_, ()>> {
108    input = ascii_char::<b'\\'>(input)?.into_inner();
109    input = text(input).into_inner();
110
111    // If nothing is parsed by `text`, this means by hit the `obs-text` rule and nothing matched.
112    // This is technically a success, and we used to check the `obs-qp` rule to ensure everything
113    // possible was consumed. After further analysis, it was determined that this check was
114    // unnecessary due to `obs-text` wholly subsuming `obs-qp` in this context. For this reason, if
115    // `text` fails to parse anything, we consider it a success without further consideration.
116
117    Some(ParsedItem(input, ()))
118}
119
120/// Consume the `no_ws_ctl` rule.
121#[inline]
122const fn no_ws_ctl(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
123    match input {
124        [1..=8 | 11..=12 | 14..=31 | 127, rest @ ..] => Some(ParsedItem(rest, ())),
125        _ => None,
126    }
127}
128
129/// Consume the `text` rule.
130#[inline]
131fn text<'a>(input: &'a [u8]) -> ParsedItem<'a, ()> {
132    let new_text = |input: &'a [u8]| match input {
133        [1..=9 | 11..=12 | 14..=127, rest @ ..] => Some(ParsedItem(rest, ())),
134        _ => None,
135    };
136
137    let obs_char = |input: &'a [u8]| match input {
138        // This is technically allowed, but consuming this would mean the rest of the string is
139        // eagerly consumed without consideration for where the comment actually ends.
140        [b')', ..] => None,
141        [0..=9 | 11..=12 | 14..=127, rest @ ..] => Some(rest),
142        _ => None,
143    };
144
145    let obs_text = |mut input| {
146        input = zero_or_more(ascii_char::<b'\n'>)(input).into_inner();
147        input = zero_or_more(ascii_char::<b'\r'>)(input).into_inner();
148        while let Some(rest) = obs_char(input) {
149            input = rest;
150            input = zero_or_more(ascii_char::<b'\n'>)(input).into_inner();
151            input = zero_or_more(ascii_char::<b'\r'>)(input).into_inner();
152        }
153
154        ParsedItem(input, ())
155    };
156
157    new_text(input).unwrap_or_else(|| obs_text(input))
158}
159
160/// Consume an old zone literal, returning the offset in hours.
161#[inline]
162pub(crate) fn zone_literal(input: &[u8]) -> Option<ParsedItem<'_, i8>> {
163    if matches!(input.first(), Some(b'-' | b'+')) {
164        return None;
165    }
166
167    crate::hint::cold_path();
168
169    let [first, second, third, rest @ ..] = input else {
170        const UT_VARIANTS: [u16; 4] = [
171            u16::from_ne_bytes([b'u', b't']),
172            u16::from_ne_bytes([b'u', b'T']),
173            u16::from_ne_bytes([b'U', b't']),
174            u16::from_ne_bytes([b'U', b'T']),
175        ];
176
177        let [first, rest @ ..] = input else {
178            return None;
179        };
180        if let [second, rest @ ..] = rest
181            && UT_VARIANTS.contains(&u16::from_ne_bytes([*first, *second]))
182        {
183            return Some(ParsedItem(rest, 0));
184        }
185        return (*first != b'j' && *first != b'J' && first.is_ascii_alphabetic())
186            .then_some(ParsedItem(rest, 0));
187    };
188    let byte = u32::from_ne_bytes([
189        0,
190        first.to_ascii_lowercase(),
191        second.to_ascii_lowercase(),
192        third.to_ascii_lowercase(),
193    ]);
194    const ZONES: [u32; 8] = [
195        u32::from_ne_bytes([0, b'e', b's', b't']),
196        u32::from_ne_bytes([0, b'e', b'd', b't']),
197        u32::from_ne_bytes([0, b'c', b's', b't']),
198        u32::from_ne_bytes([0, b'c', b'd', b't']),
199        u32::from_ne_bytes([0, b'm', b's', b't']),
200        u32::from_ne_bytes([0, b'm', b'd', b't']),
201        u32::from_ne_bytes([0, b'p', b's', b't']),
202        u32::from_ne_bytes([0, b'p', b'd', b't']),
203    ];
204
205    let eq = [
206        if ZONES[0] == byte { i32::MAX } else { 0 },
207        if ZONES[1] == byte { i32::MAX } else { 0 },
208        if ZONES[2] == byte { i32::MAX } else { 0 },
209        if ZONES[3] == byte { i32::MAX } else { 0 },
210        if ZONES[4] == byte { i32::MAX } else { 0 },
211        if ZONES[5] == byte { i32::MAX } else { 0 },
212        if ZONES[6] == byte { i32::MAX } else { 0 },
213        if ZONES[7] == byte { i32::MAX } else { 0 },
214    ];
215    if eq == [0; 8] && byte != const { u32::from_ne_bytes([0, b'g', b'm', b't']) } {
216        return None;
217    }
218
219    let nonzero_zones = [
220        eq[0] & -5,
221        eq[1] & -4,
222        eq[2] & -6,
223        eq[3] & -5,
224        eq[4] & -7,
225        eq[5] & -6,
226        eq[6] & -8,
227        eq[7] & -7,
228    ];
229    let zone = nonzero_zones.iter().sum::<i32>().truncate();
230    Some(ParsedItem(rest, zone))
231}