Skip to main content

time/parsing/combinator/rfc/
rfc2822.rs

1//! Rules defined in [RFC 2822].
2//!
3//! [RFC 2822]: https://datatracker.ietf.org/doc/html/rfc2822
4
5use num_conv::prelude::*;
6
7use crate::parsing::ParsedItem;
8use crate::parsing::combinator::rfc::rfc2234::wsp;
9use crate::parsing::combinator::{ascii_char, one_or_more, zero_or_more};
10
/// Nesting depth at which `comment` gives up and reports a parse failure.
///
/// `cfws` starts comments at depth 1 and every nested comment increases the depth by one, so this
/// bounds the recursion between `comment` and `ccontent`.
const DEPTH_LIMIT: u8 = 32;
12
13/// Consume the `fws` rule.
14// The full rule is equivalent to /\r\n[ \t]+|[ \t]+(?:\r\n[ \t]+)*/
15#[inline]
16pub(crate) fn fws(mut input: &[u8]) -> Option<ParsedItem<'_, ()>> {
17    if let [b'\r', b'\n', rest @ ..] = input {
18        one_or_more(wsp)(rest)
19    } else {
20        input = one_or_more(wsp)(input)?.into_inner();
21        while let [b'\r', b'\n', rest @ ..] = input {
22            input = one_or_more(wsp)(rest)?.into_inner();
23        }
24        Some(ParsedItem(input, ()))
25    }
26}
27
28/// Consume the `cfws` rule.
29// The full rule is equivalent to any combination of `fws` and `comment` so long as it is not empty.
30#[inline]
31pub(crate) fn cfws(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
32    one_or_more(|input| fws(input).or_else(|| comment(input, 1)))(input)
33}
34
35/// Consume the `comment` rule.
36#[inline]
37fn comment(mut input: &[u8], depth: u8) -> Option<ParsedItem<'_, ()>> {
38    // Avoid stack exhaustion DoS by limiting recursion depth. This will cause highly-nested
39    // comments to fail parsing, but comments *at all* are incredibly rare in practice.
40    //
41    // The error from this will not be descriptive, but the rarity and near-certain maliciousness of
42    // such inputs makes this an acceptable trade-off.
43    if depth == DEPTH_LIMIT {
44        return None;
45    }
46
47    input = ascii_char::<b'('>(input)?.into_inner();
48    input = zero_or_more(fws)(input).into_inner();
49    while let Some(rest) = ccontent(input, depth + 1) {
50        input = rest.into_inner();
51        input = zero_or_more(fws)(input).into_inner();
52    }
53    input = ascii_char::<b')'>(input)?.into_inner();
54
55    Some(ParsedItem(input, ()))
56}
57
58/// Consume the `ccontent` rule.
59#[inline]
60fn ccontent(input: &[u8], depth: u8) -> Option<ParsedItem<'_, ()>> {
61    ctext(input)
62        .or_else(|| quoted_pair(input))
63        .or_else(|| comment(input, depth))
64}
65
66/// Consume the `ctext` rule.
67#[expect(
68    clippy::unnecessary_lazy_evaluations,
69    reason = "rust-lang/rust-clippy#8522"
70)]
71#[inline]
72fn ctext(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
73    no_ws_ctl(input).or_else(|| match input {
74        [33..=39 | 42..=91 | 93..=126, rest @ ..] => Some(ParsedItem(rest, ())),
75        _ => None,
76    })
77}
78
79/// Consume the `quoted_pair` rule.
80#[inline]
81fn quoted_pair(mut input: &[u8]) -> Option<ParsedItem<'_, ()>> {
82    input = ascii_char::<b'\\'>(input)?.into_inner();
83    input = text(input).into_inner();
84
85    // If nothing is parsed by `text`, this means by hit the `obs-text` rule and nothing matched.
86    // This is technically a success, and we used to check the `obs-qp` rule to ensure everything
87    // possible was consumed. After further analysis, it was determined that this check was
88    // unnecessary due to `obs-text` wholly subsuming `obs-qp` in this context. For this reason, if
89    // `text` fails to parse anything, we consider it a success without further consideration.
90
91    Some(ParsedItem(input, ()))
92}
93
94/// Consume the `no_ws_ctl` rule.
95#[inline]
96const fn no_ws_ctl(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
97    match input {
98        [1..=8 | 11..=12 | 14..=31 | 127, rest @ ..] => Some(ParsedItem(rest, ())),
99        _ => None,
100    }
101}
102
103/// Consume the `text` rule.
104#[inline]
105fn text<'a>(input: &'a [u8]) -> ParsedItem<'a, ()> {
106    let new_text = |input: &'a [u8]| match input {
107        [1..=9 | 11..=12 | 14..=127, rest @ ..] => Some(ParsedItem(rest, ())),
108        _ => None,
109    };
110
111    let obs_char = |input: &'a [u8]| match input {
112        // This is technically allowed, but consuming this would mean the rest of the string is
113        // eagerly consumed without consideration for where the comment actually ends.
114        [b')', ..] => None,
115        [0..=9 | 11..=12 | 14..=127, rest @ ..] => Some(rest),
116        _ => None,
117    };
118
119    let obs_text = |mut input| {
120        input = zero_or_more(ascii_char::<b'\n'>)(input).into_inner();
121        input = zero_or_more(ascii_char::<b'\r'>)(input).into_inner();
122        while let Some(rest) = obs_char(input) {
123            input = rest;
124            input = zero_or_more(ascii_char::<b'\n'>)(input).into_inner();
125            input = zero_or_more(ascii_char::<b'\r'>)(input).into_inner();
126        }
127
128        ParsedItem(input, ())
129    };
130
131    new_text(input).unwrap_or_else(|| obs_text(input))
132}
133
134/// Consume an old zone literal, returning the offset in hours.
135#[inline]
136pub(crate) fn zone_literal(input: &[u8]) -> Option<ParsedItem<'_, i8>> {
137    let [first, second, third, rest @ ..] = input else {
138        const UT_VARIANTS: [u16; 4] = [
139            u16::from_ne_bytes([b'u', b't']),
140            u16::from_ne_bytes([b'u', b'T']),
141            u16::from_ne_bytes([b'U', b't']),
142            u16::from_ne_bytes([b'U', b'T']),
143        ];
144
145        let [first, rest @ ..] = input else {
146            return None;
147        };
148        if let [second, rest @ ..] = rest
149            && UT_VARIANTS.contains(&u16::from_ne_bytes([*first, *second]))
150        {
151            return Some(ParsedItem(rest, 0));
152        }
153        return (*first != b'j' && *first != b'J' && first.is_ascii_alphabetic())
154            .then_some(ParsedItem(rest, 0));
155    };
156    let byte = u32::from_ne_bytes([
157        0,
158        first.to_ascii_lowercase(),
159        second.to_ascii_lowercase(),
160        third.to_ascii_lowercase(),
161    ]);
162    const ZONES: [u32; 8] = [
163        u32::from_ne_bytes([0, b'e', b's', b't']),
164        u32::from_ne_bytes([0, b'e', b'd', b't']),
165        u32::from_ne_bytes([0, b'c', b's', b't']),
166        u32::from_ne_bytes([0, b'c', b'd', b't']),
167        u32::from_ne_bytes([0, b'm', b's', b't']),
168        u32::from_ne_bytes([0, b'm', b'd', b't']),
169        u32::from_ne_bytes([0, b'p', b's', b't']),
170        u32::from_ne_bytes([0, b'p', b'd', b't']),
171    ];
172
173    let eq = [
174        if ZONES[0] == byte { i32::MAX } else { 0 },
175        if ZONES[1] == byte { i32::MAX } else { 0 },
176        if ZONES[2] == byte { i32::MAX } else { 0 },
177        if ZONES[3] == byte { i32::MAX } else { 0 },
178        if ZONES[4] == byte { i32::MAX } else { 0 },
179        if ZONES[5] == byte { i32::MAX } else { 0 },
180        if ZONES[6] == byte { i32::MAX } else { 0 },
181        if ZONES[7] == byte { i32::MAX } else { 0 },
182    ];
183    if eq == [0; 8] && byte != const { u32::from_ne_bytes([0, b'g', b'm', b't']) } {
184        return None;
185    }
186
187    let nonzero_zones = [
188        eq[0] & -5,
189        eq[1] & -4,
190        eq[2] & -6,
191        eq[3] & -5,
192        eq[4] & -7,
193        eq[5] & -6,
194        eq[6] & -8,
195        eq[7] & -7,
196    ];
197    let zone = nonzero_zones.iter().sum::<i32>().truncate();
198    Some(ParsedItem(rest, zone))
199}