actix_http/header/shared/
extended.rs

1//! Originally taken from `hyper::header::parsing`.
2
3use std::{fmt, str::FromStr};
4
5use language_tags::LanguageTag;
6
7use crate::header::{Charset, HTTP_VALUE};
8
9/// The value part of an extended parameter consisting of three parts:
10/// - The REQUIRED character set name (`charset`).
11/// - The OPTIONAL language information (`language_tag`).
12/// - A character sequence representing the actual value (`value`), separated by single quotes.
13///
14/// It is defined in [RFC 5987 §3.2](https://datatracker.ietf.org/doc/html/rfc5987#section-3.2).
15#[derive(Clone, Debug, PartialEq, Eq)]
16pub struct ExtendedValue {
17    /// The character set that is used to encode the `value` to a string.
18    pub charset: Charset,
19
20    /// The human language details of the `value`, if available.
21    pub language_tag: Option<LanguageTag>,
22
23    /// The parameter value, as expressed in octets.
24    pub value: Vec<u8>,
25}
26
27/// Parses extended header parameter values (`ext-value`), as defined
28/// in [RFC 5987 §3.2](https://datatracker.ietf.org/doc/html/rfc5987#section-3.2).
29///
30/// Extended values are denoted by parameter names that end with `*`.
31///
32/// ## ABNF
33///
34/// ```plain
35/// ext-value     = charset  "'" [ language ] "'" value-chars
36///               ; like RFC 2231's <extended-initial-value>
37///               ; (see [RFC 2231 §7])
38///
39/// charset       = "UTF-8" / "ISO-8859-1" / mime-charset
40///
41/// mime-charset  = 1*mime-charsetc
42/// mime-charsetc = ALPHA / DIGIT
43///               / "!" / "#" / "$" / "%" / "&"
44///               / "+" / "-" / "^" / "_" / "`"
45///               / "{" / "}" / "~"
46///               ; as <mime-charset> in [RFC 2978 §2.3]
47///               ; except that the single quote is not included
48///               ; SHOULD be registered in the IANA charset registry
49///
50/// language      = <Language-Tag, defined in [RFC 5646 §2.1]>
51///
52/// value-chars   = *( pct-encoded / attr-char )
53///
54/// pct-encoded   = "%" HEXDIG HEXDIG
55///               ; see [RFC 3986 §2.1]
56///
57/// attr-char     = ALPHA / DIGIT
58///               / "!" / "#" / "$" / "&" / "+" / "-" / "."
59///               / "^" / "_" / "`" / "|" / "~"
60///               ; token except ( "*" / "'" / "%" )
61/// ```
62///
63/// [RFC 2231 §7]: https://datatracker.ietf.org/doc/html/rfc2231#section-7
64/// [RFC 2978 §2.3]: https://datatracker.ietf.org/doc/html/rfc2978#section-2.3
65/// [RFC 3986 §2.1]: https://datatracker.ietf.org/doc/html/rfc5646#section-2.1
66pub fn parse_extended_value(val: &str) -> Result<ExtendedValue, crate::error::ParseError> {
67    // Break into three pieces separated by the single-quote character
68    let mut parts = val.splitn(3, '\'');
69
70    // Interpret the first piece as a Charset
71    let charset: Charset = match parts.next() {
72        None => return Err(crate::error::ParseError::Header),
73        Some(n) => FromStr::from_str(n).map_err(|_| crate::error::ParseError::Header)?,
74    };
75
76    // Interpret the second piece as a language tag
77    let language_tag: Option<LanguageTag> = match parts.next() {
78        None => return Err(crate::error::ParseError::Header),
79        Some("") => None,
80        Some(s) => match s.parse() {
81            Ok(lt) => Some(lt),
82            Err(_) => return Err(crate::error::ParseError::Header),
83        },
84    };
85
86    // Interpret the third piece as a sequence of value characters
87    let value: Vec<u8> = match parts.next() {
88        None => return Err(crate::error::ParseError::Header),
89        Some(v) => percent_encoding::percent_decode(v.as_bytes()).collect(),
90    };
91
92    Ok(ExtendedValue {
93        charset,
94        language_tag,
95        value,
96    })
97}
98
99impl fmt::Display for ExtendedValue {
100    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
101        let encoded_value = percent_encoding::percent_encode(&self.value[..], HTTP_VALUE);
102        if let Some(ref lang) = self.language_tag {
103            write!(f, "{}'{}'{}", self.charset, lang, encoded_value)
104        } else {
105            write!(f, "{}''{}", self.charset, encoded_value)
106        }
107    }
108}
109
#[cfg(test)]
mod tests {
    use super::*;

    /// Charset, language tag and value are all present.
    #[test]
    fn test_parse_extended_value_with_encoding_and_language_tag() {
        // RFC 5987, Section 3.2.2
        // Extended notation, using the Unicode character U+00A3 (POUND SIGN)
        let parsed = parse_extended_value("iso-8859-1'en'%A3%20rates").unwrap();

        assert_eq!(parsed.charset, Charset::Iso_8859_1);
        assert_eq!(
            parsed.language_tag,
            Some("en".parse::<LanguageTag>().unwrap())
        );
        assert_eq!(parsed.value, b"\xA3 rates".to_vec());
    }

    /// Charset and value are present; the language piece is empty.
    #[test]
    fn test_parse_extended_value_with_encoding() {
        // RFC 5987, Section 3.2.2
        // Extended notation, using the Unicode characters U+00A3 (POUND SIGN)
        // and U+20AC (EURO SIGN)
        let parsed = parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates").unwrap();

        assert_eq!(parsed.charset, Charset::Ext("UTF-8".to_string()));
        assert_eq!(parsed.language_tag, None);
        assert_eq!(
            parsed.value,
            b"\xC2\xA3 and \xE2\x82\xAC rates".to_vec()
        );
    }

    /// No single quote at all: only one piece is available, so parsing fails.
    #[test]
    fn test_parse_extended_value_missing_language_tag_and_encoding() {
        // From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2
        assert!(parse_extended_value("foo%20bar.html").is_err());
    }

    /// Only one single quote, followed by text that is not a valid second piece.
    #[test]
    fn test_parse_extended_value_partially_formatted() {
        assert!(parse_extended_value("UTF-8'missing third part").is_err());
    }

    /// Only one single quote with nothing after it: the value piece is missing.
    #[test]
    fn test_parse_extended_value_partially_formatted_blank() {
        assert!(parse_extended_value("blank second part'").is_err());
    }

    /// Formatting with a language tag yields `charset'lang'value`.
    #[test]
    fn test_fmt_extended_value_with_encoding_and_language_tag() {
        let subject = ExtendedValue {
            charset: Charset::Iso_8859_1,
            language_tag: Some("en".parse().expect("Could not parse language tag")),
            value: b"\xA3 rates".to_vec(),
        };

        assert_eq!(subject.to_string(), "ISO-8859-1'en'%A3%20rates");
    }

    /// Formatting without a language tag leaves the middle piece empty.
    #[test]
    fn test_fmt_extended_value_with_encoding() {
        let subject = ExtendedValue {
            charset: Charset::Ext("UTF-8".to_string()),
            language_tag: None,
            value: b"\xC2\xA3 and \xE2\x82\xAC rates".to_vec(),
        };

        assert_eq!(
            subject.to_string(),
            "UTF-8''%C2%A3%20and%20%E2%82%AC%20rates"
        );
    }
}