From 936ba2a3682f9c7becd287392fcc66014c16dd32 Mon Sep 17 00:00:00 2001 From: axon-q Date: Wed, 6 Jun 2018 14:06:01 +0000 Subject: [PATCH] multipart: parse and validate Content-Disposition --- Cargo.toml | 1 + src/error.rs | 3 + src/header/common/content_disposition.rs | 84 +++++++++------- src/header/common/mod.rs | 4 +- src/header/mod.rs | 120 +++++++++++++++++++++++ src/lib.rs | 2 + src/multipart.rs | 23 ++++- 7 files changed, 193 insertions(+), 44 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9cd3304fd..9d080f53b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,6 +77,7 @@ time = "0.1" encoding = "0.2" language-tags = "0.2" lazy_static = "1.0" +unicase = "2.1" url = { version="1.7", features=["query_encoding"] } cookie = { version="0.10", features=["percent-encode"] } brotli2 = { version="^0.3.2", optional = true } diff --git a/src/error.rs b/src/error.rs index cfb6a0287..f4de36813 100644 --- a/src/error.rs +++ b/src/error.rs @@ -353,6 +353,9 @@ pub enum MultipartError { /// Can not parse Content-Type header #[fail(display = "Can not parse Content-Type header")] ParseContentType, + /// Can not parse Content-Disposition header + #[fail(display = "Can not parse Content-Disposition header")] + ParseContentDisposition, /// Multipart boundary is not found #[fail(display = "Multipart boundary is not found")] Boundary, diff --git a/src/header/common/content_disposition.rs b/src/header/common/content_disposition.rs index 0fcd6ee09..4d1a0c6d2 100644 --- a/src/header/common/content_disposition.rs +++ b/src/header/common/content_disposition.rs @@ -7,13 +7,14 @@ // IANA assignment: http://www.iana.org/assignments/cont-disp/cont-disp.xhtml use language_tags::LanguageTag; -use std::fmt; use unicase; -use header::{Header, Raw, parsing}; -use header::parsing::{parse_extended_value, http_percent_encode}; +use header; +use header::{Header, IntoHeaderValue, Writer}; use header::shared::Charset; +use std::fmt::{self, Write}; + /// The implied disposition of the content of the HTTP body. #[derive(Clone, Debug, PartialEq)] pub enum DispositionType { @@ -88,19 +89,14 @@ pub struct ContentDisposition { /// Disposition parameters pub parameters: Vec, } - -impl Header for ContentDisposition { - fn header_name() -> &'static str { - static NAME: &'static str = "Content-Disposition"; - NAME - } - - fn parse_header(raw: &Raw) -> ::Result { - parsing::from_one_raw_str(raw).and_then(|s: String| { +impl ContentDisposition { + /// Parse a raw Content-Disposition header value + pub fn from_raw(hv: Option<&header::HeaderValue>) -> Result { + header::from_one_raw_str(hv).and_then(|s: String| { let mut sections = s.split(';'); let disposition = match sections.next() { Some(s) => s.trim(), - None => return Err(::Error::Header), + None => return Err(::error::ParseError::Header), }; let mut cd = ContentDisposition { @@ -120,13 +116,13 @@ impl Header for ContentDisposition { let key = if let Some(key) = parts.next() { key.trim() } else { - return Err(::Error::Header); + return Err(::error::ParseError::Header); }; let val = if let Some(val) = parts.next() { val.trim() } else { - return Err(::Error::Header); + return Err(::error::ParseError::Header); }; cd.parameters.push( @@ -135,7 +131,7 @@ impl Header for ContentDisposition { Charset::Ext("UTF-8".to_owned()), None, val.trim_matches('"').as_bytes().to_owned()) } else if unicase::eq_ascii(&*key, "filename*") { - let extended_value = try!(parse_extended_value(val)); + let extended_value = try!(header::parse_extended_value(val)); DispositionParam::Filename(extended_value.charset, extended_value.language_tag, extended_value.value) } else { DispositionParam::Ext(key.to_owned(), val.trim_matches('"').to_owned()) @@ -146,10 +142,25 @@ impl Header for ContentDisposition { Ok(cd) }) } +} - #[inline] - fn fmt_header(&self, f: &mut ::header::Formatter) -> fmt::Result { - f.fmt_line(self) +impl IntoHeaderValue for ContentDisposition { + type Error = header::InvalidHeaderValueBytes; + + fn try_into(self) -> Result { + let mut writer = Writer::new(); + let _ = write!(&mut writer, "{}", self); + header::HeaderValue::from_shared(writer.take()) + } +} + +impl Header for ContentDisposition { + fn name() -> header::HeaderName { + header::CONTENT_DISPOSITION + } + + fn parse(msg: &T) -> Result { + Self::from_raw(msg.headers().get(Self::name())) } } @@ -183,7 +194,7 @@ impl fmt::Display for ContentDisposition { try!(write!(f, "{}", lang)); }; try!(write!(f, "'")); - try!(http_percent_encode(f, bytes)) + try!(header::http_percent_encode(f, bytes)) } }, DispositionParam::Ext(ref k, ref v) => try!(write!(f, "; {}=\"{}\"", k, v)), @@ -196,15 +207,14 @@ impl fmt::Display for ContentDisposition { #[cfg(test)] mod tests { use super::{ContentDisposition,DispositionType,DispositionParam}; - use ::header::Header; - use ::header::shared::Charset; - + use header::HeaderValue; + use header::shared::Charset; #[test] - fn test_parse_header() { - assert!(ContentDisposition::parse_header(&"".into()).is_err()); + fn test_from_raw() { + assert!(ContentDisposition::from_raw(Some(&HeaderValue::from_static(""))).is_err()); - let a = "form-data; dummy=3; name=upload;\r\n filename=\"sample.png\"".into(); - let a: ContentDisposition = ContentDisposition::parse_header(&a).unwrap(); + let a = HeaderValue::from_static("form-data; dummy=3; name=upload;\r\n filename=\"sample.png\""); + let a: ContentDisposition = ContentDisposition::from_raw(Some(&a)).unwrap(); let b = ContentDisposition { disposition: DispositionType::Ext("form-data".to_owned()), parameters: vec![ @@ -217,8 +227,8 @@ mod tests { }; assert_eq!(a, b); - let a = "attachment; filename=\"image.jpg\"".into(); - let a: ContentDisposition = ContentDisposition::parse_header(&a).unwrap(); + let a = HeaderValue::from_static("attachment; filename=\"image.jpg\""); + let a: ContentDisposition = ContentDisposition::from_raw(Some(&a)).unwrap(); let b = ContentDisposition { disposition: DispositionType::Attachment, parameters: vec![ @@ -229,8 +239,8 @@ mod tests { }; assert_eq!(a, b); - let a = "attachment; filename*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates".into(); - let a: ContentDisposition = ContentDisposition::parse_header(&a).unwrap(); + let a = HeaderValue::from_static("attachment; filename*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates"); + let a: ContentDisposition = ContentDisposition::from_raw(Some(&a)).unwrap(); let b = ContentDisposition { disposition: DispositionType::Attachment, parameters: vec![ @@ -246,18 +256,18 @@ mod tests { #[test] fn test_display() { let as_string = "attachment; filename*=UTF-8'en'%C2%A3%20and%20%E2%82%AC%20rates"; - let a = as_string.into(); - let a: ContentDisposition = ContentDisposition::parse_header(&a).unwrap(); + let a = HeaderValue::from_static(as_string); + let a: ContentDisposition = ContentDisposition::from_raw(Some(&a)).unwrap(); let display_rendered = format!("{}",a); assert_eq!(as_string, display_rendered); - let a = "attachment; filename*=UTF-8''black%20and%20white.csv".into(); - let a: ContentDisposition = ContentDisposition::parse_header(&a).unwrap(); + let a = HeaderValue::from_static("attachment; filename*=UTF-8''black%20and%20white.csv"); + let a: ContentDisposition = ContentDisposition::from_raw(Some(&a)).unwrap(); let display_rendered = format!("{}",a); assert_eq!("attachment; filename=\"black and white.csv\"".to_owned(), display_rendered); - let a = "attachment; filename=colourful.csv".into(); - let a: ContentDisposition = ContentDisposition::parse_header(&a).unwrap(); + let a = HeaderValue::from_static("attachment; filename=colourful.csv"); + let a: ContentDisposition = ContentDisposition::from_raw(Some(&a)).unwrap(); let display_rendered = format!("{}",a); assert_eq!("attachment; filename=\"colourful.csv\"".to_owned(), display_rendered); } diff --git a/src/header/common/mod.rs b/src/header/common/mod.rs index 08f8e0cc4..e6185b5a7 100644 --- a/src/header/common/mod.rs +++ b/src/header/common/mod.rs @@ -13,7 +13,7 @@ pub use self::accept_language::AcceptLanguage; pub use self::accept::Accept; pub use self::allow::Allow; pub use self::cache_control::{CacheControl, CacheDirective}; -//pub use self::content_disposition::{ContentDisposition, DispositionType, DispositionParam}; +pub use self::content_disposition::{ContentDisposition, DispositionType, DispositionParam}; pub use self::content_language::ContentLanguage; pub use self::content_range::{ContentRange, ContentRangeSpec}; pub use self::content_type::ContentType; @@ -334,7 +334,7 @@ mod accept_language; mod accept; mod allow; mod cache_control; -//mod content_disposition; +mod content_disposition; mod content_language; mod content_range; mod content_type; diff --git a/src/header/mod.rs b/src/header/mod.rs index a9c42e29c..e4d4e0491 100644 --- a/src/header/mod.rs +++ b/src/header/mod.rs @@ -8,6 +8,7 @@ use bytes::{Bytes, BytesMut}; use mime::Mime; use modhttp::header::GetAll; use modhttp::Error as HttpError; +use percent_encoding; pub use modhttp::header::*; @@ -259,3 +260,122 @@ where } Ok(()) } + +// From hyper v0.11.27 src/header/parsing.rs + +/// An extended header parameter value (i.e., tagged with a character set and optionally, +/// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2). +#[derive(Clone, Debug, PartialEq)] +pub struct ExtendedValue { + /// The character set that is used to encode the `value` to a string. + pub charset: Charset, + /// The human language details of the `value`, if available. + pub language_tag: Option, + /// The parameter value, as expressed in octets. + pub value: Vec, +} + +/// Parses extended header parameter values (`ext-value`), as defined in +/// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2). +/// +/// Extended values are denoted by parameter names that end with `*`. +/// +/// ## ABNF +/// +/// ```text +/// ext-value = charset "'" [ language ] "'" value-chars +/// ; like RFC 2231's +/// ; (see [RFC2231], Section 7) +/// +/// charset = "UTF-8" / "ISO-8859-1" / mime-charset +/// +/// mime-charset = 1*mime-charsetc +/// mime-charsetc = ALPHA / DIGIT +/// / "!" / "#" / "$" / "%" / "&" +/// / "+" / "-" / "^" / "_" / "`" +/// / "{" / "}" / "~" +/// ; as in Section 2.3 of [RFC2978] +/// ; except that the single quote is not included +/// ; SHOULD be registered in the IANA charset registry +/// +/// language = +/// +/// value-chars = *( pct-encoded / attr-char ) +/// +/// pct-encoded = "%" HEXDIG HEXDIG +/// ; see [RFC3986], Section 2.1 +/// +/// attr-char = ALPHA / DIGIT +/// / "!" / "#" / "$" / "&" / "+" / "-" / "." +/// / "^" / "_" / "`" / "|" / "~" +/// ; token except ( "*" / "'" / "%" ) +/// ``` +pub fn parse_extended_value(val: &str) -> Result { + + // Break into three pieces separated by the single-quote character + let mut parts = val.splitn(3,'\''); + + // Interpret the first piece as a Charset + let charset: Charset = match parts.next() { + None => return Err(::error::ParseError::Header), + Some(n) => FromStr::from_str(n).map_err(|_| ::error::ParseError::Header)?, + }; + + // Interpret the second piece as a language tag + let lang: Option = match parts.next() { + None => return Err(::error::ParseError::Header), + Some("") => None, + Some(s) => match s.parse() { + Ok(lt) => Some(lt), + Err(_) => return Err(::error::ParseError::Header), + } + }; + + // Interpret the third piece as a sequence of value characters + let value: Vec = match parts.next() { + None => return Err(::error::ParseError::Header), + Some(v) => percent_encoding::percent_decode(v.as_bytes()).collect(), + }; + + Ok(ExtendedValue { + charset: charset, + language_tag: lang, + value: value, + }) +} + + +impl fmt::Display for ExtendedValue { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let encoded_value = + percent_encoding::percent_encode(&self.value[..], self::percent_encoding_http::HTTP_VALUE); + if let Some(ref lang) = self.language_tag { + write!(f, "{}'{}'{}", self.charset, lang, encoded_value) + } else { + write!(f, "{}''{}", self.charset, encoded_value) + } + } +} + +/// Percent encode a sequence of bytes with a character set defined in +/// [https://tools.ietf.org/html/rfc5987#section-3.2][url] +/// +/// [url]: https://tools.ietf.org/html/rfc5987#section-3.2 +pub fn http_percent_encode(f: &mut fmt::Formatter, bytes: &[u8]) -> fmt::Result { + let encoded = percent_encoding::percent_encode(bytes, self::percent_encoding_http::HTTP_VALUE); + fmt::Display::fmt(&encoded, f) +} +mod percent_encoding_http { + use percent_encoding; + + // internal module because macro is hard-coded to make a public item + // but we don't want to public export this item + define_encode_set! { + // This encode set is used for HTTP header values and is defined at + // https://tools.ietf.org/html/rfc5987#section-3.2 + pub HTTP_VALUE = [percent_encoding::SIMPLE_ENCODE_SET] | { + ' ', '"', '%', '\'', '(', ')', '*', ',', '/', ':', ';', '<', '-', '>', '?', + '[', '\\', ']', '{', '}' + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 5d3767a29..25b4ef776 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,6 +118,7 @@ extern crate tokio_io; extern crate tokio_reactor; extern crate tokio_tcp; extern crate tokio_timer; +extern crate unicase; extern crate url; #[macro_use] extern crate serde; @@ -128,6 +129,7 @@ extern crate encoding; extern crate flate2; extern crate h2 as http2; extern crate num_cpus; +#[macro_use] extern crate percent_encoding; extern crate serde_json; extern crate serde_urlencoded; diff --git a/src/multipart.rs b/src/multipart.rs index f310327f4..632a40c24 100644 --- a/src/multipart.rs +++ b/src/multipart.rs @@ -7,7 +7,7 @@ use std::{cmp, fmt}; use bytes::Bytes; use futures::task::{current as current_task, Task}; use futures::{Async, Poll, Stream}; -use http::header::{self, HeaderMap, HeaderName, HeaderValue}; +use http::header::{self, ContentDisposition, HeaderMap, HeaderName, HeaderValue}; use http::HttpTryFrom; use httparse; use mime; @@ -362,7 +362,7 @@ where headers, mt, field, - ))))) + )?)))) } } } @@ -378,6 +378,7 @@ impl Drop for InnerMultipart { /// A single field in a multipart stream pub struct Field { ct: mime::Mime, + cd: ContentDisposition, headers: HeaderMap, inner: Rc>>, safety: Safety, @@ -390,13 +391,20 @@ where fn new( safety: Safety, headers: HeaderMap, ct: mime::Mime, inner: Rc>>, - ) -> Self { - Field { + ) -> Result { + // RFC 7578: 'Each part MUST contain a Content-Disposition header field + // where the disposition type is "form-data".' + let cd = ContentDisposition::from_raw( + headers.get(::http::header::CONTENT_DISPOSITION) + ).map_err(|_| MultipartError::ParseContentDisposition)?; + + Ok(Field { ct, + cd, headers, inner, safety, - } + }) } /// Get a map of headers @@ -408,6 +416,11 @@ where pub fn content_type(&self) -> &mime::Mime { &self.ct } + + /// Get the content disposition of the field + pub fn content_disposition(&self) -> &ContentDisposition { + &self.cd + } } impl Stream for Field