From edcde6707657d3e8bdd1df21533fe80aa235f0dd Mon Sep 17 00:00:00 2001 From: Hung-I Wang Date: Wed, 6 Nov 2019 22:08:37 +0800 Subject: [PATCH] Fix escaping/encoding problems in Content-Disposition header (#1151) * Fix filename encoding in Content-Disposition of acitx_files::NamedFile * Add more comments on how to use Content-Disposition header properly & Fix some trivial problems * Improve Content-Disposition filename(*) parameters of actix_files::NamedFile * Tweak Content-Disposition parse to accept empty param value in quoted-string * Fix typos in comments in .../content_disposition.rs (pointed out by @JohnTitor) * Update CHANGES.md * Update CHANGES.md again --- CHANGES.md | 1 + actix-files/src/lib.rs | 25 ++++++ actix-files/src/named.rs | 13 ++- .../src/header/common/content_disposition.rs | 90 +++++++++++++++++-- 4 files changed, 119 insertions(+), 10 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 689ab13d..dcb57630 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,6 +5,7 @@ ### Added * Add `Payload::into_inner` method and make stored `def::Payload` public. (#1110) +* Add an additional `filename*` param in the `Content-Disposition` header of `actix_files::NamedFile` to be more compatible. (#1151) ### Changed diff --git a/actix-files/src/lib.rs b/actix-files/src/lib.rs index 61674ca3..16f40a20 100644 --- a/actix-files/src/lib.rs +++ b/actix-files/src/lib.rs @@ -733,6 +733,31 @@ mod tests { assert!(resp.headers().get(header::CONTENT_DISPOSITION).is_none()); } + #[test] + fn test_named_file_non_ascii_file_name() { + let mut file = + NamedFile::from_file(File::open("Cargo.toml").unwrap(), "貨物.toml") + .unwrap(); + { + file.file(); + let _f: &File = &file; + } + { + let _f: &mut File = &mut file; + } + + let req = TestRequest::default().to_http_request(); + let resp = file.respond_to(&req).unwrap(); + assert_eq!( + resp.headers().get(header::CONTENT_TYPE).unwrap(), + "text/x-toml" + ); + assert_eq!( + resp.headers().get(header::CONTENT_DISPOSITION).unwrap(), + "inline; filename=\"貨物.toml\"; filename*=UTF-8''%E8%B2%A8%E7%89%A9.toml" + ); + } + #[test] fn test_named_file_set_content_type() { let mut file = NamedFile::open("Cargo.toml") diff --git a/actix-files/src/named.rs b/actix-files/src/named.rs index ca1a909a..955982ca 100644 --- a/actix-files/src/named.rs +++ b/actix-files/src/named.rs @@ -13,7 +13,7 @@ use mime_guess::from_path; use actix_http::body::SizedStream; use actix_web::http::header::{ - self, ContentDisposition, DispositionParam, DispositionType, + self, Charset, ContentDisposition, DispositionParam, DispositionType, ExtendedValue, }; use actix_web::http::{ContentEncoding, StatusCode}; use actix_web::middleware::BodyEncoding; @@ -93,9 +93,18 @@ impl NamedFile { mime::IMAGE | mime::TEXT | mime::VIDEO => DispositionType::Inline, _ => DispositionType::Attachment, }; + let mut parameters = + vec![DispositionParam::Filename(String::from(filename.as_ref()))]; + if !filename.is_ascii() { + parameters.push(DispositionParam::FilenameExt(ExtendedValue { + charset: Charset::Ext(String::from("UTF-8")), + language_tag: None, + value: filename.into_owned().into_bytes(), + })) + } let cd = ContentDisposition { disposition: disposition_type, - parameters: vec![DispositionParam::Filename(filename.into_owned())], + parameters: parameters, }; (ct, cd) }; diff --git a/actix-http/src/header/common/content_disposition.rs b/actix-http/src/header/common/content_disposition.rs index 14fcc351..b2b6f34d 100644 --- a/actix-http/src/header/common/content_disposition.rs +++ b/actix-http/src/header/common/content_disposition.rs @@ -76,6 +76,11 @@ pub enum DispositionParam { /// the form. Name(String), /// A plain file name. + /// + /// It is [not supposed](https://tools.ietf.org/html/rfc6266#appendix-D) to contain any + /// non-ASCII characters when used in a *Content-Disposition* HTTP response header, where + /// [`FilenameExt`](DispositionParam::FilenameExt) with charset UTF-8 may be used instead + /// in case there are Unicode characters in file names. Filename(String), /// An extended file name. It must not exist for `ContentType::Formdata` according to /// [RFC7578 Section 4.2](https://tools.ietf.org/html/rfc7578#section-4.2). @@ -220,7 +225,16 @@ impl DispositionParam { /// ext-token = /// ``` /// -/// **Note**: filename* [must not](https://tools.ietf.org/html/rfc7578#section-4.2) be used within +/// # Note +/// +/// filename is [not supposed](https://tools.ietf.org/html/rfc6266#appendix-D) to contain any +/// non-ASCII characters when used in a *Content-Disposition* HTTP response header, where +/// filename* with charset UTF-8 may be used instead in case there are Unicode characters in file +/// names. +/// filename is [acceptable](https://tools.ietf.org/html/rfc7578#section-4.2) to be UTF-8 encoded +/// directly in a *Content-Disposition* header for *multipart/form-data*, though. +/// +/// filename* [must not](https://tools.ietf.org/html/rfc7578#section-4.2) be used within /// *multipart/form-data*. /// /// # Example @@ -251,6 +265,22 @@ impl DispositionParam { /// }; /// assert_eq!(cd2.get_name(), Some("file")); // field name /// assert_eq!(cd2.get_filename(), Some("bill.odt")); +/// +/// // HTTP response header with Unicode characters in file names +/// let cd3 = ContentDisposition { +/// disposition: DispositionType::Attachment, +/// parameters: vec![ +/// DispositionParam::FilenameExt(ExtendedValue { +/// charset: Charset::Ext(String::from("UTF-8")), +/// language_tag: None, +/// value: String::from("\u{1f600}.svg").into_bytes(), +/// }), +/// // fallback for better compatibility +/// DispositionParam::Filename(String::from("Grinning-Face-Emoji.svg")) +/// ], +/// }; +/// assert_eq!(cd3.get_filename_ext().map(|ev| ev.value.as_ref()), +/// Some("\u{1f600}.svg".as_bytes())); /// ``` /// /// # WARN @@ -333,15 +363,17 @@ impl ContentDisposition { // token: won't contains semicolon according to RFC 2616 Section 2.2 let (token, new_left) = split_once_and_trim(left, ';'); left = new_left; + if token.is_empty() { + // quoted-string can be empty, but token cannot be empty + return Err(crate::error::ParseError::Header); + } token.to_owned() }; - if value.is_empty() { - return Err(crate::error::ParseError::Header); - } let param = if param_name.eq_ignore_ascii_case("name") { DispositionParam::Name(value) } else if param_name.eq_ignore_ascii_case("filename") { + // See also comments in test_from_raw_uncessary_percent_decode. DispositionParam::Filename(value) } else { DispositionParam::Unknown(param_name.to_owned(), value) @@ -466,11 +498,40 @@ impl fmt::Display for DispositionType { impl fmt::Display for DispositionParam { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - // All ASCII control charaters (0-30, 127) excepting horizontal tab, double quote, and + // All ASCII control characters (0-30, 127) including horizontal tab, double quote, and // backslash should be escaped in quoted-string (i.e. "foobar"). - // Ref: RFC6266 S4.1 -> RFC2616 S2.2; RFC 7578 S4.2 -> RFC2183 S2 -> ... . + // Ref: RFC6266 S4.1 -> RFC2616 S3.6 + // filename-parm = "filename" "=" value + // value = token | quoted-string + // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + // qdtext = > + // quoted-pair = "\" CHAR + // TEXT = + // LWS = [CRLF] 1*( SP | HT ) + // OCTET = + // CHAR = + // CTL = + // + // Ref: RFC7578 S4.2 -> RFC2183 S2 -> RFC2045 S5.1 + // parameter := attribute "=" value + // attribute := token + // ; Matching of attributes + // ; is ALWAYS case-insensitive. + // value := token / quoted-string + // token := 1* + // tspecials := "(" / ")" / "<" / ">" / "@" / + // "," / ";" / ":" / "\" / <"> + // "/" / "[" / "]" / "?" / "=" + // ; Must be in quoted-string, + // ; to use within parameter values + // + // + // See also comments in test_from_raw_uncessary_percent_decode. lazy_static! { - static ref RE: Regex = Regex::new("[\x01-\x08\x10\x1F\x7F\"\\\\]").unwrap(); + static ref RE: Regex = Regex::new("[\x00-\x08\x10-\x1F\x7F\"\\\\]").unwrap(); } match self { DispositionParam::Name(ref value) => write!(f, "name={}", value), @@ -774,8 +835,18 @@ mod tests { #[test] fn test_from_raw_uncessary_percent_decode() { + // In fact, RFC7578 (multipart/form-data) Section 2 and 4.2 suggests that filename with + // non-ASCII characters MAY be percent-encoded. + // On the contrary, RFC6266 or other RFCs related to Content-Disposition response header + // do not mention such percent-encoding. + // So, it appears to be undecidable whether to percent-decode or not without + // knowing the usage scenario (multipart/form-data v.s. HTTP response header) and + // inevitable to unnecessarily percent-decode filename with %XX in the former scenario. + // Fortunately, it seems that almost all mainstream browsers just send UTF-8 encoded file + // names in quoted-string format (tested on Edge, IE11, Chrome and Firefox) without + // percent-encoding. So we do not bother to attempt to percent-decode. let a = HeaderValue::from_static( - "form-data; name=photo; filename=\"%74%65%73%74%2e%70%6e%67\"", // Should not be decoded! + "form-data; name=photo; filename=\"%74%65%73%74%2e%70%6e%67\"", ); let a: ContentDisposition = ContentDisposition::from_raw(&a).unwrap(); let b = ContentDisposition { @@ -811,6 +882,9 @@ mod tests { let a = HeaderValue::from_static("inline; filename= "); assert!(ContentDisposition::from_raw(&a).is_err()); + + let a = HeaderValue::from_static("inline; filename=\"\""); + assert!(ContentDisposition::from_raw(&a).expect("parse cd").get_filename().expect("filename").is_empty()); } #[test]