From e33618ed6d222ffbf7d095adaa53500e024204ea Mon Sep 17 00:00:00 2001 From: Rob Ede Date: Wed, 17 Nov 2021 17:43:24 +0000 Subject: [PATCH] ensure content disposition header in multipart (#2451) Co-authored-by: Craig Pastro --- actix-multipart/CHANGES.md | 8 + actix-multipart/src/error.rs | 23 ++- actix-multipart/src/server.rs | 228 ++++++++++++++++++------- src/http/header/content_disposition.rs | 24 ++- 4 files changed, 213 insertions(+), 70 deletions(-) diff --git a/actix-multipart/CHANGES.md b/actix-multipart/CHANGES.md index 09cc707be..97c011393 100644 --- a/actix-multipart/CHANGES.md +++ b/actix-multipart/CHANGES.md @@ -1,6 +1,14 @@ # Changes ## Unreleased - 2021-xx-xx +* Ensure a correct Content-Disposition header is included in every part of a multipart message. [#2451] +* Added `MultipartError::NoContentDisposition` variant. [#2451] +* Since Content-Disposition is now ensured, `Field::content_disposition` is now infallible. [#2451] +* Added `Field::name` method for getting the field name. [#2451] +* `MultipartError` now marks variants with inner errors as the source. [#2451] +* `MultipartError` is now marked as non-exhaustive. [#2451] + +[#2451]: https://github.com/actix/actix-web/pull/2451 ## 0.4.0-beta.7 - 2021-10-20 diff --git a/actix-multipart/src/error.rs b/actix-multipart/src/error.rs index 5f91c60df..de4594b8f 100644 --- a/actix-multipart/src/error.rs +++ b/actix-multipart/src/error.rs @@ -2,39 +2,52 @@ use actix_web::error::{ParseError, PayloadError}; use actix_web::http::StatusCode; use actix_web::ResponseError; -use derive_more::{Display, From}; +use derive_more::{Display, Error, From}; /// A set of errors that can occur during parsing multipart streams -#[derive(Debug, Display, From)] +#[non_exhaustive] +#[derive(Debug, Display, From, Error)] pub enum MultipartError { + /// Content-Disposition header is not found or is not equal to "form-data". + /// + /// According to [RFC 7578](https://tools.ietf.org/html/rfc7578#section-4.2) a + /// Content-Disposition header must always be present and equal to "form-data". + #[display(fmt = "No Content-Disposition `form-data` header")] + NoContentDisposition, + /// Content-Type header is not found - #[display(fmt = "No Content-type header found")] + #[display(fmt = "No Content-Type header found")] NoContentType, + /// Can not parse Content-Type header #[display(fmt = "Can not parse Content-Type header")] ParseContentType, + /// Multipart boundary is not found #[display(fmt = "Multipart boundary is not found")] Boundary, + /// Nested multipart is not supported #[display(fmt = "Nested multipart is not supported")] Nested, + /// Multipart stream is incomplete #[display(fmt = "Multipart stream is incomplete")] Incomplete, + /// Error during field parsing #[display(fmt = "{}", _0)] Parse(ParseError), + /// Payload error #[display(fmt = "{}", _0)] Payload(PayloadError), + /// Not consumed #[display(fmt = "Multipart stream is not consumed")] NotConsumed, } -impl std::error::Error for MultipartError {} - /// Return `BadRequest` for `MultipartError` impl ResponseError for MultipartError { fn status_code(&self) -> StatusCode { diff --git a/actix-multipart/src/server.rs b/actix-multipart/src/server.rs index b7d251537..43f9ccf5f 100644 --- a/actix-multipart/src/server.rs +++ b/actix-multipart/src/server.rs @@ -1,15 +1,20 @@ //! Multipart response payload support. -use std::cell::{Cell, RefCell, RefMut}; -use std::convert::TryFrom; -use std::marker::PhantomData; -use std::pin::Pin; -use std::rc::Rc; -use std::task::{Context, Poll}; -use std::{cmp, fmt}; +use std::{ + cell::{Cell, RefCell, RefMut}, + cmp, + convert::TryFrom, + fmt, + marker::PhantomData, + pin::Pin, + rc::Rc, + task::{Context, Poll}, +}; -use actix_web::error::{ParseError, PayloadError}; -use actix_web::http::header::{self, ContentDisposition, HeaderMap, HeaderName, HeaderValue}; +use actix_web::{ + error::{ParseError, PayloadError}, + http::header::{self, ContentDisposition, HeaderMap, HeaderName, HeaderValue}, +}; use bytes::{Bytes, BytesMut}; use futures_core::stream::{LocalBoxStream, Stream}; use futures_util::stream::StreamExt as _; @@ -40,10 +45,13 @@ enum InnerMultipartItem { enum InnerState { /// Stream eof Eof, + /// Skip data until first boundary FirstBoundary, + /// Reading boundary Boundary, + /// Reading Headers, Headers, } @@ -332,31 +340,55 @@ impl InnerMultipart { return Poll::Pending; }; - // content type - let mut mt = mime::APPLICATION_OCTET_STREAM; - if let Some(content_type) = headers.get(&header::CONTENT_TYPE) { - if let Ok(content_type) = content_type.to_str() { - if let Ok(ct) = content_type.parse::() { - mt = ct; - } - } - } + // According to [RFC 7578](https://tools.ietf.org/html/rfc7578#section-4.2) a + // Content-Disposition header must always be present and set to "form-data". + + let content_disposition = headers + .get(&header::CONTENT_DISPOSITION) + .and_then(|cd| ContentDisposition::from_raw(cd).ok()) + .filter(|content_disposition| { + let is_form_data = + content_disposition.disposition == header::DispositionType::FormData; + + let has_field_name = content_disposition + .parameters + .iter() + .any(|param| matches!(param, header::DispositionParam::Name(_))); + + is_form_data && has_field_name + }); + + let cd = if let Some(content_disposition) = content_disposition { + content_disposition + } else { + return Poll::Ready(Some(Err(MultipartError::NoContentDisposition))); + }; + + let ct: mime::Mime = headers + .get(&header::CONTENT_TYPE) + .and_then(|ct| ct.to_str().ok()) + .and_then(|ct| ct.parse().ok()) + .unwrap_or(mime::APPLICATION_OCTET_STREAM); self.state = InnerState::Boundary; - // nested multipart stream - if mt.type_() == mime::MULTIPART { - Poll::Ready(Some(Err(MultipartError::Nested))) - } else { - let field = Rc::new(RefCell::new(InnerField::new( - self.payload.clone(), - self.boundary.clone(), - &headers, - )?)); - self.item = InnerMultipartItem::Field(Rc::clone(&field)); - - Poll::Ready(Some(Ok(Field::new(safety.clone(cx), headers, mt, field)))) + // nested multipart stream is not supported + if ct.type_() == mime::MULTIPART { + return Poll::Ready(Some(Err(MultipartError::Nested))); } + + let field = + InnerField::new_in_rc(self.payload.clone(), self.boundary.clone(), &headers)?; + + self.item = InnerMultipartItem::Field(Rc::clone(&field)); + + Poll::Ready(Some(Ok(Field::new( + safety.clone(cx), + headers, + ct, + cd, + field, + )))) } } } @@ -371,6 +403,7 @@ impl Drop for InnerMultipart { /// A single field in a multipart stream pub struct Field { ct: mime::Mime, + cd: ContentDisposition, headers: HeaderMap, inner: Rc>, safety: Safety, @@ -381,35 +414,51 @@ impl Field { safety: Safety, headers: HeaderMap, ct: mime::Mime, + cd: ContentDisposition, inner: Rc>, ) -> Self { Field { ct, + cd, headers, inner, safety, } } - /// Get a map of headers + /// Returns a reference to the field's header map. pub fn headers(&self) -> &HeaderMap { &self.headers } - /// Get the content type of the field + /// Returns a reference to the field's content (mime) type. pub fn content_type(&self) -> &mime::Mime { &self.ct } - /// Get the content disposition of the field, if it exists - pub fn content_disposition(&self) -> Option { - // RFC 7578: 'Each part MUST contain a Content-Disposition header field - // where the disposition type is "form-data".' - if let Some(content_disposition) = self.headers.get(&header::CONTENT_DISPOSITION) { - ContentDisposition::from_raw(content_disposition).ok() - } else { - None - } + /// Returns the field's Content-Disposition. + /// + /// Per [RFC 7578 §4.2]: 'Each part MUST contain a Content-Disposition header field where the + /// disposition type is "form-data". The Content-Disposition header field MUST also contain an + /// additional parameter of "name"; the value of the "name" parameter is the original field name + /// from the form.' + /// + /// This crate validates that it exists before returning a `Field`. As such, it is safe to + /// unwrap `.content_disposition().get_name()`. The [name](Self::name) method is provided as + /// a convenience. + /// + /// [RFC 7578 §4.2]: https://datatracker.ietf.org/doc/html/rfc7578#section-4.2 + pub fn content_disposition(&self) -> &ContentDisposition { + &self.cd + } + + /// Returns the field's name. + /// + /// See [content_disposition] regarding guarantees about + pub fn name(&self) -> &str { + self.content_disposition() + .get_name() + .expect("field name should be guaranteed to exist in multipart form-data") } } @@ -451,20 +500,23 @@ struct InnerField { } impl InnerField { + fn new_in_rc( + payload: PayloadRef, + boundary: String, + headers: &HeaderMap, + ) -> Result>, PayloadError> { + Self::new(payload, boundary, headers).map(|this| Rc::new(RefCell::new(this))) + } + fn new( payload: PayloadRef, boundary: String, headers: &HeaderMap, ) -> Result { let len = if let Some(len) = headers.get(&header::CONTENT_LENGTH) { - if let Ok(s) = len.to_str() { - if let Ok(len) = s.parse::() { - Some(len) - } else { - return Err(PayloadError::Incomplete(None)); - } - } else { - return Err(PayloadError::Incomplete(None)); + match len.to_str().ok().and_then(|len| len.parse::().ok()) { + Some(len) => Some(len), + None => return Err(PayloadError::Incomplete(None)), } } else { None @@ -658,9 +710,8 @@ impl Clone for PayloadRef { } } -/// Counter. It tracks of number of clones of payloads and give access to -/// payload only to top most task panics if Safety get destroyed and it not top -/// most task. +/// Counter. It tracks of number of clones of payloads and give access to payload only to top most +/// task panics if Safety get destroyed and it not top most task. #[derive(Debug)] struct Safety { task: LocalWaker, @@ -707,11 +758,12 @@ impl Drop for Safety { if Rc::strong_count(&self.payload) != self.level { self.clean.set(true); } + self.task.wake(); } } -/// Payload buffer +/// Payload buffer. struct PayloadBuffer { eof: bool, buf: BytesMut, @@ -719,7 +771,7 @@ struct PayloadBuffer { } impl PayloadBuffer { - /// Create new `PayloadBuffer` instance + /// Constructs new `PayloadBuffer` instance. fn new(stream: S) -> Self where S: Stream> + 'static, @@ -767,7 +819,7 @@ impl PayloadBuffer { } /// Read until specified ending - pub fn read_until(&mut self, line: &[u8]) -> Result, MultipartError> { + fn read_until(&mut self, line: &[u8]) -> Result, MultipartError> { let res = twoway::find_bytes(&self.buf, line) .map(|idx| self.buf.split_to(idx + line.len()).freeze()); @@ -779,12 +831,12 @@ impl PayloadBuffer { } /// Read bytes until new line delimiter - pub fn readline(&mut self) -> Result, MultipartError> { + fn readline(&mut self) -> Result, MultipartError> { self.read_until(b"\n") } /// Read bytes until new line delimiter or eof - pub fn readline_or_eof(&mut self) -> Result, MultipartError> { + fn readline_or_eof(&mut self) -> Result, MultipartError> { match self.readline() { Err(MultipartError::Incomplete) if self.eof => Ok(Some(self.buf.split().freeze())), line => line, @@ -792,7 +844,7 @@ impl PayloadBuffer { } /// Put unprocessed data back to the buffer - pub fn unprocessed(&mut self, data: Bytes) { + fn unprocessed(&mut self, data: Bytes) { let buf = BytesMut::from(data.as_ref()); let buf = std::mem::replace(&mut self.buf, buf); self.buf.extend_from_slice(&buf); @@ -914,6 +966,7 @@ mod tests { Content-Type: text/plain; charset=utf-8\r\nContent-Length: 4\r\n\r\n\ test\r\n\ --abbc761f78ff4d7cb7573b5a23f96ef0\r\n\ + Content-Disposition: form-data; name=\"file\"; filename=\"fn.txt\"\r\n\ Content-Type: text/plain; charset=utf-8\r\nContent-Length: 4\r\n\r\n\ data\r\n\ --abbc761f78ff4d7cb7573b5a23f96ef0--\r\n", @@ -965,7 +1018,7 @@ mod tests { let mut multipart = Multipart::new(&headers, payload); match multipart.next().await { Some(Ok(mut field)) => { - let cd = field.content_disposition().unwrap(); + let cd = field.content_disposition(); assert_eq!(cd.disposition, DispositionType::FormData); assert_eq!(cd.parameters[0], DispositionParam::Name("file".into())); @@ -1027,7 +1080,7 @@ mod tests { let mut multipart = Multipart::new(&headers, payload); match multipart.next().await.unwrap() { Ok(mut field) => { - let cd = field.content_disposition().unwrap(); + let cd = field.content_disposition(); assert_eq!(cd.disposition, DispositionType::FormData); assert_eq!(cd.parameters[0], DispositionParam::Name("file".into())); @@ -1182,4 +1235,59 @@ mod tests { _ => unreachable!(), } } + + #[actix_rt::test] + async fn no_content_disposition() { + let bytes = Bytes::from( + "testasdadsad\r\n\ + --abbc761f78ff4d7cb7573b5a23f96ef0\r\n\ + Content-Type: text/plain; charset=utf-8\r\nContent-Length: 4\r\n\r\n\ + test\r\n\ + --abbc761f78ff4d7cb7573b5a23f96ef0\r\n", + ); + let mut headers = HeaderMap::new(); + headers.insert( + header::CONTENT_TYPE, + header::HeaderValue::from_static( + "multipart/mixed; boundary=\"abbc761f78ff4d7cb7573b5a23f96ef0\"", + ), + ); + let payload = SlowStream::new(bytes); + + let mut multipart = Multipart::new(&headers, payload); + let res = multipart.next().await.unwrap(); + assert!(res.is_err()); + assert!(matches!( + res.unwrap_err(), + MultipartError::NoContentDisposition, + )); + } + + #[actix_rt::test] + async fn no_name_in_content_disposition() { + let bytes = Bytes::from( + "testasdadsad\r\n\ + --abbc761f78ff4d7cb7573b5a23f96ef0\r\n\ + Content-Disposition: form-data; filename=\"fn.txt\"\r\n\ + Content-Type: text/plain; charset=utf-8\r\nContent-Length: 4\r\n\r\n\ + test\r\n\ + --abbc761f78ff4d7cb7573b5a23f96ef0\r\n", + ); + let mut headers = HeaderMap::new(); + headers.insert( + header::CONTENT_TYPE, + header::HeaderValue::from_static( + "multipart/mixed; boundary=\"abbc761f78ff4d7cb7573b5a23f96ef0\"", + ), + ); + let payload = SlowStream::new(bytes); + + let mut multipart = Multipart::new(&headers, payload); + let res = multipart.next().await.unwrap(); + assert!(res.is_err()); + assert!(matches!( + res.unwrap_err(), + MultipartError::NoContentDisposition, + )); + } } diff --git a/src/http/header/content_disposition.rs b/src/http/header/content_disposition.rs index fdd8a7dac..6d07a41bd 100644 --- a/src/http/header/content_disposition.rs +++ b/src/http/header/content_disposition.rs @@ -34,15 +34,18 @@ fn split_once_and_trim(haystack: &str, needle: char) -> (&str, &str) { /// The implied disposition of the content of the HTTP body. #[derive(Clone, Debug, PartialEq)] pub enum DispositionType { - /// Inline implies default processing + /// Inline implies default processing. Inline, + /// Attachment implies that the recipient should prompt the user to save the response locally, /// rather than process it normally (as per its media type). Attachment, - /// Used in *multipart/form-data* as defined in - /// [RFC7578](https://tools.ietf.org/html/rfc7578) to carry the field name and the file name. + + /// Used in *multipart/form-data* as defined in [RFC7578](https://tools.ietf.org/html/rfc7578) + /// to carry the field name and optional filename. FormData, - /// Extension type. Should be handled by recipients the same way as Attachment + + /// Extension type. Should be handled by recipients the same way as Attachment. Ext(String), } @@ -76,6 +79,7 @@ pub enum DispositionParam { /// For [`DispositionType::FormData`] (i.e. *multipart/form-data*), the name of an field from /// the form. Name(String), + /// A plain file name. /// /// It is [not supposed](https://tools.ietf.org/html/rfc6266#appendix-D) to contain any @@ -83,14 +87,17 @@ pub enum DispositionParam { /// [`FilenameExt`](DispositionParam::FilenameExt) with charset UTF-8 may be used instead /// in case there are Unicode characters in file names. Filename(String), + /// An extended file name. It must not exist for `ContentType::Formdata` according to /// [RFC7578 Section 4.2](https://tools.ietf.org/html/rfc7578#section-4.2). FilenameExt(ExtendedValue), + /// An unrecognized regular parameter as defined in /// [RFC5987](https://tools.ietf.org/html/rfc5987) as *reg-parameter*, in /// [RFC6266](https://tools.ietf.org/html/rfc6266) as *token "=" value*. Recipients should /// ignore unrecognizable parameters. Unknown(String, String), + /// An unrecognized extended parameter as defined in /// [RFC5987](https://tools.ietf.org/html/rfc5987) as *ext-parameter*, in /// [RFC6266](https://tools.ietf.org/html/rfc6266) as *ext-token "=" ext-value*. The single @@ -205,7 +212,6 @@ impl DispositionParam { /// itself, *Content-Disposition* has no effect. /// /// # ABNF - /// ```text /// content-disposition = "Content-Disposition" ":" /// disposition-type *( ";" disposition-parm ) @@ -289,10 +295,12 @@ impl DispositionParam { /// If "filename" parameter is supplied, do not use the file name blindly, check and possibly /// change to match local file system conventions if applicable, and do not use directory path /// information that may be present. See [RFC2183](https://tools.ietf.org/html/rfc2183#section-2.3). +// TODO: private fields and use smallvec #[derive(Clone, Debug, PartialEq)] pub struct ContentDisposition { /// The disposition type pub disposition: DispositionType, + /// Disposition parameters pub parameters: Vec, } @@ -509,22 +517,28 @@ impl fmt::Display for DispositionParam { // // // See also comments in test_from_raw_unnecessary_percent_decode. + static RE: Lazy = Lazy::new(|| Regex::new("[\x00-\x08\x10-\x1F\x7F\"\\\\]").unwrap()); + match self { DispositionParam::Name(ref value) => write!(f, "name={}", value), + DispositionParam::Filename(ref value) => { write!(f, "filename=\"{}\"", RE.replace_all(value, "\\$0").as_ref()) } + DispositionParam::Unknown(ref name, ref value) => write!( f, "{}=\"{}\"", name, &RE.replace_all(value, "\\$0").as_ref() ), + DispositionParam::FilenameExt(ref ext_value) => { write!(f, "filename*={}", ext_value) } + DispositionParam::UnknownExt(ref name, ref ext_value) => { write!(f, "{}*={}", name, ext_value) }