1
0
mirror of https://github.com/actix/actix-extras.git synced 2024-11-28 09:42:40 +01:00
actix-extras/src/recognizer.rs

494 lines
15 KiB
Rust
Raw Normal View History

use std;
2017-10-17 04:21:24 +02:00
use std::rc::Rc;
use std::path::PathBuf;
use std::ops::Index;
use std::str::FromStr;
2017-10-17 04:21:24 +02:00
use std::collections::HashMap;
use failure::Fail;
use http::{StatusCode};
2017-10-17 04:21:24 +02:00
use regex::{Regex, RegexSet, Captures};
use body::Body;
use httpresponse::HttpResponse;
use error::{ResponseError, UriSegmentError};
/// A trait to abstract the idea of creating a new instance of a type from a path parameter.
///
/// Implementors turn the raw text of a matched path parameter into a typed
/// value. The error type must implement `ResponseError`.
pub trait FromParam: Sized {
/// The associated error which can be returned from parsing.
type Err: ResponseError;
/// Parses a string `s` to return a value of this type.
///
/// Returns `Err(Self::Err)` when `s` cannot be converted.
fn from_param(s: &str) -> Result<Self, Self::Err>;
}
/// Route match information
///
/// If the resource path contains variable patterns, `Params` stores the
/// values matched for these variables.
#[derive(Debug)]
pub struct Params {
// Owned copy of the text the captures were taken from.
text: String,
// One entry per capture group: the `(start, end)` byte range into `text`,
// or `None` when the group did not participate in the match.
matches: Vec<Option<(usize, usize)>>,
// Parameter name -> capture group index.
names: Rc<HashMap<String, usize>>,
}
impl Params {
pub(crate) fn new(names: Rc<HashMap<String, usize>>,
text: &str,
captures: &Captures) -> Self
{
Params {
names,
text: text.into(),
matches: captures
.iter()
.map(|capture| capture.map(|m| (m.start(), m.end())))
.collect(),
}
}
pub(crate) fn empty() -> Self
{
Params {
text: String::new(),
names: Rc::new(HashMap::new()),
matches: Vec::new(),
}
}
/// Check if there are any matched patterns
pub fn is_empty(&self) -> bool {
self.names.is_empty()
}
fn by_idx(&self, index: usize) -> Option<&str> {
self.matches
.get(index + 1)
.and_then(|m| m.map(|(start, end)| &self.text[start..end]))
}
/// Get matched parameter by name without type conversion
pub fn get(&self, key: &str) -> Option<&str> {
self.names.get(key).and_then(|&i| self.by_idx(i - 1))
}
/// Get matched `FromParam` compatible parameter by name.
///
/// If keyed parameter is not available empty string is used as default value.
///
2017-12-04 22:32:05 +01:00
/// ```rust
/// # extern crate actix_web;
/// # use actix_web::*;
/// fn index(req: HttpRequest) -> Result<String> {
/// let ivalue: isize = req.match_info().query("val")?;
2017-12-04 22:32:05 +01:00
/// Ok(format!("isuze value: {:?}", ivalue))
/// }
2017-12-04 22:32:05 +01:00
/// # fn main() {}
/// ```
pub fn query<T: FromParam>(&self, key: &str) -> Result<T, <T as FromParam>::Err>
{
if let Some(s) = self.get(key) {
T::from_param(s)
} else {
T::from_param("")
}
}
}
/// Look up a matched parameter by name with `params["name"]`.
///
/// Panics when no value is available for `name`.
impl<'a> Index<&'a str> for Params {
    type Output = str;

    fn index(&self, name: &'a str) -> &str {
        match self.get(name) {
            Some(value) => value,
            None => panic!("Value for parameter is not available"),
        }
    }
}
/// Creates a `PathBuf` from a path parameter by splitting it on `/` and
/// pushing the accepted segments. Empty segments are skipped. If a segment is
/// equal to "..", the previously pushed segment (if any) is removed.
///
/// This function works on the text it is given; it performs no
/// percent-decoding itself.
///
/// For security purposes, if a segment meets any of the following conditions,
/// an `Err` is returned indicating the condition met:
///
/// * Segment starts with any of: `.` (except `..`), `*`
/// * Segment ends with any of: `:`, `>`, `<`
/// * On Windows, segment contains any of: '\'
impl FromParam for PathBuf {
    type Err = UriSegmentError;

    fn from_param(val: &str) -> Result<PathBuf, UriSegmentError> {
        let mut path = PathBuf::new();
        for part in val.split('/') {
            if part == ".." {
                path.pop();
                continue;
            }
            match part.chars().next() {
                // Empty segment: leading, trailing or doubled slash.
                None => continue,
                Some(c) if c == '.' || c == '*' => {
                    return Err(UriSegmentError::BadStart(c))
                }
                Some(_) => {}
            }
            match part.chars().next_back() {
                Some(c) if c == ':' || c == '>' || c == '<' => {
                    return Err(UriSegmentError::BadEnd(c))
                }
                _ => {}
            }
            if cfg!(windows) && part.contains('\\') {
                return Err(UriSegmentError::BadChar('\\'));
            }
            path.push(part);
        }
        Ok(path)
    }
}
/// Error wrapper around a parse error of type `T`.
///
/// The `ResponseError` implementation in this file renders it as a
/// `400 Bad Request` response with an empty body.
#[derive(Fail, Debug)]
#[fail(display="Error")]
pub struct BadRequest<T>(T);
impl<T> BadRequest<T> {
pub fn cause(&self) -> &T {
&self.0
}
}
/// Every `BadRequest` is answered with status `400 Bad Request` and an empty
/// body; the wrapped error is not included in the response.
impl<T> ResponseError for BadRequest<T>
where T: Send + Sync + std::fmt::Debug +std::fmt::Display + 'static,
BadRequest<T>: Fail
{
/// Build the `400 Bad Request` response for this error.
fn error_response(&self) -> HttpResponse {
HttpResponse::new(StatusCode::BAD_REQUEST, Body::Empty)
}
}
macro_rules! FROM_STR {
($type:ty) => {
impl FromParam for $type {
type Err = BadRequest<<$type as FromStr>::Err>;
fn from_param(val: &str) -> Result<Self, Self::Err> {
2017-12-02 23:58:22 +01:00
<$type as FromStr>::from_str(val).map_err(BadRequest)
}
}
}
}
FROM_STR!(u8);
FROM_STR!(u16);
FROM_STR!(u32);
FROM_STR!(u64);
FROM_STR!(usize);
FROM_STR!(i8);
FROM_STR!(i16);
FROM_STR!(i32);
FROM_STR!(i64);
FROM_STR!(isize);
FROM_STR!(f32);
FROM_STR!(f64);
FROM_STR!(String);
FROM_STR!(std::net::IpAddr);
FROM_STR!(std::net::Ipv4Addr);
FROM_STR!(std::net::Ipv6Addr);
FROM_STR!(std::net::SocketAddr);
FROM_STR!(std::net::SocketAddrV4);
FROM_STR!(std::net::SocketAddrV6);
2017-10-17 04:21:24 +02:00
/// Matches request paths against a set of route patterns and returns the
/// value of type `T` registered for the first matching pattern.
pub struct RouteRecognizer<T> {
// Number of leading bytes of the request path that are skipped before matching.
prefix: usize,
// All route regexes compiled into one set, in the same order as `routes`.
patterns: RegexSet,
// Per-route compiled pattern and the associated value.
routes: Vec<(Pattern, T)>,
}
2017-10-24 04:28:23 +02:00
impl<T> Default for RouteRecognizer<T> {
/// Create a recognizer with a zero-length prefix and no routes.
///
/// NOTE(review): the pattern set is built from a single empty pattern while
/// `routes` is empty, so the two are not index-aligned until `set_routes`
/// is called.
fn default() -> Self {
RouteRecognizer {
prefix: 0,
patterns: RegexSet::new([""].iter()).unwrap(),
routes: Vec::new(),
}
}
}
2017-10-17 04:21:24 +02:00
impl<T> RouteRecognizer<T> {
2017-10-24 04:28:23 +02:00
2017-11-27 02:30:35 +01:00
pub fn new<P: Into<String>, U>(prefix: P, routes: U) -> Self
2017-12-05 20:31:35 +01:00
where U: IntoIterator<Item=(String, Option<String>, T)>
{
2017-10-17 04:21:24 +02:00
let mut paths = Vec::new();
let mut handlers = Vec::new();
for item in routes {
2017-12-05 20:31:35 +01:00
let (pat, elements) = parse(&item.0);
handlers.push((Pattern::new(&pat, elements), item.2));
2017-10-17 04:21:24 +02:00
paths.push(pat);
};
let regset = RegexSet::new(&paths);
RouteRecognizer {
2017-11-27 02:30:35 +01:00
prefix: prefix.into().len() - 1,
2017-10-17 04:21:24 +02:00
patterns: regset.unwrap(),
routes: handlers,
}
}
2017-12-05 20:31:35 +01:00
pub fn set_routes(&mut self, routes: Vec<(&str, Option<&str>, T)>) {
2017-10-24 04:28:23 +02:00
let mut paths = Vec::new();
let mut handlers = Vec::new();
for item in routes {
2017-12-05 20:31:35 +01:00
let (pat, elements) = parse(item.0);
handlers.push((Pattern::new(&pat, elements), item.2));
2017-10-24 04:28:23 +02:00
paths.push(pat);
};
self.patterns = RegexSet::new(&paths).unwrap();
self.routes = handlers;
}
2017-11-27 02:30:35 +01:00
pub fn set_prefix<P: Into<String>>(&mut self, prefix: P) {
let p = prefix.into();
2017-10-28 04:26:53 +02:00
if p.ends_with('/') {
self.prefix = p.len() - 1;
} else {
self.prefix = p.len();
}
2017-10-24 04:28:23 +02:00
}
2017-10-17 04:21:24 +02:00
pub fn recognize(&self, path: &str) -> Option<(Option<Params>, &T)> {
2017-10-28 04:26:53 +02:00
let p = &path[self.prefix..];
if p.is_empty() {
if let Some(idx) = self.patterns.matches("/").into_iter().next() {
let (ref pattern, ref route) = self.routes[idx];
return Some((pattern.match_info(&path[self.prefix..]), route))
}
} else if let Some(idx) = self.patterns.matches(p).into_iter().next() {
2017-10-17 04:21:24 +02:00
let (ref pattern, ref route) = self.routes[idx];
2017-10-28 04:26:53 +02:00
return Some((pattern.match_info(&path[self.prefix..]), route))
2017-10-17 04:21:24 +02:00
}
2017-10-28 04:26:53 +02:00
None
2017-10-17 04:21:24 +02:00
}
}
2017-12-05 20:31:35 +01:00
/// One piece of a route pattern as split up by `parse`.
enum PatternElement {
// Literal text found outside of `{...}`.
Str(String),
// Name of a `{name}` / `{name:pattern}` parameter.
Var(String),
}
2017-10-17 04:21:24 +02:00
struct Pattern {
re: Regex,
names: Rc<HashMap<String, usize>>,
2017-12-05 20:31:35 +01:00
elements: Vec<PatternElement>,
2017-10-17 04:21:24 +02:00
}
impl Pattern {
2017-12-05 20:31:35 +01:00
fn new(pattern: &str, elements: Vec<PatternElement>) -> Self {
2017-10-17 04:21:24 +02:00
let re = Regex::new(pattern).unwrap();
let names = re.capture_names()
.enumerate()
.filter_map(|(i, name)| name.map(|name| (name.to_owned(), i)))
.collect();
Pattern {
re,
names: Rc::new(names),
2017-12-05 20:31:35 +01:00
elements: elements,
2017-10-17 04:21:24 +02:00
}
}
fn match_info(&self, text: &str) -> Option<Params> {
let captures = match self.re.captures(text) {
Some(captures) => captures,
None => return None,
};
2017-11-27 02:30:35 +01:00
Some(Params::new(Rc::clone(&self.names), text, &captures))
2017-10-17 04:21:24 +02:00
}
}
pub(crate) fn check_pattern(path: &str) {
2017-12-05 20:31:35 +01:00
if let Err(err) = Regex::new(&parse(path).0) {
2017-10-17 04:21:24 +02:00
panic!("Wrong path pattern: \"{}\" {}", path, err);
}
}
2017-12-05 20:31:35 +01:00
fn parse(pattern: &str) -> (String, Vec<PatternElement>) {
const DEFAULT_PATTERN: &str = "[^/]+";
2017-10-17 04:21:24 +02:00
let mut re = String::from("^/");
2017-12-05 20:31:35 +01:00
let mut el = String::new();
2017-10-17 04:21:24 +02:00
let mut in_param = false;
let mut in_param_pattern = false;
let mut param_name = String::new();
let mut param_pattern = String::from(DEFAULT_PATTERN);
2017-12-05 20:31:35 +01:00
let mut elems = Vec::new();
2017-10-17 04:21:24 +02:00
for (index, ch) in pattern.chars().enumerate() {
// All routes must have a leading slash so its optional to have one
if index == 0 && ch == '/' {
continue;
}
if in_param {
// In parameter segment: `{....}`
if ch == '}' {
2017-12-05 20:31:35 +01:00
elems.push(PatternElement::Var(String::from(String::from(param_name.as_str()))));
2017-12-04 22:34:55 +01:00
re.push_str(&format!(r"(?P<{}>{})", &param_name, &param_pattern));
2017-10-17 04:21:24 +02:00
param_name.clear();
param_pattern = String::from(DEFAULT_PATTERN);
in_param_pattern = false;
in_param = false;
} else if ch == ':' {
// The parameter name has been determined; custom pattern land
in_param_pattern = true;
param_pattern.clear();
} else if in_param_pattern {
// Ignore leading whitespace for pattern
if !(ch == ' ' && param_pattern.is_empty()) {
param_pattern.push(ch);
}
} else {
param_name.push(ch);
}
} else if ch == '{' {
in_param = true;
2017-12-05 20:31:35 +01:00
elems.push(PatternElement::Str(String::from(el.as_str())));
el.clear();
2017-10-17 04:21:24 +02:00
} else {
re.push(ch);
2017-12-05 20:31:35 +01:00
el.push(ch);
2017-10-17 04:21:24 +02:00
}
}
re.push('$');
2017-12-05 20:31:35 +01:00
(re, elems)
2017-10-17 04:21:24 +02:00
}
2017-12-01 00:48:09 +01:00
#[cfg(test)]
mod tests {
use regex::Regex;
use super::*;
2017-12-02 19:43:14 +01:00
use std::iter::FromIterator;
// `PathBuf::from_param` rejects unsafe segments, skips empty segments and
// resolves `..` against the previously pushed segment.
#[test]
fn test_path_buf() {
// Segments starting with `.` or `*` are rejected.
assert_eq!(PathBuf::from_param("/test/.tt"), Err(UriSegmentError::BadStart('.')));
assert_eq!(PathBuf::from_param("/test/*tt"), Err(UriSegmentError::BadStart('*')));
// Segments ending with `:`, `<` or `>` are rejected.
assert_eq!(PathBuf::from_param("/test/tt:"), Err(UriSegmentError::BadEnd(':')));
assert_eq!(PathBuf::from_param("/test/tt<"), Err(UriSegmentError::BadEnd('<')));
assert_eq!(PathBuf::from_param("/test/tt>"), Err(UriSegmentError::BadEnd('>')));
// Leading and trailing slashes produce empty segments, which are skipped.
assert_eq!(PathBuf::from_param("/seg1/seg2/"),
Ok(PathBuf::from_iter(vec!["seg1", "seg2"])));
// `..` removes the segment pushed before it.
assert_eq!(PathBuf::from_param("/seg1/../seg2/"),
Ok(PathBuf::from_iter(vec!["seg2"])));
}
2017-12-01 00:48:09 +01:00
2017-12-01 04:34:33 +01:00
// End-to-end check: each path is routed to the expected value and exposes the
// expected parameters, both by name and by position.
#[test]
fn test_recognizer() {
    let mut rec = RouteRecognizer::<usize>::default();
    rec.set_routes(vec![
        ("/name", None, 1),
        ("/name/{val}", None, 2),
        ("/name/{val}/index.html", None, 3),
        ("/v{val}/{val2}/index.html", None, 4),
        ("/v/{tail:.*}", None, 5),
    ]);

    // Static route: matches, but carries no parameters.
    let (params, val) = rec.recognize("/name").unwrap();
    assert_eq!(*val, 1);
    assert!(params.unwrap().is_empty());

    // Single parameter, read through `get` and through indexing.
    let (params, val) = rec.recognize("/name/value").unwrap();
    assert_eq!(*val, 2);
    let params = params.unwrap();
    assert!(!params.is_empty());
    assert_eq!(params.get("val").unwrap(), "value");
    assert_eq!(&params["val"], "value");

    // Parameter followed by a literal tail.
    let (params, val) = rec.recognize("/name/value2/index.html").unwrap();
    assert_eq!(*val, 3);
    let params = params.unwrap();
    assert!(!params.is_empty());
    assert_eq!(params.get("val").unwrap(), "value2");
    assert_eq!(params.by_idx(0).unwrap(), "value2");

    // Two parameters, the first one glued to a literal prefix.
    let (params, val) = rec.recognize("/vtest/ttt/index.html").unwrap();
    assert_eq!(*val, 4);
    let params = params.unwrap();
    assert!(!params.is_empty());
    assert_eq!(params.get("val").unwrap(), "test");
    assert_eq!(params.get("val2").unwrap(), "ttt");
    assert_eq!(params.by_idx(0).unwrap(), "test");
    assert_eq!(params.by_idx(1).unwrap(), "ttt");

    // Custom pattern `.*` also swallows slashes.
    let (params, val) = rec.recognize("/v/blah-blah/index.html").unwrap();
    assert_eq!(*val, 5);
    let params = params.unwrap();
    assert!(!params.is_empty());
    assert_eq!(params.get("tail").unwrap(), "blah-blah/index.html");
}
2017-12-01 00:48:09 +01:00
fn assert_parse(pattern: &str, expected_re: &str) -> Regex {
2017-12-05 20:43:41 +01:00
let (re_str, _) = parse(pattern);
2017-12-01 00:48:09 +01:00
assert_eq!(&*re_str, expected_re);
Regex::new(&re_str).unwrap()
}
// Patterns without parameters become fully anchored regexes that match the
// exact path only.
#[test]
fn test_parse_static() {
let re = assert_parse("/", r"^/$");
assert!(re.is_match("/"));
assert!(!re.is_match("/a"));
let re = assert_parse("/name", r"^/name$");
assert!(re.is_match("/name"));
assert!(!re.is_match("/name1"));
assert!(!re.is_match("/name/"));
assert!(!re.is_match("/name~"));
// A trailing slash in the pattern is significant.
let re = assert_parse("/name/", r"^/name/$");
assert!(re.is_match("/name/"));
assert!(!re.is_match("/name"));
assert!(!re.is_match("/name/gs"));
let re = assert_parse("/user/profile", r"^/user/profile$");
assert!(re.is_match("/user/profile"));
assert!(!re.is_match("/user/profile/profile"));
}
// `{name}` parameters become named capture groups matching one path segment.
#[test]
fn test_parse_param() {
let re = assert_parse("/user/{id}", r"^/user/(?P<id>[^/]+)$");
assert!(re.is_match("/user/profile"));
assert!(re.is_match("/user/2345"));
// The default parameter pattern does not match across `/`.
assert!(!re.is_match("/user/2345/"));
assert!(!re.is_match("/user/2345/sdg"));
// The parameter is reachable both by group index and by name.
let captures = re.captures("/user/profile").unwrap();
assert_eq!(captures.get(1).unwrap().as_str(), "profile");
assert_eq!(captures.name("id").unwrap().as_str(), "profile");
let captures = re.captures("/user/1245125").unwrap();
assert_eq!(captures.get(1).unwrap().as_str(), "1245125");
assert_eq!(captures.name("id").unwrap().as_str(), "1245125");
// A parameter may directly follow literal text within a segment.
let re = assert_parse(
"/v{version}/resource/{id}",
r"^/v(?P<version>[^/]+)/resource/(?P<id>[^/]+)$",
);
assert!(re.is_match("/v1/resource/320120"));
// The default pattern requires at least one character.
assert!(!re.is_match("/v/resource/1"));
assert!(!re.is_match("/resource"));
let captures = re.captures("/v151/resource/adahg32").unwrap();
assert_eq!(captures.get(1).unwrap().as_str(), "151");
assert_eq!(captures.name("version").unwrap().as_str(), "151");
assert_eq!(captures.name("id").unwrap().as_str(), "adahg32");
}
}