mirror of
https://github.com/fafhrd91/actix-web
synced 2025-06-26 06:57:43 +02:00
Add `unicode` feature to switch between `regex` and `regex-lite` crates as a trade-off between full unicode support and binary size (#3291)
* Add `unicode` feature to switch between `regex` and `regex-lite` as a trade-off between full unicode support and binary size.
* Update CHANGES.md
* Update CHANGES.md
* refactor: move regexset code selection to own module
* docs: add docs within RegexSet module
* chore: restore manifests
* test: ensure all actix-router codepaths are tested

---------

Co-authored-by: Rob Ede <robjtede@icloud.com>
This commit is contained in:
@ -2,6 +2,7 @@
|
||||
|
||||
## Unreleased
|
||||
|
||||
- Add `unicode` crate feature (on-by-default) to switch between `regex` and `regex-lite` as a trade-off between full unicode support and binary size.
|
||||
- Minimum supported Rust version (MSRV) is now 1.72.
|
||||
|
||||
## 0.5.2
|
||||
|
@ -17,12 +17,16 @@ name = "actix_router"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[features]
|
||||
default = ["http"]
|
||||
default = ["http", "unicode"]
|
||||
http = ["dep:http"]
|
||||
unicode = ["dep:regex"]
|
||||
|
||||
[dependencies]
|
||||
bytestring = ">=0.1.5, <2"
|
||||
cfg-if = "1"
|
||||
http = { version = "0.2.7", optional = true }
|
||||
regex = "1.5"
|
||||
regex = { version = "1.5", optional = true }
|
||||
regex-lite = "0.1"
|
||||
serde = "1"
|
||||
tracing = { version = "0.1.30", default-features = false, features = ["log"] }
|
||||
|
||||
@ -35,6 +39,7 @@ percent-encoding = "2.1"
|
||||
[[bench]]
|
||||
name = "router"
|
||||
harness = false
|
||||
required-features = ["unicode"]
|
||||
|
||||
[[bench]]
|
||||
name = "quoter"
|
||||
|
@ -10,6 +10,7 @@ mod de;
|
||||
mod path;
|
||||
mod pattern;
|
||||
mod quoter;
|
||||
mod regex_set;
|
||||
mod resource;
|
||||
mod resource_path;
|
||||
mod router;
|
||||
|
66
actix-router/src/regex_set.rs
Normal file
66
actix-router/src/regex_set.rs
Normal file
@ -0,0 +1,66 @@
|
||||
//! Abstraction over `regex` and `regex-lite` depending on whether we have `unicode` crate feature
|
||||
//! enabled.
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
#[cfg(feature = "unicode")]
|
||||
pub(crate) use regex::{escape, Regex};
|
||||
#[cfg(not(feature = "unicode"))]
|
||||
pub(crate) use regex_lite::{escape, Regex};
|
||||
|
||||
#[cfg(feature = "unicode")]
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct RegexSet(regex::RegexSet);
|
||||
|
||||
#[cfg(not(feature = "unicode"))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct RegexSet(Vec<regex_lite::Regex>);
|
||||
|
||||
impl RegexSet {
|
||||
/// Create a new regex set.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if any path patterns are malformed.
|
||||
pub(crate) fn new(re_set: Vec<String>) -> Self {
|
||||
cfg_if! {
|
||||
if #[cfg(feature = "unicode")] {
|
||||
Self(regex::RegexSet::new(re_set).unwrap())
|
||||
} else {
|
||||
Self(re_set.iter().map(|re| Regex::new(re).unwrap()).collect())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new empty regex set.
|
||||
pub(crate) fn empty() -> Self {
|
||||
cfg_if! {
|
||||
if #[cfg(feature = "unicode")] {
|
||||
Self(regex::RegexSet::empty())
|
||||
} else {
|
||||
Self(Vec::new())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if regex set matches `path`.
|
||||
pub(crate) fn is_match(&self, path: &str) -> bool {
|
||||
cfg_if! {
|
||||
if #[cfg(feature = "unicode")] {
|
||||
self.0.is_match(path)
|
||||
} else {
|
||||
self.0.iter().any(|re| re.is_match(path))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns index within `path` of first match.
|
||||
pub(crate) fn first_match_idx(&self, path: &str) -> Option<usize> {
|
||||
cfg_if! {
|
||||
if #[cfg(feature = "unicode")] {
|
||||
self.0.matches(path).into_iter().next()
|
||||
} else {
|
||||
Some(self.0.iter().enumerate().find(|(_, re)| re.is_match(path))?.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -5,10 +5,13 @@ use std::{
|
||||
mem,
|
||||
};
|
||||
|
||||
use regex::{escape, Regex, RegexSet};
|
||||
use tracing::error;
|
||||
|
||||
use crate::{path::PathItem, IntoPatterns, Patterns, Resource, ResourcePath};
|
||||
use crate::{
|
||||
path::PathItem,
|
||||
regex_set::{escape, Regex, RegexSet},
|
||||
IntoPatterns, Patterns, Resource, ResourcePath,
|
||||
};
|
||||
|
||||
const MAX_DYNAMIC_SEGMENTS: usize = 16;
|
||||
|
||||
@ -233,7 +236,7 @@ enum PatternSegment {
|
||||
Var(String),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
enum PatternType {
|
||||
/// Single constant/literal segment.
|
||||
@ -603,7 +606,7 @@ impl ResourceDef {
|
||||
PatternType::Dynamic(re, _) => Some(re.captures(path)?[1].len()),
|
||||
|
||||
PatternType::DynamicSet(re, params) => {
|
||||
let idx = re.matches(path).into_iter().next()?;
|
||||
let idx = re.first_match_idx(path)?;
|
||||
let (ref pattern, _) = params[idx];
|
||||
Some(pattern.captures(path)?[1].len())
|
||||
}
|
||||
@ -706,7 +709,7 @@ impl ResourceDef {
|
||||
|
||||
PatternType::DynamicSet(re, params) => {
|
||||
let path = path.unprocessed();
|
||||
let (pattern, names) = match re.matches(path).into_iter().next() {
|
||||
let (pattern, names) = match re.first_match_idx(path) {
|
||||
Some(idx) => &params[idx],
|
||||
_ => return false,
|
||||
};
|
||||
@ -870,7 +873,7 @@ impl ResourceDef {
|
||||
}
|
||||
}
|
||||
|
||||
let pattern_re_set = RegexSet::new(re_set).unwrap();
|
||||
let pattern_re_set = RegexSet::new(re_set);
|
||||
let segments = segments.unwrap_or_default();
|
||||
|
||||
(
|
||||
|
Reference in New Issue
Block a user