mirror of
https://github.com/fafhrd91/actix-web
synced 2024-11-30 18:44:35 +01:00
40048a5811
Co-authored-by: Rob Ede <robjtede@icloud.com>
173 lines
5.4 KiB
Rust
173 lines
5.4 KiB
Rust
/// Partial percent-decoding.
|
|
///
|
|
/// Performs percent-decoding on a slice but can selectively skip decoding certain sequences.
|
|
///
|
|
/// # Examples
|
|
/// ```
|
|
/// # use actix_router::Quoter;
|
|
/// // + is set as a protected character and will not be decoded...
|
|
/// let q = Quoter::new(&[], b"+");
|
|
///
|
|
/// // ...but the other encoded characters (like the hyphen below) will.
|
|
/// assert_eq!(q.requote(b"/a%2Db%2Bc").unwrap(), b"/a-b%2Bc");
|
|
/// ```
|
|
pub struct Quoter {
|
|
/// Simple bit-map of protected values in the 0-127 ASCII range.
|
|
protected_table: AsciiBitmap,
|
|
}
|
|
|
|
impl Quoter {
|
|
/// Constructs a new `Quoter` instance given a set of protected ASCII bytes.
|
|
///
|
|
/// The first argument is ignored but is kept for backward compatibility.
|
|
///
|
|
/// # Panics
|
|
/// Panics if any of the `protected` bytes are not in the 0-127 ASCII range.
|
|
pub fn new(_: &[u8], protected: &[u8]) -> Quoter {
|
|
let mut protected_table = AsciiBitmap::default();
|
|
|
|
// prepare protected table
|
|
for &ch in protected {
|
|
protected_table.set_bit(ch);
|
|
}
|
|
|
|
Quoter { protected_table }
|
|
}
|
|
|
|
/// Decodes the next escape sequence, if any, and advances `val`.
|
|
#[inline(always)]
|
|
fn decode_next<'a>(&self, val: &mut &'a [u8]) -> Option<(&'a [u8], u8)> {
|
|
for i in 0..val.len() {
|
|
if let (prev, [b'%', p1, p2, rem @ ..]) = val.split_at(i) {
|
|
if let Some(ch) = hex_pair_to_char(*p1, *p2)
|
|
// ignore protected ascii bytes
|
|
.filter(|&ch| !(ch < 128 && self.protected_table.bit_at(ch)))
|
|
{
|
|
*val = rem;
|
|
return Some((prev, ch));
|
|
}
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
/// Partially percent-decodes the given bytes.
|
|
///
|
|
/// Escape sequences of the protected set are *not* decoded.
|
|
///
|
|
/// Returns `None` when no modification to the original bytes was required.
|
|
///
|
|
/// Invalid/incomplete percent-encoding sequences are passed unmodified.
|
|
pub fn requote(&self, val: &[u8]) -> Option<Vec<u8>> {
|
|
let mut remaining = val;
|
|
|
|
// early return indicates that no percent-encoded sequences exist and we can skip allocation
|
|
let (pre, decoded_char) = self.decode_next(&mut remaining)?;
|
|
|
|
// decoded output will always be shorter than the input
|
|
let mut decoded = Vec::<u8>::with_capacity(val.len());
|
|
|
|
// push first segment and decoded char
|
|
decoded.extend_from_slice(pre);
|
|
decoded.push(decoded_char);
|
|
|
|
// decode and push rest of segments and decoded chars
|
|
while let Some((prev, ch)) = self.decode_next(&mut remaining) {
|
|
// this ugly conditional achieves +50% perf in cases where this is a tight loop.
|
|
if !prev.is_empty() {
|
|
decoded.extend_from_slice(prev);
|
|
}
|
|
decoded.push(ch);
|
|
}
|
|
|
|
decoded.extend_from_slice(remaining);
|
|
|
|
Some(decoded)
|
|
}
|
|
|
|
pub(crate) fn requote_str_lossy(&self, val: &str) -> Option<String> {
|
|
self.requote(val.as_bytes())
|
|
.map(|data| String::from_utf8_lossy(&data).into_owned())
|
|
}
|
|
}
|
|
|
|
/// Decode a ASCII hex-encoded pair to an integer.
|
|
///
|
|
/// Returns `None` if either portion of the decoded pair does not evaluate to a valid hex value.
|
|
///
|
|
/// - `0x33 ('3'), 0x30 ('0') => 0x30 ('0')`
|
|
/// - `0x34 ('4'), 0x31 ('1') => 0x41 ('A')`
|
|
/// - `0x36 ('6'), 0x31 ('1') => 0x61 ('a')`
|
|
#[inline(always)]
|
|
fn hex_pair_to_char(d1: u8, d2: u8) -> Option<u8> {
|
|
let d_high = char::from(d1).to_digit(16)?;
|
|
let d_low = char::from(d2).to_digit(16)?;
|
|
|
|
// left shift high nibble by 4 bits
|
|
Some((d_high as u8) << 4 | (d_low as u8))
|
|
}
|
|
|
|
#[derive(Debug, Default, Clone)]
|
|
struct AsciiBitmap {
|
|
array: [u8; 16],
|
|
}
|
|
|
|
impl AsciiBitmap {
|
|
/// Sets bit in given bit-map to 1=true.
|
|
///
|
|
/// # Panics
|
|
/// Panics if `ch` index is out of bounds.
|
|
fn set_bit(&mut self, ch: u8) {
|
|
self.array[(ch >> 3) as usize] |= 0b1 << (ch & 0b111)
|
|
}
|
|
|
|
/// Returns true if bit to true in given bit-map.
|
|
///
|
|
/// # Panics
|
|
/// Panics if `ch` index is out of bounds.
|
|
fn bit_at(&self, ch: u8) -> bool {
|
|
self.array[(ch >> 3) as usize] & (0b1 << (ch & 0b111)) != 0
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn custom_quoter() {
|
|
let q = Quoter::new(b"", b"+");
|
|
assert_eq!(q.requote(b"/a%25c").unwrap(), b"/a%c");
|
|
assert_eq!(q.requote(b"/a%2Bc"), None);
|
|
|
|
let q = Quoter::new(b"%+", b"/");
|
|
assert_eq!(q.requote(b"/a%25b%2Bc").unwrap(), b"/a%b+c");
|
|
assert_eq!(q.requote(b"/a%2fb"), None);
|
|
assert_eq!(q.requote(b"/a%2Fb"), None);
|
|
assert_eq!(q.requote(b"/a%0Ab").unwrap(), b"/a\nb");
|
|
assert_eq!(q.requote(b"/a%FE\xffb").unwrap(), b"/a\xfe\xffb");
|
|
assert_eq!(q.requote(b"/a\xfe\xffb"), None);
|
|
}
|
|
|
|
#[test]
|
|
fn non_ascii() {
|
|
let q = Quoter::new(b"%+", b"/");
|
|
assert_eq!(q.requote(b"/a%FE\xffb").unwrap(), b"/a\xfe\xffb");
|
|
assert_eq!(q.requote(b"/a\xfe\xffb"), None);
|
|
}
|
|
|
|
#[test]
|
|
fn invalid_sequences() {
|
|
let q = Quoter::new(b"%+", b"/");
|
|
assert_eq!(q.requote(b"/a%2x%2X%%"), None);
|
|
assert_eq!(q.requote(b"/a%20%2X%%").unwrap(), b"/a %2X%%");
|
|
}
|
|
|
|
#[test]
|
|
fn quoter_no_modification() {
|
|
let q = Quoter::new(b"", b"");
|
|
assert_eq!(q.requote(b"/abc/../efg"), None);
|
|
}
|
|
}
|