From efbea6f747c440374a5dadf50dfa2ef834c573dd Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 3 May 2024 14:05:39 -0700 Subject: [PATCH 01/10] perf: Speed up `path_to_string` in cases where the path is already in the proper format --- src/spec.rs | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index 8c5da7d8..a80c489e 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -217,6 +217,8 @@ impl Zip64CentralDirectoryEnd { /// Converts a path to the ZIP format (forward-slash-delimited and normalized). pub(crate) fn path_to_string>(path: T) -> String { + let original = path.as_ref().to_str(); + let mut recreate = original.is_some(); let mut normalized_components = Vec::new(); // Empty element ensures the path has a leading slash, with no extra allocation after the join @@ -225,15 +227,37 @@ pub(crate) fn path_to_string>(path: T) -> String { for component in path.as_ref().components() { match component { Component::Normal(os_str) => { - normalized_components.push(os_str.to_string_lossy()); + match os_str.to_str() { + Some(valid_str) => normalized_components.push(Cow::Borrowed(valid_str)), + None => { + recreate = true; + normalized_components.push(os_str.to_string_lossy()); + } + } + } Component::ParentDir => { + recreate = true; if normalized_components.len() > 1 { normalized_components.pop(); } } - _ => {} + _ => { + recreate = true; + } + } + } + if recreate { + normalized_components.join("/") + } else { + let original = original.unwrap(); + if !original.starts_with('/') { + let mut slash_original = String::with_capacity(original.len() + 1); + slash_original.push('/'); + slash_original.push_str(original); + slash_original + } else { + original.to_string() } } - normalized_components.join("/") } From 753eedb3a76ddf3dcff807ad0e5d1bd288fdb1ec Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 3 May 2024 14:06:29 -0700 Subject: [PATCH 02/10] perf: Drop `normalized_components` slightly sooner when not using it --- src/spec.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spec.rs b/src/spec.rs index a80c489e..7ed4f6b4 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -250,6 +250,7 @@ pub(crate) fn path_to_string>(path: T) -> String { if recreate { normalized_components.join("/") } else { + drop(normalized_components); let original = original.unwrap(); if !original.starts_with('/') { let mut slash_original = String::with_capacity(original.len() + 1); From 22e8fdbf58901b59ad9d00a7a8882eaec2ddb104 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 3 May 2024 14:06:48 -0700 Subject: [PATCH 03/10] chore: Bug fix --- src/spec.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index 7ed4f6b4..c3c4d422 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -218,7 +218,7 @@ impl Zip64CentralDirectoryEnd { /// Converts a path to the ZIP format (forward-slash-delimited and normalized). pub(crate) fn path_to_string>(path: T) -> String { let original = path.as_ref().to_str(); - let mut recreate = original.is_some(); + let mut recreate = original.is_none(); let mut normalized_components = Vec::new(); // Empty element ensures the path has a leading slash, with no extra allocation after the join @@ -234,7 +234,7 @@ pub(crate) fn path_to_string>(path: T) -> String { normalized_components.push(os_str.to_string_lossy()); } } - + } Component::ParentDir => { recreate = true; @@ -248,7 +248,7 @@ pub(crate) fn path_to_string>(path: T) -> String { } } if recreate { - normalized_components.join("/") + normalized_components.join("/") } else { drop(normalized_components); let original = original.unwrap(); From 6184232e195aade4c3cf7524c649296269c031e3 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 3 May 2024 14:11:03 -0700 Subject: [PATCH 04/10] perf: Speed up logic if main separator isn't '/' --- src/spec.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index c3c4d422..05db162d 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -3,7 +3,7 @@ use crate::unstable::{LittleEndianReadExt, LittleEndianWriteExt}; use std::borrow::Cow; use std::io; use std::io::prelude::*; -use std::path::{Component, Path}; +use std::path::{Component, MAIN_SEPARATOR, Path}; pub const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034b50; pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: u32 = 0x02014b50; @@ -217,8 +217,18 @@ impl Zip64CentralDirectoryEnd { /// Converts a path to the ZIP format (forward-slash-delimited and normalized). pub(crate) fn path_to_string>(path: T) -> String { - let original = path.as_ref().to_str(); - let mut recreate = original.is_none(); + let mut recreate = MAIN_SEPARATOR != '/'; + let original = if !recreate { + match path.as_ref().to_str() { + Some(original) => Some(original), + None => { + recreate = true; + None + } + } + } else { + None + }; let mut normalized_components = Vec::new(); // Empty element ensures the path has a leading slash, with no extra allocation after the join From 001967186ab7af3733ff781af5e524626e530915 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 3 May 2024 14:28:14 -0700 Subject: [PATCH 05/10] perf: Fast handling for separator-free paths --- src/spec.rs | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index 05db162d..a24da82f 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -3,7 +3,7 @@ use crate::unstable::{LittleEndianReadExt, LittleEndianWriteExt}; use std::borrow::Cow; use std::io; use std::io::prelude::*; -use std::path::{Component, MAIN_SEPARATOR, Path}; +use std::path::{Component, Path, MAIN_SEPARATOR}; pub const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034b50; pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: u32 = 0x02014b50; @@ -217,18 +217,17 @@ impl Zip64CentralDirectoryEnd { /// Converts a path to the ZIP format (forward-slash-delimited and normalized). pub(crate) fn path_to_string>(path: T) -> String { - let mut recreate = MAIN_SEPARATOR != '/'; - let original = if !recreate { - match path.as_ref().to_str() { - Some(original) => Some(original), - None => { - recreate = true; - None + let mut maybe_original = None; + if let Some(original) = path.as_ref().to_str() { + if MAIN_SEPARATOR == '/' || !original[1..].contains(MAIN_SEPARATOR) { + if original.starts_with(MAIN_SEPARATOR) { + maybe_original = Some(&original[1..]); + } else { + maybe_original = Some(original); } } - } else { - None - }; + } + let mut recreate = maybe_original.is_none(); let mut normalized_components = Vec::new(); // Empty element ensures the path has a leading slash, with no extra allocation after the join @@ -236,16 +235,13 @@ pub(crate) fn path_to_string>(path: T) -> String { for component in path.as_ref().components() { match component { - Component::Normal(os_str) => { - match os_str.to_str() { - Some(valid_str) => normalized_components.push(Cow::Borrowed(valid_str)), - None => { - recreate = true; - normalized_components.push(os_str.to_string_lossy()); - } + Component::Normal(os_str) => match os_str.to_str() { + Some(valid_str) => normalized_components.push(Cow::Borrowed(valid_str)), + None => { + recreate = true; + normalized_components.push(os_str.to_string_lossy()); } - - } + }, Component::ParentDir => { recreate = true; if normalized_components.len() > 1 { @@ -261,7 +257,7 @@ pub(crate) fn path_to_string>(path: T) -> String { normalized_components.join("/") } else { drop(normalized_components); - let original = original.unwrap(); + let original = maybe_original.unwrap(); if !original.starts_with('/') { let mut slash_original = String::with_capacity(original.len() + 1); slash_original.push('/'); From 5cd448802f864d69b97a514942dae3c7ef4ee3f7 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 3 May 2024 14:31:32 -0700 Subject: [PATCH 06/10] chore: Bug fix: must recreate if . or .. is a path element --- src/spec.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spec.rs b/src/spec.rs index a24da82f..b2f9f221 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -219,7 +219,8 @@ impl Zip64CentralDirectoryEnd { pub(crate) fn path_to_string>(path: T) -> String { let mut maybe_original = None; if let Some(original) = path.as_ref().to_str() { - if MAIN_SEPARATOR == '/' || !original[1..].contains(MAIN_SEPARATOR) { + if (MAIN_SEPARATOR == '/' || !original.contains(MAIN_SEPARATOR)) + && !original.ends_with('.') && !original.contains("./") { if original.starts_with(MAIN_SEPARATOR) { maybe_original = Some(&original[1..]); } else { From 0fe12b2ec9bac7a7a7f26050d8052d13b654adf5 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 3 May 2024 14:34:05 -0700 Subject: [PATCH 07/10] chore: Bug fix: non-canonical path detection when MAIN_SEPARATOR is not slash or occurs twice in a row --- src/spec.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/spec.rs b/src/spec.rs index b2f9f221..b6238f90 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -220,7 +220,9 @@ pub(crate) fn path_to_string>(path: T) -> String { let mut maybe_original = None; if let Some(original) = path.as_ref().to_str() { if (MAIN_SEPARATOR == '/' || !original.contains(MAIN_SEPARATOR)) - && !original.ends_with('.') && !original.contains("./") { + && !original.ends_with('.') + && !original.contains([MAIN_SEPARATOR, MAIN_SEPARATOR]) + && !original.contains(['.', MAIN_SEPARATOR]) { if original.starts_with(MAIN_SEPARATOR) { maybe_original = Some(&original[1..]); } else { From 2adbbccb82ba60f90dd1302f0f6594213ca9abf0 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 3 May 2024 14:59:35 -0700 Subject: [PATCH 08/10] perf: Quick filter for paths that contain "/../" or "/./" or start with "./" or "../" --- src/spec.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index b6238f90..e9cd6ea7 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -220,9 +220,12 @@ pub(crate) fn path_to_string>(path: T) -> String { let mut maybe_original = None; if let Some(original) = path.as_ref().to_str() { if (MAIN_SEPARATOR == '/' || !original.contains(MAIN_SEPARATOR)) - && !original.ends_with('.') + && !original.ends_with('.') + && !original.starts_with(['.', MAIN_SEPARATOR]) + && !original.starts_with(['.', '.', MAIN_SEPARATOR]) && !original.contains([MAIN_SEPARATOR, MAIN_SEPARATOR]) - && !original.contains(['.', MAIN_SEPARATOR]) { + && !original.contains([MAIN_SEPARATOR, '.', MAIN_SEPARATOR]) + && !original.contains([MAIN_SEPARATOR, '.', '.', MAIN_SEPARATOR]){ if original.starts_with(MAIN_SEPARATOR) { maybe_original = Some(&original[1..]); } else { From 74e76a94cabefcf506ccc8314be752c64b21e51d Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 3 May 2024 15:01:43 -0700 Subject: [PATCH 09/10] chore: Refactor: can short-circuit handling of paths that start with MAIN_SEPARATOR, no matter what MAIN_SEPARATOR is --- src/spec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spec.rs b/src/spec.rs index e9cd6ea7..8e305235 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -219,7 +219,7 @@ impl Zip64CentralDirectoryEnd { pub(crate) fn path_to_string>(path: T) -> String { let mut maybe_original = None; if let Some(original) = path.as_ref().to_str() { - if (MAIN_SEPARATOR == '/' || !original.contains(MAIN_SEPARATOR)) + if (MAIN_SEPARATOR == '/' || !original[1..].contains(MAIN_SEPARATOR)) && !original.ends_with('.') && !original.starts_with(['.', MAIN_SEPARATOR]) && !original.starts_with(['.', '.', MAIN_SEPARATOR]) From 1b2c42b199dcc69a4d4a43004f6030a6e8db02c1 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 3 May 2024 15:18:31 -0700 Subject: [PATCH 10/10] style: cargo fmt --all --- src/spec.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index 8e305235..7aa9f08e 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -220,12 +220,13 @@ pub(crate) fn path_to_string>(path: T) -> String { let mut maybe_original = None; if let Some(original) = path.as_ref().to_str() { if (MAIN_SEPARATOR == '/' || !original[1..].contains(MAIN_SEPARATOR)) - && !original.ends_with('.') + && !original.ends_with('.') && !original.starts_with(['.', MAIN_SEPARATOR]) && !original.starts_with(['.', '.', MAIN_SEPARATOR]) - && !original.contains([MAIN_SEPARATOR, MAIN_SEPARATOR]) + && !original.contains([MAIN_SEPARATOR, MAIN_SEPARATOR]) && !original.contains([MAIN_SEPARATOR, '.', MAIN_SEPARATOR]) - && !original.contains([MAIN_SEPARATOR, '.', '.', MAIN_SEPARATOR]){ + && !original.contains([MAIN_SEPARATOR, '.', '.', MAIN_SEPARATOR]) + { if original.starts_with(MAIN_SEPARATOR) { maybe_original = Some(&original[1..]); } else {