zip-rs-wasm/Cargo.toml
David Caldwell 73143a0ad6
perf: Faster cde rejection (#255)
* Use the tempfile crate instead of the tempdir crate (which is deprecated)

https://github.com/rust-lang-deprecated/tempdir?tab=readme-ov-file#deprecation-note

* perf: Add benchmark that measures the rejection speed of a large non-zip file

* perf: Speed up non-zip rejection by increasing END_WINDOW_SIZE

I tested several END_WINDOW_SIZEs across 2 machines:

Machine 1: macOS 15.0.1, aarch64 (apfs /tmp)
512:   test parse_large_non_zip  ... bench:  30,450,608 ns/iter (+/- 673,910)
4096:  test parse_large_non_zip  ... bench:   7,741,366 ns/iter (+/- 521,101)
8192:  test parse_large_non_zip  ... bench:   5,807,443 ns/iter (+/- 546,227)
16384: test parse_large_non_zip  ... bench:   4,794,314 ns/iter (+/- 419,114)
32768: test parse_large_non_zip  ... bench:   4,262,897 ns/iter (+/- 397,582)
65536: test parse_large_non_zip  ... bench:   4,060,847 ns/iter (+/- 280,964)

Machine 2: Debian testing, x86_64 (tmpfs /tmp)
512:   test parse_large_non_zip  ... bench:  65,132,581 ns/iter (+/- 7,429,976)
4096:  test parse_large_non_zip  ... bench:  14,109,503 ns/iter (+/- 2,892,086)
8192:  test parse_large_non_zip  ... bench:   9,942,500 ns/iter (+/- 1,886,063)
16384: test parse_large_non_zip  ... bench:   8,205,851 ns/iter (+/- 2,902,041)
32768: test parse_large_non_zip  ... bench:   7,012,011 ns/iter (+/- 2,222,879)
65536: test parse_large_non_zip  ... bench:   6,577,275 ns/iter (+/- 881,546)

In both cases END_WINDOW_SIZE=8192 performed about 6x better than 512 and >8192
didn't make much of a difference on top of that.

* perf: Speed up non-zip rejection by limiting search for EOCDR.

I benchmarked several search sizes across 2 machines
(these benches are using an 8192 END_WINDOW_SIZE):

Machine 1: macOS 15.0.1, aarch64 (apfs /tmp)
whole file:   test parse_large_non_zip              ... bench:   5,773,801 ns/iter (+/- 411,277)
last 128k:    test parse_large_non_zip              ... bench:      54,402 ns/iter (+/- 4,126)
last 66,000:  test parse_large_non_zip              ... bench:      36,152 ns/iter (+/- 4,293)

Machine 2: Debian testing, x86_64 (tmpfs /tmp)
whole file:   test parse_large_non_zip              ... bench:   9,942,306 ns/iter (+/- 1,963,522)
last 128k:    test parse_large_non_zip              ... bench:      73,604 ns/iter (+/- 16,662)
last 66,000:  test parse_large_non_zip              ... bench:      41,349 ns/iter (+/- 16,812)

As you might expect these significantly increase the rejection speed for
large non-zip files.

66,000 was the number previously used by zip-rs. It was changed to zero in
7a55945743.

128K is what Info-Zip uses[1]. This seems like a reasonable (non-zero)
choice for compatibility reasons.

[1] Info-zip is extremely old and doesn't not have an official git repo to
    link to. However, an unofficial fork can be found here:
    bb0c4755d4/zipfile.c (L4073)

---------

Co-authored-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com>
2024-11-19 18:49:35 +00:00

103 lines
3.1 KiB
TOML

[package]
name = "zip"
version = "2.2.0"
authors = [
"Mathijs van de Nes <git@mathijs.vd-nes.nl>",
"Marli Frost <marli@frost.red>",
"Ryan Levick <ryan.levick@gmail.com>",
"Chris Hennick <hennickc@amazon.com>",
]
license = "MIT"
repository = "https://github.com/zip-rs/zip2.git"
keywords = ["zip", "archive", "compression"]
rust-version = "1.73.0"
description = """
Library to support the reading and writing of zip files.
"""
edition = "2021"
exclude = ["tests/**", "examples/**", ".github/**", "fuzz_read/**", "fuzz_write/**"]
build = "src/build.rs"
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]
[workspace.dependencies]
time = { version = "0.3.36", default-features = false }
[dependencies]
aes = { version = "0.8.4", optional = true }
bzip2 = { version = "0.4.4", optional = true }
chrono = { version = "0.4.38", optional = true }
constant_time_eq = { version = "0.3.1", optional = true }
crc32fast = "1.4.2"
displaydoc = { version = "0.2.5", default-features = false }
flate2 = { version = "1.0.33", default-features = false, optional = true }
indexmap = "2"
hmac = { version = "0.12.1", optional = true, features = ["reset"] }
memchr = "2.7.4"
pbkdf2 = { version = "0.12.2", optional = true }
rand = { version = "0.8.5", optional = true }
sha1 = { version = "0.10.6", optional = true }
thiserror = "2.0.3"
time = { workspace = true, optional = true, features = [
"std",
] }
zeroize = { version = "1.8.1", optional = true, features = ["zeroize_derive"] }
zstd = { version = "0.13.2", optional = true, default-features = false }
zopfli = { version = "0.8.1", optional = true }
deflate64 = { version = "0.1.9", optional = true }
lzma-rs = { version = "0.3.0", default-features = false, optional = true }
[target.'cfg(any(all(target_arch = "arm", target_pointer_width = "32"), target_arch = "mips", target_arch = "powerpc"))'.dependencies]
crossbeam-utils = "0.8.20"
[target.'cfg(fuzzing)'.dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] }
[dev-dependencies]
bencher = "0.1.5"
getrandom = { version = "0.2.15", features = ["js", "std"] }
walkdir = "2.5.0"
time = { workspace = true, features = ["formatting", "macros"] }
anyhow = "1"
clap = { version = "=4.4.18", features = ["derive"] }
tempfile = "3"
[features]
aes-crypto = ["aes", "constant_time_eq", "hmac", "pbkdf2", "sha1", "rand", "zeroize"]
chrono = ["chrono/default"]
_deflate-any = []
_all-features = [] # Detect when --all-features is used
deflate = ["flate2/rust_backend", "deflate-zopfli", "deflate-flate2"]
deflate-flate2 = ["_deflate-any"]
# DEPRECATED: previously enabled `flate2/miniz_oxide` which is equivalent to `flate2/rust_backend`
deflate-miniz = ["deflate", "deflate-flate2"]
deflate-zlib = ["flate2/zlib", "deflate-flate2"]
deflate-zlib-ng = ["flate2/zlib-ng", "deflate-flate2"]
deflate-zopfli = ["zopfli", "_deflate-any"]
lzma = ["lzma-rs/stream"]
unreserved = []
xz = ["lzma-rs/raw_decoder"]
default = [
"aes-crypto",
"bzip2",
"deflate64",
"deflate",
"lzma",
"time",
"zstd",
"xz",
]
[[bench]]
name = "read_entry"
harness = false
[[bench]]
name = "read_metadata"
harness = false
[[bench]]
name = "merge_archive"
harness = false