fix: rewrite the EOCD/EOCD64 detection to fix extreme performance regression (#247)

* fix: resolve clippy warning in nightly

* wip: major rework of cde location

* wip: rework CDE lookup

* refactor: magic finder, eocd lookup retry

* wip: handle empty zips

* fix: satisfy tests, add documentation

* chore: remove unused dependencies

* feat: support both zip32 and zip64 comments

* feat: add zip64 comment functions to ZipWriter

* fix: first pass on maintainer comments

* fix: continue searching for EOCD when the central directory is invalid

* chore: satisfy clippy lints

* chore: satisfy style_and_docs

* feat: support both directions in MagicFinder, correctly find first CDFH

* fix: add more checks to EOCD parsing, move comment size error from parse to write

* fix: use saturating add when checking eocd64 record_size upper bound

* fix: correctly handle mid window offsets in forward mode

* fix: compare maximum possible comment length against file size, not search region end

* feat: handle zip64 detection as a hint

* fix: detect oversized central directories when locating EOCD64

* fix: oopsie

---------

Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com>
Co-authored-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com>
This commit is contained in:
Richard Ivánek 2024-12-16 04:32:55 +01:00 committed by GitHub
parent 810d18a9a1
commit 33c71ccc80
Signed by: DevComp
GPG key ID: B5690EEEBB952194
6 changed files with 800 additions and 517 deletions

View file

@ -8,10 +8,7 @@ use crate::crc32::Crc32Reader;
use crate::extra_fields::{ExtendedTimestamp, ExtraField};
use crate::read::zip_archive::{Shared, SharedBuilder};
use crate::result::{ZipError, ZipResult};
use crate::spec::{
self, FixedSizeBlock, Pod, Zip32CentralDirectoryEnd, Zip64CDELocatorBlock,
Zip64CentralDirectoryEnd, ZIP64_ENTRY_THR,
};
use crate::spec::{self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod};
use crate::types::{
AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
ZipLocalEntryBlock,
@ -26,7 +23,6 @@ use std::mem;
use std::mem::size_of;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use std::sync::{Arc, OnceLock};
mod config;
@ -42,6 +38,8 @@ pub(crate) mod lzma;
#[cfg(feature = "xz")]
pub(crate) mod xz;
pub(crate) mod magic_finder;
// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
pub(crate) mod zip_archive {
use indexmap::IndexMap;
@ -56,6 +54,8 @@ pub(crate) mod zip_archive {
// This isn't yet used anywhere, but it is here for use cases in the future.
#[allow(dead_code)]
pub(super) config: super::Config,
pub(crate) comment: Box<[u8]>,
pub(crate) zip64_comment: Option<Box<[u8]>>,
}
#[derive(Debug)]
@ -69,7 +69,7 @@ pub(crate) mod zip_archive {
}
impl SharedBuilder {
pub fn build(self) -> Shared {
pub fn build(self, comment: Box<[u8]>, zip64_comment: Option<Box<[u8]>>) -> Shared {
let mut index_map = IndexMap::with_capacity(self.files.len());
self.files.into_iter().for_each(|file| {
index_map.insert(file.file_name.clone(), file);
@ -79,6 +79,8 @@ pub(crate) mod zip_archive {
offset: self.offset,
dir_start: self.dir_start,
config: self.config,
comment,
zip64_comment,
}
}
}
@ -108,7 +110,6 @@ pub(crate) mod zip_archive {
pub struct ZipArchive<R> {
pub(super) reader: R,
pub(super) shared: Arc<Shared>,
pub(super) comment: Arc<[u8]>,
}
}
@ -360,6 +361,7 @@ fn find_data_start(
block.file_name_length as u64 + block.extra_field_length as u64;
let data_start =
data.header_start + size_of::<ZipLocalEntryBlock>() as u64 + variable_fields_len;
// Set the value so we don't have to read it again.
match data.data_start.set(data_start) {
Ok(()) => (),
@ -369,6 +371,7 @@ fn find_data_start(
debug_assert_eq!(*data.data_start.get().unwrap(), data_start);
}
}
Ok(data_start)
}
@ -434,17 +437,62 @@ pub(crate) fn make_reader(
pub(crate) struct CentralDirectoryInfo {
pub(crate) archive_offset: u64,
pub(crate) directory_start: u64,
pub(crate) cde_position: u64,
pub(crate) number_of_files: usize,
pub(crate) disk_number: u32,
pub(crate) disk_with_central_directory: u32,
pub(crate) is_zip64: bool,
}
impl<'a> TryFrom<&'a CentralDirectoryEndInfo> for CentralDirectoryInfo {
    type Error = ZipError;

    /// Converts a located EOCD (and optional EOCD64) record into the internal
    /// central-directory description, validating the footer fields.
    fn try_from(value: &'a CentralDirectoryEndInfo) -> Result<Self, Self::Error> {
        // Prefer the ZIP64 record when present; it supersedes the (possibly
        // saturated) ZIP32 footer fields.
        let (relative_cd_offset, number_of_files, disk_number, disk_with_central_directory) =
            match &value.eocd64 {
                Some(DataAndPosition { data: eocd64, .. }) => {
                    if eocd64.number_of_files_on_this_disk > eocd64.number_of_files {
                        return Err(InvalidArchive(
                            "ZIP64 footer indicates more files on this disk than in the whole archive",
                        ));
                    } else if eocd64.version_needed_to_extract > eocd64.version_made_by {
                        // NOTE: error message wording fixed ("a new version … than" was ungrammatical).
                        return Err(InvalidArchive(
                            "ZIP64 footer indicates a newer version is needed to extract this archive than the \
                            version that wrote it",
                        ));
                    }
                    (
                        eocd64.central_directory_offset,
                        eocd64.number_of_files as usize,
                        eocd64.disk_number,
                        eocd64.disk_with_central_directory,
                    )
                }
                _ => (
                    value.eocd.data.central_directory_offset as u64,
                    value.eocd.data.number_of_files_on_this_disk as usize,
                    value.eocd.data.disk_number as u32,
                    value.eocd.data.disk_with_central_directory as u32,
                ),
            };

        // The recorded offset is relative to the start of the archive data;
        // add the detected archive offset, guarding against overflow.
        let directory_start = relative_cd_offset
            .checked_add(value.archive_offset)
            .ok_or(InvalidArchive("Invalid central directory size or offset"))?;

        Ok(Self {
            archive_offset: value.archive_offset,
            directory_start,
            number_of_files,
            disk_number,
            disk_with_central_directory,
        })
    }
}
impl<R> ZipArchive<R> {
pub(crate) fn from_finalized_writer(
files: IndexMap<Box<str>, ZipFileData>,
comment: Box<[u8]>,
zip64_comment: Option<Box<[u8]>>,
reader: R,
central_start: u64,
) -> ZipResult<Self> {
@ -459,12 +507,10 @@ impl<R> ZipArchive<R> {
config: Config {
archive_offset: ArchiveOffset::Known(initial_offset),
},
comment,
zip64_comment,
});
Ok(Self {
reader,
shared,
comment: comment.into(),
})
Ok(Self { reader, shared })
}
/// Total size of the files in the archive, if it can be known. Doesn't include directories or
@ -549,264 +595,36 @@ impl<R: Read + Seek> ZipArchive<R> {
Ok(new_files)
}
fn get_directory_info_zip32(
config: &Config,
reader: &mut R,
footer: &Zip32CentralDirectoryEnd,
cde_start_pos: u64,
) -> ZipResult<CentralDirectoryInfo> {
let archive_offset = match config.archive_offset {
ArchiveOffset::Known(n) => n,
ArchiveOffset::FromCentralDirectory | ArchiveOffset::Detect => {
// Some zip files have data prepended to them, resulting in the
// offsets all being too small. Get the amount of error by comparing
// the actual file position we found the CDE at with the offset
// recorded in the CDE.
let mut offset = cde_start_pos
.checked_sub(footer.central_directory_size as u64)
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
.ok_or(InvalidArchive("Invalid central directory size or offset"))?;
if config.archive_offset == ArchiveOffset::Detect {
// Check whether the archive offset makes sense by peeking at the directory start. If it
// doesn't, fall back to using no archive offset. This supports zips with the central
// directory entries somewhere other than directly preceding the end of central directory.
reader.seek(SeekFrom::Start(
offset + footer.central_directory_offset as u64,
))?;
let mut buf = [0; 4];
reader.read_exact(&mut buf)?;
if spec::Magic::from_le_bytes(buf)
!= spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE
{
offset = 0;
}
}
offset
}
};
let directory_start = footer.central_directory_offset as u64 + archive_offset;
let number_of_files = footer.number_of_files_on_this_disk as usize;
Ok(CentralDirectoryInfo {
archive_offset,
directory_start,
number_of_files,
disk_number: footer.disk_number as u32,
disk_with_central_directory: footer.disk_with_central_directory as u32,
cde_position: cde_start_pos,
is_zip64: false,
})
}
const fn order_lower_upper_bounds(a: u64, b: u64) -> (u64, u64) {
if a > b {
(b, a)
} else {
(a, b)
}
}
fn get_directory_info_zip64(
config: &Config,
reader: &mut R,
cde_start_pos: u64,
) -> ZipResult<Vec<ZipResult<CentralDirectoryInfo>>> {
// See if there's a ZIP64 footer. The ZIP64 locator if present will
// have its signature 20 bytes in front of the standard footer. The
// standard footer, in turn, is 22+N bytes large, where N is the
// comment length. Therefore:
reader.seek(SeekFrom::Start(
cde_start_pos
.checked_sub(size_of::<Zip64CDELocatorBlock>() as u64)
.ok_or(InvalidArchive(
"No room for ZIP64 locator before central directory end",
))?,
))?;
let locator64 = spec::Zip64CentralDirectoryEndLocator::parse(reader)?;
// We need to reassess `archive_offset`. We know where the ZIP64
// central-directory-end structure *should* be, but unfortunately we
// don't know how to precisely relate that location to our current
// actual offset in the file, since there may be junk at its
// beginning. Therefore we need to perform another search, as in
// read::Zip32CentralDirectoryEnd::find_and_parse, except now we search
// forward. There may be multiple results because of Zip64 central-directory signatures in
// ZIP comment data.
let search_upper_bound = cde_start_pos
.checked_sub(
(size_of::<Zip64CentralDirectoryEnd>()
+ size_of::<spec::Zip64CentralDirectoryEndLocator>()) as u64,
)
.ok_or(InvalidArchive(
"File cannot contain ZIP64 central directory end",
))?;
let (lower, upper) = Self::order_lower_upper_bounds(
locator64.end_of_central_directory_offset,
search_upper_bound,
);
let search_results = Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?;
let results: Vec<ZipResult<CentralDirectoryInfo>> =
search_results.into_iter().map(|(footer64, archive_offset)| {
let archive_offset = match config.archive_offset {
ArchiveOffset::Known(n) => n,
ArchiveOffset::FromCentralDirectory => archive_offset,
ArchiveOffset::Detect => {
archive_offset.checked_add(footer64.central_directory_offset)
.and_then(|start| {
// Check whether the archive offset makes sense by peeking at the directory start.
//
// If any errors occur or no header signature is found, fall back to no offset to see if that works.
reader.seek(SeekFrom::Start(start)).ok()?;
let mut buf = [0; 4];
reader.read_exact(&mut buf).ok()?;
if spec::Magic::from_le_bytes(buf) != spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
None
} else {
Some(archive_offset)
}
})
.unwrap_or(0)
}
};
let directory_start = footer64
.central_directory_offset
.checked_add(archive_offset)
.ok_or(InvalidArchive(
"Invalid central directory size or offset",
))?;
if directory_start > search_upper_bound {
Err(InvalidArchive(
"Invalid central directory size or offset",
))
} else if footer64.number_of_files_on_this_disk > footer64.number_of_files {
Err(InvalidArchive(
"ZIP64 footer indicates more files on this disk than in the whole archive",
))
} else if footer64.version_needed_to_extract > footer64.version_made_by {
Err(InvalidArchive(
"ZIP64 footer indicates a new version is needed to extract this archive than the \
version that wrote it",
))
} else {
Ok(CentralDirectoryInfo {
archive_offset,
directory_start,
number_of_files: footer64.number_of_files as usize,
disk_number: footer64.disk_number,
disk_with_central_directory: footer64.disk_with_central_directory,
cde_position: cde_start_pos,
is_zip64: true,
})
}
}).collect();
Ok(results)
}
/// Get the directory start offset and number of files. This is done in a
/// separate function to ease the control flow design.
pub(crate) fn get_metadata(
config: Config,
reader: &mut R,
) -> ZipResult<(Zip32CentralDirectoryEnd, Shared)> {
let mut invalid_errors_32 = Vec::new();
let mut unsupported_errors_32 = Vec::new();
let mut invalid_errors_64 = Vec::new();
let mut unsupported_errors_64 = Vec::new();
let mut ok_results = Vec::new();
let cde_locations = Zip32CentralDirectoryEnd::find_and_parse(reader)?;
cde_locations
.into_vec()
.into_iter()
.for_each(|(footer, cde_start_pos)| {
let zip32_result =
Self::get_directory_info_zip32(&config, reader, &footer, cde_start_pos);
Self::sort_result(
zip32_result,
&mut invalid_errors_32,
&mut unsupported_errors_32,
&mut ok_results,
&footer,
);
let mut inner_results = Vec::with_capacity(1);
// Check if file has a zip64 footer
let zip64_vec_result =
Self::get_directory_info_zip64(&config, reader, cde_start_pos);
Self::sort_result(
zip64_vec_result,
&mut invalid_errors_64,
&mut unsupported_errors_64,
&mut inner_results,
&(),
);
inner_results.into_iter().for_each(|(_, results)| {
results.into_iter().for_each(|result| {
Self::sort_result(
result,
&mut invalid_errors_64,
&mut unsupported_errors_64,
&mut ok_results,
&footer,
);
});
});
});
ok_results.sort_by_key(|(_, result)| {
(
u64::MAX - result.cde_position, // try the last one first
!result.is_zip64, // try ZIP64 first
)
});
let mut best_result = None;
for (footer, result) in ok_results {
let mut inner_result = Vec::with_capacity(1);
let is_zip64 = result.is_zip64;
Self::sort_result(
Self::read_central_header(result, config, reader),
if is_zip64 {
&mut invalid_errors_64
} else {
&mut invalid_errors_32
},
if is_zip64 {
&mut unsupported_errors_64
} else {
&mut unsupported_errors_32
},
&mut inner_result,
&(),
);
if let Some((_, shared)) = inner_result.into_iter().next() {
if shared.files.len() == footer.number_of_files as usize
|| (is_zip64 && footer.number_of_files == ZIP64_ENTRY_THR as u16)
{
best_result = Some((footer, shared));
break;
} else {
if is_zip64 {
&mut invalid_errors_64
} else {
&mut invalid_errors_32
}
.push(InvalidArchive("wrong number of files"))
}
}
pub(crate) fn get_metadata(config: Config, reader: &mut R) -> ZipResult<Shared> {
// End of the probed region, initially set to the end of the file
let file_len = reader.seek(io::SeekFrom::End(0))?;
let mut end_exclusive = file_len;
loop {
// Find the EOCD and possibly EOCD64 entries and determine the archive offset.
let cde = spec::find_central_directory(
reader,
config.archive_offset,
end_exclusive,
file_len,
)?;
// Turn EOCD into internal representation.
let Ok(shared) = CentralDirectoryInfo::try_from(&cde)
.and_then(|info| Self::read_central_header(info, config, reader))
else {
// The next EOCD candidate should start before the current one.
end_exclusive = cde.eocd.position;
continue;
};
return Ok(shared.build(
cde.eocd.data.zip_file_comment,
cde.eocd64.map(|v| v.data.extensible_data_sector),
));
}
let Some((footer, shared)) = best_result else {
return Err(unsupported_errors_32
.into_iter()
.chain(unsupported_errors_64)
.chain(invalid_errors_32)
.chain(invalid_errors_64)
.next()
.unwrap());
};
reader.seek(SeekFrom::Start(shared.dir_start))?;
Ok((Rc::try_unwrap(footer).unwrap(), shared.build()))
}
fn read_central_header(
@ -821,15 +639,22 @@ impl<R: Read + Seek> ZipArchive<R> {
} else {
dir_info.number_of_files
};
if dir_info.disk_number != dir_info.disk_with_central_directory {
return unsupported_zip_error("Support for multi-disk files is not implemented");
}
if file_capacity.saturating_mul(size_of::<ZipFileData>()) > isize::MAX as usize {
return unsupported_zip_error("Oversized central directory");
}
let mut files = Vec::with_capacity(file_capacity);
reader.seek(SeekFrom::Start(dir_info.directory_start))?;
for _ in 0..dir_info.number_of_files {
let file = central_header_to_zip_file(reader, dir_info.archive_offset)?;
let file = central_header_to_zip_file(reader, &dir_info)?;
files.push(file);
}
Ok(SharedBuilder {
files,
offset: dir_info.archive_offset,
@ -838,22 +663,6 @@ impl<R: Read + Seek> ZipArchive<R> {
})
}
fn sort_result<T, U: Clone>(
result: ZipResult<T>,
invalid_errors: &mut Vec<ZipError>,
unsupported_errors: &mut Vec<ZipError>,
ok_results: &mut Vec<(U, T)>,
footer: &U,
) {
match result {
Err(ZipError::UnsupportedArchive(e)) => {
unsupported_errors.push(ZipError::UnsupportedArchive(e))
}
Err(e) => invalid_errors.push(e),
Ok(o) => ok_results.push((footer.clone(), o)),
}
}
/// Returns the verification value and salt for the AES encryption of the file
///
/// It fails if the file number is invalid.
@ -902,15 +711,12 @@ impl<R: Read + Seek> ZipArchive<R> {
///
/// This uses the central directory record of the ZIP file, and ignores local file headers.
pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
reader.seek(SeekFrom::Start(0))?;
if let Ok((footer, shared)) = Self::get_metadata(config, &mut reader) {
return Ok(ZipArchive {
reader,
shared: shared.into(),
comment: footer.zip_file_comment.into(),
});
}
Err(InvalidArchive("No valid central directory found"))
let shared = Self::get_metadata(config, &mut reader)?;
Ok(ZipArchive {
reader,
shared: shared.into(),
})
}
/// Extract a Zip archive into a directory, overwriting files if they
@ -1050,7 +856,12 @@ impl<R: Read + Seek> ZipArchive<R> {
/// Get the comment of the zip archive.
pub fn comment(&self) -> &[u8] {
&self.comment
&self.shared.comment
}
/// Get the ZIP64 comment of the zip archive, if it is ZIP64.
pub fn zip64_comment(&self) -> Option<&[u8]> {
self.shared.zip64_comment.as_deref()
}
/// Returns an iterator over all the file and directory names in this archive.
@ -1235,21 +1046,36 @@ const fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
/// Parse a central directory entry to collect the information for the file.
///
/// On entry the reader must be positioned at the start of a central directory
/// file header; on success it is left positioned at the start of the next one.
pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
    reader: &mut R,
    central_directory: &CentralDirectoryInfo,
) -> ZipResult<ZipFileData> {
    // Remember where this central header starts so the inner parser can
    // record it alongside the entry.
    let central_header_start = reader.stream_position()?;

    // Parse central header
    let block = ZipCentralEntryBlock::parse(reader)?;

    let file = central_header_to_zip_file_inner(
        reader,
        central_directory.archive_offset,
        central_header_start,
        block,
    )?;

    // Position just past this entry (including its variable-length fields),
    // so it can be restored after probing the local header below.
    let central_header_end = reader.stream_position()?;

    // Sanity check: a local file header must precede the central directory.
    if file.header_start >= central_directory.directory_start {
        return Err(InvalidArchive(
            "A local file entry can't start after the central directory",
        ));
    }

    // find_data_start seeks to the local header; the computed data offset
    // must also lie before the central directory.
    let data_start = find_data_start(&file, reader)?;

    if data_start > central_directory.directory_start {
        return Err(InvalidArchive(
            "File data can't start after the central directory",
        ));
    }

    // Restore the reader so the caller can parse the next central header.
    reader.seek(SeekFrom::Start(central_header_end))?;

    Ok(file)
}

View file

@ -16,6 +16,7 @@ pub enum ArchiveOffset {
#[default]
Detect,
/// Use the central directory length and offset to determine the start of the archive.
#[deprecated(since = "2.3.0", note = "use `Detect` instead")]
FromCentralDirectory,
/// Specify a fixed archive offset.
Known(u64),

279
src/read/magic_finder.rs Normal file
View file

@ -0,0 +1,279 @@
use std::io::{Read, Seek, SeekFrom};
use memchr::memmem::{Finder, FinderRev};
use crate::result::ZipResult;
/// Abstracts the direction (forward or backward) in which a
/// [`MagicFinder`] scans a reader for a magic-byte needle.
pub trait FinderDirection<'a> {
    /// Creates a finder for the given needle.
    fn new(needle: &'a [u8]) -> Self;
    /// Returns the initial cursor position for the given `(start_inclusive,
    /// end_exclusive)` bounds and window size.
    fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64;
    /// Restricts `window` so an in-window search resumes relative to
    /// `mid_window_offset`; returns the sub-window and its offset within
    /// the original window.
    fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize);

    /// Returns the needle being searched for.
    fn needle(&self) -> &[u8];
    /// Searches `haystack` for the needle in this direction.
    fn find(&self, haystack: &[u8]) -> Option<usize>;
    /// Advances `cursor` to the next window start, or `None` when the
    /// bounds are exhausted.
    fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64>;
    /// Maps a match offset to the position from which a subsequent
    /// in-window search should resume.
    fn move_scope(&self, offset: usize) -> usize;
}
/// Forward-scanning direction: wraps [`memchr::memmem::Finder`] and walks
/// windows from the start of the bounds toward the end.
pub struct Forward<'a>(Finder<'a>);

impl<'a> FinderDirection<'a> for Forward<'a> {
    fn new(needle: &'a [u8]) -> Self {
        Self(Finder::new(needle))
    }

    // Forward searches begin at the inclusive lower bound.
    fn reset_cursor((start_inclusive, _): (u64, u64), _: usize) -> u64 {
        start_inclusive
    }

    // Resume searching *after* a previous in-window match: keep the tail
    // of the window starting at `mid_window_offset`.
    fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
        (&window[mid_window_offset..], mid_window_offset)
    }

    fn find(&self, haystack: &[u8]) -> Option<usize> {
        self.0.find(haystack)
    }

    fn needle(&self) -> &[u8] {
        self.0.needle()
    }

    fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
        // Overlap consecutive windows by `needle_len - 1` bytes so a match
        // straddling a window boundary is still found.
        let magic_overlap = self.needle().len().saturating_sub(1) as u64;
        let next = cursor.saturating_add(window_size as u64 - magic_overlap);

        if next >= bounds.1 {
            None
        } else {
            Some(next)
        }
    }

    // After a match, the in-window search continues just past the matched needle.
    fn move_scope(&self, offset: usize) -> usize {
        offset + self.needle().len()
    }
}
/// Backward-scanning direction: wraps [`memchr::memmem::FinderRev`] and walks
/// windows from the end of the bounds toward the start.
pub struct Backwards<'a>(FinderRev<'a>);

impl<'a> FinderDirection<'a> for Backwards<'a> {
    fn new(needle: &'a [u8]) -> Self {
        Self(FinderRev::new(needle))
    }

    // Backward searches begin one window before the exclusive upper bound,
    // clamped so the cursor never leaves the bounds.
    fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64 {
        bounds
            .1
            .saturating_sub(window_size as u64)
            .clamp(bounds.0, bounds.1)
    }

    // Resume searching *before* a previous in-window match: keep the head
    // of the window up to `mid_window_offset`.
    fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
        (&window[..mid_window_offset], 0)
    }

    fn find(&self, haystack: &[u8]) -> Option<usize> {
        self.0.rfind(haystack)
    }

    fn needle(&self) -> &[u8] {
        self.0.needle()
    }

    fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
        // Overlap consecutive windows by `needle_len - 1` bytes so a match
        // straddling a window boundary is still found.
        let magic_overlap = self.needle().len().saturating_sub(1) as u64;

        if cursor <= bounds.0 {
            None
        } else {
            Some(
                cursor
                    .saturating_add(magic_overlap)
                    .saturating_sub(window_size as u64)
                    .clamp(bounds.0, bounds.1),
            )
        }
    }

    // After a match, the scoped sub-window already excludes the matched
    // position, so the resume offset is unchanged.
    fn move_scope(&self, offset: usize) -> usize {
        offset
    }
}
/// A utility for finding magic symbols from the end of a seekable reader.
///
/// Can be repurposed to recycle the internal buffer.
pub struct MagicFinder<Direction> {
    // Scratch window reused across reads (and across `repurpose` calls).
    buffer: Box<[u8]>,
    // Direction-specific needle searcher (forward or backward).
    pub(self) finder: Direction,
    // Absolute offset of the current window's start within the reader.
    cursor: u64,
    // When `Some`, the buffer still holds valid data from the last read and
    // the in-window search resumes from this offset; `None` forces a fresh
    // read on the next call to `next`.
    mid_buffer_offset: Option<usize>,
    // Search bounds as (start_inclusive, end_exclusive) offsets.
    bounds: (u64, u64),
}
impl<'a, T: FinderDirection<'a>> MagicFinder<T> {
    /// Create a new magic bytes finder to look within specific bounds.
    pub fn new(magic_bytes: &'a [u8], start_inclusive: u64, end_exclusive: u64) -> Self {
        const BUFFER_SIZE: usize = 2048;

        // Smaller buffer size would be unable to locate bytes.
        // Equal buffer size would stall (the window could not be moved).
        debug_assert!(BUFFER_SIZE >= magic_bytes.len());

        Self {
            buffer: vec![0; BUFFER_SIZE].into_boxed_slice(),
            finder: T::new(magic_bytes),
            cursor: T::reset_cursor((start_inclusive, end_exclusive), BUFFER_SIZE),
            mid_buffer_offset: None,
            bounds: (start_inclusive, end_exclusive),
        }
    }

    /// Repurpose the finder for different bytes or bounds.
    ///
    /// Reuses the existing buffer allocation.
    pub fn repurpose(&mut self, magic_bytes: &'a [u8], bounds: (u64, u64)) -> &mut Self {
        // The recycled buffer must still be able to hold the new needle.
        debug_assert!(self.buffer.len() >= magic_bytes.len());

        self.finder = T::new(magic_bytes);
        self.cursor = T::reset_cursor(bounds, self.buffer.len());
        self.bounds = bounds;

        // Reset the mid-buffer offset, to invalidate buffer content.
        self.mid_buffer_offset = None;

        self
    }

    /// Find the next magic bytes in the direction specified in the type.
    ///
    /// On a match, returns the absolute position of the magic bytes and
    /// leaves the reader seeked to it; returns `Ok(None)` once the bounds
    /// are exhausted.
    pub fn next<R: Read + Seek>(&mut self, reader: &mut R) -> ZipResult<Option<u64>> {
        loop {
            if self.cursor < self.bounds.0 || self.cursor >= self.bounds.1 {
                // The finder is consumed
                break;
            }

            /* Position the window and ensure correct length */
            let window_start = self.cursor;
            let window_end = self
                .cursor
                .saturating_add(self.buffer.len() as u64)
                .min(self.bounds.1);

            if window_end <= window_start {
                // Short-circuit on zero-sized windows to prevent loop
                break;
            }

            let window = &mut self.buffer[..(window_end - window_start) as usize];

            // Only hit the reader when the buffer no longer holds valid data
            // for this window (i.e. we are not resuming a mid-window search).
            if self.mid_buffer_offset.is_none() {
                reader.seek(SeekFrom::Start(window_start))?;
                reader.read_exact(window)?;
            }

            // When resuming, restrict the window so previously yielded
            // matches are not reported again.
            let (window, window_start_offset) = match self.mid_buffer_offset {
                Some(mid_buffer_offset) => T::scope_window(window, mid_buffer_offset),
                None => (&*window, 0usize),
            };

            if let Some(offset) = self.finder.find(window) {
                let magic_pos = window_start + window_start_offset as u64 + offset as u64;
                // Leave the reader positioned at the match for the caller.
                reader.seek(SeekFrom::Start(magic_pos))?;

                // Remember where to resume so repeated calls yield further
                // matches within the same window.
                self.mid_buffer_offset = Some(self.finder.move_scope(window_start_offset + offset));

                return Ok(Some(magic_pos));
            }

            // No match in this window; force a fresh read at the next cursor.
            self.mid_buffer_offset = None;

            match self
                .finder
                .move_cursor(self.cursor, self.bounds, self.buffer.len())
            {
                Some(new_cursor) => {
                    self.cursor = new_cursor;
                }
                None => {
                    // Destroy the finder when we've reached the end of the bounds.
                    self.bounds.0 = self.bounds.1;
                    break;
                }
            }
        }

        Ok(None)
    }
}
/// A magic bytes finder with an optimistic guess that is tried before
/// the inner finder begins searching from end. This enables much faster
/// lookup in files without appended junk, because the magic bytes will be
/// found directly.
///
/// The guess can be marked as mandatory to produce an error. This is useful
/// if the ArchiveOffset is known and auto-detection is not desired.
pub struct OptimisticMagicFinder<Direction> {
    // Fallback exhaustive finder, used when the guess fails or is absent.
    inner: MagicFinder<Direction>,
    // Optional `(position, mandatory)` guess checked before scanning.
    initial_guess: Option<(u64, bool)>,
}

/// This is a temporary restriction, to avoid heap allocation in
/// [`OptimisticMagicFinder::next`].
///
/// We only use magic bytes of size 4 at the moment.
const STACK_BUFFER_SIZE: usize = 8;
impl<'a, Direction: FinderDirection<'a>> OptimisticMagicFinder<Direction> {
    /// Create a new empty optimistic magic bytes finder.
    ///
    /// Useless until `repurpose`d with real bytes and bounds.
    pub fn new_empty() -> Self {
        Self {
            inner: MagicFinder::new(&[], 0, 0),
            initial_guess: None,
        }
    }

    /// Repurpose the finder for different bytes, bounds and initial guesses.
    pub fn repurpose(
        &mut self,
        magic_bytes: &'a [u8],
        bounds: (u64, u64),
        initial_guess: Option<(u64, bool)>,
    ) -> &mut Self {
        // The stack buffer used in `next` must be able to hold the whole needle.
        debug_assert!(magic_bytes.len() <= STACK_BUFFER_SIZE);

        self.inner.repurpose(magic_bytes, bounds);
        self.initial_guess = initial_guess;

        self
    }

    /// Equivalent to `next_back`, with an optional initial guess attempted before
    /// proceeding with reading from the back of the reader.
    pub fn next<R: Read + Seek>(&mut self, reader: &mut R) -> ZipResult<Option<u64>> {
        if let Some((v, mandatory)) = self.initial_guess {
            reader.seek(SeekFrom::Start(v))?;

            let mut buffer = [0; STACK_BUFFER_SIZE];
            let buffer = &mut buffer[..self.inner.finder.needle().len()];

            // Attempt to match only if there's enough space for the needle
            if v.saturating_add(buffer.len() as u64) <= self.inner.bounds.1 {
                reader.read_exact(buffer)?;

                // If a match is found, yield it.
                if self.inner.finder.needle() == buffer {
                    self.initial_guess.take();
                    reader.seek(SeekFrom::Start(v))?;
                    return Ok(Some(v));
                }
            }

            // If a match is not found but the guess was mandatory, stop here:
            // `None` tells the caller the magic bytes are not at the required
            // position, without falling back to a scan.
            if mandatory {
                return Ok(None);
            }

            // If the initial guess was not mandatory, remove it, as it was not found.
            self.initial_guess.take();
        }

        // No guess (or guess exhausted): fall back to the exhaustive scan.
        self.inner.next(reader)
    }
}

View file

@ -1,11 +1,11 @@
#![macro_use]
use crate::read::magic_finder::{Backwards, Forward, MagicFinder, OptimisticMagicFinder};
use crate::read::ArchiveOffset;
use crate::result::{ZipError, ZipResult};
use core::mem;
use memchr::memmem::FinderRev;
use std::io;
use std::io::prelude::*;
use std::rc::Rc;
use std::slice;
/// "Magic" header values used in the zip spec to locate metadata records.
@ -22,6 +22,7 @@ impl Magic {
}
#[inline(always)]
#[allow(dead_code)]
pub const fn from_le_bytes(bytes: [u8; 4]) -> Self {
Self(u32::from_le_bytes(bytes))
}
@ -289,7 +290,7 @@ pub(crate) struct Zip32CentralDirectoryEnd {
}
impl Zip32CentralDirectoryEnd {
fn block_and_comment(self) -> ZipResult<(Zip32CDEBlock, Box<[u8]>)> {
fn into_block_and_comment(self) -> (Zip32CDEBlock, Box<[u8]>) {
let Self {
disk_number,
disk_with_central_directory,
@ -307,12 +308,10 @@ impl Zip32CentralDirectoryEnd {
number_of_files,
central_directory_size,
central_directory_offset,
zip_file_comment_length: zip_file_comment
.len()
.try_into()
.map_err(|_| ZipError::InvalidArchive("File comment must be less than 64 KiB"))?,
zip_file_comment_length: zip_file_comment.len() as u16,
};
Ok((block, zip_file_comment))
(block, zip_file_comment)
}
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip32CentralDirectoryEnd> {
@ -329,7 +328,15 @@ impl Zip32CentralDirectoryEnd {
} = Zip32CDEBlock::parse(reader)?;
let mut zip_file_comment = vec![0u8; zip_file_comment_length as usize].into_boxed_slice();
reader.read_exact(&mut zip_file_comment)?;
if let Err(e) = reader.read_exact(&mut zip_file_comment) {
if e.kind() == io::ErrorKind::UnexpectedEof {
return Err(ZipError::InvalidArchive(
"EOCD comment exceeds file boundary",
));
}
return Err(e.into());
}
Ok(Zip32CentralDirectoryEnd {
disk_number,
@ -342,99 +349,23 @@ impl Zip32CentralDirectoryEnd {
})
}
#[allow(clippy::type_complexity)]
pub fn find_and_parse<T: Read + Seek>(
reader: &mut T,
) -> ZipResult<Box<[(Rc<Zip32CentralDirectoryEnd>, u64)]>> {
let mut results = vec![];
let file_length = reader.seek(io::SeekFrom::End(0))?;
if file_length < mem::size_of::<Zip32CDEBlock>() as u64 {
return Err(ZipError::InvalidArchive("Invalid zip header"));
}
// The End Of Central Directory Record should be the last thing in
// the file and so searching the last 65557 bytes of the file should
// be enough. However, not all zips are well-formed and other
// programs may consume zips with extra junk at the end without
// error, so we go back 128K to be compatible with them. 128K is
// arbitrary, but it matches what Info-Zip does.
const EOCDR_SEARCH_SIZE: u64 = 128 * 1024;
let search_lower_bound = file_length.saturating_sub(EOCDR_SEARCH_SIZE);
const END_WINDOW_SIZE: usize = 8192;
/* TODO: use static_assertions!() */
debug_assert!(END_WINDOW_SIZE > mem::size_of::<Magic>());
const SIG_BYTES: [u8; mem::size_of::<Magic>()] =
Magic::CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
let finder = FinderRev::new(&SIG_BYTES);
let mut window_start: u64 = file_length.saturating_sub(END_WINDOW_SIZE as u64);
let mut window = [0u8; END_WINDOW_SIZE];
while window_start >= search_lower_bound {
/* Go to the start of the window in the file. */
reader.seek(io::SeekFrom::Start(window_start))?;
/* Identify how many bytes to read (this may be less than the window size for files
* smaller than END_WINDOW_SIZE). */
let end = (window_start + END_WINDOW_SIZE as u64).min(file_length);
let cur_len = (end - window_start) as usize;
debug_assert!(cur_len > 0);
debug_assert!(cur_len <= END_WINDOW_SIZE);
let cur_window: &mut [u8] = &mut window[..cur_len];
/* Read the window into the bytes! */
reader.read_exact(cur_window)?;
/* Find instances of the magic signature. */
for offset in finder.rfind_iter(cur_window) {
let cde_start_pos = window_start + offset as u64;
reader.seek(io::SeekFrom::Start(cde_start_pos))?;
/* Drop any headers that don't parse. */
if let Ok(cde) = Self::parse(reader) {
results.push((Rc::new(cde), cde_start_pos));
}
}
/* We always want to make sure we go allllll the way back to the start of the file if
* we can't find it elsewhere. However, our `while` condition doesn't check that. So we
* avoid infinite looping by checking at the end of the loop. */
if window_start == search_lower_bound {
break;
}
/* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that
* overlap our nice neat window boundaries! */
window_start = (window_start
/* NB: To catch matches across window boundaries, we need to make our blocks overlap
* by the width of the pattern to match. */
+ mem::size_of::<Magic>() as u64)
/* This should never happen, but make sure we don't go past the end of the file. */
.min(file_length);
window_start = window_start
.saturating_sub(
/* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at
* once (unless limited by file_length). */
END_WINDOW_SIZE as u64,
)
/* This will never go below the value of `search_lower_bound`, so we have a special
* `if window_start == search_lower_bound` check above. */
.max(search_lower_bound);
}
if results.is_empty() {
Err(ZipError::InvalidArchive(
"Could not find central directory end",
))
} else {
Ok(results.into_boxed_slice())
}
}
/// Serializes the EOCD record: the fixed-size block first, then the comment.
///
/// Fails if the comment is too long to be represented by the 16-bit
/// `zip_file_comment_length` field.
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
    let (block, comment) = self.into_block_and_comment();

    // The length field is a u16; reject comments that would otherwise be
    // silently truncated by the `as u16` cast in `into_block_and_comment`.
    if comment.len() > u16::MAX as usize {
        return Err(ZipError::InvalidArchive(
            "EOCD comment length exceeds u16::MAX",
        ));
    }

    block.write(writer)?;
    writer.write_all(&comment)?;

    Ok(())
}
/// Returns `true` when the ZIP32 footer fields are saturated, hinting that
/// a ZIP64 end-of-central-directory record may be present.
pub fn may_be_zip64(&self) -> bool {
    self.number_of_files == u16::MAX || self.central_directory_offset == u32::MAX
}
}
#[derive(Copy, Clone)]
@ -551,6 +482,7 @@ impl FixedSizeBlock for Zip64CDEBlock {
}
pub(crate) struct Zip64CentralDirectoryEnd {
pub record_size: u64,
pub version_made_by: u16,
pub version_needed_to_extract: u16,
pub disk_number: u32,
@ -559,13 +491,13 @@ pub(crate) struct Zip64CentralDirectoryEnd {
pub number_of_files: u64,
pub central_directory_size: u64,
pub central_directory_offset: u64,
//pub extensible_data_sector: Vec<u8>, <-- We don't do anything with this at the moment.
pub extensible_data_sector: Box<[u8]>,
}
impl Zip64CentralDirectoryEnd {
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEnd> {
pub fn parse<T: Read>(reader: &mut T, max_size: u64) -> ZipResult<Zip64CentralDirectoryEnd> {
let Zip64CDEBlock {
// record_size,
record_size,
version_made_by,
version_needed_to_extract,
disk_number,
@ -576,7 +508,20 @@ impl Zip64CentralDirectoryEnd {
central_directory_offset,
..
} = Zip64CDEBlock::parse(reader)?;
if record_size < 44 {
return Err(ZipError::InvalidArchive("Low EOCD64 record size"));
} else if record_size.saturating_add(12) > max_size {
return Err(ZipError::InvalidArchive(
"EOCD64 extends beyond EOCD64 locator",
));
}
let mut zip_file_comment = vec![0u8; record_size as usize - 44].into_boxed_slice();
reader.read_exact(&mut zip_file_comment)?;
Ok(Self {
record_size,
version_made_by,
version_needed_to_extract,
disk_number,
@ -585,94 +530,13 @@ impl Zip64CentralDirectoryEnd {
number_of_files,
central_directory_size,
central_directory_offset,
extensible_data_sector: zip_file_comment,
})
}
/// Scan backwards through `[search_lower_bound, search_upper_bound)` for
/// every ZIP64 end-of-central-directory signature, parsing a record at
/// each hit.
///
/// Returns each parsed record paired with its offset relative to
/// `search_lower_bound` (treated as the archive offset by callers).
///
/// # Errors
///
/// Fails if no signature is found anywhere in the range, or if parsing a
/// record at a found signature fails (see NOTE below).
pub fn find_and_parse<T: Read + Seek>(
    reader: &mut T,
    search_lower_bound: u64,
    search_upper_bound: u64,
) -> ZipResult<Vec<(Zip64CentralDirectoryEnd, u64)>> {
    let mut results = Vec::new();
    // Fixed-size chunk scanned per iteration while walking backwards.
    const END_WINDOW_SIZE: usize = 2048;
    /* TODO: use static_assertions!() */
    debug_assert!(END_WINDOW_SIZE > mem::size_of::<Magic>());
    const SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
    let finder = FinderRev::new(&SIG_BYTES);
    // Start with the window butted up against the upper end of the range.
    let mut window_start: u64 = search_upper_bound
        .saturating_sub(END_WINDOW_SIZE as u64)
        .max(search_lower_bound);
    let mut window = [0u8; END_WINDOW_SIZE];
    while window_start >= search_lower_bound {
        reader.seek(io::SeekFrom::Start(window_start))?;
        /* Identify how many bytes to read (this may be less than the window size for files
         * smaller than END_WINDOW_SIZE). */
        let end = (window_start + END_WINDOW_SIZE as u64).min(search_upper_bound);
        debug_assert!(end >= window_start);
        let cur_len = (end - window_start) as usize;
        if cur_len == 0 {
            break;
        }
        debug_assert!(cur_len <= END_WINDOW_SIZE);
        let cur_window: &mut [u8] = &mut window[..cur_len];
        /* Read the window into the bytes! */
        reader.read_exact(cur_window)?;
        /* Find instances of the magic signature. */
        for offset in finder.rfind_iter(cur_window) {
            let cde_start_pos = window_start + offset as u64;
            reader.seek(io::SeekFrom::Start(cde_start_pos))?;
            debug_assert!(cde_start_pos >= search_lower_bound);
            let archive_offset = cde_start_pos - search_lower_bound;
            // NOTE(review): a parse failure on any candidate aborts the whole
            // search via `?` instead of trying earlier candidates — confirm
            // this is intended rather than `continue`-on-error.
            let cde = Self::parse(reader)?;
            results.push((cde, archive_offset));
        }
        /* We always want to make sure we go allllll the way back to the start of the file if
         * we can't find it elsewhere. However, our `while` condition doesn't check that. So we
         * avoid infinite looping by checking at the end of the loop. */
        if window_start == search_lower_bound {
            break;
        }
        /* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that
         * overlap our nice neat window boundaries! */
        window_start = (window_start
            /* NB: To catch matches across window boundaries, we need to make our blocks overlap
             * by the width of the pattern to match. */
            + mem::size_of::<Magic>() as u64)
            /* This may never happen, but make sure we don't go past the end of the specified
             * range. */
            .min(search_upper_bound);
        window_start = window_start
            .saturating_sub(
                /* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at
                 * once (unless limited by search_upper_bound). */
                END_WINDOW_SIZE as u64,
            )
            /* This will never go below the value of `search_lower_bound`, so we have a special
             * `if window_start == search_lower_bound` check above. */
            .max(search_lower_bound);
    }
    if results.is_empty() {
        Err(ZipError::InvalidArchive(
            "Could not find ZIP64 central directory end",
        ))
    } else {
        Ok(results)
    }
}
pub fn block(self) -> Zip64CDEBlock {
pub fn into_block_and_comment(self) -> (Zip64CDEBlock, Box<[u8]>) {
let Self {
record_size,
version_made_by,
version_needed_to_extract,
disk_number,
@ -681,27 +545,277 @@ impl Zip64CentralDirectoryEnd {
number_of_files,
central_directory_size,
central_directory_offset,
extensible_data_sector,
} = self;
Zip64CDEBlock {
magic: Zip64CDEBlock::MAGIC,
/* currently unused */
record_size: 44,
version_made_by,
version_needed_to_extract,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
}
(
Zip64CDEBlock {
magic: Zip64CDEBlock::MAGIC,
record_size,
version_made_by,
version_needed_to_extract,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
},
extensible_data_sector,
)
}
/// Serialize this ZIP64 end-of-central-directory record into `writer`:
/// first the fixed-size EOCD64 block, then the variable-length
/// extensible data sector that follows it on disk.
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
    let (block, comment) = self.into_block_and_comment();
    block.write(writer)?;
    writer.write_all(&comment)?;
    Ok(())
}
}
/// A parsed value paired with the absolute stream position it was read from.
pub(crate) struct DataAndPosition<T> {
    pub data: T,
    #[allow(dead_code)]
    pub position: u64,
}

impl<T> From<(T, u64)> for DataAndPosition<T> {
    fn from((data, position): (T, u64)) -> Self {
        Self { data, position }
    }
}
/// Everything learned while locating the central directory: the mandatory
/// zip32 EOCD, the optional ZIP64 EOCD, and the computed archive offset.
pub(crate) struct CentralDirectoryEndInfo {
    // The zip32 EOCD record together with the position it was found at.
    pub eocd: DataAndPosition<Zip32CentralDirectoryEnd>,
    // The ZIP64 EOCD record, present only when detected and validated.
    pub eocd64: Option<DataAndPosition<Zip64CentralDirectoryEnd>>,
    // Number of bytes of non-archive data preceding the archive proper.
    pub archive_offset: u64,
}
/// Finds the EOCD and possibly the EOCD64 block and determines the archive offset.
///
/// Iterates backwards over every zip32 EOCD signature candidate; each one is
/// cross-checked against the rest of the archive (the first CDFH for zip32, or
/// the EOCD64 locator/record chain when the zip32 counters are saturated)
/// before being accepted. If every candidate fails, the most relevant parse
/// error encountered is returned.
///
/// In the best case scenario (no prepended junk), this function will not backtrack
/// in the reader.
pub(crate) fn find_central_directory<R: Read + Seek>(
    reader: &mut R,
    archive_offset: ArchiveOffset,
    end_exclusive: u64,
    file_len: u64,
) -> ZipResult<CentralDirectoryEndInfo> {
    const EOCD_SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
    const EOCD64_SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
    const CDFH_SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE.to_le_bytes();
    // Instantiate the mandatory finder
    let mut eocd_finder = MagicFinder::<Backwards<'static>>::new(&EOCD_SIG_BYTES, 0, end_exclusive);
    // Lazily-created forward finder, reused (repurposed) for the CDFH and
    // EOCD64 sub-searches below.
    let mut subfinder: Option<OptimisticMagicFinder<Forward<'static>>> = None;
    // Keep the last errors for cases of improper EOCD instances.
    let mut parsing_error = None;
    while let Some(eocd_offset) = eocd_finder.next(reader)? {
        // Attempt to parse the EOCD block
        let eocd = match Zip32CentralDirectoryEnd::parse(reader) {
            Ok(eocd) => eocd,
            Err(e) => {
                // Remember only the first parse error; it is the most likely
                // to describe the "real" (outermost) EOCD candidate.
                if parsing_error.is_none() {
                    parsing_error = Some(e);
                }
                continue;
            }
        };
        // ! Relaxed (inequality) due to garbage-after-comment Python files
        // Consistency check: the EOCD comment must terminate before the end of file
        // (22 is the fixed size of the zip32 EOCD block itself).
        if eocd.zip_file_comment.len() as u64 + eocd_offset + 22 > file_len {
            parsing_error = Some(ZipError::InvalidArchive("Invalid EOCD comment length"));
            continue;
        }
        let zip64_metadata = if eocd.may_be_zip64() {
            fn try_read_eocd64_locator(
                reader: &mut (impl Read + Seek),
                eocd_offset: u64,
            ) -> ZipResult<(u64, Zip64CentralDirectoryEndLocator)> {
                if eocd_offset < mem::size_of::<Zip64CDELocatorBlock>() as u64 {
                    return Err(ZipError::InvalidArchive(
                        "EOCD64 Locator does not fit in file",
                    ));
                }
                // The locator, if present, sits immediately before the EOCD.
                let locator64_offset = eocd_offset - mem::size_of::<Zip64CDELocatorBlock>() as u64;
                reader.seek(io::SeekFrom::Start(locator64_offset))?;
                Ok((
                    locator64_offset,
                    Zip64CentralDirectoryEndLocator::parse(reader)?,
                ))
            }
            // ZIP64 is only a hint: a missing/broken locator falls back to zip32.
            try_read_eocd64_locator(reader, eocd_offset).ok()
        } else {
            None
        };
        let Some((locator64_offset, locator64)) = zip64_metadata else {
            // Branch out for zip32
            let relative_cd_offset = eocd.central_directory_offset as u64;
            // If the archive is empty, there is nothing more to be checked, the archive is correct.
            if eocd.number_of_files == 0 {
                return Ok(CentralDirectoryEndInfo {
                    eocd: (eocd, eocd_offset).into(),
                    eocd64: None,
                    archive_offset: eocd_offset.saturating_sub(relative_cd_offset),
                });
            }
            // Consistency check: the CD relative offset cannot be after the EOCD
            if relative_cd_offset >= eocd_offset {
                parsing_error = Some(ZipError::InvalidArchive("Invalid CDFH offset in EOCD"));
                continue;
            }
            // Attempt to find the first CDFH
            let subfinder = subfinder
                .get_or_insert_with(OptimisticMagicFinder::new_empty)
                .repurpose(
                    &CDFH_SIG_BYTES,
                    // The CDFH must be before the EOCD and after the relative offset,
                    // because prepended junk can only move it forward.
                    (relative_cd_offset, eocd_offset),
                    match archive_offset {
                        // A known offset gives an exact first guess (mandatory hit);
                        // otherwise start at the relative offset and scan forward.
                        ArchiveOffset::Known(n) => {
                            Some((relative_cd_offset.saturating_add(n).min(eocd_offset), true))
                        }
                        _ => Some((relative_cd_offset, false)),
                    },
                );
            // Consistency check: find the first CDFH
            if let Some(cd_offset) = subfinder.next(reader)? {
                // The first CDFH will define the archive offset
                let archive_offset = cd_offset - relative_cd_offset;
                return Ok(CentralDirectoryEndInfo {
                    eocd: (eocd, eocd_offset).into(),
                    eocd64: None,
                    archive_offset,
                });
            }
            parsing_error = Some(ZipError::InvalidArchive("No CDFH found"));
            continue;
        };
        // Consistency check: the EOCD64 offset must be before the EOCD64 Locator offset
        if locator64.end_of_central_directory_offset >= locator64_offset {
            parsing_error = Some(ZipError::InvalidArchive("Invalid EOCD64 Locator CD offset"));
            continue;
        }
        if locator64.number_of_disks > 1 {
            parsing_error = Some(ZipError::InvalidArchive(
                "Multi-disk ZIP files are not supported",
            ));
            continue;
        }
        // This was hidden inside a function to collect errors in a single place.
        // Once try blocks are stabilized, this can go away.
        fn try_read_eocd64<R: Read + Seek>(
            reader: &mut R,
            locator64: &Zip64CentralDirectoryEndLocator,
            expected_length: u64,
        ) -> ZipResult<Zip64CentralDirectoryEnd> {
            let z64 = Zip64CentralDirectoryEnd::parse(reader, expected_length)?;
            // Consistency check: EOCD64 locator should agree with the EOCD64
            if z64.disk_with_central_directory != locator64.disk_with_central_directory {
                return Err(ZipError::InvalidArchive(
                    "Invalid EOCD64: inconsistency with Locator data",
                ));
            }
            // Consistency check: the EOCD64 must have the expected length
            // (record_size excludes the 12 leading bytes of signature + size field).
            if z64.record_size + 12 != expected_length {
                return Err(ZipError::InvalidArchive(
                    "Invalid EOCD64: inconsistent length",
                ));
            }
            Ok(z64)
        }
        // Attempt to find the EOCD64 with an initial guess
        let subfinder = subfinder
            .get_or_insert_with(OptimisticMagicFinder::new_empty)
            .repurpose(
                &EOCD64_SIG_BYTES,
                (locator64.end_of_central_directory_offset, locator64_offset),
                match archive_offset {
                    ArchiveOffset::Known(n) => Some((
                        locator64
                            .end_of_central_directory_offset
                            .saturating_add(n)
                            .min(locator64_offset),
                        true,
                    )),
                    _ => Some((locator64.end_of_central_directory_offset, false)),
                },
            );
        // Consistency check: Find the EOCD64
        let mut local_error = None;
        while let Some(eocd64_offset) = subfinder.next(reader)? {
            // Offsets returned by the finder never precede the search range start,
            // so this subtraction cannot underflow.
            let archive_offset = eocd64_offset - locator64.end_of_central_directory_offset;
            match try_read_eocd64(
                reader,
                &locator64,
                locator64_offset.saturating_sub(eocd64_offset),
            ) {
                Ok(eocd64) => {
                    // Reject candidates whose claimed central directory could not
                    // possibly fit before the EOCD64 (oversized CD detection).
                    if eocd64_offset
                        < eocd64
                            .number_of_files
                            .saturating_mul(
                                mem::size_of::<crate::types::ZipCentralEntryBlock>() as u64
                            )
                            .saturating_add(eocd64.central_directory_offset)
                    {
                        local_error = Some(ZipError::InvalidArchive(
                            "Invalid EOCD64: inconsistent number of files",
                        ));
                        continue;
                    }
                    return Ok(CentralDirectoryEndInfo {
                        eocd: (eocd, eocd_offset).into(),
                        eocd64: Some((eocd64, eocd64_offset).into()),
                        archive_offset,
                    });
                }
                Err(e) => {
                    local_error = Some(e);
                }
            }
        }
        parsing_error = local_error.or(Some(ZipError::InvalidArchive("Could not find EOCD64")));
    }
    Err(parsing_error.unwrap_or(ZipError::InvalidArchive("Could not find EOCD")))
}
pub(crate) fn is_dir(filename: &str) -> bool {
filename
.chars()

View file

@ -160,6 +160,7 @@ pub(crate) mod zip_writer {
pub(super) writing_to_file: bool,
pub(super) writing_raw: bool,
pub(super) comment: Box<[u8]>,
pub(super) zip64_comment: Option<Box<[u8]>>,
pub(super) flush_on_finish_file: bool,
}
@ -628,19 +629,19 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
/// This uses the given read configuration to initially read the archive.
///
/// Reads the existing central directory via `ZipArchive::get_metadata`, then
/// builds a writer that appends to the archive, preserving existing entries
/// and both the zip32 and ZIP64 archive comments.
///
/// # Errors
///
/// Propagates any error from reading the archive metadata (e.g. when no
/// end-of-central-directory record can be found).
pub fn new_append_with_config(config: Config, mut readwriter: A) -> ZipResult<ZipWriter<A>> {
    readwriter.seek(SeekFrom::Start(0))?;
    // Propagate the underlying metadata error directly instead of collapsing
    // every failure into a generic "no central directory" message.
    let shared = ZipArchive::get_metadata(config, &mut readwriter)?;
    Ok(ZipWriter {
        inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),
        files: shared.files,
        stats: Default::default(),
        writing_to_file: false,
        comment: shared.comment,
        zip64_comment: shared.zip64_comment,
        writing_raw: true, // avoid recomputing the last file's header
        flush_on_finish_file: false,
    })
}
/// `flush_on_finish_file` is designed to support a streaming `inner` that may unload flushed
@ -774,8 +775,11 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
let central_start = self.finalize()?;
let inner = mem::replace(&mut self.inner, Closed).unwrap();
let comment = mem::take(&mut self.comment);
let zip64_comment = mem::take(&mut self.zip64_comment);
let files = mem::take(&mut self.files);
let archive = ZipArchive::from_finalized_writer(files, comment, inner, central_start)?;
let archive =
ZipArchive::from_finalized_writer(files, comment, zip64_comment, inner, central_start)?;
Ok(archive)
}
}
@ -794,6 +798,7 @@ impl<W: Write + Seek> ZipWriter<W> {
writing_to_file: false,
writing_raw: false,
comment: Box::new([]),
zip64_comment: None,
flush_on_finish_file: false,
}
}
@ -832,6 +837,35 @@ impl<W: Write + Seek> ZipWriter<W> {
&self.comment
}
/// Set ZIP64 archive comment.
pub fn set_zip64_comment<S>(&mut self, comment: Option<S>)
where
    S: Into<Box<str>>,
{
    // Convert the string-ish comment into raw bytes and delegate.
    let raw = comment.map(|c| c.into().into_boxed_bytes());
    self.set_raw_zip64_comment(raw);
}
/// Set ZIP64 archive comment.
///
/// This sets the raw bytes of the comment. The comment
/// is typically expected to be encoded in UTF-8.
///
/// Setting `Some(..)` forces a ZIP64 end-of-central-directory record to be
/// written when the archive is finished; `None` removes the ZIP64 comment.
pub fn set_raw_zip64_comment(&mut self, comment: Option<Box<[u8]>>) {
    self.zip64_comment = comment;
}
/// Get ZIP64 archive comment.
///
/// Returns `None` when no ZIP64 comment is set; otherwise the comment bytes
/// interpreted as UTF-8 (`Err` if the bytes are not valid UTF-8).
pub fn get_zip64_comment(&mut self) -> Option<Result<&str, Utf8Error>> {
    self.get_raw_zip64_comment().map(from_utf8)
}
/// Get ZIP64 archive comment.
///
/// This returns the raw bytes of the comment. The comment
/// is typically expected to be encoded in UTF-8.
pub fn get_raw_zip64_comment(&self) -> Option<&[u8]> {
    self.zip64_comment.as_deref()
}
/// Set the file length and crc32 manually.
///
/// # Safety
@ -1516,11 +1550,15 @@ impl<W: Write + Seek> ZipWriter<W> {
version_needed = version_needed.max(file.version_needed());
}
let central_size = writer.stream_position()? - central_start;
if self.files.len() > spec::ZIP64_ENTRY_THR
let is64 = self.files.len() > spec::ZIP64_ENTRY_THR
|| central_size.max(central_start) > spec::ZIP64_BYTES_THR
{
|| self.zip64_comment.is_some();
if is64 {
let comment = self.zip64_comment.clone().unwrap_or_default();
let zip64_footer = spec::Zip64CentralDirectoryEnd {
record_size: comment.len() as u64 + 44,
version_made_by: version_needed,
version_needed_to_extract: version_needed,
disk_number: 0,
@ -1529,6 +1567,7 @@ impl<W: Write + Seek> ZipWriter<W> {
number_of_files: self.files.len() as u64,
central_directory_size: central_size,
central_directory_offset: central_start,
extensible_data_sector: comment,
};
zip64_footer.write(writer)?;

View file

@ -0,0 +1,24 @@
use std::io::Cursor;
use zip::ZipArchive;
#[test]
fn test_prepended_garbage() {
    // Prepend four junk bytes to a known-good archive: EOCD detection must
    // still locate the central directory and report the correct entry count.
    let mut data = vec![0, 1, 2, 3];
    data.extend_from_slice(include_bytes!("../tests/data/extended_timestamp.zip"));

    let mut archive = ZipArchive::new(Cursor::new(data)).expect("couldn't open test zip file");
    assert_eq!(2, archive.len());

    // Every entry must still be readable by index with a valid enclosed name.
    for index in 0..archive.len() {
        let entry = archive.by_index(index).unwrap();
        let name = entry.enclosed_name().unwrap();
        println!(
            "Entry {} has name \"{}\" ({} bytes)",
            index,
            name.display(),
            entry.size()
        );
    }
}