fix: correctly parse EoCD for misaligned comment sizes

Also adds a test case for the same.
This commit is contained in:
Erica Marigold 2025-01-07 13:29:39 +00:00
parent 0f5a6d035c
commit 98c23ece3e
Signed by: DevComp
GPG key ID: 429EF1C337871656
3 changed files with 392 additions and 358 deletions

View file

@ -3,59 +3,59 @@ local crc32 = require("./crc")
-- Little endian constant signatures used in the ZIP file format
local SIGNATURES = table.freeze({
	-- Marks the beginning of each file in the ZIP
	LOCAL_FILE = 0x04034b50,
	-- Marks the start of a data descriptor
	DATA_DESCRIPTOR = 0x08074b50,
	-- Marks entries in the central directory
	CENTRAL_DIR = 0x02014b50,
	-- Marks the end of the central directory
	END_OF_CENTRAL_DIR = 0x06054b50,
})
-- Options controlling CRC32 validation of decompressed data
type CrcValidationOptions = {
	skip: boolean, -- When true, checksum verification is skipped entirely
	expected: number, -- CRC32 value recorded in the ZIP metadata for the entry
}
-- Verifies the CRC32 checksum of `decompressed` against the value recorded in
-- the ZIP metadata, erroring on mismatch. No-op when `validation.skip` is set.
local function validateCrc(decompressed: buffer, validation: CrcValidationOptions)
	-- Unless skipping validation is requested, we verify the checksum
	if not validation.skip then
		local computed = crc32(decompressed)
		assert(
			validation.expected == computed,
			-- FIX: the first formatted value must be the expected checksum; it
			-- previously printed `computed` twice, making the "(expected ~= got)"
			-- message show two identical values
			`Validation failed; CRC checksum does not match: {string.format("%x", validation.expected)} ~= {string.format(
				"%x",
				computed
			)} (expected ~= got)`
		)
	end
end
-- The compression methods this reader understands
export type CompressionMethod = "STORE" | "DEFLATE"

-- Maps ZIP compression method IDs to a named decompression routine
local DECOMPRESSION_ROUTINES: { [number]: { name: CompressionMethod, decompress: (buffer, number, CrcValidationOptions) -> buffer } } =
	table.freeze({
		-- `STORE` decompression method - No compression
		[0x00] = {
			name = "STORE" :: CompressionMethod,
			decompress = function(buf, _, validation)
				-- Data is stored verbatim; only the checksum needs checking
				validateCrc(buf, validation)
				return buf
			end,
		},

		-- `DEFLATE` decompression method - Compressed raw deflate chunks
		[0x08] = {
			name = "DEFLATE" :: CompressionMethod,
			decompress = function(buf, uncompressedSize, validation)
				-- FIXME: Why is uncompressedSize not getting inferred correctly although it
				-- is typed?
				local decompressed = inflate(buf, uncompressedSize :: any)
				validateCrc(decompressed, validation)
				return decompressed
			end,
		},
	})
-- TODO: ERROR HANDLING! -- TODO: ERROR HANDLING!
@ -67,7 +67,7 @@ type ZipEntryInner = {
size: number, -- Uncompressed size in bytes size: number, -- Uncompressed size in bytes
offset: number, -- Absolute position of local header in ZIP offset: number, -- Absolute position of local header in ZIP
timestamp: number, -- MS-DOS format timestamp timestamp: number, -- MS-DOS format timestamp
method: CompressionMethod, -- Method used to compress the file method: CompressionMethod, -- Method used to compress the file
crc: number, -- CRC32 checksum of uncompressed data crc: number, -- CRC32 checksum of uncompressed data
isDirectory: boolean, -- Whether the entry is a directory or not isDirectory: boolean, -- Whether the entry is a directory or not
parent: ZipEntry?, -- The parent of the current entry, nil for root parent: ZipEntry?, -- The parent of the current entry, nil for root
@ -75,407 +75,426 @@ type ZipEntryInner = {
} }
-- Constructs a ZipEntry from metadata decoded out of the central directory
function ZipEntry.new(
	name: string,
	size: number,
	offset: number,
	timestamp: number,
	method: CompressionMethod?,
	crc: number
): ZipEntry
	return setmetatable(
		{
			name = name,
			size = size,
			offset = offset,
			timestamp = timestamp,
			method = method,
			crc = crc,
			-- ZIP convention: directory entry names carry a trailing slash
			isDirectory = string.sub(name, -1) == "/",
			parent = nil,
			children = {},
		} :: ZipEntryInner,
		{ __index = ZipEntry }
	)
end
-- Builds the entry's full path by walking its parent chain up to (but not
-- including) the synthetic "/" root entry
function ZipEntry.getPath(self: ZipEntry): string
	local path = self.name
	local current = self.parent
	while current and current.name ~= "/" do
		path = current.name .. path
		current = current.parent
	end
	return path
end
local ZipReader = {}
export type ZipReader = typeof(setmetatable({} :: ZipReaderInner, { __index = ZipReader }))
-- stylua: ignore
type ZipReaderInner = {
	data: buffer,                        -- The buffer containing the raw bytes of the ZIP
	comment: string,                     -- Comment associated with the ZIP
	entries: { ZipEntry },               -- The decoded entries present
	directories: { [string]: ZipEntry }, -- The directories and their respective entries
	root: ZipEntry,                      -- The entry of the root directory
}
-- Creates a ZipReader over a buffer of raw ZIP bytes; eagerly decodes the
-- central directory and constructs the directory tree
function ZipReader.new(data): ZipReader
	-- Synthetic root entry that anchors the directory tree
	local root = ZipEntry.new("/", 0, 0, 0, nil, 0)
	root.isDirectory = true

	local this = setmetatable(
		{
			data = data,
			entries = {},
			directories = {},
			root = root,
		} :: ZipReaderInner,
		{ __index = ZipReader }
	)

	this:parseCentralDirectory()
	this:buildDirectoryTree()
	return this
end
-- Locates the End of Central Directory (EoCD) record, stores the archive
-- comment, then decodes every central directory entry into `self.entries`.
function ZipReader.parseCentralDirectory(self: ZipReader): ()
	-- ZIP files are read from the end, starting with the End of Central Directory record
	-- The EoCD is at least 22 bytes and contains pointers to the rest of the ZIP structure
	local bufSize = buffer.len(self.data)
	if bufSize < 22 then
		-- Guard: anything smaller cannot hold an EoCD record, and starting the
		-- scan at (bufSize - 22) would read at a negative offset
		error("Invalid ZIP file; too small to contain an End of Central Directory record")
	end

	-- The EoCD may be followed by a comment of up to 65535 bytes, so the record
	-- can start anywhere within the last (22 + 65535) bytes of the file
	local minPos = math.max(0, bufSize - (22 + 65535) --[[ max comment size: 64 KiB ]])
	local pos = bufSize - 22

	-- Search backwards for the EoCD signature
	while pos >= minPos do
		if buffer.readu32(self.data, pos) == SIGNATURES.END_OF_CENTRAL_DIR then
			break
		end
		pos -= 1
	end

	-- Verify we found the signature
	if pos < minPos then
		error("Could not find End of Central Directory signature")
	end

	-- End of Central Directory format:
	-- Offset  Bytes  Description
	-- ------------------------------------------------------
	-- 0       4      End of central directory signature
	-- 4       2      Number of this disk
	-- 6       2      Disk where central directory starts
	-- 8       2      Number of central directory records on this disk
	-- 10      2      Total number of central directory records
	-- 12      4      Size of central directory (bytes)
	-- 16      4      Offset of start of central directory
	-- 20      2      Comment length (n)
	-- 22      n      Comment
	local cdOffset = buffer.readu32(self.data, pos + 16)
	local cdEntries = buffer.readu16(self.data, pos + 10)
	local cdCommentLength = buffer.readu16(self.data, pos + 20)
	self.comment = buffer.readstring(self.data, pos + 22, cdCommentLength)

	-- Process each entry in the Central Directory
	pos = cdOffset
	for i = 1, cdEntries do
		-- Central Directory Entry format:
		-- Offset  Bytes  Description
		-- ------------------------------------------------
		-- 0       4      Central directory entry signature
		-- 8       2      General purpose bitflags
		-- 10      2      Compression method (8 = DEFLATE)
		-- 12      4      Last mod time/date
		-- 16      4      CRC-32
		-- 24      4      Uncompressed size
		-- 28      2      File name length (n)
		-- 30      2      Extra field length (m)
		-- 32      2      Comment length (k)
		-- 42      4      Local header offset
		-- 46      n      File name
		-- 46+n    m      Extra field
		-- 46+n+m  k      Comment
		local _bitflags = buffer.readu16(self.data, pos + 8)
		local compressionMethod = buffer.readu16(self.data, pos + 10)
		local timestamp = buffer.readu32(self.data, pos + 12)
		local crc = buffer.readu32(self.data, pos + 16)
		local size = buffer.readu32(self.data, pos + 24)
		local nameLength = buffer.readu16(self.data, pos + 28)
		local extraLength = buffer.readu16(self.data, pos + 30)
		local commentLength = buffer.readu16(self.data, pos + 32)
		local offset = buffer.readu32(self.data, pos + 42)
		local name = buffer.readstring(self.data, pos + 46, nameLength)

		local entry = ZipEntry.new(name, size, offset, timestamp, DECOMPRESSION_ROUTINES[compressionMethod].name, crc)
		table.insert(self.entries, entry)

		-- Advance past the fixed header plus the three variable-length fields
		pos = pos + 46 + nameLength + extraLength + commentLength
	end
end
-- Links every decoded entry into a tree rooted at `self.root`, creating
-- virtual directory entries for intermediate paths the ZIP never declared
function ZipReader.buildDirectoryTree(self: ZipReader): ()
	-- Sort entries to process directories first; I could either handle
	-- directories and files in separate passes over the entries, or sort
	-- the entries so I handled the directories first -- I decided to do
	-- the latter
	table.sort(self.entries, function(a, b)
		if a.isDirectory ~= b.isDirectory then
			return a.isDirectory
		end
		return a.name < b.name
	end)

	for _, entry in self.entries do
		-- Split entry path into individual components
		-- e.g. "folder/subfolder/file.txt" -> {"folder", "subfolder", "file.txt"}
		local parts = {}
		for part in string.gmatch(entry.name, "([^/]+)/?") do
			table.insert(parts, part)
		end

		-- Start from root directory
		local current = self.root
		local path = ""

		-- Process each path component
		for i, part in parts do
			path ..= part

			if i < #parts or entry.isDirectory then
				-- Create missing directory entries for intermediate paths
				if not self.directories[path] then
					if entry.isDirectory and i == #parts then
						-- Existing directory entry, reuse it
						self.directories[path] = entry
					else
						-- Create new directory entry for intermediate paths or undefined
						-- parent directories in the ZIP
						local dir = ZipEntry.new(path .. "/", 0, 0, entry.timestamp, nil, 0)
						dir.isDirectory = true
						dir.parent = current
						self.directories[path] = dir
					end

					-- Track directory in both lookup table and parent's children
					table.insert(current.children, self.directories[path])
				end

				-- Move deeper into the tree
				current = self.directories[path]
				continue
			end

			-- Link file entry to its parent directory
			entry.parent = current
			table.insert(current.children, entry)
		end
	end
end
-- Looks up an entry by path; returns nil when nothing matches
function ZipReader.findEntry(self: ZipReader, path: string): ZipEntry?
	if path == "/" then
		-- If the root directory's entry was requested we do not
		-- need to do any additional work
		return self.root
	end

	-- Normalize path by removing leading and trailing slashes
	-- This ensures consistent lookup regardless of input format
	-- e.g., "/folder/file.txt/" -> "folder/file.txt"
	path = string.gsub(path, "^/", ""):gsub("/$", "")

	-- First check regular files and explicit directories
	for _, entry in self.entries do
		-- Compare normalized paths
		if string.gsub(entry.name, "/$", "") == path then
			return entry
		end
	end

	-- If not found, check virtual directory entries
	-- These are directories that were created implicitly
	return self.directories[path]
end
-- Options accepted by `extract` / `extractDirectory`; all fields are optional
type ExtractionOptions = {
	decompress: boolean?, -- Decompress the file contents (default true)
	isString: boolean?, -- Return a string instead of a buffer (default false)
	skipCrcValidation: boolean?, -- Skip CRC32 verification (default false)
	skipSizeValidation: boolean?, -- Skip uncompressed-size verification (default false)
}
-- Extracts a single file entry, optionally decompressing and validating it.
-- Returns a buffer, or a string when `options.isString` is set.
function ZipReader.extract(self: ZipReader, entry: ZipEntry, options: ExtractionOptions?): buffer | string
	-- Local File Header format:
	-- Offset  Bytes  Description
	-- 0       4      Local file header signature
	-- 6       2      General purpose bitflags
	-- 8       2      Compression method (8 = DEFLATE)
	-- 14      4      CRC32 checksum
	-- 18      4      Compressed size
	-- 22      4      Uncompressed size
	-- 26      2      File name length (n)
	-- 28      2      Extra field length (m)
	-- 30      n      File name
	-- 30+n    m      Extra field
	-- 30+n+m  -      File data
	if entry.isDirectory then
		error("Cannot extract directory")
	end

	local defaultOptions: ExtractionOptions = {
		decompress = true,
		isString = false,
		-- FIX: previously a stale `skipValidation` key was set here, which no
		-- caller reads; the two real option names only behaved correctly
		-- because the __index miss returned a falsy nil
		skipCrcValidation = false,
		skipSizeValidation = false,
	}

	-- TODO: Use a `Partial` type function for this in the future!
	local optionsOrDefault: {
		decompress: boolean,
		isString: boolean,
		skipCrcValidation: boolean,
		skipSizeValidation: boolean,
	} = if options
		then setmetatable(options, { __index = defaultOptions }) :: any
		else defaultOptions

	local pos = entry.offset
	if buffer.readu32(self.data, pos) ~= SIGNATURES.LOCAL_FILE then
		error("Invalid local file header")
	end

	local bitflags = buffer.readu16(self.data, pos + 6)
	local crcChecksum = buffer.readu32(self.data, pos + 14)
	local compressedSize = buffer.readu32(self.data, pos + 18)
	local uncompressedSize = buffer.readu32(self.data, pos + 22)
	local nameLength = buffer.readu16(self.data, pos + 26)
	local extraLength = buffer.readu16(self.data, pos + 28)

	pos = pos + 30 + nameLength + extraLength

	if bit32.band(bitflags, 0x08) ~= 0 then
		-- The bit at offset 3 was set, meaning we did not have the file sizes
		-- and CRC checksum at the time of the creation of the ZIP. Instead, they
		-- were appended after the compressed data chunks in a data descriptor

		-- Data Descriptor format:
		-- Offset   Bytes   Description
		-- 0        0 or 4  0x08074b50 (optional signature)
		-- 0 or 4   4       CRC32 checksum
		-- 4 or 8   4       Compressed size
		-- 8 or 12  4       Uncompressed size

		-- Start at the compressed data
		local descriptorPos = pos
		while true do
			-- Try reading a u32 starting from current offset
			local leading = buffer.readu32(self.data, descriptorPos)

			if leading == SIGNATURES.DATA_DESCRIPTOR then
				-- If we find a data descriptor signature, that must mean
				-- the current offset points is the start of the descriptor
				break
			end

			if leading == entry.crc then
				-- If we find our file's CRC checksum, that means the data
				-- descriptor signature was omitted, so our chunk starts 4
				-- bytes before
				descriptorPos -= 4
				break
			end

			-- Skip to the next byte
			descriptorPos += 1
		end

		crcChecksum = buffer.readu32(self.data, descriptorPos + 4)
		compressedSize = buffer.readu32(self.data, descriptorPos + 8)
		uncompressedSize = buffer.readu32(self.data, descriptorPos + 12)
	end

	local content = buffer.create(compressedSize)
	buffer.copy(content, 0, self.data, pos, compressedSize)

	if optionsOrDefault.decompress then
		local compressionMethod = buffer.readu16(self.data, entry.offset + 8)
		local algo = DECOMPRESSION_ROUTINES[compressionMethod]
		if algo == nil then
			error(`Unsupported compression, ID: {compressionMethod}`)
		end

		content = algo.decompress(content, uncompressedSize, {
			expected = crcChecksum,
			skip = optionsOrDefault.skipCrcValidation,
		})

		-- Unless skipping validation is requested, we make sure the uncompressed size matches
		assert(
			optionsOrDefault.skipSizeValidation or uncompressedSize == buffer.len(content),
			"Validation failed; uncompressed size does not match"
		)
	end

	return if optionsOrDefault.isString then buffer.tostring(content) else content
end
-- Extracts every file underneath `path`, returning a map of full entry
-- name to its extracted contents
function ZipReader.extractDirectory(
	self: ZipReader,
	path: string,
	options: ExtractionOptions
): { [string]: buffer } | { [string]: string }
	local files: { [string]: buffer } | { [string]: string } = {}

	-- Normalize path by removing leading slash for consistent prefix matching
	path = string.gsub(path, "^/", "")

	-- Iterate through all entries to find files within target directory
	for _, entry in self.entries do
		-- Check if entry is a file (not directory) and its path starts with target directory
		if not entry.isDirectory and string.sub(entry.name, 1, #path) == path then
			-- Store extracted content mapped to full path
			files[entry.name] = self:extract(entry, options)
		end
	end

	-- Return a map of file to contents
	return files
end
-- Returns the immediate children of the directory entry at `path`;
-- errors when the path does not resolve to a directory
function ZipReader.listDirectory(self: ZipReader, path: string): { ZipEntry }
	-- Locate the entry with the path
	local entry = self:findEntry(path)
	if not entry or not entry.isDirectory then
		-- If an entry was not found, we error
		error("Not a directory")
	end

	-- Return the children of our discovered entry
	return entry.children
end
-- Depth-first traversal over the whole tree, invoking `callback` for every
-- entry with its depth (root is depth 0)
function ZipReader.walk(self: ZipReader, callback: (entry: ZipEntry, depth: number) -> ()): ()
	-- Wrapper function which recursively calls callback for every child
	-- in an entry
	local function walkEntry(entry: ZipEntry, depth: number)
		callback(entry, depth)
		for _, child in entry.children do
			-- ooo spooky recursion... blame this if shit go wrong
			walkEntry(child, depth + 1)
		end
	end

	walkEntry(self.root, 0)
end
export type ZipStatistics = { fileCount: number, dirCount: number, totalSize: number }

-- Aggregates file/directory counts and the total uncompressed size over
-- all decoded entries
function ZipReader.getStats(self: ZipReader): ZipStatistics
	local stats: ZipStatistics = {
		fileCount = 0,
		dirCount = 0,
		totalSize = 0,
	}

	-- Iterate through the entries, updating stats
	for _, entry in self.entries do
		if entry.isDirectory then
			stats.dirCount += 1
			continue
		end

		stats.fileCount += 1
		stats.totalSize += entry.size
	end

	return stats
end
return {
	-- Creates a `ZipReader` from a `buffer` of ZIP data.
	load = function(data: buffer)
		return ZipReader.new(data)
	end,
}

View file

@ -19,8 +19,6 @@ local FALLIBLES = {
"chinese.zip", "chinese.zip",
"non_utf8.zip", -- FIXME: Lune breaks for non utf8 data in process stdout "non_utf8.zip", -- FIXME: Lune breaks for non utf8 data in process stdout
"pandoc_soft_links.zip", -- FIXME: Soft links are not handled correctly "pandoc_soft_links.zip", -- FIXME: Soft links are not handled correctly
-- FIXME: Files with a misaligned comments are not correctly located
-- "misaligned_comment.zip",
} }
return function(test: typeof(frktest.test)) return function(test: typeof(frktest.test))

View file

@ -0,0 +1,17 @@
local fs = require("@lune/fs")
local frktest = require("../lune_packages/frktest")
local check = frktest.assert.check

local ZipReader = require("../lib")

return function(test: typeof(frktest.test))
	test.suite("ZIP extraction tests", function()
		test.case("Handles misaligned comment properly", function()
			-- This archive's EoCD record is trailed by a comment, so it does
			-- not sit at the default (bufSize - 22) position; the reader must
			-- scan backwards to locate it and expose the comment text
			local data = fs.readFile("tests/data/misaligned_comment.zip")
			local zip = ZipReader.load(buffer.fromstring(data))
			check.equal(zip.comment, "short.")
		end)
	end)
end