local inflate = require("./inflate")
local validateCrc = require("./utils/validate_crc")
local path = require("./utils/path")

-- The maximum supported PKZIP format specification version (6.3) that we support
local MAX_SUPPORTED_PKZIP_VERSION = 63

-- Little endian constant signatures used in the ZIP file format
local SIGNATURES = table.freeze({
	-- Marks the beginning of each file in the ZIP
	LOCAL_FILE = 0x04034b50,
	-- Marks the start of a data descriptor
	DATA_DESCRIPTOR = 0x08074b50,
	-- Marks entries in the central directory
	CENTRAL_DIR = 0x02014b50,
	-- Marks the end of the central directory
	END_OF_CENTRAL_DIR = 0x06054b50,
})

-- Decompression routines for each supported compression method
-- FIX: frozen for consistency with `SIGNATURES` and `EMPTY_PROPERTIES`;
-- nothing in this module mutates it
local DECOMPRESSION_ROUTINES: {
	[number]: {
		name: CompressionMethod,
		decompress: (buffer, number, validateCrc.CrcValidationOptions) -> buffer,
	},
} = table.freeze({
	-- `STORE` decompression method - No compression
	[0x00] = {
		name = "STORE" :: CompressionMethod,
		decompress = function(buf, _, validation): buffer
			validateCrc(buf, validation)
			return buf
		end,
	},

	-- `DEFLATE` decompression method - Compressed raw deflate chunks
	[0x08] = {
		name = "DEFLATE" :: CompressionMethod,
		decompress = function(buf, uncompressedSize, validation): buffer
			local decompressed = inflate(buf, uncompressedSize)
			validateCrc(decompressed, validation)
			return decompressed
		end,
	},
})

-- Set of placeholder entry properties for incompatible entries
local EMPTY_PROPERTIES: ZipEntryProperties = table.freeze({
	versionMadeBy = 0,
	compressedSize = 0,
	size = 0,
	attributes = 0,
	timestamp = 0,
	crc = 0,
})

-- Lookup table for the OS that created the ZIP
-- FIX: frozen for consistency with the other constant tables above
local MADE_BY_OS_LOOKUP: { [number]: MadeByOS } = table.freeze({
	[0x0] = "FAT",
	[0x1] = "AMIGA",
	[0x2] = "VMS",
	[0x3] = "UNIX",
	[0x4] = "VM/CMS",
	[0x5] = "Atari ST",
	[0x6] = "OS/2",
	[0x7] = "MAC",
	[0x8] = "Z-System",
	[0x9] = "CP/M",
	[0xa] = "NTFS",
	[0xb] = "MVS",
	[0xc] = "VSE",
	[0xd] = "Acorn RISCOS",
	[0xe] = "VFAT",
	[0xf] = "Alternate MVS",
	[0x10] = "BeOS",
	[0x11] = "TANDEM",
	[0x12] = "OS/400",
	[0x13] = "OS/X",
})

--[=[
	@class ZipEntry

	A single entry (a file or a directory) in a ZIP file, and its properties.
]=]
local ZipEntry = {}

--[=[
	@interface ZipEntry
	@within ZipEntry
	@field name string -- File path within ZIP, '/' suffix indicates directory
	@field versionMadeBy { software: string, os: MadeByOS } -- Version of software and OS that created the ZIP
	@field compressedSize number -- Compressed size in bytes
	@field size number -- Uncompressed size in bytes
	@field offset number -- Absolute position of local header in ZIP
	@field timestamp number -- MS-DOS format timestamp
	@field method CompressionMethod -- Method used to compress the file
	@field crc number -- CRC32 checksum of the uncompressed data
	@field isDirectory boolean -- Whether the entry is a directory or not
	@field isText boolean -- Whether the entry is plain ASCII text or binary
	@field attributes number -- File attributes
	@field parent ZipEntry? -- Parent directory entry, `nil` if entry is root
	@field children { ZipEntry } -- Children of the entry, if it was a directory, empty array for files
]=]
export type ZipEntry = typeof(setmetatable({} :: ZipEntryInner, { __index = ZipEntry }))
type ZipEntryInner = {
	name: string,
	versionMadeBy: {
		software: string,
		os: MadeByOS,
	},
	compressedSize: number,
	size: number,
	offset: number,
	timestamp: number,
	method: CompressionMethod,
	crc: number,
	isDirectory: boolean,
	isText: boolean,
	attributes: number,
	parent: ZipEntry?,
	children: { ZipEntry },
}

-- stylua: ignore
--[=[
	@within ZipEntry
	@type MadeByOS "FAT" | "AMIGA" | "VMS" | "UNIX" | "VM/CMS" | "Atari ST" | "OS/2" | "MAC" | "Z-System" | "CP/M" | "NTFS" | "MVS" | "VSE" | "Acorn RISCOS" | "VFAT" | "Alternate MVS" | "BeOS" | "TANDEM" | "OS/400" | "OS/X" | "Unknown"

	The OS that created the ZIP.
]=]
export type MadeByOS =
	| "FAT" -- 0x0; MS-DOS and OS/2 (FAT / VFAT / FAT32 file systems)
	| "AMIGA" -- 0x1; Amiga
	| "VMS" -- 0x2; OpenVMS
	| "UNIX" -- 0x3; Unix
	| "VM/CMS" -- 0x4; VM/CMS
	| "Atari ST" -- 0x5; Atari ST
	| "OS/2" -- 0x6; OS/2 HPFS
	| "MAC" -- 0x7; Macintosh
	| "Z-System" -- 0x8; Z-System
	| "CP/M" -- 0x9; Original CP/M
	| "NTFS" -- 0xa; Windows NTFS
	| "MVS" -- 0xb; OS/390 & VM/ESA
	| "VSE" -- 0xc; VSE
	| "Acorn RISCOS" -- 0xd; Acorn RISCOS
	| "VFAT" -- 0xe; VFAT
	| "Alternate MVS" -- 0xf; Alternate MVS
	| "BeOS" -- 0x10; BeOS
	| "TANDEM" -- 0x11; Tandem
	| "OS/400" -- 0x12; OS/400
	| "OS/X" -- 0x13; Darwin
	| "Unknown" -- 0x14 - 0xff; Unused

--[=[
	@within ZipEntry
	@type CompressionMethod "STORE" | "DEFLATE"

	The method used to compress the file:
	- `STORE` - No compression
	- `DEFLATE` - Compressed raw deflate chunks
]=]
export type CompressionMethod = "STORE" | "DEFLATE"

--[=[
	@interface ZipEntryProperties
	@within ZipEntry
	@private

	A set of properties that describe a ZIP entry. Used internally for construction of
	[ZipEntry] objects.

	@field versionMadeBy number -- Version of software and OS that created the ZIP
	@field compressedSize number -- Compressed size in bytes
	@field size number -- Uncompressed size in bytes
	@field attributes number -- File attributes
	@field timestamp number -- MS-DOS format timestamp
	@field method CompressionMethod? -- Method used
	@field isText boolean? -- Whether the entry contents are plain text (defaults to false)
	@field crc number -- CRC32 checksum of the uncompressed data
]=]
type ZipEntryProperties = {
	versionMadeBy: number,
	compressedSize: number,
	size: number,
	attributes: number,
	timestamp: number,
	method: CompressionMethod?,
	-- FIX: new optional field; previously `isText` was supplied by callers but
	-- silently dropped because neither the type nor the constructor knew it
	isText: boolean?,
	crc: number,
}

--[=[
	@within ZipEntry
	@function new
	@private

	@param offset number -- Offset of the entry in the ZIP file
	@param name string -- File path within ZIP, '/' suffix indicates directory
	@param properties ZipEntryProperties -- Properties of the entry
	@return ZipEntry -- The constructed entry
]=]
function ZipEntry.new(offset: number, name: string, properties: ZipEntryProperties): ZipEntry
	-- The "version made by" field packs the creator OS into the high byte and
	-- the PKZIP spec version (major * 10 + minor) into the low byte
	local versionMadeByOS = bit32.rshift(properties.versionMadeBy, 8)
	local versionMadeByVersion = bit32.band(properties.versionMadeBy, 0x00ff)

	return setmetatable(
		{
			name = name,
			versionMadeBy = {
				-- FIX: floor the major digit explicitly; `%d` with a fractional
				-- number (e.g. 63 / 10 = 6.3) relies on implicit truncation
				software = string.format("%d.%d", math.floor(versionMadeByVersion / 10), versionMadeByVersion % 10),
				-- FIX: fall back to the documented "Unknown" variant for OS ids
				-- 0x14 - 0xff instead of storing nil behind a cast
				os = (MADE_BY_OS_LOOKUP[versionMadeByOS] or "Unknown") :: MadeByOS,
			},
			compressedSize = properties.compressedSize,
			size = properties.size,
			offset = offset,
			timestamp = properties.timestamp,
			method = properties.method,
			crc = properties.crc,
			isDirectory = string.sub(name, -1) == "/",
			-- FIX: `isText` was never copied from the provided properties, so the
			-- field (declared non-optional on ZipEntryInner) was always nil and
			-- every entry extracted as binary by default
			isText = properties.isText == true,
			attributes = properties.attributes,
			parent = nil,
			children = {},
		} :: ZipEntryInner,
		{ __index = ZipEntry }
	)
end

--[=[
	@within ZipEntry
	@method isSymlink

	Returns whether the entry is a symlink.

	@return boolean
]=]
function ZipEntry.isSymlink(self: ZipEntry): boolean
	-- UNIX mode lives in the high 16 bits of the external attributes;
	-- 0xA000 is S_IFLNK, so the symlink bits are 0xA0000000 here
	return bit32.band(self.attributes, 0xA0000000) == 0xA0000000
end

--[=[
	@within ZipEntry
	@method getPath

	Resolves the path of the entry based on its relationship with other entries.

	It is recommended to use this method instead of accessing the `name` property directly, although
	they should be equivalent.
> [!WARNING] > Never use this method when extracting files from the ZIP, since it can contain absolute paths > (say `/etc/passwd`) referencing directories outside the current directory (say `/tmp/extracted`), > causing unintended overwrites of files. @return string -- The path of the entry ]=] function ZipEntry.getPath(self: ZipEntry): string if self.name == "/" then return "/" end -- Get just the entry name without the path local name = string.match(self.name, "([^/]+)/?$") or self.name if not self.parent or self.parent.name == "/" then return self.name end -- Combine parent path with entry name local path = string.gsub(self.parent:getPath() .. name, "//+", "/") return path end --[=[ @within ZipEntry @method getSafePath Resolves the path of the entry based on its relationship with other entries and returns it only if it is safe to use for extraction, otherwise returns `nil`. @return string? -- Optional path of the entry if it was safe ]=] function ZipEntry.getSafePath(self: ZipEntry): string? local pathStr = self:getPath() if path.isSafe(pathStr) then return pathStr end return nil end --[=[ @within ZipEntry @method sanitizePath Sanitizes the path of the entry, potentially losing information, but ensuring the path is safe to use for extraction. @return string -- The sanitized path of the entry ]=] function ZipEntry.sanitizePath(self: ZipEntry): string local pathStr = self:getPath() return path.sanitize(pathStr) end --[=[ @within ZipEntry @method compressionEfficiency Calculates the compression efficiency of the entry, or `nil` if the entry is a directory. Uses the formula: `round((1 - compressedSize / size) * 100)` and outputs a percentage. @return number? -- Optional compression efficiency of the entry ]=] function ZipEntry.compressionEfficiency(self: ZipEntry): number? 
if self.size == 0 or self.compressedSize == 0 then return nil end local ratio = 1 - self.compressedSize / self.size return math.round(ratio * 100) end --[=[ @within ZipEntry @method isFile Returns whether the entry is a file, i.e., not a directory or symlink. @return boolean -- Whether the entry is a file ]=] function ZipEntry.isFile(self: ZipEntry): boolean return not (self.isDirectory and self:isSymlink()) end --[=[ @within ZipEntry @interface UnixMode A object representation of the UNIX mode. @field perms string -- The permission octal @field typeFlags string -- The type flags octal ]=] export type UnixMode = { perms: string, typeFlags: string } --[=[ @within ZipEntry @method unixMode Parses the entry's attributes to extract a UNIX mode, represented as a [UnixMode]. @return UnixMode? -- The UNIX mode of the entry, or `nil` if the entry is not a UNIX file ]=] function ZipEntry.unixMode(self: ZipEntry): UnixMode? if self.versionMadeBy.os ~= "UNIX" then return nil end local mode = bit32.rshift(self.attributes, 16) local typeFlags = bit32.band(self.attributes, 0x1FF) local perms = bit32.band(mode, 0x01FF) return { perms = string.format("0o%o", perms), typeFlags = string.format("0o%o", typeFlags), } end --[=[ @class ZipReader The main class which represents a decoded state of a ZIP file, holding references to its entries. This is the primary point of interaction with the ZIP file's contents. 
]=]
local ZipReader = {}

--[=[
	@interface ZipReader
	@within ZipReader
	@field data buffer -- The buffer containing the raw bytes of the ZIP
	@field comment string -- Comment associated with the ZIP
	@field entries { ZipEntry } -- The decoded entries present
	@field directories { [string]: ZipEntry } -- The directories and their respective entries
	@field root ZipEntry -- The entry of the root directory
]=]
export type ZipReader = typeof(setmetatable({} :: ZipReaderInner, { __index = ZipReader }))
type ZipReaderInner = {
	data: buffer,
	comment: string,
	entries: { ZipEntry },
	directories: { [string]: ZipEntry },
	root: ZipEntry,
}

--[=[
	@within ZipReader
	@function new

	Creates a new ZipReader instance from the raw bytes of a ZIP file.
	Decodes the central directory and builds the directory tree eagerly.

	**Errors if the ZIP file is invalid.**

	@param data buffer -- The buffer containing the raw bytes of the ZIP
	@return ZipReader -- The new ZipReader instance
]=]
function ZipReader.new(data): ZipReader
	-- Synthesize a root directory entry which anchors the directory tree;
	-- it has no real header in the ZIP, hence the placeholder properties
	local root = ZipEntry.new(0, "/", EMPTY_PROPERTIES)
	root.isDirectory = true

	local this = setmetatable(
		{
			data = data,
			entries = {},
			directories = {},
			root = root,
		} :: ZipReaderInner,
		{ __index = ZipReader }
	)

	-- Decode all central directory entries, then link them into a tree
	this:parseCentralDirectory()
	this:buildDirectoryTree()
	return this
end

--[=[
	@within ZipReader
	@method findEocdPosition
	@private

	Finds the position of the End of Central Directory (EoCD) signature in the ZIP file.

	This implementation is inspired by that of [async_zip], a Rust library for parsing ZIP
	files asynchronously. The search reads the file in fixed-size windows and linearly
	searches each window back-to-front for the EoCD signature. As a result of the buffered
	approach, we reduce individual reads when compared to reading every single byte
	sequentially, by a factor of the buffer size (4 KB by default).

	The buffer size of 4 KB was arrived at because it aligns with many systems' page sizes, and
	also provides a good balance between read efficiency (not too small), memory usage (not too
	large) and CPU cache performance.

	From my primitive benchmarks, this method is ~1.5x faster than the sequential approach.

	**Errors if the ZIP file is invalid.**

	[async_zip]: https://github.com/Majored/rs-async-zip/blob/527bda9/src/base/read/io/locator.rs#L37-L45

	@error "Could not find End of Central Directory signature"
	@return number -- The offset to the End of Central Directory (including the signature)
]=]
function ZipReader.findEocdPosition(self: ZipReader): number
	local BUFFER_SIZE = 4096
	local SIGNATURE_LENGTH = 4

	local bufSize = buffer.len(self.data)
	-- Earliest offset the EoCD record could start at: the fixed EoCD header is
	-- 22 bytes and may be followed by at most a 64 KB comment at the very end
	local position = math.max(0, bufSize - (22 + 65536) --[[ max comment size: 64 KB ]])
	local searchBuf = buffer.create(BUFFER_SIZE)

	-- NOTE(review): windows are visited front-to-back from the earliest
	-- candidate offset (only within each window do we search backwards), so
	-- the first window containing a signature wins -- confirm this is the
	-- intended precedence if an archive comment could embed a fake signature
	while position < bufSize do
		local readSize = math.min(BUFFER_SIZE, bufSize - position)
		buffer.copy(searchBuf, 0, self.data, position, readSize)

		-- Search backwards through the current window for the signature
		for i = readSize - 1, SIGNATURE_LENGTH - 1, -1 do
			if buffer.readu32(searchBuf, i - SIGNATURE_LENGTH + 1) == SIGNATURES.END_OF_CENTRAL_DIR then
				return position + i - SIGNATURE_LENGTH + 1
			end
		end

		-- Advance the window forward, overlapping by the signature length so a
		-- signature straddling two windows is still found
		position += BUFFER_SIZE - SIGNATURE_LENGTH
	end

	error("Could not find End of Central Directory signature")
end

--[=[
	@within ZipReader
	@interface EocdRecord
	@private

	A parsed End of Central Directory record.
@field diskNumber number -- The disk number @field diskWithCD number -- The disk number of the disk with the Central Directory @field cdEntries number -- The number of entries in the Central Directory @field totalCDEntries number -- The total number of entries in the Central Directory @field cdSize number -- The size of the Central Directory @field cdOffset number -- The offset of the Central Directory @field comment string -- The comment associated with the ZIP ]=] export type EocdRecord = { diskNumber: number, diskWithCD: number, cdEntries: number, totalCDEntries: number, cdSize: number, cdOffset: number, comment: string, } --[=[ @within ZipReader @method parseEocdRecord @private Parses the End of Central Directory record at the given position, usually located using the [ZipReader:findEocdPosition]. **Errors if the ZIP file is invalid.** @error "Invalid Central Directory offset or size" @param pos number -- The offset to the End of Central Directory record @return EocdRecord -- Structural representation of the parsed record ]=] function ZipReader.parseEocdRecord(self: ZipReader, pos: number): EocdRecord -- End of Central Directory format: -- Offset Bytes Description -- 0 4 End of central directory signature -- 4 2 Number of this disk -- 6 2 Disk where central directory starts -- 8 2 Number of central directory records on this disk -- 10 2 Total number of central directory records -- 12 4 Size of central directory (bytes) -- 16 4 Offset of start of central directory -- 20 2 Comment length (n) -- 22 n Comment local cdEntries = buffer.readu16(self.data, pos + 10) local cdSize = buffer.readu32(self.data, pos + 12) local cdOffset = buffer.readu32(self.data, pos + 16) -- Validate CD boundaries and entry count local bufSize = buffer.len(self.data) if cdOffset >= bufSize or cdOffset + cdSize > bufSize then error("Invalid Central Directory offset or size") end -- Validate CD size range; min = 46 bytes per entry, max = 0xFFFF * 3 + 46 bytes per entry if cdSize < cdEntries 
* 46 or cdEntries * (0xFFFF * 3 + 46) < cdSize then error("Invalid Central Directory size for claimed number of entries") end local commentLength = buffer.readu16(self.data, pos + 20) return { diskNumber = buffer.readu16(self.data, pos + 4), diskWithCD = buffer.readu16(self.data, pos + 6), cdEntries = cdEntries, totalCDEntries = buffer.readu16(self.data, pos + 8), cdSize = cdSize, cdOffset = cdOffset, comment = buffer.readstring(self.data, pos + 22, commentLength), } end --[=[ @within ZipReader @method parseCentralDirectory @private Parses the central directory of the ZIP file and populates the `entries` and `directories` fields. Used internally during initialization of the [ZipReader]. **Errors if the ZIP file is invalid.** @error "Invalid Central Directory entry signature" @error "Found different entries than specified in Central Directory" ]=] function ZipReader.parseCentralDirectory(self: ZipReader): () local eocdPos = self:findEocdPosition() local record = self:parseEocdRecord(eocdPos) -- Track actual entries found local entriesFound = 0 local pos = record.cdOffset while pos < record.cdOffset + record.cdSize do if buffer.readu32(self.data, pos) ~= SIGNATURES.CENTRAL_DIR then error("Invalid Central Directory entry signature") end -- Central Directory Entry format: -- Offset Bytes Description -- 0 4 Central directory entry signature -- 4 2 Version made by -- 8 2 General purpose bitflags -- 10 2 Compression method (8 = DEFLATE) -- 12 4 Last mod time/date -- 16 4 CRC-32 -- 20 4 Compressed size -- 24 4 Uncompressed size -- 28 2 File name length (n) -- 30 2 Extra field length (m) -- 32 2 Comment length (k) -- 36 2 Internal file attributes -- 38 4 External file attributes -- 42 4 Local header offset -- 46 n File name -- 46+n m Extra field -- 46+n+m k Comment local versionMadeBy = buffer.readu16(self.data, pos + 4) local _bitflags = buffer.readu16(self.data, pos + 8) local timestamp = buffer.readu32(self.data, pos + 12) local compressionMethod = 
buffer.readu16(self.data, pos + 10) local crc = buffer.readu32(self.data, pos + 16) local compressedSize = buffer.readu32(self.data, pos + 20) local size = buffer.readu32(self.data, pos + 24) local nameLength = buffer.readu16(self.data, pos + 28) local extraLength = buffer.readu16(self.data, pos + 30) local commentLength = buffer.readu16(self.data, pos + 32) local internalAttrs = buffer.readu16(self.data, pos + 36) local externalAttrs = buffer.readu32(self.data, pos + 38) local offset = buffer.readu32(self.data, pos + 42) local name = buffer.readstring(self.data, pos + 46, nameLength) local entrySize = 46 + nameLength + extraLength + commentLength if pos + entrySize > record.cdOffset + record.cdSize then error("Invalid Central Directory entry size") end table.insert( self.entries, ZipEntry.new(offset, name, { versionMadeBy = versionMadeBy, compressedSize = compressedSize, size = size, crc = crc, method = DECOMPRESSION_ROUTINES[compressionMethod].name :: CompressionMethod, timestamp = timestamp, attributes = externalAttrs, isText = bit32.btest(internalAttrs, 0x0001), }) ) pos = pos + 46 + nameLength + extraLength + commentLength entriesFound += 1 end if entriesFound ~= record.cdEntries then error("Found different entries than specified in Central Directory") end self.comment = record.comment end --[=[ @within ZipReader @method buildDirectoryTree @private Builds the directory tree from the entries. Used internally during initialization of the [ZipReader]. ]=] function ZipReader.buildDirectoryTree(self: ZipReader): () -- Sort entries to process directories first; I could either handle -- directories and files in separate passes over the entries, or sort -- the entries so I handled the directories first -- I decided to do -- the latter table.sort(self.entries, function(a, b) if a.isDirectory ~= b.isDirectory then return a.isDirectory end return a.name < b.name end) for _, entry in self.entries do local parts = {} -- Split entry path into individual components -- e.g. 
"folder/subfolder/file.txt" -> {"folder", "subfolder", "file.txt"} for part in string.gmatch(entry.name, "([^/]+)/?") do table.insert(parts, part) end -- Start from root directory local current = self.root local path = "" -- Process each path component for i, part in parts do path ..= part if i < #parts or entry.isDirectory then -- Create missing directory entries for intermediate paths if not self.directories[path] then if entry.isDirectory and i == #parts then -- Existing directory entry, reuse it self.directories[path] = entry else -- Create new directory entry for intermediate paths or undefined -- parent directories in the ZIP local dir = ZipEntry.new(0, path .. "/", { versionMadeBy = 0, compressedSize = 0, size = 0, crc = 0, compressionMethod = "STORED", timestamp = entry.timestamp, attributes = entry.attributes, }) dir.versionMadeBy = entry.versionMadeBy dir.isDirectory = true dir.parent = current self.directories[path] = dir end -- Track directory in both lookup table and parent's children table.insert(current.children, self.directories[path]) end -- Move deeper into the tree current = self.directories[path] continue end -- Link file entry to its parent directory entry.parent = current table.insert(current.children, entry) end end end --[=[ @within ZipReader @method findEntry Finds a [ZipEntry] by its path in the ZIP archive. @param path string -- Path to the entry to find @return ZipEntry? -- The found entry, or `nil` if not found ]=] function ZipReader.findEntry(self: ZipReader, path: string): ZipEntry? 
if path == "/" then -- If the root directory's entry was requested we do not -- need to do any additional work return self.root end -- Normalize path by removing leading and trailing slashes -- This ensures consistent lookup regardless of input format -- e.g., "/folder/file.txt/" -> "folder/file.txt" path = string.gsub(path, "^/", ""):gsub("/$", "") -- First check regular files and explicit directories for _, entry in self.entries do -- Compare normalized paths if string.gsub(entry.name, "/$", "") == path then return entry end end -- If not found, check virtual directory entries -- These are directories that were created implicitly return self.directories[path] end --[=[ @interface ExtractionOptions @within ZipReader @ignore Options accepted by the [ZipReader:extract] method. @field followSymlinks boolean? -- Whether to follow symlinks @field decompress boolean -- Whether to decompress the entry or only return the raw data @field type ("binary" | "text")? -- The type of data to return, automatically inferred based on the type of contents if not specified @field skipCrcValidation boolean? -- Whether to skip CRC validation @field skipSizeValidation boolean? -- Whether to skip size validation ]=] type ExtractionOptions = { followSymlinks: boolean?, decompress: boolean?, type: ("binary" | "text")?, skipCrcValidation: boolean?, skipSizeValidation: boolean?, } --[=[ @within ZipReader @method extract Extracts the specified [ZipEntry] from the ZIP archive. See [ZipReader:extractDirectory] for extracting directories. 
@error "Cannot extract directory" -- If the entry is a directory, use [ZipReader:extractDirectory] instead @error "Invalid local file header" -- Invalid ZIP file, local header signature did not match @error "Unsupported PKZip spec version: {versionNeeded}" -- The ZIP file was created with an unsupported version of the ZIP specification @error "Symlink path not found" -- If `followSymlinks` of options is `true` and the symlink path was not found @error "Unsupported compression, ID: {compressionMethod}" -- The entry was compressed using an unsupported compression method @param entry ZipEntry -- The entry to extract @param options ExtractionOptions? -- Options for the extraction @return buffer | string -- The extracted data ]=] function ZipReader.extract(self: ZipReader, entry: ZipEntry, options: ExtractionOptions?): buffer | string -- Local File Header format: -- Offset Bytes Description -- 0 4 Local file header signature -- 4 2 Version needed to extract -- 6 2 General purpose bitflags -- 8 2 Compression method (8 = DEFLATE) -- 14 4 CRC32 checksum -- 18 4 Compressed size -- 22 4 Uncompressed size -- 26 2 File name length (n) -- 28 2 Extra field length (m) -- 30 n File name -- 30+n m Extra field -- 30+n+m - File data if entry.isDirectory then error("Cannot extract directory") end local defaultOptions: ExtractionOptions = { followSymlinks = false, decompress = true, type = if entry.isText then "text" else "binary", skipValidation = false, } -- TODO: Use a `Partial` type function for this in the future! 
local optionsOrDefault: { followSymlinks: boolean, decompress: boolean, type: "binary" | "text", skipCrcValidation: boolean, skipSizeValidation: boolean, } = if options then setmetatable(options, { __index = defaultOptions }) :: any else defaultOptions local pos = entry.offset if buffer.readu32(self.data, pos) ~= SIGNATURES.LOCAL_FILE then error("Invalid local file header") end -- Validate that the version needed to extract is supported local versionNeeded = buffer.readu16(self.data, pos + 4) assert(MAX_SUPPORTED_PKZIP_VERSION >= versionNeeded, `Unsupported PKZip spec version: {versionNeeded}`) local bitflags = buffer.readu16(self.data, pos + 6) local crcChecksum = buffer.readu32(self.data, pos + 14) local compressedSize = buffer.readu32(self.data, pos + 18) local uncompressedSize = buffer.readu32(self.data, pos + 22) local nameLength = buffer.readu16(self.data, pos + 26) local extraLength = buffer.readu16(self.data, pos + 28) pos = pos + 30 + nameLength + extraLength if bit32.btest(bitflags, 0x08) then -- The bit at offset 3 was set, meaning we did not have the file sizes -- and CRC checksum at the time of the creation of the ZIP. 
Instead, they -- were appended after the compressed data chunks in a data descriptor -- Data Descriptor format: -- Offset Bytes Description -- 0 0 or 4 0x08074b50 (optional signature) -- 0 or 4 4 CRC32 checksum -- 4 or 8 4 Compressed size -- 8 or 12 4 Uncompressed size -- Start at the compressed data local descriptorPos = pos while true do -- Try reading a u32 starting from current offset local leading = buffer.readu32(self.data, descriptorPos) if leading == SIGNATURES.DATA_DESCRIPTOR then -- If we find a data descriptor signature, that must mean -- the current offset points to the start of the descriptor break end if leading == entry.crc then -- If we find our file's CRC checksum, that means the data -- descriptor signature was omitted, so our chunk starts 4 -- bytes before descriptorPos -= 4 break end -- Skip to the next byte descriptorPos += 1 end crcChecksum = buffer.readu32(self.data, descriptorPos + 4) compressedSize = buffer.readu32(self.data, descriptorPos + 8) uncompressedSize = buffer.readu32(self.data, descriptorPos + 12) end local content = buffer.create(compressedSize) buffer.copy(content, 0, self.data, pos, compressedSize) if optionsOrDefault.decompress then local compressionMethod = buffer.readu16(self.data, entry.offset + 8) local algo = DECOMPRESSION_ROUTINES[compressionMethod] if algo == nil then error(`Unsupported compression, ID: {compressionMethod}`) end if optionsOrDefault.followSymlinks then local linkPath = buffer.tostring(algo.decompress(content, 0, { expected = 0x00000000, skip = true, })) -- Check if the path was a relative path if path.isRelative(linkPath) then if string.sub(linkPath, -1) ~= "/" then linkPath ..= "/" end linkPath = path.canonicalize(`{(entry.parent or self.root).name}{linkPath}`) end optionsOrDefault.followSymlinks = false optionsOrDefault.type = "binary" optionsOrDefault.skipCrcValidation = true optionsOrDefault.skipSizeValidation = true content = self:extract(self:findEntry(linkPath) or error("Symlink path not found"), 
optionsOrDefault) :: buffer end content = algo.decompress(content, uncompressedSize, { expected = crcChecksum, skip = optionsOrDefault.skipCrcValidation, }) -- Unless skipping validation is requested, we make sure the uncompressed size matches assert( optionsOrDefault.skipSizeValidation or uncompressedSize == buffer.len(content), "Validation failed; uncompressed size does not match" ) end return if optionsOrDefault.type == "text" then buffer.tostring(content) else content end --[=[ @within ZipReader @method extractDirectory Extracts all the files in a specified directory, skipping any directory entries. **Errors if [ZipReader:extract] errors on an entry in the directory.** @param path string -- The path to the directory to extract @param options ExtractionOptions? -- Options for the extraction @return { [string]: buffer } | { [string]: string } -- A map of extracted file paths and their contents ]=] function ZipReader.extractDirectory( self: ZipReader, path: string, options: ExtractionOptions? ): { [string]: buffer } | { [string]: string } local files: { [string]: buffer } | { [string]: string } = {} -- Normalize path by removing leading slash for consistent prefix matching path = string.gsub(path, "^/", "") -- Iterate through all entries to find files within target directory for _, entry in self.entries do -- Check if entry is a file (not directory) and its path starts with target directory if not entry.isDirectory and string.sub(entry.name, 1, #path) == path then -- Store extracted content mapped to full path files[entry.name] = self:extract(entry, options) end end -- Return a map of file to contents return files end --[=[ @within ZipReader @method listDirectory Lists the entries within a specified directory path. 
@error "Not a directory" -- If the path does not exist or is not a directory @param path string -- The path to the directory to list @return { ZipEntry } -- The list of entries in the directory ]=] function ZipReader.listDirectory(self: ZipReader, path: string): { ZipEntry } -- Locate the entry with the path local entry = self:findEntry(path) if not entry or not entry.isDirectory then -- If an entry was not found, we error error("Not a directory") end -- Return the children of our discovered entry return entry.children end --[=[ @within ZipReader @method walk Recursively walks through the ZIP file, calling the provided callback for each entry with the current entry and its depth. @param callback (entry: ZipEntry, depth: number) -> () -- The function to call for each entry ]=] function ZipReader.walk(self: ZipReader, callback: (entry: ZipEntry, depth: number) -> ()): () -- Wrapper function which recursively calls callback for every child -- in an entry local function walkEntry(entry: ZipEntry, depth: number) callback(entry, depth) for _, child in entry.children do -- ooo spooky recursion... blame this if shit go wrong walkEntry(child, depth + 1) end end walkEntry(self.root, 0) end --[=[ @interface ZipStatistics @within ZipReader @field fileCount number -- The number of files in the ZIP @field dirCount number -- The number of directories in the ZIP @field totalSize number -- The total size of all files in the ZIP ]=] export type ZipStatistics = { fileCount: number, dirCount: number, totalSize: number } --[=[ @within ZipReader @method getStats Retrieves statistics about the ZIP file. 
@return ZipStatistics -- The statistics about the ZIP file ]=] function ZipReader.getStats(self: ZipReader): ZipStatistics local stats: ZipStatistics = { fileCount = 0, dirCount = 0, totalSize = 0, } -- Iterate through the entries, updating stats for _, entry in self.entries do if entry.isDirectory then stats.dirCount += 1 continue end stats.fileCount += 1 stats.totalSize += entry.size end return stats end return { -- Creates a `ZipReader` from a `buffer` of ZIP data. load = function(data: buffer) return ZipReader.new(data) end, }