From df134c31e068babf3ff2bd2158fe6156d61efdf7 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Sat, 28 Dec 2024 18:01:17 +0000 Subject: [PATCH] refactor: include code comments --- lib/init.luau | 187 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 124 insertions(+), 63 deletions(-) diff --git a/lib/init.luau b/lib/init.luau index 36cc7f7..d00898e 100644 --- a/lib/init.luau +++ b/lib/init.luau @@ -1,26 +1,30 @@ +-- Little endian constant signatures used in the ZIP file format local SIGNATURES = table.freeze({ + -- Marks the beginning of each file in the ZIP LOCAL_FILE = 0x04034b50, + -- Marks entries in the central directory CENTRAL_DIR = 0x02014b50, + -- Marks the end of the central directory END_OF_CENTRAL_DIR = 0x06054b50, }) --- TODO: ERROR HANDLING !! +-- TODO: ERROR HANDLING!! local ZipEntry = {} export type ZipEntry = typeof(setmetatable({} :: ZipEntryInner, { __index = ZipEntry })) +-- stylua: ignore type ZipEntryInner = { - name: string, - size: number, - offset: number, - timestamp: number, - crc: number, - isDirectory: boolean, - parent: ZipEntry?, - children: { ZipEntry }, - getPath: (ZipEntry) -> string, + name: string, -- File path within ZIP, '/' suffix indicates directory + size: number, -- Uncompressed size in bytes + offset: number, -- Absolute position of local header in ZIP + timestamp: number, -- MS-DOS format timestamp + crc: number, -- CRC32 checksum of uncompressed data + isDirectory: boolean, -- Whether the entry is a directory or not + parent: ZipEntry?, -- The parent of the current entry, nil for root + children: { ZipEntry }, -- The children of the entry } -function ZipEntry.new(name, size, offset, timestamp, crc): ZipEntry +function ZipEntry.new(name: string, size: number, offset: number, timestamp: number, crc: number): ZipEntry return setmetatable( { name = name, @@ -50,11 +54,12 @@ end local ZipReader = {} export type ZipReader = typeof(setmetatable({} :: ZipReaderInner, { __index = ZipReader })) +-- stylua: ignore type ZipReaderInner = { - data: buffer, - entries: { ZipEntry }, - directories: { [string]: ZipEntry }, - root: ZipEntry, + data: buffer, -- The buffer containing the raw bytes of the ZIP + entries: { ZipEntry }, -- The decoded entries present + directories: { [string]: ZipEntry }, -- The directories and their respective entries + root: ZipEntry, -- The entry of the root directory } function ZipReader.new(data): ZipReader @@ -77,28 +82,42 @@ function ZipReader.new(data): ZipReader end function ZipReader.parseCentralDirectory(self: ZipReader): () + -- ZIP files are read from the end, starting with the End of Central Directory record + -- The EoCD is at least 22 bytes and contains pointers to the rest of the ZIP structure local bufSize = buffer.len(self.data) local pos = bufSize - 22 + -- Search backwards for the EoCD signature while pos > 0 do + -- Read 4 bytes as uint32 in little-endian format if buffer.readu32(self.data, pos) == SIGNATURES.END_OF_CENTRAL_DIR then break end pos = pos - 1 end - if pos < 0 then - error("Invalid ZIP file: End of Central Directory not found") - end - + -- Central Directory offset is stored 16 bytes into the EoCD record local cdOffset = buffer.readu32(self.data, pos + 16) + -- Number of entries is stored 10 bytes into the EoCD record local cdEntries = buffer.readu16(self.data, pos + 10) + -- Process each entry in the Central Directory pos = cdOffset for i = 1, cdEntries do - if buffer.readu32(self.data, pos) ~= SIGNATURES.CENTRAL_DIR then - error("Invalid central directory header") - end + -- Central Directory Entry format: + -- Offset Bytes Description + -- ------------------------------------------------ + -- 0 4 Central directory entry signature + -- 28 2 File name length (n) + -- 30 2 Extra field length (m) + -- 32 2 Comment length (k) + -- 12 4 Last mod time/date + -- 16 4 CRC-32 + -- 24 4 Uncompressed size + -- 42 4 Local header offset + -- 46 n File name + -- 46+n m Extra field + -- 46+n+m k Comment local nameLength = buffer.readu16(self.data, pos + 28) local extraLength = buffer.readu16(self.data, pos + 30) @@ -121,52 +140,81 @@ end function ZipReader.buildDirectoryTree(self: ZipReader): () for _, entry in self.entries do - local parts = {} - for part in string.gmatch(entry.name, "([^/]+)/?") do - table.insert(parts, part) - end + local parts = {} + -- Split entry path into individual components + -- e.g. "folder/subfolder/file.txt" -> {"folder", "subfolder", "file.txt"} + for part in string.gmatch(entry.name, "([^/]+)/?") do + table.insert(parts, part) + end - local current = self.root - local path = "" + -- Start from root directory + local current = self.root + local path = "" - for i, part in parts do - path ..= part - if i < #parts then - if not self.directories[path] then - local dir = ZipEntry.new(path, 0, 0, entry.timestamp, 0) - dir.isDirectory = true - dir.parent = current + -- Process each path component + for i, part in parts do + path ..= part + if i < #parts then + -- Create missing directory entries for intermediate paths + if not self.directories[path] then + local dir = ZipEntry.new(path, 0, 0, entry.timestamp, 0) + dir.isDirectory = true + dir.parent = current - self.directories[path] = dir - table.insert(current.children, dir) - end + -- Track directory in both lookup table and parent's children + self.directories[path] = dir + table.insert(current.children, dir) + end - current = self.directories[path] - continue - end + -- Move deeper into the tree + current = self.directories[path] + continue + end - entry.parent = current - table.insert(current.children, entry) - end - end + -- Link file entry to its parent directory + entry.parent = current + table.insert(current.children, entry) + end + end end function ZipReader.findEntry(self: ZipReader, path: string): ZipEntry - if path == "/" then + if path == "/" then + -- If the root directory's entry was requested we do not + -- need to do any additional work return self.root - end + end - path = string.gsub(path, "^/", ""):gsub("/$", "") - for _, entry in self.entries do - if string.gsub(entry.name, "/$", "") == path then - return entry - end - end + -- Normalize path by removing leading and trailing slashes + -- This ensures consistent lookup regardless of input format + -- e.g., "/folder/file.txt/" -> "folder/file.txt" + path = string.gsub(path, "^/", ""):gsub("/$", "") - return self.directories[path] + -- First check regular files and explicit directories + for _, entry in self.entries do + -- Compare normalized paths + if string.gsub(entry.name, "/$", "") == path then + return entry + end + end + + -- If not found, check virtual directory entries + -- These are directories that were created implicitly + return self.directories[path] end + function ZipReader.extract(self: ZipReader, entry: ZipEntry): buffer + -- Local File Header format: + -- Offset Bytes Description + -- 0 4 Local file header signature + -- 8 2 Compression method (8 = DEFLATE) + -- 26 2 File name length (n) + -- 28 2 Extra field length (m) + -- 30 n File name + -- 30+n m Extra field + -- 30+n+m - File data + if entry.isDirectory then error("Cannot extract directory") end @@ -192,28 +240,39 @@ function ZipReader.extract(self: ZipReader, entry: ZipEntry): buffer end function ZipReader.extractDirectory(self: ZipReader, path: string): { [string]: buffer } - local files = {} - path = string.gsub(path, "^/", "") + local files = {} + -- Normalize path by removing leading slash for consistent prefix matching + path = string.gsub(path, "^/", "") - for _, entry in self.entries do - if not entry.isDirectory and string.sub(entry.name, 1, #path) == path then - files[entry.name] = self:extract(entry) - end - end + -- Iterate through all entries to find files within target directory + for _, entry in self.entries do + -- Check if entry is a file (not directory) and its path starts with target directory + if not entry.isDirectory and string.sub(entry.name, 1, #path) == path then + -- Store extracted content mapped to full path + files[entry.name] = self:extract(entry) + end + end - return files + -- Return a map of file to contents + return files end + function ZipReader.listDirectory(self: ZipReader, path: string): { ZipEntry } + -- Locate the entry with the path local entry = self:findEntry(path) if not entry or not entry.isDirectory then + -- If an entry was not found, we error error("Not a directory") end + -- Return the children of our discovered entry return entry.children end function ZipReader.walk(self: ZipReader, callback: (entry: ZipEntry, depth: number) -> ()): () + -- Wrapper function which recursively calls callback for every child + -- in an entry local function walkEntry(entry: ZipEntry, depth: number) callback(entry, depth) @@ -234,6 +293,7 @@ function ZipReader.getStats(self: ZipReader): ZipStatistics totalSize = 0, } + -- Iterate through the entries, updating stats for _, entry in self.entries do if entry.isDirectory then stats.dirCount = stats.dirCount + 1 @@ -248,7 +308,8 @@ function ZipReader.getStats(self: ZipReader): ZipStatistics end return { - load = function(data) + -- Creates a `ZipReader` from a `buffer` of ZIP data. + load = function(data: buffer) return ZipReader.new(data) end, }