refactor: include code comments

This commit is contained in:
Erica Marigold 2024-12-28 18:01:17 +00:00
parent b3777ba2b1
commit df134c31e0
Signed by: DevComp
GPG key ID: 429EF1C337871656

View file

@ -1,26 +1,30 @@
-- Record signatures defined by the ZIP file format, compared against
-- little-endian u32 reads from the archive
local SIGNATURES = table.freeze({
	LOCAL_FILE = 0x04034b50, -- Marks the beginning of each file in the ZIP
	CENTRAL_DIR = 0x02014b50, -- Marks entries in the central directory
	END_OF_CENTRAL_DIR = 0x06054b50, -- Marks the end of the central directory
})
-- TODO: Add proper error handling
local ZipEntry = {}
export type ZipEntry = typeof(setmetatable({} :: ZipEntryInner, { __index = ZipEntry }))
-- Fields stored on every entry table. Each field is declared exactly once;
-- the previous text listed every field twice (once bare, once commented).
-- stylua: ignore
type ZipEntryInner = {
	name: string,              -- File path within ZIP, '/' suffix indicates directory
	size: number,              -- Uncompressed size in bytes
	offset: number,            -- Absolute position of local header in ZIP
	timestamp: number,         -- MS-DOS format timestamp
	crc: number,               -- CRC32 checksum of uncompressed data
	isDirectory: boolean,      -- Whether the entry is a directory or not
	parent: ZipEntry?,         -- The parent of the current entry, nil for root
	children: { ZipEntry },    -- The children of the entry
	-- NOTE(review): only the older revision of this type declared `getPath`;
	-- it is presumably supplied through the `ZipEntry` metatable instead of
	-- being stored per entry -- confirm, then drop this field if so.
	getPath: (ZipEntry) -> string,
}
function ZipEntry.new(name, size, offset, timestamp, crc): ZipEntry
function ZipEntry.new(name: string, size: number, offset: number, timestamp: number, crc: number): ZipEntry
return setmetatable(
{
name = name,
@ -50,11 +54,12 @@ end
local ZipReader = {}
export type ZipReader = typeof(setmetatable({} :: ZipReaderInner, { __index = ZipReader }))
-- State held by a reader instance. Each field is declared exactly once;
-- the previous text listed every field twice (once bare, once commented).
-- stylua: ignore
type ZipReaderInner = {
	data: buffer,                        -- The buffer containing the raw bytes of the ZIP
	entries: { ZipEntry },               -- The decoded entries present
	directories: { [string]: ZipEntry }, -- The directories and their respective entries, keyed by path
	root: ZipEntry,                      -- The entry of the root directory
}
function ZipReader.new(data): ZipReader
@ -77,28 +82,42 @@ function ZipReader.new(data): ZipReader
end
function ZipReader.parseCentralDirectory(self: ZipReader): ()
-- ZIP files are read from the end, starting with the End of Central Directory record
-- The EoCD is at least 22 bytes and contains pointers to the rest of the ZIP structure
local bufSize = buffer.len(self.data)
local pos = bufSize - 22
-- Search backwards for the EoCD signature
while pos > 0 do
-- Read 4 bytes as uint32 in little-endian format
if buffer.readu32(self.data, pos) == SIGNATURES.END_OF_CENTRAL_DIR then
break
end
pos = pos - 1
end
if pos < 0 then
error("Invalid ZIP file: End of Central Directory not found")
end
-- Central Directory offset is stored 16 bytes into the EoCD record
local cdOffset = buffer.readu32(self.data, pos + 16)
-- Number of entries is stored 10 bytes into the EoCD record
local cdEntries = buffer.readu16(self.data, pos + 10)
-- Process each entry in the Central Directory
pos = cdOffset
for i = 1, cdEntries do
if buffer.readu32(self.data, pos) ~= SIGNATURES.CENTRAL_DIR then
error("Invalid central directory header")
end
-- Central Directory Entry format:
-- Offset Bytes Description
-- ------------------------------------------------
-- 0 4 Central directory entry signature
-- 28 2 File name length (n)
-- 30 2 Extra field length (m)
-- 32 2 Comment length (k)
-- 12 4 Last mod time/date
-- 16 4 CRC-32
-- 24 4 Uncompressed size
-- 42 4 Local header offset
-- 46 n File name
-- 46+n m Extra field
-- 46+n+m k Comment
local nameLength = buffer.readu16(self.data, pos + 28)
local extraLength = buffer.readu16(self.data, pos + 30)
@ -121,52 +140,81 @@ end
-- Links every decoded entry into a tree rooted at `self.root`.
--
-- Each entry name is split on '/' and walked one component at a time. Any
-- intermediate directory that has not been seen yet gets a synthetic
-- `ZipEntry` (size, offset and CRC of 0, timestamp copied from the entry being
-- processed), which is registered in `self.directories` under its
-- slash-separated path and appended to its parent's `children`. The entry
-- itself is then attached to the deepest directory reached.
--
-- Entries whose name contains no path component (e.g. an empty name) are
-- left unattached.
function ZipReader.buildDirectoryTree(self: ZipReader): ()
	for _, entry in self.entries do
		-- Split entry path into individual components
		-- e.g. "folder/subfolder/file.txt" -> {"folder", "subfolder", "file.txt"}
		local parts = {}
		for part in string.gmatch(entry.name, "([^/]+)/?") do
			table.insert(parts, part)
		end

		-- Start from root directory
		local current = self.root
		local path = ""

		-- Process each path component
		for i, part in parts do
			-- Rebuild the path with its '/' separators. Concatenating the bare
			-- components would make "a/bc" and "ab/c" share the key "abc", and
			-- the keys would never match the slash-separated paths that
			-- `findEntry` looks up in `self.directories`.
			if i > 1 then
				path ..= "/"
			end
			path ..= part

			if i < #parts then
				-- Create missing directory entries for intermediate paths
				local dir = self.directories[path]
				if not dir then
					dir = ZipEntry.new(path, 0, 0, entry.timestamp, 0)
					dir.isDirectory = true
					dir.parent = current

					-- Track directory in both lookup table and parent's children
					self.directories[path] = dir
					table.insert(current.children, dir)
				end

				-- Move deeper into the tree
				current = dir
				continue
			end

			-- Last component: link the entry to its parent directory
			entry.parent = current
			table.insert(current.children, entry)
		end
	end
end
-- Looks up an entry by its path inside the archive.
--
-- "/" returns the root entry. For any other input one leading and one
-- trailing '/' are ignored, so "/folder/file.txt/" and "folder/file.txt"
-- resolve to the same entry. Entries decoded from the archive are searched
-- first; directories registered in `self.directories` are the fallback.
--
-- Returns nil when nothing matches, even though the return annotation is
-- not optional; callers should check the result.
function ZipReader.findEntry(self: ZipReader, path: string): ZipEntry
	if path == "/" then
		-- If the root directory's entry was requested we do not
		-- need to do any additional work
		return self.root
	end

	-- Normalize path by removing leading and trailing slashes
	-- e.g., "/folder/file.txt/" -> "folder/file.txt"
	-- (the inner call is not the last argument, so only its string result is used)
	local normalized = string.gsub(string.gsub(path, "^/", ""), "/$", "")

	-- First check regular files and explicit directories
	for _, entry in self.entries do
		-- Compare against the entry name without its trailing slash
		local entryPath = string.gsub(entry.name, "/$", "")
		if entryPath == normalized then
			return entry
		end
	end

	-- If not found, check the directory lookup table
	return self.directories[normalized]
end
function ZipReader.extract(self: ZipReader, entry: ZipEntry): buffer
-- Local File Header format:
-- Offset Bytes Description
-- 0 4 Local file header signature
-- 8 2 Compression method (8 = DEFLATE)
-- 26 2 File name length (n)
-- 28 2 Extra field length (m)
-- 30 n File name
-- 30+n m Extra field
-- 30+n+m - File data
if entry.isDirectory then
error("Cannot extract directory")
end
@ -192,28 +240,39 @@ function ZipReader.extract(self: ZipReader, entry: ZipEntry): buffer
end
-- Extracts every file located under the directory `path`.
--
-- A leading '/' on `path` is ignored; "/" (or "") selects the whole archive.
-- Returns a map from each matching entry's full name to its extracted
-- contents. Directory entries themselves are never included.
function ZipReader.extractDirectory(self: ZipReader, path: string): { [string]: buffer }
	local files = {}

	-- Normalize path by removing leading slash for consistent prefix matching
	local prefix = string.gsub(path, "^/", "")

	-- Match on a directory boundary: without the trailing '/', "docs" would
	-- also select "docs2/a.txt" and "docs.txt"
	if prefix ~= "" and string.sub(prefix, -1) ~= "/" then
		prefix ..= "/"
	end

	-- Iterate through all entries to find files within target directory
	for _, entry in self.entries do
		if not entry.isDirectory and string.sub(entry.name, 1, #prefix) == prefix then
			-- Store extracted content mapped to full path
			files[entry.name] = self:extract(entry)
		end
	end

	-- Return a map of file to contents
	return files
end
-- Returns the child entries of the directory found at `path`.
-- Raises "Not a directory" when `path` resolves to no entry, or to an entry
-- that is not a directory.
function ZipReader.listDirectory(self: ZipReader, path: string): { ZipEntry }
	local target = self:findEntry(path)

	-- Happy path first: a directory entry simply hands back its children
	if target and target.isDirectory then
		return target.children
	end

	error("Not a directory")
end
function ZipReader.walk(self: ZipReader, callback: (entry: ZipEntry, depth: number) -> ()): ()
-- Wrapper function which recursively calls callback for every child
-- in an entry
local function walkEntry(entry: ZipEntry, depth: number)
callback(entry, depth)
@ -234,6 +293,7 @@ function ZipReader.getStats(self: ZipReader): ZipStatistics
totalSize = 0,
}
-- Iterate through the entries, updating stats
for _, entry in self.entries do
if entry.isDirectory then
stats.dirCount = stats.dirCount + 1
@ -248,7 +308,8 @@ function ZipReader.getStats(self: ZipReader): ZipStatistics
end
-- Public module interface
return {
	-- Creates a `ZipReader` from a `buffer` of ZIP data.
	load = function(data: buffer)
		return ZipReader.new(data)
	end,
}