mirror of
https://github.com/0x5eal/luau-unzip.git
synced 2025-04-04 06:30:53 +01:00
refactor: include code comments
This commit is contained in:
parent
b3777ba2b1
commit
df134c31e0
1 changed file with 124 additions and 63 deletions
187
lib/init.luau
187
lib/init.luau
|
@ -1,26 +1,30 @@
|
|||
-- Magic numbers identifying each kind of ZIP record, expressed as the value
-- obtained when the four signature bytes are read as a little-endian uint32
local SIGNATURES = table.freeze({
	END_OF_CENTRAL_DIR = 0x06054b50, -- Marks the end of the central directory
	CENTRAL_DIR = 0x02014b50, -- Marks entries in the central directory
	LOCAL_FILE = 0x04034b50, -- Marks the beginning of each file in the ZIP
})
|
||||
|
||||
-- TODO: ERROR HANDLING !!
|
||||
-- TODO: ERROR HANDLING!!
|
||||
|
||||
local ZipEntry = {}
|
||||
export type ZipEntry = typeof(setmetatable({} :: ZipEntryInner, { __index = ZipEntry }))
|
||||
-- stylua: ignore
|
||||
type ZipEntryInner = {
	-- File path within ZIP, '/' suffix indicates directory
	name: string,
	-- Uncompressed size in bytes
	size: number,
	-- Absolute position of local header in ZIP
	offset: number,
	-- MS-DOS format timestamp
	timestamp: number,
	-- CRC32 checksum of uncompressed data
	crc: number,
	-- Whether the entry is a directory or not
	isDirectory: boolean,
	-- The parent of the current entry, nil for root
	parent: ZipEntry?,
	-- The children of the entry
	children: { ZipEntry },
}
|
||||
|
||||
function ZipEntry.new(name, size, offset, timestamp, crc): ZipEntry
|
||||
function ZipEntry.new(name: string, size: number, offset: number, timestamp: number, crc: number): ZipEntry
|
||||
return setmetatable(
|
||||
{
|
||||
name = name,
|
||||
|
@ -50,11 +54,12 @@ end
|
|||
|
||||
local ZipReader = {}
|
||||
export type ZipReader = typeof(setmetatable({} :: ZipReaderInner, { __index = ZipReader }))
|
||||
-- stylua: ignore
|
||||
type ZipReaderInner = {
	-- The buffer containing the raw bytes of the ZIP
	data: buffer,
	-- The decoded entries present
	entries: { ZipEntry },
	-- The directories and their respective entries
	directories: { [string]: ZipEntry },
	-- The entry of the root directory
	root: ZipEntry,
}
|
||||
|
||||
function ZipReader.new(data): ZipReader
|
||||
|
@ -77,28 +82,42 @@ function ZipReader.new(data): ZipReader
|
|||
end
|
||||
|
||||
function ZipReader.parseCentralDirectory(self: ZipReader): ()
|
||||
-- ZIP files are read from the end, starting with the End of Central Directory record
|
||||
-- The EoCD is at least 22 bytes and contains pointers to the rest of the ZIP structure
|
||||
local bufSize = buffer.len(self.data)
|
||||
local pos = bufSize - 22
|
||||
|
||||
-- Search backwards for the EoCD signature
|
||||
while pos > 0 do
|
||||
-- Read 4 bytes as uint32 in little-endian format
|
||||
if buffer.readu32(self.data, pos) == SIGNATURES.END_OF_CENTRAL_DIR then
|
||||
break
|
||||
end
|
||||
pos = pos - 1
|
||||
end
|
||||
|
||||
if pos < 0 then
|
||||
error("Invalid ZIP file: End of Central Directory not found")
|
||||
end
|
||||
|
||||
-- Central Directory offset is stored 16 bytes into the EoCD record
|
||||
local cdOffset = buffer.readu32(self.data, pos + 16)
|
||||
-- Number of entries is stored 10 bytes into the EoCD record
|
||||
local cdEntries = buffer.readu16(self.data, pos + 10)
|
||||
|
||||
-- Process each entry in the Central Directory
|
||||
pos = cdOffset
|
||||
for i = 1, cdEntries do
|
||||
if buffer.readu32(self.data, pos) ~= SIGNATURES.CENTRAL_DIR then
|
||||
error("Invalid central directory header")
|
||||
end
|
||||
-- Central Directory Entry format:
|
||||
-- Offset Bytes Description
|
||||
-- ------------------------------------------------
|
||||
-- 0 4 Central directory entry signature
|
||||
-- 28 2 File name length (n)
|
||||
-- 30 2 Extra field length (m)
|
||||
-- 32 2 Comment length (k)
|
||||
-- 12 4 Last mod time/date
|
||||
-- 16 4 CRC-32
|
||||
-- 24 4 Uncompressed size
|
||||
-- 42 4 Local header offset
|
||||
-- 46 n File name
|
||||
-- 46+n m Extra field
|
||||
-- 46+n+m k Comment
|
||||
|
||||
local nameLength = buffer.readu16(self.data, pos + 28)
|
||||
local extraLength = buffer.readu16(self.data, pos + 30)
|
||||
|
@ -121,52 +140,81 @@ end
|
|||
|
||||
-- Links every entry in `self.entries` into a tree rooted at `self.root`.
-- Intermediate path components that have not been seen yet get a directory
-- entry created for them, recorded in `self.directories` under their
-- '/'-joined path (e.g. "folder/subfolder").
function ZipReader.buildDirectoryTree(self: ZipReader): ()
	for _, entry in self.entries do
		-- Split entry path into individual components
		-- e.g. "folder/subfolder/file.txt" -> {"folder", "subfolder", "file.txt"}
		local parts = {}
		for part in string.gmatch(entry.name, "([^/]+)/?") do
			table.insert(parts, part)
		end

		-- Start from root directory
		local current = self.root
		local path = ""

		-- Process each path component
		for i, part in parts do
			-- Accumulate the path with '/' between components. Without the
			-- separator the key for "a/b" would be "ab": it could never be
			-- found by a lookup for "a/b" and would collide with a top-level
			-- directory that is really named "ab".
			if i > 1 then
				path ..= "/"
			end
			path ..= part

			if i < #parts then
				-- Create missing directory entries for intermediate paths
				local dir = self.directories[path]
				if not dir then
					dir = ZipEntry.new(path, 0, 0, entry.timestamp, 0)
					dir.isDirectory = true
					dir.parent = current

					-- Track directory in both lookup table and parent's children
					self.directories[path] = dir
					table.insert(current.children, dir)
				end

				-- Move deeper into the tree
				current = dir
				continue
			end

			-- Last component: link the entry itself to its parent directory
			-- NOTE(review): an explicit directory entry (name ending in '/')
			-- is linked here but not registered in `self.directories`, so a
			-- file beneath it still gets a separate directory entry created
			-- above -- confirm whether the explicit entry should be reused
			entry.parent = current
			table.insert(current.children, entry)
		end
	end
end
|
||||
|
||||
-- Looks up an entry by its path within the ZIP.
--
-- `path` may carry a leading and/or trailing slash; "/" refers to the root.
-- Returns the matching entry, or nil when neither `self.entries` nor
-- `self.directories` contains the path.
function ZipReader.findEntry(self: ZipReader, path: string): ZipEntry?
	if path == "/" then
		-- If the root directory's entry was requested we do not
		-- need to do any additional work
		return self.root
	end

	-- Normalize path by removing one leading and one trailing slash
	-- e.g., "/folder/file.txt/" -> "folder/file.txt"
	-- `string.gsub` also returns the substitution count; the parentheses
	-- keep only the resulting string
	local normalized = (string.gsub(path, "^/", ""))
	normalized = (string.gsub(normalized, "/$", ""))

	-- First check regular files and explicit directories
	for _, entry in self.entries do
		-- Compare normalized paths
		local entryPath = (string.gsub(entry.name, "/$", ""))
		if entryPath == normalized then
			return entry
		end
	end

	-- If not found, check the directory lookup table; this yields nil when
	-- the path is unknown
	return self.directories[normalized]
end
|
||||
|
||||
|
||||
function ZipReader.extract(self: ZipReader, entry: ZipEntry): buffer
|
||||
-- Local File Header format:
|
||||
-- Offset Bytes Description
|
||||
-- 0 4 Local file header signature
|
||||
-- 8 2 Compression method (8 = DEFLATE)
|
||||
-- 26 2 File name length (n)
|
||||
-- 28 2 Extra field length (m)
|
||||
-- 30 n File name
|
||||
-- 30+n m Extra field
|
||||
-- 30+n+m - File data
|
||||
|
||||
if entry.isDirectory then
|
||||
error("Cannot extract directory")
|
||||
end
|
||||
|
@ -192,28 +240,39 @@ function ZipReader.extract(self: ZipReader, entry: ZipEntry): buffer
|
|||
end
|
||||
|
||||
-- Extracts every file located under the directory `path`.
--
-- `path` may carry a leading slash and may omit the trailing one; "/" or ""
-- selects the whole archive. Returns a map from each file's full name in
-- the ZIP to its extracted contents.
function ZipReader.extractDirectory(self: ZipReader, path: string): { [string]: buffer }
	local files = {}

	-- Normalize path by removing leading slash for consistent prefix matching
	-- (parentheses drop the substitution count returned by `string.gsub`)
	local prefix = (string.gsub(path, "^/", ""))

	-- Make the prefix end on a directory boundary, otherwise "foo" would
	-- also match "foobar/x" and "foo.txt". An empty prefix is the root and
	-- matches everything.
	if prefix ~= "" and string.sub(prefix, -1) ~= "/" then
		prefix ..= "/"
	end

	-- Iterate through all entries to find files within target directory
	for _, entry in self.entries do
		-- Only files (not directories) whose name starts with the prefix
		if not entry.isDirectory and string.sub(entry.name, 1, #prefix) == prefix then
			-- Store extracted content mapped to full path
			files[entry.name] = self:extract(entry)
		end
	end

	-- Return a map of file to contents
	return files
end
|
||||
|
||||
|
||||
-- Returns the children of the directory at `path`.
-- Errors with "Not a directory" when no entry exists at that path or the
-- entry found is not a directory.
function ZipReader.listDirectory(self: ZipReader, path: string): { ZipEntry }
	local found = self:findEntry(path)

	-- Happy path: the lookup succeeded and yielded a directory
	if found and found.isDirectory then
		return found.children
	end

	error("Not a directory")
end
|
||||
|
||||
function ZipReader.walk(self: ZipReader, callback: (entry: ZipEntry, depth: number) -> ()): ()
|
||||
-- Wrapper function which recursively calls callback for every child
|
||||
-- in an entry
|
||||
local function walkEntry(entry: ZipEntry, depth: number)
|
||||
callback(entry, depth)
|
||||
|
||||
|
@ -234,6 +293,7 @@ function ZipReader.getStats(self: ZipReader): ZipStatistics
|
|||
totalSize = 0,
|
||||
}
|
||||
|
||||
-- Iterate through the entries, updating stats
|
||||
for _, entry in self.entries do
|
||||
if entry.isDirectory then
|
||||
stats.dirCount = stats.dirCount + 1
|
||||
|
@ -248,7 +308,8 @@ function ZipReader.getStats(self: ZipReader): ZipStatistics
|
|||
end
|
||||
|
||||
-- Creates a `ZipReader` from a `buffer` of ZIP data.
local function load(data: buffer)
	return ZipReader.new(data)
end

return {
	load = load,
}
|
||||
|
|
Loading…
Add table
Reference in a new issue