mirror of
https://github.com/0x5eal/luau-unzip.git
synced 2025-04-10 17:20:53 +01:00
refactor: include code comments
This commit is contained in:
parent
b3777ba2b1
commit
df134c31e0
1 changed files with 124 additions and 63 deletions
187
lib/init.luau
187
lib/init.luau
|
@ -1,26 +1,30 @@
|
||||||
|
-- Little endian constant signatures used in the ZIP file format
|
||||||
local SIGNATURES = table.freeze({
|
local SIGNATURES = table.freeze({
|
||||||
|
-- Marks the beginning of each file in the ZIP
|
||||||
LOCAL_FILE = 0x04034b50,
|
LOCAL_FILE = 0x04034b50,
|
||||||
|
-- Marks entries in the central directory
|
||||||
CENTRAL_DIR = 0x02014b50,
|
CENTRAL_DIR = 0x02014b50,
|
||||||
|
-- Marks the end of the central directory
|
||||||
END_OF_CENTRAL_DIR = 0x06054b50,
|
END_OF_CENTRAL_DIR = 0x06054b50,
|
||||||
})
|
})
|
||||||
|
|
||||||
-- TODO: ERROR HANDLING !!
|
-- TODO: ERROR HANDLING!!
|
||||||
|
|
||||||
local ZipEntry = {}
|
local ZipEntry = {}
|
||||||
export type ZipEntry = typeof(setmetatable({} :: ZipEntryInner, { __index = ZipEntry }))
|
export type ZipEntry = typeof(setmetatable({} :: ZipEntryInner, { __index = ZipEntry }))
|
||||||
|
-- stylua: ignore
|
||||||
type ZipEntryInner = {
|
type ZipEntryInner = {
|
||||||
name: string,
|
name: string, -- File path within ZIP, '/' suffix indicates directory
|
||||||
size: number,
|
size: number, -- Uncompressed size in bytes
|
||||||
offset: number,
|
offset: number, -- Absolute position of local header in ZIP
|
||||||
timestamp: number,
|
timestamp: number, -- MS-DOS format timestamp
|
||||||
crc: number,
|
crc: number, -- CRC32 checksum of uncompressed data
|
||||||
isDirectory: boolean,
|
isDirectory: boolean, -- Whether the entry is a directory or not
|
||||||
parent: ZipEntry?,
|
parent: ZipEntry?, -- The parent of the current entry, nil for root
|
||||||
children: { ZipEntry },
|
children: { ZipEntry }, -- The children of the entry
|
||||||
getPath: (ZipEntry) -> string,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function ZipEntry.new(name, size, offset, timestamp, crc): ZipEntry
|
function ZipEntry.new(name: string, size: number, offset: number, timestamp: number, crc: number): ZipEntry
|
||||||
return setmetatable(
|
return setmetatable(
|
||||||
{
|
{
|
||||||
name = name,
|
name = name,
|
||||||
|
@ -50,11 +54,12 @@ end
|
||||||
|
|
||||||
local ZipReader = {}
|
local ZipReader = {}
|
||||||
export type ZipReader = typeof(setmetatable({} :: ZipReaderInner, { __index = ZipReader }))
|
export type ZipReader = typeof(setmetatable({} :: ZipReaderInner, { __index = ZipReader }))
|
||||||
|
-- stylua: ignore
|
||||||
type ZipReaderInner = {
|
type ZipReaderInner = {
|
||||||
data: buffer,
|
data: buffer, -- The buffer containing the raw bytes of the ZIP
|
||||||
entries: { ZipEntry },
|
entries: { ZipEntry }, -- The decoded entries present
|
||||||
directories: { [string]: ZipEntry },
|
directories: { [string]: ZipEntry }, -- The directories and their respective entries
|
||||||
root: ZipEntry,
|
root: ZipEntry, -- The entry of the root directory
|
||||||
}
|
}
|
||||||
|
|
||||||
function ZipReader.new(data): ZipReader
|
function ZipReader.new(data): ZipReader
|
||||||
|
@ -77,28 +82,42 @@ function ZipReader.new(data): ZipReader
|
||||||
end
|
end
|
||||||
|
|
||||||
function ZipReader.parseCentralDirectory(self: ZipReader): ()
|
function ZipReader.parseCentralDirectory(self: ZipReader): ()
|
||||||
|
-- ZIP files are read from the end, starting with the End of Central Directory record
|
||||||
|
-- The EoCD is at least 22 bytes and contains pointers to the rest of the ZIP structure
|
||||||
local bufSize = buffer.len(self.data)
|
local bufSize = buffer.len(self.data)
|
||||||
local pos = bufSize - 22
|
local pos = bufSize - 22
|
||||||
|
|
||||||
|
-- Search backwards for the EoCD signature
|
||||||
while pos > 0 do
|
while pos > 0 do
|
||||||
|
-- Read 4 bytes as uint32 in little-endian format
|
||||||
if buffer.readu32(self.data, pos) == SIGNATURES.END_OF_CENTRAL_DIR then
|
if buffer.readu32(self.data, pos) == SIGNATURES.END_OF_CENTRAL_DIR then
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
pos = pos - 1
|
pos = pos - 1
|
||||||
end
|
end
|
||||||
|
|
||||||
if pos < 0 then
|
-- Central Directory offset is stored 16 bytes into the EoCD record
|
||||||
error("Invalid ZIP file: End of Central Directory not found")
|
|
||||||
end
|
|
||||||
|
|
||||||
local cdOffset = buffer.readu32(self.data, pos + 16)
|
local cdOffset = buffer.readu32(self.data, pos + 16)
|
||||||
|
-- Number of entries is stored 10 bytes into the EoCD record
|
||||||
local cdEntries = buffer.readu16(self.data, pos + 10)
|
local cdEntries = buffer.readu16(self.data, pos + 10)
|
||||||
|
|
||||||
|
-- Process each entry in the Central Directory
|
||||||
pos = cdOffset
|
pos = cdOffset
|
||||||
for i = 1, cdEntries do
|
for i = 1, cdEntries do
|
||||||
if buffer.readu32(self.data, pos) ~= SIGNATURES.CENTRAL_DIR then
|
-- Central Directory Entry format:
|
||||||
error("Invalid central directory header")
|
-- Offset Bytes Description
|
||||||
end
|
-- ------------------------------------------------
|
||||||
|
-- 0 4 Central directory entry signature
|
||||||
|
-- 28 2 File name length (n)
|
||||||
|
-- 30 2 Extra field length (m)
|
||||||
|
-- 32 2 Comment length (k)
|
||||||
|
-- 12 4 Last mod time/date
|
||||||
|
-- 16 4 CRC-32
|
||||||
|
-- 24 4 Uncompressed size
|
||||||
|
-- 42 4 Local header offset
|
||||||
|
-- 46 n File name
|
||||||
|
-- 46+n m Extra field
|
||||||
|
-- 46+n+m k Comment
|
||||||
|
|
||||||
local nameLength = buffer.readu16(self.data, pos + 28)
|
local nameLength = buffer.readu16(self.data, pos + 28)
|
||||||
local extraLength = buffer.readu16(self.data, pos + 30)
|
local extraLength = buffer.readu16(self.data, pos + 30)
|
||||||
|
@ -121,52 +140,81 @@ end
|
||||||
|
|
||||||
function ZipReader.buildDirectoryTree(self: ZipReader): ()
|
function ZipReader.buildDirectoryTree(self: ZipReader): ()
|
||||||
for _, entry in self.entries do
|
for _, entry in self.entries do
|
||||||
local parts = {}
|
local parts = {}
|
||||||
for part in string.gmatch(entry.name, "([^/]+)/?") do
|
-- Split entry path into individual components
|
||||||
table.insert(parts, part)
|
-- e.g. "folder/subfolder/file.txt" -> {"folder", "subfolder", "file.txt"}
|
||||||
end
|
for part in string.gmatch(entry.name, "([^/]+)/?") do
|
||||||
|
table.insert(parts, part)
|
||||||
|
end
|
||||||
|
|
||||||
local current = self.root
|
-- Start from root directory
|
||||||
local path = ""
|
local current = self.root
|
||||||
|
local path = ""
|
||||||
|
|
||||||
for i, part in parts do
|
-- Process each path component
|
||||||
path ..= part
|
for i, part in parts do
|
||||||
if i < #parts then
|
path ..= part
|
||||||
if not self.directories[path] then
|
if i < #parts then
|
||||||
local dir = ZipEntry.new(path, 0, 0, entry.timestamp, 0)
|
-- Create missing directory entries for intermediate paths
|
||||||
dir.isDirectory = true
|
if not self.directories[path] then
|
||||||
dir.parent = current
|
local dir = ZipEntry.new(path, 0, 0, entry.timestamp, 0)
|
||||||
|
dir.isDirectory = true
|
||||||
|
dir.parent = current
|
||||||
|
|
||||||
self.directories[path] = dir
|
-- Track directory in both lookup table and parent's children
|
||||||
table.insert(current.children, dir)
|
self.directories[path] = dir
|
||||||
end
|
table.insert(current.children, dir)
|
||||||
|
end
|
||||||
|
|
||||||
current = self.directories[path]
|
-- Move deeper into the tree
|
||||||
continue
|
current = self.directories[path]
|
||||||
end
|
continue
|
||||||
|
end
|
||||||
|
|
||||||
entry.parent = current
|
-- Link file entry to its parent directory
|
||||||
table.insert(current.children, entry)
|
entry.parent = current
|
||||||
end
|
table.insert(current.children, entry)
|
||||||
end
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
function ZipReader.findEntry(self: ZipReader, path: string): ZipEntry
|
function ZipReader.findEntry(self: ZipReader, path: string): ZipEntry
|
||||||
if path == "/" then
|
if path == "/" then
|
||||||
|
-- If the root directory's entry was requested we do not
|
||||||
|
-- need to do any additional work
|
||||||
return self.root
|
return self.root
|
||||||
end
|
end
|
||||||
|
|
||||||
path = string.gsub(path, "^/", ""):gsub("/$", "")
|
-- Normalize path by removing leading and trailing slashes
|
||||||
for _, entry in self.entries do
|
-- This ensures consistent lookup regardless of input format
|
||||||
if string.gsub(entry.name, "/$", "") == path then
|
-- e.g., "/folder/file.txt/" -> "folder/file.txt"
|
||||||
return entry
|
path = string.gsub(path, "^/", ""):gsub("/$", "")
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
return self.directories[path]
|
-- First check regular files and explicit directories
|
||||||
|
for _, entry in self.entries do
|
||||||
|
-- Compare normalized paths
|
||||||
|
if string.gsub(entry.name, "/$", "") == path then
|
||||||
|
return entry
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
-- If not found, check virtual directory entries
|
||||||
|
-- These are directories that were created implicitly
|
||||||
|
return self.directories[path]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
function ZipReader.extract(self: ZipReader, entry: ZipEntry): buffer
|
function ZipReader.extract(self: ZipReader, entry: ZipEntry): buffer
|
||||||
|
-- Local File Header format:
|
||||||
|
-- Offset Bytes Description
|
||||||
|
-- 0 4 Local file header signature
|
||||||
|
-- 8 2 Compression method (8 = DEFLATE)
|
||||||
|
-- 26 2 File name length (n)
|
||||||
|
-- 28 2 Extra field length (m)
|
||||||
|
-- 30 n File name
|
||||||
|
-- 30+n m Extra field
|
||||||
|
-- 30+n+m - File data
|
||||||
|
|
||||||
if entry.isDirectory then
|
if entry.isDirectory then
|
||||||
error("Cannot extract directory")
|
error("Cannot extract directory")
|
||||||
end
|
end
|
||||||
|
@ -192,28 +240,39 @@ function ZipReader.extract(self: ZipReader, entry: ZipEntry): buffer
|
||||||
end
|
end
|
||||||
|
|
||||||
function ZipReader.extractDirectory(self: ZipReader, path: string): { [string]: buffer }
|
function ZipReader.extractDirectory(self: ZipReader, path: string): { [string]: buffer }
|
||||||
local files = {}
|
local files = {}
|
||||||
path = string.gsub(path, "^/", "")
|
-- Normalize path by removing leading slash for consistent prefix matching
|
||||||
|
path = string.gsub(path, "^/", "")
|
||||||
|
|
||||||
for _, entry in self.entries do
|
-- Iterate through all entries to find files within target directory
|
||||||
if not entry.isDirectory and string.sub(entry.name, 1, #path) == path then
|
for _, entry in self.entries do
|
||||||
files[entry.name] = self:extract(entry)
|
-- Check if entry is a file (not directory) and its path starts with target directory
|
||||||
end
|
if not entry.isDirectory and string.sub(entry.name, 1, #path) == path then
|
||||||
end
|
-- Store extracted content mapped to full path
|
||||||
|
files[entry.name] = self:extract(entry)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
return files
|
-- Return a map of file to contents
|
||||||
|
return files
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
function ZipReader.listDirectory(self: ZipReader, path: string): { ZipEntry }
|
function ZipReader.listDirectory(self: ZipReader, path: string): { ZipEntry }
|
||||||
|
-- Locate the entry with the path
|
||||||
local entry = self:findEntry(path)
|
local entry = self:findEntry(path)
|
||||||
if not entry or not entry.isDirectory then
|
if not entry or not entry.isDirectory then
|
||||||
|
-- If an entry was not found, we error
|
||||||
error("Not a directory")
|
error("Not a directory")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Return the children of our discovered entry
|
||||||
return entry.children
|
return entry.children
|
||||||
end
|
end
|
||||||
|
|
||||||
function ZipReader.walk(self: ZipReader, callback: (entry: ZipEntry, depth: number) -> ()): ()
|
function ZipReader.walk(self: ZipReader, callback: (entry: ZipEntry, depth: number) -> ()): ()
|
||||||
|
-- Wrapper function which recursively calls callback for every child
|
||||||
|
-- in an entry
|
||||||
local function walkEntry(entry: ZipEntry, depth: number)
|
local function walkEntry(entry: ZipEntry, depth: number)
|
||||||
callback(entry, depth)
|
callback(entry, depth)
|
||||||
|
|
||||||
|
@ -234,6 +293,7 @@ function ZipReader.getStats(self: ZipReader): ZipStatistics
|
||||||
totalSize = 0,
|
totalSize = 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- Iterate through the entries, updating stats
|
||||||
for _, entry in self.entries do
|
for _, entry in self.entries do
|
||||||
if entry.isDirectory then
|
if entry.isDirectory then
|
||||||
stats.dirCount = stats.dirCount + 1
|
stats.dirCount = stats.dirCount + 1
|
||||||
|
@ -248,7 +308,8 @@ function ZipReader.getStats(self: ZipReader): ZipStatistics
|
||||||
end
|
end
|
||||||
|
|
||||||
return {
|
return {
|
||||||
load = function(data)
|
-- Creates a `ZipReader` from a `buffer` of ZIP data.
|
||||||
|
load = function(data: buffer)
|
||||||
return ZipReader.new(data)
|
return ZipReader.new(data)
|
||||||
end,
|
end,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue