mirror of
https://github.com/0x5eal/luau-unzip.git
synced 2025-04-19 13:13:45 +01:00
feat: support following symlinks
* Added new extraction option: `followSymlinks`
* Added method for querying whether an entry is a symlink: `ZipEntry:isSymlink`
* Added tests for symlinks
This commit is contained in:
parent
89ee51874b
commit
d329a3f273
2 changed files with 444 additions and 329 deletions
122
lib/init.luau
122
lib/init.luau
|
@ -70,27 +70,38 @@ type ZipEntryInner = {
|
||||||
method: CompressionMethod, -- Method used to compress the file
|
method: CompressionMethod, -- Method used to compress the file
|
||||||
crc: number, -- CRC32 checksum of uncompressed data
|
crc: number, -- CRC32 checksum of uncompressed data
|
||||||
isDirectory: boolean, -- Whether the entry is a directory or not
|
isDirectory: boolean, -- Whether the entry is a directory or not
|
||||||
|
isAscii: boolean, -- Whether the entry is plain ASCII text or binary
|
||||||
|
attributes: number, -- File attributes
|
||||||
parent: ZipEntry?, -- The parent of the current entry, nil for root
|
parent: ZipEntry?, -- The parent of the current entry, nil for root
|
||||||
children: { ZipEntry }, -- The children of the entry
|
children: { ZipEntry }, -- The children of the entry
|
||||||
}
|
}
|
||||||
|
|
||||||
function ZipEntry.new(
|
type ZipEntryProperties = {
|
||||||
name: string,
|
|
||||||
size: number,
|
size: number,
|
||||||
offset: number,
|
attributes: number,
|
||||||
timestamp: number,
|
timestamp: number,
|
||||||
method: CompressionMethod?,
|
method: CompressionMethod?,
|
||||||
crc: number
|
crc: number,
|
||||||
): ZipEntry
|
}
|
||||||
|
-- Frozen, zeroed-out property set for entries that carry no metadata of their
-- own (it is what the synthetic root entry is constructed with).
-- `method` is deliberately left unset.
local EMPTY_PROPERTIES: ZipEntryProperties = table.freeze({
	crc = 0,
	size = 0,
	timestamp = 0,
	attributes = 0,
	method = nil,
})
|
||||||
|
|
||||||
|
function ZipEntry.new(offset: number, name: string, properties: ZipEntryProperties): ZipEntry
|
||||||
return setmetatable(
|
return setmetatable(
|
||||||
{
|
{
|
||||||
name = name,
|
name = name,
|
||||||
size = size,
|
size = properties.size,
|
||||||
offset = offset,
|
offset = offset,
|
||||||
timestamp = timestamp,
|
timestamp = properties.timestamp,
|
||||||
method = method,
|
method = properties.method,
|
||||||
crc = crc,
|
crc = properties.crc,
|
||||||
isDirectory = string.sub(name, -1) == "/",
|
isDirectory = string.sub(name, -1) == "/",
|
||||||
|
attributes = properties.attributes,
|
||||||
parent = nil,
|
parent = nil,
|
||||||
children = {},
|
children = {},
|
||||||
} :: ZipEntryInner,
|
} :: ZipEntryInner,
|
||||||
|
@ -98,6 +109,10 @@ function ZipEntry.new(
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
--- Reports whether this entry is a symbolic link.
---
--- The check looks at the top four bits of `attributes`, which is populated
--- from the central directory's external file attributes field. Those bits are
--- treated as the file-type portion of a Unix mode stored in the high 16 bits.
---
--- NOTE(review): this assumes the archive was written by a Unix-style host; the
--- "version made by" field is not consulted here - confirm whether that matters.
---
--- @return `true` if the file-type bits equal the symlink type, `false` otherwise
function ZipEntry.isSymlink(self: ZipEntry): boolean
	-- Isolate all four file-type bits (S_IFMT = 0xF000, shifted into the high
	-- half) before comparing against S_IFLNK (0xA000). Testing only the 0xA bits
	-- would also accept type nibbles such as 0xB, 0xE and 0xF.
	return bit32.band(self.attributes, 0xF0000000) == 0xA0000000
end
|
||||||
|
|
||||||
function ZipEntry.getPath(self: ZipEntry): string
|
function ZipEntry.getPath(self: ZipEntry): string
|
||||||
local path = self.name
|
local path = self.name
|
||||||
local current = self.parent
|
local current = self.parent
|
||||||
|
@ -122,7 +137,7 @@ type ZipReaderInner = {
|
||||||
}
|
}
|
||||||
|
|
||||||
function ZipReader.new(data): ZipReader
|
function ZipReader.new(data): ZipReader
|
||||||
local root = ZipEntry.new("/", 0, 0, 0, nil, 0)
|
local root = ZipEntry.new(0, "/", EMPTY_PROPERTIES)
|
||||||
root.isDirectory = true
|
root.isDirectory = true
|
||||||
|
|
||||||
local this = setmetatable(
|
local this = setmetatable(
|
||||||
|
@ -139,7 +154,6 @@ function ZipReader.new(data): ZipReader
|
||||||
this:buildDirectoryTree()
|
this:buildDirectoryTree()
|
||||||
return this
|
return this
|
||||||
end
|
end
|
||||||
|
|
||||||
function ZipReader.parseCentralDirectory(self: ZipReader): ()
|
function ZipReader.parseCentralDirectory(self: ZipReader): ()
|
||||||
-- ZIP files are read from the end, starting with the End of Central Directory record
|
-- ZIP files are read from the end, starting with the End of Central Directory record
|
||||||
-- The EoCD is at least 22 bytes and contains pointers to the rest of the ZIP structure
|
-- The EoCD is at least 22 bytes and contains pointers to the rest of the ZIP structure
|
||||||
|
@ -193,6 +207,8 @@ function ZipReader.parseCentralDirectory(self: ZipReader): ()
|
||||||
-- 28 2 File name length (n)
|
-- 28 2 File name length (n)
|
||||||
-- 30 2 Extra field length (m)
|
-- 30 2 Extra field length (m)
|
||||||
-- 32 2 Comment length (k)
|
-- 32 2 Comment length (k)
|
||||||
|
-- 36 2 Internal file attributes
|
||||||
|
-- 38 4 External file attributes
|
||||||
-- 42 4 Local header offset
|
-- 42 4 Local header offset
|
||||||
-- 46 n File name
|
-- 46 n File name
|
||||||
-- 46+n m Extra field
|
-- 46+n m Extra field
|
||||||
|
@ -206,11 +222,22 @@ function ZipReader.parseCentralDirectory(self: ZipReader): ()
|
||||||
local nameLength = buffer.readu16(self.data, pos + 28)
|
local nameLength = buffer.readu16(self.data, pos + 28)
|
||||||
local extraLength = buffer.readu16(self.data, pos + 30)
|
local extraLength = buffer.readu16(self.data, pos + 30)
|
||||||
local commentLength = buffer.readu16(self.data, pos + 32)
|
local commentLength = buffer.readu16(self.data, pos + 32)
|
||||||
|
local internalAttrs = buffer.readu16(self.data, pos + 36)
|
||||||
|
local externalAttrs = buffer.readu32(self.data, pos + 38)
|
||||||
local offset = buffer.readu32(self.data, pos + 42)
|
local offset = buffer.readu32(self.data, pos + 42)
|
||||||
local name = buffer.readstring(self.data, pos + 46, nameLength)
|
local name = buffer.readstring(self.data, pos + 46, nameLength)
|
||||||
|
|
||||||
local entry = ZipEntry.new(name, size, offset, timestamp, DECOMPRESSION_ROUTINES[compressionMethod].name, crc)
|
table.insert(
|
||||||
table.insert(self.entries, entry)
|
self.entries,
|
||||||
|
ZipEntry.new(offset, name, {
|
||||||
|
size = size,
|
||||||
|
crc = crc,
|
||||||
|
compressionMethod = DECOMPRESSION_ROUTINES[compressionMethod].name,
|
||||||
|
timestamp = timestamp,
|
||||||
|
attributes = externalAttrs,
|
||||||
|
isAscii = bit32.band(internalAttrs, 0x0001) ~= 0,
|
||||||
|
})
|
||||||
|
)
|
||||||
|
|
||||||
pos = pos + 46 + nameLength + extraLength + commentLength
|
pos = pos + 46 + nameLength + extraLength + commentLength
|
||||||
end
|
end
|
||||||
|
@ -253,7 +280,13 @@ function ZipReader.buildDirectoryTree(self: ZipReader): ()
|
||||||
else
|
else
|
||||||
-- Create new directory entry for intermediate paths or undefined
|
-- Create new directory entry for intermediate paths or undefined
|
||||||
-- parent directories in the ZIP
|
-- parent directories in the ZIP
|
||||||
local dir = ZipEntry.new(path .. "/", 0, 0, entry.timestamp, nil, 0)
|
local dir = ZipEntry.new(0, path .. "/", {
|
||||||
|
size = 0,
|
||||||
|
crc = 0,
|
||||||
|
compressionMethod = "STORED",
|
||||||
|
timestamp = entry.timestamp,
|
||||||
|
attributes = entry.attributes,
|
||||||
|
})
|
||||||
dir.isDirectory = true
|
dir.isDirectory = true
|
||||||
dir.parent = current
|
dir.parent = current
|
||||||
self.directories[path] = dir
|
self.directories[path] = dir
|
||||||
|
@ -301,6 +334,7 @@ function ZipReader.findEntry(self: ZipReader, path: string): ZipEntry?
|
||||||
end
|
end
|
||||||
|
|
||||||
type ExtractionOptions = {
|
type ExtractionOptions = {
|
||||||
|
followSymlinks: boolean?,
|
||||||
decompress: boolean?,
|
decompress: boolean?,
|
||||||
isString: boolean?,
|
isString: boolean?,
|
||||||
skipCrcValidation: boolean?,
|
skipCrcValidation: boolean?,
|
||||||
|
@ -326,13 +360,15 @@ function ZipReader.extract(self: ZipReader, entry: ZipEntry, options: Extraction
|
||||||
end
|
end
|
||||||
|
|
||||||
local defaultOptions: ExtractionOptions = {
|
local defaultOptions: ExtractionOptions = {
|
||||||
|
followSymlinks = false,
|
||||||
decompress = true,
|
decompress = true,
|
||||||
isString = false,
|
isString = entry.isAscii,
|
||||||
skipValidation = false,
|
skipValidation = false,
|
||||||
}
|
}
|
||||||
|
|
||||||
-- TODO: Use a `Partial` type function for this in the future!
|
-- TODO: Use a `Partial` type function for this in the future!
|
||||||
local optionsOrDefault: {
|
local optionsOrDefault: {
|
||||||
|
followSymlinks: boolean,
|
||||||
decompress: boolean,
|
decompress: boolean,
|
||||||
isString: boolean,
|
isString: boolean,
|
||||||
skipCrcValidation: boolean,
|
skipCrcValidation: boolean,
|
||||||
|
@ -406,6 +442,62 @@ function ZipReader.extract(self: ZipReader, entry: ZipEntry, options: Extraction
|
||||||
error(`Unsupported compression, ID: {compressionMethod}`)
|
error(`Unsupported compression, ID: {compressionMethod}`)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if optionsOrDefault.followSymlinks then
|
||||||
|
local linkPath = buffer.tostring(algo.decompress(content, 0, {
|
||||||
|
expected = 0x00000000,
|
||||||
|
skip = true,
|
||||||
|
}))
|
||||||
|
|
||||||
|
--- Normalizes a `/`-separated path by resolving its `.` and `..` components.
---
--- `.` components are dropped, and each `..` discards the most recently kept
--- component (doing nothing when there is none left). Every other component,
--- including empty ones produced by leading, trailing or doubled separators, is
--- kept unchanged, so a trailing `/` on the input survives in the output.
local function canonicalize(path: string): string
	-- NOTE: Splitting on `/` is fine because ZIP file names always use `/` as
	-- the path separator
	local kept = {}
	for _, part in string.split(path, "/") do
		if part == ".." then
			-- Step one level up by discarding the last kept component
			table.remove(kept, #kept)
		elseif part ~= "." then
			-- Anything that is not the current-directory marker is kept
			table.insert(kept, part)
		end
	end

	return table.concat(kept, "/")
end
|
||||||
|
|
||||||
|
-- Check if the path was a relative path
|
||||||
|
if
|
||||||
|
not (
|
||||||
|
string.match(linkPath, "^/")
|
||||||
|
or string.match(linkPath, "^[a-zA-Z]:[\\/]")
|
||||||
|
or string.match(linkPath, "^//")
|
||||||
|
)
|
||||||
|
then
|
||||||
|
if string.sub(linkPath, -1) ~= "/" then
|
||||||
|
linkPath ..= "/"
|
||||||
|
end
|
||||||
|
|
||||||
|
linkPath = canonicalize(`{(entry.parent or self.root).name}{linkPath}`)
|
||||||
|
end
|
||||||
|
|
||||||
|
optionsOrDefault.followSymlinks = false
|
||||||
|
optionsOrDefault.isString = false
|
||||||
|
optionsOrDefault.skipCrcValidation = true
|
||||||
|
optionsOrDefault.skipSizeValidation = true
|
||||||
|
content = self:extract(
|
||||||
|
self:findEntry(linkPath) or error("Symlink path not found"),
|
||||||
|
optionsOrDefault
|
||||||
|
) :: buffer
|
||||||
|
end
|
||||||
|
|
||||||
content = algo.decompress(content, uncompressedSize, {
|
content = algo.decompress(content, uncompressedSize, {
|
||||||
expected = crcChecksum,
|
expected = crcChecksum,
|
||||||
skip = optionsOrDefault.skipCrcValidation,
|
skip = optionsOrDefault.skipCrcValidation,
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
local fs = require("@lune/fs")
|
local fs = require("@lune/fs")
|
||||||
|
local process = require("@lune/process")
|
||||||
|
local serde = require("@lune/serde")
|
||||||
|
|
||||||
local frktest = require("../lune_packages/frktest")
|
local frktest = require("../lune_packages/frktest")
|
||||||
local check = frktest.assert.check
|
local check = frktest.assert.check
|
||||||
|
@ -13,5 +15,26 @@ return function(test: typeof(frktest.test))
|
||||||
|
|
||||||
check.equal(zip.comment, "short.")
|
check.equal(zip.comment, "short.")
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
test.case("Follows symlinks correctly", function()
|
||||||
|
-- TODO: More test files with symlinks
|
||||||
|
|
||||||
|
local data = fs.readFile("tests/data/pandoc_soft_links.zip")
|
||||||
|
local zip = ZipReader.load(buffer.fromstring(data))
|
||||||
|
|
||||||
|
local entry = assert(zip:findEntry("/pandoc-3.2-arm64/bin/pandoc-lua"))
|
||||||
|
assert(entry:isSymlink(), "Entry type must be a symlink")
|
||||||
|
|
||||||
|
local targetPath = zip:extract(entry, { isString = true }) :: string
|
||||||
|
check.equal(targetPath, "pandoc")
|
||||||
|
|
||||||
|
local bin = zip:extract(entry, { isString = false, followSymlinks = true }) :: buffer
|
||||||
|
local expectedBin = process.spawn("unzip", { "-p", "tests/data/pandoc_soft_links.zip", "pandoc-3.2-arm64/bin/pandoc" })
|
||||||
|
check.is_true(expectedBin.ok)
|
||||||
|
|
||||||
|
-- Compare hashes instead of the entire binary to improve speed and not print out
|
||||||
|
-- the entire binary data in case there's a mismatch
|
||||||
|
check.equal(serde.hash("blake3", bin), serde.hash("blake3", expectedBin.stdout))
|
||||||
|
end)
|
||||||
end)
|
end)
|
||||||
end
|
end
|
Loading…
Add table
Reference in a new issue