feat: support following symlinks

* Added new extraction option: `followSymlinks`
* Added method for querying whether an entry is a symlink using
  `ZipEntry:isSymlink`
* Added tests for symlinks
This commit is contained in:
Erica Marigold 2025-01-07 18:44:46 +00:00
parent 89ee51874b
commit d329a3f273
Signed by: DevComp
GPG key ID: 429EF1C337871656
2 changed files with 444 additions and 329 deletions

View file

@ -70,27 +70,38 @@ type ZipEntryInner = {
method: CompressionMethod, -- Method used to compress the file
crc: number, -- CRC32 checksum of uncompressed data
isDirectory: boolean, -- Whether the entry is a directory or not
isAscii: boolean, -- Whether the entry is plain ASCII text or binary
attributes: number, -- File attributes
parent: ZipEntry?, -- The parent of the current entry, nil for root
children: { ZipEntry }, -- The children of the entry
}
function ZipEntry.new(
name: string,
type ZipEntryProperties = {
size: number,
offset: number,
attributes: number,
timestamp: number,
method: CompressionMethod?,
crc: number
): ZipEntry
crc: number,
}
-- Frozen, zeroed-out set of entry properties for entries that carry no file data of
-- their own, such as the root directory entry
local EMPTY_PROPERTIES: ZipEntryProperties = table.freeze({
	crc = 0,
	method = nil,
	timestamp = 0,
	attributes = 0,
	size = 0,
})
function ZipEntry.new(offset: number, name: string, properties: ZipEntryProperties): ZipEntry
return setmetatable(
{
name = name,
size = size,
size = properties.size,
offset = offset,
timestamp = timestamp,
method = method,
crc = crc,
timestamp = properties.timestamp,
method = properties.method,
crc = properties.crc,
isDirectory = string.sub(name, -1) == "/",
attributes = properties.attributes,
parent = nil,
children = {},
} :: ZipEntryInner,
@ -98,6 +109,10 @@ function ZipEntry.new(
)
end
--- Reports whether this entry is a symbolic link.
---
--- The upper 16 bits of `attributes` are interpreted as a Unix file mode. The whole
--- file-type field (`S_IFMT`, 0xF000) is isolated before comparing against the symlink
--- type (`S_IFLNK`, 0xA000); testing only the 0xA000 bits would also accept other
--- type values that merely contain those bits (e.g. 0xB000, 0xE000 or 0xF000).
---
--- NOTE(review): this assumes the archive was produced on a Unix-like host; the
--- "version made by" field is not consulted here.
---
--- @return `true` if the file-type bits of the entry's attributes denote a symlink
function ZipEntry.isSymlink(self: ZipEntry): boolean
	return bit32.band(self.attributes, 0xF0000000) == 0xA0000000
end
function ZipEntry.getPath(self: ZipEntry): string
local path = self.name
local current = self.parent
@ -122,7 +137,7 @@ type ZipReaderInner = {
}
function ZipReader.new(data): ZipReader
local root = ZipEntry.new("/", 0, 0, 0, nil, 0)
local root = ZipEntry.new(0, "/", EMPTY_PROPERTIES)
root.isDirectory = true
local this = setmetatable(
@ -139,7 +154,6 @@ function ZipReader.new(data): ZipReader
this:buildDirectoryTree()
return this
end
function ZipReader.parseCentralDirectory(self: ZipReader): ()
-- ZIP files are read from the end, starting with the End of Central Directory record
-- The EoCD is at least 22 bytes and contains pointers to the rest of the ZIP structure
@ -193,6 +207,8 @@ function ZipReader.parseCentralDirectory(self: ZipReader): ()
-- 28 2 File name length (n)
-- 30 2 Extra field length (m)
-- 32 2 Comment length (k)
-- 36 2 Internal file attributes
-- 38 4 External file attributes
-- 42 4 Local header offset
-- 46 n File name
-- 46+n m Extra field
@ -206,11 +222,22 @@ function ZipReader.parseCentralDirectory(self: ZipReader): ()
local nameLength = buffer.readu16(self.data, pos + 28)
local extraLength = buffer.readu16(self.data, pos + 30)
local commentLength = buffer.readu16(self.data, pos + 32)
local internalAttrs = buffer.readu16(self.data, pos + 36)
local externalAttrs = buffer.readu32(self.data, pos + 38)
local offset = buffer.readu32(self.data, pos + 42)
local name = buffer.readstring(self.data, pos + 46, nameLength)
local entry = ZipEntry.new(name, size, offset, timestamp, DECOMPRESSION_ROUTINES[compressionMethod].name, crc)
table.insert(self.entries, entry)
-- Build the entry for this central directory record. The compression method must be
-- passed under the `method` key, since that is the property `ZipEntry.new` reads
local entry = ZipEntry.new(offset, name, {
	size = size,
	crc = crc,
	method = DECOMPRESSION_ROUTINES[compressionMethod].name,
	timestamp = timestamp,
	attributes = externalAttrs,
})
-- `ZipEntry.new` does not copy an `isAscii` property, so it is recorded on the entry
-- directly. Bit 0 of the internal file attributes flags the file as ASCII/text
entry.isAscii = bit32.band(internalAttrs, 0x0001) ~= 0
table.insert(self.entries, entry)
pos = pos + 46 + nameLength + extraLength + commentLength
end
@ -253,7 +280,13 @@ function ZipReader.buildDirectoryTree(self: ZipReader): ()
else
-- Create new directory entry for intermediate paths or undefined
-- parent directories in the ZIP
local dir = ZipEntry.new(path .. "/", 0, 0, entry.timestamp, nil, 0)
-- Synthesized directory entries hold no data, so size and CRC are zero. The
-- compression method is passed under `method`, the key `ZipEntry.new` actually reads
-- NOTE(review): the attributes are copied from `entry`; if that entry is a symlink,
-- this directory would also report `isSymlink()` - confirm this is intended
local dir = ZipEntry.new(0, path .. "/", {
	size = 0,
	crc = 0,
	method = "STORED",
	timestamp = entry.timestamp,
	attributes = entry.attributes,
})
dir.isDirectory = true
dir.parent = current
self.directories[path] = dir
@ -301,6 +334,7 @@ function ZipReader.findEntry(self: ZipReader, path: string): ZipEntry?
end
type ExtractionOptions = {
followSymlinks: boolean?,
decompress: boolean?,
isString: boolean?,
skipCrcValidation: boolean?,
@ -326,13 +360,15 @@ function ZipReader.extract(self: ZipReader, entry: ZipEntry, options: Extraction
end
local defaultOptions: ExtractionOptions = {
followSymlinks = false,
decompress = true,
isString = false,
isString = entry.isAscii,
skipValidation = false,
}
-- TODO: Use a `Partial` type function for this in the future!
local optionsOrDefault: {
followSymlinks: boolean,
decompress: boolean,
isString: boolean,
skipCrcValidation: boolean,
@ -406,6 +442,62 @@ function ZipReader.extract(self: ZipReader, entry: ZipEntry, options: Extraction
error(`Unsupported compression, ID: {compressionMethod}`)
end
-- Resolve the link only when asked to AND when the entry really is a symlink;
-- otherwise the contents of a regular file would be misread as a link target
-- and the lookup below would fail with "Symlink path not found"
if optionsOrDefault.followSymlinks and entry:isSymlink() then
	-- The data stored for the link entry is read as the path it points to
	local linkPath = buffer.tostring(algo.decompress(content, 0, {
		expected = 0x00000000,
		skip = true,
	}))

	--- Canonicalize a path by removing redundant components
	local function canonicalize(path: string): string
		-- NOTE: It is fine for us to use `/` here because ZIP file names
		-- always use `/` as the path separator
		local result = {}

		for _, component in string.split(path, "/") do
			if component == "." then
				-- Skip current directory
				continue
			end

			if component == ".." then
				-- Traverse one upwards by dropping the last kept component
				table.remove(result)
				continue
			end

			-- Otherwise, add the component to the result
			table.insert(result, component)
		end

		return table.concat(result, "/")
	end

	-- A path is absolute if it starts with `/` (which also covers a leading `//`)
	-- or with a drive letter such as `C:\` or `C:/`
	local isAbsolute = string.match(linkPath, "^/") or string.match(linkPath, "^[a-zA-Z]:[\\/]")
	if not isAbsolute then
		if string.sub(linkPath, -1) ~= "/" then
			linkPath ..= "/"
		end

		-- Relative targets are resolved against the link's parent directory
		linkPath = canonicalize(`{(entry.parent or self.root).name}{linkPath}`)
	end

	-- Extract the target as a raw buffer, without following further links and
	-- without CRC / size validation
	optionsOrDefault.followSymlinks = false
	optionsOrDefault.isString = false
	optionsOrDefault.skipCrcValidation = true
	optionsOrDefault.skipSizeValidation = true
	content = self:extract(
		self:findEntry(linkPath) or error("Symlink path not found"),
		optionsOrDefault
	) :: buffer
end
content = algo.decompress(content, uncompressedSize, {
expected = crcChecksum,
skip = optionsOrDefault.skipCrcValidation,

View file

@ -1,4 +1,6 @@
local fs = require("@lune/fs")
local process = require("@lune/process")
local serde = require("@lune/serde")
local frktest = require("../lune_packages/frktest")
local check = frktest.assert.check
@ -13,5 +15,26 @@ return function(test: typeof(frktest.test))
check.equal(zip.comment, "short.")
end)
test.case("Follows symlinks correctly", function()
	-- TODO: More test files with symlinks
	local archivePath = "tests/data/pandoc_soft_links.zip"
	local reader = ZipReader.load(buffer.fromstring(fs.readFile(archivePath)))

	local link = assert(reader:findEntry("/pandoc-3.2-arm64/bin/pandoc-lua"))
	assert(link:isSymlink(), "Entry type must be a symlink")

	-- Without `followSymlinks`, extraction yields the path the link points to
	local linkTarget = reader:extract(link, { isString = true }) :: string
	check.equal(linkTarget, "pandoc")

	-- With `followSymlinks`, extraction yields the contents of the link's target,
	-- which is compared against what the system `unzip` tool prints for that file
	local followed = reader:extract(link, { isString = false, followSymlinks = true }) :: buffer
	local unzipResult = process.spawn("unzip", { "-p", archivePath, "pandoc-3.2-arm64/bin/pandoc" })
	check.is_true(unzipResult.ok)

	-- Hashes are compared rather than the raw binaries: it is faster, and a mismatch
	-- does not dump the entire binary into the test output
	local actualHash = serde.hash("blake3", followed)
	local expectedHash = serde.hash("blake3", unzipResult.stdout)
	check.equal(actualHash, expectedHash)
end)
end)
end