mirror of
https://github.com/luau-lang/luau.git
synced 2024-12-12 21:10:37 +00:00
Convert Luau heap dumps to Chrome heap snapshots (#1554)
Adds a script for (approximately) converting Luau heap dumps to Chrome heap snapshots. Useful for visually inspecting a heap dump within Chrome's UI.
This commit is contained in:
parent
d0222bb554
commit
8b8118b027
1 changed files with 221 additions and 0 deletions
221
tools/heapsnapshot.py
Normal file
221
tools/heapsnapshot.py
Normal file
|
@ -0,0 +1,221 @@
|
|||
#!/usr/bin/python3
|
||||
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
|
||||
# Given a Luau heap dump, this tool generates a heap snapshot which can be imported by Chrome's DevTools Memory panel
|
||||
# To generate a snapshot, use luaC_dump, ideally preceded by luaC_fullgc
|
||||
# To import in Chrome, ensure the snapshot has the .heapsnapshot extension and go to: Inspect -> Memory -> Load Profile
|
||||
# A reference for the heap snapshot schema can be found here: https://learn.microsoft.com/en-us/microsoft-edge/devtools-guide-chromium/memory-problems/heap-snapshot-schema
|
||||
|
||||
# Usage: python3 heapsnapshot.py luauDump.json heapSnapshot.heapsnapshot
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
# Snapshot format header, copied from a real Chrome heap snapshot.
# It is emitted unchanged as the "meta" entry of the generated snapshot.
snapshotMeta = {
    "node_fields": ["type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness"],
    "node_types": [
        [
            "hidden",
            "array",
            "string",
            "object",
            "code",
            "closure",
            "regexp",
            "number",
            "native",
            "synthetic",
            "concatenated string",
            "sliced string",
            "symbol",
            "bigint",
            "object shape",
        ],
        "string",
        "number",
        "number",
        "number",
        "number",
        "number",
    ],
    "edge_fields": ["type", "name_or_index", "to_node"],
    "edge_types": [
        ["context", "element", "property", "internal", "hidden", "shortcut", "weak"],
        "string_or_number",
        "node",
    ],
    "trace_function_info_fields": ["function_id", "name", "script_name", "script_id", "line", "column"],
    "trace_node_fields": ["id", "function_info_index", "count", "size", "children"],
    "sample_fields": ["timestamp_us", "last_assigned_id"],
    "location_fields": ["object_index", "script_id", "line", "column"],
}

# Node and edge records store a type as its position in the metadata header above, not as its name
nodeTypeToMetaIndex = {typeName: position for position, typeName in enumerate(snapshotMeta["node_types"][0])}
edgeTypeToMetaIndex = {typeName: position for position, typeName in enumerate(snapshotMeta["edge_types"][0])}

# Number of values a single node / edge record contributes to the flattened arrays
nodeFieldCount = len(snapshotMeta["node_fields"])
edgeFieldCount = len(snapshotMeta["edge_fields"])
|
||||
|
||||
|
||||
def readAddresses(data):
    """Give every object in the dump a node index, placing the registry first.

    Reads data["roots"]["registry"] and the keys of data["objects"].

    Returns a tuple (addresses, addressToNodeIndex): the ordered list of
    addresses, and a dict mapping each address to its position in that list.
    """
    # The registry is a special case: it has to be the first (or last) node so that
    # gc "distances" are calculated correctly, hence it is seeded at index 0.
    registryAddress = data["roots"]["registry"]
    orderedAddresses = [registryAddress]
    indexByAddress = {registryAddress: 0}

    # The remaining objects keep the order in which they appear in the dump,
    # so that nodes can later be processed in index order.
    for candidate in data["objects"]:
        if candidate == registryAddress:
            continue
        assert candidate not in indexByAddress, f"Address already exists in the snapshot: '{candidate}'"
        indexByAddress[candidate] = len(orderedAddresses)
        orderedAddresses.append(candidate)

    return orderedAddresses, indexByAddress
|
||||
|
||||
|
||||
def convertToSnapshot(data):
    """Build a Chrome heap snapshot dictionary from a parsed Luau heap dump.

    Reads data["objects"] (address -> object record carrying "type", "size" and
    type-specific keys) and data["roots"] ("registry", plus "mainthread" once a
    thread object is encountered).

    Returns a dict with the keys "snapshot", "nodes", "edges",
    "trace_function_infos", "trace_tree", "samples", "locations" and "strings",
    where "nodes" and "edges" are flattened arrays.

    Raises Exception when an object's "type" is not one of: table, thread,
    function, upvalue, userdata, proto, string, buffer.
    """
    addresses, addressToNodeIndex = readAddresses(data)
    objects = data["objects"]

    # Some notable idiosyncrasies with the heap snapshot format:
    # 1. Nodes and edges are stored as flat arrays, and an edge refers to its target by the
    #    "absolute" index of the target node's first field in the flattened nodes array.
    # 2. A node's outgoing edges are implicit: they are the contiguous run of edges whose position
    #    matches the node's position in the nodes array and its edge count. So if the first node has
    #    3 edges, the first 3 edges in the edges array are its edges, and so on. This is why every
    #    edge of a node is emitted before the next node is visited.

    nodeRecords = []
    edgeRecords = []
    strings = []
    stringToSnapshotIndex = {}

    def internString(text):
        # Returns the index of `text` in the snapshot's string table, appending it on first use
        assert isinstance(text, str), f"'{text}' is not of type string"
        index = stringToSnapshotIndex.get(text)
        if index is None:
            index = len(strings)
            strings.append(text)
            stringToSnapshotIndex[text] = index
        return index

    def emitEdge(kind, nameOrIndex, targetAddress):
        # The target is the index of the first element of the node in the flattened nodes array
        record = [edgeTypeToMetaIndex[kind], nameOrIndex, addressToNodeIndex[targetAddress] * nodeFieldCount]
        assert len(record) == edgeFieldCount, f"Expected {edgeFieldCount} fields, got {len(record)}"
        edgeRecords.append(record)

    def emitNode(kind, name, address, obj, edgeCount):
        # TODO: we should hash the id to an int32 instead of using the address directly
        # Addresses are hexadecimal strings
        record = [nodeTypeToMetaIndex[kind], internString(name), int(address, 16), obj["size"], edgeCount, 0, 0]
        assert len(record) == nodeFieldCount, f"Expected {nodeFieldCount} fields, got {len(record)}"
        nodeRecords.append(record)

    def linkEnv(obj):
        # Shared by threads and functions
        if "env" in obj:
            emitEdge("context", internString(f'env ({obj["env"]})'), obj["env"])

    def linkMetatable(obj):
        # Shared by tables and userdata
        if "metatable" in obj:
            emitEdge("internal", internString(f'metatable ({obj["metatable"]})'), obj["metatable"])

    # Each describer below emits the outgoing edges of one object and returns (node type, node name)

    def describeTable(address, obj):
        # TODO: support weak references
        pairs = obj.get("pairs", ())
        # "pairs" is a flat list alternating key, value; an entry is None when it is a value type
        for offset in range(0, len(pairs), 2):
            key, value = pairs[offset], pairs[offset + 1]
            if key is None and value is None:
                # Both the key and value are value types, nothing meaningful to add here
                continue

            if key is not None:
                keyType = objects[key]["type"]

            if value is not None:
                if key is None:
                    label = "(Luau table key value type)"
                elif keyType == "string":
                    # This is a special case where the key is a string, so we can use it as the edge name
                    label = objects[key]["data"]
                else:
                    label = f"{keyType} ({key})"
                emitEdge("property", internString(label), value)

            if key is not None:
                emitEdge("internal", internString(f"Luau table key ref: {keyType} ({key})"), key)

        for position, element in enumerate(obj.get("array", ())):
            emitEdge("element", position, element)

        linkMetatable(obj)

        # TODO: consider distinguishing "object" and "array" node types
        if address == data["roots"]["registry"]:
            return "object", f"Registry ({address})"
        return "object", f"Luau table ({address})"

    def describeThread(address, obj):
        if "source" in obj:
            name = f'Luau thread: {obj["source"]}:{obj["line"]} ({address})'
        else:
            name = f"Luau thread ({address})"
        if address == data["roots"]["mainthread"]:
            name += " (main thread)"

        linkEnv(obj)
        for depth, frame in enumerate(obj.get("stack", ())):
            emitEdge("context", internString(f"callstack[{depth}]"), frame)
        return "native", name

    def describeFunction(address, obj):
        if "name" in obj:
            name = f'Luau function: {obj["name"]} ({address})'
        else:
            name = f"Luau anonymous function ({address})"

        linkEnv(obj)
        if "proto" in obj:
            emitEdge("context", internString(f'proto ({obj["proto"]})'), obj["proto"])
        for upvalue in obj.get("upvalues", ()):
            emitEdge("context", internString(f"up value ({upvalue})"), upvalue)
        return "closure", name

    def describeUpvalue(address, obj):
        if "object" in obj:
            emitEdge("context", internString(f'upvalue object ({obj["object"]})'), obj["object"])
        return "native", f"Luau upvalue ({address})"

    def describeUserdata(address, obj):
        linkMetatable(obj)
        return "native", f"Luau userdata ({address})"

    def describeProto(address, obj):
        if "source" in obj:
            name = f'Luau proto: {obj["source"]}:{obj["line"]} ({address})'
        else:
            name = f"Luau proto ({address})"

        # Constants and nested protos are labelled with the referenced address itself
        for constant in obj.get("constants", ()):
            emitEdge("context", internString(constant), constant)
        for proto in obj.get("protos", ()):
            emitEdge("context", internString(proto), proto)
        return "code", name

    def describeString(address, obj):
        # Strings have no outgoing edges; the node is named after the string contents
        return "string", obj["data"]

    def describeBuffer(address, obj):
        return "native", f"buffer ({address})"

    describers = {
        "table": describeTable,
        "thread": describeThread,
        "function": describeFunction,
        "upvalue": describeUpvalue,
        "userdata": describeUserdata,
        "proto": describeProto,
        "string": describeString,
        "buffer": describeBuffer,
    }

    for address in addresses:
        obj = objects[address]
        objType = obj["type"]
        if objType not in describers:
            raise Exception(f"Unknown object type: '{objType}'")

        # The node's edge count is however many edges its describer appended
        firstEdge = len(edgeRecords)
        nodeKind, name = describers[objType](address, obj)
        emitNode(nodeKind, name, address, obj, len(edgeRecords) - firstEdge)

    # flatten the nodes and edges arrays
    flatNodes = [field for record in nodeRecords for field in record]
    flatEdges = [field for record in edgeRecords for field in record]

    return {
        "snapshot": {
            "meta": snapshotMeta,
            "node_count": len(nodeRecords),
            "edge_count": len(edgeRecords),
            "trace_function_count": 0,
        },
        "nodes": flatNodes,
        "edges": flatEdges,
        "trace_function_infos": [],
        "trace_tree": [],
        "samples": [],
        "locations": [],
        "strings": strings,
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: reads the Luau heap dump named by the first argument, converts it,
    # and writes the Chrome heap snapshot to the path named by the second argument.
    # Any arguments beyond the first two are ignored.
    if len(sys.argv) < 3:
        # Exit with a usage message (printed to stderr, exit status 1) rather than an IndexError traceback
        sys.exit(f"Usage: python3 {sys.argv[0]} luauDump.json heapSnapshot.heapsnapshot")

    luauDump = sys.argv[1]
    heapSnapshot = sys.argv[2]

    # Decode explicitly as UTF-8 so the result does not depend on the platform's default encoding
    with open(luauDump, "r", encoding="utf-8") as file:
        dump = json.load(file)

    snapshot = convertToSnapshot(dump)

    with open(heapSnapshot, "w", encoding="utf-8") as file:
        json.dump(snapshot, file)

    print(f"Heap snapshot written to: '{heapSnapshot}'")
|
Loading…
Reference in a new issue