mirror of
https://github.com/luau-lang/luau.git
synced 2025-01-09 21:09:10 +00:00
222 lines
12 KiB
Python
222 lines
12 KiB
Python
|
#!/usr/bin/python3
|
||
|
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||
|
|
||
|
# Given a Luau heap dump, this tool generates a heap snapshot which can be imported by Chrome's DevTools Memory panel
|
||
|
# To generate a snapshot, use luaC_dump, ideally preceded by luaC_fullgc
|
||
|
# To import in Chrome, ensure the snapshot has the .heapsnapshot extension and go to: Inspect -> Memory -> Load Profile
|
||
|
# A reference for the heap snapshot schema can be found here: https://learn.microsoft.com/en-us/microsoft-edge/devtools-guide-chromium/memory-problems/heap-snapshot-schema
|
||
|
|
||
|
# Usage: python3 heapsnapshot.py luauDump.json heapSnapshot.heapsnapshot
|
||
|
|
||
|
import json
|
||
|
import sys
|
||
|
|
||
|
# Header describing the snapshot format, copied from a real Chrome heap snapshot
|
||
|
snapshotMeta = {
|
||
|
"node_fields": ["type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness"],
|
||
|
"node_types": [
|
||
|
["hidden", "array", "string", "object", "code", "closure", "regexp", "number", "native", "synthetic", "concatenated string", "sliced string", "symbol", "bigint", "object shape"],
|
||
|
"string", "number", "number", "number", "number", "number"
|
||
|
],
|
||
|
"edge_fields": ["type", "name_or_index", "to_node"],
|
||
|
"edge_types": [
|
||
|
["context", "element", "property", "internal", "hidden", "shortcut", "weak"],
|
||
|
"string_or_number", "node"
|
||
|
],
|
||
|
"trace_function_info_fields": ["function_id", "name", "script_name", "script_id", "line", "column"],
|
||
|
"trace_node_fields": ["id", "function_info_index", "count", "size", "children"],
|
||
|
"sample_fields": ["timestamp_us", "last_assigned_id"],
|
||
|
"location_fields": ["object_index", "script_id", "line", "column"],
|
||
|
}
|
||
|
|
||
|
# These indices refer to the index in the snapshot's metadata header
|
||
|
nodeTypeToMetaIndex = {type: i for i, type in enumerate(snapshotMeta["node_types"][0])}
|
||
|
edgeTypeToMetaIndex = {type: i for i, type in enumerate(snapshotMeta["edge_types"][0])}
|
||
|
|
||
|
nodeFieldCount = len(snapshotMeta["node_fields"])
|
||
|
edgeFieldCount = len(snapshotMeta["edge_fields"])
|
||
|
|
||
|
|
||
|
def readAddresses(data):
|
||
|
# Ordered list of addresses to ensure the registry is the first node, and also so we can process nodes in index order
|
||
|
addresses = []
|
||
|
addressToNodeIndex = {}
|
||
|
|
||
|
def addAddress(address):
|
||
|
assert address not in addressToNodeIndex, f"Address already exists in the snapshot: '{address}'"
|
||
|
addresses.append(address)
|
||
|
addressToNodeIndex[address] = len(addresses) - 1
|
||
|
|
||
|
# The registry is a special case that needs to be either the first or last node to ensure gc "distances" are calculated correctly
|
||
|
registryAddress = data["roots"]["registry"]
|
||
|
addAddress(registryAddress)
|
||
|
|
||
|
for address, obj in data["objects"].items():
|
||
|
if address == registryAddress:
|
||
|
continue
|
||
|
addAddress(address)
|
||
|
|
||
|
return addresses, addressToNodeIndex
|
||
|
|
||
|
|
||
|
def convertToSnapshot(data):
|
||
|
addresses, addressToNodeIndex = readAddresses(data)
|
||
|
|
||
|
# Some notable idiosyncrasies with the heap snapshot format:
|
||
|
# 1. The snapshot format contains a flat array of nodes and edges. Oddly, edges must reference the "absolute" index of a node's first element after flattening.
|
||
|
# 2. A node's outgoing edges are implicitly represented by a contiguous block of edges in the edges array which correspond to the node's position
|
||
|
# in the nodes array and its edge count. So if the first node has 3 edges, the first 3 edges in the edges array are its edges, and so on.
|
||
|
|
||
|
nodes = []
|
||
|
edges = []
|
||
|
strings = []
|
||
|
|
||
|
stringToSnapshotIndex = {}
|
||
|
|
||
|
def getUniqueId(address):
|
||
|
# TODO: we should hash this to an int32 instead of using the address directly
|
||
|
# Addresses are hexadecimal strings
|
||
|
return int(address, 16)
|
||
|
|
||
|
def addNode(node):
|
||
|
assert len(node) == nodeFieldCount, f"Expected {nodeFieldCount} fields, got {len(node)}"
|
||
|
nodes.append(node)
|
||
|
|
||
|
def addEdge(edge):
|
||
|
assert len(edge) == edgeFieldCount, f"Expected {edgeFieldCount} fields, got {len(edge)}"
|
||
|
edges.append(edge)
|
||
|
|
||
|
def getStringSnapshotIndex(string):
|
||
|
assert isinstance(string, str), f"'{string}' is not of type string"
|
||
|
if string not in stringToSnapshotIndex:
|
||
|
strings.append(string)
|
||
|
stringToSnapshotIndex[string] = len(strings) - 1
|
||
|
return stringToSnapshotIndex[string]
|
||
|
|
||
|
def getNodeSnapshotIndex(address):
|
||
|
# This is the index of the first element of the node in the flattened nodes array
|
||
|
return addressToNodeIndex[address] * nodeFieldCount
|
||
|
|
||
|
for address in addresses:
|
||
|
obj = data["objects"][address]
|
||
|
edgeCount = 0
|
||
|
|
||
|
if obj["type"] == "table":
|
||
|
# TODO: support weak references
|
||
|
name = f"Registry ({address})" if address == data["roots"]["registry"] else f"Luau table ({address})"
|
||
|
if "pairs" in obj:
|
||
|
for i in range(0, len(obj["pairs"]), 2):
|
||
|
key = obj["pairs"][i]
|
||
|
value = obj["pairs"][i + 1]
|
||
|
if key is None and value is None:
|
||
|
# Both the key and value are value types, nothing meaningful to add here
|
||
|
continue
|
||
|
elif key is None:
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["property"], getStringSnapshotIndex("(Luau table key value type)"), getNodeSnapshotIndex(value)])
|
||
|
elif value is None:
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["internal"], getStringSnapshotIndex(f'Luau table key ref: {data["objects"][key]["type"]} ({key})'), getNodeSnapshotIndex(key)])
|
||
|
elif data["objects"][key]["type"] == "string":
|
||
|
edgeCount += 2
|
||
|
# This is a special case where the key is a string, so we can use it as the edge name
|
||
|
addEdge([edgeTypeToMetaIndex["property"], getStringSnapshotIndex(data["objects"][key]["data"]), getNodeSnapshotIndex(value)])
|
||
|
addEdge([edgeTypeToMetaIndex["internal"], getStringSnapshotIndex(f'Luau table key ref: {data["objects"][key]["type"]} ({key})'), getNodeSnapshotIndex(key)])
|
||
|
else:
|
||
|
edgeCount += 2
|
||
|
addEdge([edgeTypeToMetaIndex["property"], getStringSnapshotIndex(f'{data["objects"][key]["type"]} ({key})'), getNodeSnapshotIndex(value)])
|
||
|
addEdge([edgeTypeToMetaIndex["internal"], getStringSnapshotIndex(f'Luau table key ref: {data["objects"][key]["type"]} ({key})'), getNodeSnapshotIndex(key)])
|
||
|
if "array" in obj:
|
||
|
for i, element in enumerate(obj["array"]):
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["element"], i, getNodeSnapshotIndex(element)])
|
||
|
if "metatable" in obj:
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["internal"], getStringSnapshotIndex(f'metatable ({obj["metatable"]})'), getNodeSnapshotIndex(obj["metatable"])])
|
||
|
# TODO: consider distinguishing "object" and "array" node types
|
||
|
addNode([nodeTypeToMetaIndex["object"], getStringSnapshotIndex(name), getUniqueId(address), obj["size"], edgeCount, 0, 0])
|
||
|
elif obj["type"] == "thread":
|
||
|
name = f'Luau thread: {obj["source"]}:{obj["line"]} ({address})' if "source" in obj else f"Luau thread ({address})"
|
||
|
if address == data["roots"]["mainthread"]:
|
||
|
name += " (main thread)"
|
||
|
if "env" in obj:
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f'env ({obj["env"]})'), getNodeSnapshotIndex(obj["env"])])
|
||
|
if "stack" in obj:
|
||
|
for i, frame in enumerate(obj["stack"]):
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f"callstack[{i}]"), getNodeSnapshotIndex(frame)])
|
||
|
addNode([nodeTypeToMetaIndex["native"], getStringSnapshotIndex(name), getUniqueId(address), obj["size"], edgeCount, 0, 0])
|
||
|
elif obj["type"] == "function":
|
||
|
name = f'Luau function: {obj["name"]} ({address})' if "name" in obj else f"Luau anonymous function ({address})"
|
||
|
if "env" in obj:
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f'env ({obj["env"]})'), getNodeSnapshotIndex(obj["env"])])
|
||
|
if "proto" in obj:
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f'proto ({obj["proto"]})'), getNodeSnapshotIndex(obj["proto"])])
|
||
|
if "upvalues" in obj:
|
||
|
for i, upvalue in enumerate(obj["upvalues"]):
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f"up value ({upvalue})"), getNodeSnapshotIndex(upvalue)])
|
||
|
addNode([nodeTypeToMetaIndex["closure"], getStringSnapshotIndex(name), getUniqueId(address), obj["size"], edgeCount, 0, 0])
|
||
|
elif obj["type"] == "upvalue":
|
||
|
if "object" in obj:
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f'upvalue object ({obj["object"]})'), getNodeSnapshotIndex(obj["object"])])
|
||
|
addNode([nodeTypeToMetaIndex["native"], getStringSnapshotIndex(f"Luau upvalue ({address})"), getUniqueId(address), obj["size"], edgeCount, 0, 0])
|
||
|
elif obj["type"] == "userdata":
|
||
|
if "metatable" in obj:
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["internal"], getStringSnapshotIndex(f'metatable ({obj["metatable"]})'), getNodeSnapshotIndex(obj["metatable"])])
|
||
|
addNode([nodeTypeToMetaIndex["native"], getStringSnapshotIndex(f"Luau userdata ({address})"), getUniqueId(address), obj["size"], edgeCount, 0, 0])
|
||
|
elif obj["type"] == "proto":
|
||
|
name = f'Luau proto: {obj["source"]}:{obj["line"]} ({address})' if "source" in obj else f"Luau proto ({address})"
|
||
|
if "constants" in obj:
|
||
|
for constant in obj["constants"]:
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(constant), getNodeSnapshotIndex(constant)])
|
||
|
if "protos" in obj:
|
||
|
for proto in obj["protos"]:
|
||
|
edgeCount += 1
|
||
|
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(proto), getNodeSnapshotIndex(proto)])
|
||
|
addNode([nodeTypeToMetaIndex["code"], getStringSnapshotIndex(name), getUniqueId(address), obj["size"], edgeCount, 0, 0])
|
||
|
elif obj["type"] == "string":
|
||
|
addNode([nodeTypeToMetaIndex["string"], getStringSnapshotIndex(obj["data"]), getUniqueId(address), obj["size"], 0, 0, 0])
|
||
|
elif obj["type"] == "buffer":
|
||
|
addNode([nodeTypeToMetaIndex["native"], getStringSnapshotIndex(f'buffer ({address})'), getUniqueId(address), obj["size"], 0, 0, 0])
|
||
|
else:
|
||
|
raise Exception(f"Unknown object type: '{obj['type']}'")
|
||
|
|
||
|
return {
|
||
|
"snapshot": {
|
||
|
"meta": snapshotMeta,
|
||
|
"node_count": len(nodes),
|
||
|
"edge_count": len(edges),
|
||
|
"trace_function_count": 0,
|
||
|
},
|
||
|
# flatten the nodes and edges arrays
|
||
|
"nodes": [field for node in nodes for field in node],
|
||
|
"edges": [field for edge in edges for field in edge],
|
||
|
"trace_function_infos": [],
|
||
|
"trace_tree": [],
|
||
|
"samples": [],
|
||
|
"locations": [],
|
||
|
"strings": strings,
|
||
|
}
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
luauDump = sys.argv[1]
|
||
|
heapSnapshot = sys.argv[2]
|
||
|
|
||
|
with open(luauDump, "r") as file:
|
||
|
dump = json.load(file)
|
||
|
|
||
|
snapshot = convertToSnapshot(dump)
|
||
|
|
||
|
with open(heapSnapshot, "w") as file:
|
||
|
json.dump(snapshot, file)
|
||
|
|
||
|
print(f"Heap snapshot written to: '{heapSnapshot}'")
|