luau/tools/heapsnapshot.py
jkelaty-rbx 8b8118b027
Convert Luau heap dumps to Chrome heap snapshots (#1554)
Adds a script for (approximately) converting Luau heap dumps to Chrome
heap snapshots. Useful for visually inspecting a heap dump within
Chrome's UI.
2024-12-06 10:04:57 -08:00

221 lines
12 KiB
Python

#!/usr/bin/python3
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
# Given a Luau heap dump, this tool generates a heap snapshot which can be imported by Chrome's DevTools Memory panel
# To generate the Luau heap dump that this tool takes as input, use luaC_dump, ideally preceded by luaC_fullgc
# To import in Chrome, ensure the snapshot has the .heapsnapshot extension and go to: Inspect -> Memory -> Load Profile
# A reference for the heap snapshot schema can be found here: https://learn.microsoft.com/en-us/microsoft-edge/devtools-guide-chromium/memory-problems/heap-snapshot-schema
# Usage: python3 heapsnapshot.py luauDump.json heapSnapshot.heapsnapshot
import json
import sys
# Metadata header that describes the layout of the snapshot arrays (taken from a real Chrome heap snapshot)
snapshotMeta = {
    "node_fields": ["type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness"],
    "node_types": [
        ["hidden", "array", "string", "object", "code", "closure", "regexp", "number", "native", "synthetic", "concatenated string", "sliced string", "symbol", "bigint", "object shape"],
        "string", "number", "number", "number", "number", "number"
    ],
    "edge_fields": ["type", "name_or_index", "to_node"],
    "edge_types": [
        ["context", "element", "property", "internal", "hidden", "shortcut", "weak"],
        "string_or_number", "node"
    ],
    "trace_function_info_fields": ["function_id", "name", "script_name", "script_id", "line", "column"],
    "trace_node_fields": ["id", "function_info_index", "count", "size", "children"],
    "sample_fields": ["timestamp_us", "last_assigned_id"],
    "location_fields": ["object_index", "script_id", "line", "column"],
}

# Map each node/edge type name to its position in the metadata header's type list
nodeTypeToMetaIndex = {
    typeName: position for position, typeName in enumerate(snapshotMeta["node_types"][0])
}
edgeTypeToMetaIndex = {
    typeName: position for position, typeName in enumerate(snapshotMeta["edge_types"][0])
}

# Number of array slots occupied by a single node / a single edge once flattened
nodeFieldCount = len(snapshotMeta["node_fields"])
edgeFieldCount = len(snapshotMeta["edge_fields"])
def readAddresses(data):
    """Return the dump's object addresses in node order plus an address -> node index map.

    The registry address (data["roots"]["registry"]) always comes first, followed by every
    other key of data["objects"] in the order the mapping yields them.

    Returns a tuple (addresses, addressToNodeIndex) where addressToNodeIndex[address] is the
    position of that address within addresses.
    """
    # The registry is a special case: it has to be the first (or last) node so that gc "distances"
    # are calculated correctly
    registryAddress = data["roots"]["registry"]

    # Keeping an ordered list lets callers process nodes in index order
    addresses = [registryAddress]
    addresses.extend(key for key in data["objects"] if key != registryAddress)

    addressToNodeIndex = {}
    for position, key in enumerate(addresses):
        assert key not in addressToNodeIndex, f"Address already exists in the snapshot: '{key}'"
        addressToNodeIndex[key] = position

    return addresses, addressToNodeIndex
def convertToSnapshot(data):
    """Build a Chrome heap snapshot dictionary from a parsed Luau heap dump.

    `data` is the decoded dump. This function reads data["roots"] (the "registry" entry, and
    "mainthread" when a thread object is encountered) and data["objects"], a mapping from
    hexadecimal address strings to object records that each carry a "type" and a "size".

    Returns a dictionary with the "snapshot" header, the flattened "nodes" and "edges" arrays,
    the "strings" table and the (empty) trace/sample/location arrays.

    Raises Exception when an object record has an unrecognized "type".
    """
    addresses, addressToNodeIndex = readAddresses(data)
    heapObjects = data["objects"]
    roots = data["roots"]

    # Two quirks of the heap snapshot format to keep in mind:
    # 1. Nodes and edges live in flat arrays, and an edge refers to its target by the "absolute"
    #    index of the target node's first field in the flattened nodes array.
    # 2. A node's outgoing edges are an implicit, contiguous run of the edges array determined by the
    #    node's position and its edge count: if the first node has 3 edges, the first 3 edges belong
    #    to it, and so on. Edges therefore have to be emitted in node order.
    nodes = []
    edges = []
    strings = []
    stringToSnapshotIndex = {}

    def internString(text):
        # Index of `text` in the strings table; the string is appended the first time it is seen
        assert isinstance(text, str), f"'{text}' is not of type string"
        index = stringToSnapshotIndex.get(text)
        if index is None:
            index = len(strings)
            strings.append(text)
            stringToSnapshotIndex[text] = index
        return index

    def nodeOffset(address):
        # Index of the node's first field within the flattened nodes array
        return addressToNodeIndex[address] * nodeFieldCount

    def pushEdge(edge):
        assert len(edge) == edgeFieldCount, f"Expected {edgeFieldCount} fields, got {len(edge)}"
        edges.append(edge)

    def linkNamed(kind, label, target):
        # Edge named by a string-table entry; the label is interned before the target is resolved
        kindIndex = edgeTypeToMetaIndex[kind]
        labelIndex = internString(label)
        pushEdge([kindIndex, labelIndex, nodeOffset(target)])

    # Each describer emits the outgoing edges of one object and returns (node type name, node name).
    # The node itself is appended by the main loop once the edges are in place.

    def describeTable(address, obj):
        # TODO: support weak references
        label = f"Registry ({address})" if address == roots["registry"] else f"Luau table ({address})"

        # "pairs" is a flat [key, value, key, value, ...] list; a None entry is a value type
        pairs = obj.get("pairs", ())
        for start in range(0, len(pairs), 2):
            key = pairs[start]
            value = pairs[start + 1]
            if key is None and value is None:
                # Both the key and value are value types, nothing meaningful to add here
                continue
            if key is None:
                linkNamed("property", "(Luau table key value type)", value)
                continue

            keyObj = heapObjects[key]
            keyRefLabel = f'Luau table key ref: {keyObj["type"]} ({key})'
            if value is not None:
                if keyObj["type"] == "string":
                    # String keys are a special case: the string itself serves as the edge name
                    propertyLabel = keyObj["data"]
                else:
                    propertyLabel = f'{keyObj["type"]} ({key})'
                linkNamed("property", propertyLabel, value)
            linkNamed("internal", keyRefLabel, key)

        for slot, element in enumerate(obj.get("array", ())):
            pushEdge([edgeTypeToMetaIndex["element"], slot, nodeOffset(element)])

        if "metatable" in obj:
            linkNamed("internal", f'metatable ({obj["metatable"]})', obj["metatable"])

        # TODO: consider distinguishing "object" and "array" node types
        return "object", label

    def describeThread(address, obj):
        if "source" in obj:
            label = f'Luau thread: {obj["source"]}:{obj["line"]} ({address})'
        else:
            label = f"Luau thread ({address})"
        if address == roots["mainthread"]:
            label += " (main thread)"

        if "env" in obj:
            linkNamed("context", f'env ({obj["env"]})', obj["env"])
        for depth, frame in enumerate(obj.get("stack", ())):
            linkNamed("context", f"callstack[{depth}]", frame)
        return "native", label

    def describeFunction(address, obj):
        if "name" in obj:
            label = f'Luau function: {obj["name"]} ({address})'
        else:
            label = f"Luau anonymous function ({address})"

        if "env" in obj:
            linkNamed("context", f'env ({obj["env"]})', obj["env"])
        if "proto" in obj:
            linkNamed("context", f'proto ({obj["proto"]})', obj["proto"])
        for upvalue in obj.get("upvalues", ()):
            linkNamed("context", f"up value ({upvalue})", upvalue)
        return "closure", label

    def describeUpvalue(address, obj):
        if "object" in obj:
            linkNamed("context", f'upvalue object ({obj["object"]})', obj["object"])
        return "native", f"Luau upvalue ({address})"

    def describeUserdata(address, obj):
        if "metatable" in obj:
            linkNamed("internal", f'metatable ({obj["metatable"]})', obj["metatable"])
        return "native", f"Luau userdata ({address})"

    def describeProto(address, obj):
        if "source" in obj:
            label = f'Luau proto: {obj["source"]}:{obj["line"]} ({address})'
        else:
            label = f"Luau proto ({address})"

        # Constants and nested protos are named after the address they point at
        for constant in obj.get("constants", ()):
            linkNamed("context", constant, constant)
        for proto in obj.get("protos", ()):
            linkNamed("context", proto, proto)
        return "code", label

    def describeString(address, obj):
        return "string", obj["data"]

    def describeBuffer(address, obj):
        return "native", f'buffer ({address})'

    describers = {
        "table": describeTable,
        "thread": describeThread,
        "function": describeFunction,
        "upvalue": describeUpvalue,
        "userdata": describeUserdata,
        "proto": describeProto,
        "string": describeString,
        "buffer": describeBuffer,
    }

    for address in addresses:
        obj = heapObjects[address]
        kind = obj["type"]
        describe = describers.get(kind) if isinstance(kind, str) else None
        if describe is None:
            raise Exception(f"Unknown object type: '{obj['type']}'")

        # The node's edge count is however many edges its describer appended
        firstEdge = len(edges)
        nodeKind, label = describe(address, obj)

        # TODO: we should hash this to an int32 instead of using the address directly
        # Addresses are hexadecimal strings
        node = [
            nodeTypeToMetaIndex[nodeKind],
            internString(label),
            int(address, 16),
            obj["size"],
            len(edges) - firstEdge,
            0,
            0,
        ]
        assert len(node) == nodeFieldCount, f"Expected {nodeFieldCount} fields, got {len(node)}"
        nodes.append(node)

    # Flatten the per-node and per-edge lists into the single arrays the format expects
    flatNodes = []
    for node in nodes:
        flatNodes.extend(node)
    flatEdges = []
    for edge in edges:
        flatEdges.extend(edge)

    return {
        "snapshot": {
            "meta": snapshotMeta,
            "node_count": len(nodes),
            "edge_count": len(edges),
            "trace_function_count": 0,
        },
        "nodes": flatNodes,
        "edges": flatEdges,
        "trace_function_infos": [],
        "trace_tree": [],
        "samples": [],
        "locations": [],
        "strings": strings,
    }
if __name__ == "__main__":
    # Command-line entry point: read a Luau heap dump (JSON) from the first argument and write the
    # converted Chrome heap snapshot to the path given by the second argument.
    if len(sys.argv) < 3:
        # Without this guard a missing argument surfaces as a bare IndexError traceback
        print(f"Usage: python3 {sys.argv[0]} luauDump.json heapSnapshot.heapsnapshot", file=sys.stderr)
        sys.exit(1)

    luauDump = sys.argv[1]
    heapSnapshot = sys.argv[2]

    # Decode explicitly as UTF-8 instead of relying on the platform's locale encoding; undecodable
    # bytes are replaced rather than aborting the whole conversion.
    with open(luauDump, "r", encoding="utf-8", errors="replace") as file:
        dump = json.load(file)

    snapshot = convertToSnapshot(dump)

    with open(heapSnapshot, "w", encoding="utf-8") as file:
        json.dump(snapshot, file)

    print(f"Heap snapshot written to: '{heapSnapshot}'")