#ifdef CALLGRIND
// Lua-callable `callgrind(option [, name])`; only compiled when CALLGRIND is defined.
//   "running" -> pushes one boolean built from RUNNING_ON_VALGRIND
//   "zero"    -> issues CALLGRIND_ZERO_STATS, returns nothing to Lua
//   "dump"    -> issues CALLGRIND_DUMP_STATS_AT with the string given as argument 2, returns nothing to Lua
// Any other option raises a Lua error.
static int lua_callgrind(lua_State* L)
{
    const char* command = luaL_checkstring(L, 1);

    if (strcmp(command, "running") == 0)
    {
        const int underValgrind = RUNNING_ON_VALGRIND;
        lua_pushboolean(L, underValgrind);
        return 1;
    }
    else if (strcmp(command, "zero") == 0)
    {
        CALLGRIND_ZERO_STATS;
        return 0;
    }
    else if (strcmp(command, "dump") == 0)
    {
        // The second argument is only validated when a dump is actually requested
        const char* dumpLabel = luaL_checkstring(L, 2);

        CALLGRIND_DUMP_STATS_AT(dumpLabel);
        return 0;
    }

    // NOTE(review): no return follows; this presumably relies on luaL_error not returning - confirm
    luaL_error(L, "callgrind must be called with one of 'running', 'zero', 'dump'");
}
#endif
# Assume 2.5 IPC on a 4 GHz CPU; this is obviously incorrect but it allows us to display simulated instruction counts using regular time units
CALLGRIND_INSN_PER_SEC = 2.5 * 4e9


def getCallgrindOutput(lines):
    """Convert the lines of a callgrind output file into a benchmark result string.

    Every "desc: Trigger: Client Request: <name>" line remembers <name>; the next
    "summary: <count>" line is paired with it and emitted as one record of the form
    "|><|<name>|><|<milliseconds>||_||", where the instruction count is converted to
    milliseconds through CALLGRIND_INSN_PER_SEC. A summary line with no pending name
    is ignored.

    Args:
        lines: iterable of text lines read from the callgrind output file.

    Returns:
        All records concatenated into one string ("" when nothing matched).

    Raises:
        ValueError: if the text after "summary: " is not a single integer.
    """
    descPrefix = "desc: Trigger: Client Request: "
    summaryPrefix = "summary: "

    records = []
    name = None

    for line in lines:
        if line.startswith(descPrefix):
            name = line[len(descPrefix):].strip()
        elif line.startswith(summaryPrefix) and name is not None:
            insn = int(line[len(summaryPrefix):])
            # Note: we only run each bench once under callgrind so we only report a single time per run; callgrind instruction count variance is ~0.01% so it might as well be zero
            milliseconds = insn / CALLGRIND_INSN_PER_SEC * 1000.0
            # append (not +=) so each record is stored whole instead of being split into characters
            records.append("|><|" + name + "|><|" + str(milliseconds) + "||_||")
            # A name is consumed by exactly one summary line
            name = None

    return "".join(records)
shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, cwd=scriptdir) + file = open(os.path.join(scriptdir, "callgrind.out"), "r") + lines = file.readlines() + return getCallgrindOutput(lines) + except e: + print(e) + return "" else: with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=scriptdir) as p: # Try to lock to a single processor diff --git a/bench/bench_support.lua b/bench/bench_support.lua index 171b8da7..a9608ecc 100644 --- a/bench/bench_support.lua +++ b/bench/bench_support.lua @@ -5,6 +5,16 @@ bench.runs = 20 bench.extraRuns = 4 function bench.runCode(f, description) + -- Under Callgrind, run the test only once and measure just the execution cost + if callgrind and callgrind("running") then + if collectgarbage then collectgarbage() end + + callgrind("zero") + f() -- unfortunately we can't easily separate setup cost from runtime cost in f unless it calls callgrind() + callgrind("dump", description) + return + end + local timeTable = {} for i = 1,bench.runs + bench.extraRuns do