bench: Implement first class support for callgrind (#570)

Since callgrind allows stats collection to be controlled from the guest, we can reset the collection right before the benchmark starts. This change exposes this to the benchmark runner and integrates callgrind data parsing into bench.py, so that we can run bench.py with the --callgrind argument and, as long as the runner was built with callgrind support, get instruction counts from the run. We convert instruction counts to seconds using a rate of 10G instructions/second; there's no correct way to do this without simulating the full CPU pipeline, but it results in time units on a similar scale to real runs.
2025-08-26 11:27:08 +01:00 · 2022-07-04 11:13:07 -07:00 · 2022-07-04 11:13:07 -07:00 · 48aa7a5162
commit 48aa7a5162
parent 6467c855e8
6 changed files with 363 additions and 218 deletions
--- a/.github/workflows/benchmark-dev.yml
+++ b/.github/workflows/benchmark-dev.yml
@ -0,0 +1,270 @@
+name: benchmark-dev
+
+on:
+  push:
+    branches:
+      - master
+    paths-ignore:
+      - "docs/**"
+      - "papers/**"
+      - "rfcs/**"
+      - "*.md"
+      - "prototyping/**"
+
+jobs:
+  windows:
+    name: windows-${{matrix.arch}}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [windows-latest]
+        arch: [Win32, x64]
+        bench:
+          - {
+              script: "run-benchmarks",
+              timeout: 12,
+              title: "Luau Benchmarks",
+              cachegrindTitle: "Performance",
+              cachegrindIterCount: 20,
+            }
+        benchResultsRepo:
+          - { name: "luau-lang/benchmark-data", branch: "main" }
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Checkout Luau repository
+        uses: actions/checkout@v3
+
+      - name: Build Luau
+        shell: bash # necessary for fail-fast
+        run: |
+          mkdir build && cd build
+          cmake .. -DCMAKE_BUILD_TYPE=Release
+          cmake --build . --target Luau.Repl.CLI --config Release
+          cmake --build . --target Luau.Analyze.CLI --config Release
+
+      - name: Move build files to root
+        run: |
+          move build/Release/* .
+
+      - uses: actions/setup-python@v3
+        with:
+          python-version: "3.9"
+          architecture: "x64"
+
+      - name: Install python dependencies
+        run: |
+          python -m pip install requests
+          python -m pip install --user numpy scipy matplotlib ipython jupyter pandas sympy nose
+
+      - name: Run benchmark
+        run: |
+          python bench/bench.py | tee ${{ matrix.bench.script }}-output.txt
+
+      - name: Checkout Benchmark Results repository
+        uses: actions/checkout@v3
+        with:
+          repository: ${{ matrix.benchResultsRepo.name }}
+          ref: ${{ matrix.benchResultsRepo.branch }}
+          token: ${{ secrets.BENCH_GITHUB_TOKEN }}
+          path: "./gh-pages"
+
+      - name: Store ${{ matrix.bench.title }} result
+        uses: Roblox/rhysd-github-action-benchmark@v-luau
+        with:
+          name: ${{ matrix.bench.title }} (Windows ${{matrix.arch}})
+          tool: "benchmarkluau"
+          output-file-path: ./${{ matrix.bench.script }}-output.txt
+          external-data-json-path: ./gh-pages/dev/bench/data.json
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Push benchmark results
+        if: github.event_name == 'push'
+        run: |
+          echo "Pushing benchmark results..."
+          cd gh-pages
+          git config user.name github-actions
+          git config user.email github@users.noreply.github.com
+          git add ./dev/bench/data.json
+          git commit -m "Add benchmarks results for ${{ github.sha }}"
+          git push
+          cd ..
+
+  unix:
+    name: ${{matrix.os}}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+        bench:
+          - {
+              script: "run-benchmarks",
+              timeout: 12,
+              title: "Luau Benchmarks",
+              cachegrindTitle: "Performance",
+              cachegrindIterCount: 20,
+            }
+        benchResultsRepo:
+          - { name: "luau-lang/benchmark-data", branch: "main" }
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Checkout Luau repository
+        uses: actions/checkout@v3
+
+      - name: Build Luau
+        run: make config=release luau luau-analyze
+
+      - uses: actions/setup-python@v3
+        with:
+          python-version: "3.9"
+          architecture: "x64"
+
+      - name: Install python dependencies
+        run: |
+          python -m pip install requests
+          python -m pip install --user numpy scipy matplotlib ipython jupyter pandas sympy nose
+
+      - name: Run benchmark
+        run: |
+          python bench/bench.py | tee ${{ matrix.bench.script }}-output.txt
+
+      - name: Install valgrind
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          sudo apt-get install valgrind
+
+      - name: Run ${{ matrix.bench.title }} (Cold Cachegrind)
+        if: matrix.os == 'ubuntu-latest'
+        run: sudo bash ./scripts/run-with-cachegrind.sh python ./bench/bench.py "${{ matrix.bench.cachegrindTitle}}Cold" 1 | tee -a ${{ matrix.bench.script }}-output.txt
+
+      - name: Run ${{ matrix.bench.title }} (Warm Cachegrind)
+        if: matrix.os == 'ubuntu-latest'
+        run: sudo bash ./scripts/run-with-cachegrind.sh python ./bench/bench.py "${{ matrix.bench.cachegrindTitle }}" ${{ matrix.bench.cachegrindIterCount }} | tee -a ${{ matrix.bench.script }}-output.txt
+
+      - name: Checkout Benchmark Results repository
+        uses: actions/checkout@v3
+        with:
+          repository: ${{ matrix.benchResultsRepo.name }}
+          ref: ${{ matrix.benchResultsRepo.branch }}
+          token: ${{ secrets.BENCH_GITHUB_TOKEN }}
+          path: "./gh-pages"
+
+      - name: Store ${{ matrix.bench.title }} result
+        uses: Roblox/rhysd-github-action-benchmark@v-luau
+        with:
+          name: ${{ matrix.bench.title }}
+          tool: "benchmarkluau"
+          output-file-path: ./${{ matrix.bench.script }}-output.txt
+          external-data-json-path: ./gh-pages/dev/bench/data.json
+          github-token: ${{ secrets.BENCH_GITHUB_TOKEN }}
+
+      - name: Store ${{ matrix.bench.title }} result (CacheGrind)
+        if: matrix.os == 'ubuntu-latest'
+        uses: Roblox/rhysd-github-action-benchmark@v-luau
+        with:
+          name: ${{ matrix.bench.title }} (CacheGrind)
+          tool: "roblox"
+          output-file-path: ./${{ matrix.bench.script }}-output.txt
+          external-data-json-path: ./gh-pages/dev/bench/data.json
+          github-token: ${{ secrets.BENCH_GITHUB_TOKEN }}
+
+      - name: Push benchmark results
+        if: github.event_name == 'push'
+        run: |
+          echo "Pushing benchmark results..."
+          cd gh-pages
+          git config user.name github-actions
+          git config user.email github@users.noreply.github.com
+          git add ./dev/bench/data.json
+          git commit -m "Add benchmarks results for ${{ github.sha }}"
+          git push
+          cd ..
+
+  static-analysis:
+    name: luau-analyze
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        bench:
+          - {
+              script: "run-analyze",
+              timeout: 12,
+              title: "Luau Analyze",
+              cachegrindTitle: "Performance",
+              cachegrindIterCount: 20,
+            }
+        benchResultsRepo:
+          - { name: "luau-lang/benchmark-data", branch: "main" }
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          token: "${{ secrets.BENCH_GITHUB_TOKEN }}"
+
+      - name: Build Luau
+        run: make config=release luau luau-analyze
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+          architecture: "x64"
+
+      - name: Install python dependencies
+        run: |
+          sudo pip install requests numpy scipy matplotlib ipython jupyter pandas sympy nose
+
+      - name: Install valgrind
+        run: |
+          sudo apt-get install valgrind
+
+      - name: Run Luau Analyze on static file
+        run: sudo python ./bench/measure_time.py ./build/release/luau-analyze bench/static_analysis/LuauPolyfillMap.lua | tee ${{ matrix.bench.script }}-output.txt
+
+      - name: Run ${{ matrix.bench.title }} (Cold Cachegrind)
+        run: sudo ./scripts/run-with-cachegrind.sh python ./bench/measure_time.py "${{ matrix.bench.cachegrindTitle}}Cold" 1 ./build/release/luau-analyze bench/static_analysis/LuauPolyfillMap.lua | tee -a ${{ matrix.bench.script }}-output.txt
+
+      - name: Run ${{ matrix.bench.title }} (Warm Cachegrind)
+        run: sudo bash ./scripts/run-with-cachegrind.sh python ./bench/measure_time.py "${{ matrix.bench.cachegrindTitle}}" 1  ./build/release/luau-analyze bench/static_analysis/LuauPolyfillMap.lua | tee -a ${{ matrix.bench.script }}-output.txt
+
+      - name: Checkout Benchmark Results repository
+        uses: actions/checkout@v3
+        with:
+          repository: ${{ matrix.benchResultsRepo.name }}
+          ref: ${{ matrix.benchResultsRepo.branch }}
+          token: ${{ secrets.BENCH_GITHUB_TOKEN }}
+          path: "./gh-pages"
+
+      - name: Store ${{ matrix.bench.title }} result
+        uses: Roblox/rhysd-github-action-benchmark@v-luau
+        with:
+          name: ${{ matrix.bench.title }}
+          tool: "benchmarkluau"
+
+          gh-pages-branch: "main"
+          output-file-path: ./${{ matrix.bench.script }}-output.txt
+          external-data-json-path: ./gh-pages/dev/bench/data.json
+          github-token: ${{ secrets.BENCH_GITHUB_TOKEN }}
+
+      - name: Store ${{ matrix.bench.title }} result (CacheGrind)
+        uses: Roblox/rhysd-github-action-benchmark@v-luau
+        with:
+          name: ${{ matrix.bench.title }}
+          tool: "roblox"
+          gh-pages-branch: "main"
+          output-file-path: ./${{ matrix.bench.script }}-output.txt
+          external-data-json-path: ./gh-pages/dev/bench/data.json
+          github-token: ${{ secrets.BENCH_GITHUB_TOKEN }}
+
+      - name: Push benchmark results
+        if: github.event_name == 'push'
+        run: |
+          echo "Pushing benchmark results..."
+          cd gh-pages
+          git config user.name github-actions
+          git config user.email github@users.noreply.github.com
+          git add ./dev/bench/data.json
+          git commit -m "Add benchmarks results for ${{ github.sha }}"
+          git push
+          cd ..
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@ -12,21 +12,13 @@ on:
      - "prototyping/**"

 jobs:
-  windows:
-    name: windows-${{matrix.arch}}
+  callgrind:
+    name: callgrind ${{ matrix.compiler }}
    strategy:
      fail-fast: false
      matrix:
-        os: [windows-latest]
-        arch: [Win32, x64]
-        bench:
-          - {
-              script: "run-benchmarks",
-              timeout: 12,
-              title: "Luau Benchmarks",
-              cachegrindTitle: "Performance",
-              cachegrindIterCount: 20,
-            }
+        os: [ubuntu-22.04]
+        compiler: [g++]
        benchResultsRepo:
          - { name: "luau-lang/benchmark-data", branch: "main" }

@ -35,200 +27,18 @@ jobs:
      - name: Checkout Luau repository
        uses: actions/checkout@v3

-      - name: Build Luau
-        shell: bash # necessary for fail-fast
-        run: |
-          mkdir build && cd build
-          cmake .. -DCMAKE_BUILD_TYPE=Release
-          cmake --build . --target Luau.Repl.CLI --config Release
-          cmake --build . --target Luau.Analyze.CLI --config Release
-
-      - name: Move build files to root
-        run: |
-          move build/Release/* .
-
-      - uses: actions/setup-python@v3
-        with:
-          python-version: "3.9"
-          architecture: "x64"
-
-      - name: Install python dependencies
-        run: |
-          python -m pip install requests
-          python -m pip install --user numpy scipy matplotlib ipython jupyter pandas sympy nose
-
-      - name: Run benchmark
-        run: |
-          python bench/bench.py | tee ${{ matrix.bench.script }}-output.txt
-
-      - name: Checkout Benchmark Results repository
-        uses: actions/checkout@v3
-        with:
-          repository: ${{ matrix.benchResultsRepo.name }}
-          ref: ${{ matrix.benchResultsRepo.branch }}
-          token: ${{ secrets.BENCH_GITHUB_TOKEN }}
-          path: "./gh-pages"
-
-      - name: Store ${{ matrix.bench.title }} result
-        uses: Roblox/rhysd-github-action-benchmark@v-luau
-        with:
-          name: ${{ matrix.bench.title }} (Windows ${{matrix.arch}})
-          tool: "benchmarkluau"
-          output-file-path: ./${{ matrix.bench.script }}-output.txt
-          external-data-json-path: ./gh-pages/dev/bench/data.json
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Push benchmark results
-        if: github.event_name == 'push'
-        run: |
-          echo "Pushing benchmark results..."
-          cd gh-pages
-          git config user.name github-actions
-          git config user.email github@users.noreply.github.com
-          git add ./dev/bench/data.json
-          git commit -m "Add benchmarks results for ${{ github.sha }}"
-          git push
-          cd ..
-
-  unix:
-    name: ${{matrix.os}}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest, macos-latest]
-        bench:
-          - {
-              script: "run-benchmarks",
-              timeout: 12,
-              title: "Luau Benchmarks",
-              cachegrindTitle: "Performance",
-              cachegrindIterCount: 20,
-            }
-        benchResultsRepo:
-          - { name: "luau-lang/benchmark-data", branch: "main" }
-
-    runs-on: ${{ matrix.os }}
-    steps:
-      - name: Checkout Luau repository
-        uses: actions/checkout@v3
-
-      - name: Build Luau
-        run: make config=release luau luau-analyze
-
-      - uses: actions/setup-python@v3
-        with:
-          python-version: "3.9"
-          architecture: "x64"
-
-      - name: Install python dependencies
-        run: |
-          python -m pip install requests
-          python -m pip install --user numpy scipy matplotlib ipython jupyter pandas sympy nose
-
-      - name: Run benchmark
-        run: |
-          python bench/bench.py | tee ${{ matrix.bench.script }}-output.txt
-
-      - name: Install valgrind
-        if: matrix.os == 'ubuntu-latest'
-        run: |
-          sudo apt-get install valgrind
-
-      - name: Run ${{ matrix.bench.title }} (Cold Cachegrind)
-        if: matrix.os == 'ubuntu-latest'
-        run: sudo bash ./scripts/run-with-cachegrind.sh python ./bench/bench.py "${{ matrix.bench.cachegrindTitle}}Cold" 1 | tee -a ${{ matrix.bench.script }}-output.txt
-
-      - name: Run ${{ matrix.bench.title }} (Warm Cachegrind)
-        if: matrix.os == 'ubuntu-latest'
-        run: sudo bash ./scripts/run-with-cachegrind.sh python ./bench/bench.py "${{ matrix.bench.cachegrindTitle }}" ${{ matrix.bench.cachegrindIterCount }} | tee -a ${{ matrix.bench.script }}-output.txt
-
-      - name: Checkout Benchmark Results repository
-        uses: actions/checkout@v3
-        with:
-          repository: ${{ matrix.benchResultsRepo.name }}
-          ref: ${{ matrix.benchResultsRepo.branch }}
-          token: ${{ secrets.BENCH_GITHUB_TOKEN }}
-          path: "./gh-pages"
-
-      - name: Store ${{ matrix.bench.title }} result
-        uses: Roblox/rhysd-github-action-benchmark@v-luau
-        with:
-          name: ${{ matrix.bench.title }}
-          tool: "benchmarkluau"
-          output-file-path: ./${{ matrix.bench.script }}-output.txt
-          external-data-json-path: ./gh-pages/dev/bench/data.json
-          github-token: ${{ secrets.BENCH_GITHUB_TOKEN }}
-
-      - name: Store ${{ matrix.bench.title }} result (CacheGrind)
-        if: matrix.os == 'ubuntu-latest'
-        uses: Roblox/rhysd-github-action-benchmark@v-luau
-        with:
-          name: ${{ matrix.bench.title }} (CacheGrind)
-          tool: "roblox"
-          output-file-path: ./${{ matrix.bench.script }}-output.txt
-          external-data-json-path: ./gh-pages/dev/bench/data.json
-          github-token: ${{ secrets.BENCH_GITHUB_TOKEN }}
-
-      - name: Push benchmark results
-        if: github.event_name == 'push'
-        run: |
-          echo "Pushing benchmark results..."
-          cd gh-pages
-          git config user.name github-actions
-          git config user.email github@users.noreply.github.com
-          git add ./dev/bench/data.json
-          git commit -m "Add benchmarks results for ${{ github.sha }}"
-          git push
-          cd ..
-
-  static-analysis:
-    name: luau-analyze
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest]
-        bench:
-          - {
-              script: "run-analyze",
-              timeout: 12,
-              title: "Luau Analyze",
-              cachegrindTitle: "Performance",
-              cachegrindIterCount: 20,
-            }
-        benchResultsRepo:
-          - { name: "luau-lang/benchmark-data", branch: "main" }
-    runs-on: ${{ matrix.os }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          token: "${{ secrets.BENCH_GITHUB_TOKEN }}"
-
-      - name: Build Luau
-        run: make config=release luau luau-analyze
-
-      - uses: actions/setup-python@v4
-        with:
-          python-version: "3.9"
-          architecture: "x64"
-
-      - name: Install python dependencies
-        run: |
-          sudo pip install requests numpy scipy matplotlib ipython jupyter pandas sympy nose
-
      - name: Install valgrind
        run: |
          sudo apt-get install valgrind

-      - name: Run Luau Analyze on static file
-        run: sudo python ./bench/measure_time.py ./build/release/luau-analyze bench/static_analysis/LuauPolyfillMap.lua | tee ${{ matrix.bench.script }}-output.txt
+      - name: Build Luau
+        run: CXX=${{ matrix.compiler }} make config=release CALLGRIND=1 luau

-      - name: Run ${{ matrix.bench.title }} (Cold Cachegrind)
-        run: sudo ./scripts/run-with-cachegrind.sh python ./bench/measure_time.py "${{ matrix.bench.cachegrindTitle}}Cold" 1 ./build/release/luau-analyze bench/static_analysis/LuauPolyfillMap.lua | tee -a ${{ matrix.bench.script }}-output.txt
+      - name: Run benchmark
+        run: |
+          python bench/bench.py --callgrind --vm "./luau -O2" | tee output.txt

-      - name: Run ${{ matrix.bench.title }} (Warm Cachegrind)
-        run: sudo bash ./scripts/run-with-cachegrind.sh python ./bench/measure_time.py "${{ matrix.bench.cachegrindTitle}}" 1  ./build/release/luau-analyze bench/static_analysis/LuauPolyfillMap.lua | tee -a ${{ matrix.bench.script }}-output.txt
-
-      - name: Checkout Benchmark Results repository
+      - name: Checkout benchmark results
        uses: actions/checkout@v3
        with:
          repository: ${{ matrix.benchResultsRepo.name }}
@ -236,26 +46,13 @@ jobs:
          token: ${{ secrets.BENCH_GITHUB_TOKEN }}
          path: "./gh-pages"

-      - name: Store ${{ matrix.bench.title }} result
+      - name: Store results
        uses: Roblox/rhysd-github-action-benchmark@v-luau
        with:
-          name: ${{ matrix.bench.title }}
+          name: callgrind ${{ matrix.compiler }}
          tool: "benchmarkluau"
-
-          gh-pages-branch: "main"
-          output-file-path: ./${{ matrix.bench.script }}-output.txt
-          external-data-json-path: ./gh-pages/dev/bench/data.json
-          github-token: ${{ secrets.BENCH_GITHUB_TOKEN }}
-
-      - name: Store ${{ matrix.bench.title }} result (CacheGrind)
-        uses: Roblox/rhysd-github-action-benchmark@v-luau
-        with:
-          name: ${{ matrix.bench.title }}
-          tool: "roblox"
-          gh-pages-branch: "main"
-          output-file-path: ./${{ matrix.bench.script }}-output.txt
-          external-data-json-path: ./gh-pages/dev/bench/data.json
-          github-token: ${{ secrets.BENCH_GITHUB_TOKEN }}
+          output-file-path: ./output.txt
+          external-data-json-path: ./gh-pages/bench/data.json

      - name: Push benchmark results
        if: github.event_name == 'push'
@ -264,7 +61,7 @@ jobs:
          cd gh-pages
          git config user.name github-actions
          git config user.email github@users.noreply.github.com
-          git add ./dev/bench/data.json
+          git add ./bench/data.json
          git commit -m "Add benchmarks results for ${{ github.sha }}"
          git push
          cd ..
--- a/CLI/Repl.cpp
+++ b/CLI/Repl.cpp
@ -21,6 +21,10 @@
 #include <fcntl.h>
 #endif

+#ifdef CALLGRIND
+#include <valgrind/callgrind.h>
+#endif
+
 #include <locale.h>

 LUAU_FASTFLAG(DebugLuauTimeTracing)
@ -166,6 +170,36 @@ static int lua_collectgarbage(lua_State* L)
    luaL_error(L, "collectgarbage must be called with 'count' or 'collect'");
 }

+#ifdef CALLGRIND
+// Lua-visible callgrind(option [, name]) helper; only compiled when CALLGRIND is defined.
+// "running" pushes RUNNING_ON_VALGRIND as a boolean, "zero" resets stats, "dump" passes name to CALLGRIND_DUMP_STATS_AT.
+static int lua_callgrind(lua_State* L)
+{
+    const char* mode = luaL_checkstring(L, 1);
+
+    if (strcmp(mode, "zero") == 0)
+    {
+        CALLGRIND_ZERO_STATS;
+        return 0;
+    }
+    else if (strcmp(mode, "dump") == 0)
+    {
+        const char* dumpName = luaL_checkstring(L, 2);
+        CALLGRIND_DUMP_STATS_AT(dumpName);
+        return 0;
+    }
+    else if (strcmp(mode, "running") == 0)
+    {
+        int underValgrind = RUNNING_ON_VALGRIND;
+        lua_pushboolean(L, underValgrind);
+        return 1;
+    }
+
+    // Any other option is rejected with a Lua error
+    luaL_error(L, "callgrind must be called with one of 'running', 'zero', 'dump'");
+}
+#endif
+
 void setupState(lua_State* L)
 {
    luaL_openlibs(L);
@ -174,6 +208,9 @@ void setupState(lua_State* L)
        {"loadstring", lua_loadstring},
        {"require", lua_require},
        {"collectgarbage", lua_collectgarbage},
+#ifdef CALLGRIND
+        {"callgrind", lua_callgrind},
+#endif
        {NULL, NULL},
    };

--- a/4
+++ b/4
@ -93,6 +93,10 @@ ifeq ($(config),fuzz)
 	LDFLAGS+=-fsanitize=address,fuzzer
 endif

+ifneq ($(CALLGRIND),)
+	CXXFLAGS+=-DCALLGRIND=$(CALLGRIND)
+endif
+
 # target-specific flags
 $(AST_OBJECTS): CXXFLAGS+=-std=c++17 -ICommon/include -IAst/include
 $(COMPILER_OBJECTS): CXXFLAGS+=-std=c++17 -ICompiler/include -ICommon/include -IAst/include
--- a/bench/bench.py
+++ b/bench/bench.py
@ -40,6 +40,7 @@ argumentParser.add_argument('--results', dest='results',type=str,nargs='*',help=
 argumentParser.add_argument('--run-test', action='store', default=None, help='Regex test filter')
 argumentParser.add_argument('--extra-loops', action='store',type=int,default=0, help='Amount of times to loop over one test (one test already performs multiple runs)')
 argumentParser.add_argument('--filename', action='store',type=str,default='bench', help='File name for graph and results file')
+argumentParser.add_argument('--callgrind', dest='callgrind',action='store_const',const=1,default=0,help='Use callgrind to run benchmarks')

 if matplotlib != None:
    argumentParser.add_argument('--absolute', dest='absolute',action='store_const',const=1,default=0,help='Display absolute values instead of relative (enabled by default when benchmarking a single VM)')
@ -55,6 +56,9 @@ argumentParser.add_argument('--no-print-influx-debugging', action='store_false',

 argumentParser.add_argument('--no-print-final-summary', action='store_false', dest='print_final_summary', help="Don't print a table summarizing the results after all tests are run")

+# Assume 2.5 IPC on a 4 GHz CPU; this is obviously incorrect but it allows us to display simulated instruction counts using regular time units
+CALLGRIND_INSN_PER_SEC = 2.5 * 4e9
+
 def arrayRange(count):
    result = []

@ -71,6 +75,21 @@ def arrayRangeOffset(count, offset):

    return result

+def getCallgrindOutput(lines):
+    result = []
+    name = None
+
+    for l in lines:
+        if l.startswith("desc: Trigger: Client Request: "):
+            name = l[31:].strip()
+        elif l.startswith("summary: ") and name != None:
+            insn = int(l[9:])
+            # Note: we only run each bench once under callgrind so we only report a single time per run; callgrind instruction count variance is ~0.01% so it might as well be zero
+            result += "|><|" + name + "|><|" + str(insn / CALLGRIND_INSN_PER_SEC * 1000.0) + "||_||"
+            name = None
+
+    return "".join(result)
+
 def getVmOutput(cmd):
    if os.name == "nt":
        try:
@ -79,6 +98,14 @@ def getVmOutput(cmd):
            exit(1)
        except:
            return ""
+    elif arguments.callgrind:
+        try:
+            subprocess.check_call("valgrind --tool=callgrind --callgrind-out-file=callgrind.out --combine-dumps=yes --dump-line=no " + cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, cwd=scriptdir)
+            file = open(os.path.join(scriptdir, "callgrind.out"), "r")
+            lines = file.readlines()
+            return getCallgrindOutput(lines)
+        except:
+            return ""
    else:
        with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=scriptdir) as p:
            # Try to lock to a single processor
--- a/bench/bench_support.lua
+++ b/bench/bench_support.lua
@ -5,6 +5,16 @@ bench.runs = 20
 bench.extraRuns = 4

 function bench.runCode(f, description)
+    -- Under Callgrind, run the test only once and measure just the execution cost
+    if callgrind and callgrind("running") then
+        if collectgarbage then collectgarbage() end
+
+        callgrind("zero")
+        f() -- unfortunately we can't easily separate setup cost from runtime cost in f unless it calls callgrind()
+        callgrind("dump", description)
+        return
+    end
+
    local timeTable = {}

    for i = 1,bench.runs + bench.extraRuns do