mirror of
https://github.com/luau-lang/luau.git
synced 2025-01-10 13:29:09 +00:00
939 lines
32 KiB
Python
939 lines
32 KiB
Python
#!/usr/bin/python3
|
|
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
import argparse
|
|
import os
|
|
import subprocess
|
|
import math
|
|
import sys
|
|
import re
|
|
import json
|
|
|
|
# Taken from rotest
|
|
from color import colored, Color
|
|
from tabulate import TablePrinter, Alignment
|
|
|
|
try:
|
|
import matplotlib
|
|
import matplotlib.pyplot as plt
|
|
except ModuleNotFoundError:
|
|
matplotlib = None
|
|
|
|
try:
|
|
import scipy
|
|
from scipy import stats
|
|
except ModuleNotFoundError:
|
|
print("Warning: scipy package is not installed, confidence values will not be available")
|
|
stats = None
|
|
|
|
scriptdir = os.path.dirname(os.path.realpath(__file__))
|
|
defaultVm = 'luau.exe' if os.name == "nt" else './luau'
|
|
|
|
argumentParser = argparse.ArgumentParser(description='Benchmark Lua script execution with an option to compare different VMs')
|
|
|
|
argumentParser.add_argument('--vm', dest='vm',default=defaultVm,help='Lua executable to test (' + defaultVm + ' by default)')
|
|
argumentParser.add_argument('--folder', dest='folder',default=os.path.join(scriptdir, 'tests'),help='Folder with tests (tests by default)')
|
|
argumentParser.add_argument('--compare', dest='vmNext',type=str,nargs='*',help='List of Lua executables to compare against')
|
|
argumentParser.add_argument('--results', dest='results',type=str,nargs='*',help='List of json result files to compare and graph')
|
|
argumentParser.add_argument('--run-test', action='store', default=None, help='Regex test filter')
|
|
argumentParser.add_argument('--extra-loops', action='store',type=int,default=0, help='Amount of times to loop over one test (one test already performs multiple runs)')
|
|
argumentParser.add_argument('--filename', action='store',type=str,default='bench', help='File name for graph and results file')
|
|
argumentParser.add_argument('--callgrind', dest='callgrind',action='store_const',const=1,default=0,help='Use callgrind to run benchmarks')
|
|
argumentParser.add_argument('--show-commands', dest='show_commands',action='store_const',const=1,default=0,help='Show the command line used to launch the VM and tests')
|
|
|
|
if matplotlib != None:
|
|
argumentParser.add_argument('--absolute', dest='absolute',action='store_const',const=1,default=0,help='Display absolute values instead of relative (enabled by default when benchmarking a single VM)')
|
|
argumentParser.add_argument('--speedup', dest='speedup',action='store_const',const=1,default=0,help='Draw a speedup graph')
|
|
argumentParser.add_argument('--sort', dest='sort',action='store_const',const=1,default=0,help='Sort values from worst to best improvements, ignoring conf. int. (disabled by default)')
|
|
argumentParser.add_argument('--window', dest='window',action='store_const',const=1,default=0,help='Display window with resulting plot (disabled by default)')
|
|
argumentParser.add_argument('--graph-vertical', action='store_true',dest='graph_vertical', help="Draw graph with vertical bars instead of horizontal")
|
|
|
|
argumentParser.add_argument('--report-metrics', dest='report_metrics', help="Send metrics about this session to InfluxDB URL upon completion.")
|
|
|
|
argumentParser.add_argument('--print-influx-debugging', action='store_true', dest='print_influx_debugging', help="Print output to aid in debugging of influx metrics reporting.")
|
|
argumentParser.add_argument('--no-print-influx-debugging', action='store_false', dest='print_influx_debugging', help="Don't print output to aid in debugging of influx metrics reporting.")
|
|
|
|
argumentParser.add_argument('--no-print-final-summary', action='store_false', dest='print_final_summary', help="Don't print a table summarizing the results after all tests are run")
|
|
|
|
# Assume 2.5 IPC on a 4 GHz CPU; this is obviously incorrect but it allows us to display simulated instruction counts using regular time units
|
|
CALLGRIND_INSN_PER_SEC = 2.5 * 4e9
|
|
|
|
def arrayRange(count):
|
|
result = []
|
|
|
|
for i in range(count):
|
|
result.append(i)
|
|
|
|
return result
|
|
|
|
def arrayRangeOffset(count, offset):
|
|
result = []
|
|
|
|
for i in range(count):
|
|
result.append(i + offset)
|
|
|
|
return result
|
|
|
|
def getCallgrindOutput(stdout, lines):
|
|
result = []
|
|
name = None
|
|
|
|
for l in lines:
|
|
if l.startswith("desc: Trigger: Client Request: "):
|
|
name = l[31:].strip()
|
|
elif l.startswith("summary: ") and name != None:
|
|
insn = int(l[9:])
|
|
# Note: we only run each bench once under callgrind so we only report a single time per run; callgrind instruction count variance is ~0.01% so it might as well be zero
|
|
result += "|><|" + name + "|><|" + str(insn / CALLGRIND_INSN_PER_SEC * 1000.0) + "||_||"
|
|
name = None
|
|
|
|
# If no results were found above, this may indicate the native executable running
|
|
# the benchmark doesn't have support for callgrind builtin. In that case just
|
|
# report the "totals" from the output file.
|
|
if len(result) == 0:
|
|
elements = stdout.decode('utf8').split("|><|")
|
|
if len(elements) >= 2:
|
|
name = elements[1]
|
|
|
|
for l in lines:
|
|
if l.startswith("totals: "):
|
|
insn = int(l[8:])
|
|
# Note: we only run each bench once under callgrind so we only report a single time per run; callgrind instruction count variance is ~0.01% so it might as well be zero
|
|
result += "|><|" + name + "|><|" + str(insn / CALLGRIND_INSN_PER_SEC * 1000.0) + "||_||"
|
|
|
|
return "".join(result)
|
|
|
|
def conditionallyShowCommand(cmd):
|
|
if arguments.show_commands:
|
|
print(f'{colored(Color.BLUE, "EXECUTING")}: {cmd}')
|
|
|
|
def checkValgrindExecutable():
|
|
"""Return true if valgrind can be successfully spawned"""
|
|
try:
|
|
subprocess.check_call("valgrind --version", shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
except:
|
|
print(f"{colored(Color.YELLOW, 'WARNING')}: Unable to spawn 'valgrind'. Please ensure valgrind is installed when using '--callgrind'.")
|
|
return False
|
|
|
|
return True
|
|
|
|
def getVmOutput(cmd):
|
|
if os.name == "nt":
|
|
try:
|
|
fullCmd = "start /realtime /affinity 1 /b /wait cmd /C \"" + cmd + "\""
|
|
conditionallyShowCommand(fullCmd)
|
|
return subprocess.check_output(fullCmd, shell=True, cwd=scriptdir).decode()
|
|
except KeyboardInterrupt:
|
|
exit(1)
|
|
except:
|
|
return ""
|
|
elif arguments.callgrind:
|
|
if not checkValgrindExecutable():
|
|
return ""
|
|
output_path = os.path.join(scriptdir, "callgrind.out")
|
|
try:
|
|
os.unlink(output_path) # Remove stale output
|
|
except:
|
|
pass
|
|
fullCmd = "valgrind --tool=callgrind --callgrind-out-file=callgrind.out --combine-dumps=yes --dump-line=no " + cmd
|
|
conditionallyShowCommand(fullCmd)
|
|
try:
|
|
output = subprocess.check_output(fullCmd, shell=True, stderr=subprocess.DEVNULL, cwd=scriptdir)
|
|
except subprocess.CalledProcessError as e:
|
|
print(f"{colored(Color.YELLOW, 'WARNING')}: Valgrind returned error code {e.returncode}")
|
|
output = e.output
|
|
with open(output_path, "r") as file:
|
|
lines = file.readlines()
|
|
os.unlink(output_path)
|
|
return getCallgrindOutput(output, lines)
|
|
else:
|
|
conditionallyShowCommand(cmd)
|
|
with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=scriptdir) as p:
|
|
# Try to lock to a single processor
|
|
if sys.platform != "darwin":
|
|
os.sched_setaffinity(p.pid, { 0 })
|
|
|
|
# Try to set high priority (requires sudo)
|
|
try:
|
|
os.nice(-10)
|
|
except:
|
|
pass
|
|
|
|
return p.communicate()[0]
|
|
|
|
def getShortVmName(name):
|
|
# Hope that the path to executable doesn't contain spaces
|
|
argumentPos = name.find(" ")
|
|
|
|
if argumentPos != -1:
|
|
executableName = name[0:argumentPos]
|
|
arguments = name[argumentPos+1:]
|
|
|
|
pathPos = executableName.rfind("\\")
|
|
|
|
if pathPos == -1:
|
|
pathPos = executableName.rfind("/")
|
|
|
|
if pathPos != -1:
|
|
executableName = executableName[pathPos+1:]
|
|
|
|
return executableName + " " + arguments
|
|
|
|
pathPos = name.rfind("\\")
|
|
|
|
if pathPos == -1:
|
|
pathPos = name.rfind("/")
|
|
|
|
if pathPos != -1:
|
|
return name[pathPos+1:]
|
|
|
|
return name
|
|
|
|
class TestResult:
|
|
filename = ""
|
|
vm = ""
|
|
shortVm = ""
|
|
name = ""
|
|
|
|
values = []
|
|
count = 0
|
|
min = None
|
|
avg = 0
|
|
max = None
|
|
|
|
sampleStdDev = 0
|
|
unbiasedEst = 0
|
|
sampleConfidenceInterval = 0
|
|
|
|
def extractResult(filename, vm, output):
|
|
elements = output.split("|><|")
|
|
|
|
# Remove test output
|
|
elements.remove(elements[0])
|
|
|
|
result = TestResult()
|
|
|
|
result.filename = filename
|
|
result.vm = vm
|
|
result.shortVm = getShortVmName(vm)
|
|
|
|
result.name = elements[0]
|
|
elements.remove(elements[0])
|
|
|
|
timeTable = []
|
|
|
|
for el in elements:
|
|
timeTable.append(float(el))
|
|
|
|
result.values = timeTable
|
|
result.count = len(timeTable)
|
|
|
|
return result
|
|
|
|
def mergeResult(lhs, rhs):
|
|
for value in rhs.values:
|
|
lhs.values.append(value)
|
|
|
|
lhs.count = len(lhs.values)
|
|
|
|
def mergeResults(lhs, rhs):
|
|
for a, b in zip(lhs, rhs):
|
|
mergeResult(a, b)
|
|
|
|
def finalizeResult(result):
|
|
total = 0.0
|
|
|
|
# Compute basic parameters
|
|
for v in result.values:
|
|
if result.min == None or v < result.min:
|
|
result.min = v
|
|
|
|
if result.max == None or v > result.max:
|
|
result.max = v
|
|
|
|
total = total + v
|
|
|
|
if result.count > 0:
|
|
result.avg = total / result.count
|
|
else:
|
|
result.avg = 0
|
|
|
|
# Compute standard deviation
|
|
sumOfSquares = 0
|
|
|
|
for v in result.values:
|
|
sumOfSquares = sumOfSquares + (v - result.avg) ** 2
|
|
|
|
if result.count > 1:
|
|
result.sampleStdDev = math.sqrt(sumOfSquares / (result.count - 1))
|
|
result.unbiasedEst = result.sampleStdDev * result.sampleStdDev
|
|
|
|
if stats:
|
|
# Two-tailed distribution with 95% conf.
|
|
tValue = stats.t.ppf(1 - 0.05 / 2, result.count - 1)
|
|
|
|
# Compute confidence interval
|
|
result.sampleConfidenceInterval = tValue * result.sampleStdDev / math.sqrt(result.count)
|
|
else:
|
|
result.sampleConfidenceInterval = result.sampleStdDev
|
|
else:
|
|
result.sampleStdDev = 0
|
|
result.unbiasedEst = 0
|
|
result.sampleConfidenceInterval = 0
|
|
|
|
return result
|
|
|
|
# Full result set
|
|
allResults = []
|
|
|
|
|
|
# Data for the graph
|
|
plotLegend = []
|
|
|
|
plotLabels = []
|
|
|
|
plotValueLists = []
|
|
plotConfIntLists = []
|
|
|
|
# Totals
|
|
vmTotalMin = []
|
|
vmTotalAverage = []
|
|
vmTotalImprovement = []
|
|
vmTotalResults = []
|
|
|
|
# Data for Telegraf report
|
|
mainTotalMin = 0
|
|
mainTotalAverage = 0
|
|
mainTotalMax = 0
|
|
|
|
def getExtraArguments(filepath):
|
|
try:
|
|
with open(filepath) as f:
|
|
for i in f.readlines():
|
|
pos = i.find("--bench-args:")
|
|
if pos != -1:
|
|
return i[pos + 13:].strip()
|
|
except:
|
|
pass
|
|
|
|
return ""
|
|
|
|
def substituteArguments(cmd, extra):
|
|
if argumentSubstituionCallback != None:
|
|
cmd = argumentSubstituionCallback(cmd)
|
|
|
|
if cmd.find("@EXTRA") != -1:
|
|
cmd = cmd.replace("@EXTRA", extra)
|
|
else:
|
|
cmd = cmd + " " + extra
|
|
|
|
return cmd
|
|
|
|
def extractResults(filename, vm, output, allowFailure):
|
|
results = []
|
|
|
|
splitOutput = output.split("||_||")
|
|
|
|
if len(splitOutput) <= 1:
|
|
if allowFailure:
|
|
result = TestResult()
|
|
|
|
result.filename = filename
|
|
result.vm = vm
|
|
result.shortVm = getShortVmName(vm)
|
|
|
|
results.append(result)
|
|
|
|
return results
|
|
|
|
splitOutput.remove(splitOutput[len(splitOutput) - 1])
|
|
|
|
for el in splitOutput:
|
|
results.append(extractResult(filename, vm, el))
|
|
|
|
return results
|
|
|
|
def analyzeResult(subdir, main, comparisons):
|
|
# Aggregate statistics
|
|
global mainTotalMin, mainTotalAverage, mainTotalMax
|
|
|
|
mainTotalMin = mainTotalMin + main.min
|
|
mainTotalAverage = mainTotalAverage + main.avg
|
|
mainTotalMax = mainTotalMax + main.max
|
|
|
|
if arguments.vmNext != None:
|
|
resultPrinter.add_row({
|
|
'Test': main.name,
|
|
'Min': '{:8.3f}ms'.format(main.min),
|
|
'Average': '{:8.3f}ms'.format(main.avg),
|
|
'StdDev%': '{:8.3f}%'.format(main.sampleConfidenceInterval / main.avg * 100),
|
|
'Driver': main.shortVm,
|
|
'Speedup': "",
|
|
'Significance': "",
|
|
'P(T<=t)': ""
|
|
})
|
|
else:
|
|
resultPrinter.add_row({
|
|
'Test': main.name,
|
|
'Min': '{:8.3f}ms'.format(main.min),
|
|
'Average': '{:8.3f}ms'.format(main.avg),
|
|
'StdDev%': '{:8.3f}%'.format(main.sampleConfidenceInterval / main.avg * 100),
|
|
'Driver': main.shortVm
|
|
})
|
|
|
|
if influxReporter != None:
|
|
influxReporter.report_result(subdir, main.name, main.filename, "SUCCESS", main.min, main.avg, main.max, main.sampleConfidenceInterval, main.shortVm, main.vm)
|
|
|
|
print(colored(Color.GREEN, 'SUCCESS') + ': {:<40}'.format(main.name) + ": " + '{:8.3f}'.format(main.avg) + "ms +/- " +
|
|
'{:6.3f}'.format(main.sampleConfidenceInterval / main.avg * 100) + "% on " + main.shortVm)
|
|
|
|
plotLabels.append(main.name)
|
|
|
|
index = 0
|
|
|
|
if len(plotValueLists) < index + 1:
|
|
plotValueLists.append([])
|
|
plotConfIntLists.append([])
|
|
|
|
vmTotalMin.append(0.0)
|
|
vmTotalAverage.append(0.0)
|
|
vmTotalImprovement.append(0.0)
|
|
vmTotalResults.append(0)
|
|
|
|
if arguments.absolute or arguments.speedup:
|
|
scale = 1
|
|
else:
|
|
scale = 100 / main.avg
|
|
|
|
plotValueLists[index].append(main.avg * scale)
|
|
plotConfIntLists[index].append(main.sampleConfidenceInterval * scale)
|
|
|
|
vmTotalMin[index] += main.min
|
|
vmTotalAverage[index] += main.avg
|
|
|
|
for compare in comparisons:
|
|
index = index + 1
|
|
|
|
if len(plotValueLists) < index + 1 and not arguments.speedup:
|
|
plotValueLists.append([])
|
|
plotConfIntLists.append([])
|
|
|
|
vmTotalMin.append(0.0)
|
|
vmTotalAverage.append(0.0)
|
|
vmTotalImprovement.append(0.0)
|
|
vmTotalResults.append(0)
|
|
|
|
if compare.min == None:
|
|
print(colored(Color.RED, 'FAILED') + ": '" + main.name + "' on '" + compare.vm + "'")
|
|
|
|
resultPrinter.add_row({ 'Test': main.name, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': compare.shortVm, 'Speedup': "", 'Significance': "", 'P(T<=t)': "" })
|
|
|
|
if influxReporter != None:
|
|
influxReporter.report_result(subdir, main.filename, main.filename, "FAILED", 0.0, 0.0, 0.0, 0.0, compare.shortVm, compare.vm)
|
|
|
|
if arguments.speedup:
|
|
plotValueLists[0].pop()
|
|
plotValueLists[0].append(0)
|
|
|
|
plotConfIntLists[0].pop()
|
|
plotConfIntLists[0].append(0)
|
|
else:
|
|
plotValueLists[index].append(0)
|
|
plotConfIntLists[index].append(0)
|
|
|
|
continue
|
|
|
|
if main.count > 1 and stats:
|
|
pooledStdDev = math.sqrt((main.unbiasedEst + compare.unbiasedEst) / 2)
|
|
|
|
tStat = abs(main.avg - compare.avg) / (pooledStdDev * math.sqrt(2 / main.count))
|
|
degreesOfFreedom = 2 * main.count - 2
|
|
|
|
# Two-tailed distribution with 95% conf.
|
|
tCritical = stats.t.ppf(1 - 0.05 / 2, degreesOfFreedom)
|
|
|
|
noSignificantDifference = tStat < tCritical
|
|
pValue = 2 * (1 - stats.t.cdf(tStat, df = degreesOfFreedom))
|
|
else:
|
|
noSignificantDifference = None
|
|
pValue = -1
|
|
|
|
if noSignificantDifference is None:
|
|
verdict = ""
|
|
elif noSignificantDifference:
|
|
verdict = "likely same"
|
|
elif main.avg < compare.avg:
|
|
verdict = "likely worse"
|
|
else:
|
|
verdict = "likely better"
|
|
|
|
speedup = (plotValueLists[0][-1] / (compare.avg * scale) - 1)
|
|
speedupColor = Color.YELLOW if speedup < 0 and noSignificantDifference else Color.RED if speedup < 0 else Color.GREEN if speedup > 0 else Color.YELLOW
|
|
|
|
resultPrinter.add_row({
|
|
'Test': main.name,
|
|
'Min': '{:8.3f}ms'.format(compare.min),
|
|
'Average': '{:8.3f}ms'.format(compare.avg),
|
|
'StdDev%': '{:8.3f}%'.format(compare.sampleConfidenceInterval / compare.avg * 100),
|
|
'Driver': compare.shortVm,
|
|
'Speedup': colored(speedupColor, '{:8.3f}%'.format(speedup * 100)),
|
|
'Significance': verdict,
|
|
'P(T<=t)': '---' if pValue < 0 else '{:.0f}%'.format(pValue * 100)
|
|
})
|
|
|
|
print(colored(Color.GREEN, 'SUCCESS') + ': {:<40}'.format(main.name) + ": " + '{:8.3f}'.format(compare.avg) + "ms +/- " +
|
|
'{:6.3f}'.format(compare.sampleConfidenceInterval / compare.avg * 100) + "% on " + compare.shortVm +
|
|
' ({:+7.3f}%, '.format(speedup * 100) + verdict + ")")
|
|
|
|
if influxReporter != None:
|
|
influxReporter.report_result(subdir, main.name, main.filename, "SUCCESS", compare.min, compare.avg, compare.max, compare.sampleConfidenceInterval, compare.shortVm, compare.vm)
|
|
|
|
if arguments.speedup:
|
|
oldValue = plotValueLists[0].pop()
|
|
newValue = compare.avg
|
|
|
|
plotValueLists[0].append((oldValue / newValue - 1) * 100)
|
|
|
|
plotConfIntLists[0].pop()
|
|
plotConfIntLists[0].append(0)
|
|
else:
|
|
plotValueLists[index].append(compare.avg * scale)
|
|
plotConfIntLists[index].append(compare.sampleConfidenceInterval * scale)
|
|
|
|
vmTotalMin[index] += compare.min
|
|
vmTotalAverage[index] += compare.avg
|
|
vmTotalImprovement[index] += math.log(main.avg / compare.avg)
|
|
vmTotalResults[index] += 1
|
|
|
|
def runTest(subdir, filename, filepath):
|
|
filepath = os.path.abspath(filepath)
|
|
|
|
mainVm = os.path.abspath(arguments.vm)
|
|
|
|
# Process output will contain the test name and execution times
|
|
mainOutput = getVmOutput(substituteArguments(mainVm, getExtraArguments(filepath)) + " " + filepath)
|
|
mainResultSet = extractResults(filename, mainVm, mainOutput, False)
|
|
|
|
if len(mainResultSet) == 0:
|
|
print(colored(Color.RED, 'FAILED') + ": '" + filepath + "' on '" + mainVm + "'")
|
|
|
|
if arguments.vmNext != None:
|
|
resultPrinter.add_row({ 'Test': filepath, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': getShortVmName(mainVm), 'Speedup': "", 'Significance': "", 'P(T<=t)': "" })
|
|
else:
|
|
resultPrinter.add_row({ 'Test': filepath, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': getShortVmName(mainVm) })
|
|
|
|
if influxReporter != None:
|
|
influxReporter.report_result(subdir, filename, filename, "FAILED", 0.0, 0.0, 0.0, 0.0, getShortVmName(mainVm), mainVm)
|
|
return
|
|
|
|
compareResultSets = []
|
|
|
|
if arguments.vmNext != None:
|
|
for compareVm in arguments.vmNext:
|
|
compareVm = os.path.abspath(compareVm)
|
|
|
|
compareOutput = getVmOutput(substituteArguments(compareVm, getExtraArguments(filepath)) + " " + filepath)
|
|
compareResultSet = extractResults(filename, compareVm, compareOutput, True)
|
|
|
|
compareResultSets.append(compareResultSet)
|
|
|
|
if arguments.extra_loops > 0:
|
|
# get more results
|
|
for i in range(arguments.extra_loops):
|
|
extraMainOutput = getVmOutput(substituteArguments(mainVm, getExtraArguments(filepath)) + " " + filepath)
|
|
extraMainResultSet = extractResults(filename, mainVm, extraMainOutput, False)
|
|
|
|
mergeResults(mainResultSet, extraMainResultSet)
|
|
|
|
if arguments.vmNext != None:
|
|
i = 0
|
|
for compareVm in arguments.vmNext:
|
|
compareVm = os.path.abspath(compareVm)
|
|
|
|
extraCompareOutput = getVmOutput(substituteArguments(compareVm, getExtraArguments(filepath)) + " " + filepath)
|
|
extraCompareResultSet = extractResults(filename, compareVm, extraCompareOutput, True)
|
|
|
|
mergeResults(compareResultSets[i], extraCompareResultSet)
|
|
i += 1
|
|
|
|
# finalize results
|
|
for result in mainResultSet:
|
|
finalizeResult(result)
|
|
|
|
for compareResultSet in compareResultSets:
|
|
for result in compareResultSet:
|
|
finalizeResult(result)
|
|
|
|
# analyze results
|
|
for i in range(len(mainResultSet)):
|
|
mainResult = mainResultSet[i]
|
|
compareResults = []
|
|
|
|
for el in compareResultSets:
|
|
if i < len(el):
|
|
compareResults.append(el[i])
|
|
else:
|
|
noResult = TestResult()
|
|
|
|
noResult.filename = el[0].filename
|
|
noResult.vm = el[0].vm
|
|
noResult.shortVm = el[0].shortVm
|
|
|
|
compareResults.append(noResult)
|
|
|
|
analyzeResult(subdir, mainResult, compareResults)
|
|
|
|
mergedResults = []
|
|
mergedResults.append(mainResult)
|
|
|
|
for el in compareResults:
|
|
mergedResults.append(el)
|
|
|
|
allResults.append(mergedResults)
|
|
|
|
def rearrangeSortKeyForComparison(e):
|
|
if plotValueLists[1][e] == 0:
|
|
return 1
|
|
|
|
return plotValueLists[0][e] / plotValueLists[1][e]
|
|
|
|
def rearrangeSortKeyForSpeedup(e):
|
|
return plotValueLists[0][e]
|
|
|
|
def rearrangeSortKeyDescending(e):
|
|
return -plotValueLists[0][e]
|
|
|
|
# Re-arrange results from worst to best
|
|
def rearrange(key):
|
|
global plotLabels
|
|
|
|
index = arrayRange(len(plotLabels))
|
|
index = sorted(index, key=key)
|
|
|
|
# Recreate value lists in sorted order
|
|
plotLabelsPrev = plotLabels
|
|
plotLabels = []
|
|
|
|
for i in index:
|
|
plotLabels.append(plotLabelsPrev[i])
|
|
|
|
for group in range(len(plotValueLists)):
|
|
plotValueListPrev = plotValueLists[group]
|
|
plotValueLists[group] = []
|
|
|
|
plotConfIntListPrev = plotConfIntLists[group]
|
|
plotConfIntLists[group] = []
|
|
|
|
for i in index:
|
|
plotValueLists[group].append(plotValueListPrev[i])
|
|
plotConfIntLists[group].append(plotConfIntListPrev[i])
|
|
|
|
# Graph
|
|
def graph():
|
|
if len(plotValueLists) == 0:
|
|
print("No results")
|
|
return
|
|
|
|
ind = arrayRange(len(plotLabels))
|
|
width = 0.8 / len(plotValueLists)
|
|
|
|
if arguments.graph_vertical:
|
|
# Extend graph width when we have a lot of tests to draw
|
|
barcount = len(plotValueLists[0])
|
|
plt.figure(figsize=(max(8, barcount * 0.3), 8))
|
|
else:
|
|
# Extend graph height when we have a lot of tests to draw
|
|
barcount = len(plotValueLists[0])
|
|
plt.figure(figsize=(8, max(8, barcount * 0.3)))
|
|
|
|
plotBars = []
|
|
|
|
matplotlib.rc('xtick', labelsize=10)
|
|
matplotlib.rc('ytick', labelsize=10)
|
|
|
|
if arguments.graph_vertical:
|
|
# Draw Y grid behind the bars
|
|
plt.rc('axes', axisbelow=True)
|
|
plt.grid(True, 'major', 'y')
|
|
|
|
for i in range(len(plotValueLists)):
|
|
bar = plt.bar(arrayRangeOffset(len(plotLabels), i * width), plotValueLists[i], width, yerr=plotConfIntLists[i])
|
|
plotBars.append(bar[0])
|
|
|
|
if arguments.absolute:
|
|
plt.ylabel('Time (ms)')
|
|
elif arguments.speedup:
|
|
plt.ylabel('Speedup (%)')
|
|
else:
|
|
plt.ylabel('Relative time (%)')
|
|
|
|
plt.title('Benchmark')
|
|
plt.xticks(ind, plotLabels, rotation='vertical')
|
|
else:
|
|
# Draw X grid behind the bars
|
|
plt.rc('axes', axisbelow=True)
|
|
plt.grid(True, 'major', 'x')
|
|
|
|
for i in range(len(plotValueLists)):
|
|
bar = plt.barh(arrayRangeOffset(len(plotLabels), i * width), plotValueLists[i], width, xerr=plotConfIntLists[i])
|
|
plotBars.append(bar[0])
|
|
|
|
if arguments.absolute:
|
|
plt.xlabel('Time (ms)')
|
|
elif arguments.speedup:
|
|
plt.xlabel('Speedup (%)')
|
|
else:
|
|
plt.xlabel('Relative time (%)')
|
|
|
|
plt.title('Benchmark')
|
|
plt.yticks(ind, plotLabels)
|
|
|
|
plt.gca().invert_yaxis()
|
|
|
|
plt.legend(plotBars, plotLegend)
|
|
|
|
plt.tight_layout()
|
|
|
|
plt.savefig(arguments.filename + ".png", dpi=200)
|
|
|
|
if arguments.window:
|
|
plt.show()
|
|
|
|
def addTotalsToTable():
|
|
if len(vmTotalMin) == 0:
|
|
return
|
|
|
|
if arguments.vmNext != None:
|
|
index = 0
|
|
|
|
resultPrinter.add_row({
|
|
'Test': 'Total',
|
|
'Min': '{:8.3f}ms'.format(vmTotalMin[index]),
|
|
'Average': '{:8.3f}ms'.format(vmTotalAverage[index]),
|
|
'StdDev%': "---",
|
|
'Driver': getShortVmName(os.path.abspath(arguments.vm)),
|
|
'Speedup': "",
|
|
'Significance': "",
|
|
'P(T<=t)': ""
|
|
})
|
|
|
|
for compareVm in arguments.vmNext:
|
|
index = index + 1
|
|
|
|
speedup = vmTotalAverage[0] / vmTotalAverage[index] * 100 - 100
|
|
|
|
resultPrinter.add_row({
|
|
'Test': 'Total',
|
|
'Min': '{:8.3f}ms'.format(vmTotalMin[index]),
|
|
'Average': '{:8.3f}ms'.format(vmTotalAverage[index]),
|
|
'StdDev%': "---",
|
|
'Driver': getShortVmName(os.path.abspath(compareVm)),
|
|
'Speedup': colored(Color.RED if speedup < 0 else Color.GREEN if speedup > 0 else Color.YELLOW, '{:8.3f}%'.format(speedup)),
|
|
'Significance': "",
|
|
'P(T<=t)': ""
|
|
})
|
|
else:
|
|
resultPrinter.add_row({
|
|
'Test': 'Total',
|
|
'Min': '{:8.3f}ms'.format(vmTotalMin[0]),
|
|
'Average': '{:8.3f}ms'.format(vmTotalAverage[0]),
|
|
'StdDev%': "---",
|
|
'Driver': getShortVmName(os.path.abspath(arguments.vm))
|
|
})
|
|
|
|
def writeResultsToFile():
|
|
class TestResultEncoder(json.JSONEncoder):
|
|
def default(self, obj):
|
|
if isinstance(obj, TestResult):
|
|
return [obj.filename, obj.vm, obj.shortVm, obj.name, obj.values, obj.count]
|
|
return json.JSONEncoder.default(self, obj)
|
|
|
|
try:
|
|
with open(arguments.filename + ".json", "w") as allResultsFile:
|
|
allResultsFile.write(json.dumps(allResults, cls=TestResultEncoder))
|
|
except:
|
|
print("Failed to write results to a file")
|
|
|
|
def run(args, argsubcb):
|
|
global arguments, resultPrinter, influxReporter, argumentSubstituionCallback, allResults
|
|
arguments = args
|
|
argumentSubstituionCallback = argsubcb
|
|
|
|
if os.name == "nt" and arguments.callgrind:
|
|
print(f"{colored(Color.RED, 'ERROR')}: --callgrind is not supported on Windows. Please consider using this option on another OS, or Linux using WSL.")
|
|
sys.exit(1)
|
|
|
|
if arguments.report_metrics or arguments.print_influx_debugging:
|
|
import influxbench
|
|
influxReporter = influxbench.InfluxReporter(arguments)
|
|
else:
|
|
influxReporter = None
|
|
|
|
if matplotlib == None:
|
|
arguments.absolute = 0
|
|
arguments.speedup = 0
|
|
arguments.sort = 0
|
|
arguments.window = 0
|
|
|
|
# Load results from files
|
|
if arguments.results != None:
|
|
vmList = []
|
|
|
|
for result in arguments.results:
|
|
with open(result) as resultsFile:
|
|
resultArray = json.load(resultsFile)
|
|
|
|
for test in resultArray:
|
|
for i in range(len(test)):
|
|
arr = test[i]
|
|
|
|
tr = TestResult()
|
|
|
|
tr.filename = arr[0]
|
|
tr.vm = arr[1]
|
|
tr.shortVm = arr[2]
|
|
tr.name = arr[3]
|
|
tr.values = arr[4]
|
|
tr.count = arr[5]
|
|
|
|
test[i] = tr
|
|
|
|
for test in resultArray[0]:
|
|
if vmList.count(test.vm) > 0:
|
|
pointPos = result.rfind(".")
|
|
|
|
if pointPos != -1:
|
|
vmList.append(test.vm + " [" + result[0:pointPos] + "]")
|
|
else:
|
|
vmList.append(test.vm + " [" + result + "]")
|
|
else:
|
|
vmList.append(test.vm)
|
|
|
|
if len(allResults) == 0:
|
|
allResults = resultArray
|
|
else:
|
|
for prevEl in allResults:
|
|
found = False
|
|
|
|
for nextEl in resultArray:
|
|
if nextEl[0].filename == prevEl[0].filename and nextEl[0].name == prevEl[0].name:
|
|
for run in nextEl:
|
|
prevEl.append(run)
|
|
found = True
|
|
|
|
if not found:
|
|
el = resultArray[0]
|
|
|
|
for run in el:
|
|
result = TestResult()
|
|
|
|
result.filename = run.filename
|
|
result.vm = run.vm
|
|
result.shortVm = run.shortVm
|
|
result.name = run.name
|
|
|
|
prevEl.append(result)
|
|
|
|
arguments.vmNext = []
|
|
|
|
for i in range(len(vmList)):
|
|
if i == 0:
|
|
arguments.vm = vmList[i]
|
|
else:
|
|
arguments.vmNext.append(vmList[i])
|
|
|
|
plotLegend.append(getShortVmName(arguments.vm))
|
|
|
|
if arguments.vmNext != None:
|
|
for compareVm in arguments.vmNext:
|
|
plotLegend.append(getShortVmName(compareVm))
|
|
else:
|
|
arguments.absolute = 1 # When looking at one VM, I feel that relative graph doesn't make a lot of sense
|
|
|
|
# Results table formatting
|
|
if arguments.vmNext != None:
|
|
resultPrinter = TablePrinter([
|
|
{'label': 'Test', 'align': Alignment.LEFT},
|
|
{'label': 'Min', 'align': Alignment.RIGHT},
|
|
{'label': 'Average', 'align': Alignment.RIGHT},
|
|
{'label': 'StdDev%', 'align': Alignment.RIGHT},
|
|
{'label': 'Driver', 'align': Alignment.LEFT},
|
|
{'label': 'Speedup', 'align': Alignment.RIGHT},
|
|
{'label': 'Significance', 'align': Alignment.LEFT},
|
|
{'label': 'P(T<=t)', 'align': Alignment.RIGHT}
|
|
])
|
|
else:
|
|
resultPrinter = TablePrinter([
|
|
{'label': 'Test', 'align': Alignment.LEFT},
|
|
{'label': 'Min', 'align': Alignment.RIGHT},
|
|
{'label': 'Average', 'align': Alignment.RIGHT},
|
|
{'label': 'StdDev%', 'align': Alignment.RIGHT},
|
|
{'label': 'Driver', 'align': Alignment.LEFT}
|
|
])
|
|
|
|
if arguments.results != None:
|
|
for resultSet in allResults:
|
|
# finalize results
|
|
for result in resultSet:
|
|
finalizeResult(result)
|
|
|
|
# analyze results
|
|
mainResult = resultSet[0]
|
|
compareResults = []
|
|
|
|
for i in range(len(resultSet)):
|
|
if i != 0:
|
|
compareResults.append(resultSet[i])
|
|
|
|
analyzeResult('', mainResult, compareResults)
|
|
else:
|
|
all_files = [subdir + os.sep + filename for subdir, dirs, files in os.walk(arguments.folder) for filename in files]
|
|
for filepath in sorted(all_files):
|
|
subdir, filename = os.path.split(filepath)
|
|
if filename.endswith(".lua"):
|
|
if arguments.run_test == None or re.match(arguments.run_test, filename[:-4]):
|
|
runTest(subdir, filename, filepath)
|
|
|
|
if arguments.sort and len(plotValueLists) > 1:
|
|
rearrange(rearrangeSortKeyForComparison)
|
|
elif arguments.sort and len(plotValueLists) == 1:
|
|
rearrange(rearrangeSortKeyDescending)
|
|
elif arguments.speedup:
|
|
rearrange(rearrangeSortKeyForSpeedup)
|
|
|
|
plotLegend[0] = arguments.vm + " vs " + arguments.vmNext[0]
|
|
|
|
if arguments.print_final_summary:
|
|
addTotalsToTable()
|
|
|
|
print()
|
|
print(colored(Color.YELLOW, '==================================================RESULTS=================================================='))
|
|
resultPrinter.print(summary=False)
|
|
print(colored(Color.YELLOW, '---'))
|
|
|
|
if len(vmTotalMin) != 0 and arguments.vmNext != None:
|
|
index = 0
|
|
|
|
for compareVm in arguments.vmNext:
|
|
index = index + 1
|
|
|
|
name = getShortVmName(os.path.abspath(compareVm))
|
|
deltaGeoMean = math.exp(vmTotalImprovement[index] / vmTotalResults[index]) * 100 - 100
|
|
|
|
if deltaGeoMean > 0:
|
|
print("'{}' change is {:.3f}% positive on average".format(name, deltaGeoMean))
|
|
else:
|
|
print("'{}' change is {:.3f}% negative on average".format(name, deltaGeoMean))
|
|
|
|
if matplotlib != None:
|
|
graph()
|
|
|
|
writeResultsToFile()
|
|
|
|
if influxReporter != None:
|
|
influxReporter.report_result(arguments.folder, "Total", "all", "SUCCESS", mainTotalMin, mainTotalAverage, mainTotalMax, 0.0, getShortVmName(arguments.vm), os.path.abspath(arguments.vm))
|
|
influxReporter.flush(0)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
arguments = argumentParser.parse_args()
|
|
run(arguments, None)
|