#!/usr/bin/python3
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
import argparse
import os
import subprocess
import math
import sys
import re
import json
# Taken from rotest
from color import colored, Color
from tabulate import TablePrinter, Alignment
try:
    import matplotlib
    import matplotlib.pyplot as plt
except ModuleNotFoundError:
    matplotlib = None

try:
    import scipy
    from scipy import stats
except ModuleNotFoundError:
    print("Warning: scipy package is not installed, confidence values will not be available")
    stats = None
scriptdir = os.path.dirname(os.path.realpath(__file__))
defaultVm = 'luau.exe' if os.name == "nt" else './luau'

argumentParser = argparse.ArgumentParser(description='Benchmark Lua script execution with an option to compare different VMs')

argumentParser.add_argument('--vm', dest='vm', default=defaultVm, help='Lua executable to test (' + defaultVm + ' by default)')
argumentParser.add_argument('--folder', dest='folder', default=os.path.join(scriptdir, 'tests'), help='Folder with tests (tests by default)')
argumentParser.add_argument('--compare', dest='vmNext', type=str, nargs='*', help='List of Lua executables to compare against')
argumentParser.add_argument('--results', dest='results', type=str, nargs='*', help='List of json result files to compare and graph')
argumentParser.add_argument('--run-test', action='store', default=None, help='Regex test filter')
argumentParser.add_argument('--extra-loops', action='store', type=int, default=0, help='Number of times to loop over one test (one test already performs multiple runs)')
argumentParser.add_argument('--filename', action='store', type=str, default='bench', help='File name for graph and results file')
argumentParser.add_argument('--callgrind', dest='callgrind', action='store_const', const=1, default=0, help='Use callgrind to run benchmarks')
argumentParser.add_argument('--show-commands', dest='show_commands', action='store_const', const=1, default=0, help='Show the command line used to launch the VM and tests')
if matplotlib != None:
    argumentParser.add_argument('--absolute', dest='absolute', action='store_const', const=1, default=0, help='Display absolute values instead of relative (enabled by default when benchmarking a single VM)')
    argumentParser.add_argument('--speedup', dest='speedup', action='store_const', const=1, default=0, help='Draw a speedup graph')
    argumentParser.add_argument('--sort', dest='sort', action='store_const', const=1, default=0, help='Sort values from worst to best improvements, ignoring conf. int. (disabled by default)')
    argumentParser.add_argument('--window', dest='window', action='store_const', const=1, default=0, help='Display window with resulting plot (disabled by default)')

argumentParser.add_argument('--graph-vertical', action='store_true', dest='graph_vertical', help="Draw graph with vertical bars instead of horizontal")
argumentParser.add_argument('--report-metrics', dest='report_metrics', help="Send metrics about this session to InfluxDB URL upon completion.")
argumentParser.add_argument('--print-influx-debugging', action='store_true', dest='print_influx_debugging', help="Print output to aid in debugging of influx metrics reporting.")
argumentParser.add_argument('--no-print-influx-debugging', action='store_false', dest='print_influx_debugging', help="Don't print output to aid in debugging of influx metrics reporting.")
argumentParser.add_argument('--no-print-final-summary', action='store_false', dest='print_final_summary', help="Don't print a table summarizing the results after all tests are run")

# Assume 2.5 IPC on a 4 GHz CPU; this is obviously incorrect but it allows us to display simulated instruction counts using regular time units
CALLGRIND_INSN_PER_SEC = 2.5 * 4e9
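# For example, a benchmark that retires 1e9 simulated instructions is reported as
# 1e9 / (2.5 * 4e9) * 1000 = 100ms of simulated time.
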
def arrayRange(count):
    result = []

    for i in range(count):
        result.append(i)

    return result

def arrayRangeOffset(count, offset):
    result = []

    for i in range(count):
        result.append(i + offset)

    return result

def getCallgrindOutput(stdout, lines):
    result = []
    name = None

    for l in lines:
        if l.startswith("desc: Trigger: Client Request: "):
            name = l[31:].strip()
        elif l.startswith("summary: ") and name != None:
            insn = int(l[9:])
            # Note: we only run each bench once under callgrind so we only report a single time per run; callgrind instruction count variance is ~0.01% so it might as well be zero
            result.append("|><|" + name + "|><|" + str(insn / CALLGRIND_INSN_PER_SEC * 1000.0) + "||_||")
            name = None

    # If no results were found above, this may indicate that the native executable running
    # the benchmark doesn't have built-in callgrind support. In that case, just
    # report the "totals" from the output file.
    if len(result) == 0:
        elements = stdout.decode('utf8').split("|><|")

        if len(elements) >= 2:
            name = elements[1]

            for l in lines:
                if l.startswith("totals: "):
                    insn = int(l[8:])
                    # Note: we only run each bench once under callgrind so we only report a single time per run; callgrind instruction count variance is ~0.01% so it might as well be zero
                    result.append("|><|" + name + "|><|" + str(insn / CALLGRIND_INSN_PER_SEC * 1000.0) + "||_||")

    return "".join(result)

def conditionallyShowCommand(cmd):
    if arguments.show_commands:
        print(f'{colored(Color.BLUE, "EXECUTING")}: {cmd}')

def checkValgrindExecutable():
    """Return true if valgrind can be successfully spawned"""
    try:
        subprocess.check_call("valgrind --version", shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except:
        print(f"{colored(Color.YELLOW, 'WARNING')}: Unable to spawn 'valgrind'. Please ensure valgrind is installed when using '--callgrind'.")
        return False

    return True

def getVmOutput(cmd):
    if os.name == "nt":
        try:
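            # 'start /realtime /affinity 1' runs the benchmark pinned to core 0 at
            # realtime priority class to reduce scheduling noise; '/b' avoids opening
            # a new window and '/wait' blocks until the process exits.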
fullCmd = " start /realtime /affinity 1 /b /wait cmd /C \" " + cmd + " \" "
conditionallyShowCommand ( fullCmd )
return subprocess . check_output ( fullCmd , shell = True , cwd = scriptdir ) . decode ( )
2021-10-29 21:25:12 +01:00
except KeyboardInterrupt :
exit ( 1 )
except :
return " "
    elif arguments.callgrind:
        if not checkValgrindExecutable():
            return ""

        output_path = os.path.join(scriptdir, "callgrind.out")

        try:
            os.unlink(output_path)  # Remove stale output
        except:
            pass

        fullCmd = "valgrind --tool=callgrind --callgrind-out-file=callgrind.out --combine-dumps=yes --dump-line=no " + cmd
        conditionallyShowCommand(fullCmd)

        try:
            output = subprocess.check_output(fullCmd, shell=True, stderr=subprocess.DEVNULL, cwd=scriptdir)
        except subprocess.CalledProcessError as e:
            print(f"{colored(Color.YELLOW, 'WARNING')}: Valgrind returned error code {e.returncode}")
            output = e.output

        with open(output_path, "r") as file:
            lines = file.readlines()

        os.unlink(output_path)
        return getCallgrindOutput(output, lines)
    else:
        conditionallyShowCommand(cmd)

        with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=scriptdir) as p:
            # Try to lock to a single processor
            if sys.platform != "darwin":
                os.sched_setaffinity(p.pid, {0})

            # Try to set high priority (requires sudo)
            try:
                os.nice(-10)
            except:
                pass

            return p.communicate()[0]

def getShortVmName(name):
    # Hope that the path to the executable doesn't contain spaces
    argumentPos = name.find(" ")

    if argumentPos != -1:
        executableName = name[0:argumentPos]
        arguments = name[argumentPos + 1:]

        pathPos = executableName.rfind("\\")

        if pathPos == -1:
            pathPos = executableName.rfind("/")

        if pathPos != -1:
            executableName = executableName[pathPos + 1:]

        return executableName + " " + arguments

    pathPos = name.rfind("\\")

    if pathPos == -1:
        pathPos = name.rfind("/")

    if pathPos != -1:
        return name[pathPos + 1:]

    return name

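# For example, getShortVmName("/home/user/luau --codegen") returns "luau --codegen",
# and getShortVmName("C:\\build\\luau.exe") returns "luau.exe" (illustrative paths).
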
class TestResult:
    filename = ""
    vm = ""
    shortVm = ""
    name = ""
    values = []
    count = 0
    min = None
    avg = 0
    max = None
    sampleStdDev = 0
    unbiasedEst = 0
    sampleConfidenceInterval = 0

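# Benchmark output protocol (inferred from the parsing below): each run is a
# "|><|"-separated record terminated by "||_||". The first element is free-form
# test output, the second is the benchmark name, and the remaining elements are
# per-iteration timings in milliseconds, e.g. "output|><|sumloop|><|12.5|><|12.6||_||".
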
def extractResult(filename, vm, output):
    elements = output.split("|><|")

    # Remove test output
    elements.remove(elements[0])

    result = TestResult()
    result.filename = filename
    result.vm = vm
    result.shortVm = getShortVmName(vm)

    result.name = elements[0]
    elements.remove(elements[0])

    timeTable = []
    for el in elements:
        timeTable.append(float(el))

    result.values = timeTable
    result.count = len(timeTable)

    return result

def mergeResult(lhs, rhs):
    for value in rhs.values:
        lhs.values.append(value)
    lhs.count = len(lhs.values)

def mergeResults(lhs, rhs):
    for a, b in zip(lhs, rhs):
        mergeResult(a, b)

def finalizeResult(result):
    total = 0.0

    # Compute basic parameters
    for v in result.values:
        if result.min == None or v < result.min:
            result.min = v
        if result.max == None or v > result.max:
            result.max = v
        total = total + v

    if result.count > 0:
        result.avg = total / result.count
    else:
        result.avg = 0

    # Compute standard deviation
    sumOfSquares = 0
    for v in result.values:
        sumOfSquares = sumOfSquares + (v - result.avg) ** 2

    if result.count > 1:
        result.sampleStdDev = math.sqrt(sumOfSquares / (result.count - 1))
        result.unbiasedEst = result.sampleStdDev * result.sampleStdDev

        if stats:
            # Two-tailed distribution with 95% conf.
            tValue = stats.t.ppf(1 - 0.05 / 2, result.count - 1)

            # Compute confidence interval
            result.sampleConfidenceInterval = tValue * result.sampleStdDev / math.sqrt(result.count)
        else:
            result.sampleConfidenceInterval = result.sampleStdDev
    else:
        result.sampleStdDev = 0
        result.unbiasedEst = 0
        result.sampleConfidenceInterval = 0

    return result

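# The half-width computed above is the standard t-based confidence interval,
#   CI = t(0.975, n - 1) * s / sqrt(n)
# e.g. (illustrative) n = 16 samples with sample stddev s = 2ms gives
# t(0.975, 15) ~= 2.131 and a half-width of 2.131 * 2 / 4 ~= 1.07ms.
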
# Full result set
allResults = []

# Data for the graph
plotLegend = []
plotLabels = []
plotValueLists = []
plotConfIntLists = []

# Totals
vmTotalMin = []
vmTotalAverage = []
vmTotalImprovement = []
vmTotalResults = []

# Data for Telegraf report
mainTotalMin = 0
mainTotalAverage = 0
mainTotalMax = 0

def getExtraArguments(filepath):
    try:
        with open(filepath) as f:
            for i in f.readlines():
                pos = i.find("--bench-args:")
                if pos != -1:
                    return i[pos + 13:].strip()
    except:
        pass

    return ""

def substituteArguments(cmd, extra):
    if argumentSubstitutionCallback != None:
        cmd = argumentSubstitutionCallback(cmd)

    if cmd.find("@EXTRA") != -1:
        cmd = cmd.replace("@EXTRA", extra)
    else:
        cmd = cmd + " " + extra

    return cmd

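# For example (illustrative), a benchmark script containing the line
#   -- --bench-args: -O2
# would have "-O2" appended to the VM command line, or substituted for "@EXTRA"
# if the command mentions it.
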
def extractResults(filename, vm, output, allowFailure):
    results = []

    splitOutput = output.split("||_||")

    if len(splitOutput) <= 1:
        if allowFailure:
            result = TestResult()
            result.filename = filename
            result.vm = vm
            result.shortVm = getShortVmName(vm)
            results.append(result)
        return results

    splitOutput.remove(splitOutput[len(splitOutput) - 1])

    for el in splitOutput:
        results.append(extractResult(filename, vm, el))

    return results

def analyzeResult(subdir, main, comparisons):
    # Aggregate statistics
    global mainTotalMin, mainTotalAverage, mainTotalMax

    mainTotalMin = mainTotalMin + main.min
    mainTotalAverage = mainTotalAverage + main.avg
    mainTotalMax = mainTotalMax + main.max

    if arguments.vmNext != None:
        resultPrinter.add_row({
            'Test': main.name,
            'Min': '{:8.3f}ms'.format(main.min),
            'Average': '{:8.3f}ms'.format(main.avg),
            'StdDev%': '{:8.3f}%'.format(main.sampleConfidenceInterval / main.avg * 100),
            'Driver': main.shortVm,
            'Speedup': "",
            'Significance': "",
            'P(T<=t)': ""
        })
    else:
        resultPrinter.add_row({
            'Test': main.name,
            'Min': '{:8.3f}ms'.format(main.min),
            'Average': '{:8.3f}ms'.format(main.avg),
            'StdDev%': '{:8.3f}%'.format(main.sampleConfidenceInterval / main.avg * 100),
            'Driver': main.shortVm
        })

    if influxReporter != None:
        influxReporter.report_result(subdir, main.name, main.filename, "SUCCESS", main.min, main.avg, main.max, main.sampleConfidenceInterval, main.shortVm, main.vm)

    print(colored(Color.GREEN, 'SUCCESS') + ': {:<40}'.format(main.name) + ": " + '{:8.3f}'.format(main.avg) + "ms +/- " +
        '{:6.3f}'.format(main.sampleConfidenceInterval / main.avg * 100) + "% on " + main.shortVm)

    plotLabels.append(main.name)

    index = 0

    if len(plotValueLists) < index + 1:
        plotValueLists.append([])
        plotConfIntLists.append([])

        vmTotalMin.append(0.0)
        vmTotalAverage.append(0.0)
        vmTotalImprovement.append(0.0)
        vmTotalResults.append(0)

    if arguments.absolute or arguments.speedup:
        scale = 1
    else:
        scale = 100 / main.avg

    plotValueLists[index].append(main.avg * scale)
    plotConfIntLists[index].append(main.sampleConfidenceInterval * scale)

    vmTotalMin[index] += main.min
    vmTotalAverage[index] += main.avg
    for compare in comparisons:
        index = index + 1

        if len(plotValueLists) < index + 1 and not arguments.speedup:
            plotValueLists.append([])
            plotConfIntLists.append([])

            vmTotalMin.append(0.0)
            vmTotalAverage.append(0.0)
            vmTotalImprovement.append(0.0)
            vmTotalResults.append(0)

        if compare.min == None:
            print(colored(Color.RED, 'FAILED') + ": '" + main.name + "' on '" + compare.vm + "'")
            resultPrinter.add_row({'Test': main.name, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': compare.shortVm, 'Speedup': "", 'Significance': "", 'P(T<=t)': ""})

            if influxReporter != None:
                influxReporter.report_result(subdir, main.filename, main.filename, "FAILED", 0.0, 0.0, 0.0, 0.0, compare.shortVm, compare.vm)

            if arguments.speedup:
                plotValueLists[0].pop()
                plotValueLists[0].append(0)
                plotConfIntLists[0].pop()
                plotConfIntLists[0].append(0)
            else:
                plotValueLists[index].append(0)
                plotConfIntLists[index].append(0)

            continue

        if main.count > 1 and stats:
            pooledStdDev = math.sqrt((main.unbiasedEst + compare.unbiasedEst) / 2)

            tStat = abs(main.avg - compare.avg) / (pooledStdDev * math.sqrt(2 / main.count))
            degreesOfFreedom = 2 * main.count - 2
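
            # This is the pooled two-sample t-test for equal sample sizes:
            #   t = |m1 - m2| / (s_p * sqrt(2 / n)),  df = 2n - 2
            # where s_p pools the two unbiased variance estimates; it assumes both
            # VMs produced the same number of runs (n == main.count).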

            # Two-tailed distribution with 95% conf.
            tCritical = stats.t.ppf(1 - 0.05 / 2, degreesOfFreedom)

            noSignificantDifference = tStat < tCritical
            pValue = 2 * (1 - stats.t.cdf(tStat, df=degreesOfFreedom))
        else:
            noSignificantDifference = None
            pValue = -1

        if noSignificantDifference is None:
            verdict = ""
        elif noSignificantDifference:
            verdict = "likely same"
        elif main.avg < compare.avg:
            verdict = "likely worse"
        else:
            verdict = "likely better"

        speedup = (plotValueLists[0][-1] / (compare.avg * scale) - 1)
        speedupColor = Color.YELLOW if speedup < 0 and noSignificantDifference else Color.RED if speedup < 0 else Color.GREEN if speedup > 0 else Color.YELLOW

        resultPrinter.add_row({
            'Test': main.name,
            'Min': '{:8.3f}ms'.format(compare.min),
            'Average': '{:8.3f}ms'.format(compare.avg),
            'StdDev%': '{:8.3f}%'.format(compare.sampleConfidenceInterval / compare.avg * 100),
            'Driver': compare.shortVm,
            'Speedup': colored(speedupColor, '{:8.3f}%'.format(speedup * 100)),
            'Significance': verdict,
            'P(T<=t)': '---' if pValue < 0 else '{:.0f}%'.format(pValue * 100)
        })

        print(colored(Color.GREEN, 'SUCCESS') + ': {:<40}'.format(main.name) + ": " + '{:8.3f}'.format(compare.avg) + "ms +/- " +
            '{:6.3f}'.format(compare.sampleConfidenceInterval / compare.avg * 100) + "% on " + compare.shortVm +
            ' ({:+7.3f}%, '.format(speedup * 100) + verdict + ")")

        if influxReporter != None:
            influxReporter.report_result(subdir, main.name, main.filename, "SUCCESS", compare.min, compare.avg, compare.max, compare.sampleConfidenceInterval, compare.shortVm, compare.vm)

        if arguments.speedup:
            oldValue = plotValueLists[0].pop()
            newValue = compare.avg

            plotValueLists[0].append((oldValue / newValue - 1) * 100)
            plotConfIntLists[0].pop()
            plotConfIntLists[0].append(0)
        else:
            plotValueLists[index].append(compare.avg * scale)
            plotConfIntLists[index].append(compare.sampleConfidenceInterval * scale)

        vmTotalMin[index] += compare.min
        vmTotalAverage[index] += compare.avg

        vmTotalImprovement[index] += math.log(main.avg / compare.avg)
        vmTotalResults[index] += 1

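# Note: vmTotalImprovement accumulates log(main.avg / compare.avg) per test so that
# the final summary can report the geometric mean of per-test ratios:
#   exp((log r_1 + ... + log r_n) / n) = (r_1 * ... * r_n)^(1/n)
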
def runTest(subdir, filename, filepath):
    filepath = os.path.abspath(filepath)

    mainVm = os.path.abspath(arguments.vm)

    # Process output will contain the test name and execution times
    mainOutput = getVmOutput(substituteArguments(mainVm, getExtraArguments(filepath)) + " " + filepath)
    mainResultSet = extractResults(filename, mainVm, mainOutput, False)

    if len(mainResultSet) == 0:
        print(colored(Color.RED, 'FAILED') + ": '" + filepath + "' on '" + mainVm + "'")

        if arguments.vmNext != None:
            resultPrinter.add_row({'Test': filepath, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': getShortVmName(mainVm), 'Speedup': "", 'Significance': "", 'P(T<=t)': ""})
        else:
            resultPrinter.add_row({'Test': filepath, 'Min': "", 'Average': "FAILED", 'StdDev%': "", 'Driver': getShortVmName(mainVm)})

        if influxReporter != None:
            influxReporter.report_result(subdir, filename, filename, "FAILED", 0.0, 0.0, 0.0, 0.0, getShortVmName(mainVm), mainVm)
        return

    compareResultSets = []

    if arguments.vmNext != None:
        for compareVm in arguments.vmNext:
            compareVm = os.path.abspath(compareVm)

            compareOutput = getVmOutput(substituteArguments(compareVm, getExtraArguments(filepath)) + " " + filepath)
            compareResultSet = extractResults(filename, compareVm, compareOutput, True)

            compareResultSets.append(compareResultSet)

    if arguments.extra_loops > 0:
        # Get more results
        for i in range(arguments.extra_loops):
            extraMainOutput = getVmOutput(substituteArguments(mainVm, getExtraArguments(filepath)) + " " + filepath)
            extraMainResultSet = extractResults(filename, mainVm, extraMainOutput, False)
            mergeResults(mainResultSet, extraMainResultSet)

            if arguments.vmNext != None:
                vmIndex = 0
                for compareVm in arguments.vmNext:
                    compareVm = os.path.abspath(compareVm)

                    extraCompareOutput = getVmOutput(substituteArguments(compareVm, getExtraArguments(filepath)) + " " + filepath)
                    extraCompareResultSet = extractResults(filename, compareVm, extraCompareOutput, True)
                    mergeResults(compareResultSets[vmIndex], extraCompareResultSet)
                    vmIndex += 1

    # Finalize results
    for result in mainResultSet:
        finalizeResult(result)

    for compareResultSet in compareResultSets:
        for result in compareResultSet:
            finalizeResult(result)

    # Analyze results
    for i in range(len(mainResultSet)):
        mainResult = mainResultSet[i]

        compareResults = []
        for el in compareResultSets:
            if i < len(el):
                compareResults.append(el[i])
            else:
                noResult = TestResult()
                noResult.filename = el[0].filename
                noResult.vm = el[0].vm
                noResult.shortVm = el[0].shortVm
                compareResults.append(noResult)

        analyzeResult(subdir, mainResult, compareResults)

        mergedResults = []
        mergedResults.append(mainResult)
        for el in compareResults:
            mergedResults.append(el)

        allResults.append(mergedResults)

def rearrangeSortKeyForComparison(e):
    if plotValueLists[1][e] == 0:
        return 1
    return plotValueLists[0][e] / plotValueLists[1][e]

def rearrangeSortKeyForSpeedup(e):
    return plotValueLists[0][e]

def rearrangeSortKeyDescending(e):
    return -plotValueLists[0][e]

# Re-arrange results from worst to best
def rearrange(key):
    global plotLabels

    index = arrayRange(len(plotLabels))
    index = sorted(index, key=key)

    # Recreate value lists in sorted order
    plotLabelsPrev = plotLabels
    plotLabels = []
    for i in index:
        plotLabels.append(plotLabelsPrev[i])

    for group in range(len(plotValueLists)):
        plotValueListPrev = plotValueLists[group]
        plotValueLists[group] = []

        plotConfIntListPrev = plotConfIntLists[group]
        plotConfIntLists[group] = []

        for i in index:
            plotValueLists[group].append(plotValueListPrev[i])
            plotConfIntLists[group].append(plotConfIntListPrev[i])

# Graph
def graph():
    if len(plotValueLists) == 0:
        print("No results")
        return

    ind = arrayRange(len(plotLabels))
    width = 0.8 / len(plotValueLists)

    if arguments.graph_vertical:
        # Extend graph width when we have a lot of tests to draw
        barcount = len(plotValueLists[0])
        plt.figure(figsize=(max(8, barcount * 0.3), 8))
    else:
        # Extend graph height when we have a lot of tests to draw
        barcount = len(plotValueLists[0])
        plt.figure(figsize=(8, max(8, barcount * 0.3)))

    plotBars = []

    matplotlib.rc('xtick', labelsize=10)
    matplotlib.rc('ytick', labelsize=10)

    if arguments.graph_vertical:
        # Draw Y grid behind the bars
        plt.rc('axes', axisbelow=True)
        plt.grid(True, 'major', 'y')

        for i in range(len(plotValueLists)):
            bar = plt.bar(arrayRangeOffset(len(plotLabels), i * width), plotValueLists[i], width, yerr=plotConfIntLists[i])
            plotBars.append(bar[0])

        if arguments.absolute:
            plt.ylabel('Time (ms)')
        elif arguments.speedup:
            plt.ylabel('Speedup (%)')
        else:
            plt.ylabel('Relative time (%)')

        plt.title('Benchmark')
        plt.xticks(ind, plotLabels, rotation='vertical')
    else:
        # Draw X grid behind the bars
        plt.rc('axes', axisbelow=True)
        plt.grid(True, 'major', 'x')

        for i in range(len(plotValueLists)):
            bar = plt.barh(arrayRangeOffset(len(plotLabels), i * width), plotValueLists[i], width, xerr=plotConfIntLists[i])
            plotBars.append(bar[0])

        if arguments.absolute:
            plt.xlabel('Time (ms)')
        elif arguments.speedup:
            plt.xlabel('Speedup (%)')
        else:
            plt.xlabel('Relative time (%)')

        plt.title('Benchmark')
        plt.yticks(ind, plotLabels)
        plt.gca().invert_yaxis()

    plt.legend(plotBars, plotLegend)
    plt.tight_layout()
    plt.savefig(arguments.filename + ".png", dpi=200)

    if arguments.window:
        plt.show()

def addTotalsToTable():
    if len(vmTotalMin) == 0:
        return

    if arguments.vmNext != None:
        index = 0

        resultPrinter.add_row({
            'Test': 'Total',
            'Min': '{:8.3f}ms'.format(vmTotalMin[index]),
            'Average': '{:8.3f}ms'.format(vmTotalAverage[index]),
            'StdDev%': "---",
            'Driver': getShortVmName(os.path.abspath(arguments.vm)),
            'Speedup': "",
            'Significance': "",
            'P(T<=t)': ""
        })

        for compareVm in arguments.vmNext:
            index = index + 1
            speedup = vmTotalAverage[0] / vmTotalAverage[index] * 100 - 100

            resultPrinter.add_row({
                'Test': 'Total',
                'Min': '{:8.3f}ms'.format(vmTotalMin[index]),
                'Average': '{:8.3f}ms'.format(vmTotalAverage[index]),
                'StdDev%': "---",
                'Driver': getShortVmName(os.path.abspath(compareVm)),
                'Speedup': colored(Color.RED if speedup < 0 else Color.GREEN if speedup > 0 else Color.YELLOW, '{:8.3f}%'.format(speedup)),
                'Significance': "",
                'P(T<=t)': ""
            })
    else:
        resultPrinter.add_row({
            'Test': 'Total',
            'Min': '{:8.3f}ms'.format(vmTotalMin[0]),
            'Average': '{:8.3f}ms'.format(vmTotalAverage[0]),
            'StdDev%': "---",
            'Driver': getShortVmName(os.path.abspath(arguments.vm))
        })

def writeResultsToFile():
    class TestResultEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, TestResult):
                return [obj.filename, obj.vm, obj.shortVm, obj.name, obj.values, obj.count]
            return json.JSONEncoder.default(self, obj)

    try:
        with open(arguments.filename + ".json", "w") as allResultsFile:
            allResultsFile.write(json.dumps(allResults, cls=TestResultEncoder))
    except:
        print("Failed to write results to a file")

def run(args, argsubcb):
    global arguments, resultPrinter, influxReporter, argumentSubstitutionCallback, allResults

    arguments = args
    argumentSubstitutionCallback = argsubcb

    if os.name == "nt" and arguments.callgrind:
        print(f"{colored(Color.RED, 'ERROR')}: --callgrind is not supported on Windows. Consider using this option on another OS, or on Linux via WSL.")
        sys.exit(1)

    if arguments.report_metrics or arguments.print_influx_debugging:
        import influxbench
        influxReporter = influxbench.InfluxReporter(arguments)
    else:
        influxReporter = None

    if matplotlib == None:
        arguments.absolute = 0
        arguments.speedup = 0
        arguments.sort = 0
        arguments.window = 0

    # Load results from files
    if arguments.results != None:
        vmList = []

        for result in arguments.results:
            with open(result) as resultsFile:
                resultArray = json.load(resultsFile)

                for test in resultArray:
                    for i in range(len(test)):
                        arr = test[i]

                        tr = TestResult()
                        tr.filename = arr[0]
                        tr.vm = arr[1]
                        tr.shortVm = arr[2]
                        tr.name = arr[3]
                        tr.values = arr[4]
                        tr.count = arr[5]

                        test[i] = tr

                for test in resultArray[0]:
                    if vmList.count(test.vm) > 0:
                        pointPos = result.rfind(".")
                        if pointPos != -1:
                            vmList.append(test.vm + " [" + result[0:pointPos] + "]")
                        else:
                            vmList.append(test.vm + " [" + result + "]")
                    else:
                        vmList.append(test.vm)

                if len(allResults) == 0:
                    allResults = resultArray
                else:
                    for prevEl in allResults:
                        found = False

                        for nextEl in resultArray:
                            if nextEl[0].filename == prevEl[0].filename and nextEl[0].name == prevEl[0].name:
                                for run in nextEl:
                                    prevEl.append(run)
                                found = True

                        if not found:
                            el = resultArray[0]

                            for run in el:
                                noResult = TestResult()
                                noResult.filename = run.filename
                                noResult.vm = run.vm
                                noResult.shortVm = run.shortVm
                                noResult.name = run.name
                                prevEl.append(noResult)

        arguments.vmNext = []

        for i in range(len(vmList)):
            if i == 0:
                arguments.vm = vmList[i]
            else:
                arguments.vmNext.append(vmList[i])

    plotLegend.append(getShortVmName(arguments.vm))

    if arguments.vmNext != None:
        for compareVm in arguments.vmNext:
            plotLegend.append(getShortVmName(compareVm))
    else:
        arguments.absolute = 1 # When looking at one VM, I feel that a relative graph doesn't make a lot of sense

    # Results table formatting
    if arguments.vmNext != None:
        resultPrinter = TablePrinter([
            {'label': 'Test', 'align': Alignment.LEFT},
            {'label': 'Min', 'align': Alignment.RIGHT},
            {'label': 'Average', 'align': Alignment.RIGHT},
            {'label': 'StdDev%', 'align': Alignment.RIGHT},
            {'label': 'Driver', 'align': Alignment.LEFT},
            {'label': 'Speedup', 'align': Alignment.RIGHT},
            {'label': 'Significance', 'align': Alignment.LEFT},
            {'label': 'P(T<=t)', 'align': Alignment.RIGHT}
        ])
    else:
        resultPrinter = TablePrinter([
            {'label': 'Test', 'align': Alignment.LEFT},
            {'label': 'Min', 'align': Alignment.RIGHT},
            {'label': 'Average', 'align': Alignment.RIGHT},
            {'label': 'StdDev%', 'align': Alignment.RIGHT},
            {'label': 'Driver', 'align': Alignment.LEFT}
        ])

    if arguments.results != None:
        for resultSet in allResults:
            # finalize results
            for result in resultSet:
                finalizeResult(result)

            # analyze results
            mainResult = resultSet[0]

            compareResults = []
            for i in range(len(resultSet)):
                if i != 0:
                    compareResults.append(resultSet[i])

            analyzeResult('', mainResult, compareResults)
    else:
        all_files = [subdir + os.sep + filename for subdir, dirs, files in os.walk(arguments.folder) for filename in files]

        for filepath in sorted(all_files):
            subdir, filename = os.path.split(filepath)

            if filename.endswith(".lua"):
                if arguments.run_test == None or re.match(arguments.run_test, filename[:-4]):
                    runTest(subdir, filename, filepath)

    if arguments.sort and len(plotValueLists) > 1:
        rearrange(rearrangeSortKeyForComparison)
    elif arguments.sort and len(plotValueLists) == 1:
        rearrange(rearrangeSortKeyDescending)
    elif arguments.speedup:
        rearrange(rearrangeSortKeyForSpeedup)
        plotLegend[0] = arguments.vm + " vs " + arguments.vmNext[0]

    if arguments.print_final_summary:
        addTotalsToTable()

        print()
        print(colored(Color.YELLOW, '==================================================RESULTS=================================================='))
        resultPrinter.print(summary=False)
        print(colored(Color.YELLOW, '---'))

        if len(vmTotalMin) != 0 and arguments.vmNext != None:
            index = 0
            for compareVm in arguments.vmNext:
                index = index + 1

                name = getShortVmName(os.path.abspath(compareVm))
                deltaGeoMean = math.exp(vmTotalImprovement[index] / vmTotalResults[index]) * 100 - 100

                if deltaGeoMean > 0:
                    print("'{}' change is {:.3f}% positive on average".format(name, deltaGeoMean))
                else:
                    print("'{}' change is {:.3f}% negative on average".format(name, deltaGeoMean))

    if matplotlib != None:
        graph()

    writeResultsToFile()

    if influxReporter != None:
        influxReporter.report_result(arguments.folder, "Total", "all", "SUCCESS", mainTotalMin, mainTotalAverage, mainTotalMax, 0.0, getShortVmName(arguments.vm), os.path.abspath(arguments.vm))
        influxReporter.flush(0)

if __name__ == "__main__":
    arguments = argumentParser.parse_args()
    run(arguments, None)