sho-lesson/ecdf.py
2019-01-21 08:12:36 +01:00

158 lines
4.6 KiB
Python

import sys
import csv
import argparse
import numpy as np
import matplotlib.pyplot as plt
from difflib import SequenceMatcher
def guess_number_evals(filenames):
    """Return the number of lines in the first file of `filenames`.

    Only `filenames[0]` is opened; the other entries are not looked at.
    The line count is used by callers as the number of evaluations.
    """
    first_path = filenames[0]
    with open(first_path, 'r') as handle:
        # Count lines by streaming over the file.
        line_count = sum(1 for _ in handle)
    return line_count
def along_runtime(filenames, data):
    """Fill `data` with the values recorded in each run file.

    Every file is read as ';'-separated rows. The first field of a row is
    parsed as an integer and used as the row index into `data`; the second
    field is parsed as a float and stored there. The file at position `fid`
    in `filenames` writes into column `fid`.

    Args:
        filenames: paths of the run files, one per column of `data`.
        data: 2D array indexed as `data[row, column]`, modified in place.

    Returns:
        The same `data` object that was passed in.

    Raises:
        ValueError: if a field cannot be parsed as int/float.
        IndexError: if a non-blank row has fewer than two fields, or if an
            index falls outside `data`.
    """
    for fid, filename in enumerate(filenames):
        with open(filename, 'r') as fd:
            for row in csv.reader(fd, delimiter=';'):
                if not row:
                    # csv.reader yields an empty list for a blank line
                    # (e.g. a trailing newline); there is nothing to store.
                    continue
                evals = int(row[0])
                data[evals, fid] = float(row[1])
    return data
def cumul(data, delta, optim = None, do_min = False):
    """Compute, for each row, the fraction of columns that passed the target.

    The array is first turned, in place, into a running maximum down each
    column. It is then divided by `optim` and compared against `delta`.

    Args:
        data: 2D array indexed as `data[row, column]`; overwritten in place
            with its column-wise running maximum.
        delta: threshold applied to the normalized values.
        optim: normalization constant. When falsy (None or 0), the maximum
            of `data` is used instead, which also avoids dividing by zero.
        do_min: if True count values strictly below `delta`, otherwise
            values strictly above it.

    Returns:
        1D array with one entry per row of `data`: the proportion of columns
        whose normalized value satisfies the comparison.
    """
    # Keep only best values along columns.
    # fmax ignores NaN, so an unparsable/invalid sample leaves the best value
    # seen so far untouched instead of resetting it.
    # NOTE(review): the running *maximum* is used even when do_min is True.
    data[...] = np.fmax.accumulate(data, axis=0)

    if not optim:
        optim = data.max()

    # Normalize.
    norm = data / optim

    # Threshold.
    if do_min:
        ecdf = (norm < delta)
    else:
        ecdf = (norm > delta)

    # Average the boolean hits of each row over the columns.
    return ecdf.mean(axis=1)
def parse(filenames, delta, nb_rows = None, optim = None, do_min = False):
    """Load a group of run files and return its ECDF for the target `delta`.

    When `nb_rows` is falsy, the row count is guessed from the first file.
    The table holds one extra row on top of that count and one column per
    file; it is filled by `along_runtime` and reduced by `cumul`.
    """
    row_count = nb_rows if nb_rows else guess_number_evals(filenames)
    table = np.zeros( (row_count+1, len(filenames)) )
    table = along_runtime(filenames, table)
    return cumul(table, delta, optim, do_min)
def make_name(names, delta, erts, name_strip = (), do_min = False):
    """Build a legend label for a group of runs that is not yet a key of `erts`.

    The label starts from the longest substring shared by all `names`
    (narrowed pairwise with difflib.SequenceMatcher), with every string of
    `name_strip` removed, followed by a LaTeX annotation of `delta`.
    If that label is already a key of `erts`, " (1)", " (2)", ... is
    appended until the label is unused.

    Args:
        names: non-empty sequence of run names (file names).
        delta: target value shown in the label.
        erts: mapping of already-used labels.
        name_strip: substrings to remove from the common part.
        do_min: unused; kept for call compatibility.

    Returns:
        The label string.

    Raises:
        IndexError: if `names` is empty.
    """
    common = names[0]
    for run in names:
        match = SequenceMatcher(None, common, run).find_longest_match(0, len(common), 0, len(run))
        common = common[match.a: match.a + match.size]

    for strp in name_strip:
        common = common.replace(strp, "")

    # Escaped backslash: "\D" is not a valid escape sequence.
    base = u"{} $\\Delta={}$".format(common, delta)

    # Disambiguate with a local counter until the label is free.
    name = base
    suffix = 0
    while name in erts:
        suffix += 1
        name = u"{} ({})".format(base, suffix)

    return name
if __name__ == "__main__":
    # Command-line entry point: read groups of run files, compute one ECDF
    # per (group, delta) pair and plot them all on a single figure.

    can = argparse.ArgumentParser()

    can.add_argument("-e", "--evals", metavar="NB", default=None, type=int,
            help="Max number of evaluations to consider")

    # can.add_argument("-q", "--quality", action='store_true',
    #         help="Produce Expected Quality ECDF, instead of Expected Runtime ECDF.")

    can.add_argument("-m", "--min", action='store_true',
            help="Minimization problem, instead of maximization.")

    can.add_argument("-o", "--optimum", metavar="VAL", default=None, type=float,
            help="Best value used for normalization (else, default to the max in the data).")

    can.add_argument("-s", "--name-strip", metavar="STR", default=[],
            type=str, action='append',
            help="Remove this string from the labels.")

    can.add_argument("-d", "--delta", metavar="PERC",
            action='append', type=float, required=True,
            help="Target(s), as a percentage of values normalized against optimum.")

    # nargs='+': every group needs at least one file, since the first file of
    # a group is opened to guess its size and used to build its label.
    can.add_argument("-r", "--runs", metavar="FILES", nargs='+', required=True, action='append')

    the = can.parse_args()

    erts = {}
    i = 0
    data_max = -1*float("inf")

    if the.optimum:
        data_max = the.optimum
    else:
        # The maximum does not depend on delta: read each group only once.
        sys.stderr.write("Compute max:\n")
        for i, runs in enumerate(the.runs, start=1):
            sys.stderr.write( "\r{}/{}".format(i, len(the.runs)) )
            # Size the table per group, the same way parse() does, so that
            # --evals is honoured and groups of different sizes fit.
            nb_rows = the.evals if the.evals else guess_number_evals(runs)
            data = along_runtime(runs, np.zeros( (nb_rows+1, len(runs)) ))
            data_max = max(data_max, data.max())

    sys.stderr.write("\nCompute ECDFs:\n")
    total = len(the.runs)*len(the.delta)
    i = 0
    for runs in the.runs:
        for delta in the.delta:
            i += 1
            sys.stderr.write( "\r{}/{}".format(i, total) )
            ert = parse(
                    runs, delta,
                    nb_rows = the.evals, optim = data_max, do_min = the.min
                )
            name = make_name(runs, delta, erts, the.name_strip, the.min)
            erts[name] = ert

    sys.stderr.write("\nPlot\n")
    plt.figure()

    for name in erts:
        plt.plot(erts[name], label=name)

    plt.ylim([0,1])

    # Comparison sign shown in the y-axis label.
    comp = "<" if the.min else ">"

    plt.ylabel(r"$P\left(f\left(\hat{x}\right)/"+str(data_max)+comp+r"\Delta\right)$")
    plt.xlabel("Time (#function evals)")
    plt.title("Expected RunTime Empirical Cumulative Density Function ({} runs)".format(str(len(the.runs[0]))))
    plt.legend()
    plt.show()