Add scripts for parsing and archive link

This commit is contained in:
Alix ZHENG 2021-09-07 00:27:44 +02:00
commit 807be1b3c2
30 changed files with 1570 additions and 18 deletions

View file

@ -0,0 +1,86 @@
#!/usr/bin/env python3
"""Print, as CSV on stdout, the best configuration found for each problem.

Usage examples:
    python3 best_out_of_elites.py ./fastga_results_all/fastga_results_planO/planO_maxExp=50000_maxEv=5n_2021-08-13T19:16+02:00_results_elites_all
    python3 best_out_of_elites.py ./fastga_results_all/fastga_results_random/maxEv=10000_nbAlgo=15_2021-08-21T20:53+02:00_results_randoms

For an "elites" result directory (recommendations suggested by 15 independent
runs of irace) or a "random algorithms" result directory, keep the
configuration with the best mean AUC for each of the 19 problems and print
one CSV row per parameter.
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt  # unused here but kept from the original script

figdir = sys.argv[1]  # directory holding the results of one experiment
# eg : ./fastga_results_all/fastga_results_plan1/plan1_maxExp\=100000_maxEv\=5n_2021-08-13T19\:04+02\:00_results_elites_all/

# ---------- elites produced by irace (directory name contains "plan") --------
if "plan" in figdir.split('/')[-2]:
    print("Operator,", "op. ,", ",".join(map(str, range(1, 20))))
    # parameter CSV label -> number of possible variants (2nd CSV column)
    column = {"pc": 101, "SelectC": 7, "Crossover": 10, "pm": 101,
              "SelectM": 7, "Mutation": 11, "Replacement": 11,
              "pop-size": 50, "offspring-size": 50}
    # -1 because the first "_"-separated token encodes the problem id ("pb=<n>")
    nbparam = len(os.listdir(os.path.join(figdir, "raw/data"))[0].split("_")) - 1
    if nbparam < len(column):
        # 7-parameter experiments did not tune the population sizes
        del column["pop-size"]
        del column["offspring-size"]
    configs = [(-1, -1)] * 19  # per problem: tuple (best mean auc, config dir name)
    res = np.zeros((nbparam, 19))
    # fastgadir: directory holding the 50 runs of one elite configuration
    for fastgadir in os.listdir(os.path.join(figdir, "raw/data")):
        average = []
        for fname in os.listdir(os.path.join(figdir, "raw/data", fastgadir)):
            with open(os.path.join(figdir, "raw/data", fastgadir, fname)) as fd:
                auc = float(fd.readlines()[0]) * -1  # stored negated; restore sign
            average.append(auc)
        pb = int(fastgadir.split("_")[0].split("=")[1])
        new_auc = np.mean(average)
        if configs[pb][0] < new_auc:
            configs[pb] = (new_auc, fastgadir)
    for pb in range(19):
        config = configs[pb][1].split("_")
        # drop the leading "pb=<n>" token, keep only the parameter values
        configparam = [p.split("=")[1] for p in config[1:]]
        res[:, pb] = configparam
    for ind, param_name in enumerate(column.keys()):
        print(param_name + "," + str(column[param_name]) + ",", ",".join(map(str, res[ind])))

# ---------- random algorithms (directory name contains "maxEv") --------------
if "maxEv" in figdir.split('/')[-2]:
    print("Operator,", "op. ,", ",".join(map(str, range(1, 20))))
    column = {"pc": 101, "SelectC": 7, "Crossover": 10, "pm": 101,
              "SelectM": 7, "Mutation": 11, "Replacement": 11,
              "pop-size": 50, "offspring-size": 50}
    # random algo directory names have no "pb=" prefix, hence no -1 here
    nbparam = len(os.listdir(figdir)[0].split("_"))
    if nbparam < len(column):
        del column["pop-size"]
        del column["offspring-size"]
    configs = [(-1, -1)] * 19  # per problem: tuple (best mean auc, algo dir name)
    bests = np.zeros((nbparam, 19))
    for algodir in os.listdir(figdir):  # algodir: directory of one random algo
        # fname: directory of the 50 fastga runs of this algo on one problem
        for fname in os.listdir(os.path.join(figdir, algodir, "data")):
            average = []
            # renamed from "res" to avoid shadowing the array of the elites branch
            for runfile in os.listdir(os.path.join(figdir, algodir, "data", fname)):
                with open(os.path.join(figdir, algodir, "data", fname, runfile)) as fd:
                    auc = float(fd.readlines()[0]) * -1
                average.append(auc)
            pb = int(fname.split("_")[0].split("=")[1])
            new_auc = np.mean(average)
            if configs[pb][0] < new_auc:
                configs[pb] = (new_auc, algodir)
    for pb in range(19):
        config = configs[pb][1].split("_")
        configparam = [p.split("=")[1] for p in config]
        bests[:, pb] = configparam
    for ind, param_name in enumerate(column.keys()):
        print(param_name + "," + str(column[param_name]) + ",", ",".join(map(str, bests[ind])))

View file

@ -0,0 +1,40 @@
#!/bin/bash
# Driver script: for every experiment of every plan under ${ldata}, run
# best_out_of_elites.py and store its CSV output under ${figpath}.
ldata="./fastga_results_all/" #fastga_results_all
figpath="./hist_and_csv/" #hist_and_csv
#get distribution of operators variants of all problems of each plan of fastga_results_all
#fastga_results_all contains all experiments of all plans
# NOTE(review): the unquoted $(ls ...) expansions below rely on shell word
# splitting; fine for these generated directory names (no spaces).
ldir=$(echo $(ls ${ldata})) #list of directory of each plan
for plan in ${ldir[@]} ; do #get the directory of each plan
lexperiment=$(echo $(ls ${ldata}/${plan}))
for experiments in ${lexperiment[@]} ; do
path="${ldata}/${plan}/${experiments}"
#----------------average aucs of each algo for each pb only for plan A,F,O ---------------
#myfig=${figpath}/auc_average_${plan}
#mkdir -p ${myfig}
#cmd="python3 parse_auc_average.py ${path} "
#$cmd > "${myfig}/auc_average_${experiments}.csv"
#--------------distribution of operators by pb and for all pb only for plan A,F,O ------
#myfig=${figpath}/distribution_op_${plan}
#mkdir -p ${myfig}
#cmd="python3 distribution_op_all.py ${path} ${myfig} "
#$cmd
#--------------best out csv--------
# one CSV per experiment: best configuration of each problem (stdout redirect)
cmd="python3 best_out_of_elites.py ${path}"
myfig=${figpath}/best_out_${plan}
mkdir -p ${myfig}
$cmd > ${myfig}/best_out_all_pb_${experiments}.csv
echo ${cmd}
done
done
#---------------distribution of operators of randoma algo------------------
#rpath=${ldata}/fastga_results_random
#cmd="python3 dist_op_random.py ${rpath} ${figpath}"
#$cmd
#---------------random---------------

View file

@ -1,7 +1,7 @@
#!/bin/bash
ldata=$1
file_py=$2
csvdir="csv_FA"
csvdir="csv_FAO"
ldir=$(echo $(ls ${ldata}))
for data in ${ldir[@]} ; do
path="${ldata}/${data}"

View file

@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""Distribution of operator variants chosen by the random algorithms, per maxEv.

Usage:
    python3 ./dist_op_random.py ./fastga_results_all/fastga_results_random/ ./hist_and_csv/

Binning:
    pc and pm                   -> 10 bins: [0-0.1[ [0.1-0.2[ ... [0.9-1]
    pop-size / offspring-size   -> 10 bins of width 5: ]0-5] ]5-10] ... ]45-50]
    categorical parameters      -> one bin per integer value

Writes one CSV per maxEv directory, plus one CSV aggregated over all of them.
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt  # unused here but kept from the original script

path = sys.argv[1]    # directory of the random-algorithms results
# eg : ./fastga_results_all/fastga_results_random/
figdir = sys.argv[2]  # directory where the CSV files are stored

if "random" in path:
    distdir = figdir + "/distribution_random"
    try:
        os.makedirs(distdir)
    except FileExistsError:
        pass
    res = []  # one counter dict per maxEv directory
    for maxEvdir in os.listdir(path):
        # parameter name -> [CSV label, histogram of chosen variants]
        # CONSISTENCY FIX: "mut-selector" has 7 variants (see the identical
        # table in distribution_op_all.py); the original allocated 10 bins,
        # which padded the CSV row with extra zero columns.
        res.append({"crossover-rate": ["pc", np.zeros(10, dtype=int)],
                    "cross-selector": ["SelectC", np.zeros(7, dtype=int)],
                    "crossover": ["Crossover", np.zeros(10, dtype=int)],
                    "mutation-rate": ["pm", np.zeros(10, dtype=int)],
                    "mut-selector": ["SelectM", np.zeros(7, dtype=int)],
                    "mutation": ["Mutation", np.zeros(11, dtype=int)],
                    "replacement": ["Replacement", np.zeros(11, dtype=int)],
                    "pop-size": ["pop-size", np.zeros(10, dtype=int)],
                    "offspring-size": ["offspring-size", np.zeros(10, dtype=int)]})
        for algodir in os.listdir(os.path.join(path, maxEvdir)):  # one random algo
            algo = algodir.split("_")
            for param in algo:
                name, val = param.split("=")[0], float(param.split("=")[1])
                if name in {"pop-size", "offspring-size"}:
                    if val % 5 == 0:
                        # exact multiples of 5 belong to the lower bin
                        res[-1][name][1][int(val // 5) - 1] += 1
                    else:
                        res[-1][name][1][int(val // 5)] += 1
                elif name in {"crossover-rate", "mutation-rate"}:
                    if int(val * 10) == 10:  # case of val=1 -> last bin
                        res[-1][name][1][-1] += 1
                    else:
                        res[-1][name][1][int(val * 10)] += 1
                else:
                    res[-1][name][1][int(val)] += 1
    ind = 0
    for maxEvdir in os.listdir(path):
        name = "distribution_random_" + maxEvdir.split("_")[0] + ".csv"
        with open(os.path.join(distdir, name), "w+") as csvfile:
            csvfile.write("Op index, " + ",".join(map(str, range(0, 11))) + "\n")
        with open(os.path.join(distdir, name), "a") as csvfile:
            for param_name in res[ind].keys():
                # pad short rows with "-" so every row has 11 value columns
                csvfile.write(res[ind][param_name][0] + "," + ",".join(map(str, res[ind][param_name][1])) + ",-" * (11 - len(res[ind][param_name][1])) + "\n")
        ind += 1
    # ---- aggregated over every maxEv directory ----
    name = "distribution_all_random_" + path.split("/")[-1] + ".csv"
    with open(os.path.join(distdir, name), 'w+') as csvfile:
        csvfile.write("Op index, " + ",".join(map(str, range(0, 11))) + "\n")
    with open(os.path.join(distdir, name), 'a') as csvfile:
        for param_name in res[0].keys():
            # res[0] only provides the CSV label; the counts are summed over
            # all directories.  BUG FIX: the original summed range(ind-1),
            # silently dropping the last maxEv directory from the aggregate.
            csvfile.write(res[0][param_name][0] + "," + ",".join(map(str, np.sum([res[i][param_name][1] for i in range(ind)], 0))) + ",-" * (11 - len(res[0][param_name][1])) + "\n")

View file

@ -0,0 +1,87 @@
#!/usr/bin/env python3
"""Distribution of operator variants recommended by 15 runs of irace.

For each of the 19 problems, and aggregated over all problems, count how often
each operator variant appears among the elite configurations, and write the
counts as CSV files.

Binning:
    pc and pm                 -> 10 bins: [0-0.1[ [0.1-0.2[ ... [0.9-1]
    pop-size / offspring-size -> 10 bins of width 5: ]0-5] ]5-10] ... ]45-50]
    categorical parameters    -> one bin per integer value

Usage:
    python3 distribution_op_all.py <plan_result_dir> <output_dir>
    # eg : ./fastga_results_all/fastga_results_planO/planO_maxExp\=100000_maxEv\=5n_..._results_elites_all/
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt  # unused here but kept from the original script

path = sys.argv[1]  # result directory of one experiment (must end with "/")
if "fastga_results_plan" in path:
    distdir = sys.argv[2]
    try:
        os.makedirs(distdir)
    except FileExistsError:
        pass
    # -1 because the first token of a config dir name is the problem id ("pb=<n>")
    nbparam = len(os.listdir(os.path.join(path, "raw/data"))[0].split("_")) - 1
    if nbparam == 7:
        # population sizes were not tuned in this experiment
        res = [{"crossover-rate": ["pc", np.zeros(10, dtype=int)],
                "cross-selector": ["SelectC", np.zeros(7, dtype=int)],
                "crossover": ["Crossover", np.zeros(10, dtype=int)],
                "mutation-rate": ["pm", np.zeros(10, dtype=int)],
                "mut-selector": ["SelectM", np.zeros(7, dtype=int)],
                "mutation": ["Mutation", np.zeros(11, dtype=int)],
                "replacement": ["Replacement", np.zeros(11, dtype=int)]} for i in range(19)]
    else:
        res = [{"crossover-rate": ["pc", np.zeros(10, dtype=int)],
                "cross-selector": ["SelectC", np.zeros(7, dtype=int)],
                "crossover": ["Crossover", np.zeros(10, dtype=int)],
                "mutation-rate": ["pm", np.zeros(10, dtype=int)],
                "mut-selector": ["SelectM", np.zeros(7, dtype=int)],
                "mutation": ["Mutation", np.zeros(11, dtype=int)],
                "replacement": ["Replacement", np.zeros(11, dtype=int)],
                "pop-size": ["pop-size", np.zeros(10, dtype=int)],
                "offspring-size": ["offspring-size", np.zeros(10, dtype=int)]} for i in range(19)]
    # fastgadir: directory holding the 50 runs of one elite configuration
    for fastgadir in os.listdir(os.path.join(path, "raw/data")):
        algo = fastgadir.split("_")
        pb = int(fastgadir.split("_")[0].split("=")[1])
        for param in algo[1:]:  # skip the leading "pb=<n>" token
            name, val = param.split("=")[0], float(param.split("=")[1])
            if name in {"pop-size", "offspring-size"}:
                if val % 5 == 0:
                    # exact multiples of 5 belong to the lower bin
                    res[pb][name][1][int(val // 5) - 1] += 1
                else:
                    res[pb][name][1][int(val // 5)] += 1
            elif name in {"crossover-rate", "mutation-rate"}:
                if int(val * 10) == 10:  # case of val=1 -> last bin
                    res[pb][name][1][-1] += 1
                else:
                    res[pb][name][1][int(val * 10)] += 1
            else:
                res[pb][name][1][int(val)] += 1
    for pb in range(19):
        name = "distribution_pb=" + str(pb) + "_" + path.split("/")[-2] + ".csv"  # the end of the path must be /
        with open(os.path.join(distdir, name), "w+") as csvfile:
            csvfile.write("Op index, " + ",".join(map(str, range(0, 11))) + "\n")
        with open(os.path.join(distdir, name), "a") as csvfile:
            for param_name in res[pb].keys():
                # pad short rows with "-" so every row has 11 value columns
                csvfile.write(res[pb][param_name][0] + "," + ",".join(map(str, res[pb][param_name][1])) + ",-" * (11 - len(res[pb][param_name][1])) + "\n")
    # ---- aggregated over all problems ----
    # NOTE(review): this aggregate file is written under <path>/raw while the
    # per-problem files go to distdir — confirm this asymmetry is intended.
    name = "distribution_all_pb_" + path.split("/")[-1] + ".csv"
    with open(os.path.join(path, "raw", name), 'w+') as csvfile:
        csvfile.write("Op index, " + ",".join(map(str, range(0, 11))) + "\n")
    with open(os.path.join(path, "raw", name), 'a') as csvfile:
        for param_name in res[0].keys():
            # res[0] only provides the label; counts are summed over all 19 problems
            csvfile.write(res[0][param_name][0] + "," + ",".join(map(str, np.sum([res[i][param_name][1] for i in range(19)], 0))) + ",-" * (11 - len(res[0][param_name][1])) + "\n")

View file

@ -1,15 +1,16 @@
#!/bin/bash
ldata=$1 # eg : ./csv_planF/ don t forget to end the path with /
file_sh=$2 #eg : ./run_elites_planF
ldata=$1
file_sh=$2
ldir=$(echo $(ls ${ldata}))
fastga_dir="fastga_results_all"
mkdir -p /scratchbeta/${USER}/${fatga_dir}
#mkdir -p "/home/${USER}/${fastga_dir}/fastga_results_plan1"
mkdir -p "/scratchbeta/${USER}/${fastga_dir}/fastga_results_planF"
mkdir -p "/scratchbeta/${USER}/${fastga_dir}/fastga_results_planA"
mkdir -p "/scratchbeta/${USER}/${fastga_dir}/fastga_results_planO"
for data in ${ldir[@]} ; do
path_csv="${ldata}${data}"
path_csv="${ldata}/${data}"
plan_name=$(echo ${data} | sed "s/results_irace_plan//")
mexp=$(echo ${data[@]} | cut -d _ -f4)
mexp_id=$(echo ${mexp} | cut -d = -f2)

View file

@ -0,0 +1,34 @@
#!/bin/bash
# Driver script: generate the per-plan, per-problem histograms for every plan
# under ${ldata}, then the cross-plan (F, A, R, O) comparison histograms for
# one chosen pair of budgets (irace budget ${mexp}, fastga budget ${mevals}).
ldata="./fastga_results_all" #fastga_results_all
figpath="./hist_and_csv" #hist_and_csv
ldir=$(echo $(ls ${ldata})) #list of directory of each plan
for plan in ${ldir[@]} ; do #get the directory of each plan
#------------hist by budget of a Plan (O,R or F)
#path="${ldata}/${plan}"
#cmd="python3 hist_join.py ${path} ${figpath}"
#echo $cmd
#$cmd
#---------------------------hist by pb by budget---------------
path="${ldata}/${plan}"
cmd="python3 hist_by_pb_budget_plan.py ${path} ${figpath}"
echo $cmd
$cmd
done
#---------------random------------------
#rpath=${ldata}/fastga_results_random
#cmd="python3 hist_join_random.py ${rpath} ${figpath}"
#---------------random---------------
#--------------------Choose a Budget irace and a budget fastga
mexp=100000
mevals=1000
#-------------------histogram join each plan F,A,R,O and join all algorithms for the budget chosen
# BUG FIX: both commands below passed ${figdir}, a variable that is never set
# (the output directory variable in this script is ${figpath}), so the output
# path argument expanded to nothing.
cmd="python3 hist_by_FARO.py ${ldata} ${figpath} ${mexp} ${mevals}"
$cmd
#-------------------histogram by pb join each plan F,A,R,O and join all algorithms for the budget chosen
cmd="python3 hist_by_FARO_pb.py ${ldata} ${figpath} ${mexp} ${mevals}"
$cmd

View file

@ -0,0 +1,71 @@
#!/usr/bin/env python3
"""One histogram comparing plans F, A, O and the random baseline.

All series share the same fastga budget, and — where a plan has an irace
budget (A, F) — the same irace budget.

Usage:
    python3 hist_by_FARO.py ./fastga_results_all/ ./hist_and_csv/ 100000 1000
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu  # kept: used by the commented-out stats below

path = sys.argv[1]     # root directory containing the fastga_results_* dirs
figpath = sys.argv[2]  # where the figure is saved
maxExp = sys.argv[3]   # irace budget selecting the experiment
maxEv = sys.argv[4]    # fastga budget selecting the experiment
indF = -1   # kept from the original (referenced by the commented-out stats)
indFO = -1
averageConfigs = []  # one list of AUCs per plotted series
name = []            # legend label per plotted series
for fastga in os.listdir(path):  # fastga: one fastga_results_* directory
    if fastga in {"fastga_results_planA", "fastga_results_planF", "fastga_results_planO"}:
        for plan in os.listdir(os.path.join(path, fastga)):
            print("maxExp=" + str(maxExp) + "_maxEv=" + str(maxEv) in plan, plan, "maxExp=" + str(maxExp) + "_maxEv=" + str(maxEv))
            if "maxExp=" + str(maxExp) + "_maxEv=" + str(maxEv) in plan:
                average = []
                # fastgadir: directory holding the 50 runs of one configuration
                for fastgadir in os.listdir(os.path.join(path, fastga, plan, "raw", "data")):
                    for fname in os.listdir(os.path.join(path, fastga, plan, "raw", "data", fastgadir)):
                        with open(os.path.join(path, fastga, plan, "raw", "data", fastgadir, fname)) as fd:
                            auc = float(fd.readlines()[0]) * (-1)  # stored negated
                        average.append(auc)
                averageConfigs.append(average)
                nameid = plan.split("_")[0][-1]  # the plan letter
                name.append("plan" + nameid + "_" + "_".join(plan.split("_")[1:3]))
    if "random" in fastga:
        for randir in os.listdir(os.path.join(path, fastga)):
            # eg randir: maxEv=100_nbAlgo=15_2021-08-20T1511+0200_results_randoms
            average = []
            if "maxEv=" + str(maxEv) + "_" in randir:
                for ddir in os.listdir(os.path.join(path, fastga, randir)):
                    if "crossover" in ddir:  # ddir: one random algorithm
                        for fastgadir in os.listdir(os.path.join(path, fastga, randir, ddir, "data")):
                            for fname in os.listdir(os.path.join(path, fastga, randir, ddir, "data", fastgadir)):
                                with open(os.path.join(path, fastga, randir, ddir, "data", fastgadir, fname)) as fd:
                                    auc = float(fd.readlines()[0]) * (-1)
                                average.append(auc)
                averageConfigs.append(average)
                name.append(randir.split("_")[0] + "_random")
figdir = os.path.join(figpath, "hist_FARO_by_budget")
try:
    os.makedirs(figdir)
except FileExistsError:
    pass
#_,pv=mannwhitneyu(averageConfigs[indFO],averageConfigs[indF])
plt.figure()
plt.hist(averageConfigs, bins=10, range=(0, 1), align="mid", rwidth=0.9, label=name)
plt.xlabel("performances")
plt.ylabel("Number of runs")
plt.xlim(0, 1)
plt.ylim(0, 8000)
plt.yticks(range(0, 8000, 500))
#plt.title("pvalue="+str(pv)+"\n medianeF="+str(np.median(averageConfigs[indF]))+", medianeFO="+str(np.median(averageConfigs[indFO])))
plt.legend()
plt.savefig(figdir + "/hist_planFARO" + "_maxExp=" + str(maxExp) + "_maxEv=" + str(maxEv) + ".png")
plt.close()

View file

@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""19 per-problem histograms comparing plans F, A, O and the random baseline.

Usage:
    python3 hist_by_FARO_pb.py ./fastga_results_all/ ./hist_and_csv/ 100000 1000
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt

path = sys.argv[1]     # root directory containing the fastga_results_* dirs
figpath = sys.argv[2]  # where the figures are saved
maxExp = sys.argv[3]   # irace budget selecting the experiment
maxEv = sys.argv[4]    # fastga budget selecting the experiment
hist_pb = [[] for i in range(19)]  # per problem: one AUC list per plotted series
name = []                          # legend label per plotted series
for fastga in os.listdir(path):
    if fastga in {"fastga_results_planA", "fastga_results_planF", "fastga_results_planO"}:
        for plan in os.listdir(os.path.join(path, fastga)):
            if "maxExp=" + str(maxExp) + "_maxEv=" + str(maxEv) + "_" in plan:
                nameid = fastga[-1]  # the plan letter
                name.append("plan" + nameid + "_".join(plan.split("_")[1:3]))
                # fastgadir: directory holding the 50 runs of one configuration
                for fastgadir in os.listdir(os.path.join(path, fastga, plan, "raw", "data")):
                    pb = int(fastgadir.split("_")[0].split("=")[1])
                    average_pb = []
                    for fname in os.listdir(os.path.join(path, fastga, plan, "raw", "data", fastgadir)):
                        with open(os.path.join(path, fastga, plan, "raw", "data", fastgadir, fname)) as fd:
                            auc = float(fd.readlines()[0]) * (-1)  # stored negated
                        average_pb.append(auc)
                    if hist_pb[pb] == []:  # first series for this problem
                        hist_pb[pb].append(average_pb)
                    elif len(hist_pb[pb]) != len(name):  # first config of a new series
                        hist_pb[pb].append(average_pb)
                    else:  # another configuration belonging to the current series
                        hist_pb[pb][len(name) - 1] += average_pb
    if "random" in fastga:
        for randir in os.listdir(os.path.join(path, fastga)):
            # eg randir: maxEv=100_nbAlgo=15_2021-08-20T1511+0200_results_randoms
            if ("maxEv=" + str(maxEv) + "_") in randir:
                name.append(randir.split("_")[0] + "_random")
                for ddir in os.listdir(os.path.join(path, fastga, randir)):
                    if "crossover" in ddir:  # ddir: one random algorithm
                        for fastgadir in os.listdir(os.path.join(path, fastga, randir, ddir, "data")):
                            average_pb = []
                            pb = int(fastgadir.split("_")[0].split("=")[1])
                            for fname in os.listdir(os.path.join(path, fastga, randir, ddir, "data", fastgadir)):
                                with open(os.path.join(path, fastga, randir, ddir, "data", fastgadir, fname)) as fd:
                                    auc = float(fd.readlines()[0]) * (-1)
                                average_pb.append(auc)
                            if hist_pb[pb] == []:  # first series for this problem
                                hist_pb[pb].append(average_pb)
                            elif len(hist_pb[pb]) != len(name):
                                hist_pb[pb].append(average_pb)
                            else:
                                hist_pb[pb][len(name) - 1] += average_pb
figdir = os.path.join(figpath, "hist_by_FARO_pb_maxExp={}_maxEv={}".format(maxExp, maxEv))
try:
    os.makedirs(figdir)
except FileExistsError:
    pass
print(name)
for pb in range(19):
    print(pb, len(hist_pb[pb]))
    for i in hist_pb[pb]:
        print(len(i))
    plt.figure()
    plt.hist(hist_pb[pb], bins=10, range=(0, 1), align="mid", rwidth=0.9, edgecolor="red", label=name)
    plt.xlabel("performances")
    plt.ylabel("Number of runs")
    plt.ylim(0, 800)
    plt.xlim(0, 1)
    plt.yticks(range(0, 800, 50))
    plt.legend()
    plt.savefig(figdir + "/hist_FARO_pb={}_maxExp={}_maxEv={}.png".format(pb, maxExp, maxEv))
    plt.close()

View file

@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""19 per-problem histograms for one plan, one bar series per budget.

Same data as hist_join.py but split by problem.

Usage:
    python3 hist_by_pb_budget_plan.py ./fastga_results_all/fastga_results_planF/ ./hist_and_csv/
    python3 hist_by_pb_budget_plan.py ./fastga_results_all/fastga_results_planO ./hist_and_csv
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt

path = sys.argv[1]     # directory of one plan (list of elite/random results)
figpath = sys.argv[2]  # where the figures are saved
hist_pb = [[] for i in range(19)]  # per problem: one AUC list per budget series
name = []                          # legend label per budget series
if "random" in path:
    plan_name = "R"
else:
    plan_name = path.strip("/").split("/")[-1][-1]  # last letter of the plan dir
for plandir in os.listdir(path):  # plandir: one experiment of elite results
    if "results_elites_all" in plandir:
        # eg : plan2_maxExp=10000_maxEv=1000_2021-08-20T1347+0200_results_elites_all
        budget_irace = plandir.split("_")[1].split("=")[1]
        budget_fastga = plandir.split("_")[2].split("=")[1]
        name.append("plan=" + plan_name + "_" + "".join(plandir.split("_")[1:3]))  # plan=*_maxExp=*_maxEv=*
        for algodir in os.listdir(os.path.join(path, plandir, "raw", "data")):
            average_pb = []
            pb = int(algodir.split("_")[0].split("=")[1])
            for algo in os.listdir(os.path.join(path, plandir, "raw", "data", algodir)):
                with open(os.path.join(path, plandir, "raw", "data", algodir, algo)) as fd:
                    auc = float(fd.readlines()[0]) * (-1)  # stored negated
                average_pb.append(auc)
            if hist_pb[pb] == []:  # first series for this problem
                hist_pb[pb].append(average_pb)
            elif len(hist_pb[pb]) != len(name):  # first config of a new series
                hist_pb[pb].append(average_pb)
            else:  # another configuration belonging to the current series
                hist_pb[pb][len(name) - 1] += average_pb
    if "results_randoms" in plandir:
        # eg : maxEv=1000_2021-08-20T1347+0200_results_random
        budget_fastga = plandir.split("_")[0].split("=")[1]
        name.append("plan=" + plan_name + "_" + "".join(plandir.split("_")[0]))  # plan=*_maxEv=*
        for algodir in os.listdir(os.path.join(path, plandir)):
            for algo in os.listdir(os.path.join(path, plandir, algodir, "data")):
                pb = int(algo.split("_")[0].split("=")[1])
                average_pb = []
                for fname in os.listdir(os.path.join(path, plandir, algodir, "data", algo)):
                    with open(os.path.join(path, plandir, algodir, "data", algo, fname)) as fd:
                        auc = float(fd.readlines()[0]) * (-1)
                    average_pb.append(auc)
                if hist_pb[pb] == []:  # first series for this problem
                    print("entrer")
                    hist_pb[pb].append(average_pb)
                elif len(hist_pb[pb]) != len(name):
                    hist_pb[pb].append(average_pb)
                else:
                    hist_pb[pb][len(name) - 1] += average_pb
print(path.split("/")[-1][-1])
figdir = os.path.join(figpath, "hist_by_{}_pb_budget_plan".format(plan_name))
#figdir=os.path.join(figpath,"hist_by_{}_pb_irace_maxEv={}".format(plan_name,1000))
try:
    os.makedirs(figdir)
except FileExistsError:
    pass
for pb in range(19):
    print(pb, len(hist_pb[pb]))
    plt.figure()
    plt.hist(hist_pb[pb], bins=10, range=(0, 1), align="mid", rwidth=0.9, edgecolor="red", label=name)
    plt.xlabel("performances")
    plt.ylabel("Number of runs")
    plt.ylim(0, 750)
    plt.yticks(range(0, 750, 50))
    plt.xlim(0, 1)
    plt.legend()
    plt.savefig(figdir + "/hist_plan={}_pb={}_budget.png".format(plan_name, pb))
    plt.close()

View file

@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""One histogram for a plan directory, one bar series per budget pair.

Usage:
    python3 hist_join.py ./fastga_results_all/fastga_results_planO/ ./hist_and_csv/
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu  # kept: used by the commented-out stats below

path = sys.argv[1]     # directory of one Plan (O, A or F)
figpath = sys.argv[2]  # path to store the histograms
averageConfigs = []  # one list of AUCs per experiment (budget pair)
name = []            # legend label per experiment
if "fastga_results_plan" in path:
    for ddir in os.listdir(path):  # ddir: one run_elites_all experiment
        if "plan" in ddir:
            average = []
            name.append("_".join(ddir.split("_")[1:3]))
            # fastgadir: directory holding the 50 runs of one configuration
            for fastgadir in os.listdir(os.path.join(path, ddir, "raw", "data")):
                for fname in os.listdir(os.path.join(path, ddir, "raw", "data", fastgadir)):
                    with open(os.path.join(path, ddir, "raw", "data", fastgadir, fname)) as fd:
                        auc = float(fd.readlines()[0]) * (-1)  # stored negated
                    average.append(auc)
            averageConfigs.append(average)
figdir = os.path.join(figpath, "hist_join")
try:
    os.makedirs(figdir)
except FileExistsError:
    pass
print(name, len(averageConfigs))
"""
idd0=name[0].split("_")[0].split("=")[1][:-3]+"k"
idd1=name[1].split("_")[0].split("=")[1][:-3]+"k"
idd2=name[2].split("_")[0].split("=")[1][:-3]+"k"
#only for Budget irace 10000, 50000, 100000 ie: only three experiment results
titlename="median"+idd0+"={:.3f}".format(np.median(averageConfigs[0]))+" , median"+idd1+"={:.3f}".format(np.median(averageConfigs[1]))+" , median"+idd2+"={:.3f}".format(np.median(averageConfigs[2]))
_,pv=mannwhitneyu(averageConfigs[0],averageConfigs[1])
titlename+="\n pvalue{}={:.3f}".format(idd0+idd1,pv)
_,pv=mannwhitneyu(averageConfigs[0],averageConfigs[2])
titlename+=" ,pvalue{}={:.3f}".format(idd0+idd2,pv)
_,pv=mannwhitneyu(averageConfigs[1],averageConfigs[2])
titlename+=" ,pvalue{}={:.3f}".format(idd1+idd2,pv)
print(titlename)
"""
plt.figure()
plt.hist(averageConfigs, bins=10, range=(0, 1), align="mid", rwidth=0.9, label=name)
plt.xlabel("performances")
plt.ylabel("Number of runs")
plt.xlim(0, 1)
plt.ylim(0, 7000)
plt.yticks(range(0, 7000, 500))
#plt.title(titlename)
plt.legend()
# NOTE(review): path.strip("/")[-1] is the LAST CHARACTER of the path (the
# plan letter), not the last path component — confirm this is intended.
plt.savefig(figdir + "/hist_plan" + path.strip("/")[-1] + "_by_budget.png")
#plt.savefig(figpath+"/hist_plan"+path.strip("/")[-1]+"_by_budgetI.png")
plt.close()

View file

@ -0,0 +1,46 @@
#!/usr/bin/env python3
"""One histogram joining all random-algorithm results, one series per maxEv.

Usage:
    python3 hist_random.py ./fastga_results_all/fastga_results_random ./hist_and_csv/
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt

data = sys.argv[1]     # root directory of the random-algorithm results
figpath = sys.argv[2]  # where the figure is saved
averageConfigs = []  # one list of AUCs per maxEv directory
name = []            # legend label per maxEv directory
for path in os.listdir(data):
    # eg path: maxEv=100_nbAlgo=15_2021-08-20T1511+0200_results_randoms
    average = []
    if "maxEv" in path:
        for ddir in os.listdir(os.path.join(data, path)):
            if "crossover" in ddir:  # ddir: one random algorithm directory
                # fastgadir: directory holding the 50 runs of one configuration
                for fastgadir in os.listdir(os.path.join(data, path, ddir, "data")):
                    for fname in os.listdir(os.path.join(data, path, ddir, "data", fastgadir)):
                        with open(os.path.join(data, path, ddir, "data", fastgadir, fname)) as fd:
                            auc = float(fd.readlines()[0]) * (-1)  # stored negated
                        average.append(auc)
        averageConfigs.append(average)
        name.append(path.split("_")[0])
figdir = os.path.join(figpath, "hist_join")
try:
    os.makedirs(figdir)
except FileExistsError:
    pass
# kept from the original; currently not passed to plt.hist
colors = ['yellow', 'green', "blue", "pink", "purple", "orange", "magenta", "gray", "darkred", "cyan", "brown", "olivedrab", "thistle", "stateblue"]
plt.figure()
plt.hist(averageConfigs, bins=10, range=(0, 1), align="mid", rwidth=0.5, label=name)
plt.xlabel("performances")
plt.ylabel("Number of runs")
plt.ylim([0, 8000])
plt.xlim(0, 1)
plt.yticks(range(0, 8000, 500))
plt.legend()
plt.savefig(figdir + "/hist_random_by_budget.png")
plt.close()

View file

@ -0,0 +1,13 @@
## Template for specifying forbidden parameter configurations in irace.
##
## This filename must be specified via the --forbidden-file command-line option
## (or forbiddenFile in scenario.txt).
##
## The format is one constraint per line. Each constraint is a logical
## expression (in R syntax). If a parameter configuration
## is generated that makes the logical expression evaluate to TRUE,
## then the configuration is discarded.
##
## Examples of valid logical operators are: == != >= <= > < & | ! %in%
(replacement %in% c(2,3,4,5,6,7,8,9,10)) & (offspringsize > popsize)
(replacement %in% c(1)) & (offspringsize < popsize)

View file

@ -0,0 +1,15 @@
## Template for specifying forbidden parameter configurations in irace.
##
## This filename must be specified via the --forbidden-file command-line option
## (or forbiddenFile in scenario.txt).
##
## The format is one constraint per line. Each constraint is a logical
## expression (in R syntax). If a parameter configuration
## is generated that makes the logical expression evaluate to TRUE,
## then the configuration is discarded.
##
## Examples of valid logical operators are: == != >= <= > < & | ! %in%
(replacement %in% c(2,3,4,5,6,7,8,9,10)) & (offspringsize > popsize)
(replacement %in% c(1)) & (offspringsize < popsize)
#(as.numeric(replacement) == 2) & (offspringsize > popsize)
#(as.numeric(replacement) == 3) & (offspringsize > popsize)

View file

@ -0,0 +1,48 @@
## This is an example of specifying instances with a file.
# Each line is an instance relative to trainInstancesDir
# (see scenario.txt.tmpl) and an optional sequence of instance-specific
# parameters that will be passed to target-runner when invoked on that
# instance.
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40

View file

@ -0,0 +1,228 @@
###################################################### -*- mode: r -*- #####
## Scenario setup for Iterated Race (irace).
############################################################################
## To use the default value of a parameter of iRace, simply do not set
## the parameter (comment it out in this file, and do not give any
## value on the command line).
## File that contains the description of the parameters of the target
## algorithm.
parameterFile = "./fastga.param"
## Directory where the programs will be run.
execDir = "."
## File to save tuning results as an R dataset, either absolute path or
## relative to execDir.
# logFile = "./irace.Rdata"
## Previously saved log file to recover the execution of irace, either
## absolute path or relative to the current directory. If empty or NULL,
## recovery is not performed.
# recoveryFile = ""
## Directory where training instances are located; either absolute path or
## relative to current directory. If no trainInstancesFiles is provided,
## all the files in trainInstancesDir will be listed as instances.
trainInstancesDir = "."
## File that contains a list of training instances and optionally
## additional parameters for them. If trainInstancesDir is provided, irace
## will search for the files in this folder.
trainInstancesFile = "./default.instances"
## File that contains a table of initial configurations. If empty or NULL,
## all initial configurations are randomly generated.
# configurationsFile = ""
## File that contains a list of logical expressions that cannot be TRUE
## for any evaluated configuration. If empty or NULL, do not use forbidden
## expressions.
# forbiddenFile = ""
## Script called for each configuration that executes the target algorithm
## to be tuned. See templates.
targetRunner = "./target-runner"
## Number of times to retry a call to targetRunner if the call failed.
# targetRunnerRetries = 0
## Optional data passed to targetRunner. This is ignored by the default
## targetRunner function, but it may be used by custom targetRunner
## functions to pass persistent data around.
# targetRunnerData = ""
## Optional R function to provide custom parallelization of targetRunner.
# targetRunnerParallel = ""
## Optional script or R function that provides a numeric value for each
## configuration. See templates/target-evaluator.tmpl
# targetEvaluator = ""
## Maximum number of runs (invocations of targetRunner) that will be
## performed. It determines the maximum budget of experiments for the
## tuning.
maxExperiments = 0 #100000
## Maximum total execution time in seconds for the executions of
## targetRunner. targetRunner must return two values: cost and time.
# maxTime = 60
## Fraction (smaller than 1) of the budget used to estimate the mean
## computation time of a configuration. Only used when maxTime > 0
# budgetEstimation = 0.02
## Maximum number of decimal places that are significant for numerical
## (real) parameters.
digits = 2
## Debug level of the output of irace. Set this to 0 to silence all debug
## messages. Higher values provide more verbose debug messages.
# debugLevel = 0
## Number of iterations.
# nbIterations = 0
## Number of runs of the target algorithm per iteration.
# nbExperimentsPerIteration = 0
## Randomly sample the training instances or use them in the order given.
# sampleInstances = 1
## Statistical test used for elimination. Default test is always F-test
## unless capping is enabled, in which case the default test is t-test.
## Valid values are: F-test (Friedman test), t-test (pairwise t-tests with
## no correction), t-test-bonferroni (t-test with Bonferroni's correction
## for multiple comparisons), t-test-holm (t-test with Holm's correction
## for multiple comparisons).
# testType = "F-test"
## Number of instances evaluated before the first elimination test. It
## must be a multiple of eachTest.
# firstTest = 5
## Number of instances evaluated between elimination tests.
# eachTest = 1
## Minimum number of configurations needed to continue the execution of
## each race (iteration).
# minNbSurvival = 0
## Number of configurations to be sampled and evaluated at each iteration.
# nbConfigurations = 0
## Parameter used to define the number of configurations sampled and
## evaluated at each iteration.
# mu = 5
## Confidence level for the elimination test.
# confidence = 0.95
## If the target algorithm is deterministic, configurations will be
## evaluated only once per instance.
# deterministic = 0
## Seed of the random number generator (by default, generate a random
## seed).
# seed = NA
## Number of calls to targetRunner to execute in parallel. Values 0 or 1
## mean no parallelization.
# parallel = 0
## Enable/disable load-balancing when executing experiments in parallel.
## Load-balancing makes better use of computing resources, but increases
## communication overhead. If this overhead is large, disabling
## load-balancing may be faster.
# loadBalancing = 1
## Enable/disable MPI. Use Rmpi to execute targetRunner in parallel
## (parameter parallel is the number of slaves).
# mpi = 0
## Specify how irace waits for jobs to finish when targetRunner submits
## jobs to a batch cluster: sge, pbs, torque or slurm. targetRunner must
## submit jobs to the cluster using, for example, qsub.
# batchmode = 0
## Enable/disable the soft restart strategy that avoids premature
## convergence of the probabilistic model.
# softRestart = 1
## Soft restart threshold value for numerical parameters. If NA, NULL or
## "", it is computed as 10^-digits.
# softRestartThreshold = ""
## Directory where testing instances are located, either absolute or
## relative to current directory.
# testInstancesDir = ""
## File containing a list of test instances and optionally additional
## parameters for them.
# testInstancesFile = ""
## Number of elite configurations returned by irace that will be tested if
## test instances are provided.
# testNbElites = 1
## Enable/disable testing the elite configurations found at each
## iteration.
# testIterationElites = 0
## Enable/disable elitist irace.
# elitist = 1
## Number of instances added to the execution list before previous
## instances in elitist irace.
# elitistNewInstances = 1
## In elitist irace, maximum number per race of elimination tests that do
## not eliminate a configuration. Use 0 for no limit.
# elitistLimit = 2
## User-defined R function that takes a configuration generated by irace
## and repairs it.
# repairConfiguration = ""
## Enable the use of adaptive capping, a technique designed for minimizing
## the computation time of configurations. This is only available when
## elitist is active.
# capping = 0
## Measure used to obtain the execution bound from the performance of the
## elite configurations: median, mean, worst, best.
# cappingType = "median"
## Method to calculate the mean performance of elite configurations:
## candidate or instance.
# boundType = "candidate"
## Maximum execution bound for targetRunner. It must be specified when
## capping is enabled.
# boundMax = 0
## Precision used for calculating the execution time. It must be specified
## when capping is enabled.
# boundDigits = 0
## Penalization constant for timed out executions (executions that reach
## boundMax execution time).
# boundPar = 1
## Replace the configuration cost of bounded executions with boundMax.
# boundAsTimeout = 1
## Percentage of the configuration budget used to perform a postselection
## race of the best configurations of each iteration after the execution
## of irace.
# postselection = 0
## Enable/disable AClib mode. This option enables compatibility with
## GenericWrapper4AC as targetRunner script.
# aclib = 0
## END of scenario file
############################################################################

View file

@ -0,0 +1,10 @@
# name switch type range
# continuator "--continuator=" c (0)
crossoverrate "--crossover-rate=" r (0,1)
crossselector "--cross-selector=" c (0,1,2,3,4,5,6)
# aftercrossselector "--aftercross-selector=" c (0)
crossover "--crossover=" c (0,1,2,3,4,5,6,7,8,9)
mutationrate "--mutation-rate=" r (0,1)
mutselector "--mut-selector=" c (0,1,2,3,4,5,6)
mutation "--mutation=" c (0,1,2,3,4,5,6,7,8,9,10)
replacement "--replacement=" c (0,1,2,3,4,5,6,7,8,9,10)

View file

@ -0,0 +1,88 @@
#!/bin/bash
###############################################################################
# irace target-runner: executed once per (configuration, instance, seed).
# Check the examples in examples/
#
# This script is run in the execution directory (execDir, --exec-dir).
#
# PARAMETERS:
# $1 is the candidate configuration number
# $2 is the instance ID
# $3 is the seed
# $4 is the instance name (a path; only its last component is used below)
# $5..$11 are the seven fastga parameters, already formatted as --flag=value
#
# RETURN VALUE:
# This script should print one numerical value: the cost that must be minimized.
# Exit with 0 if no error, with 1 in case of error
###############################################################################

# Print a timestamped error message and abort the run with status 1.
error() {
    echo "`TZ=UTC date`: $0: error: $@"
    exit 1
}

EXE="./fastga"
LOG_DIR="irace_logs"
# --problem=0 is a placeholder: r_iO.sh rewrites it to the actual problem id
# when it copies this script into each results_problem_<pb> directory.
FIXED_PARAMS="--problem=0"
MAX_EVALS=100   # NOTE(review): unused below — confirm whether it should be passed to fastga.

CONFIG_ID=$1
INSTANCE_ID=$2
SEED=$3
# Keep only the last path component of the instance name.
INSTANCE=$(echo $4 | sed 's/\//\n/g'|tail -n 1)
CROSSOVER_RATE=$5
CROSSOVER_SELECTOR=$6
CROSSOVER=$7
MUTATION_RATE=$8
MUT_SELECTOR=$9
MUTATION=${10}
REPLACEMENT=${11}
shift 11 || error "Not enough parameters"
INSTANCE_PARAMS=$*

# Logs are discarded by default; switch to per-run files for debugging:
# STDOUT=${LOG_DIR}/c${CONFIG_ID}_i${INSTANCE_ID}_s${SEED}.stdout
# STDERR=${LOG_DIR}/c${CONFIG_ID}_i${INSTANCE_ID}_s${SEED}.stderr
STDOUT="/dev/null"
STDERR="/dev/null"

if [ ! -x "${EXE}" ]; then
    error "${EXE}: not found or not executable (pwd: $(pwd))"
fi

cmd="$EXE ${FIXED_PARAMS} --instance=${INSTANCE} --seed=${SEED} ${CROSSOVER_RATE} ${CROSSOVER_SELECTOR} ${CROSSOVER} ${MUTATION_RATE} ${MUT_SELECTOR} ${MUTATION} ${REPLACEMENT}"
# NOTE: irace seems to capture both stderr and stdout, so you should not output to stderr
# NOTE(review): this echo is clobbered by the 2> redirection on the next line
# whenever STDERR is a real file — harmless while STDERR=/dev/null, but confirm
# before re-enabling the per-run log files above.
echo ${cmd} > ${STDERR}
# fastga prints the cost on stdout; tee forwards it to irace.
$cmd 2> ${STDERR} | tee ${STDOUT}

View file

@ -0,0 +1,140 @@
#!/usr/bin/env python3
"""Mann-Whitney U comparison between plan F and the random plan (plan R).

Walks the validation-run result tree, collects the AUC samples of both plans
for each of the 19 problems, then writes one CSV with the U-test p-values and
descriptive statistics (mean/median/std/IQR and their F-R differences).

cmd eg :
  python3 hist_by_2_4_5.py ./fastga_results_all/ ./hist_and_csv/ 100000 1000
(change lines 23 and 44 for another plan, and maxExp, maxEv for another budget)
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu

path = sys.argv[1]     # root directory holding the fastga_results_* subdirectories
figpath = sys.argv[2]  # directory in which to store the output CSV
maxExp = sys.argv[3]   # irace budget (maxExperiments) selecting which runs to load
maxEv = sys.argv[4]    # evaluation budget selecting which runs to load
hist_pb = [[] for i in range(19)]  # hist_pb[pb][plan_index] -> list of AUC values
name = []                          # plan labels, parallel to the inner lists of hist_pb
randind = -1                       # index of the random plan inside name / hist_pb[pb]

for fastga in os.listdir(path):  # one directory per plan family (fastga_results_*)
    if(fastga in {"fastga_results_planF"}):
        for plan in os.listdir(os.path.join(path, fastga)):
            print("maxExp="+str(maxExp)+"_maxEv="+str(maxEv)+"_" in plan,plan,"maxExp="+str(maxExp)+"_maxEv="+str(maxEv))
            # keep only the experiments run with the requested budgets
            if("maxExp="+str(maxExp)+"_maxEv="+str(maxEv)+"_" in plan):
                name.append("_".join(plan.split("_")[:3]))
                for fastgadir in os.listdir(os.path.join(path,fastga,plan,"raw","data")): #fastgadir : directory of 50 runs of a configuration
                    pb=int(fastgadir.split("_")[0].split("=")[1])
                    average_pb=[]
                    for fname in os.listdir(os.path.join(path,fastga,plan,"raw","data",fastgadir)):
                        with open(os.path.join(path,fastga,plan,"raw","data",fastgadir,fname)) as fd:
                            auc = float(fd.readlines()[0])
                            average_pb.append(auc)
                    if(hist_pb[pb]==[]): #first algo
                        hist_pb[pb].append(average_pb)
                    elif(len(hist_pb[pb])!=len(name)):
                        hist_pb[pb].append(average_pb)
                    else:
                        hist_pb[pb][len(name)-1]+=average_pb #another algo for the same plan
    if("random" in fastga):
        for randir in os.listdir(os.path.join(path,fastga)):
            #eg path: maxEv=100_nbAlgo=15_2021-08-20T1511+0200_results_randoms
            if(("maxEv="+str(maxEv)+"_") in randir):
                print("maxEv="+str(maxEv) in randir,randir)
                name.append(randir.split("_")[0]+"_random")
                randind=len(name)-1
                print(randind,name)
                for ddir in os.listdir(os.path.join(path,fastga,randir)): #ddir : directory of one run_elites_all or more
                    if("crossover" in ddir):
                        for fastgadir in os.listdir(os.path.join(path,fastga,randir,ddir,"data")): #fastgadir : directory of 50 runs of a configuration
                            average_pb=[]
                            pb=int(fastgadir.split("_")[0].split("=")[1])
                            for fname in os.listdir(os.path.join(path,fastga,randir,ddir,"data",fastgadir)):
                                with open(os.path.join(path,fastga,randir,ddir,"data",fastgadir,fname)) as fd:
                                    auc = float(fd.readlines()[0])
                                    average_pb.append(auc)
                            if(hist_pb[pb]==[]): #first algo
                                hist_pb[pb].append(average_pb)
                            elif(len(hist_pb[pb])!=len(name)):
                                hist_pb[pb].append(average_pb)
                            else:
                                hist_pb[pb][len(name)-1]+=average_pb #another algo for the same plan

figdir=os.path.join(figpath,"mwtestU_FR")
try:
    os.makedirs(figdir)
except FileExistsError:
    pass
print(name)
filename="mwtestU_maxExp={}_maxEv={}_FR.csv".format(maxExp,maxEv)
# CSV header: one column per problem id (0..18)
with open(os.path.join(figdir,filename),'w+') as csvfile:
    csvfile.write(" ,"+",".join(map(str,range(0,19)))+"\n")
meanvalue=[]   # mean(F) - mean(R) per problem
pvalue=[]      # Mann-Whitney U p-value per problem
meanR=[]
meanF=[]
mdianR=[]
mdianF=[]
mdianvalue=[]  # median(F) - median(R) per problem
iqrR=[]
iqrF=[]
stdR=[]
stdF=[]
iqrvalue=[]
pstd=[]        # std(F) - std(R) per problem
for pb in range(19):
    # plan F is the non-random entry: with exactly two plans its index is
    # 1-randind (np.abs guards against either collection order).
    _,pv=mannwhitneyu(hist_pb[pb][np.abs(1-randind)],hist_pb[pb][randind])
    print(_,pv)
    pstd.append(np.std(hist_pb[pb][np.abs(1-randind)])-np.std(hist_pb[pb][randind]))
    stdF.append(np.std(hist_pb[pb][np.abs(1-randind)]))
    stdR.append(np.std(hist_pb[pb][randind]))
    meanF.append(np.mean(hist_pb[pb][np.abs(1-randind)]))
    meanR.append(np.mean(hist_pb[pb][randind]))
    mdianF.append(np.median(hist_pb[pb][np.abs(1-randind)]))
    mdianR.append(np.median(hist_pb[pb][randind]))
    mdianvalue.append(np.median(hist_pb[pb][np.abs(1-randind)])-np.median(hist_pb[pb][randind]))
    meanvalue.append(np.mean(hist_pb[pb][np.abs(1-randind)])-np.mean(hist_pb[pb][randind]))
    pvalue.append(pv)
    # Interquartile range of plan F.
    # NOTE(review): the `interpolation` keyword was renamed to `method` in
    # NumPy 1.22; kept as-is for compatibility with older NumPy — confirm the
    # target NumPy version before renaming.
    Q1 = np.percentile(hist_pb[pb][np.abs(1-randind)], 25, interpolation = 'midpoint')
    Q3 = np.percentile(hist_pb[pb][np.abs(1-randind)], 75, interpolation = 'midpoint')
    iqrF.append( Q3 - Q1)
    # Interquartile range of the random plan.
    Q1 = np.percentile(hist_pb[pb][randind], 25, interpolation = 'midpoint')
    Q3 = np.percentile(hist_pb[pb][randind], 75, interpolation = 'midpoint')
    iqrR.append( Q3 - Q1)
    print(_,pv)
iqrvalue=np.array(iqrF)-np.array(iqrR)
with open(os.path.join(figdir,filename),'a') as csvfile:
    csvfile.write("mF-mR,"+",".join(map(str,meanvalue))+"\n")
    csvfile.write("p_value,"+",".join(map(str,pvalue))+"\n")
    csvfile.write("mF,"+",".join(map(str,meanF))+"\n")
    csvfile.write("mR,"+",".join(map(str,meanR))+"\n")
    csvfile.write("medianF-medianR,"+",".join(map(str,mdianvalue))+"\n")
    csvfile.write("medianF,"+",".join(map(str,mdianF))+"\n")
    csvfile.write("medianR,"+",".join(map(str,mdianR))+"\n")
    # BUG FIX: this row previously wrote mdianvalue (the median differences);
    # the "stdF-stdR" row must contain pstd, the std differences computed above.
    csvfile.write("stdF-stdR,"+",".join(map(str,pstd))+"\n")
    csvfile.write("stdF,"+",".join(map(str,stdF))+"\n")
    csvfile.write("stdR,"+",".join(map(str,stdR))+"\n")
    csvfile.write("iqrF,"+",".join(map(str,iqrF))+"\n")
    csvfile.write("iqrR,"+",".join(map(str,iqrR))+"\n")
    csvfile.write("iqrF-iqrR,"+",".join(map(str,iqrvalue))+"\n")

View file

@ -0,0 +1,35 @@
#!/usr/bin/env python3
"""Parse the irace logs of a plan F / plan O experiment into one CSV on stdout.

For every `*results_irace*/results_problem_<pb>/irace.log` under the directory
given as argv[1], extract the last "Best-so-far" configuration (its id and
performance), look up the matching configuration-detail line, and print one
CSV row: pb, ecdf, id, then the seven operator parameters.
"""
import os
import re
import sys

print("pb,ecdf,id,crossover-rate,cross-selector,crossover,mutation-rate,mut-selector,mutation,replacement")
#give the path of one experiment
argv=sys.argv[1]
for datadir in os.listdir(argv):
    if(datadir.find("results_irace")>=0): #check if the directory is one JOB
        for pb_dir in os.listdir(os.path.join(argv,datadir)):
            if "results_problem" in pb_dir:
                pb_id=pb_dir.replace("results_problem_","")
                with open(os.path.join("./",argv,datadir,pb_dir,"irace.log")) as fd:
                    data = fd.readlines()
                # Find the last best configuration reported by irace.
                bests = [line.strip() for line in data if "Best-so-far" in line]
                best = bests[-1].split()
                best_id, best_perf = best[2], best[5]
                # Filter the configuration-detail line of that configuration.
                # NOTE(review): `best_id in line` is a substring match, so id "1"
                # also matches "10"; confirm irace.log only lists the best id here.
                configs = [line.strip() for line in data if "--crossover-rate=" in line and best_id in line]
                # Format as CSV: replace each "--flag=" by a comma separator.
                # BUG FIX: use a raw string — "\-" is an invalid escape sequence
                # (DeprecationWarning today, a SyntaxError in future Python).
                algo = re.sub(r"--\S*=", ",", configs[0])
                csv_line = pb_id + "," + best_perf + "," + algo
                print(csv_line.replace(" ",""))

View file

@ -0,0 +1,34 @@
#!/usr/bin/env python3
"""Print the per-problem average AUC of the validated elite configurations.

Reads every `<figdir>/raw/data/<config>` directory (one per validated
configuration, each holding one result file per seed), averages the AUC
values of each configuration over its seeds, and prints one CSV row per
problem (0..18) with the mean AUC of every configuration of that problem.
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
#get the auc average values of one experiment
figdir=sys.argv[1] # directory of a result of one experiment
#eg : ./fastga_results_all/fastga_results_planO/planO_maxExp\=100000_maxEv\=5n_2021-08-13T19\:04+02\:00_results_elites_all/raw
if("fastga_results_plan" in figdir):
    # NOTE(review): header names columns 1..15 (presumably the 15 irace runs),
    # while each row below prints one column per configuration found — confirm.
    print("FID,",",".join(map(str,range(1,16))))
aucs=[[] for i in range(19)]  # aucs[pb] -> one per-seed AUC list per configuration
for fastgadir in os.listdir(os.path.join(figdir,"raw/data")): #fastgadir : directory of 50 runs of an elite configuration
    average=[]
    for fname in os.listdir(os.path.join(figdir,"raw/data",fastgadir)):
        with open(os.path.join(figdir,"raw/data",fastgadir,fname)) as fd:
            # files store negated AUC (cost); flip the sign back
            auc = float(fd.readlines()[0]) * -1
            average.append(auc)
    # directory names look like "pb=<id>_...": index by the problem id
    aucs[int(fastgadir.split("_")[0].split("=")[1])].append(average)
for i in range(19):
    # mean over the seeds (axis 1) of every configuration of problem i
    print(str(i)+",",",".join(map(str,np.mean(aucs[i],1))))

View file

@ -14,7 +14,7 @@ outdir="${dir}/dataA_maxExp=${mexp}_maxEv=${mevals}_$(date --iso-8601=seconds)"
mkdir -p ${outdir}
for r in $(seq 2); do
echo "Run $r/15";
cmd="qsub -N iraceA_maxEv_${r} -q beta -l select=1:ncpus=1 -l walltime=00:30:00 -- ${scratchpath}/planA/r_iA.sh ${outdir} ${r} ${mexp} ${mevals} ${myhome}"
cmd="qsub -N iraceA_maxEv_${r} -q beta -l select=1:ncpus=1 -l walltime=00:25:00 -- ${scratchpath}/planA/r_iA.sh ${outdir} ${r} ${mexp} ${mevals} ${myhome}"
#cmd="bash ./r_iA_buckets.sh ${outdir} ${r} ${mexp} ${mevals}"
echo $cmd
time -p $cmd

View file

@ -15,7 +15,7 @@ for r in $(seq 2); do
echo "Run $r/15";
#date -Iseconds
#cmd="qsub -N irace_${runs}_${buckets}" -q beta -l select=1:ncpus=1 -l walltime=00:04:00 --${HOME}/run_irace.sh ${dir}
cmd="qsub -N iraceF_${mevals}_run=${r} -q beta -l select=1:ncpus=1 -l walltime=00:30:00 -- ${scratchpath}/planF/r_iF.sh ${dir} ${r} ${mexp} ${mevals} ${myhome}"
cmd="qsub -N iraceF_${mevals}_run=${r} -q beta -l select=1:ncpus=1 -l walltime=00:25:00 -- ${scratchpath}/planF/r_iF.sh ${dir} ${r} ${mexp} ${mevals} ${myhome}"
#time -p bash ${HOME}/plan2/run_irace2.sh ${dir} ${r} &> ${dir}/erreur_${r}.txt
#bash ${HOME}/test/r_i.sh
echo $cmd

View file

@ -0,0 +1,43 @@
#!/bin/bash
# One plan O irace job: runs irace once for each of the 19 problems.
#
# PARAMETERS:
#   $1 dir          : output root for this experiment
#   $2 run          : run number (1..15), used to name the output directory
#   $3 budget_irace : maxExperiments value substituted into the scenario
#   $4 buckets      : maxEv bucket count (NOTE(review): unused below — confirm)
#   $5 myhome       : home directory holding R, irace_files_pO and the fastga binary
#run once each problem
. /etc/profile.d/modules.sh
export MODULEPATH=${MODULEPATH}${MODULEPATH:+:}/opt/dev/Modules/Anaconda:/opt/dev/Modules/Compilers:/opt/dev/Modules/Frameworks:/opt/dev/Modules/Libraries:/opt/dev/Modules/Tools:/opt/dev/Modules/IDEs:/opt/dev/Modules/MPI
module load LLVM/clang-llvm-10.0
module load R
dir=$1
run=$2
budget_irace=$3
buckets=$4
myhome=$5
cp -r ${myhome}/R .
cp -r ${myhome}/irace_files_pO .
outdir="${run}_$(date --iso-8601=seconds)_results_irace"
echo "start a job $(date -Iseconds)"
for pb in $(seq 0 18) ; do
    echo "Problem ${pb}... "
    res="results_problem_${pb}"
    mkdir -p ${dir}/${outdir}/${res}
    # For some reason, irace absolutely needs those files next to the scenario:
    # the binary, the scenario (with paths and budget rewritten for this run),
    # the instance list, the parameter space and the target-runner (with the
    # problem id rewritten).
    cp ${myhome}/code/paradiseo/eo/contrib/irace/release/fastga ${dir}/${outdir}/${res}
    cat ./irace_files_pO/example.scen | sed "s%\".%\"${dir}/${outdir}/${res}%g" | sed "s/maxExperiments = 0/maxExperiments=${budget_irace}/" > ${dir}/${outdir}/${res}/example.scen
    cp ./irace_files_pO/default.instances ${dir}/${outdir}/${res}
    cp ./irace_files_pO/fastga.param ${dir}/${outdir}/${res}
    cat ./irace_files_pO/target-runner | sed "s/--problem=0/--problem=${pb}/" > ${dir}/${outdir}/${res}/target-runner
    chmod u+x ${dir}/${outdir}/${res}/target-runner
    echo "---start $(date)"
    time -p ./R/x86_64-pc-linux-gnu-library/3.6/irace/bin/irace --scenario ${dir}/${outdir}/${res}/example.scen > ${dir}/${outdir}/${res}/irace.log
    echo "---end $(date)"
    echo "done run : ${run} pb : ${pb}"
    date -Iseconds
done
echo "end a job $(date -Iseconds)---------------------"

View file

@ -0,0 +1,23 @@
#!/bin/bash
# Submit the independent plan O irace runs (r_iO.sh) to the mesu cluster.
#
# PARAMETERS:
#   $1 myhome      : home directory forwarded to r_iO.sh
#   $2 scratchpath : scratch root holding planO/ and receiving dataFAR/dataO
#   $3 mexp        : maxExp (irace budget), part of the output directory name
#   $4 mevals      : maxEv (evaluation budget), part of the output directory name
date -Iseconds
echo "STARTS"
myhome=$1
scratchpath=$2
mexp=$3
mevals=$4
name="dataO_maxExp=${mexp}_maxEv=${mevals}_$(date --iso-8601=seconds)"
dir=${scratchpath}/dataFAR/dataO/${name}
mkdir -p ${dir}
for r in $(seq 2); do
    echo "Run $r/15";
    # BUG FIX: the job name interpolated the undefined variables ${exp} and
    # ${evals} (leaving the budgets blank in qsub job names); the script's
    # variables are ${mexp} and ${mevals}.
    cmd="qsub -N iraceO_maxExp=${mexp}_maxEv=${mevals}_${r} -q beta -l select=1:ncpus=1 -l walltime=00:10:00 -- ${scratchpath}/planO/r_iO.sh ${dir} ${r} ${mexp} ${mevals} ${myhome}"
    echo $cmd
    $cmd
    #time (p=2; while [[ ${p} > 1 ]] ; do p=$(qqueue -u $USER | wc -l); echo "$r: $p"; sleep 300; done)
done
#echo "DONE"
#date -Iseconds

View file

@ -1,13 +1,21 @@
############################################
#Explanation of the experimental plans and the validation runs
############################################
1. INTRODUCTION
The aim of all the scripts is to make the experimental plans for Algorithm Configuration for Genetic Algorithms by using a fully modular benchmarking pipeline design of this article https://arxiv.org/abs/2102.06435 .
You can upload the data in : https://zenodo.org/record/5479538#.YTaT0Bnis2w
Plan A is an experimental plan for finding an efficient algorithm for all the functions that we consider.
Plan F is an experimental plan for finding an efficient algorithm for each function that we consider.
Plan R is an experimental plan for getting random algorithms.
Plan O is the reproduction of the experimental plan of the article.
2. VOCABULARIES
* maxExp : means maximum Experiments, the budget for irace
@ -20,18 +28,18 @@ dataA is a directory which we store all the runs of an experiment plan for sever
eg : /dataA/planA_maxExp=*_maxEv=**_$(data), * is a value of maxExp, and ** is a value of maxEv
*fastga_results_all : directory which we store all the data for validation runs. It contains only 3 subdirectories (fastga_results_planF, fastga_results_planA, fastga_results_random), created by running run_exp.sh
*fastga_results_all : directory which we store all the data for validation runs. It contains only 4 subdirectories (fastga_results_planF, fastga_results_planA, fastga_results_planO, fastga_results_random), created by running run_exp.sh
* fastga_results_planF, fastga_results_planA, fastga_results_random
* fastga_results_planF, fastga_results_planA, fastga_results_random, fastga_results_planO
Each directory store the data for validation runs of each experiment plan.
fastga_random directory are created by running run_exp.sh
fastga_results_planF and fastga_results_planA are created only after you have data in the dataA or dataF directories.
fastga_results_planF, fastga_results_planO and fastga_results_planA are created only after you have data in the dataA or dataF or dataO directories.
* planA_*, planF_*
If the planA_* or planF_* are in the dataFAR directory, the directory contains the data of experimental plan. This means that each plan contains the result of 15 runs of irace stored in irace.log file, and the data are provided by run_exp.sh.
* planA_*, planF_*, planO_*
If the planA_* or planF_* or planO_* are in the dataFAR directory, the directory contains the data of experimental plan. This means that each plan contains the result of 15 runs of irace stored in irace.log file, and the data are provided by run_exp.sh.
If the planA_* or planF_* directories are in the fastga_results_planA or fastga_results_planF, these directories contain the data of 50 validation runs by running all the best algorithms of each plan stores in dataFAR. The data are provided by running run_res.sh
If the planA_* or planF_* or planO_* directories are in the fastga_results_planA or fastga_results_planF, these directories contain the data of 50 validation runs by running all the best algorithms of each plan stores in dataFAR. The data are provided by running run_res.sh
*fastag_all_results : contains the directories of the validation run data.
@ -57,9 +65,9 @@ The directory which you load all the scripts contains :
* python files :
-parseA_irace_bests.py : for parsing the irace.log file of each data provided by running irace. By giving a bunch of directories of one experiment
-parseF_irace_bests.py
-parseF_irace_bests.py : for plan F and plan O (in the plan O csv, the labels offspringsize and popsize are present, but they have no values)
* 4 directories :
* 6 directories :
-irace_files_pA :
-default.instances
-example.scen
@ -74,6 +82,12 @@ The directory which you load all the scripts contains :
-forbidden.txt
-target-runner
-irace_files_pO :
-default.instances :
-example.scen
-fastga.param
-target-runner
-planA :
-riaA.sh : for running 15 times r_iA.sh file by submitting to the mesu cluster
-r_iA.sh : for running irace for all the problems
@ -81,11 +95,14 @@ The directory which you load all the scripts contains :
-planF :
-riaF.sh : for running 15 times r_iF.sh file by submitting to the mesu cluster
-r_iF.sh : for running irace for each problem we considered
-planO :
-riaO.sh : for running 15 times r_iO.sh file by submitting to the mesu cluster
-r_iO.sh : for running irace for each problem we considered
The directories planA, planF contain the scripts to run one experiment of Plan A and Plan F.
The directories irace_files_pA and irace_files_pF contain the scripts needed for calling irace for one experiment of Plan A and Plan F. [Look at the irace package : User Guide for more information]
The directories irace_files_pA, irace_files_pO and irace_files_pF contain the scripts needed for calling irace for one experiment of Plan A, Plan O and Plan F. [Look at the irace package : User Guide for more information]
5. CONCLUSION
@ -97,5 +114,54 @@ Warning : run_exp.sh may take few days or few weeks depending on the Budget you
############################################
#Scripts for getting histograms and csv files of validation runs results.
############################################
get histograms or csv files for random data :
-hist_join_random.py : get one histogram for a plan by budget
-dist_op_random.py : get csv files of the distribution of operators by problems
get histograms or csv files for plan O,F,A :
-hist_join.py
-dist_op_all.py
-parse_auc_average # get the mean auc value of each problem and each irace run
get histograms for plan F, A , R, O
-hist_by_pb_budget_plan.py : get histograms by problem
-hist_by_FARO_pb.py :
-hist_by_FARO.py
-best_out_of_elites.py : get the best algorithm found among 15 runs of irace, for a plan
files to call all these files :
-csv_all.sh : get all the csv files (average of auc, best out ..), call best_out_of_elites.py, parse_auc_average.py, dist_op_*.py
-hist_all.sh : get all the histograms, call each hist_*.py file
file for other goal :
-mwtestU.py ; csv file for selected problems which irace algorithms gave better performances than random algorithms
-rep_std_mean_selected.py : to get the std, mean and the distribution of operators of the selected problems
############################################
#Summary
############################################
Get the experiment data :
run : bash run_exp.sh
-----------Only after you have the experiment data:
Get the validation run data :
run : bash run_res.sh
Get histograms :
run : bash hist_all.sh
Get csv files of validation run data :
run : bash csv_all.sh

View file

@ -0,0 +1,55 @@
#!/usr/bin/env python3
"""Per-parameter std/sum/mean tables over a hand-picked subset of problems.

Reads the per-problem operator-distribution CSVs produced by dist_op_*.py,
keeps only the problems listed in `lpb`, and writes three CSVs into
`<path>/rep_std_mean` (std_rep_*, rep_all_*, mean_rep_*) holding, for each of
the 9 tuned parameters, the std / sum / mean of the operator counts across
the selected problems.

eg : python3 ./rep_std_mean_selected.py ./hist_and_csv/distribution_op_fastga_results_planF
"""
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas
path=sys.argv[1] # directory of the per-problem distribution CSVs
lpb={13,14,15,16,18} #set of pb selected
distdir=path+"/rep_std_mean"  # output directory for the three CSVs
try:
    os.makedirs(distdir)
except FileExistsError:
    pass
res=[]
# Load the distribution CSV of each selected problem.
# File names are assumed to look like "*_pb=<id>_*" (id in the 2nd "_" field).
for csvfile in os.listdir(os.path.join(path)):
    if(int(csvfile.split("_")[1].split("=")[1]) in lpb):
        print(csvfile)
        res.append(pandas.read_csv(os.path.join(path,csvfile)))
# NOTE(review): all CSVs are assumed to have the same rows (one per tuned
# parameter, in the same order) — confirm against dist_op_*.py output.
name ="std_rep_pb={}".format(str(lpb))+"".join(map(str,path.split("/")[-3].split("_")[:3]))+".csv"
with open(os.path.join(distdir,name),'w+') as fd:
    fd.write("Op index, "+",".join(map(str,range(0,11)))+"\n")
globalname="rep_all_pb={}".format(str(lpb))+"".join(map(str,path.split("/")[-3].split("_")[:3]))+".csv"
with open(os.path.join(distdir,globalname),'w+') as fd:
    fd.write("Op index, "+",".join(map(str,range(0,11)))+"\n")
meanname="mean_rep_pb={}".format(str(lpb))+"".join(map(str,path.split("/")[-3].split("_")[:3]))+".csv"
with open(os.path.join(distdir,meanname),'w+') as fd:
    fd.write("Op index, "+",".join(map(str,range(0,11)))+"\n")
# Number of possible values of each of the 9 tuned parameters, in row order.
limparam=[10,7,10,10,7,11,11,10,10]
for i in range(1,10): #9 nb parameters
    # npval[pb, :] holds the operator counts of parameter i for each selected problem.
    npval=np.zeros((len(res),limparam[i-1]),dtype=int)
    for pb in range(len(res)):
        print(i,np.array(np.array(res[pb][i-1:i])[0]),np.array(np.array(res[pb][i-1:i])[0][1:limparam[i-1]+1]))
        # Row i-1 of the frame: column 0 is the parameter name, the rest are counts.
        npval[pb,:]=np.array(np.array(res[pb][i-1:i])[0][1:limparam[i-1]+1],dtype=int)
        nameparam=np.array(res[pb][i-1:i])[0][0]
    # Pad with "-" so every row has 11 value columns regardless of limparam.
    line= ",".join(map(str,np.std(npval,0)))+",-"*(11-limparam[i-1])
    print("ligne ",line)
    with open(os.path.join(distdir,name),'a') as fd:
        fd.write(nameparam+","+line+"\n")
    line= ",".join(map(str,np.sum(npval,0)))+",-"*(11-limparam[i-1])
    with open(os.path.join(distdir,globalname),'a') as fd:
        fd.write(nameparam+","+line+"\n")
    line= ",".join(map(str,np.mean(npval,0)))+",-"*(11-limparam[i-1])
    with open(os.path.join(distdir,meanname),'a') as fd:
        fd.write(nameparam+","+line+"\n")

View file

@ -0,0 +1,64 @@
#!/bin/bash
# Validation runs for plan O: replay every configuration listed in a CSV
# (one line per problem/configuration) `runs` times, one seed per run.
#
# PARAMETERS:
#   $1 csv_file : CSV with all the configs of all the problems of one experiment
#   $2 mexp     : maxExp budget (used only to name the output directory)
#   $3 mevals   : maxEv budget (used only to name the output directory)
#   $4 path     : root directory receiving the results
#instance = seed
. /etc/profile.d/modules.sh
export MODULEPATH=${MODULEPATH}${MODULEPATH:+:}/opt/dev/Modules/Anaconda:/opt/dev/Modules/Compilers:/opt/dev/Modules/Frameworks:/opt/dev/Modules/Libraries:/opt/dev/Modules/Tools:/opt/dev/Modules/IDEs:/opt/dev/Modules/MPI
module load LLVM/clang-llvm-10.0
csv_file=$1 #contains all the configs of all the problems of one experiments
mexp=$2
mevals=$3
path=$4
echo "-----------------Start $(date -Iseconds) "
# Number of runs (=seeds).
runs=50
# You most probably want to run on release builds.
exe="/home/${USER}/fastga"
outdir="${path}/planO_maxExp=${mexp}_maxEv=${mevals}_$(date --iso-8601=minutes)_results_elites_all"
mkdir -p ${outdir}
mkdir -p ${outdir}/raw
mkdir -p ${outdir}/raw/data
mkdir -p ${outdir}/raw/logs
n=0
algoid=0
# Skip the CSV header, then iterate over the configuration lines.
for line in $(cat ${csv_file}| sed 1,1d ); do
    # Split the CSV line into an array: a[0]=pb id, a[3..9]=operator parameters.
    a=($(echo $line | sed "s/,/ /g"))
    algo="--crossover-rate=${a[3]} --cross-selector=${a[4]} --crossover=${a[5]} --mutation-rate=${a[6]} --mut-selector=${a[7]} --mutation=${a[8]} --replacement=${a[9]}"
    # Forge a directory name from the problem id and the flags
    # (remove double dashes and replace spaces with underscores).
    name_dir="pb=${a[0]}_$(echo "${algo}" | sed 's/--//g' | sed 's/ /_/g')"
    mkdir -p ${outdir}/raw/data/${name_dir}
    mkdir -p ${outdir}/raw/logs/${name_dir}
    for seed in $(seq ${runs}) ; do # Iterates over runs/seeds.
        # This is the command to be ran; the seed doubles as the instance id.
        cmd="${exe} --problem=${a[0]} --seed=${seed} --instance=${seed} ${algo}"
        # Forge a per-run log/data file name the same way as name_dir.
        name_run="pb=${a[0]}_seed=${seed}_$(echo "${algo}" | sed 's/--//g' | sed 's/ /_/g')"
        # Actually start the command: data on stdout, logs on stderr.
        ${cmd} > "${outdir}/raw/data/${name_dir}/${name_run}.dat" 2> "${outdir}/raw/logs/${name_dir}/${name_run}.log"
    done # seed
    n=$(($n+1))
    algoid=$(($algoid+1))
done
echo "Done $(date) -----------------------"
date

View file

@ -1,11 +1,12 @@
#!/bin/bash
lexp=(300 600 1000 10000)
levals=(100 500 1000)
lexp=(300 600)
levals=(100 500)
myscratchpath=/scratchbeta/$USER
myhome=${HOME}
for exp in ${lexp[@]} ; do
for evals in ${levals[@]} ; do
bash ./planF/riaF.sh ${myhome} ${myscratchpath} ${exp} ${evals}
bash ./planO/riaO.sh ${myhome} ${myscratchpath} ${exp} ${evals}
bash ./planA/riaA.sh ${myhome} ${myscratchpath} ${exp} ${evals}
done
done

View file

@ -14,7 +14,7 @@ done
#get validation run of each config
dir=/scratchbeta/$USER/csv_FA
dir=/scratchbeta/$USER/csv_FAO
listdir=$(echo $(ls ${dir}))
echo ${listdir[@]}
for csvdir in ${listdir[@]} ; do