From 807be1b3c2683eee8a923d12f338e0cf37d94bc1 Mon Sep 17 00:00:00 2001 From: Alix ZHENG Date: Tue, 7 Sep 2021 00:27:44 +0200 Subject: [PATCH] Add scripts for parsing and archive link --- .../irace/expe/beta/best_out_of_elites.py | 86 +++++++ eo/contrib/irace/expe/beta/csv_all.sh | 40 +++ eo/contrib/irace/expe/beta/csv_all_bests.sh | 2 +- eo/contrib/irace/expe/beta/dist_op_random.py | 78 ++++++ .../irace/expe/beta/distribution_op_all.py | 87 +++++++ .../irace/expe/beta/fastga_elites_all.sh | 7 +- eo/contrib/irace/expe/beta/hist_all.sh | 34 +++ eo/contrib/irace/expe/beta/hist_by_FARO.py | 71 ++++++ eo/contrib/irace/expe/beta/hist_by_FARO_pb.py | 88 +++++++ .../irace/expe/beta/hist_by_pb_budget_plan.py | 90 +++++++ eo/contrib/irace/expe/beta/hist_join.py | 68 ++++++ .../irace/expe/beta/hist_join_random.py | 46 ++++ .../expe/beta/irace_files_pA/forbidden.txt | 13 + .../expe/beta/irace_files_pF/forbidden.txt | 15 ++ .../beta/irace_files_pO/default.instances | 48 ++++ .../expe/beta/irace_files_pO/example.scen | 228 ++++++++++++++++++ .../expe/beta/irace_files_pO/fastga.param | 10 + .../expe/beta/irace_files_pO/target-runner | 88 +++++++ eo/contrib/irace/expe/beta/mwtestU.py | 140 +++++++++++ .../irace/expe/beta/parseO_irace_bests.py | 35 +++ .../irace/expe/beta/parse_auc_average.py | 34 +++ eo/contrib/irace/expe/beta/planA/riaA.sh | 2 +- eo/contrib/irace/expe/beta/planF/riaF.sh | 2 +- eo/contrib/irace/expe/beta/planO/r_iO.sh | 43 ++++ eo/contrib/irace/expe/beta/planO/riaO.sh | 23 ++ eo/contrib/irace/expe/beta/readme.txt | 84 ++++++- .../irace/expe/beta/rep_std_mean_selected.py | 55 +++++ .../irace/expe/beta/run_elites_planO.sh | 64 +++++ eo/contrib/irace/expe/beta/run_exp.sh | 5 +- eo/contrib/irace/expe/beta/run_res.sh | 2 +- 30 files changed, 1570 insertions(+), 18 deletions(-) create mode 100755 eo/contrib/irace/expe/beta/best_out_of_elites.py create mode 100755 eo/contrib/irace/expe/beta/csv_all.sh create mode 100755 eo/contrib/irace/expe/beta/dist_op_random.py create 
mode 100755 eo/contrib/irace/expe/beta/distribution_op_all.py create mode 100755 eo/contrib/irace/expe/beta/hist_all.sh create mode 100755 eo/contrib/irace/expe/beta/hist_by_FARO.py create mode 100755 eo/contrib/irace/expe/beta/hist_by_FARO_pb.py create mode 100755 eo/contrib/irace/expe/beta/hist_by_pb_budget_plan.py create mode 100755 eo/contrib/irace/expe/beta/hist_join.py create mode 100755 eo/contrib/irace/expe/beta/hist_join_random.py create mode 100755 eo/contrib/irace/expe/beta/irace_files_pA/forbidden.txt create mode 100755 eo/contrib/irace/expe/beta/irace_files_pF/forbidden.txt create mode 100755 eo/contrib/irace/expe/beta/irace_files_pO/default.instances create mode 100755 eo/contrib/irace/expe/beta/irace_files_pO/example.scen create mode 100755 eo/contrib/irace/expe/beta/irace_files_pO/fastga.param create mode 100755 eo/contrib/irace/expe/beta/irace_files_pO/target-runner create mode 100755 eo/contrib/irace/expe/beta/mwtestU.py create mode 100755 eo/contrib/irace/expe/beta/parseO_irace_bests.py create mode 100755 eo/contrib/irace/expe/beta/parse_auc_average.py create mode 100755 eo/contrib/irace/expe/beta/planO/r_iO.sh create mode 100755 eo/contrib/irace/expe/beta/planO/riaO.sh create mode 100755 eo/contrib/irace/expe/beta/rep_std_mean_selected.py create mode 100755 eo/contrib/irace/expe/beta/run_elites_planO.sh diff --git a/eo/contrib/irace/expe/beta/best_out_of_elites.py b/eo/contrib/irace/expe/beta/best_out_of_elites.py new file mode 100755 index 000000000..c6832bbe0 --- /dev/null +++ b/eo/contrib/irace/expe/beta/best_out_of_elites.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt +#cmd eg : +#python3 best_out_of_elites.py ./fastga_results_all/fastga_results_planO/planO_maxExp=50000_maxEv=5n_2021-08-13T19:16+02:00_results_elites_all +#python3 best_out_of_elites.py ./fastga_results_all/fastga_results_random/maxEv=10000_nbAlgo=15_2021-08-21T20:53+02:00_results_randoms + + +#get the 
configuration of the best out of the elite +# recommendation suggested by 15 independant runs of irace + +figdir=sys.argv[1] # directory of a result of one experiment +#eg : ./fastga_results_all/fastga_results_plan1/plan1_maxExp\=100000_maxEv\=5n_2021-08-13T19\:04+02\:00_results_elites_all/ +#print(figdir.split('/')[-2], figdir.split('/')) +if("plan" in figdir.split('/')[-2]): + print("Operator,","op. ,",",".join(map(str,range(1,20)))) + + column={"pc" : 101, "SelectC": 7, "Crossover" : 10, "pm": 101,"SelectM" : 7, "Mutation": 11, "Replacement" : 11, "pop-size": 50, "offspring-size" : 50} + nbparam=(len(os.listdir(os.path.join(figdir,"raw/data"))[0].split("_"))-1) #-1 car il y a le pb + + if( nbparam "${myfig}/auc_average_${experiments}.csv" + #--------------distribution of operators by pb and for all pb only for plan A,F,O ------ + #myfig=${figpath}/distribution_op_${plan} + #mkdir -p ${myfig} + #cmd="python3 distribution_op_all.py ${path} ${myfig} " + #$cmd + #--------------best out csv-------- + cmd="python3 best_out_of_elites.py ${path}" + myfig=${figpath}/best_out_${plan} + mkdir -p ${myfig} + $cmd > ${myfig}/best_out_all_pb_${experiments}.csv + echo ${cmd} + + done +done + +#---------------distribution of operators of randoma algo------------------ +#rpath=${ldata}/fastga_results_random +#cmd="python3 dist_op_random.py ${rpath} ${figpath}" +#$cmd +#---------------random--------------- \ No newline at end of file diff --git a/eo/contrib/irace/expe/beta/csv_all_bests.sh b/eo/contrib/irace/expe/beta/csv_all_bests.sh index 3f0fb3652..fb7926faf 100755 --- a/eo/contrib/irace/expe/beta/csv_all_bests.sh +++ b/eo/contrib/irace/expe/beta/csv_all_bests.sh @@ -1,7 +1,7 @@ #!/bin/bash ldata=$1 file_py=$2 -csvdir="csv_FA" +csvdir="csv_FAO" ldir=$(echo $(ls ${ldata})) for data in ${ldir[@]} ; do path="${ldata}/${data}" diff --git a/eo/contrib/irace/expe/beta/dist_op_random.py b/eo/contrib/irace/expe/beta/dist_op_random.py new file mode 100755 index 000000000..b7056cbd2 --- 
/dev/null +++ b/eo/contrib/irace/expe/beta/dist_op_random.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt + +#cmd eg : python3 ./dist_op_random.py ./fastga_results_all/fastga_results_random/ ./hist_and_csv/ +#get the distribution of operators variants recommended by 15 random algo for each maxEv +#pc and pm 10 possibilities : [0-0.1[ [0.1-0.2[ [0.2-0.3[ [0.3-0.4[ [0-0.5[ [0.5-0.6[ ...[0.9-1[ +#pop-size and offspring-size 10 possibilities : 0-5 5-10, 10-15 15-20 20-25 25-30 30-35- 35-40 40-45 45-50 + +path=sys.argv[1] # directory of a result of one experiment +#eg : ./fastga_results_all/fastga_results_random/ +figdir=sys.argv[2] #directory of where you want to store the data +if("random" in path): + #column : [operator : nbpossibilities] + distdir=figdir+"/distribution_random" + try: + os.makedirs(distdir) + except FileExistsError: + pass + + nbparam=9 #-1 car il y a le pb + + res=[] + + for maxEvdir in os.listdir(path): + res.append({"crossover-rate":["pc" , np.zeros(10, dtype=int)], + "cross-selector":["SelectC", np.zeros(7, dtype=int)], + "crossover":["Crossover" , np.zeros(10, dtype=int)], + "mutation-rate":["pm",np.zeros(10, dtype=int)], + "mut-selector":["SelectM",np.zeros(10, dtype=int)], + "mutation":["Mutation", np.zeros(11, dtype=int)], + "replacement":["Replacement" , np.zeros(11, dtype=int)], + "pop-size":["pop-size", np.zeros(10, dtype=int)], + "offspring-size":["offspring-size" , np.zeros(10, dtype=int)]}) + for algodir in os.listdir(os.path.join(path,maxEvdir)): #fastgadir : directory of 50 runs of an elite configuration + algo=algodir.split("_") + for param in algo: + name,val=param.split("=")[0],float(param.split("=")[1]) + if(name in {"pop-size" ,"offspring-size"}): + if(val%5==0): + res[-1][name][1][int(val//5) -1]+=1 + else: + #print(res[-1][name][1],val//5) + res[-1][name][1][int(val//5)]+=1 + + elif(name in {"crossover-rate","mutation-rate"} ): + if(int(val*10)==10): #case 
of val=1 + res[-1][name][1][-1]+=1 + else : + #print(int(float(val)*10), name,pb,val) + res[-1][name][1][int(val*10)]+=1 + else : + res[-1][name][1][int(val)]+=1 + + + ind=0 + for maxEvdir in os.listdir(path): + name="distribution_random_"+maxEvdir.split("_")[0]+".csv" #the end of the path must be / + with open(os.path.join(distdir,name),"w+") as csvfile: + csvfile.write("Op index, "+",".join(map(str,range(0,11)))+"\n") + with open(os.path.join(distdir,name),"a") as csvfile: + for param_name in res[ind].keys(): + #print(map(str,res[ind]),res[ind], ",".join(map(str,res[ind]))) + csvfile.write(res[ind][param_name][0]+","+ ",".join(map(str,res[ind][param_name][1]))+",-"*(11-len(res[ind][param_name][1])) +"\n") + #print(str(i)+",",",".join(map(str,np.mean(aucs[i],1)))) + ind+=1 + #all problems + name ="distribution_all_random_"+path.split("/")[-1]+".csv" + with open(os.path.join(distdir,name),'w+') as csvfile: + csvfile.write("Op index, "+",".join(map(str,range(0,11)))+"\n") + + with open(os.path.join(distdir,name),'a') as csvfile: + for param_name in res[0].keys(): + #print(map(str,res[ind]),res[ind], ",".join(map(str,res[ind]))) + csvfile.write(res[0][param_name][0]+","+ ",".join(map(str,np.sum([res[i][param_name][1] for i in range(ind)],0)))+",-"*(11-len(res[0][param_name][1])) +"\n") #res[0] only for getting the name of parameters; the sum covers all ind budget directories + #print(str(i)+",",",".join(map(str,np.mean(aucs[i],1)))) \ No newline at end of file diff --git a/eo/contrib/irace/expe/beta/distribution_op_all.py b/eo/contrib/irace/expe/beta/distribution_op_all.py new file mode 100755 index 000000000..b2843c68b --- /dev/null +++ b/eo/contrib/irace/expe/beta/distribution_op_all.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt + +#get the distribution of operators variants recommended by 15 runs of irace for all problems and each problem +#also get an csv file +#pc and pm 10 possibilities : [0-0.1[ [0.1-0.2[ [0.2-0.3[ [0.3-0.4[ 
[0-0.5[ [0.5-0.6[ ...[0.9-1[ +#pop-size and offspring-size 10 possibilities : 0-5 5-10, 10-15 15-20 20-25 25-30 30-35- 35-40 40-45 45-50 + +path=sys.argv[1] # directory of a result of one experiment +#eg : ./fastga_results_all/fastga_results_planO/planO_maxExp\=100000_maxEv\=5n_2021-08-13T19\:04+02\:00_results_elites_all/ + +if("fastga_results_plan" in path): + #column : [operator : nbpossibilities] + distdir=sys.argv[2] + try: + os.makedirs(distdir) + except FileExistsError: + pass + + nbparam=(len(os.listdir(os.path.join(path,"raw/data"))[0].split("_"))-1) + + if( nbparam==7): + res=[{"crossover-rate":["pc" , np.zeros(10, dtype=int)], + "cross-selector":["SelectC", np.zeros(7, dtype=int)], + "crossover":["Crossover" , np.zeros(10, dtype=int)], + "mutation-rate":["pm",np.zeros(10, dtype=int)], + "mut-selector":["SelectM",np.zeros(7, dtype=int)], + "mutation":["Mutation", np.zeros(11, dtype=int)], + "replacement":["Replacement" ,np.zeros(11, dtype=int)]} for i in range(19)] + else: + res=[{"crossover-rate":["pc" , np.zeros(10, dtype=int)], + "cross-selector":["SelectC", np.zeros(7, dtype=int)], + "crossover":["Crossover" , np.zeros(10, dtype=int)], + "mutation-rate":["pm",np.zeros(10, dtype=int)], + "mut-selector":["SelectM",np.zeros(7, dtype=int)], + "mutation":["Mutation", np.zeros(11, dtype=int)], + "replacement":["Replacement" , np.zeros(11, dtype=int)], + "pop-size":["pop-size", np.zeros(10, dtype=int)], + "offspring-size":["offspring-size" , np.zeros(10, dtype=int)]} for i in range(19)] + + + for fastgadir in os.listdir(os.path.join(path,"raw/data")): #fastgadir : directory of 50 runs of an elite configuration + algo=fastgadir.split("_") + pb=int(fastgadir.split("_")[0].split("=")[1]) + for param in algo[1:]: + name,val=param.split("=")[0],float(param.split("=")[1]) + if(name in {"pop-size" ,"offspring-size"}): + if(val%5==0): + res[pb][name][1][int(val//5) -1]+=1 + else: + #print(res[pb][name][1],val//5) + res[pb][name][1][int(val//5)]+=1 + + elif(name in 
{"crossover-rate","mutation-rate"} ): + if(int(val*10)==10): #case of val=1 + res[pb][name][1][-1]+=1 + else : + #print(int(float(val)*10), name,pb,val) + res[pb][name][1][int(val*10)]+=1 + else : + res[pb][name][1][int(val)]+=1 + + + + for pb in range(19): + name="distribution_pb="+str(pb)+"_"+path.split("/")[-2]+".csv" #the end of the path must be / + with open(os.path.join(distdir,name),"w+") as csvfile: + csvfile.write("Op index, "+",".join(map(str,range(0,11)))+"\n") + with open(os.path.join(distdir,name),"a") as csvfile: + for param_name in res[pb].keys(): + #print(map(str,res[ind]),res[ind], ",".join(map(str,res[ind]))) + csvfile.write(res[pb][param_name][0]+","+ ",".join(map(str,res[pb][param_name][1]))+",-"*(11-len(res[pb][param_name][1])) +"\n") + #print(str(i)+",",",".join(map(str,np.mean(aucs[i],1)))) + + #all problems + name ="distribution_all_pb_"+path.split("/")[-1]+".csv" + with open(os.path.join(path,"raw",name),'w+') as csvfile: + csvfile.write("Op index, "+",".join(map(str,range(0,11)))+"\n") + + with open(os.path.join(path,"raw",name),'a') as csvfile: + for param_name in res[0].keys(): + #print(map(str,res[ind]),res[ind], ",".join(map(str,res[ind]))) + csvfile.write(res[0][param_name][0]+","+ ",".join(map(str,np.sum([res[i][param_name][1] for i in range(19)],0)))+",-"*(11-len(res[0][param_name][1])) +"\n") #res[0] only for getting the name of parameters + #print(str(i)+",",",".join(map(str,np.mean(aucs[i],1)))) \ No newline at end of file diff --git a/eo/contrib/irace/expe/beta/fastga_elites_all.sh b/eo/contrib/irace/expe/beta/fastga_elites_all.sh index a53eb189d..42f6c0453 100644 --- a/eo/contrib/irace/expe/beta/fastga_elites_all.sh +++ b/eo/contrib/irace/expe/beta/fastga_elites_all.sh @@ -1,15 +1,16 @@ #!/bin/bash -ldata=$1 # eg : ./csv_planF/ don t forget to end the path with / -file_sh=$2 #eg : ./run_elites_planF +ldata=$1 +file_sh=$2 ldir=$(echo $(ls ${ldata})) fastga_dir="fastga_results_all" mkdir -p /scratchbeta/${USER}/${fatga_dir} 
#mkdir -p "/home/${USER}/${fastga_dir}/fastga_results_plan1" mkdir -p "/scratchbeta/${USER}/${fastga_dir}/fastga_results_planF" mkdir -p "/scratchbeta/${USER}/${fastga_dir}/fastga_results_planA" +mkdir -p "/scratchbeta/${USER}/${fastga_dir}/fastga_results_planO" for data in ${ldir[@]} ; do - path_csv="${ldata}${data}" + path_csv="${ldata}/${data}" plan_name=$(echo ${data} | sed "s/results_irace_plan//") mexp=$(echo ${data[@]} | cut -d _ -f4) mexp_id=$(echo ${mexp} | cut -d = -f2) diff --git a/eo/contrib/irace/expe/beta/hist_all.sh b/eo/contrib/irace/expe/beta/hist_all.sh new file mode 100755 index 000000000..89ffe932d --- /dev/null +++ b/eo/contrib/irace/expe/beta/hist_all.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +ldata="./fastga_results_all" #fastga_results_all +figpath="./hist_and_csv" #hist_and_csv + +ldir=$(echo $(ls ${ldata})) #list of directory of each plan +for plan in ${ldir[@]} ; do #get the directory of each plan + #------------hist by budget of a Plan (O,R or F) + #path="${ldata}/${plan}" + #cmd="python3 hist_join.py ${path} ${figpath}" + #echo $cmd + #$cmd + + #---------------------------hist by pb by budget--------------- + path="${ldata}/${plan}" + cmd="python3 hist_by_pb_budget_plan.py ${path} ${figpath}" + echo $cmd + $cmd +done + +#---------------random------------------ +#rpath=${ldata}/fastga_results_random +#cmd="python3 hist_join_random.py ${rpath} ${figpath}" +#---------------random--------------- + +#--------------------Choose a Budget irace and a budget fastga +mexp=100000 +mevals=1000 +#-------------------histogram join each plan F,A,R,O and join all algorithms for the budget chosen +cmd="python3 hist_by_FARO.py ${ldata} ${figpath} ${mexp} ${mevals}" +$cmd +#-------------------histogram by pb join each plan F,A,R,O and join all algorithms for the budget chosen +cmd="python3 hist_by_FARO_pb.py ${ldata} ${figpath} ${mexp} ${mevals}" +$cmd diff --git a/eo/contrib/irace/expe/beta/hist_by_FARO.py b/eo/contrib/irace/expe/beta/hist_by_FARO.py new file 
mode 100755 index 000000000..bc6ae3ccc --- /dev/null +++ b/eo/contrib/irace/expe/beta/hist_by_FARO.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt +from scipy.stats import mannwhitneyu + +##python3 hist_by_FARO.py ./fastga_results_all/ ./hist_and_csv/ 100000 1000 +#one plot for one experiment plan for the same budget fastga, and the same budget irace if there is a budget irace (A,F) +path=sys.argv[1] +figpath=sys.argv[2] +maxExp=sys.argv[3] +maxEv=sys.argv[4] + +indF=-1 +indFO=-1 +averageConfigs=[] +name=[] +for fastga in os.listdir(path): #ddir : directory of fastga_plan + if(fastga in {"fastga_results_planA","fastga_results_planF","fastga_results_planO"}): + for plan in os.listdir(os.path.join(path,fastga)): + print("maxExp="+str(maxExp)+"_maxEv="+str(maxEv) in plan,plan,"maxExp="+str(maxExp)+"_maxEv="+str(maxEv)) + if("maxExp="+str(maxExp)+"_maxEv="+str(maxEv) in plan): + average=[] + + for fastgadir in os.listdir(os.path.join(path,fastga,plan,"raw","data")): #fastgadir : directory of 50 runs of a configuration + for fname in os.listdir(os.path.join(path,fastga,plan,"raw","data",fastgadir)): + with open(os.path.join(path,fastga,plan,"raw","data",fastgadir,fname)) as fd: + auc = float(fd.readlines()[0]) *(-1) + average.append(auc) + averageConfigs.append(average) + nameid=plan.split("_")[0][-1] + name.append("plan"+nameid+"_"+"_".join(plan.split("_")[1:3])) + if("random" in fastga): + for randir in os.listdir(os.path.join(path,fastga)): + #eg path: maxEv=100_nbAlgo=15_2021-08-20T1511+0200_results_randoms + average=[] + if("maxEv="+str(maxEv)+"_" in randir): + for ddir in os.listdir(os.path.join(path,fastga,randir)): #ddir : directory of one run_elites_all or more + if("crossover" in ddir): + #name.append("_".join(ddir.split("_")[1:3])) + for fastgadir in os.listdir(os.path.join(path,fastga,randir,ddir,"data")): #fastgadir : directory of 50 runs of a configuration + for fname in 
os.listdir(os.path.join(path,fastga,randir,ddir,"data",fastgadir)): + with open(os.path.join(path,fastga,randir,ddir,"data",fastgadir,fname)) as fd: + auc = float(fd.readlines()[0]) *(-1) + average.append(auc) + #hist[belonging(auc,cum)]+=1 + averageConfigs.append(average) + name.append(randir.split("_")[0]+"_random") + + +figdir=os.path.join(figpath,"hist_FARO_by_budget") +try: + os.makedirs(figdir) +except FileExistsError: + pass + +#_,pv=mannwhitneyu(averageConfigs[indFO],averageConfigs[indF]) +#print(name,len(averageConfigs)) +plt.figure() +plt.hist(averageConfigs,bins=10,range=(0,1),align="mid",rwidth=0.9,label=name) #no label +plt.xlabel("performances") +plt.ylabel("Number of runs") +plt.xlim(0,1) +plt.ylim(0,8000) +plt.yticks(range(0,8000,500)) +#plt.title("pvalue="+str(pv)+"\n medianeF="+str(np.median(averageConfigs[indF]))+", medianeFO="+str(np.median(averageConfigs[indFO]))) +plt.legend() +plt.savefig(figdir+"/hist_planFARO"+"_maxExp="+str(maxExp)+"_maxEv="+str(maxEv)+".png") +plt.close() + \ No newline at end of file diff --git a/eo/contrib/irace/expe/beta/hist_by_FARO_pb.py b/eo/contrib/irace/expe/beta/hist_by_FARO_pb.py new file mode 100755 index 000000000..70eb971cf --- /dev/null +++ b/eo/contrib/irace/expe/beta/hist_by_FARO_pb.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt + +##python3 hist_by_FARO_pb.py ./fastga_results_all/ ./hist_and_csv/ 100000 1000 +#19 histograms by plan F,A ,R O +path=sys.argv[1] +figpath=sys.argv[2] +maxExp=sys.argv[3] +maxEv=sys.argv[4] + +hist_pb=[[] for i in range(19)] +name=[] +for fastga in os.listdir(path): #ddir : directory of fastga_plan + if(fastga in {"fastga_results_planA", "fastga_results_planF","fastga_results_planO"}): + for plan in os.listdir(os.path.join(path,fastga)): + #print("maxExp="+str(maxExp)+"_maxEv="+str(maxEv)+"_" in plan,plan,"maxExp="+str(maxExp)+"_maxEv="+str(maxEv)) + #print("maxExp="+str(maxExp)+"_maxEv="+str(maxEv) in 
plan,plan,"maxExp="+str(maxExp)+"_maxEv="+str(maxEv)) + if("maxExp="+str(maxExp)+"_maxEv="+str(maxEv)+"_" in plan): + nameid=fastga[-1] + name.append("plan"+nameid+"_".join(plan.split("_")[1:3])) + for fastgadir in os.listdir(os.path.join(path,fastga,plan,"raw","data")): #fastgadir : directory of 50 runs of a configuration + pb=int(fastgadir.split("_")[0].split("=")[1]) + average_pb=[] + for fname in os.listdir(os.path.join(path,fastga,plan,"raw","data",fastgadir)): + with open(os.path.join(path,fastga,plan,"raw","data",fastgadir,fname)) as fd: + auc = float(fd.readlines()[0]) *(-1) + average_pb.append(auc) + if(hist_pb[pb]==[]): #first algo + hist_pb[pb].append(average_pb) + elif(len(hist_pb[pb])!=len(name)): + hist_pb[pb].append(average_pb) + else: + hist_pb[pb][len(name)-1]+=average_pb #another algo for the same plan + + if("random" in fastga): + for randir in os.listdir(os.path.join(path,fastga)): + #eg path: maxEv=100_nbAlgo=15_2021-08-20T1511+0200_results_randoms + if(("maxEv="+str(maxEv)+"_") in randir): + #print("maxEv="+str(maxEv) in randir,randir) + name.append(randir.split("_")[0]+"_random") + for ddir in os.listdir(os.path.join(path,fastga,randir)): #ddir : directory of one run_elites_all or more + if("crossover" in ddir): + #name.append("_".join(ddir.split("_")[1:3])) + for fastgadir in os.listdir(os.path.join(path,fastga,randir,ddir,"data")): #fastgadir : directory of 50 runs of a configuration + average_pb=[] + pb=int(fastgadir.split("_")[0].split("=")[1]) + for fname in os.listdir(os.path.join(path,fastga,randir,ddir,"data",fastgadir)): + with open(os.path.join(path,fastga,randir,ddir,"data",fastgadir,fname)) as fd: + auc = float(fd.readlines()[0]) *(-1) + average_pb.append(auc) + #print(len(hist_pb[pb]),len(name), pb) + if(hist_pb[pb]==[]): #first algo + #print("entrer random vide") + hist_pb[pb].append(average_pb) + elif(len(hist_pb[pb])!=len(name)): + #print("entrer random !=") + hist_pb[pb].append(average_pb) + else: + 
hist_pb[pb][len(name)-1]+=average_pb #another algo for the same plan + + +figdir=os.path.join(figpath,"hist_by_FARO_pb_maxExp={}_maxEv={}".format(maxExp,maxEv)) +try: + os.makedirs(figdir) +except FileExistsError: + pass +#colors=['yellow', 'green',"blue","pink","purple","orange","magenta","gray","darkred","cyan","brown","olivedrab","thistle","stateblue"] +print(name) +for pb in range(19): + print(pb, len(hist_pb[pb])) + for i in hist_pb[pb]: + print(len(i)) + plt.figure() + plt.hist(hist_pb[pb],bins=10,range=(0,1),align="mid",rwidth=0.9,edgecolor="red",label=name) #no label color=colors[:len(name)] + #for aucs in range(len(hist_pb[pb])): + #plt.hist(hist_pb[pb][aucs],bins=10,range=(0,1),align="mid",rwidth=0.9,edgecolor="red",label=name[aucs]) #no label + plt.xlabel("performances") + plt.ylabel("Number of runs") + plt.ylim(0,800) + plt.xlim(0,1) + plt.yticks(range(0,800,50)) + #plt.xticks(np.cumsum([0.1]*10)) + plt.legend() + plt.savefig(figdir+"/hist_FARO_pb={}_maxExp={}_maxEv={}.png".format(pb,maxExp,maxEv)) + plt.close() \ No newline at end of file diff --git a/eo/contrib/irace/expe/beta/hist_by_pb_budget_plan.py b/eo/contrib/irace/expe/beta/hist_by_pb_budget_plan.py new file mode 100755 index 000000000..a91d15e87 --- /dev/null +++ b/eo/contrib/irace/expe/beta/hist_by_pb_budget_plan.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt + +# python3 hist_by_pb_budget_plan.py ./fastga_results_all/fastga_results_planF/ ./hist_and_csv/ +#python3 hist_by_pb_budget_plan.py ./fastga_results_all/fastga_results_planO ./hist_and_csv +#get 19 histograms with number of budget bars, same as hist_join but now is by pb + +#argv : list of elite results +path=sys.argv[1] +figpath=sys.argv[2] +#plan_name=sys.argv[3] +hist_pb=[[] for i in range(19)] +name=[] +if("random" in path): + plan_name="R" +else: + plan_name=path.strip("/").split("/")[-1][-1] + + +for plandir in os.listdir(path): #plandir: directory of an 
experiment of elite results + if("results_elites_all" in plandir): + #eg : plan2_maxExp=10000_maxEv=1000_2021-08-20T1347+0200_results_elites_all + budget_irace=plandir.split("_")[1].split("=")[1] + budget_fastga=plandir.split("_")[2].split("=")[1] + name.append("plan="+plan_name+"_"+"_".join(plandir.split("_")[1:3])) #plan=*_maxExp=*_maxEv=* + + for algodir in os.listdir(os.path.join(path,plandir,"raw","data")): + average_pb=[] + pb=int(algodir.split("_")[0].split("=")[1]) + for algo in os.listdir(os.path.join(path,plandir,"raw","data",algodir)): + with open(os.path.join(path,plandir,"raw","data",algodir,algo)) as fd: + auc = float(fd.readlines()[0]) *(-1) + average_pb.append(auc) + if(hist_pb[pb]==[]): #first algo + hist_pb[pb].append(average_pb) + elif(len(hist_pb[pb])!=len(name)): + hist_pb[pb].append(average_pb) + else: + hist_pb[pb][len(name)-1]+=average_pb #another algo for the same plan + + if("results_randoms" in plandir): + #eg : maxEv=1000_2021-08-20T1347+0200_results_random + budget_fastga=plandir.split("_")[0].split("=")[1] + name.append("plan="+plan_name+"_"+"".join(plandir.split("_")[0])) #plan=R_maxEv=* (random runs have no irace budget) + for algodir in os.listdir(os.path.join(path,plandir)): + + for algo in os.listdir(os.path.join(path,plandir,algodir,"data")): + pb=int(algo.split("_")[0].split("=")[1]) + average_pb=[] + for fname in os.listdir(os.path.join(path,plandir,algodir,"data",algo)): + with open(os.path.join(path,plandir,algodir,"data",algo,fname)) as fd: + auc = float(fd.readlines()[0]) *(-1) + average_pb.append(auc) + if(hist_pb[pb]==[]): #first algo + print("entrer") + hist_pb[pb].append(average_pb) + elif(len(hist_pb[pb])!=len(name)): + hist_pb[pb].append(average_pb) + else: + hist_pb[pb][len(name)-1]+=average_pb #another algo for the same plan + + + +print(plan_name) + +figdir=os.path.join(figpath,"hist_by_{}_pb_budget_plan".format(plan_name)) +#figdir=os.path.join(figpath,"hist_by_{}_pb_irace_maxEv={}".format(plan_name,1000)) +try: + 
os.makedirs(figdir) +except FileExistsError: + pass + + +for pb in range(19): + print(pb, len(hist_pb[pb])) + plt.figure() + plt.hist(hist_pb[pb],bins=10,range=(0,1),align="mid",rwidth=0.9,edgecolor="red",label=name) #no label color=colors[:len(name)] + #for aucs in range(len(hist_pb[pb])): + #plt.hist(hist_pb[pb][aucs],bins=10,range=(0,1),align="mid",rwidth=0.9,edgecolor="red",label=name[aucs]) #no label + plt.xlabel("performances") + plt.ylabel("Number of runs") + plt.ylim(0,750) + plt.yticks(range(0,750,50)) + plt.xlim(0,1) + plt.legend() + plt.savefig(figdir+"/hist_plan={}_pb={}_budget.png".format(plan_name,pb)) + plt.close() \ No newline at end of file diff --git a/eo/contrib/irace/expe/beta/hist_join.py b/eo/contrib/irace/expe/beta/hist_join.py new file mode 100755 index 000000000..4ba2d9e13 --- /dev/null +++ b/eo/contrib/irace/expe/beta/hist_join.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt +from scipy.stats import mannwhitneyu + +#cmd : python3 hist_join.py ./fastga_results_all/fastga_results_planO/ ./hist_and_csv/ +#histogram by plan for the budgets (irace and fastag) + + +path=sys.argv[1] #argv : directory of a Plan (O, A, F) +figpath=sys.argv[2] #path to store the histograms +averageConfigs=[] +name=[] +if("fastga_results_plan" in path): + for ddir in os.listdir(path): #ddir : directory of one run_elites_all or more + if("plan" in ddir): + average=[] + name.append("_".join(ddir.split("_")[1:3])) + for fastgadir in os.listdir(os.path.join(path,ddir,"raw","data")): #fastgadir : directory of 50 runs of a configuration + for fname in os.listdir(os.path.join(path,ddir,"raw","data",fastgadir)): + with open(os.path.join(path,ddir,"raw","data",fastgadir,fname)) as fd: + auc = float(fd.readlines()[0]) *(-1) + average.append(auc) + #hist[belonging(auc,cum)]+=1 + averageConfigs.append(average) + #print(hist) + #print(average) + + figdir=os.path.join(figpath,"hist_join") + try: + 
os.makedirs(figdir) + except FileExistsError: + pass + + + print(name,len(averageConfigs)) + + """ + idd0=name[0].split("_")[0].split("=")[1][:-3]+"k" + idd1=name[1].split("_")[0].split("=")[1][:-3]+"k" + idd2=name[2].split("_")[0].split("=")[1][:-3]+"k" + + #only for Budget irace 10000, 50000, 100000 ie: only three experiment results + titlename="median"+idd0+"={:.3f}".format(np.median(averageConfigs[0]))+" , median"+idd1+"={:.3f}".format(np.median(averageConfigs[1]))+" , median"+idd2+"={:.3f}".format(np.median(averageConfigs[2])) + _,pv=mannwhitneyu(averageConfigs[0],averageConfigs[1]) + titlename+="\n pvalue{}={:.3f}".format(idd0+idd1,pv) + _,pv=mannwhitneyu(averageConfigs[0],averageConfigs[2]) + titlename+=" ,pvalue{}={:.3f}".format(idd0+idd2,pv) + _,pv=mannwhitneyu(averageConfigs[1],averageConfigs[2]) + titlename+=" ,pvalue{}={:.3f}".format(idd1+idd2,pv) + print(titlename) + """ + plt.figure() + plt.hist(averageConfigs,bins=10,range=(0,1),align="mid",rwidth=0.9,label=name) #no label + plt.xlabel("performances") + plt.ylabel("Number of runs") + plt.xlim(0,1) + plt.ylim(0,7000) + plt.yticks(range(0,7000,500)) + #plt.title(titlename) + plt.legend() + plt.savefig(figdir+"/hist_plan"+path.strip("/")[-1]+"_by_budget.png") + #plt.savefig(figpath+"/hist_plan"+path.strip("/")[-1]+"_by_budgetI.png") + plt.close() + + diff --git a/eo/contrib/irace/expe/beta/hist_join_random.py b/eo/contrib/irace/expe/beta/hist_join_random.py new file mode 100755 index 000000000..0b5d3c7d4 --- /dev/null +++ b/eo/contrib/irace/expe/beta/hist_join_random.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt + +##python3 hist_random.py ./dastga_results_all/fastga_results_random ./hist_and_csv/ +#argv : list of elite results +data=sys.argv[1] +figpath=sys.argv[2] +averageConfigs=[] +name=[] +for path in os.listdir(data): + #eg path: maxEv=100_nbAlgo=15_2021-08-20T1511+0200_results_randoms + average=[] + if("maxEv" in path): + 
for ddir in os.listdir(os.path.join(data,path)): #ddir : directory of one run_elites_all or more + if("crossover" in ddir): + #name.append("_".join(ddir.split("_")[1:3])) + for fastgadir in os.listdir(os.path.join(data,path,ddir,"data")): #fastgadir : directory of 50 runs of a configuration + for fname in os.listdir(os.path.join(data,path,ddir,"data",fastgadir)): + with open(os.path.join(data,path,ddir,"data",fastgadir,fname)) as fd: + auc = float(fd.readlines()[0]) *(-1) + average.append(auc) + #hist[belonging(auc,cum)]+=1 + averageConfigs.append(average) + name.append(path.split("_")[0]) + +figdir=os.path.join(figpath,"hist_join") +try: + os.makedirs(figdir) +except FileExistsError: + pass + +colors=['yellow', 'green',"blue","pink","purple","orange","magenta","gray","darkred","cyan","brown","olivedrab","thistle","stateblue"] +plt.figure() +plt.hist(averageConfigs,bins=10,range=(0,1),align="mid",rwidth=0.5,label=name) #no label +plt.xlabel("performances") +plt.ylabel("Number of runs") +plt.ylim([0,8000]) +plt.xlim(0,1) +plt.yticks(range(0,8000,500)) +#plt.xticks(np.cumsum([0.1]*10)) +plt.legend() +plt.savefig(figdir+"/hist_random_by_budget.png") +plt.close() diff --git a/eo/contrib/irace/expe/beta/irace_files_pA/forbidden.txt b/eo/contrib/irace/expe/beta/irace_files_pA/forbidden.txt new file mode 100755 index 000000000..56eb175cd --- /dev/null +++ b/eo/contrib/irace/expe/beta/irace_files_pA/forbidden.txt @@ -0,0 +1,13 @@ +## Template for specifying forbidden parameter configurations in irace. +## +## This filename must be specified via the --forbidden-file command-line option +## (or forbiddenFile in scenario.txt). +## +## The format is one constraint per line. Each constraint is a logical +## expression (in R syntax). If a parameter configuration +## is generated that makes the logical expression evaluate to TRUE, +## then the configuration is discarded. +## +## Examples of valid logical operators are: == != >= <= > < & | ! 
%in% +(replacement %in% c(2,3,4,5,6,7,8,9,10)) & (offspringsize > popsize) +(replacement %in% c(1)) & (offspringsize < popsize) diff --git a/eo/contrib/irace/expe/beta/irace_files_pF/forbidden.txt b/eo/contrib/irace/expe/beta/irace_files_pF/forbidden.txt new file mode 100755 index 000000000..86c8798e4 --- /dev/null +++ b/eo/contrib/irace/expe/beta/irace_files_pF/forbidden.txt @@ -0,0 +1,15 @@ +## Template for specifying forbidden parameter configurations in irace. +## +## This filename must be specified via the --forbidden-file command-line option +## (or forbiddenFile in scenario.txt). +## +## The format is one constraint per line. Each constraint is a logical +## expression (in R syntax). If a parameter configuration +## is generated that makes the logical expression evaluate to TRUE, +## then the configuration is discarded. +## +## Examples of valid logical operators are: == != >= <= > < & | ! %in% +(replacement %in% c(2,3,4,5,6,7,8,9,10)) & (offspringsize > popsize) +(replacement %in% c(1)) & (offspringsize < popsize) +#(as.numeric(replacement) == 2) & (offspringsize > popsize) +#(as.numeric(replacement) == 3) & (offspringsize > popsize) diff --git a/eo/contrib/irace/expe/beta/irace_files_pO/default.instances b/eo/contrib/irace/expe/beta/irace_files_pO/default.instances new file mode 100755 index 000000000..a0a1adfc3 --- /dev/null +++ b/eo/contrib/irace/expe/beta/irace_files_pO/default.instances @@ -0,0 +1,48 @@ +## This is an example of specifying instances with a file. + +# Each line is an instance relative to trainInstancesDir +# (see scenario.txt.tmpl) and an optional sequence of instance-specific +# parameters that will be passed to target-runner when invoked on that +# instance. 
+ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 diff --git a/eo/contrib/irace/expe/beta/irace_files_pO/example.scen b/eo/contrib/irace/expe/beta/irace_files_pO/example.scen new file mode 100755 index 000000000..8b9447333 --- /dev/null +++ b/eo/contrib/irace/expe/beta/irace_files_pO/example.scen @@ -0,0 +1,228 @@ +###################################################### -*- mode: r -*- ##### +## Scenario setup for Iterated Race (irace). +############################################################################ + +## To use the default value of a parameter of iRace, simply do not set +## the parameter (comment it out in this file, and do not give any +## value on the command line). + +## File that contains the description of the parameters of the target +## algorithm. +parameterFile = "./fastga.param" + +## Directory where the programs will be run. +execDir = "." + +## File to save tuning results as an R dataset, either absolute path or +## relative to execDir. +# logFile = "./irace.Rdata" + +## Previously saved log file to recover the execution of irace, either +## absolute path or relative to the current directory. If empty or NULL, +## recovery is not performed. +# recoveryFile = "" + +## Directory where training instances are located; either absolute path or +## relative to current directory. If no trainInstancesFiles is provided, +## all the files in trainInstancesDir will be listed as instances. +trainInstancesDir = "." + +## File that contains a list of training instances and optionally +## additional parameters for them. If trainInstancesDir is provided, irace +## will search for the files in this folder. +trainInstancesFile = "./default.instances" + +## File that contains a table of initial configurations. If empty or NULL, +## all initial configurations are randomly generated. 
+# configurationsFile = "" + +## File that contains a list of logical expressions that cannot be TRUE +## for any evaluated configuration. If empty or NULL, do not use forbidden +## expressions. +# forbiddenFile = "" + +## Script called for each configuration that executes the target algorithm +## to be tuned. See templates. +targetRunner = "./target-runner" + +## Number of times to retry a call to targetRunner if the call failed. +# targetRunnerRetries = 0 + +## Optional data passed to targetRunner. This is ignored by the default +## targetRunner function, but it may be used by custom targetRunner +## functions to pass persistent data around. +# targetRunnerData = "" + +## Optional R function to provide custom parallelization of targetRunner. +# targetRunnerParallel = "" + +## Optional script or R function that provides a numeric value for each +## configuration. See templates/target-evaluator.tmpl +# targetEvaluator = "" + +## Maximum number of runs (invocations of targetRunner) that will be +## performed. It determines the maximum budget of experiments for the +## tuning. +maxExperiments = 0 #100000 + + +## Maximum total execution time in seconds for the executions of +## targetRunner. targetRunner must return two values: cost and time. +# maxTime = 60 + +## Fraction (smaller than 1) of the budget used to estimate the mean +## computation time of a configuration. Only used when maxTime > 0 +# budgetEstimation = 0.02 + +## Maximum number of decimal places that are significant for numerical +## (real) parameters. +digits = 2 + +## Debug level of the output of irace. Set this to 0 to silence all debug +## messages. Higher values provide more verbose debug messages. +# debugLevel = 0 + +## Number of iterations. +# nbIterations = 0 + +## Number of runs of the target algorithm per iteration. +# nbExperimentsPerIteration = 0 + +## Randomly sample the training instances or use them in the order given. +# sampleInstances = 1 + +## Statistical test used for elimination. 
Default test is always F-test +## unless capping is enabled, in which case the default test is t-test. +## Valid values are: F-test (Friedman test), t-test (pairwise t-tests with +## no correction), t-test-bonferroni (t-test with Bonferroni's correction +## for multiple comparisons), t-test-holm (t-test with Holm's correction +## for multiple comparisons). +# testType = "F-test" + +## Number of instances evaluated before the first elimination test. It +## must be a multiple of eachTest. +# firstTest = 5 + +## Number of instances evaluated between elimination tests. +# eachTest = 1 + +## Minimum number of configurations needed to continue the execution of +## each race (iteration). +# minNbSurvival = 0 + +## Number of configurations to be sampled and evaluated at each iteration. +# nbConfigurations = 0 + +## Parameter used to define the number of configurations sampled and +## evaluated at each iteration. +# mu = 5 + +## Confidence level for the elimination test. +# confidence = 0.95 + +## If the target algorithm is deterministic, configurations will be +## evaluated only once per instance. +# deterministic = 0 + +## Seed of the random number generator (by default, generate a random +## seed). +# seed = NA + +## Number of calls to targetRunner to execute in parallel. Values 0 or 1 +## mean no parallelization. +# parallel = 0 + +## Enable/disable load-balancing when executing experiments in parallel. +## Load-balancing makes better use of computing resources, but increases +## communication overhead. If this overhead is large, disabling +## load-balancing may be faster. +# loadBalancing = 1 + +## Enable/disable MPI. Use Rmpi to execute targetRunner in parallel +## (parameter parallel is the number of slaves). +# mpi = 0 + +## Specify how irace waits for jobs to finish when targetRunner submits +## jobs to a batch cluster: sge, pbs, torque or slurm. targetRunner must +## submit jobs to the cluster using, for example, qsub. 
+# batchmode = 0 + +## Enable/disable the soft restart strategy that avoids premature +## convergence of the probabilistic model. +# softRestart = 1 + +## Soft restart threshold value for numerical parameters. If NA, NULL or +## "", it is computed as 10^-digits. +# softRestartThreshold = "" + +## Directory where testing instances are located, either absolute or +## relative to current directory. +# testInstancesDir = "" + +## File containing a list of test instances and optionally additional +## parameters for them. +# testInstancesFile = "" + +## Number of elite configurations returned by irace that will be tested if +## test instances are provided. +# testNbElites = 1 + +## Enable/disable testing the elite configurations found at each +## iteration. +# testIterationElites = 0 + +## Enable/disable elitist irace. +# elitist = 1 + +## Number of instances added to the execution list before previous +## instances in elitist irace. +# elitistNewInstances = 1 + +## In elitist irace, maximum number per race of elimination tests that do +## not eliminate a configuration. Use 0 for no limit. +# elitistLimit = 2 + +## User-defined R function that takes a configuration generated by irace +## and repairs it. +# repairConfiguration = "" + +## Enable the use of adaptive capping, a technique designed for minimizing +## the computation time of configurations. This is only available when +## elitist is active. +# capping = 0 + +## Measure used to obtain the execution bound from the performance of the +## elite configurations: median, mean, worst, best. +# cappingType = "median" + +## Method to calculate the mean performance of elite configurations: +## candidate or instance. +# boundType = "candidate" + +## Maximum execution bound for targetRunner. It must be specified when +## capping is enabled. +# boundMax = 0 + +## Precision used for calculating the execution time. It must be specified +## when capping is enabled. 
+# boundDigits = 0 + +## Penalization constant for timed out executions (executions that reach +## boundMax execution time). +# boundPar = 1 + +## Replace the configuration cost of bounded executions with boundMax. +# boundAsTimeout = 1 + +## Percentage of the configuration budget used to perform a postselection +## race of the best configurations of each iteration after the execution +## of irace. +# postselection = 0 + +## Enable/disable AClib mode. This option enables compatibility with +## GenericWrapper4AC as targetRunner script. +# aclib = 0 + +## END of scenario file +############################################################################ + diff --git a/eo/contrib/irace/expe/beta/irace_files_pO/fastga.param b/eo/contrib/irace/expe/beta/irace_files_pO/fastga.param new file mode 100755 index 000000000..9f8c088f4 --- /dev/null +++ b/eo/contrib/irace/expe/beta/irace_files_pO/fastga.param @@ -0,0 +1,10 @@ +# name switch type range +# continuator "--continuator=" c (0) +crossoverrate "--crossover-rate=" r (0,1) +crossselector "--cross-selector=" c (0,1,2,3,4,5,6) +# aftercrossselector "--aftercross-selector=" c (0) +crossover "--crossover=" c (0,1,2,3,4,5,6,7,8,9) +mutationrate "--mutation-rate=" r (0,1) +mutselector "--mut-selector=" c (0,1,2,3,4,5,6) +mutation "--mutation=" c (0,1,2,3,4,5,6,7,8,9,10) +replacement "--replacement=" c (0,1,2,3,4,5,6,7,8,9,10) diff --git a/eo/contrib/irace/expe/beta/irace_files_pO/target-runner b/eo/contrib/irace/expe/beta/irace_files_pO/target-runner new file mode 100755 index 000000000..941322610 --- /dev/null +++ b/eo/contrib/irace/expe/beta/irace_files_pO/target-runner @@ -0,0 +1,88 @@ +#!/bin/bash +############################################################################### +# This script is the command that is executed every run. +# Check the examples in examples/ +# +# This script is run in the execution directory (execDir, --exec-dir). 
+# +# PARAMETERS: +# $1 is the candidate configuration number +# $2 is the instance ID +# $3 is the seed +# $4 is the instance name +# The rest ($* after `shift 4') are parameters to the run +# +# RETURN VALUE: +# This script should print one numerical value: the cost that must be minimized. +# Exit with 0 if no error, with 1 in case of error +############################################################################### +error() { + echo "`TZ=UTC date`: $0: error: $@" + exit 1 +} + + +EXE="./fastga" +LOG_DIR="irace_logs" + +FIXED_PARAMS="--problem=0" +MAX_EVALS=100 +# +CONFIG_ID=$1 +INSTANCE_ID=$2 +SEED=$3 +INSTANCE=$(echo $4 | sed 's/\//\n/g'|tail -n 1) +CROSSOVER_RATE=$5 +CROSSOVER_SELECTOR=$6 +CROSSOVER=$7 +MUTATION_RATE=$8 +MUT_SELECTOR=$9 +MUTATION=${10} +REPLACEMENT=${11} +shift 11 || error "Not enough parameters" + +INSTANCE_PARAMS=$* + +# STDOUT=${LOG_DIR}/c${CONFIG_ID}_i${INSTANCE_ID}_s${SEED}.stdout +# STDERR=${LOG_DIR}/c${CONFIG_ID}_i${INSTANCE_ID}_s${SEED}.stderr +STDOUT="/dev/null" +STDERR="/dev/null" + +if [ ! -x "${EXE}" ]; then + error "${EXE}: not found or not executable (pwd: $(pwd))" +fi + +# If the program just prints a number, we can use 'exec' to avoid +# creating another process, but there can be no other commands after exec. +#exec $EXE ${FIXED_PARAMS} -i $INSTANCE ${INSTANCE_PARAMS} +# exit 1 +# +# Otherwise, save the output to a file, and parse the result from it. +# (If you wish to ignore segmentation faults you can use '{}' around +# the command.) +cmd="$EXE ${FIXED_PARAMS} --instance=${INSTANCE} --seed=${SEED} ${CROSSOVER_RATE} ${CROSSOVER_SELECTOR} ${CROSSOVER} ${MUTATION_RATE} ${MUT_SELECTOR} ${MUTATION} ${REPLACEMENT}" +# NOTE: irace seems to capture both stderr and stdout, so you should not output to stderr +echo ${cmd} > ${STDERR} +$cmd 2> ${STDERR} | tee ${STDOUT} + +# The following code is useless if the binary only output a single number on stdout. + +# This may be used to introduce a delay if there are filesystem +# issues. 
+# SLEEPTIME=1 +# while [ ! -s "${STDOUT}" ]; do +# sleep $SLEEPTIME +# let "SLEEPTIME += 1" +# done + +# This is an example of reading a number from the output. +# It assumes that the objective value is the first number in +# the first column of the last line of the output. +# if [ -s "${STDOUT}" ]; then +# COST=$(tail -n 1 ${STDOUT} | grep -e '^[[:space:]]*[+-]\?[0-9]' | cut -f1) +# echo "$COST" +# rm -f "${STDOUT}" "${STDERR}" +# exit 0 +# else +# error "${STDOUT}: No such file or directory" +# fi diff --git a/eo/contrib/irace/expe/beta/mwtestU.py b/eo/contrib/irace/expe/beta/mwtestU.py new file mode 100755 index 000000000..00b06ead6 --- /dev/null +++ b/eo/contrib/irace/expe/beta/mwtestU.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt +from scipy.stats import mannwhitneyu + +##cmd eg : +# python3 hist_by_2_4_5.py ./fastga_results_all/ ./hist_and_csv/ 100000 1000 + +#get the Mann Whitney test U results between the plan F and plan R +# (change ligne 23 and 44 for other plan, and the maxExp, maxEv for other budget) + +path=sys.argv[1] +figpath=sys.argv[2] #directory to store the data +maxExp=sys.argv[3] +maxEv=sys.argv[4] + +hist_pb=[[] for i in range(19)] +name=[] +randind=-1 +for fastga in os.listdir(path): #ddir : directory of fastga_plan + if(fastga in {"fastga_results_planF"}): + for plan in os.listdir(os.path.join(path,fastga)): + print("maxExp="+str(maxExp)+"_maxEv="+str(maxEv)+"_" in plan,plan,"maxExp="+str(maxExp)+"_maxEv="+str(maxEv)) + #print("maxExp="+str(maxExp)+"_maxEv="+str(maxEv) in plan,plan,"maxExp="+str(maxExp)+"_maxEv="+str(maxEv)) + if("maxExp="+str(maxExp)+"_maxEv="+str(maxEv)+"_" in plan): + name.append("_".join(plan.split("_")[:3])) + for fastgadir in os.listdir(os.path.join(path,fastga,plan,"raw","data")): #fastgadir : directory of 50 runs of a configuration + pb=int(fastgadir.split("_")[0].split("=")[1]) + average_pb=[] + for fname in 
os.listdir(os.path.join(path,fastga,plan,"raw","data",fastgadir)): + with open(os.path.join(path,fastga,plan,"raw","data",fastgadir,fname)) as fd: + auc = float(fd.readlines()[0]) + average_pb.append(auc) + if(hist_pb[pb]==[]): #first algo + hist_pb[pb].append(average_pb) + elif(len(hist_pb[pb])!=len(name)): + hist_pb[pb].append(average_pb) + else: + hist_pb[pb][len(name)-1]+=average_pb #another algo for the same plan + + + if("random" in fastga): + for randir in os.listdir(os.path.join(path,fastga)): + #eg path: maxEv=100_nbAlgo=15_2021-08-20T1511+0200_results_randoms + if(("maxEv="+str(maxEv)+"_") in randir): + print("maxEv="+str(maxEv) in randir,randir) + name.append(randir.split("_")[0]+"_random") + randind=len(name)-1 + print(randind,name) + for ddir in os.listdir(os.path.join(path,fastga,randir)): #ddir : directory of one run_elites_all or more + if("crossover" in ddir): + for fastgadir in os.listdir(os.path.join(path,fastga,randir,ddir,"data")): #fastgadir : directory of 50 runs of a configuration + average_pb=[] + pb=int(fastgadir.split("_")[0].split("=")[1]) + for fname in os.listdir(os.path.join(path,fastga,randir,ddir,"data",fastgadir)): + with open(os.path.join(path,fastga,randir,ddir,"data",fastgadir,fname)) as fd: + auc = float(fd.readlines()[0]) + average_pb.append(auc) + #print(len(hist_pb[pb]),len(name), pb) + if(hist_pb[pb]==[]): #first algo + #print("entrer random vide") + hist_pb[pb].append(average_pb) + elif(len(hist_pb[pb])!=len(name)): + #print("entrer random !=") + hist_pb[pb].append(average_pb) + else: + hist_pb[pb][len(name)-1]+=average_pb #another algo for the same plan + + +figdir=os.path.join(figpath,"mwtestU_FR") +try: + os.makedirs(figdir) +except FileExistsError: + pass +#colors=['yellow', 'green',"blue","pink","purple","orange","magenta","gray","darkred","cyan","brown","olivedrab","thistle","stateblue"] +print(name) + +filename="mwtestU_maxExp={}_maxEv={}_FR.csv".format(maxExp,maxEv) +with open(os.path.join(figdir,filename),'w+') 
as csvfile: + csvfile.write(" ,"+",".join(map(str,range(0,19)))+"\n") +meanvalue=[] +pvalue=[] +meanR=[] +meanF=[] +mdianR=[] +mdianF=[] +mdianvalue=[] +iqrR=[] +iqrF=[] +stdR=[] +stdF=[] +iqrvalue=[] +pstd=[] + +for pb in range(19): + #hR,lR,_=plt.hist(hist_pb[pb][randind],bins=10,range=(-1,0),align="mid",label=name) #no label color=colors[:len(name)] + #hF,lF,_=plt.hist(hist_pb[pb][np.abs(1-randind)],bins=10,range=(-1,0),align="mid",label=name) #no label color=colors[:len(name)] + _,pv=mannwhitneyu(hist_pb[pb][np.abs(1-randind)],hist_pb[pb][randind]) + print(_,pv) + #meanvalue.append(np.mean(np.array(hF)*np.array(lF[:len(lF)-1]))-np.mean(np.array(hR)*np.array(lR[:len(lR)-1]))) + pstd.append(np.std(hist_pb[pb][np.abs(1-randind)])-np.std(hist_pb[pb][randind])) + stdF.append(np.std(hist_pb[pb][np.abs(1-randind)])) + stdR.append(np.std(hist_pb[pb][randind])) + meanF.append(np.mean(hist_pb[pb][np.abs(1-randind)])) + meanR.append(np.mean(hist_pb[pb][randind])) + mdianF.append(np.median(hist_pb[pb][np.abs(1-randind)])) + mdianR.append(np.median(hist_pb[pb][randind])) + mdianvalue.append(np.median(hist_pb[pb][np.abs(1-randind)])-np.median(hist_pb[pb][randind])) + meanvalue.append(np.mean(hist_pb[pb][np.abs(1-randind)])-np.mean(hist_pb[pb][randind])) + pvalue.append(pv) + Q1 = np.percentile(hist_pb[pb][np.abs(1-randind)], 25, interpolation = 'midpoint') + # Third quartile (Q3) + Q3 = np.percentile(hist_pb[pb][np.abs(1-randind)], 75, interpolation = 'midpoint') + # Interquaritle range (IQR) + iqrF.append( Q3 - Q1) + Q1 = np.percentile(hist_pb[pb][randind], 25, interpolation = 'midpoint') + # Third quartile (Q3) + Q3 = np.percentile(hist_pb[pb][randind], 75, interpolation = 'midpoint') + # Interquaritle range (IQR) + iqrR.append( Q3 - Q1) + print(_,pv) +iqrvalue=np.array(iqrF)-np.array(iqrR) +with open(os.path.join(figdir,filename),'a') as csvfile: + csvfile.write("mF-mR,"+",".join(map(str,meanvalue))+"\n") + csvfile.write("p_value,"+",".join(map(str,pvalue))+"\n") + 
csvfile.write("mF,"+",".join(map(str,meanF))+"\n") + csvfile.write("mR,"+",".join(map(str,meanR))+"\n") + csvfile.write("medianF-medianR,"+",".join(map(str,mdianvalue))+"\n") + csvfile.write("medianF,"+",".join(map(str,mdianF))+"\n") + csvfile.write("medianR,"+",".join(map(str,mdianR))+"\n") + csvfile.write("stdF-stdR,"+",".join(map(str,pstd))+"\n") # pstd holds std(F)-std(R) for each problem + csvfile.write("stdF,"+",".join(map(str,stdF))+"\n") + csvfile.write("stdR,"+",".join(map(str,stdR))+"\n") + csvfile.write("iqrF,"+",".join(map(str,iqrF))+"\n") + csvfile.write("iqrR,"+",".join(map(str,iqrR))+"\n") + csvfile.write("iqrF-iqrR,"+",".join(map(str,iqrvalue))+"\n") + + \ No newline at end of file diff --git a/eo/contrib/irace/expe/beta/parseO_irace_bests.py b/eo/contrib/irace/expe/beta/parseO_irace_bests.py new file mode 100755 index 000000000..c1811acdf --- /dev/null +++ b/eo/contrib/irace/expe/beta/parseO_irace_bests.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +#parse data1 +import os +import re +import sys +#print("pb,ecdf,id,crossover-rate,cross-selector,crossover,mutation-rate,mut-selector,mutation,replacement") #plan1 +print("pb,ecdf,id,crossover-rate,cross-selector,crossover,mutation-rate,mut-selector,mutation,replacement") + + +#give the path of one experiment +argv=sys.argv[1] +for datadir in os.listdir(argv): + #if(os.path.isdir(os.path.join(argv,datadir))): check if argv/datadir is a directory + if(datadir.find("results_irace")>=0): #check if the directory is one JOB + for pb_dir in os.listdir(os.path.join(argv,datadir)): + if "results_problem" in pb_dir: + pb_id=pb_dir.replace("results_problem_","") + with open(os.path.join("./",argv,datadir,pb_dir,"irace.log")) as fd: + data = fd.readlines() + + # Find the last best configuration + bests = [line.strip() for line in data if "Best-so-far" in line] + #print(datadir,bests) + best = bests[-1].split() + best_id, best_perf = best[2], best[5] + # print(best_id,best_perf) + + # Filter the config detail + configs = [line.strip() for line in data if 
"--crossover-rate=" in line and best_id in line] + # print(configs) + + # Format as CSV + algo = re.sub(r"--\S*=", ",", configs[0]) # raw-string pattern: replace each "--option=" switch by a comma + csv_line = pb_id + "," + best_perf + "," + algo + print(csv_line.replace(" ","")) diff --git a/eo/contrib/irace/expe/beta/parse_auc_average.py b/eo/contrib/irace/expe/beta/parse_auc_average.py new file mode 100755 index 000000000..b2d20dbd3 --- /dev/null +++ b/eo/contrib/irace/expe/beta/parse_auc_average.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt + +#get the auc average values of one experiment +figdir=sys.argv[1] # directory of a result of one experiment +#eg : ./fastga_results_all/fastga_results_planO/planO_maxExp\=100000_maxEv\=5n_2021-08-13T19\:04+02\:00_results_elites_all/raw + +if("fastga_results_plan" in figdir): + print("FID,",",".join(map(str,range(1,16)))) + aucs=[[] for i in range(19)] + for fastgadir in os.listdir(os.path.join(figdir,"raw/data")): #fastgadir : directory of 50 runs of an elite configuration + #cum=np.cumsum([0.1]*10) + average=[] + for fname in os.listdir(os.path.join(figdir,"raw/data",fastgadir)): + with open(os.path.join(figdir,"raw/data",fastgadir,fname)) as fd: + auc = float(fd.readlines()[0]) * -1 + average.append(auc) + aucs[int(fastgadir.split("_")[0].split("=")[1])].append(average) + #print(np.shape(aucs)) + + + + for i in range(19): + print(str(i)+",",",".join(map(str,np.mean(aucs[i],1)))) + + + + + + + diff --git a/eo/contrib/irace/expe/beta/planA/riaA.sh b/eo/contrib/irace/expe/beta/planA/riaA.sh index c28359626..0653a3358 100755 --- a/eo/contrib/irace/expe/beta/planA/riaA.sh +++ b/eo/contrib/irace/expe/beta/planA/riaA.sh @@ -14,7 +14,7 @@ outdir="${dir}/dataA_maxExp=${mexp}_maxEv=${mevals}_$(date --iso-8601=seconds)" mkdir -p ${outdir} for r in $(seq 2); do echo "Run $r/15"; - cmd="qsub -N iraceA_maxEv_${r} -q beta -l select=1:ncpus=1 -l walltime=00:30:00 -- ${scratchpath}/planA/r_iA.sh ${outdir} ${r} ${mexp} 
${mevals} ${myhome}" + cmd="qsub -N iraceA_maxEv_${r} -q beta -l select=1:ncpus=1 -l walltime=00:25:00 -- ${scratchpath}/planA/r_iA.sh ${outdir} ${r} ${mexp} ${mevals} ${myhome}" #cmd="bash ./r_iA_buckets.sh ${outdir} ${r} ${mexp} ${mevals}" echo $cmd time -p $cmd diff --git a/eo/contrib/irace/expe/beta/planF/riaF.sh b/eo/contrib/irace/expe/beta/planF/riaF.sh index 5791a1a1d..e400f152a 100755 --- a/eo/contrib/irace/expe/beta/planF/riaF.sh +++ b/eo/contrib/irace/expe/beta/planF/riaF.sh @@ -15,7 +15,7 @@ for r in $(seq 2); do echo "Run $r/15"; #date -Iseconds #cmd="qsub -N irace_${runs}_${buckets}" -q beta -l select=1:ncpus=1 -l walltime=00:04:00 --${HOME}/run_irace.sh ${dir} - cmd="qsub -N iraceF_${mevals}_run=${r} -q beta -l select=1:ncpus=1 -l walltime=00:30:00 -- ${scratchpath}/planF/r_iF.sh ${dir} ${r} ${mexp} ${mevals} ${myhome}" + cmd="qsub -N iraceF_${mevals}_run=${r} -q beta -l select=1:ncpus=1 -l walltime=00:25:00 -- ${scratchpath}/planF/r_iF.sh ${dir} ${r} ${mexp} ${mevals} ${myhome}" #time -p bash ${HOME}/plan2/run_irace2.sh ${dir} ${r} &> ${dir}/erreur_${r}.txt #bash ${HOME}/test/r_i.sh echo $cmd diff --git a/eo/contrib/irace/expe/beta/planO/r_iO.sh b/eo/contrib/irace/expe/beta/planO/r_iO.sh new file mode 100755 index 000000000..b69a18941 --- /dev/null +++ b/eo/contrib/irace/expe/beta/planO/r_iO.sh @@ -0,0 +1,43 @@ +#!/bin/bash +#run once each problem + +. /etc/profile.d/modules.sh +export MODULEPATH=${MODULEPATH}${MODULEPATH:+:}/opt/dev/Modules/Anaconda:/opt/dev/Modules/Compilers:/opt/dev/Modules/Frameworks:/opt/dev/Modules/Libraries:/opt/dev/Modules/Tools:/opt/dev/Modules/IDEs:/opt/dev/Modules/MPI +module load LLVM/clang-llvm-10.0 +module load R + +dir=$1 +run=$2 +budget_irace=$3 +buckets=$4 +myhome=$5 + +cp -r ${myhome}/R . +cp -r ${myhome}/irace_files_pO . + +outdir="${run}_$(date --iso-8601=seconds)_results_irace" +echo "start a job $(date -Iseconds)" + +for pb in $(seq 0 18) ; do + echo "Problem ${pb}... 
" + res="results_problem_${pb}" + mkdir -p ${dir}/${outdir}/${res} + # For some reason, irace absolutely needs those files... + cp ${myhome}/code/paradiseo/eo/contrib/irace/release/fastga ${dir}/${outdir}/${res} + + cat ./irace_files_pO/example.scen | sed "s%\".%\"${dir}/${outdir}/${res}%g" | sed "s/maxExperiments = 0/maxExperiments=${budget_irace}/" > ${dir}/${outdir}/${res}/example.scen + cp ./irace_files_pO/default.instances ${dir}/${outdir}/${res} + cp ./irace_files_pO/fastga.param ${dir}/${outdir}/${res} + cat ./irace_files_pO/target-runner | sed "s/--problem=0/--problem=${pb}/" > ${dir}/${outdir}/${res}/target-runner + chmod u+x ${dir}/${outdir}/${res}/target-runner + + echo "---start $(date)" + time -p ./R/x86_64-pc-linux-gnu-library/3.6/irace/bin/irace --scenario ${dir}/${outdir}/${res}/example.scen > ${dir}/${outdir}/${res}/irace.log + echo "---end $(date)" + + echo "done run : ${run} pb : ${pb}" + date -Iseconds +done + +echo "end a job $(date -Iseconds)---------------------" + diff --git a/eo/contrib/irace/expe/beta/planO/riaO.sh b/eo/contrib/irace/expe/beta/planO/riaO.sh new file mode 100755 index 000000000..76e7d822e --- /dev/null +++ b/eo/contrib/irace/expe/beta/planO/riaO.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +date -Iseconds +echo "STARTS" +myhome=$1 +scratchpath=$2 +mexp=$3 +mevals=$4 +name="dataO_maxExp=${mexp}_maxEv=${mevals}_$(date --iso-8601=seconds)" +dir=${scratchpath}/dataFAR/dataO/${name} +mkdir -p ${dir} + +for r in $(seq 2); do + echo "Run $r/15"; + cmd="qsub -N iraceO_maxExp=${mexp}_maxEv=${mevals}_${r} -q beta -l select=1:ncpus=1 -l walltime=00:10:00 -- ${scratchpath}/planO/r_iO.sh ${dir} ${r} ${mexp} ${mevals} ${myhome}" + echo $cmd + $cmd + #time (p=2; while [[ ${p} > 1 ]] ; do p=$(qqueue -u $USER | wc -l); echo "$r: $p"; sleep 300; done) +done + +#echo "DONE" +#date -Iseconds + diff --git a/eo/contrib/irace/expe/beta/readme.txt b/eo/contrib/irace/expe/beta/readme.txt index 6d53b7d90..85e30d5af 100755 --- 
a/eo/contrib/irace/expe/beta/readme.txt +++ b/eo/contrib/irace/expe/beta/readme.txt @@ -1,13 +1,21 @@ +############################################ +#Explanation of the experimental plans and the validation runs + +############################################ 1. INTRODUCTION The aim of all the scripts is to make the experimental plans for Algorithm Configuration for Genetic Algorithms by using a fully modular benchmarking pipeline design of this article https://arxiv.org/abs/2102.06435 . +You can download the data from : https://zenodo.org/record/5479538#.YTaT0Bnis2w + Plan A is an experimental plan for finding an efficient algorithm for all the functions that we consider. Plan F is an experimental plan for finding an efficient algorithm for each function that we consider. Plan R is an experimental plan for getting random algorithms. +Plan O is the reproduction of the experimental plan of the article. + 2. VOCABULARIES * maxExp : means maximum Experiments, the budget for irace @@ -20,18 +28,18 @@ dataA is a directory which we store all the runs of an experiment plan for sever eg : /dataA/planA_maxExp=*_maxEv=**_$(data), * is a value of maxExp, and ** is a value of maxEv -*fastga_results_all : directory which we store all the data for validation runs. It constains only 3 subdirectories (fastga_results_planF, fastga_results_planA, fastga_results_random), created by running run_exp.sh +*fastga_results_all : directory in which we store all the data for validation runs. It contains only 4 subdirectories (fastga_results_planF, fastga_results_planA, fastga_results_planO, fastga_results_random), created by running run_exp.sh -* fastga_results_planF, fastga_results_planA, fastga_results_random +* fastga_results_planF, fastga_results_planA, fastga_results_random, fastga_results_planO Each directory store the data for validation runs of each experiment plan. 
fastga_random directory are created by running run_exp.sh -fastga_results_planF and fastag_results_planA are created only after you have data in the dataA or dataF directories. +fastga_results_planF, fastga_results_planO and fastga_results_planA are created only after you have data in the dataA or dataF or dataO directories. -* planA_*, planF_* -If the planA_* or planF_* are in the dataFAR directory, the directory contains the data of experimental plan. This means that each plan contains the result of 15 runs of irace stored in irace.log file, and the data are provided by run_exp.sh. +* planA_*, planF_*, planO_* +If the planA_* or planF_* or planO_* are in the dataFAR directory, the directory contains the data of experimental plan. This means that each plan contains the result of 15 runs of irace stored in irace.log file, and the data are provided by run_exp.sh. -If the planA_* or planF_* directories are in the fastga_results_planA or fastga_results_planF, these directories contain the data of 50 validation runs by running all the best algorithms of each plan stores in dataFAR. The data are provided by running run_res.sh +If the planA_* or planF_* or planO_* directories are in the fastga_results_planA, fastga_results_planF or fastga_results_planO, these directories contain the data of 50 validation runs by running all the best algorithms of each plan stored in dataFAR. The data are provided by running run_res.sh *fastag_all_results : contains the directories of the validation run data. @@ -57,9 +65,9 @@ The directory which you load all the scripts contains : * python files : -parseA_irace_bests.py : for parsing the irace.log file of each data provided by running irace. 
By giving a bounch of directories of one experiment - -parseF_irace_bests.py + -parseF_irace_bests.py : for plan F and plan O (in the plan O csv, the offspringsize and popsize labels are present, but they have no values) - * 4 directories : + * 6 directories : -irace_files_pA : -default.instances -example.scen @@ -74,6 +82,12 @@ The directory which you load all the scripts contains : -forbidden.txt -target-runner + -irace_files_pO : + -default.instances : + -example.scen + -fastga.param + -target-runner + -planA : -riaA.sh : for running 15 times r_iA.sh file by submitting to the mesu cluster -r_iA.sh : for running irace for all the problems @@ -81,11 +95,14 @@ The directory which you load all the scripts contains : -planF : -riaF.sh : for running 15 times r_iF.sh file by submitting to the mesu cluster -r_iF.sh : for running irace for each problem we considered + -planO : + -riaO.sh : for running 15 times r_iO.sh file by submitting to the mesu cluster + -r_iO.sh : for running irace for each problem we considered The directories planA, planF contain the scripts to run one experiment of Plan A and Plan F. -The directories irace_files_pA and irace_files_pA contain the scripts needing for calling irace for one experiment of Plan A and Plan F. [Look at the irace package : User Guide for more information] +The directories irace_files_pA, irace_files_pO and irace_files_pF contain the scripts needed for calling irace for one experiment of Plan A, Plan O and Plan F. [Look at the irace package : User Guide for more information] 5. CONCLUSION @@ -97,5 +114,54 @@ Warning : run_exp.sh may take few days or few weeks depending on the Budget you +############################################ +#Scripts for getting histograms and csv files of validation runs results. 
+ +############################################ + +get histograms or csv files for random data : +-hist_join_random.py : get one histogram for a plan by budget +-dist_op_random.py : get csv files of the distribution of operators by problems + +get histograms or csv files for plan O,F,A : +-hist_join.py +-dist_op_all.py +-parse_auc_average # get the mean auc value of each problem and each irace run + +get histograms for plan F, A , R, O +-hist_by_pb_budget_plan.py : get histograms by problem +-hist_by_FARO_pb.py : +-hist_by_FARO.py +-best_out_of_elites.py : get the best algorithm found among 15 runs of irace, for a plan +files to call all these files : +-csv_all.sh : get all the csv files (average of auc, best out ..), call best_out_of_elites.py, parse_auc_average.py, dist_op_*.py +-hist_all.sh : get all the histograms, call each hist_*.py file + +file for other goal : +-mwtestU.py ; csv file for selected problems which irace algorithms gave better performances than random algorithms +-rep_std_mean_selected.py : to get the std, mean and the distribution of operators of the selected problems + + + + + +############################################ +#Summary + +############################################ + +Get the experiment data : +run : bash run_exp.sh + +-----------Only after you have the experiment data: +Get the validation run data : +run : bash run_res.sh + +Get histograms : +run : bash hist_all.sh + +Get csv files of validation run data : +run : bash csv_all.sh + diff --git a/eo/contrib/irace/expe/beta/rep_std_mean_selected.py b/eo/contrib/irace/expe/beta/rep_std_mean_selected.py new file mode 100755 index 000000000..1add3e9ab --- /dev/null +++ b/eo/contrib/irace/expe/beta/rep_std_mean_selected.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +import sys +import os +import numpy as np +import matplotlib.pyplot as plt +import pandas + +#eg : python3 ./rep_std_mean_selected.py ./hist_and_csv/distribution_op_fastga_results_planF +#get the std of the selected problem 
+path=sys.argv[1] # directory of each distribution by pb +lpb={13,14,15,16,18} #set of the selected problem ids +#column : [operator : nbpossibilities] +distdir=path+"/rep_std_mean" #output directory, created inside path +try: + os.makedirs(distdir) +except FileExistsError: + pass + +res=[] #one DataFrame per selected csv file +for csvfile in os.listdir(os.path.join(path)): #the listing also contains distdir : only regular files are parsed + if(os.path.isfile(os.path.join(path,csvfile)) and int(csvfile.split("_")[1].split("=")[1]) in lpb): + print(csvfile) + res.append(pandas.read_csv(os.path.join(path,csvfile))) + +#assert(len(res[0])==len(res[1]) , "each csv file does not have the same line " #check if the number of param is eq in each csv file + + +name ="std_rep_pb={}".format(str(lpb))+"".join(map(str,path.split("/")[-3].split("_")[:3]))+".csv" +with open(os.path.join(distdir,name),'w+') as fd: + fd.write("Op index, "+",".join(map(str,range(0,11)))+"\n") +globalname="rep_all_pb={}".format(str(lpb))+"".join(map(str,path.split("/")[-3].split("_")[:3]))+".csv" +with open(os.path.join(distdir,globalname),'w+') as fd: + fd.write("Op index, "+",".join(map(str,range(0,11)))+"\n") +meanname="mean_rep_pb={}".format(str(lpb))+"".join(map(str,path.split("/")[-3].split("_")[:3]))+".csv" +with open(os.path.join(distdir,meanname),'w+') as fd: + fd.write("Op index, "+",".join(map(str,range(0,11)))+"\n") +#print(res) +limparam=[10,7,10,10,7,11,11,10,10] +for i in range(1,10): #9 parameters : row i-1 of each csv + npval=np.zeros((len(res),limparam[i-1]),dtype=int) + for pb in range(len(res)): + print(i,np.array(np.array(res[pb][i-1:i])[0]),np.array(np.array(res[pb][i-1:i])[0][1:limparam[i-1]+1])) + npval[pb,:]=np.array(np.array(res[pb][i-1:i])[0][1:limparam[i-1]+1],dtype=int) + nameparam=np.array(res[pb][i-1:i])[0][0] + line= ",".join(map(str,np.std(npval,0)))+",-"*(11-limparam[i-1]) + print("ligne ",line) + + with open(os.path.join(distdir,name),'a') as fd: + fd.write(nameparam+","+line+"\n") + line= ",".join(map(str,np.sum(npval,0)))+",-"*(11-limparam[i-1]) + with open(os.path.join(distdir,globalname),'a') as fd: + fd.write(nameparam+","+line+"\n") + line= 
",".join(map(str,np.mean(npval,0)))+",-"*(11-limparam[i-1]) + with open(os.path.join(distdir,meanname),'a') as fd: + fd.write(nameparam+","+line+"\n") \ No newline at end of file diff --git a/eo/contrib/irace/expe/beta/run_elites_planO.sh b/eo/contrib/irace/expe/beta/run_elites_planO.sh new file mode 100755 index 000000000..8f93293c4 --- /dev/null +++ b/eo/contrib/irace/expe/beta/run_elites_planO.sh @@ -0,0 +1,64 @@ +#!/bin/bash +#instance = seed + +. /etc/profile.d/modules.sh +export MODULEPATH=${MODULEPATH}${MODULEPATH:+:}/opt/dev/Modules/Anaconda:/opt/dev/Modules/Compilers:/opt/dev/Modules/Frameworks:/opt/dev/Modules/Libraries:/opt/dev/Modules/Tools:/opt/dev/Modules/IDEs:/opt/dev/Modules/MPI +module load LLVM/clang-llvm-10.0 + + + + +csv_file=$1 #contains all the configs of all the problems of one experiment +mexp=$2 #only used in the name of the output directory +mevals=$3 #only used in the name of the output directory +path=$4 #directory in which the output directory is created + +echo "-----------------Start $(date -Iseconds) " +# Number of runs (=seeds). +runs=50 + +# You most probably want to run on release builds. +exe="/home/${USER}/fastga" + +outdir="${path}/planO_maxExp=${mexp}_maxEv=${mevals}_$(date --iso-8601=minutes)_results_elites_all" +mkdir -p ${outdir} +mkdir -p ${outdir}/raw +mkdir -p ${outdir}/raw/data +mkdir -p ${outdir}/raw/logs + +n=0 +algoid=0 +for line in $(cat ${csv_file}| sed 1,1d ); do # the first line of the csv is skipped + a=($(echo $line | sed "s/,/ /g")) + algo="--crossover-rate=${a[3]} --cross-selector=${a[4]} --crossover=${a[5]} --mutation-rate=${a[6]} --mut-selector=${a[7]} --mutation=${a[8]} --replacement=${a[9]}" + + #perc=$(echo "scale=3;${n}/(285)*100.0" | bc) + #echo "${perc}% : algo ${algoid}/285" + # echo -n "Runs: " + name_dir="pb=${a[0]}_$(echo "${algo}" | sed 's/--//g' | sed 's/ /_/g')" + mkdir -p ${outdir}/raw/data/${name_dir} + mkdir -p ${outdir}/raw/logs/${name_dir} + for seed in $(seq ${runs}) ; do # Iterates over runs/seeds. + # This is the command to be run. 
+ #cmd="${exe} --full-log=1 --problem=${pb} --seed=${seed} ${algo}" + cmd="${exe} --problem=${a[0]} --seed=${seed} --instance=${seed} ${algo}" + #echo ${cmd} # Print the command. + # Forge a directory/log file name + # (remove double dashes and replace spaces with underscores). + name_run="pb=${a[0]}_seed=${seed}_$(echo "${algo}" | sed 's/--//g' | sed 's/ /_/g')" + # echo $name_run + # Actually start the command. + ${cmd} > "${outdir}/raw/data/${name_dir}/${name_run}.dat" 2> "${outdir}/raw/logs/${name_dir}/${name_run}.log" + # Check for the most common problem in the log file. + #cat "${outdir}/raw/logs/${name_run}.log" | grep "illogical performance" + done # seed + + n=$(($n+1)) + algoid=$(($algoid+1)) +done + +# Move IOH logs in the results directory. +#mv ./FastGA_* ${outdir} + +echo "Done $(date) -----------------------" +date diff --git a/eo/contrib/irace/expe/beta/run_exp.sh b/eo/contrib/irace/expe/beta/run_exp.sh index c3669a1c7..44630176c 100644 --- a/eo/contrib/irace/expe/beta/run_exp.sh +++ b/eo/contrib/irace/expe/beta/run_exp.sh @@ -1,11 +1,12 @@ #!/bin/bash -lexp=(300 600 1000 10000) -levals=(100 500 1000) +lexp=(300 600) +levals=(100 500) myscratchpath=/scratchbeta/$USER myhome=${HOME} for exp in ${lexp[@]} ; do for evals in ${levals[@]} ; do bash ./planF/riaF.sh ${myhome} ${myscratchpath} ${exp} ${evals} + bash ./planO/riaO.sh ${myhome} ${myscratchpath} ${exp} ${evals} bash ./planA/riaA.sh ${myhome} ${myscratchpath} ${exp} ${evals} done done diff --git a/eo/contrib/irace/expe/beta/run_res.sh b/eo/contrib/irace/expe/beta/run_res.sh index de5b352ad..b3579863a 100644 --- a/eo/contrib/irace/expe/beta/run_res.sh +++ b/eo/contrib/irace/expe/beta/run_res.sh @@ -14,7 +14,7 @@ done #get validation run of each config -dir=/scratchbeta/$USER/csv_FA +dir=/scratchbeta/$USER/csv_FAO listdir=$(echo $(ls ${dir})) echo ${listdir[@]} for csvdir in ${listdir[@]} ; do