paradiseo/eo/contrib/irace/expe/beta/mwtestU.py
2021-09-07 00:27:44 +02:00

140 lines
No EOL
6.8 KiB
Python
Executable file

#!/usr/bin/env python3
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu
## Example command:
# python3 mwtestU.py ./fastga_results_all/ ./hist_and_csv/ 100000 1000
# Computes the Mann-Whitney U test results between plan F and plan R.
# (Change lines 23 and 44 for another plan, and maxExp / maxEv for another budget.)
# Positional command-line arguments, all kept as the raw strings from argv:
#   path    - root directory that is scanned for result folders
#   figpath - directory to store the data
#   maxExp, maxEv - budget values matched against result directory names
path, figpath, maxExp, maxEv = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]

# hist_pb[pb] holds, for problem index pb (0..18), one list of AUC values
# per entry of ``name``.
hist_pb = [list() for _ in range(19)]

# Labels of the result sets found while scanning, in discovery order.
name = list()

# Position of the random result set inside ``name``; -1 until one is found.
randind = -1
def _read_auc_values(run_dir):
    """Return the first line of every file in *run_dir*, parsed as a float."""
    values = []
    for run_file in os.listdir(run_dir):
        with open(os.path.join(run_dir, run_file)) as handle:
            values.append(float(handle.readlines()[0]))
    return values


def _store_auc_values(pb, values):
    """File *values* under problem *pb*, in the slot of the newest ``name`` entry."""
    if not hist_pb[pb] or len(hist_pb[pb]) != len(name):
        # No slot yet for the newest entry of ``name``: open one.
        hist_pb[pb].append(values)
    else:
        # Slot already present (another algo for the same plan): extend it.
        hist_pb[pb][len(name) - 1] += values


# Substrings that select the result directories for the requested budget.
budget_tag = "maxExp=" + str(maxExp) + "_maxEv=" + str(maxEv)
random_tag = "maxEv=" + str(maxEv)

for fastga in os.listdir(path):
    fastga_dir = os.path.join(path, fastga)

    # --- results of plan F ---------------------------------------------
    if fastga == "fastga_results_planF":
        for plan in os.listdir(fastga_dir):
            wanted = (budget_tag + "_") in plan
            print(wanted, plan, budget_tag)
            if not wanted:
                continue
            # Label: the first three "_"-separated fields of the directory name.
            name.append("_".join(plan.split("_")[:3]))
            data_dir = os.path.join(fastga_dir, plan, "raw", "data")
            # Each sub-directory holds the runs of one configuration; the
            # problem index is read from its leading "<key>=<pb>" field.
            for fastgadir in os.listdir(data_dir):
                pb = int(fastgadir.split("_")[0].split("=")[1])
                _store_auc_values(
                    pb, _read_auc_values(os.path.join(data_dir, fastgadir))
                )

    # --- results of the random algorithms --------------------------------
    if "random" in fastga:
        for randir in os.listdir(fastga_dir):
            # eg path: maxEv=100_nbAlgo=15_2021-08-20T1511+0200_results_randoms
            if (random_tag + "_") not in randir:
                continue
            print(random_tag in randir, randir)
            name.append(randir.split("_")[0] + "_random")
            randind = len(name) - 1  # remember where the random set sits
            print(randind, name)
            for ddir in os.listdir(os.path.join(fastga_dir, randir)):
                # Only sub-directories whose name contains "crossover" are used.
                if "crossover" not in ddir:
                    continue
                data_dir = os.path.join(fastga_dir, randir, ddir, "data")
                for fastgadir in os.listdir(data_dir):
                    pb = int(fastgadir.split("_")[0].split("=")[1])
                    _store_auc_values(
                        pb, _read_auc_values(os.path.join(data_dir, fastgadir))
                    )
# Output directory <figpath>/mwtestU_FR; an already existing one is reused.
figdir = os.path.join(figpath, "mwtestU_FR")
try:
    os.makedirs(figdir)
except FileExistsError:
    pass

print(name)

# Start the CSV file (truncating any previous one) with its header line:
# a blank corner cell followed by the problem indices 0..18.
filename = "mwtestU_maxExp={}_maxEv={}_FR.csv".format(maxExp, maxEv)
header_cells = [" "] + [str(pb_index) for pb_index in range(0, 19)]
with open(os.path.join(figdir, filename), 'w+') as csvfile:
    csvfile.write(",".join(header_cells) + "\n")
def _midpoint_iqr(sample):
    """Return Q3 - Q1 of *sample*, both quartiles taken with 'midpoint'."""
    # NOTE(review): newer NumPy releases name this keyword ``method`` --
    # confirm against the installed version before changing it.
    first_quartile = np.percentile(sample, 25, interpolation='midpoint')
    third_quartile = np.percentile(sample, 75, interpolation='midpoint')
    return third_quartile - first_quartile


# Per-problem results; every list receives one entry per problem index.
# Suffix F: the non-random result set, suffix R: the random one.
meanvalue, meanF, meanR = [], [], []      # mean difference (F - R), means
mdianvalue, mdianF, mdianR = [], [], []   # median difference (F - R), medians
pstd, stdF, stdR = [], [], []             # std difference (F - R), stds
iqrvalue, iqrF, iqrR = [], [], []         # IQR difference (F - R), IQRs
pvalue = []                               # Mann-Whitney p-values

# Index of the non-random set; this is "the other one" of slots 0 and 1
# when randind is 0 or 1.
planind = np.abs(1 - randind)

for pb in range(19):
    sample_f = hist_pb[pb][planind]
    sample_r = hist_pb[pb][randind]

    u_stat, pv = mannwhitneyu(sample_f, sample_r)
    print(u_stat, pv)

    std_f, std_r = np.std(sample_f), np.std(sample_r)
    pstd.append(std_f - std_r)
    stdF.append(std_f)
    stdR.append(std_r)

    mean_f, mean_r = np.mean(sample_f), np.mean(sample_r)
    meanF.append(mean_f)
    meanR.append(mean_r)

    median_f, median_r = np.median(sample_f), np.median(sample_r)
    mdianF.append(median_f)
    mdianR.append(median_r)
    mdianvalue.append(median_f - median_r)
    meanvalue.append(mean_f - mean_r)

    pvalue.append(pv)

    # Interquartile range (IQR) of each sample.
    iqrF.append(_midpoint_iqr(sample_f))
    iqrR.append(_midpoint_iqr(sample_r))
    print(u_stat, pv)

# Element-wise IQR difference (F - R) over all problems.
iqrvalue = np.array(iqrF) - np.array(iqrR)
# Append one CSV row per statistic to the result file: a row label followed
# by one value per problem index, in the column order of the header line.
stat_rows = [
    ("mF-mR", meanvalue),
    ("p_value", pvalue),
    ("mF", meanF),
    ("mR", meanR),
    ("medianF-medianR", mdianvalue),
    ("medianF", mdianF),
    ("medianR", mdianR),
    # Fix: this row used to write ``mdianvalue`` again, duplicating the
    # median differences; ``pstd`` holds the std differences (F - R).
    ("stdF-stdR", pstd),
    ("stdF", stdF),
    ("stdR", stdR),
    ("iqrF", iqrF),
    ("iqrR", iqrR),
    ("iqrF-iqrR", iqrvalue),
]
with open(os.path.join(figdir, filename), 'a') as csvfile:
    for row_label, row_values in stat_rows:
        csvfile.write(row_label + "," + ",".join(map(str, row_values)) + "\n")