feat: add tailored irace config files

2020-10-06 15:44:08 +02:00 · 2020-10-06 15:44:08 +02:00 · 2c80a01fad
commit 2c80a01fad
parent b2a6fea7d2
4 changed files with 378 additions and 0 deletions
--- a/eo/contrib/irace/irace-config/default.instances
+++ b/eo/contrib/irace/irace-config/default.instances
@ -0,0 +1,48 @@
+## This is an example of specifying instances with a file.
+
+# Each line is an instance relative to trainInstancesDir
+# (see scenario.txt.tmpl) and an optional sequence of instance-specific
+# parameters that will be passed to target-runnerx when invoked on that
+# instance.
+
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
--- a/eo/contrib/irace/irace-config/example.scen
+++ b/eo/contrib/irace/irace-config/example.scen
@ -0,0 +1,227 @@
+###################################################### -*- mode: r -*- #####
+## Scenario setup for Iterated Race (irace).
+############################################################################
+
+## To use the default value of a parameter of iRace, simply do not set
+## the parameter (comment it out in this file, and do not give any
+## value on the command line).
+
+## File that contains the description of the parameters of the target
+## algorithm.
+parameterFile = "./fastga.param"
+
+## Directory where the programs will be run.
+execDir = "."
+
+## File to save tuning results as an R dataset, either absolute path or
+## relative to execDir.
+# logFile = "./irace.Rdata"
+
+## Previously saved log file to recover the execution of irace, either
+## absolute path or relative to the current directory.  If empty or NULL,
+## recovery is not performed.
+# recoveryFile = ""
+
+## Directory where training instances are located; either absolute path or
+## relative to current directory. If no trainInstancesFiles is provided,
+## all the files in trainInstancesDir will be listed as instances.
+trainInstancesDir = "."
+
+## File that contains a list of training instances and optionally
+## additional parameters for them. If trainInstancesDir is provided, irace
+## will search for the files in this folder.
+trainInstancesFile = "./default.instances"
+
+## File that contains a table of initial configurations. If empty or NULL,
+## all initial configurations are randomly generated.
+# configurationsFile = ""
+
+## File that contains a list of logical expressions that cannot be TRUE
+## for any evaluated configuration. If empty or NULL, do not use forbidden
+## expressions.
+# forbiddenFile = ""
+
+## Script called for each configuration that executes the target algorithm
+## to be tuned. See templates.
+targetRunner = "./target-runner"
+
+## Number of times to retry a call to targetRunner if the call failed.
+# targetRunnerRetries = 0
+
+## Optional data passed to targetRunner. This is ignored by the default
+## targetRunner function, but it may be used by custom targetRunner
+## functions to pass persistent data around.
+# targetRunnerData = ""
+
+## Optional R function to provide custom parallelization of targetRunner.
+# targetRunnerParallel = ""
+
+## Optional script or R function that provides a numeric value for each
+## configuration. See templates/target-evaluator.tmpl
+# targetEvaluator = ""
+
+## Maximum number of runs (invocations of targetRunner) that will be
+## performed. It determines the maximum budget of experiments for the
+## tuning.
+maxExperiments = 2000
+
+## Maximum total execution time in seconds for the executions of
+## targetRunner. targetRunner must return two values: cost and time.
+# maxTime = 60
+
+## Fraction (smaller than 1) of the budget used to estimate the mean
+## computation time of a configuration. Only used when maxTime > 0
+# budgetEstimation = 0.02
+
+## Maximum number of decimal places that are significant for numerical
+## (real) parameters.
+digits = 2
+
+## Debug level of the output of irace. Set this to 0 to silence all debug
+## messages. Higher values provide more verbose debug messages.
+# debugLevel = 0
+
+## Number of iterations.
+# nbIterations = 0
+
+## Number of runs of the target algorithm per iteration.
+# nbExperimentsPerIteration = 0
+
+## Randomly sample the training instances or use them in the order given.
+# sampleInstances = 1
+
+## Statistical test used for elimination. Default test is always F-test
+## unless capping is enabled, in which case the default test is t-test.
+## Valid values are: F-test (Friedman test), t-test (pairwise t-tests with
+## no correction), t-test-bonferroni (t-test with Bonferroni's correction
+## for multiple comparisons), t-test-holm (t-test with Holm's correction
+## for multiple comparisons).
+# testType = "F-test"
+
+## Number of instances evaluated before the first elimination test. It
+## must be a multiple of eachTest.
+# firstTest = 5
+
+## Number of instances evaluated between elimination tests.
+# eachTest = 1
+
+## Minimum number of configurations needed to continue the execution of
+## each race (iteration).
+# minNbSurvival = 0
+
+## Number of configurations to be sampled and evaluated at each iteration.
+# nbConfigurations = 0
+
+## Parameter used to define the number of configurations sampled and
+## evaluated at each iteration.
+# mu = 5
+
+## Confidence level for the elimination test.
+# confidence = 0.95
+
+## If the target algorithm is deterministic, configurations will be
+## evaluated only once per instance.
+# deterministic = 0
+
+## Seed of the random number generator (by default, generate a random
+## seed).
+# seed = NA
+
+## Number of calls to targetRunner to execute in parallel. Values 0 or 1
+## mean no parallelization.
+# parallel = 0
+
+## Enable/disable load-balancing when executing experiments in parallel.
+## Load-balancing makes better use of computing resources, but increases
+## communication overhead. If this overhead is large, disabling
+## load-balancing may be faster.
+# loadBalancing = 1
+
+## Enable/disable MPI. Use Rmpi to execute targetRunner in parallel
+## (parameter parallel is the number of slaves).
+# mpi = 0
+
+## Specify how irace waits for jobs to finish when targetRunner submits
+## jobs to a batch cluster: sge, pbs, torque or slurm. targetRunner must
+## submit jobs to the cluster using, for example, qsub.
+# batchmode = 0
+
+## Enable/disable the soft restart strategy that avoids premature
+## convergence of the probabilistic model.
+# softRestart = 1
+
+## Soft restart threshold value for numerical parameters. If NA, NULL or
+## "", it is computed as 10^-digits.
+# softRestartThreshold = ""
+
+## Directory where testing instances are located, either absolute or
+## relative to current directory.
+# testInstancesDir = ""
+
+## File containing a list of test instances and optionally additional
+## parameters for them.
+# testInstancesFile = ""
+
+## Number of elite configurations returned by irace that will be tested if
+## test instances are provided.
+# testNbElites = 1
+
+## Enable/disable testing the elite configurations found at each
+## iteration.
+# testIterationElites = 0
+
+## Enable/disable elitist irace.
+# elitist = 1
+
+## Number of instances added to the execution list before previous
+## instances in elitist irace.
+# elitistNewInstances = 1
+
+## In elitist irace, maximum number per race of elimination tests that do
+## not eliminate a configuration. Use 0 for no limit.
+# elitistLimit = 2
+
+## User-defined R function that takes a configuration generated by irace
+## and repairs it.
+# repairConfiguration = ""
+
+## Enable the use of adaptive capping, a technique designed for minimizing
+## the computation time of configurations. This is only available when
+## elitist is active.
+# capping = 0
+
+## Measure used to obtain the execution bound from the performance of the
+## elite configurations: median, mean, worst, best.
+# cappingType = "median"
+
+## Method to calculate the mean performance of elite configurations:
+## candidate or instance.
+# boundType = "candidate"
+
+## Maximum execution bound for targetRunner. It must be specified when
+## capping is enabled.
+# boundMax = 0
+
+## Precision used for calculating the execution time. It must be specified
+## when capping is enabled.
+# boundDigits = 0
+
+## Penalization constant for timed out executions (executions that reach
+## boundMax execution time).
+# boundPar = 1
+
+## Replace the configuration cost of bounded executions with boundMax.
+# boundAsTimeout = 1
+
+## Percentage of the configuration budget used to perform a postselection
+## race of the best configurations of each iteration after the execution
+## of irace.
+# postselection = 0
+
+## Enable/disable AClib mode. This option enables compatibility with
+## GenericWrapper4AC as targetRunner script.
+# aclib = 0
+
+## END of scenario file
+############################################################################
+
--- a/eo/contrib/irace/irace-config/target-runner
+++ b/eo/contrib/irace/irace-config/target-runner
@ -0,0 +1,84 @@
+#!/bin/bash
+###############################################################################
+# This script is the command that is executed every run.
+# Check the examples in examples/
+#
+# This script is run in the execution directory (execDir, --exec-dir).
+#
+# PARAMETERS:
+# $1 is the candidate configuration number
+# $2 is the instance ID
+# $3 is the seed
+# $4 is the instance name
+# The rest ($* after `shift 4') are parameters to the run
+#
+# RETURN VALUE:
+# This script should print one numerical value: the cost that must be minimized.
+# Exit with 0 if no error, with 1 in case of error
+###############################################################################
+error() {
+    echo "`TZ=UTC date`: $0: error: $@"
+    exit 1
+}
+
+
+EXE="./fastga"
+
+FIXED_PARAMS="--problem=0"
+
+CONFIG_ID=$1
+INSTANCE_ID=$2
+SEED=$3
+INSTANCE=$(echo $4 | sed 's/\//\n/g'|tail -n 1)
+CROSSOVER_RATE=$5
+CROSSOVER_SELECTOR=$6
+CROSSOVER=$7
+MUTATION_RATE=$8
+MUT_SELECTOR=$9
+MUTATION=${10}
+REPLACEMENT=${11}
+shift 11 || error "Not enough parameters"
+
+INSTANCE_PARAMS=$*
+
+STDOUT=c${CONFIG_ID}_i${INSTANCE_ID}_s${SEED}.stdout
+STDERR=c${CONFIG_ID}_i${INSTANCE_ID}_s${SEED}.stderr
+
+if [ ! -x "${EXE}" ]; then
+    error "${EXE}: not found or not executable (pwd: $(pwd))"
+fi
+
+# If the program just prints a number, we can use 'exec' to avoid
+# creating another process, but there can be no other commands after exec.
+#exec $EXE ${FIXED_PARAMS} -i $INSTANCE ${INSTANCE_PARAMS} 
+# exit 1
+# 
+# Otherwise, save the output to a file, and parse the result from it.
+# (If you wish to ignore segmentation faults you can use '{}' around
+# the command.)
+cmd="$EXE ${FIXED_PARAMS} --instance=${INSTANCE} --seed=${SEED} ${CROSSOVER_RATE} ${CROSSOVER_SELECTOR} ${CROSSOVER} ${MUTATION_RATE} ${MUT_SELECTOR} ${MUTATION} ${REPLACEMENT}"
+# NOTE: irace seems to capture both stderr and stdout, so you should not output to stderr
+echo ${cmd} > ${STDERR}
+$cmd 2> ${STDERR} | tee ${STDOUT}
+
+# The following code is useless if the binary only output a single number on stdout.
+
+# This may be used to introduce a delay if there are filesystem
+# issues.
+# SLEEPTIME=1
+# while [ ! -s "${STDOUT}" ]; do
+#     sleep $SLEEPTIME
+#     let "SLEEPTIME += 1"
+# done
+
+# This is an example of reading a number from the output.
+# It assumes that the objective value is the first number in
+# the first column of the last line of the output.
+# if [ -s "${STDOUT}" ]; then
+#     COST=$(tail -n 1 ${STDOUT} | grep -e '^[[:space:]]*[+-]\?[0-9]' | cut -f1)
+#     echo "$COST"
+#     rm -f "${STDOUT}" "${STDERR}"
+#     exit 0
+# else
+#     error "${STDOUT}: No such file or directory"
+# fi
--- a/eo/contrib/irace/run_irace.sh
+++ b/eo/contrib/irace/run_irace.sh
@ -0,0 +1,19 @@
+#!/bin/bash
+
+if [[ $# != 1 ]] ; then
+    echo "ERROR: build dir not indicated"
+    exit 1
+fi
+
+cd $1
+pwd
+
+# Fore some reason, irace absolutely need those files...
+cp ../irace-config/example.scen .
+cp ../irace-config/target-runner .
+cp ../irace-config/default.instances .
+
+# Generate the parameter list file.
+./fastga -h > fastga.param 2>/dev/null
+/usr/lib/R/site-library/irace/bin/irace --scenario example.scen
+