Source code for configuration_comparing_aggregation_algorithms

# -*- coding: utf-8 -*-
"""
Configuration for the simulations, for the single-player case, for comparing Aggregation algorithms.
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.7"

# Try to detect the number of CPU cores
try:
    from multiprocessing import cpu_count
    CPU_COUNT = cpu_count()
except ImportError:
    CPU_COUNT = 1

from os import getenv

if __name__ == '__main__':
    print("Warning: this script 'configuration_comparing_aggregation_algorithms.py' is NOT executable. Use 'main.py configuration_comparing_aggregation_algorithms' or 'make comparing_aggregation_algorithms' ...")  # DEBUG
    exit(0)

# Import arms and algorithms
try:
    from Arms import *
    from Policies import *
except ImportError:
    from SMPyBandits.Arms import *
    from SMPyBandits.Policies import *

#: HORIZON : number of time steps of the experiments.
#: Warning: should be >= 10000 to be interesting "asymptotically".
HORIZON = 500
HORIZON = 2000
HORIZON = 3000
HORIZON = 5000
HORIZON = 10000
# HORIZON = 20000
# HORIZON = 30000
# # # HORIZON = 40000
# HORIZON = 100000
HORIZON = int(getenv('T', HORIZON))
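# For example (illustrative shell invocation, using the entry point mentioned in the warning above):
#   T=20000 python main.py configuration_comparing_aggregation_algorithms
# runs the same experiments with a horizon of 20000 time steps.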

#: REPETITIONS : number of repetitions of the experiments.
#: Warning: Should be >= 10 to be statistically trustworthy.
REPETITIONS = 1  # XXX To profile the code, turn down parallel computing
REPETITIONS = 4  # Nb of cores, to have exactly one repetition process per core
# REPETITIONS = 1000
# REPETITIONS = 200
# REPETITIONS = 100
# REPETITIONS = 50
# REPETITIONS = 20
REPETITIONS = int(getenv('N', REPETITIONS))

#: To profile the code, turn down parallel computing
DO_PARALLEL = False  # XXX do not let this = False  # To profile the code, turn down parallel computing
DO_PARALLEL = True
DO_PARALLEL = (REPETITIONS > 1) and DO_PARALLEL

#: Number of jobs to use for the parallel computations. -1 means all the CPU cores, 1 means no parallelization.
N_JOBS = -1 if DO_PARALLEL else 1
if CPU_COUNT > 4:  # We are on a server, let's be nice and not use all cores
    N_JOBS = min(CPU_COUNT, max(int(CPU_COUNT / 3), CPU_COUNT - 8))
N_JOBS = int(getenv('N_JOBS', N_JOBS))
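# Worked example of the formula above: with CPU_COUNT = 12 this gives
# N_JOBS = min(12, max(12 // 3, 12 - 8)) = min(12, max(4, 4)) = 4,
# and with CPU_COUNT = 48 it gives min(48, max(16, 40)) = 40 cores.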

#: Number of arms for non-hard-coded problems (Bayesian problems)
NB_ARMS = 9
NB_ARMS = int(getenv('K', NB_ARMS))
NB_ARMS = int(getenv('NB_ARMS', NB_ARMS))

# Random events
RANDOM_SHUFFLE = False  #: The arms are shuffled (``shuffle(arms)``).
RANDOM_INVERT = False  #: The arms are inverted (``arms = arms[::-1]``).
NB_RANDOM_EVENTS = 5  #: Number of random events. They are uniformly spaced in time steps.
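# For example, with HORIZON = 10000 and NB_RANDOM_EVENTS = 5, an enabled random
# event (shuffle or inversion) would occur roughly every 10000 / 5 = 2000 time steps.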

TEST_Aggregator = False  # XXX do not let this = False if you want to test my Aggregator policy
TEST_Aggregator = True

TEST_CORRAL = True
TEST_CORRAL = False  # XXX do not let this = False if you want to test the CORRAL policy

TEST_LEARNEXP = False  # XXX do not let this = False if you want to test the LearnExp policy
TEST_LEARNEXP = True

TEST_HEDGE = False  # XXX do not let this = False if you want to test the Hedge policy
TEST_HEDGE = True

#: Should we cache rewards? If so, the same random rewards are reused for all the REPETITIONS simulations and for every algorithm.
CACHE_REWARDS = TEST_Aggregator or TEST_CORRAL or TEST_LEARNEXP or TEST_HEDGE
CACHE_REWARDS = False  # XXX to disable manually this feature

#: Should the Aggregator policy update the trusts of all its children, or only of the child trusted for the last decision?
UPDATE_ALL_CHILDREN = True
UPDATE_ALL_CHILDREN = False  # XXX do not let this = False

#: Should the Aggregator policy use a biased estimator of the rewards (i.e. just ``r_t``), or the unbiased estimator ``r_t / p_t``?
UNBIASED = True
UNBIASED = False

#: Should we update the trust probabilities as in Exp4, or as in my initial Aggregator proposal?
UPDATE_LIKE_EXP4 = True     # trusts^(t+1) = exp(rate_t * estimated rewards up to time t)
UPDATE_LIKE_EXP4 = False    # trusts^(t+1) <-- trusts^t * exp(rate_t * estimated reward at time t)
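# Illustrative sketch of the two update rules (not the actual Aggregator code;
# `trusts`, `rate_t` and the reward estimates are hypothetical names):
#
#   if UPDATE_LIKE_EXP4:
#       # Exp4-style: recompute trusts from the *cumulated* estimated rewards
#       trusts = numpy.exp(rate_t * cumulated_estimated_rewards)
#   else:
#       # initial Aggregator proposal: multiplicative update with the *last* estimate
#       trusts = trusts * numpy.exp(rate_t * estimated_reward_t)
#   trusts = trusts / numpy.sum(trusts)  # renormalize to a probability vector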


# Parameters for the arms
TRUNC = 1  #: Trunc parameter, ie amplitude, for Exponential arms

VARIANCE = 0.05   #: Variance of Gaussian arms
# VARIANCE = 0.25   #: Variance of Gaussian arms
MINI = 0  #: lower bound on rewards from Gaussian arms
MAXI = 1  #: upper bound on rewards from Gaussian arms, ie amplitude = 1

SCALE = 1   #: Scale of Gamma arms

#: Type of arms for non-hard-coded problems (Bayesian problems)
ARM_TYPE = "Bernoulli"
ARM_TYPE = str(getenv('ARM_TYPE', ARM_TYPE))
ARM_TYPE = mapping_ARM_TYPE[ARM_TYPE]
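# For example (illustrative, assuming "Gaussian" is a key of mapping_ARM_TYPE):
#   ARM_TYPE=Gaussian python main.py configuration_comparing_aggregation_algorithms
# would use Gaussian instead of Bernoulli arms for the non-hard-coded problem below.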


#: This dictionary configures the experiments
configuration = {
    # --- Duration of the experiment
    "horizon": HORIZON,
    # --- Number of repetition of the experiment (to have an average)
    "repetitions": REPETITIONS,
    # --- Parameters for the use of joblib.Parallel
    "n_jobs": N_JOBS,    # = nb of CPU cores
    "verbosity": 6,      # Max joblib verbosity
    # --- Random events
    "random_shuffle": RANDOM_SHUFFLE,
    "random_invert": RANDOM_INVERT,
    "nb_random_events": NB_RANDOM_EVENTS,
    # --- Cache rewards: use the same random rewards for the Aggregator[..] and the algorithms
    "cache_rewards": CACHE_REWARDS,
    # --- Arms
    "environment": [  # 1)  Bernoulli arms
        # {   # A very easy problem, but it is used in a lot of articles
        #     "arm_type": Bernoulli,
        #     "params": [0.1, 0.5, 0.9]
        # },
        # {   # An easy problem, but it is used in a lot of articles
        #     "arm_type": Bernoulli,
        #     "params": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        # },
        # {   # Another problem, best arm = last, with three groups: very bad arms (0.01, 0.02), middle arms (0.3 - 0.6) and very good arms (0.795, 0.8, 0.805)
        #     "arm_type": Bernoulli,
        #     "params": [0.01, 0.02, 0.3, 0.4, 0.5, 0.6, 0.795, 0.8, 0.805]
        # },
        # {   # A very hard problem, as used in [Cappé et al, 2012]
        #     "arm_type": Bernoulli,
        #     "params": [0.01, 0.01, 0.01, 0.02, 0.02, 0.02, 0.05, 0.05, 0.1]
        # },
        # XXX Default!
        {   # A very easy problem (NB_ARMS arms), but it is used in a lot of articles
            "arm_type": ARM_TYPE,
            "params": uniformMeans(NB_ARMS, 1 / (1. + NB_ARMS))
        }
    # ],
    # # "environment": [  # 2)  Exponential arms
    #     {   # An example problem with 9 arms
    #         "arm_type": Exponential,
    #         "params": [(2, TRUNC), (3, TRUNC), (4, TRUNC), (5, TRUNC), (6, TRUNC), (7, TRUNC), (8, TRUNC), (9, TRUNC), (10, TRUNC)]
    #     },
    # # ],
    # # "environment": [  # 3)  Gaussian arms
    #     {   # An example problem with 3 or 9 arms
    #         "arm_type": Gaussian,
    #         # "params": [(mean, VARIANCE, MINI, MAXI) for mean in list(range(-8, 10, 2))]
    #         "params": [(mean, VARIANCE) for mean in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]]
    #         # "params": [(mean, VARIANCE) for mean in [0.1, 0.5, 0.9]]
    #     },
    # # # "environment": [  # 4)  Mix between Bernoulli and Gaussian and Exponential arms
    #     [
    #         arm_type(mean)
    #         for mean in [0.1, 0.5, 0.9]
    #         for arm_type in [Bernoulli, lambda mean: Gaussian(mean, VARIANCE), ExponentialFromMean]
    #     ],
    # # "environment": [  # 5)  Mix between Bernoulli and Gaussian and Exponential arms
    #     [
    #         arm_type(mean)
    #         for mean in [0.01, 0.02, 0.09]
    #         for arm_type in [Bernoulli, lambda mean: Gaussian(mean, VARIANCE), ExponentialFromMean]
    #     ],
    # # ],
    # # "environment": [  # XXX Gamma arms
    # #     {   # An example problem with 3 arms
    # #         "arm_type": GammaFromMean,
    # #         "params": [(shape, SCALE, 0, 10) for shape in [1, 2, 3, 4, 5]]
    # #     },
    # # ],
    ],
}
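# For reference: assuming uniformMeans(K, delta) returns K means evenly spaced
# in [delta, 1 - delta], the default problem above (NB_ARMS = 9, delta = 0.1)
# uses means [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], as in the
# commented-out Bernoulli example.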

# if len(configuration['environment']) > 1:
#     raise ValueError("WARNING do not use this hack if you try to use more than one environment.")
#     # Note: I dropped the support for more than one environments, for this part of the configuration, but not the simulation code


#: Compute LOWER and AMPLITUDE values from the arms of the problem
LOWER, AMPLITUDE = 0, 1
try:
    for env in configuration['environment']:
        if isinstance(env, dict) and 'params' in env and 'arm_type' in env:
            nbArms = len(env['params'])
            arm_type = env['arm_type']
            for param in env['params']:
                arm = arm_type(*param) if isinstance(param, (dict, tuple, list)) else arm_type(param)
                l, a = arm.lower_amplitude
                LOWER = min(LOWER, l)
                AMPLITUDE = max(AMPLITUDE, a)
        else:  # the env must be a list of arms, already created
            for arm in env:
                l, a = arm.lower_amplitude
                LOWER = min(LOWER, l)
                AMPLITUDE = max(AMPLITUDE, a)
    mini, maxi = LOWER, LOWER + AMPLITUDE
    print("Apparently, the arms have rewards in [{}, {}] (lower = {}, amplitude = {})".format(LOWER, LOWER + AMPLITUDE, LOWER, AMPLITUDE))
except Exception as e:
    print("Warning: Possibly wrong estimate of lower, amplitude ....")

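# For instance (assuming each arm class exposes a ``lower_amplitude`` attribute,
# as used above), a Bernoulli arm gives (lower, amplitude) = (0, 1), so the
# default problem keeps LOWER = 0 and AMPLITUDE = 1.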

# Custom klucb functions
_klucbGauss = klucbGauss


def klucbGauss(x, d, precision=0.):
    """klucbGauss(x, d, sig2x) with the good variance (= 0.05)."""
    return _klucbGauss(x, d, 0.25)
    # return _klucbGauss(x, d, VARIANCE)


_klucbGamma = klucbGamma


def klucbGamma(x, d, precision=0.):
    """klucbGamma(x, d, sig2x) with the good scale (= 1)."""
    return _klucbGamma(x, d, SCALE)


configuration.update({
    "policies": [
        # {
        #     "archtype": Uniform,  # The stupidest policy, fully uniform
        #     "params": {}
        # },
        # --- UCBalpha algorithm
        # {
        #     "archtype": UCBalpha,
        #     "params": {
        #         "alpha": 4,
        #         "lower": LOWER, "amplitude": AMPLITUDE,
        #     }
        # },
        {
            "archtype": UCBalpha,
            "params": {
                "alpha": 1,
                "lower": LOWER, "amplitude": AMPLITUDE,
            }
        },
        # {
        #     "archtype": UCBalpha,
        #     "params": {
        #         "alpha": 0.5,
        #         "lower": LOWER, "amplitude": AMPLITUDE,
        #     }
        # },
        # --- Thompson algorithm
        {
            "archtype": Thompson,
            "params": {
                "lower": LOWER, "amplitude": AMPLITUDE,
            }
        },
        # --- KL algorithms, here only klUCBPlus with different klucb functions
        {
            "archtype": klUCB,
            "params": {
                "lower": LOWER, "amplitude": AMPLITUDE,
                "klucb": klucbBern,
                # "horizon": HORIZON,
            }
        },
        {
            "archtype": klUCB,
            "params": {
                "lower": LOWER, "amplitude": AMPLITUDE,
                "klucb": klucbExp,
                # "horizon": HORIZON,
            }
        },
        {
            "archtype": klUCB,
            "params": {
                "lower": LOWER, "amplitude": AMPLITUDE,
                "klucb": klucbGauss,
                # "horizon": HORIZON,
            }
        },
        # {
        #     "archtype": klUCB,
        #     "params": {
        #         "lower": LOWER, "amplitude": AMPLITUDE,
        #         "klucb": klucbGamma,
        #         "horizon": HORIZON,
        #     }
        # },
        # --- BayesUCB algorithm
        {
            "archtype": BayesUCB,
            "params": {
                "lower": LOWER, "amplitude": AMPLITUDE,
            }
        },
        # # --- Finite-Horizon Gittins index
        # {
        #     "archtype": ApproximatedFHGittins,
        #     "params": {
        #         "horizon": 1.1 * HORIZON,
        #         "alpha": 2,
        #     }
        # },
        # {
        #     "archtype": ApproximatedFHGittins,
        #     "params": {
        #         "horizon": 1.1 * HORIZON,
        #         "alpha": 1,
        #     }
        # },
        # {
        #     "archtype": ApproximatedFHGittins,
        #     "params": {
        #         "horizon": 1.1 * HORIZON,
        #         "alpha": 0.5,
        #     }
        # },
    ]
})

from itertools import product  # XXX If needed!

NON_AGGR_POLICIES = configuration["policies"]
# NON_AGGR_POLICIES += [{
#     "archtype": Uniform,  # The stupidest policy, fully uniform
#     "params": {}
# }]

# Dynamic hack to force the LearnExp (policies aggregator) to use all the policies previously/already defined
if TEST_LEARNEXP:
    # ETA_VALUES = [0.2, 0.4, 0.6, 0.8]
    ETA_VALUES = [0.9]
    # UNBIASED_VALUES = [False, True]
    UNBIASED_VALUES = [True]
    for ETA in ETA_VALUES:
        for UNBIASED in UNBIASED_VALUES:
            CURRENT_POLICIES = configuration["policies"]
            # Add one LearnExp policy
            configuration["policies"] = [{
                "archtype": LearnExp,
                "params": {
                    "children": NON_AGGR_POLICIES,
                    "unbiased": UNBIASED,
                    "eta": ETA,
                },
            }] + CURRENT_POLICIES

# Dynamic hack to force the CORRAL (policies aggregator) to use all the policies previously/already defined
if TEST_CORRAL:
    # UNBIASED_VALUES = [False, True]
    UNBIASED_VALUES = [True]
    # BROADCAST_ALL_VALUES = [False, True]
    BROADCAST_ALL_VALUES = [True]
    for UNBIASED in UNBIASED_VALUES:
        for BROADCAST_ALL in BROADCAST_ALL_VALUES:
            CURRENT_POLICIES = configuration["policies"]
            # Add one CORRAL policy
            configuration["policies"] = [{
                "archtype": CORRAL,
                "params": {
                    "children": NON_AGGR_POLICIES,
                    "horizon": HORIZON,
                    "unbiased": UNBIASED,
                    "broadcast_all": BROADCAST_ALL,
                },
            }] + CURRENT_POLICIES

# Dynamic hack to force the Aggregator (policies aggregator) to use all the policies previously/already defined
if TEST_Aggregator:
    UPDATE_LIKE_EXP4_VALUES = [False, True]
    # UPDATE_LIKE_EXP4_VALUES = [True]
    # UPDATE_ALL_CHILDREN_VALUES = [False, True]
    UPDATE_ALL_CHILDREN_VALUES = [True]
    # for UPDATE_LIKE_EXP4 in UPDATE_LIKE_EXP4_VALUES:
    #     for UPDATE_ALL_CHILDREN in UPDATE_ALL_CHILDREN_VALUES:
    # for (UPDATE_LIKE_EXP4, UPDATE_ALL_CHILDREN) in [(True, False), (False, True), (False, False)]:
    for (UPDATE_LIKE_EXP4, UPDATE_ALL_CHILDREN) in [(True, False), (False, False)]:
        CURRENT_POLICIES = configuration["policies"]
        # Add one Aggregator policy
        configuration["policies"] = [{
            "archtype": Aggregator,
            "params": {
                "children": NON_AGGR_POLICIES,
                "unbiased": UNBIASED,
                "update_all_children": UPDATE_ALL_CHILDREN,
                "decreaseRate": "auto",
                "update_like_exp4": UPDATE_LIKE_EXP4,
            },
        }] + CURRENT_POLICIES

print("Loaded experiments configuration from 'configuration_comparing_aggregation_algorithms.py' :")
print("configuration['policies'] =", configuration["policies"])  # DEBUG
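# Note: each "dynamic hack" above prepends its aggregation policy to
# configuration["policies"], so with the default flags the final list starts
# with the two Aggregator variants, then the LearnExp policy, followed by the
# base (non-aggregating) policies kept in NON_AGGR_POLICIES.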