Scippy

    SCIP

    Solving Constraint Integer Programs

    pub_bandit_exp3.h
    Go to the documentation of this file.
    1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    2/* */
    3/* This file is part of the program and library */
    4/* SCIP --- Solving Constraint Integer Programs */
    5/* */
    6/* Copyright (c) 2002-2025 Zuse Institute Berlin (ZIB) */
    7/* */
    8/* Licensed under the Apache License, Version 2.0 (the "License"); */
    9/* you may not use this file except in compliance with the License. */
    10/* You may obtain a copy of the License at */
    11/* */
    12/* http://www.apache.org/licenses/LICENSE-2.0 */
    13/* */
    14/* Unless required by applicable law or agreed to in writing, software */
    15/* distributed under the License is distributed on an "AS IS" BASIS, */
    16/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
    17/* See the License for the specific language governing permissions and */
    18/* limitations under the License. */
    19/* */
    20/* You should have received a copy of the Apache-2.0 license */
    21/* along with SCIP; see the file LICENSE. If not visit scipopt.org. */
    22/* */
    23/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    24
    25/**@file pub_bandit_exp3.h
    26 * @ingroup PublicBanditMethods
    27 * @brief public methods for Exp.3
    28 * @author Gregor Hendel
    29 */
    30
    31/*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
    32
    33#ifndef SRC_SCIP_PUB_BANDIT_EXP3_H_
    34#define SRC_SCIP_PUB_BANDIT_EXP3_H_
    35
    36#include "scip/def.h"
    37#include "scip/type_bandit.h"
    38#include "scip/type_retcode.h"
    39#include "scip/type_scip.h"
    40
    41#ifdef __cplusplus
    42extern "C" {
    43#endif
    44
    45/**@addtogroup PublicBanditMethods
    46 *
    47 * ## Exp.3
    48 *
    49 * Exp.3 is a randomized selection method for the multi-armed bandit problem.
    50 *
    51 * Exp.3 maintains a probability distribution
    52 * according to which an action is drawn
    53 * in every iteration.
    54 * The probability distribution is a mixture between
    55 * a uniform distribution and a softmax distribution
    56 * based on the cumulative rewards of the actions.
    57 * The weight of the uniform distribution in the mixture
    58 * is controlled by the parameter \f$ \gamma \f$, i.e.,
    59 * setting \f$ \gamma = 1\f$ uses a uniform distribution
    60 * in every selection step.
    61 * The cumulative reward for the actions can be
    62 * fine-tuned by adding a general bias for all actions.
    63 * The bias is given by the parameter \f$ \beta \f$.
    64 *
    65 * @{
    66 */
    67
    68/** creates and resets an Exp.3 bandit algorithm using \p scip pointer */
    69SCIP_EXPORT
    71 SCIP* scip, /**< SCIP data structure */
    72 SCIP_BANDIT** exp3, /**< pointer to store bandit algorithm */
    73 SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
    74 SCIP_Real gammaparam, /**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */
    75 SCIP_Real beta, /**< gain offset between 0 and 1 at every observation */
    76 int nactions, /**< the positive number of actions for this bandit algorithm */
    77 unsigned int initseed /**< initial seed for random number generation */
    78 );
    79
    80/** set gamma parameter of Exp.3 bandit algorithm to increase weight of uniform distribution */
    81SCIP_EXPORT
    83 SCIP_BANDIT* exp3, /**< bandit algorithm */
    84 SCIP_Real gammaparam /**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */
    85 );
    86
    87/** set beta parameter of Exp.3 bandit algorithm to increase gain offset for actions that were not played */
    88SCIP_EXPORT
    90 SCIP_BANDIT* exp3, /**< bandit algorithm */
    91 SCIP_Real beta /**< gain offset between 0 and 1 at every observation */
    92 );
    93
    94/** returns probability to play an action */
    95SCIP_EXPORT
    97 SCIP_BANDIT* exp3, /**< bandit algorithm */
    98 int action /**< index of the requested action */
    99 );
    100
    101/** @}*/
    102
    103#ifdef __cplusplus
    104}
    105#endif
    106
    107#endif
    common defines and data types used in all packages of SCIP
    #define SCIP_Real
    Definition: def.h:156
    void SCIPsetGammaExp3(SCIP_BANDIT *exp3, SCIP_Real gammaparam)
    Definition: bandit_exp3.c:337
    SCIP_RETCODE SCIPcreateBanditExp3(SCIP *scip, SCIP_BANDIT **exp3, SCIP_Real *priorities, SCIP_Real gammaparam, SCIP_Real beta, int nactions, unsigned int initseed)
    Definition: bandit_exp3.c:311
    void SCIPsetBetaExp3(SCIP_BANDIT *exp3, SCIP_Real beta)
    Definition: bandit_exp3.c:350
    SCIP_Real SCIPgetProbabilityExp3(SCIP_BANDIT *exp3, int action)
    Definition: bandit_exp3.c:363
    type definitions for bandit selection algorithms
    type definitions for return codes for SCIP methods
    enum SCIP_Retcode SCIP_RETCODE
    Definition: type_retcode.h:63
    type definitions for SCIP's main datastructure