Scippy

SCIP

Solving Constraint Integer Programs

bandit_epsgreedy.c
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright 2002-2022 Zuse Institute Berlin */
7 /* */
8 /* Licensed under the Apache License, Version 2.0 (the "License"); */
9 /* you may not use this file except in compliance with the License. */
10 /* You may obtain a copy of the License at */
11 /* */
12 /* http://www.apache.org/licenses/LICENSE-2.0 */
13 /* */
14 /* Unless required by applicable law or agreed to in writing, software */
15 /* distributed under the License is distributed on an "AS IS" BASIS, */
16 /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17 /* See the License for the specific language governing permissions and */
18 /* limitations under the License. */
19 /* */
20 /* You should have received a copy of the Apache-2.0 license */
21 /* along with SCIP; see the file LICENSE. If not visit scipopt.org. */
22 /* */
23 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 
25 /**@file bandit_epsgreedy.c
26  * @ingroup OTHER_CFILES
27  * @brief implementation of epsilon greedy bandit algorithm
28  * @author Gregor Hendel
29  */
30 
31 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
32 
33 #include "scip/bandit.h"
34 #include "scip/bandit_epsgreedy.h"
35 #include "scip/pub_bandit.h"
36 #include "scip/pub_message.h"
37 #include "scip/pub_misc.h"
38 #include "scip/scip_bandit.h"
39 #include "scip/scip_mem.h"
40 #include "scip/scip_randnumgen.h"
41 
42 #define BANDIT_NAME "eps-greedy"
43 #define EPSGREEDY_SMALL 1e-6
44 
45 /*
46  * Data structures
47  */
48 
49 /** private data structure of epsilon greedy bandit algorithm */
50 struct SCIP_BanditData
51 {
52  SCIP_Real* weights; /**< weights for every action */
53  SCIP_Real* priorities; /**< saved priorities for tie breaking */
54  int* sels; /**< individual number of selections per action */
55  SCIP_Real eps; /**< epsilon parameter (between 0 and 1) to control epsilon greedy */
56  SCIP_Real decayfactor; /**< the factor to reduce the weight of older observations if exponential decay is enabled */
57  int avglim; /**< nonnegative limit on observation number before the exponential decay starts,
58  * only relevant if exponential decay is enabled
59  */
60  int nselections; /**< counter for the number of selection calls */
61  SCIP_Bool preferrecent; /**< should the weights be updated in an exponentially decaying way? */
62 };
63 
64 /*
65  * Callback methods of bandit algorithm virtual function table
66  */
67 
68 /** callback to free bandit specific data structures */
69 SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
70 { /*lint --e{715}*/
71  SCIP_BANDITDATA* banditdata;
72  int nactions;
73 
74  assert(bandit != NULL);
75 
76  banditdata = SCIPbanditGetData(bandit);
77  assert(banditdata != NULL);
78  assert(banditdata->weights != NULL);
79  nactions = SCIPbanditGetNActions(bandit);
80 
81  BMSfreeBlockMemoryArray(blkmem, &banditdata->weights, nactions);
82  BMSfreeBlockMemoryArray(blkmem, &banditdata->priorities, nactions);
83  BMSfreeBlockMemoryArray(blkmem, &banditdata->sels, nactions);
84  BMSfreeBlockMemory(blkmem, &banditdata);
85 
86  SCIPbanditSetData(bandit, NULL);
87 
88  return SCIP_OKAY;
89 }
90 
91 /** selection callback for bandit algorithm */
92 SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
93 { /*lint --e{715}*/
94  SCIP_BANDITDATA* banditdata;
95  SCIP_Real randnr;
96  SCIP_Real curreps;
97  SCIP_RANDNUMGEN* rng;
98  int nactions;
99  assert(bandit != NULL);
100  assert(selection != NULL);
101 
102  banditdata = SCIPbanditGetData(bandit);
103  assert(banditdata != NULL);
104  rng = SCIPbanditGetRandnumgen(bandit);
105  assert(rng != NULL);
106 
107  nactions = SCIPbanditGetNActions(bandit);
108 
109  /* roll the dice to check if the best element should be picked, or an element at random */
110  randnr = SCIPrandomGetReal(rng, 0.0, 1.0);
111 
112  /* make epsilon decrease with an increasing number of selections */
113  banditdata->nselections++;
114  curreps = banditdata->eps * sqrt((SCIP_Real)nactions/(SCIP_Real)banditdata->nselections);
115 
116  /* select the best action seen so far */
117  if( randnr >= curreps )
118  {
119  SCIP_Real* weights = banditdata->weights;
120  SCIP_Real* priorities = banditdata->priorities;
121  int j;
122  SCIP_Real maxweight;
123 
124  assert(weights != NULL);
125  assert(priorities != NULL);
126 
127  /* pick the element with the largest reward */
128  maxweight = weights[0];
129  *selection = 0;
130 
131  /* determine reward for every element */
132  for( j = 1; j < nactions; ++j )
133  {
134  SCIP_Real weight = weights[j];
135 
136  /* select the action that maximizes the reward, breaking ties by action priorities */
137  if( maxweight < weight
138  || (weight >= maxweight - EPSGREEDY_SMALL && priorities[j] > priorities[*selection] ) )
139  {
140  *selection = j;
141  maxweight = weight;
142  }
143  }
144  }
145  else
146  {
147  /* play one of the actions at random */
148  *selection = SCIPrandomGetInt(rng, 0, nactions - 1);
149  }
150 
151  return SCIP_OKAY;
152 }
153 
154 /** update callback for bandit algorithm */
155 SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
156 { /*lint --e{715}*/
157  SCIP_BANDITDATA* banditdata;
158 
159  assert(bandit != NULL);
160 
161  banditdata = SCIPbanditGetData(bandit);
162  assert(banditdata != NULL);
163 
164  /* increase the selection count */
165  ++banditdata->sels[selection];
166 
167  /* the very first observation is directly stored as weight for both average or exponential decay */
168  if( banditdata->sels[selection] == 1 )
169  banditdata->weights[selection] = score;
170  else
171  {
172  /* use exponentially decreasing weights for older observations */
173  if( banditdata->preferrecent && banditdata->sels[selection] > banditdata->avglim )
174  {
175  /* decrease old weights by decay factor */
176  banditdata->weights[selection] *= banditdata->decayfactor;
177  banditdata->weights[selection] += (1.0 - banditdata->decayfactor) * score;
178  }
179  else
180  {
181  /* update average score */
182  SCIP_Real diff = score - banditdata->weights[selection];
183  banditdata->weights[selection] += diff / (SCIP_Real)(banditdata->sels[selection]);
184  }
185  }
186 
187  return SCIP_OKAY;
188 }
189 
190 /** reset callback for bandit algorithm */
191 SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
192 { /*lint --e{715}*/
193  SCIP_BANDITDATA* banditdata;
194  SCIP_Real* weights;
195  int w;
196  int nactions;
197  SCIP_RANDNUMGEN* rng;
198 
199  assert(bandit != NULL);
200 
201  banditdata = SCIPbanditGetData(bandit);
202  assert(banditdata != NULL);
203 
204  weights = banditdata->weights;
205  nactions = SCIPbanditGetNActions(bandit);
206  assert(weights != NULL);
207  assert(banditdata->priorities != NULL);
208  assert(nactions > 0);
209 
210  rng = SCIPbanditGetRandnumgen(bandit);
211  assert(rng != NULL);
212 
213  /* alter priorities slightly to make them unique */
214  if( priorities != NULL )
215  {
216  for( w = 1; w < nactions; ++w )
217  {
218  assert(priorities[w] >= 0);
219  banditdata->priorities[w] = priorities[w] + SCIPrandomGetReal(rng, -EPSGREEDY_SMALL, EPSGREEDY_SMALL);
220  }
221  }
222  else
223  {
224  /* use random priorities */
225  for( w = 0; w < nactions; ++w )
226  banditdata->priorities[w] = SCIPrandomGetReal(rng, 0.0, 1.0);
227  }
228 
229  /* reset weights and selection counters to 0 */
230  BMSclearMemoryArray(weights, nactions);
231  BMSclearMemoryArray(banditdata->sels, nactions);
232 
233  banditdata->nselections = 0;
234 
235  return SCIP_OKAY;
236 }
237 
238 /*
239  * interface methods of the Epsilon Greedy bandit algorithm
240  */
241 
242 /** internal method to create and reset epsilon greedy bandit algorithm */
244  BMS_BLKMEM* blkmem, /**< block memory */
245  BMS_BUFMEM* bufmem, /**< buffer memory */
246  SCIP_BANDITVTABLE* vtable, /**< virtual function table with epsilon greedy callbacks */
247  SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
248  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
249  SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
250  SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
251  SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
252  int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
253  * only relevant if exponential decay is enabled */
254  int nactions, /**< the positive number of possible actions */
255  unsigned int initseed /**< initial random seed */
256  )
257 {
258  SCIP_BANDITDATA* banditdata;
259 
260  SCIP_ALLOC( BMSallocBlockMemory(blkmem, &banditdata) );
261  assert(banditdata != NULL);
262  assert(eps >= 0.0);
263 
264  SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->weights, nactions) );
265  SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->priorities, nactions) );
266  SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->sels, nactions) );
267  banditdata->eps = eps;
268  banditdata->nselections = 0;
269  banditdata->preferrecent = preferrecent;
270  banditdata->decayfactor = decayfactor;
271  banditdata->avglim = avglim;
272 
273  SCIP_CALL( SCIPbanditCreate(epsgreedy, vtable, blkmem, bufmem, priorities, nactions, initseed, banditdata) );
274 
275  return SCIP_OKAY;
276 }
277 
278 /** create and resets an epsilon greedy bandit algorithm */
280  SCIP* scip, /**< SCIP data structure */
281  SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
282  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
283  SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
284  SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
285  SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
286  int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
287  * only relevant if exponential decay is enabled */
288  int nactions, /**< the positive number of possible actions */
289  unsigned int initseed /**< initial seed for random number generation */
290  )
291 {
292  SCIP_BANDITVTABLE* vtable;
293  assert(scip != NULL);
294  assert(epsgreedy != NULL);
295 
296  vtable = SCIPfindBanditvtable(scip, BANDIT_NAME);
297  if( vtable == NULL )
298  {
299  SCIPerrorMessage("Could not find virtual function table for %s bandit algorithm\n", BANDIT_NAME);
300  return SCIP_INVALIDDATA;
301  }
302 
303  SCIP_CALL( SCIPbanditCreateEpsgreedy(SCIPblkmem(scip), SCIPbuffer(scip), vtable, epsgreedy,
304  priorities, eps, preferrecent, decayfactor, avglim, nactions, SCIPinitializeRandomSeed(scip, initseed)) );
305 
306  return SCIP_OKAY;
307 }
308 
309 /** get weights array of epsilon greedy bandit algorithm */
311  SCIP_BANDIT* epsgreedy /**< epsilon greedy bandit algorithm */
312  )
313 {
314  SCIP_BANDITDATA* banditdata;
315  assert(epsgreedy != NULL);
316  banditdata = SCIPbanditGetData(epsgreedy);
317  assert(banditdata != NULL);
318 
319  return banditdata->weights;
320 }
321 
322 /** set epsilon parameter of epsilon greedy bandit algorithm */
324  SCIP_BANDIT* epsgreedy, /**< epsilon greedy bandit algorithm */
325  SCIP_Real eps /**< parameter to increase probability for exploration between all actions */
326  )
327 {
328  SCIP_BANDITDATA* banditdata;
329  assert(epsgreedy != NULL);
330  assert(eps >= 0);
331 
332  banditdata = SCIPbanditGetData(epsgreedy);
333 
334  banditdata->eps = eps;
335 }
336 
337 
338 /** creates the epsilon greedy bandit algorithm includes it in SCIP */
340  SCIP* scip /**< SCIP data structure */
341  )
342 {
343  SCIP_BANDITVTABLE* banditvtable;
344 
345  SCIP_CALL( SCIPincludeBanditvtable(scip, &banditvtable, BANDIT_NAME,
346  SCIPbanditFreeEpsgreedy, SCIPbanditSelectEpsgreedy, SCIPbanditUpdateEpsgreedy, SCIPbanditResetEpsgreedy) );
347 
348  return SCIP_OKAY;
349 }
SCIP_RETCODE SCIPcreateBanditEpsgreedy(SCIP *scip, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
void SCIPsetEpsilonEpsgreedy(SCIP_BANDIT *epsgreedy, SCIP_Real eps)
public methods for memory management
internal methods for bandit algorithms
enum SCIP_Retcode SCIP_RETCODE
Definition: type_retcode.h:63
#define BANDIT_NAME
int SCIPrandomGetInt(SCIP_RANDNUMGEN *randnumgen, int minrandval, int maxrandval)
Definition: misc.c:10012
SCIP_BANDITDATA * SCIPbanditGetData(SCIP_BANDIT *bandit)
Definition: bandit.c:190
real eps
SCIP_VAR * w
Definition: circlepacking.c:67
BMS_BUFMEM * SCIPbuffer(SCIP *scip)
Definition: scip_mem.c:72
SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
#define SCIPerrorMessage
Definition: pub_message.h:64
BMS_BLKMEM * SCIPblkmem(SCIP *scip)
Definition: scip_mem.c:57
SCIP_BANDITVTABLE * SCIPfindBanditvtable(SCIP *scip, const char *name)
Definition: scip_bandit.c:80
#define NULL
Definition: lpi_spx1.cpp:164
SCIP_RETCODE SCIPbanditCreateEpsgreedy(BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_BANDITVTABLE *vtable, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
#define SCIP_CALL(x)
Definition: def.h:393
void SCIPbanditSetData(SCIP_BANDIT *bandit, SCIP_BANDITDATA *banditdata)
Definition: bandit.c:200
#define BMSfreeBlockMemory(mem, ptr)
Definition: memory.h:467
public data structures and miscellaneous methods
#define SCIP_Bool
Definition: def.h:93
#define BMSallocBlockMemoryArray(mem, ptr, num)
Definition: memory.h:456
SCIP_RETCODE SCIPincludeBanditvtable(SCIP *scip, SCIP_BANDITVTABLE **banditvtable, const char *name, SCIP_DECL_BANDITFREE((*banditfree)), SCIP_DECL_BANDITSELECT((*banditselect)), SCIP_DECL_BANDITUPDATE((*banditupdate)), SCIP_DECL_BANDITRESET((*banditreset)))
Definition: scip_bandit.c:48
#define BMSfreeBlockMemoryArray(mem, ptr, num)
Definition: memory.h:469
SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
public methods for bandit algorithms
SCIP_Real SCIPrandomGetReal(SCIP_RANDNUMGEN *randnumgen, SCIP_Real minrandval, SCIP_Real maxrandval)
Definition: misc.c:10034
public methods for bandit algorithms
struct SCIP_BanditData SCIP_BANDITDATA
Definition: type_bandit.h:56
public methods for random numbers
SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
#define EPSGREEDY_SMALL
SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
public methods for message output
#define SCIP_Real
Definition: def.h:186
SCIP_Real * SCIPgetWeightsEpsgreedy(SCIP_BANDIT *epsgreedy)
int SCIPbanditGetNActions(SCIP_BANDIT *bandit)
Definition: bandit.c:303
SCIP_RETCODE SCIPincludeBanditvtableEpsgreedy(SCIP *scip)
internal methods for epsilon greedy bandit selection
SCIP_RANDNUMGEN * SCIPbanditGetRandnumgen(SCIP_BANDIT *bandit)
Definition: bandit.c:293
#define BMSallocBlockMemory(mem, ptr)
Definition: memory.h:453
#define BMSclearMemoryArray(ptr, num)
Definition: memory.h:132
unsigned int SCIPinitializeRandomSeed(SCIP *scip, unsigned int initialseedvalue)
struct BMS_BlkMem BMS_BLKMEM
Definition: memory.h:439
#define SCIP_ALLOC(x)
Definition: def.h:404
SCIP_RETCODE SCIPbanditCreate(SCIP_BANDIT **bandit, SCIP_BANDITVTABLE *banditvtable, BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_Real *priorities, int nactions, unsigned int initseed, SCIP_BANDITDATA *banditdata)
Definition: bandit.c:42