Scippy

SCIP

Solving Constraint Integer Programs

bandit_epsgreedy.c
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2018 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not visit scip.zib.de. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file bandit_epsgreedy.c
17  * @brief implementation of epsilon greedy bandit algorithm
18  * @author Gregor Hendel
19  */
20 
21 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
22 
23 #include "scip/bandit.h"
24 #include "scip/bandit_epsgreedy.h"
25 #include "scip/pub_bandit.h"
26 #include "scip/pub_message.h"
27 #include "scip/pub_misc.h"
28 #include "scip/scip_bandit.h"
29 #include "scip/scip_mem.h"
30 #include "scip/scip_randnumgen.h"
31 
32 #define BANDIT_NAME "eps-greedy"
33 #define EPSGREEDY_SMALL 1e-6
34 /*
35  * Data structures
36  */
37 
38 /** private data structure of epsilon greedy bandit algorithm */
39 struct SCIP_BanditData
40 {
41  SCIP_Real* weights; /**< weights for every action */
42  SCIP_Real* priorities; /**< saved priorities for tie breaking */
43  int* sels; /**< individual number of selections per action */
44  SCIP_Real eps; /**< epsilon parameter (between 0 and 1) to control epsilon greedy */
45  SCIP_Real decayfactor; /**< the factor to reduce the weight of older observations if exponential decay is enabled */
46  int avglim; /**< nonnegative limit on observation number before the exponential decay starts,
47  * only relevant if exponential decay is enabled
48  */
49  int nselections; /**< counter for the number of selection calls */
50  SCIP_Bool preferrecent; /**< should the weights be updated in an exponentially decaying way? */
51 };
52 
53 /*
54  * Callback methods of bandit algorithm virtual function table
55  */
56 
57 /** callback to free bandit specific data structures */
58 SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
59 { /*lint --e{715}*/
60  SCIP_BANDITDATA* banditdata;
61  int nactions;
62 
63  assert(bandit != NULL);
64 
65  banditdata = SCIPbanditGetData(bandit);
66  assert(banditdata != NULL);
67  assert(banditdata->weights != NULL);
68  nactions = SCIPbanditGetNActions(bandit);
69 
70  BMSfreeBlockMemoryArray(blkmem, &banditdata->weights, nactions);
71  BMSfreeBlockMemoryArray(blkmem, &banditdata->priorities, nactions);
72  BMSfreeBlockMemoryArray(blkmem, &banditdata->sels, nactions);
73  BMSfreeBlockMemory(blkmem, &banditdata);
74 
75  SCIPbanditSetData(bandit, NULL);
76 
77  return SCIP_OKAY;
78 }
79 
80 /** selection callback for bandit algorithm */
81 SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
82 { /*lint --e{715}*/
83  SCIP_BANDITDATA* banditdata;
84  SCIP_Real randnr;
85  SCIP_Real curreps;
86  SCIP_RANDNUMGEN* rng;
87  int nactions;
88  assert(bandit != NULL);
89  assert(selection != NULL);
90 
91  banditdata = SCIPbanditGetData(bandit);
92  assert(banditdata != NULL);
93  rng = SCIPbanditGetRandnumgen(bandit);
94  assert(rng != NULL);
95 
96  nactions = SCIPbanditGetNActions(bandit);
97 
98  /* roll the dice to check if the best element should be picked, or an element at random */
99  randnr = SCIPrandomGetReal(rng, 0.0, 1.0);
100 
101  /* make epsilon decrease with an increasing number of selections */
102  banditdata->nselections++;
103  curreps = banditdata->eps * sqrt((SCIP_Real)nactions/(SCIP_Real)banditdata->nselections);
104 
105  /* select the best action seen so far */
106  if( randnr >= curreps )
107  {
108  SCIP_Real* weights = banditdata->weights;
109  SCIP_Real* priorities = banditdata->priorities;
110  int j;
111  SCIP_Real maxweight;
112 
113  assert(weights != NULL);
114  assert(priorities != NULL);
115 
116  /* pick the element with the largest reward */
117  maxweight = weights[0];
118  *selection = 0;
119 
120  /* determine reward for every element */
121  for( j = 1; j < nactions; ++j )
122  {
123  SCIP_Real weight = weights[j];
124 
125  /* select the action that maximizes the reward, breaking ties by action priorities */
126  if( maxweight < weight
127  || (weight >= maxweight - EPSGREEDY_SMALL && priorities[j] > priorities[*selection] ) )
128  {
129  *selection = j;
130  maxweight = weight;
131  }
132  }
133  }
134  else
135  {
136  /* play one of the actions at random */
137  *selection = SCIPrandomGetInt(rng, 0, nactions - 1);
138  }
139 
140  return SCIP_OKAY;
141 }
142 
143 /** update callback for bandit algorithm */
144 SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
145 { /*lint --e{715}*/
146  SCIP_BANDITDATA* banditdata;
147 
148  assert(bandit != NULL);
149 
150  banditdata = SCIPbanditGetData(bandit);
151  assert(banditdata != NULL);
152 
153  /* increase the selection count */
154  ++banditdata->sels[selection];
155 
156  /* the very first observation is directly stored as weight for both average or exponential decay */
157  if( banditdata->sels[selection] == 1 )
158  banditdata->weights[selection] = score;
159  else
160  {
161  /* use exponentially decreasing weights for older observations */
162  if( banditdata->preferrecent && banditdata->sels[selection] > banditdata->avglim )
163  {
164  /* decrease old weights by decay factor */
165  banditdata->weights[selection] *= banditdata->decayfactor;
166  banditdata->weights[selection] += (1.0 - banditdata->decayfactor) * score;
167  }
168  else
169  {
170  /* update average score */
171  SCIP_Real diff = score - banditdata->weights[selection];
172  banditdata->weights[selection] += diff / (SCIP_Real)(banditdata->sels[selection]);
173  }
174  }
175 
176  return SCIP_OKAY;
177 }
178 
179 /** reset callback for bandit algorithm */
180 SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
181 { /*lint --e{715}*/
182  SCIP_BANDITDATA* banditdata;
183  SCIP_Real* weights;
184  int w;
185  int nactions;
186  SCIP_RANDNUMGEN* rng;
187 
188  assert(bandit != NULL);
189 
190  banditdata = SCIPbanditGetData(bandit);
191  assert(banditdata != NULL);
192 
193  weights = banditdata->weights;
194  nactions = SCIPbanditGetNActions(bandit);
195  assert(weights != NULL);
196  assert(banditdata->priorities != NULL);
197  assert(nactions > 0);
198 
199  rng = SCIPbanditGetRandnumgen(bandit);
200  assert(rng != NULL);
201 
202  /* alter priorities slightly to make them unique */
203  if( priorities != NULL )
204  {
205  for( w = 1; w < nactions; ++w )
206  {
207  assert(priorities[w] >= 0);
208  banditdata->priorities[w] = priorities[w] + SCIPrandomGetReal(rng, -EPSGREEDY_SMALL, EPSGREEDY_SMALL);
209  }
210  }
211  else
212  {
213  /* use random priorities */
214  for( w = 0; w < nactions; ++w )
215  banditdata->priorities[w] = SCIPrandomGetReal(rng, 0.0, 1.0);
216  }
217 
218  /* reset weights and selection counters to 0 */
219  BMSclearMemoryArray(weights, nactions);
220  BMSclearMemoryArray(banditdata->sels, nactions);
221 
222  banditdata->nselections = 0;
223 
224  return SCIP_OKAY;
225 }
226 
227 /*
228  * interface methods of the Epsilon Greedy bandit algorithm
229  */
230 
231 /** internal method to create and reset epsilon greedy bandit algorithm */
233  BMS_BLKMEM* blkmem, /**< block memory */
234  BMS_BUFMEM* bufmem, /**< buffer memory */
235  SCIP_BANDITVTABLE* vtable, /**< virtual function table with epsilon greedy callbacks */
236  SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
237  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
238  SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
239  SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
240  SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
241  int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
242  * only relevant if exponential decay is enabled
243  */
244  int nactions, /**< the positive number of possible actions */
245  unsigned int initseed /**< initial random seed */
246  )
247 {
248  SCIP_BANDITDATA* banditdata;
249 
250  SCIP_ALLOC( BMSallocBlockMemory(blkmem, &banditdata) );
251  assert(banditdata != NULL);
252  assert(eps >= 0.0);
253 
254  SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->weights, nactions) );
255  SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->priorities, nactions) );
256  SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->sels, nactions) );
257  banditdata->eps = eps;
258  banditdata->nselections = 0;
259  banditdata->preferrecent = preferrecent;
260  banditdata->decayfactor = decayfactor;
261  banditdata->avglim = avglim;
262 
263  SCIP_CALL( SCIPbanditCreate(epsgreedy, vtable, blkmem, bufmem, priorities, nactions, initseed, banditdata) );
264 
265  return SCIP_OKAY;
266 }
267 
268 /** create and resets an epsilon greedy bandit algorithm */
270  SCIP* scip, /**< SCIP data structure */
271  SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
272  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
273  SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
274  SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
275  SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
276  int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
277  * only relevant if exponential decay is enabled
278  */
279  int nactions, /**< the positive number of possible actions */
280  unsigned int initseed /**< initial seed for random number generation */
281  )
282 {
283  SCIP_BANDITVTABLE* vtable;
284  assert(scip != NULL);
285  assert(epsgreedy != NULL);
286 
287  vtable = SCIPfindBanditvtable(scip, BANDIT_NAME);
288  if( vtable == NULL )
289  {
290  SCIPerrorMessage("Could not find virtual function table for %s bandit algorithm\n", BANDIT_NAME);
291  return SCIP_INVALIDDATA;
292  }
293 
294  SCIP_CALL( SCIPbanditCreateEpsgreedy(SCIPblkmem(scip), SCIPbuffer(scip), vtable, epsgreedy,
295  priorities, eps, preferrecent, decayfactor, avglim, nactions, SCIPinitializeRandomSeed(scip, (int)(initseed % INT_MAX))) );
296 
297  return SCIP_OKAY;
298 }
299 
300 /** get weights array of epsilon greedy bandit algorithm */
302  SCIP_BANDIT* epsgreedy /**< epsilon greedy bandit algorithm */
303  )
304 {
305  SCIP_BANDITDATA* banditdata;
306  assert(epsgreedy != NULL);
307  banditdata = SCIPbanditGetData(epsgreedy);
308  assert(banditdata != NULL);
309 
310  return banditdata->weights;
311 }
312 
313 /** set epsilon parameter of epsilon greedy bandit algorithm */
315  SCIP_BANDIT* epsgreedy, /**< epsilon greedy bandit algorithm */
316  SCIP_Real eps /**< parameter to increase probability for exploration between all actions */
317  )
318 {
319  SCIP_BANDITDATA* banditdata;
320  assert(epsgreedy != NULL);
321  assert(eps >= 0);
322 
323  banditdata = SCIPbanditGetData(epsgreedy);
324 
325  banditdata->eps = eps;
326 }
327 
328 
329 /** creates the epsilon greedy bandit algorithm includes it in SCIP */
331  SCIP* scip /**< SCIP data structure */
332  )
333 {
334  SCIP_BANDITVTABLE* banditvtable;
335 
336  SCIP_CALL( SCIPincludeBanditvtable(scip, &banditvtable, BANDIT_NAME,
337  SCIPbanditFreeEpsgreedy, SCIPbanditSelectEpsgreedy, SCIPbanditUpdateEpsgreedy, SCIPbanditResetEpsgreedy) );
338 
339  return SCIP_OKAY;
340 }
SCIP_RETCODE SCIPcreateBanditEpsgreedy(SCIP *scip, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
#define NULL
Definition: def.h:239
void SCIPsetEpsilonEpsgreedy(SCIP_BANDIT *epsgreedy, SCIP_Real eps)
public methods for memory management
internal methods for bandit algorithms
enum SCIP_Retcode SCIP_RETCODE
Definition: type_retcode.h:53
#define BANDIT_NAME
int SCIPrandomGetInt(SCIP_RANDNUMGEN *randnumgen, int minrandval, int maxrandval)
Definition: misc.c:9372
SCIP_BANDITDATA * SCIPbanditGetData(SCIP_BANDIT *bandit)
Definition: bandit.c:180
real eps
SCIP_VAR * w
Definition: circlepacking.c:58
BMS_BUFMEM * SCIPbuffer(SCIP *scip)
Definition: scip_mem.c:143
SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
#define SCIPerrorMessage
Definition: pub_message.h:45
SCIPInterval sqrt(const SCIPInterval &x)
BMS_BLKMEM * SCIPblkmem(SCIP *scip)
Definition: scip_mem.c:128
SCIP_BANDITVTABLE * SCIPfindBanditvtable(SCIP *scip, const char *name)
Definition: scip_bandit.c:64
SCIP_RETCODE SCIPbanditCreateEpsgreedy(BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_BANDITVTABLE *vtable, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
#define SCIP_CALL(x)
Definition: def.h:351
void SCIPbanditSetData(SCIP_BANDIT *bandit, SCIP_BANDITDATA *banditdata)
Definition: bandit.c:190
#define BMSfreeBlockMemory(mem, ptr)
Definition: memory.h:446
public data structures and miscellaneous methods
#define SCIP_Bool
Definition: def.h:62
#define BMSallocBlockMemoryArray(mem, ptr, num)
Definition: memory.h:435
SCIP_RETCODE SCIPincludeBanditvtable(SCIP *scip, SCIP_BANDITVTABLE **banditvtable, const char *name, SCIP_DECL_BANDITFREE((*banditfree)), SCIP_DECL_BANDITSELECT((*banditselect)), SCIP_DECL_BANDITUPDATE((*banditupdate)), SCIP_DECL_BANDITRESET((*banditreset)))
Definition: scip_bandit.c:32
#define BMSfreeBlockMemoryArray(mem, ptr, num)
Definition: memory.h:448
SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
public methods for bandit algorithms
SCIP_Real SCIPrandomGetReal(SCIP_RANDNUMGEN *randnumgen, SCIP_Real minrandval, SCIP_Real maxrandval)
Definition: misc.c:9394
public methods for bandit algorithms
struct SCIP_BanditData SCIP_BANDITDATA
Definition: type_bandit.h:47
public methods for random numbers
SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
#define EPSGREEDY_SMALL
SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
public methods for message output
#define SCIP_Real
Definition: def.h:150
SCIP_Real * SCIPgetWeightsEpsgreedy(SCIP_BANDIT *epsgreedy)
int SCIPbanditGetNActions(SCIP_BANDIT *bandit)
Definition: bandit.c:293
SCIP_RETCODE SCIPincludeBanditvtableEpsgreedy(SCIP *scip)
internal methods for epsilon greedy bandit selection
SCIP_RANDNUMGEN * SCIPbanditGetRandnumgen(SCIP_BANDIT *bandit)
Definition: bandit.c:283
#define BMSallocBlockMemory(mem, ptr)
Definition: memory.h:433
#define BMSclearMemoryArray(ptr, num)
Definition: memory.h:112
unsigned int SCIPinitializeRandomSeed(SCIP *scip, unsigned int initialseedvalue)
struct BMS_BlkMem BMS_BLKMEM
Definition: memory.h:419
#define SCIP_ALLOC(x)
Definition: def.h:362
SCIP_RETCODE SCIPbanditCreate(SCIP_BANDIT **bandit, SCIP_BANDITVTABLE *banditvtable, BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_Real *priorities, int nactions, unsigned int initseed, SCIP_BANDITDATA *banditdata)
Definition: bandit.c:32