stats.h

   1 #ifndef ZZGO_STATS_H
   2 #define ZZGO_STATS_H
   3
/* Move statistics; we track how good each move has turned out to be. */
   5 /* These operations are supposed to be atomic - reasonably
   6  * safe to perform by multiple threads at once on the same stats.
   7  * What this means in practice is that perhaps the value will get
   8  * slightly wrong, but not drastically corrupted. */
   9
/* Statistics accumulated for a single move: the number of playouts that
 * contributed and the running average of their results. The helpers in
 * this header update both fields together. */
struct move_stats {
        int playouts; // # of playouts folded into value so far
        float value; // BLACK wins/playouts (running average of the results)
};
  14
/* Add a result to the stats: fold `playouts` playouts whose average
 * result is `result` into the running average s->value and raise
 * s->playouts by `playouts`. */
static void stats_add_result(struct move_stats *s, float result, int playouts);

/* Remove a result from the stats (the inverse of stats_add_result()).
 * If s does not hold more than `playouts` playouts, s->playouts is reset
 * to zero and s->value is left untouched. */
static void stats_rm_result(struct move_stats *s, float result, int playouts);

/* Merge two stats together: fold src into dest as a playout-weighted
 * average. A src with zero playouts leaves dest unchanged.
 * THIS IS NOT ATOMIC! */
static void stats_merge(struct move_stats *dest, struct move_stats *src);

/* Reverse stats parity: replace s->value with 1 - s->value. */
static void stats_reverse_parity(struct move_stats *s);

/* Temper value based on parent value in specified way - the value should be
 * usable standalone then, representing an improvement against parent value.
 * Modes: 1 = no tempering (val is returned as is),
 *        2 = 0.5 + (val - pval) / 2,
 *        3 = 0.5 +/- (val - pval)^2, the sign chosen by val > 0.5.
 * Any other mode trips assert(0). */
static float stats_temper_value(float val, float pval, int mode);
  30
  31
  32 /* We actually do the atomicity in a pretty hackish way - we simply
  33  * rely on the fact that int,float operations should be atomic with
  34  * reasonable compilers (gcc) on reasonable architectures (i386,
  35  * x86_64). */
/* There is a write order dependency - by the time we bump the playouts,
 * the value must already be correct. Otherwise a thread evaluating the
 * node in parallel would see an invalid value, especially when the
 * current s->playouts is zero. */
  40
  41 static inline void
  42 stats_add_result(struct move_stats *s, float result, int playouts)
  43 {
  44         int s_playouts = s->playouts;
  45         float s_value = s->value;
  46         /* Force the load, another thread can work on the
  47          * values in parallel. */
  48         __sync_synchronize(); /* full memory barrier */
  49
  50         s_playouts += playouts;
  51         s_value += (result - s_value) * playouts / s_playouts;
  52
  53         /* We rely on the fact that these two assignments are atomic. */
  54         s->value = s_value;
  55         __sync_synchronize(); /* full memory barrier */
  56         s->playouts = s_playouts;
  57 }
  58
  59 static inline void
  60 stats_rm_result(struct move_stats *s, float result, int playouts)
  61 {
  62         if (s->playouts > playouts) {
  63                 int s_playouts = s->playouts;
  64                 float s_value = s->value;
  65                 /* Force the load, another thread can work on the
  66                  * values in parallel. */
  67                 __sync_synchronize(); /* full memory barrier */
  68
  69                 s_playouts -= playouts;
  70                 s_value += (s_value - result) * playouts / s_playouts;
  71
  72                 /* We rely on the fact that these two assignments are atomic. */
  73                 s->value = s_value;
  74                 __sync_synchronize(); /* full memory barrier */
  75                 s->playouts = s_playouts;
  76
  77         } else {
  78                 /* We don't touch the value, since in parallel, another
  79                  * thread can be adding a result, thus raising the
  80                  * playouts count after we zero the value. Instead,
  81                  * leaving the value as is with zero playouts should
  82                  * not break anything. */
  83                 s->playouts = 0;
  84         }
  85 }
  86
  87 static inline void
  88 stats_merge(struct move_stats *dest, struct move_stats *src)
  89 {
  90         /* In a sense, this is non-atomic version of stats_add_result(). */
  91         if (src->playouts) {
  92                 dest->playouts += src->playouts;
  93                 dest->value += (src->value - dest->value) * src->playouts / dest->playouts;
  94         }
  95 }
  96
  97 static inline void
  98 stats_reverse_parity(struct move_stats *s)
  99 {
 100         s->value = 1 - s->value;
 101 }
 102
 103 static inline float
 104 stats_temper_value(float val, float pval, int mode)
 105 {
 106         float tval = val;
 107         float expd = val - pval;
 108         switch (mode) {
 109                 case 1: /* no tempering */
 110                         tval = val;
 111                         break;
 112                 case 2: /* 0.5+(result-expected)/2 */
 113                         tval = 0.5 + expd / 2;
 114                         break;
 115                 case 3: { /* 0.5+bzz((result-expected)^2) */
 116                         float ntval = expd * expd;
 117                         /* val = 1 pval = 0.8 : ntval = 0.04 tval = 0.54
 118                          * val = 1 pval = 0.6 : ntval = 0.16 tval = 0.66
 119                          * val = 1 pval = 0.3 : ntval = 0.49 tval = 0.99
 120                          * val = 1 pval = 0.1 : ntval = 0.81 tval = 1.31 */
 121                         tval = 0.5 + (val > 0.5 ? 1 : -1) * ntval;
 122                         break; }
 123                 default: assert(0); break;
 124         }
 125         return tval;
 126 }
 127
 128 #endif