time_left(): Allow time_left==0, stones_left==0 from strange GTP peers
[pachi/t.git] / stats.h
blobbf8c5b55b1487fbf7b8401c4ccf7a2514929e866
1 #ifndef ZZGO_STATS_H
2 #define ZZGO_STATS_H
/* Move statistics; we track how good each move has turned out to be. */
/* These operations are supposed to be atomic - reasonably safe to
 * perform by multiple threads at once on the same stats. What this
 * means in practice is that the value may come out slightly wrong,
 * but not drastically corrupted. */
/* Running outcome statistics of one move: a sample count plus the
 * mean of the samples seen so far. */
struct move_stats {
	int playouts;	/* number of playouts accounted for in value */
	float value;	/* BLACK wins / playouts */
};
/* Fold @playouts playouts with mean outcome @result into @s. */
static void stats_add_result(struct move_stats *s, float result, int playouts);

/* Take @playouts playouts with mean outcome @result back out of @s;
 * if @s holds no more than @playouts playouts, its count drops to zero. */
static void stats_rm_result(struct move_stats *s, float result, int playouts);

/* Fold the statistics in @src into @dest. THIS IS NOT ATOMIC! */
static void stats_merge(struct move_stats *dest, struct move_stats *src);

/* Reverse stats parity: s->value becomes 1 - s->value. */
static void stats_reverse_parity(struct move_stats *s);

/* Temper @val against the parent value @pval in the way selected by
 * @mode - the result should then be usable standalone, representing
 * an improvement against the parent value. */
static float stats_temper_value(float val, float pval, int mode);
/* We actually do the atomicity in a pretty hackish way - we simply
 * rely on the fact that plain int and float operations should be
 * atomic with reasonable compilers (gcc) on reasonable architectures
 * (i386, x86_64). */
/* There is a write order dependency - by the time we bump the playouts,
 * our value must already be correct; otherwise a node evaluated in
 * parallel will receive an invalid evaluation, esp. when the current
 * s->playouts is zero. */
41 static inline void
42 stats_add_result(struct move_stats *s, float result, int playouts)
44 int s_playouts = s->playouts;
45 float s_value = s->value;
46 /* Force the load, another thread can work on the
47 * values in parallel. */
48 __sync_synchronize(); /* full memory barrier */
50 s_playouts += playouts;
51 s_value += (result - s_value) * playouts / s_playouts;
53 /* We rely on the fact that these two assignments are atomic. */
54 s->value = s_value;
55 __sync_synchronize(); /* full memory barrier */
56 s->playouts = s_playouts;
59 static inline void
60 stats_rm_result(struct move_stats *s, float result, int playouts)
62 if (s->playouts > playouts) {
63 int s_playouts = s->playouts;
64 float s_value = s->value;
65 /* Force the load, another thread can work on the
66 * values in parallel. */
67 __sync_synchronize(); /* full memory barrier */
69 s_playouts -= playouts;
70 s_value += (s_value - result) * playouts / s_playouts;
72 /* We rely on the fact that these two assignments are atomic. */
73 s->value = s_value;
74 __sync_synchronize(); /* full memory barrier */
75 s->playouts = s_playouts;
77 } else {
78 /* We don't touch the value, since in parallel, another
79 * thread can be adding a result, thus raising the
80 * playouts count after we zero the value. Instead,
81 * leaving the value as is with zero playouts should
82 * not break anything. */
83 s->playouts = 0;
87 static inline void
88 stats_merge(struct move_stats *dest, struct move_stats *src)
90 /* In a sense, this is non-atomic version of stats_add_result(). */
91 if (src->playouts) {
92 dest->playouts += src->playouts;
93 dest->value += (src->value - dest->value) * src->playouts / dest->playouts;
97 static inline void
98 stats_reverse_parity(struct move_stats *s)
100 s->value = 1 - s->value;
/* Temper @val against the parent value @pval.
 *
 * @mode selects the formula:
 *   1 - no tempering, @val is returned as is
 *   2 - 0.5 + (val - pval) / 2
 *   3 - 0.5 +/- (val - pval)^2, with '+' when val > 0.5 and '-' otherwise
 * Any other mode trips an assertion; where assertions are compiled
 * out, @val is returned unchanged. */
static inline float
stats_temper_value(float val, float pval, int mode)
{
	const float delta = val - pval;

	if (mode == 1) {
		/* no tempering */
		return val;
	}

	if (mode == 2) {
		/* 0.5+(result-expected)/2 */
		return 0.5 + delta / 2;
	}

	if (mode == 3) {
		/* 0.5+bzz((result-expected)^2) */
		const float square = delta * delta;
		const int sign = val > 0.5 ? 1 : -1;
		/* val = 1 pval = 0.8 : square = 0.04 result = 0.54
		 * val = 1 pval = 0.6 : square = 0.16 result = 0.66
		 * val = 1 pval = 0.3 : square = 0.49 result = 0.99
		 * val = 1 pval = 0.1 : square = 0.81 result = 1.31 */
		return 0.5 + sign * square;
	}

	/* Unknown mode. */
	assert(0);
	return val;
}
128 #endif