UCT: Keep prior moves in separate stats
[pachi/peepo.git] / uct / policy / ucb1amaf.c
blobeef4fdd35ca8bfc80b2f6af04cd5ad782e1e7908
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #include "board.h"
8 #include "debug.h"
9 #include "move.h"
10 #include "uct/internal.h"
11 #include "uct/tree.h"
13 /* This implements the UCB1 policy with an extra AMAF heuristics. */
15 struct ucb1_policy {
16 /* This is what the Modification of UCT with Patterns in Monte Carlo Go
17 * paper calls 'p'. Original UCB has this on 2, but this seems to
18 * produce way too wide searches; reduce this to get deeper and
19 * narrower readouts - try 0.2. */
20 float explore_p;
21 /* First Play Urgency - if set to less than infinity (the MoGo paper
22 * above reports 1.0 as the best), new branches are explored only
23 * if none of the existing ones has higher urgency than fpu. */
24 float fpu;
25 /* Equivalent experience for prior knowledge. MoGo paper recommends
26 * 50 playouts per source. */
27 int eqex;
31 struct tree_node *ucb1_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color);
33 struct tree_node *ucb1_descend(struct uct_policy *p, struct tree *tree, struct tree_node *node, int parity, bool allow_pass);
35 void ucb1_prior(struct uct_policy *p, struct tree *tree, struct tree_node *node, struct board *b, enum stone color, int parity);
37 static void
38 update_node(struct uct_policy *p, struct tree_node *node, int result)
40 node->u.playouts++;
41 node->u.wins += result;
42 node->u.value = ((float)node->u.wins + (float)node->prior.wins) / (node->u.playouts + node->prior.playouts);
45 void
46 ucb1amaf_update(struct uct_policy *p, struct tree_node *node, enum stone color, struct playout_amafmap *map, int result)
48 for (; node; node = node->parent, color = stone_other(color)) {
49 /* Account for root node. */
50 /* But we do the update everytime, since it simply seems
51 * to make more sense to give the main branch more weight
52 * than other orders of play. */
53 update_node(p, node, result);
54 for (struct tree_node *ni = node->children; ni; ni = ni->sibling) {
55 if (is_pass(ni->coord) || map->map[ni->coord] != color)
56 continue;
57 update_node(p, node, result);
63 struct uct_policy *
64 policy_ucb1amaf_init(struct uct *u, char *arg)
66 struct uct_policy *p = calloc(1, sizeof(*p));
67 struct ucb1_policy *b = calloc(1, sizeof(*b));
68 p->uct = u;
69 p->data = b;
70 p->descend = ucb1_descend;
71 p->choose = ucb1_choose;
72 p->update = ucb1amaf_update;
73 p->wants_amaf = true;
75 b->explore_p = 0.2;
76 b->fpu = INFINITY;
78 if (arg) {
79 char *optspec, *next = arg;
80 while (*next) {
81 optspec = next;
82 next += strcspn(next, ":");
83 if (*next) { *next++ = 0; } else { *next = 0; }
85 char *optname = optspec;
86 char *optval = strchr(optspec, '=');
87 if (optval) *optval++ = 0;
89 if (!strcasecmp(optname, "explore_p")) {
90 b->explore_p = atof(optval);
91 } else if (!strcasecmp(optname, "prior")) {
92 b->eqex = optval ? atoi(optval) : 50;
93 if (b->eqex)
94 p->prior = ucb1_prior;
95 } else if (!strcasecmp(optname, "fpu") && optval) {
96 b->fpu = atof(optval);
97 } else {
98 fprintf(stderr, "ucb1: Invalid policy argument %s or missing value\n", optname);
103 return p;