UCT: Separate amaf movestats
[pachi.git] / uct / policy / ucb1amaf.c
blobfcb08d63c58ed069f3b14e4e3a762aeb779e41ab
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #include "board.h"
8 #include "debug.h"
9 #include "move.h"
10 #include "uct/internal.h"
11 #include "uct/tree.h"
13 /* This implements the UCB1 policy with an extra AMAF heuristics. */
15 struct ucb1_policy {
16 /* This is what the Modification of UCT with Patterns in Monte Carlo Go
17 * paper calls 'p'. Original UCB has this on 2, but this seems to
18 * produce way too wide searches; reduce this to get deeper and
19 * narrower readouts - try 0.2. */
20 float explore_p;
21 /* First Play Urgency - if set to less than infinity (the MoGo paper
22 * above reports 1.0 as the best), new branches are explored only
23 * if none of the existing ones has higher urgency than fpu. */
24 float fpu;
25 /* Equivalent experience for prior knowledge. MoGo paper recommends
26 * 50 playouts per source. */
27 int eqex;
31 struct tree_node *ucb1_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color);
33 struct tree_node *ucb1_descend(struct uct_policy *p, struct tree *tree, struct tree_node *node, int parity, bool allow_pass);
35 void ucb1_prior(struct uct_policy *p, struct tree *tree, struct tree_node *node, struct board *b, enum stone color, int parity);
37 static void
38 update_node(struct uct_policy *p, struct tree_node *node, int result)
40 node->u.playouts++;
41 node->u.wins += result;
42 tree_update_node_value(node);
44 static void
45 update_node_amaf(struct uct_policy *p, struct tree_node *node, int result)
47 node->amaf.playouts++;
48 node->amaf.wins += result;
49 tree_update_node_value(node);
52 void
53 ucb1amaf_update(struct uct_policy *p, struct tree_node *node, enum stone color, struct playout_amafmap *map, int result)
55 for (; node; node = node->parent, color = stone_other(color)) {
56 /* Account for root node. */
57 /* But we do the update everytime, since it simply seems
58 * to make more sense to give the main branch more weight
59 * than other orders of play. */
60 update_node(p, node, result);
61 for (struct tree_node *ni = node->children; ni; ni = ni->sibling) {
62 if (is_pass(ni->coord) || map->map[ni->coord] != color)
63 continue;
64 update_node_amaf(p, node, result);
70 struct uct_policy *
71 policy_ucb1amaf_init(struct uct *u, char *arg)
73 struct uct_policy *p = calloc(1, sizeof(*p));
74 struct ucb1_policy *b = calloc(1, sizeof(*b));
75 p->uct = u;
76 p->data = b;
77 p->descend = ucb1_descend;
78 p->choose = ucb1_choose;
79 p->update = ucb1amaf_update;
80 p->wants_amaf = true;
82 b->explore_p = 0.2;
83 b->fpu = INFINITY;
85 if (arg) {
86 char *optspec, *next = arg;
87 while (*next) {
88 optspec = next;
89 next += strcspn(next, ":");
90 if (*next) { *next++ = 0; } else { *next = 0; }
92 char *optname = optspec;
93 char *optval = strchr(optspec, '=');
94 if (optval) *optval++ = 0;
96 if (!strcasecmp(optname, "explore_p")) {
97 b->explore_p = atof(optval);
98 } else if (!strcasecmp(optname, "prior")) {
99 b->eqex = optval ? atoi(optval) : 50;
100 if (b->eqex)
101 p->prior = ucb1_prior;
102 } else if (!strcasecmp(optname, "fpu") && optval) {
103 b->fpu = atof(optval);
104 } else {
105 fprintf(stderr, "ucb1: Invalid policy argument %s or missing value\n", optname);
110 return p;