10 #include "uct/internal.h"
/* This implements the UCB1 policy with an additional AMAF heuristic. */
/* This is what the "Modification of UCT with Patterns in Monte-Carlo Go"
 * paper calls 'p'. The original UCB sets this to 2, but that seems to
 * produce searches that are far too wide; reduce it to get deeper and
 * narrower readouts - try 0.2. */
21 /* First Play Urgency - if set to less than infinity (the MoGo paper
22 * above reports 1.0 as the best), new branches are explored only
23 * if none of the existing ones has higher urgency than fpu. */
25 /* Equivalent experience for prior knowledge. MoGo paper recommends
26 * 50 playouts per source. */
/* Declaration only; no definition is visible in this file.
 * policy_ucb1amaf_init() installs it as the policy's choose callback. */
struct tree_node *ucb1_choose(struct uct_policy *p,
                              struct tree_node *node,
                              struct board *b,
                              enum stone color);
/* Declaration only; no definition is visible in this file.
 * policy_ucb1amaf_init() installs it as the policy's descend callback. */
struct tree_node *ucb1_descend(struct uct_policy *p,
                               struct tree *tree,
                               struct tree_node *node,
                               int parity,
                               bool allow_pass);
/* Declaration only; no definition is visible in this file.
 * policy_ucb1amaf_init() installs it as the policy's prior callback when
 * the "prior" option is given. */
void ucb1_prior(struct uct_policy *p,
                struct tree *tree,
                struct tree_node *node,
                struct board *b,
                enum stone color,
                int parity);
38 update_node(struct uct_policy
*p
, struct tree_node
*node
, int result
)
41 node
->u
.wins
+= result
;
42 tree_update_node_value(node
);
45 update_node_amaf(struct uct_policy
*p
, struct tree_node
*node
, int result
)
47 node
->amaf
.playouts
++;
48 node
->amaf
.wins
+= result
;
49 tree_update_node_value(node
);
53 ucb1amaf_update(struct uct_policy
*p
, struct tree_node
*node
, enum stone color
, struct playout_amafmap
*map
, int result
)
55 for (; node
; node
= node
->parent
, color
= stone_other(color
)) {
56 /* Account for root node. */
57 /* But we do the update everytime, since it simply seems
58 * to make more sense to give the main branch more weight
59 * than other orders of play. */
60 update_node(p
, node
, result
);
61 for (struct tree_node
*ni
= node
->children
; ni
; ni
= ni
->sibling
) {
62 if (is_pass(ni
->coord
) || map
->map
[ni
->coord
] != color
)
64 update_node_amaf(p
, node
, result
);
71 policy_ucb1amaf_init(struct uct
*u
, char *arg
)
73 struct uct_policy
*p
= calloc(1, sizeof(*p
));
74 struct ucb1_policy
*b
= calloc(1, sizeof(*b
));
77 p
->descend
= ucb1_descend
;
78 p
->choose
= ucb1_choose
;
79 p
->update
= ucb1amaf_update
;
86 char *optspec
, *next
= arg
;
89 next
+= strcspn(next
, ":");
90 if (*next
) { *next
++ = 0; } else { *next
= 0; }
92 char *optname
= optspec
;
93 char *optval
= strchr(optspec
, '=');
94 if (optval
) *optval
++ = 0;
96 if (!strcasecmp(optname
, "explore_p")) {
97 b
->explore_p
= atof(optval
);
98 } else if (!strcasecmp(optname
, "prior")) {
99 b
->eqex
= optval
? atoi(optval
) : 50;
101 p
->prior
= ucb1_prior
;
102 } else if (!strcasecmp(optname
, "fpu") && optval
) {
103 b
->fpu
= atof(optval
);
105 fprintf(stderr
, "ucb1: Invalid policy argument %s or missing value\n", optname
);