10 #include "uct/internal.h"
13 /* This implements the UCB1 policy with an extra AMAF heuristics. */
16 /* This is what the Modification of UCT with Patterns in Monte Carlo Go
17 * paper calls 'p'. Original UCB has this on 2, but this seems to
18 * produce way too wide searches; reduce this to get deeper and
19 * narrower readouts - try 0.2. */
21 /* First Play Urgency - if set to less than infinity (the MoGo paper
22 * above reports 1.0 as the best), new branches are explored only
23 * if none of the existing ones has higher urgency than fpu. */
25 /* Equivalent experience for prior knowledge. MoGo paper recommends
26 * 50 playouts per source. */
31 struct tree_node
*ucb1_choose(struct uct_policy
*p
, struct tree_node
*node
, struct board
*b
, enum stone color
);
33 struct tree_node
*ucb1_descend(struct uct_policy
*p
, struct tree
*tree
, struct tree_node
*node
, int parity
, bool allow_pass
);
35 void ucb1_prior(struct uct_policy
*p
, struct tree
*tree
, struct tree_node
*node
, struct board
*b
, enum stone color
, int parity
);
38 update_node(struct uct_policy
*p
, struct tree_node
*node
, int result
)
41 node
->u
.wins
+= result
;
42 node
->u
.value
= ((float)node
->u
.wins
+ (float)node
->prior
.wins
) / (node
->u
.playouts
+ node
->prior
.playouts
);
46 ucb1amaf_update(struct uct_policy
*p
, struct tree_node
*node
, enum stone color
, struct playout_amafmap
*map
, int result
)
48 for (; node
; node
= node
->parent
, color
= stone_other(color
)) {
49 /* Account for root node. */
50 /* But we do the update everytime, since it simply seems
51 * to make more sense to give the main branch more weight
52 * than other orders of play. */
53 update_node(p
, node
, result
);
54 for (struct tree_node
*ni
= node
->children
; ni
; ni
= ni
->sibling
) {
55 if (is_pass(ni
->coord
) || map
->map
[ni
->coord
] != color
)
57 update_node(p
, node
, result
);
64 policy_ucb1amaf_init(struct uct
*u
, char *arg
)
66 struct uct_policy
*p
= calloc(1, sizeof(*p
));
67 struct ucb1_policy
*b
= calloc(1, sizeof(*b
));
70 p
->descend
= ucb1_descend
;
71 p
->choose
= ucb1_choose
;
72 p
->update
= ucb1amaf_update
;
79 char *optspec
, *next
= arg
;
82 next
+= strcspn(next
, ":");
83 if (*next
) { *next
++ = 0; } else { *next
= 0; }
85 char *optname
= optspec
;
86 char *optval
= strchr(optspec
, '=');
87 if (optval
) *optval
++ = 0;
89 if (!strcasecmp(optname
, "explore_p")) {
90 b
->explore_p
= atof(optval
);
91 } else if (!strcasecmp(optname
, "prior")) {
92 b
->eqex
= optval
? atoi(optval
) : 50;
94 p
->prior
= ucb1_prior
;
95 } else if (!strcasecmp(optname
, "fpu") && optval
) {
96 b
->fpu
= atof(optval
);
98 fprintf(stderr
, "ucb1: Invalid policy argument %s or missing value\n", optname
);