10 #include "uct/internal.h"
13 /* This implements the UCB1-TUNED policy. */
16 /* This is what the Modification of UCT with Patterns in Monte Carlo Go
17 * paper calls 'p'. Original UCB has this on 2, but this seems to
18 * produce way too wide searches; reduce this to get deeper and
19 * narrower readouts - try 0.2. */
24 static struct tree_node
*
25 ucb1_choose(struct uct_policy
*p
, struct tree_node
*node
, struct board
*b
, enum stone color
)
27 struct tree_node
*nbest
= NULL
;
28 for (struct tree_node
*ni
= node
->children
; ni
; ni
= ni
->sibling
)
29 // we compare playouts and choose the best-explored
30 // child; comparing values is more brittle
31 if (!nbest
|| ni
->playouts
> nbest
->playouts
) {
32 /* Play pass only if we can afford scoring */
33 if (is_pass(ni
->coord
)) {
34 float score
= board_official_score(b
);
37 //fprintf(stderr, "%d score %f\n", color, score);
47 static struct tree_node
*
48 ucb1_descend(struct uct_policy
*p
, struct tree
*tree
, struct tree_node
*node
, int parity
, bool allow_pass
)
50 struct ucb1_policy
*b
= p
->data
;
51 float xpl
= log(node
->playouts
) * b
->explore_p
;
53 struct tree_node
*nbest
= node
->children
;
54 float best_urgency
= -9999;
55 for (struct tree_node
*ni
= node
->children
; ni
; ni
= ni
->sibling
) {
56 /* Do not consider passing early. */
57 if (likely(!allow_pass
) && unlikely(is_pass(ni
->coord
)))
59 float xpl_loc
= (ni
->value
- ni
->value
* ni
->value
);
60 if (parity
< 0) xpl_loc
= 1 - xpl_loc
;
61 xpl_loc
+= sqrt(xpl
/ ni
->playouts
);
62 if (xpl_loc
> 1.0/4) xpl_loc
= 1.0/4;
63 float urgency
= ni
->value
* parity
+ sqrt(xpl
* xpl_loc
/ ni
->playouts
);
64 if (urgency
> best_urgency
) {
65 best_urgency
= urgency
;
74 policy_ucb1tuned_init(struct uct
*u
, char *arg
)
76 struct uct_policy
*p
= calloc(1, sizeof(*p
));
77 struct ucb1_policy
*b
= calloc(1, sizeof(*b
));
80 p
->descend
= ucb1_descend
;
81 p
->choose
= ucb1_choose
;
86 char *optspec
, *next
= arg
;
89 next
+= strcspn(next
, "+");
90 if (*next
) { *next
++ = 0; } else { *next
= 0; }
92 char *optname
= optspec
;
93 char *optval
= strchr(optspec
, '=');
94 if (optval
) *optval
++ = 0;
96 if (!strcasecmp(optname
, "explore_p")) {
97 b
->explore_p
= atof(optval
);
99 fprintf(stderr
, "ucb1tuned: Invalid policy argument %s or missing value\n", optname
);