10 #include "uct/internal.h"
13 /* This implements the UCB1-TUNED policy. */
15 struct ucb1_policy_tuned
{
16 /* This is what the "Modification of UCT with Patterns in Monte-Carlo Go"
17 * paper calls 'p'. The original UCB sets this to 2, but that seems to
18 * produce searches that are far too wide; reduce it to get deeper and
19 * narrower readouts - try 0.2. */
/* Prototype only: ucb1_choose() is not defined in this excerpt. It is
 * installed below as the `choose` callback of the policy object built by
 * policy_ucb1tuned_init().
 * NOTE(review): presumably shared with the plain UCB1 policy - confirm where
 * the definition lives. */
25 struct tree_node
*ucb1_choose(struct uct_policy
*p
, struct tree_node
*node
, struct board
*b
, enum stone color
);
/* Descend step of the UCB1-TUNED policy: walk the children of `node`,
 * compute an urgency for each one and keep track of the highest urgency
 * seen so far.
 *
 * p          - policy object; p->data holds the struct ucb1_policy_tuned
 * tree       - search tree, passed on to tree_parity() and
 *              tree_node_get_value()
 * node       - node whose children are examined
 * parity     - passed on to tree_parity() and tree_node_get_value()
 * allow_pass - when false, children whose coord is a pass are filtered by
 *              the check at the top of the loop
 *
 * NOTE(review): this excerpt is missing lines of the function (return type,
 * braces, the statement guarded by the pass check, and everything after the
 * best_urgency update - including whatever updates `nbest` and the return).
 * Confirm against the complete file before relying on these notes. */
28 ucb1tuned_descend(struct uct_policy
*p
, struct tree
*tree
, struct tree_node
*node
, int parity
, bool allow_pass
)
/* Policy-private parameters (explore_p). */
30 struct ucb1_policy_tuned
*b
= p
->data
;
/* Exploration numerator shared by all children: log of the parent's playout
 * count scaled by explore_p.
 * NOTE(review): no guard for node->u.playouts == 0 is visible here; log(0)
 * would yield -infinity - verify that callers never descend such a node. */
31 float xpl
= log(node
->u
.playouts
) * b
->explore_p
;
/* The best candidate starts out as the first child; the best urgency starts
 * at a sentinel lower than any expected urgency. */
33 struct tree_node
*nbest
= node
->children
;
34 float best_urgency
= -9999;
/* Iterate over the sibling-linked list of children. */
35 for (struct tree_node
*ni
= node
->children
; ni
; ni
= ni
->sibling
) {
36 /* Do not consider passing early. */
37 if (likely(!allow_pass
) && unlikely(is_pass(ni
->coord
)))
/* NOTE(review): the statement controlled by the condition above is not
 * present in this excerpt (presumably a `continue` - confirm). */
/* Per-child variance-like term: value - value^2. */
39 float xpl_loc
= (ni
->u
.value
- ni
->u
.value
* ni
->u
.value
);
/* NOTE(review): value - value^2 is unchanged when value is replaced by
 * 1 - value, and 1 - (value - value^2) is always >= 3/4, so whenever this
 * branch is taken the 1/4 cap below takes effect. Check whether the flip is
 * intended. */
40 if (tree_parity(tree
, parity
) < 0) xpl_loc
= 1 - xpl_loc
;
/* Add the exploration correction for this child.
 * NOTE(review): divides by ni->u.playouts with no visible zero check. */
41 xpl_loc
+= sqrt(xpl
/ ni
->u
.playouts
);
/* Cap the term at 1/4. */
42 if (xpl_loc
> 1.0/4) xpl_loc
= 1.0/4;
/* Urgency = node value as reported by tree_node_get_value() plus the
 * exploration bonus sqrt(xpl * xpl_loc / playouts).
 * NOTE(review): tree_node_get_value() is defined elsewhere; the bare `u`
 * argument looks like a field selector for a macro - confirm. */
43 float urgency
= tree_node_get_value(tree
, ni
, u
, parity
) + sqrt(xpl
* xpl_loc
/ ni
->u
.playouts
);
/* Remember the highest urgency seen so far. */
44 if (urgency
> best_urgency
) {
45 best_urgency
= urgency
;
/* Prototype only: ucb1_update() is not defined in this excerpt. It is
 * installed below as the `update` callback of the policy object built by
 * policy_ucb1tuned_init().
 * NOTE(review): presumably shared with the plain UCB1 policy - confirm where
 * the definition lives. */
52 void ucb1_update(struct uct_policy
*p
, struct tree
*tree
, struct tree_node
*node
, enum stone node_color
, enum stone player_color
, struct playout_amafmap
*map
, int result
);
/* Build a UCB1-TUNED policy object: allocate the generic uct_policy and the
 * private ucb1_policy_tuned state, install the descend/choose/update
 * callbacks, and parse the option string `arg`.
 *
 * u   - owning UCT engine (its use is not visible in this excerpt)
 * arg - option string with ':'-separated entries of the form name=value;
 *       it is modified in place (separators are overwritten with NUL).
 *       The only option recognized in the visible code is "explore_p".
 *
 * NOTE(review): this excerpt is missing lines of the function (return type,
 * braces, the code between the callback setup and the option parsing, the
 * loop header, the `else`, and the return). Confirm against the complete
 * file before relying on these notes. */
56 policy_ucb1tuned_init(struct uct
*u
, char *arg
)
/* Zero-initialized allocations.
 * NOTE(review): no NULL check on the calloc() results is visible. */
58 struct uct_policy
*p
= calloc(1, sizeof(*p
));
59 struct ucb1_policy_tuned
*b
= calloc(1, sizeof(*b
));
/* Install the policy callbacks; only descend is specific to UCB1-TUNED. */
62 p
->descend
= ucb1tuned_descend
;
63 p
->choose
= ucb1_choose
;
64 p
->update
= ucb1_update
;
/* Option parsing: `next` scans through arg.
 * NOTE(review): the assignment to optspec and the enclosing loop are not
 * present in this excerpt. */
70 char *optspec
, *next
= arg
;
/* Advance to the next ':' (or to the end of the string). */
73 next
+= strcspn(next
, ":");
/* Terminate the current option in place; step past the ':' if there was
 * one. */
74 if (*next
) { *next
++ = 0; } else { *next
= 0; }
/* Split "name=value": optval points just past the '=' or is NULL when the
 * option has no '='. */
76 char *optname
= optspec
;
77 char *optval
= strchr(optspec
, '=');
78 if (optval
) *optval
++ = 0;
/* Case-insensitive match on the option name.
 * NOTE(review): optval is NULL when "explore_p" is given without "=value",
 * and atof(NULL) is undefined behavior; the error message below mentions a
 * "missing value", which suggests this condition was meant to also check
 * optval. */
80 if (!strcasecmp(optname
, "explore_p")) {
81 b
->explore_p
= atof(optval
);
/* Diagnostic for unrecognized options (the `else` introducing it is not
 * present in this excerpt). */
83 fprintf(stderr
, "ucb1tuned: Invalid policy argument %s or missing value\n", optname
);