uctp_generic_choose(): call expensive uct_pass_is_safe() only if pass is the best...
[pachi.git] / uct / policy / generic.c
blob67844211424373cac2f7921efc3e2e0959c494a8
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
6 #include "board.h"
7 #include "debug.h"
8 #include "move.h"
9 #include "tactics/util.h"
10 #include "random.h"
11 #include "uct/internal.h"
12 #include "uct/tree.h"
13 #include "uct/policy/generic.h"
15 struct tree_node *
16 uctp_generic_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color, coord_t exclude)
18 struct tree_node *nbest = node->children;
19 if (!nbest) return NULL;
20 struct tree_node *nbest2 = NULL;
22 /* This function is called while the tree is updated by other threads.
23 * We rely on node->children being set only after the node has been fully expanded. */
24 for (struct tree_node *ni = nbest->sibling; ni; ni = ni->sibling)
25 // we compare playouts and choose the best-explored
26 // child; comparing values is more brittle
27 if (ni->u.playouts > nbest->u.playouts) {
28 if (node_coord(ni) == exclude)
29 continue;
30 if (ni->hints & TREE_HINT_INVALID)
31 continue;
32 nbest2 = nbest;
33 nbest = ni;
35 /* Play pass only if we can afford scoring. Call expensive uct_pass_is_safe() only if
36 * pass is indeed the best move. */
37 if (is_pass(node_coord(nbest)) && !uct_pass_is_safe(p->uct, b, color, p->uct->pass_all_alive))
38 return nbest2;
39 return nbest;
42 /* Return the node with best value instead of best explored. We must use the heuristic
43 * value (using prior and possibly rave), because the raw value is meaningless for
44 * nodes evaluated rarely.
45 * This function is called while the tree is updated by other threads */
46 void
47 uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_descent *descent)
49 if (!p->evaluate)
50 return;
51 bool allow_pass = false; /* At worst forces some extra playouts at the end */
52 int parity = tree_node_parity(tree, descent->node);
54 uctd_try_node_children(tree, descent, allow_pass, parity, p->uct->tenuki_d, di, urgency) {
55 urgency = p->evaluate(p, tree, &di, parity);
56 } uctd_set_best_child(di, urgency);
58 uctd_get_best_child(descent);