Add function uctp_generic_winner, needed for time control.
[pachi.git] / uct / policy / generic.c
blob8a53dc994c0707229ff21edbe7753a84f7f420fa
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
6 #include "board.h"
7 #include "debug.h"
8 #include "move.h"
9 #include "tactics.h"
10 #include "random.h"
11 #include "uct/internal.h"
12 #include "uct/tree.h"
13 #include "uct/policy/generic.h"
15 struct tree_node *
16 uctp_generic_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color)
18 struct tree_node *nbest = NULL;
19 /* This function is called while the tree is updated by other threads.
20 * We rely on node->children being set only after the node has been fully expanded. */
21 for (struct tree_node *ni = node->children; ni; ni = ni->sibling)
22 // we compare playouts and choose the best-explored
23 // child; comparing values is more brittle
24 if (!nbest || ni->u.playouts > nbest->u.playouts) {
25 /* Play pass only if we can afford scoring */
26 if (is_pass(ni->coord) && !uct_pass_is_safe(p->uct, b, color, p->uct->pass_all_alive))
27 continue;
28 nbest = ni;
30 return nbest;
33 /* Return the node with best value instead of best explored. We must use the heuristic
34 * value (using prior and possibly rave), because the raw value is meaningless for
35 * nodes evaluated rarely.
36 * This function is called while the tree is updated by other threads */
37 struct tree_node *
38 uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct tree_node *node)
40 if (!p->evaluate)
41 return NULL;
42 bool allow_pass = false; /* At worst forces some extra playouts at the end */
43 void *state; /* TODO: remove this unused parameter. */
44 int parity = ((node->depth ^ tree->root->depth) & 1) ? -1 : 1;
46 uctd_try_node_children(node, allow_pass, ni, urgency) {
47 urgency = p->evaluate(p, state, tree, ni, parity);
48 } uctd_set_best_child(ni, urgency);
50 return uctd_get_best_child();