tree_update_node_rvalue(): Introduce; honor amaf_prior setting in _value()
[pachi.git] / uct / prior.c
blob737fc4b2da63143bd18762fa7cef1a069d86fc05
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #include "board.h"
8 #include "debug.h"
9 #include "move.h"
10 #include "random.h"
11 #include "uct/internal.h"
12 #include "uct/prior.h"
13 #include "uct/tree.h"
15 /* Applying heuristic values to the tree nodes, skewing the reading in
16 * most interesting directions. */
19 void
20 uct_prior(struct uct *u, struct tree *tree, struct tree_node *node,
21 struct board *b, enum stone color, int parity)
23 /* Initialization of UCT values based on prior knowledge */
25 /* Q_{even} */
26 /* This may be dubious for normal UCB1 but is essential for
27 * reading stability of RAVE, it appears. */
28 if (u->even_eqex) {
29 node->prior.playouts += u->even_eqex;
30 node->prior.wins += u->even_eqex / 2;
33 /* Discourage playing into our own eyes. However, we cannot
34 * completely prohibit it:
35 * ######
36 * ...XX.
37 * XOOOXX
38 * X.OOOO
39 * .XXXX. */
40 if (board_is_one_point_eye(b, &node->coord, color)) {
41 node->prior.playouts += u->eqex;
42 node->prior.wins += tree_parity(tree, parity) > 0 ? 0 : u->eqex;
45 /* Q_{grandparent} */
46 if (u->gp_eqex && node->parent && node->parent->parent && node->parent->parent->parent) {
47 struct tree_node *gpp = node->parent->parent->parent;
48 for (struct tree_node *ni = gpp->children; ni; ni = ni->sibling) {
49 /* Be careful not to emphasize too random results. */
50 if (ni->coord == node->coord && ni->u.playouts > u->gp_eqex) {
51 node->prior.playouts += u->gp_eqex;
52 node->prior.wins += u->gp_eqex * ni->u.wins / ni->u.playouts;
53 node->hints |= 1;
58 /* Q_{playout-policy} */
59 if (u->policy_eqex) {
60 int assess = 0;
61 struct playout_policy *playout = u->playout;
62 if (playout->assess) {
63 struct move m = { node->coord, color };
64 assess = playout->assess(playout, b, &m, u->policy_eqex);
66 if (assess) {
67 node->prior.playouts += abs(assess);
68 /* Good moves for enemy are losses for us.
69 * We will properly maximize this in the UCB1
70 * decision. */
71 assess *= tree_parity(tree, parity);
72 if (assess > 0) node->prior.wins += assess;
73 node->hints |= 2;
77 if (node->prior.playouts) {
78 node->prior.value = (float) node->prior.wins / node->prior.playouts;
79 tree_update_node_value(node, u->amaf_prior);
82 //fprintf(stderr, "%s,%s prior: %d/%d = %f (%f)\n", coord2sstr(node->parent->coord, b), coord2sstr(node->coord, b), node->prior.wins, node->prior.playouts, node->prior.value, assess);