UCT: Progressive Unpruning support (for all policies, tunable)
[pachi.git] / uct / policy / generic.h
blob61b0f8496948165fc1044f0f9360b0d81b3c6e68
1 #ifndef ZZGO_UCT_POLICY_GENERIC_H
2 #define ZZGO_UCT_POLICY_GENERIC_H
4 /* Some default policy routines and templates. */
6 #include "board.h"
7 #include "stone.h"
8 #include "uct/internal.h"
10 struct board;
11 struct tree_node;
13 struct tree_node *uctp_generic_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color, coord_t exclude);
14 void uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_descent *descent);
15 static int uct_get_prune_level(struct uct *u, struct tree_node *node);
18 /* Some generic stitching for tree descent. */
20 #define uctd_try_node_children(policy, tree, descent, allow_pass, parity, tenuki_d, di, urgency) \
21 /* Information abound best children. */ \
22 /* XXX: We assume board <=25x25. */ \
23 struct uct_descent dbest[BOARD_MAX_MOVES + 1] = { { .node = descent->node->children, .lnode = NULL } }; int dbests = 1; \
24 floating_t best_urgency = -9999; \
25 int prune_level = uct_get_prune_level(policy->uct, descent->node); \
26 /* Descent children iterator. */ \
27 struct uct_descent dci = { .node = descent->node->children, .lnode = descent->lnode ? descent->lnode->children : NULL }; \
29 for (; dci.node; dci.node = dci.node->sibling) { \
30 floating_t urgency; \
31 /* Do not consider passing early. */ \
32 if (unlikely((!allow_pass && is_pass(dci.node->coord)) || (dci.node->hints & TREE_HINT_INVALID))) \
33 continue; \
34 /* Skip temporarily pruned moves. */ \
35 if (dci.node->prune_rank >= prune_level) \
36 continue; \
37 /* Position dci.lnode to point at or right after the local
38 * node corresponding to dci.node. */ \
39 while (dci.lnode && dci.lnode->coord < dci.node->coord) \
40 dci.lnode = dci.lnode->sibling; \
41 /* Set up descent-further iterator. This is the public-accessible
42 * one, and usually is similar to dci. However, in case of local
43 * trees, we may keep next-candidate pointer in dci while storing
44 * actual-specimen in di. */ \
45 struct uct_descent di = dci; \
46 if (dci.lnode) { \
47 /* Set lnode to local tree node corresponding
48 * to node (dci.lnode, pass-lnode or NULL). */ \
49 di.lnode = tree_lnode_for_node(tree, dci.node, dci.lnode, tenuki_d); \
52 /* ...your urgency computation code goes here... */
54 #define uctd_set_best_child(di, urgency) \
55 if (urgency - best_urgency > __FLT_EPSILON__) { /* urgency > best_urgency */ \
56 best_urgency = urgency; dbests = 0; \
57 } \
58 if (urgency - best_urgency > -__FLT_EPSILON__) { /* urgency >= best_urgency */ \
59 /* We want to always choose something else than a pass \
60 * in case of a tie. pass causes degenerative behaviour. */ \
61 if (dbests == 1 && is_pass(dbest[0].node->coord)) { \
62 dbests--; \
63 } \
64 struct uct_descent db = di; \
65 /* Make sure lnode information is meaningful. */ \
66 if (db.lnode && is_pass(db.lnode->coord)) \
67 db.lnode = NULL; \
68 dbest[dbests++] = db; \
69 } \
72 #define uctd_get_best_child(descent) *(descent) = dbest[fast_random(dbests)];
75 static inline int
76 uct_get_prune_level(struct uct *u, struct tree_node *node)
78 int level = u->prune_initial;
79 if (node->u.playouts < u->prune_playouts)
80 return level;
81 /* Chaslot et al: PROGRESSIVE STRATEGIES FOR MONTE-CARLO TREE SEARCH
82 * k nodes are unpruned when the number of simulations in the parent
83 * surpasses A * B^(k-kinit) simulated games. */
84 level += (floating_t) log(node->u.playouts / u->prune_playouts) / u->prune_speedup;
85 return level;
88 #endif