uct/policy/generic.h

   1 #ifndef ZZGO_UCT_POLICY_GENERIC_H
   2 #define ZZGO_UCT_POLICY_GENERIC_H
   3
   4 /* Some default policy routines and templates. */
   5
   6 #include "board.h"
   7 #include "stone.h"
   8 #include "uct/internal.h"
   9
  10 struct board;
  11 struct tree_node;
  12
  13 struct tree_node *uctp_generic_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color, coord_t exclude);
  14 void uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_descent *descent);
  15 static int uct_get_prune_level(struct uct *u, struct tree_node *node);
  16
  17
  18 /* Some generic stitching for tree descent. */
  19
  20 #define uctd_try_node_children(policy, tree, descent, allow_pass, parity, tenuki_d, di, urgency) \
  21         /* Information abound best children. */ \
  22         /* XXX: We assume board <=25x25. */ \
  23         struct uct_descent dbest[BOARD_MAX_MOVES + 1] = { { .node = descent->node->children, .lnode = NULL } }; int dbests = 1; \
  24         floating_t best_urgency = -9999; \
  25         int prune_level = uct_get_prune_level(policy->uct, descent->node); \
  26         /* Descent children iterator. */ \
  27         struct uct_descent dci = { .node = descent->node->children, .lnode = descent->lnode ? descent->lnode->children : NULL }; \
  28         \
  29         for (; dci.node; dci.node = dci.node->sibling) { \
  30                 floating_t urgency; \
  31                 /* Do not consider passing early. */ \
  32                 if (unlikely((!allow_pass && is_pass(dci.node->coord)) || (dci.node->hints & TREE_HINT_INVALID))) \
  33                         continue; \
  34                 /* Skip temporarily pruned moves. */ \
  35                 if (dci.node->prune_rank >= prune_level) \
  36                         continue; \
  37                 /* Position dci.lnode to point at or right after the local
  38                  * node corresponding to dci.node. */ \
  39                 while (dci.lnode && dci.lnode->coord < dci.node->coord) \
  40                         dci.lnode = dci.lnode->sibling; \
  41                 /* Set up descent-further iterator. This is the public-accessible
  42                  * one, and usually is similar to dci. However, in case of local
  43                  * trees, we may keep next-candidate pointer in dci while storing
  44                  * actual-specimen in di. */ \
  45                 struct uct_descent di = dci; \
  46                 if (dci.lnode) { \
  47                         /* Set lnode to local tree node corresponding
  48                          * to node (dci.lnode, pass-lnode or NULL). */ \
  49                         di.lnode = tree_lnode_for_node(tree, dci.node, dci.lnode, tenuki_d); \
  50                 }
  51
  52                 /* ...your urgency computation code goes here... */
  53
  54 #define uctd_set_best_child(di, urgency) \
  55                 if (urgency - best_urgency > __FLT_EPSILON__) { /* urgency > best_urgency */ \
  56                         best_urgency = urgency; dbests = 0; \
  57                 } \
  58                 if (urgency - best_urgency > -__FLT_EPSILON__) { /* urgency >= best_urgency */ \
  59                         /* We want to always choose something else than a pass \
  60                          * in case of a tie. pass causes degenerative behaviour. */ \
  61                         if (dbests == 1 && is_pass(dbest[0].node->coord)) { \
  62                                 dbests--; \
  63                         } \
  64                         struct uct_descent db = di; \
  65                         /* Make sure lnode information is meaningful. */ \
  66                         if (db.lnode && is_pass(db.lnode->coord)) \
  67                                 db.lnode = NULL; \
  68                         dbest[dbests++] = db; \
  69                 } \
  70         }
  71
  72 #define uctd_get_best_child(descent) *(descent) = dbest[fast_random(dbests)];
  73
  74
  75 static inline int
  76 uct_get_prune_level(struct uct *u, struct tree_node *node)
  77 {
  78         int level = u->prune_initial;
  79         if (node->u.playouts < u->prune_playouts)
  80                 return level;
  81         /* Chaslot et al: PROGRESSIVE STRATEGIES FOR MONTE-CARLO TREE SEARCH
  82          * k nodes are unpruned when the number of simulations in the parent
  83          * surpasses A * B^(k-kinit) simulated games. */
  84         level += (floating_t) log(node->u.playouts / u->prune_playouts) / u->prune_speedup;
  85         return level;
  86 }
  87
  88 #endif