uct/policy/ucb1amaf.c

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <stdio.h>
   4 #include <stdlib.h>
   5 #include <string.h>
   6
   7 #include "board.h"
   8 #include "debug.h"
   9 #include "move.h"
  10 #include "uct/internal.h"
  11 #include "uct/tree.h"
  12
  13 /* This implements the UCB1 policy with an extra AMAF heuristics. */
  14
  15 struct ucb1_policy {
  16         /* This is what the Modification of UCT with Patterns in Monte Carlo Go
  17          * paper calls 'p'. Original UCB has this on 2, but this seems to
  18          * produce way too wide searches; reduce this to get deeper and
  19          * narrower readouts - try 0.2. */
  20         float explore_p;
  21         /* First Play Urgency - if set to less than infinity (the MoGo paper
  22          * above reports 1.0 as the best), new branches are explored only
  23          * if none of the existing ones has higher urgency than fpu. */
  24         float fpu;
  25         /* Equivalent experience for prior knowledge. MoGo paper recommends
  26          * 50 playouts per source. */
  27         int eqex;
  28 };
  29
  30
  31 struct tree_node *ucb1_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color);
  32
  33 struct tree_node *ucb1_descend(struct uct_policy *p, struct tree *tree, struct tree_node *node, int parity, bool allow_pass);
  34
  35 void ucb1_prior(struct uct_policy *p, struct tree *tree, struct tree_node *node, struct board *b, enum stone color, int parity);
  36
  37 static void
  38 update_node(struct uct_policy *p, struct tree_node *node, int result)
  39 {
  40         node->u.playouts++;
  41         node->u.wins += result;
  42         node->u.value = ((float)node->u.wins + (float)node->prior.wins) / (node->u.playouts + node->prior.playouts);
  43 }
  44
  45 void
  46 ucb1amaf_update(struct uct_policy *p, struct tree_node *node, enum stone color, struct playout_amafmap *map, int result)
  47 {
  48         for (; node; node = node->parent, color = stone_other(color)) {
  49                 /* Account for root node. */
  50                 /* But we do the update everytime, since it simply seems
  51                  * to make more sense to give the main branch more weight
  52                  * than other orders of play. */
  53                 update_node(p, node, result);
  54                 for (struct tree_node *ni = node->children; ni; ni = ni->sibling) {
  55                         if (is_pass(ni->coord) || map->map[ni->coord] != color)
  56                                 continue;
  57                         update_node(p, node, result);
  58                 }
  59         }
  60 }
  61
  62
  63 struct uct_policy *
  64 policy_ucb1amaf_init(struct uct *u, char *arg)
  65 {
  66         struct uct_policy *p = calloc(1, sizeof(*p));
  67         struct ucb1_policy *b = calloc(1, sizeof(*b));
  68         p->uct = u;
  69         p->data = b;
  70         p->descend = ucb1_descend;
  71         p->choose = ucb1_choose;
  72         p->update = ucb1amaf_update;
  73         p->wants_amaf = true;
  74
  75         b->explore_p = 0.2;
  76         b->fpu = INFINITY;
  77
  78         if (arg) {
  79                 char *optspec, *next = arg;
  80                 while (*next) {
  81                         optspec = next;
  82                         next += strcspn(next, ":");
  83                         if (*next) { *next++ = 0; } else { *next = 0; }
  84
  85                         char *optname = optspec;
  86                         char *optval = strchr(optspec, '=');
  87                         if (optval) *optval++ = 0;
  88
  89                         if (!strcasecmp(optname, "explore_p")) {
  90                                 b->explore_p = atof(optval);
  91                         } else if (!strcasecmp(optname, "prior")) {
  92                                 b->eqex = optval ? atoi(optval) : 50;
  93                                 if (b->eqex)
  94                                         p->prior = ucb1_prior;
  95                         } else if (!strcasecmp(optname, "fpu") && optval) {
  96                                 b->fpu = atof(optval);
  97                         } else {
  98                                 fprintf(stderr, "ucb1: Invalid policy argument %s or missing value\n", optname);
  99                         }
 100                 }
 101         }
 102
 103         return p;
 104 }