uct/policy/ucb1amaf.c

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <stdio.h>
   4 #include <stdlib.h>
   5 #include <string.h>
   6
   7 #include "board.h"
   8 #include "debug.h"
   9 #include "move.h"
  10 #include "uct/internal.h"
  11 #include "uct/tree.h"
  12
  13 /* This implements the UCB1 policy with an extra AMAF heuristics. */
  14
  15 struct ucb1_policy {
  16         /* This is what the Modification of UCT with Patterns in Monte Carlo Go
  17          * paper calls 'p'. Original UCB has this on 2, but this seems to
  18          * produce way too wide searches; reduce this to get deeper and
  19          * narrower readouts - try 0.2. */
  20         float explore_p;
  21         /* First Play Urgency - if set to less than infinity (the MoGo paper
  22          * above reports 1.0 as the best), new branches are explored only
  23          * if none of the existing ones has higher urgency than fpu. */
  24         float fpu;
  25         /* Equivalent experience for prior knowledge. MoGo paper recommends
  26          * 50 playouts per source. */
  27         int eqex;
  28 };
  29
  30
  31 struct tree_node *ucb1_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color);
  32
  33 struct tree_node *ucb1_descend(struct uct_policy *p, struct tree *tree, struct tree_node *node, int parity, bool allow_pass);
  34
  35 void ucb1_prior(struct uct_policy *p, struct tree *tree, struct tree_node *node, struct board *b, enum stone color, int parity);
  36
  37 static void
  38 update_node(struct uct_policy *p, struct tree_node *node, int result)
  39 {
  40         node->u.playouts++;
  41         node->u.wins += result;
  42         tree_update_node_value(node);
  43 }
  44 static void
  45 update_node_amaf(struct uct_policy *p, struct tree_node *node, int result)
  46 {
  47         node->amaf.playouts++;
  48         node->amaf.wins += result;
  49         tree_update_node_value(node);
  50 }
  51
  52 void
  53 ucb1amaf_update(struct uct_policy *p, struct tree_node *node, enum stone color, struct playout_amafmap *map, int result)
  54 {
  55         for (; node; node = node->parent, color = stone_other(color)) {
  56                 /* Account for root node. */
  57                 /* But we do the update everytime, since it simply seems
  58                  * to make more sense to give the main branch more weight
  59                  * than other orders of play. */
  60                 update_node(p, node, result);
  61                 for (struct tree_node *ni = node->children; ni; ni = ni->sibling) {
  62                         if (is_pass(ni->coord) || map->map[ni->coord] != color)
  63                                 continue;
  64                         update_node_amaf(p, node, result);
  65                 }
  66         }
  67 }
  68
  69
  70 struct uct_policy *
  71 policy_ucb1amaf_init(struct uct *u, char *arg)
  72 {
  73         struct uct_policy *p = calloc(1, sizeof(*p));
  74         struct ucb1_policy *b = calloc(1, sizeof(*b));
  75         p->uct = u;
  76         p->data = b;
  77         p->descend = ucb1_descend;
  78         p->choose = ucb1_choose;
  79         p->update = ucb1amaf_update;
  80         p->wants_amaf = true;
  81
  82         b->explore_p = 0.2;
  83         b->fpu = INFINITY;
  84
  85         if (arg) {
  86                 char *optspec, *next = arg;
  87                 while (*next) {
  88                         optspec = next;
  89                         next += strcspn(next, ":");
  90                         if (*next) { *next++ = 0; } else { *next = 0; }
  91
  92                         char *optname = optspec;
  93                         char *optval = strchr(optspec, '=');
  94                         if (optval) *optval++ = 0;
  95
  96                         if (!strcasecmp(optname, "explore_p")) {
  97                                 b->explore_p = atof(optval);
  98                         } else if (!strcasecmp(optname, "prior")) {
  99                                 b->eqex = optval ? atoi(optval) : 50;
 100                                 if (b->eqex)
 101                                         p->prior = ucb1_prior;
 102                         } else if (!strcasecmp(optname, "fpu") && optval) {
 103                                 b->fpu = atof(optval);
 104                         } else {
 105                                 fprintf(stderr, "ucb1: Invalid policy argument %s or missing value\n", optname);
 106                         }
 107                 }
 108         }
 109
 110         return p;
 111 }