uct/plugin/wolf.c

   1 /* This is a Pachi UCT plugin for Thomas Wolf's move evaluation API. */
   2 /* This file is released under the same licence conditions as
   3  * <uct/plugin.h>. */
   4
   5 /* We will add positive priors (1.0) for moves that play in-between
   6  * of two different groups of the same color; that is, moves that connect
   7  * two groups or the same color or separate two groups of the same color.
   8  * This is not a very good prior actually, since it leads to a lot of
   9  * useless moves. (Maybe doing this in simulations would be more interesting?)
  10  * But it is a simple enough example. :-) */
  11
  12 /* Compile the plugin like this:
  13  * gcc -Wall -O3 -march=native -Ipachi_source_root -shared -fPIC -o wolf.so wolf.c
  14  * Then, load it in Pachi by passing plugin=wolf.so as a parameter.
  15  * You can also pass it parameters: plugin=wolf.so:p1=v1:p2=v2.
  16  * The plugin supports these parameters:
  17  * file         Filename of the real module used
  18  * eqex         Number of prior'd simulations, overrides Pachi default
  19  * threshold    Threshold value when to stop iterating influence/strength values.
  20  * overrelax    Overrelaxation parameter. Should probably not be changed from 1.0
  21  * iterations   Upper bound of the number of iters for each empty point or chain.
  22  */
  23
  24
  25 /* This is the Thomas Wolf's API we implement:
  26
  27 The library currently provides 5 functions:
  28
  29  SETPARAM
  30  EVALFUN1
  31  EVALFUN2
  32  FINDMOVE1
  33  FINDMOVE2
  34
  35 of which SETPARAM, EVALFUN2, FINDMOVE2 are the ones likely to be used
  36 in other programs.
  37
  38 - - - -
  39
  40 SETPARAM
  41
  42 This procedure has to be called before any EVALFUN call but can also be
  43 called again later. It sets parameters for the computation of the
  44 influence function.
  45
  46 The first parameter is a threshold value when to stop iterating
  47 influence/strength values. Characteristics: The smaller the value the
  48 more accurate the influence value but the slower the computation due
  49 to an increase in iterations. But because accuracy is limited by
  50 systematic errors, too small values do not bring improvement,
  51 especially not in unstable situations.
  52
  53 The second is an overrelaxation parameter. Should probably not be
  54 changed from 1.0 .
  55
  56 The third is an upper bound of the number of iterations for each empty
  57 point or chain. Again, the more the better but also the slower. Again,
  58 improvement is overshadowed by systematic errors, especially in
  59 unstable situations.  Typical values have the range 3..65000.
  60
  61 - - - -
  62
  63 EVALFUN1, EVALFUN2
  64
  65 Both are one and the same static evaluation function, only with
  66 different interface.
  67
  68 Input:
  69
  70 For both functions the first parameter is a string
  71 which provides the necessary input by specifying the board position,
  72 who moves next, specifying whether the evaluation function should
  73 be applied ad hoc (task=3) or incrementally (task=4) and which
  74 optionally allows to specify unconditionally alive chains. The
  75 syntax of the input for EVALFUN1 and EVALFUN2 is the following:
  76
  77 - Only for ad hoc mode:
  78   A single digit specifying the size of the board:
  79    1 --> 19x19, 2 --> 17x17, 3 --> 15x15, 4 --> 13x13,
  80    5 --> 11x11, 6 -->  9x9,  7 -->  7x7,  8 -->  5x5
  81 - Only for ad hoc mode:
  82   All stones on the board.
  83   Because there are more than 256 fields and sending non-alphanumeric
  84   characters may give problems we need 2 coordinates for each stone
  85   anyway. In these coordinates (1,1) is in the lower left corner
  86   and the 1st increases to the right and the 2nd increases vertically
  87   to the top.
  88   Black stone coordinates are encoded by chr(64+x), so 'A' would
  89   be 1, 'B' be 2,... and white stone coordinates by chr(96+x), so
  90   'a' is 1, 'b' is 2, ...
  91 - If there is a field forbidden by ko then the first coordinate
  92   is a capital letter and the second a lower case letter.
  93 - Next is a single character '%' indicating the end of the sequence of fields.
  94 - Next is a single character specifying who moves first and form of solution:
  95   'b' Black moves first                                                 <*>
  96   'w' White moves first                                                 <*>
  97 - The task: a single digit that specifies the task.
  98   Available currently:
  99 3 ... evaluation of all chains and empty fields within a region, adhoc version.
 100   If the region is the whole board no further data follow.
 101   If the region is bounded then the following data consist of points (x,y).
 102   Each point is encoded by 2 byte, either as chr(64+x) chr(64+y), i.e. as
 103   upper case letters or as chr(96+x) chr(96+y), i.e. lower case letters.
 104   Whether upper or lower case does not depend on the colour (the colour of the
 105   stones has already been specified above) but is defined as follows:
 106   - The first point (occupied or not) marks the inside of the region to be
 107     evaluated. For that one can use upper or lower case letters.
 108   - Each one of any further points marks a chain that shall be assumed to be
 109     alive, i.e. such a chain will be a boundary to the region to be evaluated.
 110     If a field marking a chain is encoded in upper case letters by
 111     chr(64+x) chr(64+y) then the chain is assumed to be statically alive which
 112     is a property that will be respected in future calls of task 4 below. If
 113     the chain marking field is encoded in lower case letters then it treated
 114     as alive for now in this computation of the evaluation function but not
 115     necessarily in future calls of task 4.
 116
 117 4 ... evaluation of all chains and empty fields within a region, incremental version.
 118  Input is exactly exactly like task 3. The difference to task 3 is that this
 119  position has already been evaluated in the previous call with task 3 or 4.
 120  Therefore, no boardsize, no stones and no ko-field are specified. That means
 121  the complete input is:
 122  - The first character is '%'
 123  - followed by 'b' or 'w' to mark the colour moving next,      <*>.
 124  - a digit for the task, so 4,
 125  - optional, like in task 3: fields marking alive chains,
 126  - a '%' character to mark the end of the optional list of alive chains
 127  - a point (x,y) encoded by 2 byte chr(64+x) chr(64+y) that marks
 128    the coordinates of the next move of the colour given in <*>.
 129
 130 Output:
 131
 132 EVALFUN1 has a simple interface, providing with its 2nd parameter a
 133 string that gives for each board intersection
 134 - the influence value of Black (in the range 0 .. 1.0) if the
 135   intersection is empty
 136 - the strength value of the chain occupying that intersection
 137   (in the range 0.0 = dead .. 1.0 = uncnditionally alive) if the
 138   intersection is not empty.
 139 This interface is simple but slow because this string has to be
 140 generated and to be decoded to use any information.
 141
 142 EVALFUN2 gives direct access to the relevant data.
 143 - For an intersection with coordinates (x,y) (1<=x<=boardsize from
 144   left to right, 1<=y<=boardsize from bottom to top) the 2-dim
 145   array PChainNo(x,y)
 146   - is 0 if the intersection is empty,
 147   - gives the chain number of the chain that occupies (x,y).
 148 - For an empty intersection with coordinates (x,y) (1<=x<=boardsize from
 149   left to right, 1<=y<=boardsize from bottom to top) the record/structure
 150   PPD(i,j) stores relevant data and boc is the influence value of Black.
 151 - For an intersection (x,y) occupied by a chain with number n the
 152   record/structure PCD(n) stores the relevant data of this chain
 153   and the svl component gives a strength value in the interval
 154   0.0 (= unconditionally dead) to 1.0 (= unconditionally alive).
 155
 156 - - - -
 157
 158 FINDMOVE1, FINDMOVE2
 159
 160 Before calling FINDMOVE1/2 the evaluation function EVALFUN1 or
 161 EVALFUN2 for the current board position has to be executed.
 162
 163 Both, FINDMOVE1 and FINDMOVE2, are one and the same function that
 164 provides a ranking of legal moves, only with different
 165 interface. Currently FINDMOVE performs each legal move, updates the
 166 static evaluation and adds up probabilities over the whole board to
 167 arrive at a rough score for each move. Thus, depending on the number
 168 of legal moves this function is up to 360 times slower than EVALFUN in
 169 update mode.
 170
 171 - The first parameter is specifying which sides moves next: White=-1, Black=1.
 172 - The 2nd parameter of FINDMOVE1 is an output string giving for each legal
 173   intersection a measure of value of doing the next move there.
 174   The first move is the supposedly best move. All other moves are
 175   listed line by line of the board.
 176 - For FINDMOVE2 the 2nd and 3rd parameter are the x- and y-coordinate
 177   of the best move and the 4th parameter is a 'value' of that move.
 178   The 5th parameter is a 2-dim array which for each intersection
 179   gives a 'value' of moving there. To identify whether the intersection is
 180   empty the last parameter is a 2-dim array giving a value 0 if it is
 181   empty (otherwise the number of the chain that occupies the intersection).
 182
 183 - - - -
 184
 185  */
 186
 187 #include <dlfcn.h>
 188 #include <stdio.h>
 189 #include <stdlib.h>
 190
 191 /* The basic plugin interface. */
 192 #include "uct/plugin.h"
 193
 194 /* The API types: */
 195 #define MAXBOARDSIZE 19
 196 typedef char byte_board[MAXBOARDSIZE+2][MAXBOARDSIZE+2]; // The array indices are 1-based!
 197 typedef floating_t influ_board[MAXBOARDSIZE][MAXBOARDSIZE];
 198
 199 /* Our context structure. */
 200 struct context {
 201         int eqex;
 202
 203         void *dlh;
 204         void (*SETPARAM)(double sv, double omega, uint16_t mi);
 205         void (*EVALFUN1)(char *javp, char *InfluField);
 206         void (*FINDMOVE2)(int fa, char *mi, char *mj, floating_t *mxscore, influ_board *SB, byte_board **PChainNo);
 207 };
 208
 209
 210 char
 211 bsize2digit(int size)
 212 {
 213         switch (size) {
 214                 case 19: return '1';
 215                 case 17: return '2';
 216                 case 15: return '3';
 217                 case 13: return '4';
 218                 case 11: return '5';
 219                 case 9: return '6';
 220                 case 7: return '7';
 221                 case 5: return '8';
 222                 default:
 223                         fprintf(stderr, "wolf plugin: Unsupported board size: %d\n", size);
 224                         exit(1);
 225         }
 226 }
 227
 228 char
 229 coord2digit(enum stone color, int coord)
 230 {
 231         assert(color == S_BLACK || color == S_WHITE);
 232         return (color == S_BLACK ? 64 + coord : 96 + coord);
 233 }
 234
 235 void
 236 pachi_plugin_prior(void *data, struct tree_node *node, struct prior_map *map, int eqex)
 237 {
 238         struct context *ctx = data;
 239         struct board *b = map->b;
 240         if (ctx->eqex >= 0)
 241                 eqex = ctx->eqex; // override Pachi default
 242
 243         /* First, create a string representation of current board. */
 244 #define BIGSTR 10000
 245         char bin[BIGSTR];
 246         char bout[BIGSTR];
 247         char *bip = bin;
 248         *bip++ = bsize2digit(board_size(b) - 2);
 249         foreach_point(b) {
 250                 enum stone s = board_at(b, c);
 251                 if (s == S_NONE || s == S_OFFBOARD) continue;
 252                 *bip++ = coord2digit(s, coord_x(c, b));
 253                 *bip++ = coord2digit(s, coord_y(c, b));
 254         } foreach_point_end;
 255         if (!is_pass(b->ko.coord)) {
 256                 *bip++ = coord2digit(S_BLACK, coord_x(b->ko.coord, b));
 257                 *bip++ = coord2digit(S_WHITE, coord_y(b->ko.coord, b));
 258         }
 259         *bip++ = '%';
 260         *bip++ = map->to_play == S_BLACK ? 'b' : 'w';
 261         *bip++ = '3';
 262         *bip = 0;
 263
 264         /* Seed the evaluation of the situation. */
 265         // fprintf(stderr, "board desc: %s\n", bin);
 266         ctx->EVALFUN1(bin, bout);
 267         /* We do not care about bout. */
 268
 269         /* Retrieve values of moves. */
 270         char bestx, besty;
 271         floating_t bestval;
 272         influ_board values;
 273         byte_board *chaininfo;
 274         ctx->FINDMOVE2(map->to_play == S_BLACK ? 1 : -1, &bestx, &besty, &bestval, &values, &chaininfo);
 275         // fprintf(stderr, "best is (%d,%d)%s %f\n", bestx, besty, coord2sstr(coord_xy(b, bestx, besty), b), bestval);
 276
 277         /* In the first pass, determine best and worst value. (Best value
 278          * reported by FINDMOVE2 is wrong.) In the second pass, we set the
 279          * priors by normalization based on the determined values. */
 280         floating_t best = -1000, worst = 1000;
 281         foreach_free_point(map->b) {
 282                 if (!map->consider[c])
 283                         continue;
 284                 floating_t value = values[coord_x(c, b) - 1][coord_y(c, b) - 1];
 285                 if (map->to_play == S_WHITE) value = -value;
 286                 if (value > best) best = value;
 287                 else if (value < worst) worst = value;
 288         } foreach_point_end;
 289
 290         foreach_free_point(map->b) {
 291                 if (!map->consider[c])
 292                         continue;
 293
 294                 /* Take the value and normalize it somehow. */
 295                 /* Right now, we just do this by linear rescaling from
 296                  * [worst, best] to [0,1]. */
 297                 floating_t value = values[coord_x(c, b) - 1][coord_y(c, b) - 1];
 298                 if (map->to_play == S_WHITE) value = -value;
 299                 value = (value - worst) / (best - worst);
 300                 // fprintf(stderr, "\t[%s %s] %f/%f\n", stone2str(map->to_play), coord2sstr(c, b), value, best);
 301
 302                 add_prior_value(map, c, (value - worst) / best, eqex);
 303         } foreach_free_point_end;
 304 }
 305
 306
 307 void *
 308 pachi_plugin_init(char *arg, struct board *b, int seed)
 309 {
 310         struct context *ctx = calloc(1, sizeof(*ctx));
 311
 312         /* Initialize ctx defaults here. */
 313         char *file = NULL;
 314         floating_t overrelax = 1.0, threshold = 0.001;
 315         int iterations = 13;
 316         ctx->eqex = -1;
 317
 318         /* This is the canonical Pachi arguments parser. You do not strictly
 319          * need to decypher it, you can just use it as a boilerplate. */
 320         if (arg) {
 321                 char *optspec, *next = arg;
 322                 while (*next) {
 323                         optspec = next;
 324                         next += strcspn(next, ":");
 325                         if (*next) { *next++ = 0; } else { *next = 0; }
 326
 327                         char *optname = optspec;
 328                         char *optval = strchr(optspec, '=');
 329                         if (optval) *optval++ = 0;
 330
 331                         if (!strcasecmp(optname, "eqex") && optval) {
 332                                 /* eqex takes a required integer argument */
 333                                 ctx->eqex = atoi(optval);
 334
 335                         } else if (!strcasecmp(optname, "file") && optval) {
 336                                 file = strdup(optval);
 337
 338                         } else if (!strcasecmp(optname, "threshold") && optval) {
 339                                 threshold = atof(optval);
 340
 341                         } else if (!strcasecmp(optname, "overrelax") && optval) {
 342                                 overrelax = atof(optval);
 343
 344                         } else if (!strcasecmp(optname, "iterations") && optval) {
 345                                 iterations = atoi(optval);
 346
 347                         } else {
 348                                 fprintf(stderr, "wolf plugin: Invalid argument %s or missing value\n", optname);
 349                                 exit(1);
 350                         }
 351                 }
 352         }
 353
 354         /* Initialize the rest of ctx (depending on arguments) here. */
 355         if (!file) {
 356                 fprintf(stderr, "wolf plugin: file argument not specified\n");
 357                 exit(1);
 358         }
 359         ctx->dlh = dlopen(file, RTLD_NOW);
 360         if (!ctx->dlh) {
 361                 fprintf(stderr, "Cannot load file %s: %s\n", file, dlerror());
 362                 exit(EXIT_FAILURE);
 363         }
 364 #define loadsym(s_) do {\
 365         ctx->s_ = dlsym(ctx->dlh, #s_); \
 366         if (!ctx->s_) { \
 367                 fprintf(stderr, "Cannot find %s in module: %s\n", #s_, dlerror()); \
 368                 exit(EXIT_FAILURE); \
 369         } \
 370 } while (0)
 371         loadsym(SETPARAM);
 372         loadsym(EVALFUN1);
 373         loadsym(FINDMOVE2);
 374
 375         ctx->SETPARAM(threshold, overrelax, iterations);
 376
 377         return ctx;
 378 }
 379
 380 void
 381 pachi_plugin_done(void *data)
 382 {
 383         struct context *ctx = data;
 384         dlclose(ctx->dlh);
 385         free(ctx);
 386 }