uct/plugin/wolf.c

   1 /* This is a Pachi UCT plugin for Thomas Wolf's move evaluation API. */
   2 /* This file is released under the same licence conditions as
   3  * <uct/plugin.h>. */
   4
   5 /* We will add positive priors (1.0) for moves that play in-between
   6  * of two different groups of the same color; that is, moves that connect
   7  * two groups of the same color or separate two groups of the same color.
   8  * This is not a very good prior actually, since it leads to a lot of
   9  * useless moves. (Maybe doing this in simulations would be more interesting?)
  10  * But it is a simple enough example. :-) */
  11
  12 /* Compile the plugin like this:
  13  * gcc -Wall -O3 -march=native -Ipachi_source_root -shared -fPIC -o wolf.so wolf.c
  14  * Then, load it in Pachi by passing plugin=wolf.so as a parameter.
  15  * You can also pass it parameters: plugin=wolf.so:p1=v1:p2=v2.
  16  * The plugin supports these parameters:
  17  * file         Filename of the real module used
  18  * eqex         Number of prior'd simulations, overrides Pachi default
  19  * threshold    Threshold value when to stop iterating influence/strength values.
  20  * overrelax    Overrelaxation parameter. Should probably not be changed from 1.0
  21  * iterations   Upper bound of the number of iters for each empty point or chain.
  22  */
  23
  24
  25 /* This is Thomas Wolf's API that we implement:
  26
  27 The library currently provides 5 functions:
  28
  29  SETPARAM
  30  EVALFUN1
  31  EVALFUN2
  32  FINDMOVE1
  33  FINDMOVE2
  34
  35 of which SETPARAM, EVALFUN2, FINDMOVE2 are the ones likely to be used
  36 in other programs.
  37
  38 - - - -
  39
  40 SETPARAM
  41
  42 This procedure has to be called before any EVALFUN call but can also be
  43 called again later. It sets parameters for the computation of the
  44 influence function.
  45
  46 The first parameter is a threshold value when to stop iterating
  47 influence/strength values. Characteristics: The smaller the value the
  48 more accurate the influence value but the slower the computation due
  49 to an increase in iterations. But because accuracy is limited by
  50 systematic errors, too small values do not bring improvement,
  51 especially not in unstable situations.
  52
  53 The second is an overrelaxation parameter. Should probably not be
  54 changed from 1.0 .
  55
  56 The third is an upper bound of the number of iterations for each empty
  57 point or chain. Again, the more the better but also the slower. Again,
  58 improvement is overshadowed by systematic errors, especially in
  59 unstable situations.  Typical values have the range 3..65000.
  60
  61 - - - -
  62
  63 EVALFUN1, EVALFUN2
  64
  65 Both are one and the same static evaluation function, only with
  66 different interface.
  67
  68 Input:
  69
  70 For both functions the first parameter is a string
  71 which provides the necessary input by specifying the board position,
  72 who moves next, specifying whether the evaluation function should
  73 be applied ad hoc (task=3) or incrementally (task=4) and which
  74 optionally allows to specify unconditionally alive chains. The
  75 syntax of the input for EVALFUN1 and EVALFUN2 is the following:
  76
  77 - Only for ad hoc mode:
  78   A single digit specifying the size of the board:
  79    1 --> 19x19, 2 --> 17x17, 3 --> 15x15, 4 --> 13x13,
  80    5 --> 11x11, 6 -->  9x9,  7 -->  7x7,  8 -->  5x5
  81 - Only for ad hoc mode:
  82   All stones on the board.
  83   Because there are more than 256 fields and sending non-alphanumeric
  84   characters may give problems we need 2 coordinates for each stone
  85   anyway. In these coordinates (1,1) is in the lower left corner
  86   and the 1st increases to the right and the 2nd increases vertically
  87   to the top.
  88   Black stone coordinates are encoded by chr(64+x), so 'A' would
  89   be 1, 'B' be 2,... and white stone coordinates by chr(96+x), so
  90   'a' is 1, 'b' is 2, ...
  91 - If there is a field forbidden by ko then the first coordinate
  92   is a capital letter and the second a lower case letter.
  93 - Next is a single character '%' indicating the end of the sequence of fields.
  94 - Next is a single character specifying who moves first and form of solution:
  95   'b' Black moves first                                                 <*>
  96   'w' White moves first                                                 <*>
  97 - The task: a single digit that specifies the task.
  98   Available currently:
  99 3 ... evaluation of all chains and empty fields within a region, adhoc version.
 100   If the region is the whole board no further data follow.
 101   If the region is bounded then the following data consist of points (x,y).
 102   Each point is encoded by 2 byte, either as chr(64+x) chr(64+y), i.e. as
 103   upper case letters or as chr(96+x) chr(96+y), i.e. lower case letters.
 104   Whether upper or lower case does not depend on the colour (the colour of the
 105   stones has already been specified above) but is defined as follows:
 106   - The first point (occupied or not) marks the inside of the region to be
 107     evaluated. For that one can use upper or lower case letters.
 108   - Each one of any further points marks a chain that shall be assumed to be
 109     alive, i.e. such a chain will be a boundary to the region to be evaluated.
 110     If a field marking a chain is encoded in upper case letters by
 111     chr(64+x) chr(64+y) then the chain is assumed to be statically alive which
 112     is a property that will be respected in future calls of task 4 below. If
 113     the chain marking field is encoded in lower case letters then it is treated
 114     as alive for now in this computation of the evaluation function but not
 115     necessarily in future calls of task 4.
 116
 117 4 ... evaluation of all chains and empty fields within a region, incremental version.
 118  Input is exactly like task 3. The difference to task 3 is that this
 119  position has already been evaluated in the previous call with task 3 or 4.
 120  Therefore, no boardsize, no stones and no ko-field are specified. That means
 121  the complete input is:
 122  - The first character is '%'
 123  - followed by 'b' or 'w' to mark the colour moving next,      <*>.
 124  - a digit for the task, so 4,
 125  - optional, like in task 3: fields marking alive chains,
 126  - a '%' character to mark the end of the optional list of alive chains
 127  - a point (x,y) encoded by 2 byte chr(64+x) chr(64+y) that marks
 128    the coordinates of the next move of the colour given in <*>.
 129
 130 Output:
 131
 132 EVALFUN1 has a simple interface, providing with its 2nd parameter a
 133 string that gives for each board intersection
 134 - the influence value of Black (in the range 0 .. 1.0) if the
 135   intersection is empty
 136 - the strength value of the chain occupying that intersection
 137   (in the range 0.0 = dead .. 1.0 = unconditionally alive) if the
 138   intersection is not empty.
 139 This interface is simple but slow because this string has to be
 140 generated and to be decoded to use any information.
 141
 142 EVALFUN2 gives direct access to the relevant data.
 143 - For an intersection with coordinates (x,y) (1<=x<=boardsize from
 144   left to right, 1<=y<=boardsize from bottom to top) the 2-dim
 145   array PChainNo(x,y)
 146   - is 0 if the intersection is empty,
 147   - gives the chain number of the chain that occupies (x,y).
 148 - For an empty intersection with coordinates (x,y) (1<=x<=boardsize from
 149   left to right, 1<=y<=boardsize from bottom to top) the record/structure
 150   PPD(i,j) stores relevant data and boc is the influence value of Black.
 151 - For an intersection (x,y) occupied by a chain with number n the
 152   record/structure PCD(n) stores the relevant data of this chain
 153   and the svl component gives a strength value in the interval
 154   0.0 (= unconditionally dead) to 1.0 (= unconditionally alive).
 155
 156 - - - -
 157
 158 FINDMOVE1, FINDMOVE2
 159
 160 Before calling FINDMOVE1/2 the evaluation function EVALFUN1 or
 161 EVALFUN2 for the current board position has to be executed.
 162
 163 Both, FINDMOVE1 and FINDMOVE2, are one and the same function that
 164 provides a ranking of legal moves, only with different
 165 interface. Currently FINDMOVE performs each legal move, updates the
 166 static evaluation and adds up probabilities over the whole board to
 167 arrive at a rough score for each move. Thus, depending on the number
 168 of legal moves this function is up to 360 times slower than EVALFUN in
 169 update mode.
 170
 171 - The first parameter specifies which side moves next: White=-1, Black=1.
 172 - The 2nd parameter of FINDMOVE1 is an output string giving for each legal
 173   intersection a measure of value of doing the next move there.
 174   The first move is the supposedly best move. All other moves are
 175   listed line by line of the board.
 176 - For FINDMOVE2 the 2nd and 3rd parameter are the x- and y-coordinate
 177   of the best move and the 4th parameter is a 'value' of that move.
 178   The 5th parameter is a 2-dim array which for each intersection
 179   gives a 'value' of moving there. To identify whether the intersection is
 180   empty the last parameter is a 2-dim array giving a value 0 if it is
 181   empty (otherwise the number of the chain that occupies the intersection).
 182
 183 - - - -
 184
 185  */
 186
 187 #include <dlfcn.h>
 188 #include <stdio.h>
 189 #include <stdlib.h>
 190
 191 /* The basic plugin interface. */
 192 #include "uct/plugin.h"
 193
 194 /* The API types: */
 195 #define MAXBOARDSIZE 19
 196 typedef char byte_board[MAXBOARDSIZE+2][MAXBOARDSIZE+2]; // The array indices are 1-based!
 197 typedef float influ_board[MAXBOARDSIZE][MAXBOARDSIZE];
 198
 199 /* Our context structure. */
 200 struct context {
 201         int eqex;
 202
 203         void *dlh;
 204         void (*SETPARAM)(double sv, double omega, uint16_t mi);
 205         void (*EVALFUN1)(char *javp, char *InfluField);
 206         void (*FINDMOVE2)(int fa, char *mi, char *mj, float *mxscore, influ_board *SB, byte_board **PChainNo);
 207 };
 208
 209
 210 char
 211 bsize2digit(int size)
 212 {
 213         switch (size) {
 214                 case 19: return '1';
 215                 case 17: return '2';
 216                 case 15: return '3';
 217                 case 13: return '4';
 218                 case 11: return '5';
 219                 case 9: return '6';
 220                 case 7: return '7';
 221                 case 5: return '8';
 222                 default:
 223                         fprintf(stderr, "wolf plugin: Unsupported board size: %d\n", size);
 224                         exit(1);
 225         }
 226 }
 227
 228 char
 229 coord2digit(enum stone color, int coord)
 230 {
 231         assert(color == S_BLACK || color == S_WHITE);
 232         return (color == S_BLACK ? 64 + coord : 96 + coord);
 233 }
 234
 235 void
 236 pachi_plugin_prior(void *data, struct tree_node *node, struct prior_map *map, int eqex)
 237 {
 238         struct context *ctx = data;
 239         struct board *b = map->b;
 240         if (ctx->eqex >= 0)
 241                 eqex = ctx->eqex; // override Pachi default
 242
 243         /* First, create a string representation of current board. */
 244 #define BIGSTR 10000
 245         char bin[BIGSTR];
 246         char bout[BIGSTR];
 247         char *bip = bin;
 248         *bip++ = bsize2digit(board_size(b) - 2);
 249         foreach_point(b) {
 250                 enum stone s = board_at(b, c);
 251                 if (s == S_NONE || s == S_OFFBOARD) continue;
 252                 *bip++ = coord2digit(s, coord_x(c, b));
 253                 *bip++ = coord2digit(s, coord_y(c, b));
 254         } foreach_point_end;
 255         if (!is_pass(b->ko.coord)) {
 256                 *bip++ = coord2digit(S_BLACK, coord_x(b->ko.coord, b));
 257                 *bip++ = coord2digit(S_WHITE, coord_y(b->ko.coord, b));
 258         }
 259         *bip++ = '%';
 260         *bip++ = map->to_play == S_BLACK ? 'b' : 'w';
 261         *bip++ = '3';
 262         *bip = 0;
 263
 264         /* Seed the evaluation of the situation. */
 265         // fprintf(stderr, "board desc: %s\n", bin);
 266         ctx->EVALFUN1(bin, bout);
 267         /* We do not care about bout. */
 268
 269         /* Retrieve values of moves. */
 270         char bestx, besty;
 271         float bestval;
 272         influ_board values;
 273         byte_board *chaininfo;
 274         ctx->FINDMOVE2(map->to_play == S_BLACK ? 1 : -1, &bestx, &besty, &bestval, &values, &chaininfo);
 275         // fprintf(stderr, "best is (%d,%d)%s %f\n", bestx, besty, coord2sstr(coord_xy(b, bestx, besty), b), bestval);
 276
 277         foreach_free_point(map->b) {
 278                 if (!map->consider[c])
 279                         continue;
 280
 281                 /* Take the value and normalize it somehow. */
 282                 /* Right now, we just do this by rescaling to [0,1] most
 283                  * naively. */
 284                 float value = values[coord_x(c, b) - 1][coord_y(c, b) - 1];
 285                 // fprintf(stderr, "\t[%s %s] %f/%f\n", stone2str(map->to_play), coord2sstr(c, b), value, bestval);
 286                 value /= bestval;
 287
 288                 add_prior_value(map, c, value, eqex);
 289         } foreach_free_point_end;
 290 }
 291
 292
 293 void *
 294 pachi_plugin_init(char *arg, struct board *b, int seed)
 295 {
 296         struct context *ctx = calloc(1, sizeof(*ctx));
 297
 298         /* Initialize ctx defaults here. */
 299         char *file = NULL;
 300         float overrelax = 1.0, threshold = 0.001;
 301         int iterations = 13;
 302         ctx->eqex = -1;
 303
 304         /* This is the canonical Pachi arguments parser. You do not strictly
 305          * need to decypher it, you can just use it as a boilerplate. */
 306         if (arg) {
 307                 char *optspec, *next = arg;
 308                 while (*next) {
 309                         optspec = next;
 310                         next += strcspn(next, ":");
 311                         if (*next) { *next++ = 0; } else { *next = 0; }
 312
 313                         char *optname = optspec;
 314                         char *optval = strchr(optspec, '=');
 315                         if (optval) *optval++ = 0;
 316
 317                         if (!strcasecmp(optname, "eqex") && optval) {
 318                                 /* eqex takes a required integer argument */
 319                                 ctx->eqex = atoi(optval);
 320
 321                         } else if (!strcasecmp(optname, "file") && optval) {
 322                                 file = strdup(optval);
 323
 324                         } else if (!strcasecmp(optname, "threshold") && optval) {
 325                                 threshold = atof(optval);
 326
 327                         } else if (!strcasecmp(optname, "overrelax") && optval) {
 328                                 overrelax = atof(optval);
 329
 330                         } else if (!strcasecmp(optname, "iterations") && optval) {
 331                                 iterations = atoi(optval);
 332
 333                         } else {
 334                                 fprintf(stderr, "wolf plugin: Invalid argument %s or missing value\n", optname);
 335                                 exit(1);
 336                         }
 337                 }
 338         }
 339
 340         /* Initialize the rest of ctx (depending on arguments) here. */
 341         if (!file) {
 342                 fprintf(stderr, "wolf plugin: file argument not specified\n");
 343                 exit(1);
 344         }
 345         ctx->dlh = dlopen(file, RTLD_NOW);
 346         if (!ctx->dlh) {
 347                 fprintf(stderr, "Cannot load file %s: %s\n", file, dlerror());
 348                 exit(EXIT_FAILURE);
 349         }
 350 #define loadsym(s_) do {\
 351         ctx->s_ = dlsym(ctx->dlh, #s_); \
 352         if (!ctx->s_) { \
 353                 fprintf(stderr, "Cannot find %s in module: %s\n", #s_, dlerror()); \
 354                 exit(EXIT_FAILURE); \
 355         } \
 356 } while (0)
 357         loadsym(SETPARAM);
 358         loadsym(EVALFUN1);
 359         loadsym(FINDMOVE2);
 360
 361         ctx->SETPARAM(threshold, overrelax, iterations);
 362
 363         return ctx;
 364 }
 365
 366 void
 367 pachi_plugin_done(void *data)
 368 {
 369         struct context *ctx = data;
 370         dlclose(ctx->dlh);
 371         free(ctx);
 372 }