1 /* This is a Pachi UCT plugin for Thomas Wolf's move evaluation API. */
2 /* This file is released under the same licence conditions as
5 /* We will add positive priors (1.0) for moves that play in-between
6 * of two different groups of the same color; that is, moves that connect
7 * two groups or the same color or separate two groups of the same color.
8 * This is not a very good prior actually, since it leads to a lot of
9 * useless moves. (Maybe doing this in simulations would be more interesting?)
10 * But it is a simple enough example. :-) */
12 /* Compile the plugin like this:
13 * gcc -Wall -O3 -march=native -Ipachi_source_root -shared -fPIC -o wolf.so wolf.c
14 * Then, load it in Pachi by passing plugin=wolf.so as a parameter.
15 * You can also pass it parameters: plugin=wolf.so:p1=v1:p2=v2.
16 * The plugin supports these parameters:
17 * file Filename of the real module used
18 * eqex Number of prior'd simulations, overrides Pachi default
19 * threshold Threshold value when to stop iterating influence/strength values.
20 * overrelax Overrelaxation parameter. Should probably not be changed from 1.0
21 * iterations Upper bound of the number of iters for each empty point or chain.
25 /* This is the Thomas Wolf's API we implement:
27 The library currently provides 5 functions:
35 of which SETPARAM, EVALFUN2, FINDMOVE2 are the ones likely to be used
42 This procedure has to be called before any EVALFUN call but can also be
43 called again later. It sets parameters for the computation of the
46 The first parameter is a threshold value when to stop iterating
47 influence/strength values. Characteristics: The smaller the value the
48 more accurate the influence value but the slower the computation due
49 to an increase in iterations. But because accuracy is limited by
50 systematic errors, too small values do not bring improvement,
51 especially not in unstable situations.
53 The second is an overrelaxation parameter. Should probably not be
56 The third is an upper bound of the number of iterations for each empty
57 point or chain. Again, the more the better but also the slower. Again,
58 improvement is overshadowed by systematic errors, especially in
59 unstable situations. Typical values have the range 3..65000.
65 Both are one and the same static evaluation function, only with
70 For both functions the first parameter is a string
71 which provides the necessary input by specifying the board position,
72 who moves next, specifying whether the evaluation function should
73 be applied ad hoc (task=3) or incrementally (task=4) and which
74 optionally allows to specify unconditionally alive chains. The
75 syntax of the input for EVALFUN1 and EVALFUN2 is the following:
77 - Only for ad hoc mode:
78 A single digit specifying the size of the board:
79 1 --> 19x19, 2 --> 17x17, 3 --> 15x15, 4 --> 13x13,
80 5 --> 11x11, 6 --> 9x9, 7 --> 7x7, 8 --> 5x5
81 - Only for ad hoc mode:
82 All stones on the board.
83 Because there are more than 256 fields and sending non-alphanumeric
84 characters may give problems we need 2 coordinates for each stone
85 anyway. In these coordinates (1,1) is in the lower left corner
86 and the 1st increases to the right and the 2nd increases vertically
88 Black stone coordinates are encoded by chr(64+x), so 'A' would
89 be 1, 'B' be 2,... and white stone coordinates by chr(96+x), so
90 'a' is 1, 'b' is 2, ...
91 - If there is a field forbidden by ko then the first coordinate
92 is a capital letter and the second a lower case letter.
93 - Next is a single character '%' indicating the end of the sequence of fields.
94 - Next is a single character specifying who moves first and form of solution:
95 'b' Black moves first <*>
96 'w' White moves first <*>
97 - The task: a single digit that specifies the task.
99 3 ... evaluation of all chains and empty fields within a region, adhoc version.
100 If the region is the whole board no further data follow.
101 If the region is bounded then the following data consist of points (x,y).
102 Each point is encoded by 2 byte, either as chr(64+x) chr(64+y), i.e. as
103 upper case letters or as chr(96+x) chr(96+y), i.e. lower case letters.
104 Whether upper or lower case does not depend on the colour (the colour of the
105 stones has already been specified above) but is defined as follows:
106 - The first point (occupied or not) marks the inside of the region to be
107 evaluated. For that one can use upper or lower case letters.
108 - Each one of any further points marks a chain that shall be assumed to be
109 alive, i.e. such a chain will be a boundary to the region to be evaluated.
110 If a field marking a chain is encoded in upper case letters by
111 chr(64+x) chr(64+y) then the chain is assumed to be statically alive which
112 is a property that will be respected in future calls of task 4 below. If
113 the chain marking field is encoded in lower case letters then it treated
114 as alive for now in this computation of the evaluation function but not
115 necessarily in future calls of task 4.
117 4 ... evaluation of all chains and empty fields within a region, incremental version.
118 Input is exactly exactly like task 3. The difference to task 3 is that this
119 position has already been evaluated in the previous call with task 3 or 4.
120 Therefore, no boardsize, no stones and no ko-field are specified. That means
121 the complete input is:
122 - The first character is '%'
123 - followed by 'b' or 'w' to mark the colour moving next, <*>.
124 - a digit for the task, so 4,
125 - optional, like in task 3: fields marking alive chains,
126 - a '%' character to mark the end of the optional list of alive chains
127 - a point (x,y) encoded by 2 byte chr(64+x) chr(64+y) that marks
128 the coordinates of the next move of the colour given in <*>.
132 EVALFUN1 has a simple interface, providing with its 2nd parameter a
133 string that gives for each board intersection
134 - the influence value of Black (in the range 0 .. 1.0) if the
135 intersection is empty
136 - the strength value of the chain occupying that intersection
137 (in the range 0.0 = dead .. 1.0 = uncnditionally alive) if the
138 intersection is not empty.
139 This interface is simple but slow because this string has to be
140 generated and to be decoded to use any information.
142 EVALFUN2 gives direct access to the relevant data.
143 - For an intersection with coordinates (x,y) (1<=x<=boardsize from
144 left to right, 1<=y<=boardsize from bottom to top) the 2-dim
146 - is 0 if the intersection is empty,
147 - gives the chain number of the chain that occupies (x,y).
148 - For an empty intersection with coordinates (x,y) (1<=x<=boardsize from
149 left to right, 1<=y<=boardsize from bottom to top) the record/structure
150 PPD(i,j) stores relevant data and boc is the influence value of Black.
151 - For an intersection (x,y) occupied by a chain with number n the
152 record/structure PCD(n) stores the relevant data of this chain
153 and the svl component gives a strength value in the interval
154 0.0 (= unconditionally dead) to 1.0 (= unconditionally alive).
160 Before calling FINDMOVE1/2 the evaluation function EVALFUN1 or
161 EVALFUN2 for the current board position has to be executed.
163 Both, FINDMOVE1 and FINDMOVE2, are one and the same function that
164 provides a ranking of legal moves, only with different
165 interface. Currently FINDMOVE performs each legal move, updates the
166 static evaluation and adds up probabilities over the whole board to
167 arrive at a rough score for each move. Thus, depending on the number
168 of legal moves this function is up to 360 times slower than EVALFUN in
171 - The first parameter is specifying which sides moves next: White=-1, Black=1.
172 - The 2nd parameter of FINDMOVE1 is an output string giving for each legal
173 intersection a measure of value of doing the next move there.
174 The first move is the supposedly best move. All other moves are
175 listed line by line of the board.
176 - For FINDMOVE2 the 2nd and 3rd parameter are the x- and y-coordinate
177 of the best move and the 4th parameter is a 'value' of that move.
178 The 5th parameter is a 2-dim array which for each intersection
179 gives a 'value' of moving there. To identify whether the intersection is
180 empty the last parameter is a 2-dim array giving a value 0 if it is
181 empty (otherwise the number of the chain that occupies the intersection).
191 /* The basic plugin interface. */
192 #include "uct/plugin.h"
195 #define MAXBOARDSIZE 19
196 typedef char byte_board
[MAXBOARDSIZE
+2][MAXBOARDSIZE
+2]; // The array indices are 1-based!
197 typedef floating_t influ_board
[MAXBOARDSIZE
][MAXBOARDSIZE
];
199 /* Our context structure. */
204 void (*SETPARAM
)(double sv
, double omega
, uint16_t mi
);
205 void (*EVALFUN1
)(char *javp
, char *InfluField
);
206 void (*FINDMOVE2
)(int fa
, char *mi
, char *mj
, floating_t
*mxscore
, influ_board
*SB
, byte_board
**PChainNo
);
211 bsize2digit(int size
)
223 fprintf(stderr
, "wolf plugin: Unsupported board size: %d\n", size
);
229 coord2digit(enum stone color
, int coord
)
231 assert(color
== S_BLACK
|| color
== S_WHITE
);
232 return (color
== S_BLACK
? 64 + coord
: 96 + coord
);
236 pachi_plugin_prior(void *data
, struct tree_node
*node
, struct prior_map
*map
, int eqex
)
238 struct context
*ctx
= data
;
239 struct board
*b
= map
->b
;
241 eqex
= ctx
->eqex
; // override Pachi default
243 /* First, create a string representation of current board. */
248 *bip
++ = bsize2digit(board_size(b
) - 2);
250 enum stone s
= board_at(b
, c
);
251 if (s
== S_NONE
|| s
== S_OFFBOARD
) continue;
252 *bip
++ = coord2digit(s
, coord_x(c
, b
));
253 *bip
++ = coord2digit(s
, coord_y(c
, b
));
255 if (!is_pass(b
->ko
.coord
)) {
256 *bip
++ = coord2digit(S_BLACK
, coord_x(b
->ko
.coord
, b
));
257 *bip
++ = coord2digit(S_WHITE
, coord_y(b
->ko
.coord
, b
));
260 *bip
++ = map
->to_play
== S_BLACK
? 'b' : 'w';
264 /* Seed the evaluation of the situation. */
265 // fprintf(stderr, "board desc: %s\n", bin);
266 ctx
->EVALFUN1(bin
, bout
);
267 /* We do not care about bout. */
269 /* Retrieve values of moves. */
273 byte_board
*chaininfo
;
274 ctx
->FINDMOVE2(map
->to_play
== S_BLACK
? 1 : -1, &bestx
, &besty
, &bestval
, &values
, &chaininfo
);
275 // fprintf(stderr, "best is (%d,%d)%s %f\n", bestx, besty, coord2sstr(coord_xy(b, bestx, besty), b), bestval);
277 /* In the first pass, determine best and worst value. (Best value
278 * reported by FINDMOVE2 is wrong.) In the second pass, we set the
279 * priors by normalization based on the determined values. */
280 floating_t best
= -1000, worst
= 1000;
281 foreach_free_point(map
->b
) {
282 if (!map
->consider
[c
])
284 floating_t value
= values
[coord_x(c
, b
) - 1][coord_y(c
, b
) - 1];
285 if (map
->to_play
== S_WHITE
) value
= -value
;
286 if (value
> best
) best
= value
;
287 else if (value
< worst
) worst
= value
;
290 foreach_free_point(map
->b
) {
291 if (!map
->consider
[c
])
294 /* Take the value and normalize it somehow. */
295 /* Right now, we just do this by linear rescaling from
296 * [worst, best] to [0,1]. */
297 floating_t value
= values
[coord_x(c
, b
) - 1][coord_y(c
, b
) - 1];
298 if (map
->to_play
== S_WHITE
) value
= -value
;
299 value
= (value
- worst
) / (best
- worst
);
300 // fprintf(stderr, "\t[%s %s] %f/%f\n", stone2str(map->to_play), coord2sstr(c, b), value, best);
302 add_prior_value(map
, c
, (value
- worst
) / best
, eqex
);
303 } foreach_free_point_end
;
308 pachi_plugin_init(char *arg
, struct board
*b
, int seed
)
310 struct context
*ctx
= calloc(1, sizeof(*ctx
));
312 /* Initialize ctx defaults here. */
314 floating_t overrelax
= 1.0, threshold
= 0.001;
318 /* This is the canonical Pachi arguments parser. You do not strictly
319 * need to decypher it, you can just use it as a boilerplate. */
321 char *optspec
, *next
= arg
;
324 next
+= strcspn(next
, ":");
325 if (*next
) { *next
++ = 0; } else { *next
= 0; }
327 char *optname
= optspec
;
328 char *optval
= strchr(optspec
, '=');
329 if (optval
) *optval
++ = 0;
331 if (!strcasecmp(optname
, "eqex") && optval
) {
332 /* eqex takes a required integer argument */
333 ctx
->eqex
= atoi(optval
);
335 } else if (!strcasecmp(optname
, "file") && optval
) {
336 file
= strdup(optval
);
338 } else if (!strcasecmp(optname
, "threshold") && optval
) {
339 threshold
= atof(optval
);
341 } else if (!strcasecmp(optname
, "overrelax") && optval
) {
342 overrelax
= atof(optval
);
344 } else if (!strcasecmp(optname
, "iterations") && optval
) {
345 iterations
= atoi(optval
);
348 fprintf(stderr
, "wolf plugin: Invalid argument %s or missing value\n", optname
);
354 /* Initialize the rest of ctx (depending on arguments) here. */
356 fprintf(stderr
, "wolf plugin: file argument not specified\n");
359 ctx
->dlh
= dlopen(file
, RTLD_NOW
);
361 fprintf(stderr
, "Cannot load file %s: %s\n", file
, dlerror());
364 #define loadsym(s_) do {\
365 ctx->s_ = dlsym(ctx->dlh, #s_); \
367 fprintf(stderr, "Cannot find %s in module: %s\n", #s_, dlerror()); \
368 exit(EXIT_FAILURE); \
375 ctx
->SETPARAM(threshold
, overrelax
, iterations
);
381 pachi_plugin_done(void *data
)
383 struct context
*ctx
= data
;