REV. SUBMISSION
[gostyle.git] / tex / gostyle.tex
blob370a3bc70199d982574a09ba0e5d292f1bb93e39
1 \documentclass[journal]{IEEEtran}
3 \usepackage{cite}
4 % cite.sty was written by Donald Arseneau
5 % V1.6 and later of IEEEtran pre-defines the format of the cite.sty package
6 % \cite{} output to follow that of IEEE. Loading the cite package will
7 % result in citation numbers being automatically sorted and properly
8 % "compressed/ranged". e.g., [1], [9], [2], [7], [5], [6] without using
9 % cite.sty will become [1], [2], [5]--[7], [9] using cite.sty. cite.sty's
10 % \cite will automatically add leading space, if needed. Use cite.sty's
11 % noadjust option (cite.sty V3.8 and later) if you want to turn this off.
12 % cite.sty is already installed on most LaTeX systems. Be sure and use
13 % version 4.0 (2003-05-27) and later if using hyperref.sty. cite.sty does
14 % not currently provide for hyperlinked citations.
15 % The latest version can be obtained at:
16 % http://www.ctan.org/tex-archive/macros/latex/contrib/cite/
17 % The documentation is contained in the cite.sty file itself.
20 % *** GRAPHICS RELATED PACKAGES ***
22 \ifCLASSINFOpdf
23 % \usepackage[pdftex]{graphicx}
24 % declare the path(s) where your graphic files are
25 % \graphicspath{{../pdf/}{../jpeg/}}
26 % and their extensions so you won't have to specify these with
27 % every instance of \includegraphics
28 % \DeclareGraphicsExtensions{.pdf,.jpeg,.png}
29 \else
30 % or other class option (dvipsone, dvipdf, if not using dvips). graphicx
31 % will default to the driver specified in the system graphics.cfg if no
32 % driver is specified.
33 % \usepackage[dvips]{graphicx}
34 \usepackage{graphicx}
35 % declare the path(s) where your graphic files are
36 % \graphicspath{{../eps/}}
37 % and their extensions so you won't have to specify these with
38 % every instance of \includegraphics
39 % \DeclareGraphicsExtensions{.eps}
40 \fi
42 \usepackage{threeparttable}
44 \usepackage{psgo}
45 \setgounit{0.4cm}
47 \usepackage{soul}
48 \newcommand{\rv}[1]{\ul{#1}}
50 \usepackage{algorithm}
51 \usepackage{algorithmic}
52 %\usepackage{algpseudocode}
53 % WICKED: neither of these works???
54 % algorithmic.sty can be obtained at:
55 % http://www.ctan.org/tex-archive/macros/latex/contrib/algorithms/
56 % There is also a support site at:
57 % http://algorithms.berlios.de/index.html
58 % Also of interest may be the (relatively newer and more customizable)
59 % algorithmicx.sty package by Szasz Janos:
60 % http://www.ctan.org/tex-archive/macros/latex/contrib/algorithmicx/
62 % *** ALIGNMENT PACKAGES ***
64 %\usepackage{array}
65 % http://www.ctan.org/tex-archive/macros/latex/required/tools/
68 \usepackage{amsmath}
69 %\usepackage{mdwtab}
70 % http://www.ctan.org/tex-archive/macros/latex/contrib/mdwtools/
73 % IEEEtran contains the IEEEeqnarray family of commands that can be used to
74 % generate multiline equations as well as matrices, tables, etc., of high
75 % quality.
77 %\usepackage{eqparbox}
78 % Also of notable interest is Scott Pakin's eqparbox package for creating
79 % (automatically sized) equal width boxes - aka "natural width parboxes".
80 % Available at:
81 % http://www.ctan.org/tex-archive/macros/latex/contrib/eqparbox/
85 % *** SUBFIGURE PACKAGES ***
86 %\usepackage[tight,footnotesize]{subfigure}
87 % subfigure.sty was written by Steven Douglas Cochran. This package makes it
88 % easy to put subfigures in your figures. e.g., "Figure 1a and 1b". For IEEE
89 % work, it is a good idea to load it with the tight package option to reduce
90 % the amount of white space around the subfigures. subfigure.sty is already
91 % installed on most LaTeX systems. The latest version and documentation can
92 % be obtained at:
93 % http://www.ctan.org/tex-archive/obsolete/macros/latex/contrib/subfigure/
94 % subfigure.sty has been superseded by subfig.sty.
98 %\usepackage[caption=false]{caption}
99 %\usepackage[font=footnotesize]{subfig}
100 % subfig.sty, also written by Steven Douglas Cochran, is the modern
101 % replacement for subfigure.sty. However, subfig.sty requires and
102 % automatically loads Axel Sommerfeldt's caption.sty which will override
103 % IEEEtran.cls handling of captions and this will result in nonIEEE style
104 % figure/table captions. To prevent this problem, be sure and preload
105 % caption.sty with its "caption=false" package option. This will preserve
106 % IEEEtran.cls handing of captions. Version 1.3 (2005/06/28) and later
107 % (recommended due to many improvements over 1.2) of subfig.sty supports
108 % the caption=false option directly:
109 %\usepackage[caption=false,font=footnotesize]{subfig}
111 % The latest version and documentation can be obtained at:
112 % http://www.ctan.org/tex-archive/macros/latex/contrib/subfig/
113 % The latest version and documentation of caption.sty can be obtained at:
114 % http://www.ctan.org/tex-archive/macros/latex/contrib/caption/
118 % *** FLOAT PACKAGES ***
120 %\usepackage{fixltx2e}
121 % fixltx2e, the successor to the earlier fix2col.sty, was written by
122 % Frank Mittelbach and David Carlisle. This package corrects a few problems
123 % in the LaTeX2e kernel, the most notable of which is that in current
124 % LaTeX2e releases, the ordering of single and double column floats is not
125 % guaranteed to be preserved. Thus, an unpatched LaTeX2e can allow a
126 % single column figure to be placed prior to an earlier double column
127 % figure. The latest version and documentation can be found at:
128 % http://www.ctan.org/tex-archive/macros/latex/base/
132 %\usepackage{stfloats}
133 % stfloats.sty was written by Sigitas Tolusis. This package gives LaTeX2e
134 % the ability to do double column floats at the bottom of the page as well
135 % as the top. (e.g., "\begin{figure*}[!b]" is not normally possible in
136 % LaTeX2e). It also provides a command:
137 %\fnbelowfloat
138 % to enable the placement of footnotes below bottom floats (the standard
139 % LaTeX2e kernel puts them above bottom floats). This is an invasive package
140 % which rewrites many portions of the LaTeX2e float routines. It may not work
141 % with other packages that modify the LaTeX2e float routines. The latest
142 % version and documentation can be obtained at:
143 % http://www.ctan.org/tex-archive/macros/latex/contrib/sttools/
144 % Documentation is contained in the stfloats.sty comments as well as in the
145 % presfull.pdf file. Do not use the stfloats baselinefloat ability as IEEE
146 % does not allow \baselineskip to stretch. Authors submitting work to the
147 % IEEE should note that IEEE rarely uses double column equations and
148 % that authors should try to avoid such use. Do not be tempted to use the
149 % cuted.sty or midfloat.sty packages (also by Sigitas Tolusis) as IEEE does
150 % not format its papers in such ways.
153 %\ifCLASSOPTIONcaptionsoff
154 % \usepackage[nomarkers]{endfloat}
155 % \let\MYoriglatexcaption\caption
156 % \renewcommand{\caption}[2][\relax]{\MYoriglatexcaption[#2]{#2}}
157 %\fi
158 % endfloat.sty was written by James Darrell McCauley and Jeff Goldberg.
159 % This package may be useful when used in conjunction with IEEEtran.cls'
160 % captionsoff option. Some IEEE journals/societies require that submissions
161 % have lists of figures/tables at the end of the paper and that
162 % figures/tables without any captions are placed on a page by themselves at
163 % the end of the document. If needed, the draftcls IEEEtran class option or
164 % \CLASSINPUTbaselinestretch interface can be used to increase the line
165 % spacing as well. Be sure and use the nomarkers option of endfloat to
166 % prevent endfloat from "marking" where the figures would have been placed
167 % in the text. The two hack lines of code above are a slight modification of
168 % that suggested by in the endfloat docs (section 8.3.1) to ensure that
169 % the full captions always appear in the list of figures/tables - even if
170 % the user used the short optional argument of \caption[]{}.
171 % IEEE papers do not typically make use of \caption[]'s optional argument,
172 % so this should not be an issue. A similar trick can be used to disable
173 % captions of packages such as subfig.sty that lack options to turn off
174 % the subcaptions:
175 % For subfig.sty:
176 % \let\MYorigsubfloat\subfloat
177 % \renewcommand{\subfloat}[2][\relax]{\MYorigsubfloat[]{#2}}
178 % For subfigure.sty:
179 % \let\MYorigsubfigure\subfigure
180 % \renewcommand{\subfigure}[2][\relax]{\MYorigsubfigure[]{#2}}
181 % However, the above trick will not work if both optional arguments of
182 % the \subfloat/subfig command are used. Furthermore, there needs to be a
183 % description of each subfigure *somewhere* and endfloat does not add
184 % subfigure captions to its list of figures. Thus, the best approach is to
185 % avoid the use of subfigure captions (many IEEE journals avoid them anyway)
186 % and instead reference/explain all the subfigures within the main caption.
187 % The latest version of endfloat.sty and its documentation can be obtained at:
188 % http://www.ctan.org/tex-archive/macros/latex/contrib/endfloat/
190 % The IEEEtran \ifCLASSOPTIONcaptionsoff conditional can also be used
191 % later in the document, say, to conditionally put the References on a
192 % page by themselves.
194 % *** PDF, URL AND HYPERLINK PACKAGES ***
196 \usepackage{url}
197 % url.sty was written by Donald Arseneau. It provides better support for
198 % handling and breaking URLs. url.sty is already installed on most LaTeX
199 % systems. The latest version can be obtained at:
200 % http://www.ctan.org/tex-archive/macros/latex/contrib/misc/
201 % Read the url.sty source comments for usage information. Basically,
202 % \url{my_url_here}.
205 % *** Do not adjust lengths that control margins, column widths, etc. ***
206 % *** Do not use packages that alter fonts (such as pslatex). ***
207 % There should be no need to do such things with IEEEtran.cls V1.6 and later.
208 % (Unless specifically asked to do so by the journal or conference you plan
209 % to submit to, of course. )
211 % correct bad hyphenation here
212 \hyphenation{op-tical net-works semi-conduc-tor know-ledge}
215 \begin{document}
217 % paper title
218 % can use linebreaks \\ within to get better formatting as desired
219 \title{On Move Pattern Trends\\in Large Go Games Corpus}
221 % use \thanks{} to gain access to the first footnote area
222 % a separate \thanks must be used for each paragraph as LaTeX2e's \thanks
223 % was not built to handle multiple paragraphs
224 \author{Petr~Baudi\v{s},~Josef~Moud\v{r}\'{i}k% <-this % stops a space
225 \thanks{P. Baudi\v{s} is student at the Faculty of Math and Physics, Charles University, Prague, CZ, and also does some of his Computer Go research as an employee of SUSE Labs Prague, Novell CZ.}% <-this % stops a space
226 \thanks{J. Moud\v{r}\'{i}k is student at the Faculty of Math and Physics, Charles University, Prague, CZ.}}
228 % note the % following the last \IEEEmembership and also \thanks -
229 % these prevent an unwanted space from occurring between the last author name
230 % and the end of the author line. i.e., if you had this:
232 % \author{....lastname \thanks{...} \thanks{...} }
233 % ^------------^------------^----Do not want these spaces!
235 % a space would be appended to the last name and could cause every name on that
236 % line to be shifted left slightly. This is one of those "LaTeX things". For
237 % instance, "\textbf{A} \textbf{B}" will typeset as "A B" not "AB". To get
238 % "AB" then you have to do: "\textbf{A}\textbf{B}"
239 % \thanks is no different in this regard, so shield the last } of each \thanks
240 % that ends a line with a % and do not let a space in before the next \thanks.
241 % Spaces after \IEEEmembership other than the last one are OK (and needed) as
242 % you are supposed to have spaces between the names. For what it is worth,
243 % this is a minor point as most people would not even notice if the said evil
244 % space somehow managed to creep in.
247 % The paper headers
248 \markboth{Transactions on Computational Intelligence and AI in Games --- REV. SUBMISSION}%
249 {On Move Pattern Trends in Large Go Games Corpus --- REV. SUBMISSION}
250 % The only time the second header will appear is for the odd numbered pages
251 % after the title page when using the twoside option.
253 % *** Note that you probably will NOT want to include the author's ***
254 % *** name in the headers of peer review papers. ***
255 % You can use \ifCLASSOPTIONpeerreview for conditional compilation here if
256 % you desire.
261 % If you want to put a publisher's ID mark on the page you can do it like
262 % this:
263 %\IEEEpubid{0000--0000/00\$00.00~\copyright~2007 IEEE}
264 % Remember, if you use this you must call \IEEEpubidadjcol in the second
265 % column for its text to clear the IEEEpubid mark.
269 % use for special paper notices
270 %\IEEEspecialpapernotice{(Invited Paper)}
275 % make the title area
276 \maketitle
279 \begin{abstract}
280 %\boldmath
282 We process a~large corpus of game records of the board game of Go and propose
283 a~way of extracting summary information on played moves. We then apply several
284 basic data-mining methods on the summary information to identify the most
285 differentiating features within the summary information, and discuss their
286 correspondence with traditional Go knowledge. We show statistically significant
287 mappings of the features to player attributes such as playing strength or
288 informally perceived ``playing style'' (e.g. territoriality or aggressivity),
289 describe accurate classifiers for these attributes, and propose applications
290 including seeding real-world ranks of internet players, aiding in Go study and
291 tuning of Go-playing programs, or contributing to the Go-theoretical discussion on
292 the scope of ``playing style''.
294 \end{abstract}
295 % IEEEtran.cls defaults to using nonbold math in the Abstract.
296 % This preserves the distinction between vectors and scalars. However,
297 % if the journal you are submitting to favors bold math in the abstract,
298 % then you can use LaTeX's standard command \boldmath at the very start
299 % of the abstract to achieve this. Many IEEE journals frown on math
300 % in the abstract anyway.
302 % Note that keywords are not normally used for peerreview papers.
303 \begin{IEEEkeywords}
304 Board games, Evaluation, Function approximation, Go, Machine learning, Neural networks, User modelling
305 \end{IEEEkeywords}
312 % For peer review papers, you can put extra information on the cover
313 % page as needed:
314 % \ifCLASSOPTIONpeerreview
315 % \begin{center} \bfseries EDICS Category: 3-BBND \end{center}
316 % \fi
318 % For peerreview papers, this IEEEtran command inserts a page break and
319 % creates the second title. It will be ignored for other modes.
320 \IEEEpeerreviewmaketitle
322 % TODO remove!??
323 This work has been submitted to the IEEE for possible publication. Copyright
324 may be transferred without notice, after which this version may no longer be
325 accessible.
328 \section{Introduction}
329 % The very first letter is a 2 line initial drop letter followed
330 % by the rest of the first word in caps.
332 % form to use if the first word consists of a single letter:
333 % \IEEEPARstart{A}{demo} file is ....
335 % form to use if you need the single drop letter followed by
336 % normal text (unknown if ever used by IEEE):
337 % \IEEEPARstart{A}{}demo file is ....
339 % Some journals put the first two words in caps:
340 % \IEEEPARstart{T}{his demo} file is ....
342 % Here we have the typical use of a "T" for an initial drop letter
343 % and "HIS" in caps to complete the first word.
344 \IEEEPARstart{T}{he} field of Computer Go usually focuses on the problem
345 of creating a~program to play the game, finding the best move from a~given
346 board position. \cite{GellySilver2008}
347 We will make use of one method developed in the course
348 of such research and apply it to the analysis of existing game records
349 with the aim of helping humans to play and understand the game better
350 instead.
352 Go is a~two-player full-information board game played
353 on a~square grid (usually $19\times19$ lines) with black and white
354 stones; the goal of the game is to surround the most territory and
355 capture enemy stones. We assume basic familiarity with the game.
357 Many Go players are eager to play using computers (usually over
358 the internet) and review games played by others on computers as well.
359 This means that large amounts of game records are collected and digitally
360 stored, enabling easy processing of such collections. However, so far
361 only little has been done with the available data --- we are aware
362 only of uses for simple win/loss statistics \cite{KGSAnalytics} \cite{ProGoR}
363 and ``next move'' statistics on a~specific position \cite{Kombilo} \cite{MoyoGo}.
365 We present a~more in-depth approach --- from all played moves, we devise
366 a~compact evaluation of each player. We then explore correlations between
367 evaluations of various players in the light of externally given information.
368 This way, we can discover similarity between move characteristics of
369 players with the same playing strength, or discuss the meaning of the
370 ``playing style'' concept on the assumption that similar playing styles
371 should yield similar move characteristics.
373 \rv{We show that a~sample of a~player's games can be used to quite reliably estimate the player's strength,
374 game style, or even the time period when he/she was active. Apart from these practical results,
375 the research may prove to be useful for Go theoreticians by investigating the principles behind
376 the classical ``style'' classification.}
378 % XXX \rv{ \ref{bla} } is not working
380 \rv{We shall first present details of the extraction and summarization of
381 information from the game corpus (section~}\ref{pattern-vectors}\rv{).
382 Afterwards, we will explain the statistical methods applied (section~}\ref{data-mining}\rv{),
383 and then describe our findings on particular game collections,
384 regarding the analysis of either strength (section~}\ref{strength-analysis}\rv{)
385 or playing styles (section~}\ref{style-analysis}\rv{).
386 Finally, we will explore possible interpretations and few applications
387 of our research (section~}\ref{proposed-apps-and-discussion}\rv{)
388 and point out some possible future research directions (section~}\ref{future-research}\rv{).}
391 \section{Data Extraction}
392 \label{pattern-vectors}
394 As the input of our analysis, we use large collections of game records%
395 \footnote{We use the SGF format \cite{SGF} in our implementation.}
396 \rv{grouped by the primary object of analysis
397 (player name when analyzing style of a particular player,
398 player rank when looking at the effect of rank on data, etc.).}
399 We process the games, generating a description for each
400 played move -- a {\em pattern}, being a combination of several
401 {\em pattern features} described below.
403 We \rv{compute the occurrence counts of all encountered patterns,
404 eventually} composing $n$-dimensional {\em pattern vector}
405 $\vec p$ of counts of the $n$ globally most frequent patterns%
406 \footnote{We use $n=500$ in our analysis.}
407 (the mapping from patterns to vector elements is common for
408 \rv{all generated vectors}).
409 We can then process and compare just the pattern vectors.
411 \subsection{Pattern Features}
412 When deciding how to compose the patterns we use to describe moves,
413 we need to consider a specificity tradeoff --- overly general descriptions carry too little
414 information to discern various player attributes; too specific descriptions
415 gather too few specimens over the games sample and the vector differences are
416 not statistically significant.
418 We have chosen an intuitive and simple approach inspired by pattern features
419 used when computing Elo ratings for candidate patterns in Computer Go play.
420 \cite{PatElo} Each pattern is a~combination of several {\em pattern features}
421 (name--value pairs) matched at the position of the played move.
422 We use these features:
424 \begin{itemize}
425 \item capture move flag
426 \item atari move flag
427 \item atari escape flag
428 \item contiguity-to-last flag%
429 \footnote{We do not consider contiguity features in some cases when we are working
430 on small game samples and need to reduce pattern diversity.}
431 --- whether the move has been played in one of 8 neighbors of the last move
432 \item contiguity-to-second-last flag
433 \item board edge distance --- only up to distance 4
434 \item spatial pattern --- configuration of stones around the played move
435 \end{itemize}
437 The spatial patterns are normalized (using a dictionary) to be always
438 black-to-play and maintain translational and rotational symmetry.
439 Configurations of radius between 2 and 9 in the gridcular metric%
440 \footnote{The {\em gridcular} metric
441 $d(x,y) = |\delta x| + |\delta y| + \max(|\delta x|, |\delta y|)$ produces
442 a circle-like structure on the Go board square grid. \cite{SpatPat} }
443 are matched.
445 Pattern vectors representing these features contain information on
446 played shape as well as a basic representation of tactical dynamics
447 --- threats to capture stones, replying to last move, or ignoring
448 opponent's move elsewhere to return to an urgent local situation.
449 The shapes \rv{often} correspond to opening moves
450 (either in empty corners and sides, or as part of {\em joseki}
451 --- commonly played sequences) characteristic for a certain
452 strategic aim. In the opening, even a single-line difference
453 in the distance from the border can have dramatic impact on
454 further local and global development.
456 \subsection{Vector Rescaling}
458 The pattern vector elements can have diverse values since for each object,
459 we consider a different number of games (and thus patterns).
460 Therefore, we normalize the values to range $[-1,1]$,
461 the most frequent pattern having the value of $1$ and the least occurring
462 one being $-1$.
463 Thus, we obtain vectors describing the relative frequency of played patterns
464 independently of the number of gathered patterns.
465 But there are multiple ways to approach the normalization.
467 \begin{figure}[!t]
468 \centering
469 \includegraphics{patcountdist}
470 \caption{Log-scaled number of pattern occurrences
471 in the GoGoD games examined in sec. \ref{style-analysis}.}
472 \label{fig:patcountdist}
473 \end{figure}
475 \subsubsection{Linear Normalization}
477 One is simply to linearly re-scale the values using:
478 $$y_i = \frac{x_i - x_{\rm min}}{x_{\rm max}}$$
479 This is the default approach; we have used data processed by only this
480 computation unless we note otherwise.
481 As shown in fig.~\ref{fig:patcountdist}, most of the spectrum is covered
482 by the few most-occurring patterns (describing mostly large-diameter
483 shapes from the game opening). This means that most patterns will
484 always be represented by only very small values near the lower bound.
486 \subsubsection{Extended Normalization}
487 \label{xnorm}
489 To alleviate this problem, we have also tried to modify the linear
490 normalization by applying two steps --- {\em pre-processing}
491 the raw counts using
492 $$x_i' = \log (x_i + 1)$$
493 and {\em post-processing} the re-scaled values by the logistic function:
494 $$y_i' = {2 \over 1 + e^{-cy_i}}-1$$
495 However, we have found that this method is not universally beneficial.
496 In our styles case study (sec. \ref{style-analysis}), this normalization
497 produced PCA decomposition with significant dimensions corresponding
498 better to some of the prior knowledge and more instructive for manual
499 inspection, but ultimately worsened accuracy of our classifiers;
500 we conjecture from this that the most frequently occurring patterns are
501 also most important for classification of major style aspects.
503 \subsection{Implementation}
505 We have implemented the data extraction by making use of the pattern
506 features matching implementation%
507 \footnote{\rv{We have developed} the pattern features matcher
508 according to the Elo-rating pattern selection scheme. \cite{PatElo}}
509 within the Pachi go-playing program \cite{Pachi}.
510 We extract information on players by converting the SGF game
511 records to GTP stream \cite{GTP} that feeds Pachi's {\tt patternscan}
512 engine, \rv{producing} a~single {\em patternspec} (string representation
513 of the particular pattern features combination) per move. Of course,
514 only moves played by the appropriate \rv{player} are collected.
516 \section{Data Mining}
517 \label{data-mining}
519 To assess the properties of gathered pattern vectors
520 and their influence on playing styles,
521 we analyze the data using several basic data mining techniques.
523 The first two methods {\em (analytic)} rely purely on a~single data set
524 and serve to show internal structure and correlations within the data set.
526 Principal Component Analysis \cite{Jolliffe1986}
527 finds orthogonal vector components that \rv{represent} the largest variance
528 \rv{of values within the dataset.
529 That is, PCA will produce vectors representing
530 the overall variability within the dataset --- the first vector representing
531 the ``primary axis'' of the dataset, the next vectors representing the less
532 significant axes; each vector has an associated number that
533 determines its impact on the overall dataset variance: $1.0$ would mean
534 that all points within the dataset lie on this vector, value close to zero
535 would mean that removing this dimension would have little effect on the
536 overall shape of the dataset.}
537 Reversing the process of the PCA%
538 \footnote{Looking at dependencies of a single orthogonal vector component
539 in the original vector space.}
540 can indicate which patterns correlate with each component.
541 Additionally, PCA can be used as vector preprocessing for methods
542 that are negatively sensitive to pattern vector component correlations.
544 \rv{On the other hand,} Sociomaps \cite{Sociomaps} \cite{TeamProf} \cite{SociomapsPersonal} produce
545 spatial representation of the data set elements (e.g. players) based on
546 similarity of their data set features; we can then project other
547 information on the map to illustrate its connection to the data set.%
548 \footnote{\rv{We also attempted to visualise the player relationships
549 using Kohonen maps, but that did not produce very useful results.}}
551 Furthermore, we test several \emph{classification} methods that assign
552 an \emph{output vector} $\vec O$ \rv{to} each pattern vector $\vec P$,
553 \rv{the output vector representing the information we want to infer
554 from the game sample} --- e.g.~\rv{assessment of} the playing style,
555 player's strength or even meta-information like the player's era
556 or the country of origin.
558 Initially, the methods must be calibrated (trained) on some prior knowledge,
559 usually in the form of \emph{reference pairs} of pattern vectors
560 and the associated output vectors.
561 The reference set is divided into training and testing pairs
562 and the methods can be compared by the mean square error (MSE) on testing data set
563 (difference of output vectors approximated by the method and their real desired value).
565 %\footnote{However, note that dicrete characteristics such as country of origin are
566 %not very feasible to use here, since WHAT??? is that even true?? }
568 The most trivial method is approximation by the PCA representation
569 matrix, provided that the PCA dimensions have some already well-defined
570 \rv{interpretation}; this can be true for single-dimensional information like
571 the playing strength.
573 Aside of that, we test the $k$-Nearest Neighbors \cite{CoverHart1967} classifier
574 that approximates $\vec O$ by composing the output vectors
575 of $k$ reference pattern vectors closest to $\vec P$.
577 Another classifier is a~multi-layer feed-forward Artificial Neural Network \rv{(see e.g. }\cite{Haykin1994}\rv{)}:
578 the neural network can learn correlations between input and output vectors
579 and generalize the ``knowledge'' to unknown vectors; it can be more flexible
580 in the interpretation of different pattern vector elements and discern more
581 complex relations than the $k$-NN classifier,
582 but may not be as stable and expects larger training sample.
584 Finally, a commonly used classifier in statistical inference is
585 the Naive Bayes Classifier \cite{Bayes};
586 it can infer relative probability of membership
587 in various classes based on previous evidence (training patterns).
589 \subsection{Statistical Methods}
590 We use a~couple of general statistical analysis \rv{methods} together
591 with the particular techniques.
593 \label{pearson}
594 To find correlations within or between extracted data and
595 some prior knowledge (player rank, style vector), we compute the well-known
596 {\em Pearson product-moment correlation coefficient} \cite{Pearson},
597 measuring the strength of the linear dependence%
598 \footnote{A desirable property of PMCC is that it is invariant to translations and rescaling
599 of the vectors.}
600 between any two dimensions:
602 $$ r_{X,Y} = {{\rm cov}(X,Y) \over \sigma_X \sigma_Y} $$
604 To compare classifier performance on the reference data, we employ
605 {\em $k$-fold cross validation}:
606 we randomly divide the training set
607 into $k$ distinct segments of similar sizes and then iteratively
608 use each part as a~testing set as the other $k-1$ parts are used as a~training set.
609 We then average results over all iterations.
611 \subsection{Principal Component Analysis}
612 \label{PCA}
613 We use Principal Component Analysis \emph{PCA}
614 to reduce the dimensions of the pattern vectors while preserving
615 as much information as possible, assuming inter-dependencies between
616 pattern vector dimensions are linear.
618 \rv{Technically}, PCA is an eigenvalue decomposition of a~covariance matrix of centered pattern vectors,
619 producing a~linear mapping $o$ from $n$-dimensional vector space
620 to a~reduced $m$-dimensional vector space.
621 The $m$ eigenvectors of the original vectors' covariance matrix
622 with the largest eigenvalues are used as the base of the reduced vector space;
623 the eigenvectors form projection matrix $W$.
625 For each original pattern vector $\vec p_i$,
626 we obtain its new representation $\vec r_i$ in the PCA base
627 as shown in the following equation:
628 \begin{equation}
629 \vec r_i = W \cdot \vec p_i
630 \end{equation}
632 The whole process is described in Algorithm~\ref{alg:pca}.
634 \begin{algorithm}
635 \caption{PCA -- Principal Component Analysis}
636 \begin{algorithmic}
637 \label{alg:pca}
638 \REQUIRE{$m > 0$, set of players $R$ with pattern vectors $p_r$}
639 \STATE $\vec \mu \leftarrow 1/|R| \cdot \sum_{r \in R}{\vec p_r}$
640 \FOR{ $r \in R$}
641 \STATE $\vec p_r \leftarrow \vec p_r - \vec \mu$
642 \ENDFOR
643 \FOR{ $(i,j) \in \{1,... ,n\} \times \{1,... ,n\}$}
644 \STATE $\mathit{Cov}[i,j] \leftarrow 1/|R| \cdot \sum_{r \in R}{\vec p_{ri} \cdot \vec p_{rj}}$
645 \ENDFOR
646 \STATE Compute Eigenvalue Decomposition of $\mathit{Cov}$ matrix
647 \STATE Get $m$ largest eigenvalues
648 \STATE Most significant eigenvectors ordered by decreasing eigenvalues form the rows of matrix $W$
649 \FOR{ $r \in R$}
650 \STATE $\vec r_r\leftarrow W \vec p_r$
651 \ENDFOR
652 \end{algorithmic}
653 \end{algorithm}
655 \subsection{Sociomaps}
656 \label{soc}
657 Sociomaps are a general mechanism for \rv{visualizing}
658 relationships on a 2D plane such that \rv{the given} ordering of the
659 \rv{player} distances in the dataset is preserved in the distances on the plane.
661 In our particular case,%
662 \footnote{A special case of the {\em Subject-to-Object Relation Mapping (STORM)} indirect sociomap.}
663 we will consider a dataset $\vec S$ of small-dimensional
664 vectors $\vec s_i$. First, we estimate the {\em significance}
665 of the difference {\rv between} each pair of subjects.
666 Then, we determine projection $\varphi$ of all the $\vec s_i$
667 to spatial coordinates of an Euclidean plane, such that it reflects
668 the estimated difference significances.
670 % TODO: Clarify, clean up references
672 To quantify the differences between the subjects ({\em team profiling})
673 into an $A$ matrix, for each two subjects $i, j$ we compute the scalar distance%
674 \footnote{We use the {\em Manhattan} metric $d(x,y) = \sum_i |x_i - y_i|$.}
675 of $s_i, s_j$ and then estimate the $A_{ij}$ probability of at least such distance
676 occurring in uniformly-distributed input (the higher the probability, the more
677 significant and therefore important to preserve the difference is).
679 To visualize the quantified differences, we need to find
680 the $\varphi$ projection such that it maximizes a {\em three-way ordering} criterion:
681 ordering of any three members within $A$ and on the plane
682 (by Euclidean metric) must be the same.
684 $$ \max_\varphi \sum_{i\ne j\ne k} \Phi(\varphi, i, j, k) $$
685 $$ \Phi(\varphi, i, j, k) = \begin{cases}
686 1 & \delta(1,A_{ij},A_{ik}) = \delta(\varphi(i),\varphi(j),\varphi(k)) \\
687 0 & \hbox{otherwise} \end{cases} $$
688 $$ \delta(a, b, c) = \begin{cases}
689 1 & |a-b| > |a-c| \\
690 0 & |a-b| = |a-c| \\
691 -1 & |a-b| < |a-c| \end{cases} $$
693 The $\varphi$ projection is then determined by randomly initializing
694 the position of each subject and then employing gradient descent methods.
696 \subsection{k-nearest Neighbors Classifier}
697 \label{knn}
698 Our goal is to approximate a~player's output vector $\vec O$,
699 knowing their pattern vector $\vec P$.
700 We further assume that similarities in players' pattern vectors
701 uniformly correlate with similarities in players' output vectors.
703 We require a set of reference players $R$ with known \emph{pattern vectors} $\vec p_r$
704 and \emph{output vectors} $\vec o_r$.
706 $\vec O$ is approximated as weighted average of \emph{output vectors}
707 $\vec o_i$ of $k$ players with \emph{pattern vectors} $\vec p_i$ closest to $\vec P$.
708 This is illustrated in Algorithm~\ref{alg:knn}.
709 Note that the weight is a function of distance and is not explicitly defined in Algorithm \ref{alg:knn}.
710 During our research, exponentially decreasing weight has proven to be sufficient.%
711 \footnote{We present concrete formulas in each of the case studies.}
713 \begin{algorithm}
714 \caption{k-Nearest Neighbors}
715 \begin{algorithmic}
716 \label{alg:knn}
717 \REQUIRE{pattern vector $\vec P$, $k > 0$, set of reference players $R$}
718 \FORALL{$r \in R$ }
719 \STATE $D[r] \leftarrow \mathit{EuclideanDistance}(\vec p_r, \vec P)$
720 \ENDFOR
721 \STATE $N \leftarrow \mathit{SelectSmallest}(k, R, D)$
722 \STATE $\vec O \leftarrow \vec 0$
723 \FORALL{$r \in N $}
724 \STATE $\vec O \leftarrow \vec O + \mathit{Weight}(D[r]) \cdot \vec o_r $
725 \ENDFOR
726 \end{algorithmic}
727 \end{algorithm}
729 \subsection{Neural Network Classifier}
730 \label{neural-net}
732 Feed-forward neural networks are known for their ability to generalize
733 and find correlations between input patterns and output classifications.
734 Before use, the network is iteratively trained on the training data
735 until the error on the training set is reasonably small.
737 %Neural network is an adaptive system that must undergo a training
738 %period similarly to the requirement
739 %of reference vectors for the k-Nearest Neighbors algorithm above.
741 \subsubsection{Computation and activation of the NN}
742 Technically, the neural network is a network of interconnected
743 computational units called neurons.
744 A feed-forward neural network has a layered topology;
745 it usually has one \emph{input layer}, one \emph{output layer}
746 and an arbitrary number of \emph{hidden layers} between.
748 Each neuron $i$ gets input from all neurons in the previous layer,
749 each connection having specific weight $w_{ij}$.
751 The computation proceeds in discrete time steps.
752 In the first step, the neurons in the \emph{input layer}
753 are \emph{activated} according to the \emph{input vector}.
754 Then, we iteratively compute output of each neuron in the next layer
755 until the output layer is reached.
756 The activity of the output layer is then presented as the result.
758 The activation $y_i$ of neuron $i$ from the layer $I$ is computed as
759 \begin{equation}
760 y_i = f\left(\sum_{j \in J}{w_{ij} y_j}\right)
761 \end{equation}
762 where $J$ is the previous layer, while $y_j$ is the activation of neurons from layer $J$.
763 Function $f()$ is a~so-called \emph{activation function}
764 and its purpose is to bound the outputs of neurons.
765 A typical example of an activation function is the sigmoid function.%
766 \footnote{A special case of the logistic function $\sigma(x)=(1+e^{-(rx+k)})^{-1}$.
767 Parameters control the growth rate $r$ and the x-position $k$.}
769 \subsubsection{Training}
770 Training of the feed-forward neural network usually involves some
771 modification of the supervised backpropagation learning algorithm.
772 We use a~first-order optimization algorithm called RPROP. \cite{Riedmiller1993}
774 %Because the \emph{reference set} is usually not very large,
775 %we have devised a simple method for its extension.
776 %This enhancement is based upon adding random linear combinations
777 %of \emph{style and pattern vectors} to the training set.
779 As outlined above, the training set $T$ consists of
780 $(\vec p_i, \vec o_i)$ pairs.
781 The training algorithm is shown in Algorithm \ref{alg:tnn}.
783 \begin{algorithm}
784 \caption{Training Neural Network}
785 \begin{algorithmic}
786 \label{alg:tnn}
787 \REQUIRE{Train set $T$, desired error $e$, max iterations $M$}
788 \STATE $N \leftarrow \mathit{RandomlyInitializedNetwork}()$
789 \STATE $\mathit{It} \leftarrow 0$
790 \REPEAT
791 \STATE $\mathit{It} \leftarrow \mathit{It} + 1$
792 \STATE $\Delta \vec w \leftarrow \vec 0$
793 \STATE $\mathit{TotalError} \leftarrow 0$
794 %\FORALL{$(\overrightarrow{Input}, \overrightarrow{DesiredOutput}) \in T$}
795 %\STATE $\overrightarrow{Output} \leftarrow Result(N, \overrightarrow{Input})$
796 %\STATE $E \leftarrow |\overrightarrow{DesiredOutput} - \overrightarrow{Output}|$
797 \FORALL{$(\mathit{Input}, \mathit{DesiredOutput}) \in T$}
798 \STATE $\mathit{Output} \leftarrow \mathit{Result}(N, \mathit{Input})$
799 \STATE $\mathit{Error} \leftarrow |\mathit{DesiredOutput} - \mathit{Output}|$
800 \STATE $\Delta \vec w \leftarrow \Delta \vec w + \mathit{WeightUpdate}(N,\mathit{Error})$
801 \STATE $\mathit{TotalError} \leftarrow \mathit{TotalError} + \mathit{Error}$
802 \ENDFOR
803 \STATE $N \leftarrow \mathit{ModifyWeights}(N, \Delta \vec w)$
804 \UNTIL{$\mathit{TotalError} < e$ or $ \mathit{It} > M$}
805 \end{algorithmic}
806 \end{algorithm}
808 \subsection{Naive Bayes Classifier}
809 \label{naive-bayes}
811 Naive Bayes Classifier uses existing information to construct
812 probability model of likelihoods of given {\em feature variables}
813 based on a discrete-valued {\em class variable}.
814 Using the Bayes equation, we can then estimate the probability distribution
815 of class variable for particular values of the feature variables.
817 In order to approximate the player's output vector $\vec O$ based on
818 pattern vector $\vec P$, we will compute each element of the
819 output vector separately, covering the output domain by several $k$-sized
820 discrete intervals (classes).
822 We will also in fact work on
823 PCA-represented input $\vec R$ (using the 10 most significant
824 dimensions), since smaller input dimension is more computationally
825 feasible and $\vec R$ also better fits the pre-requisites of the
826 classifier, the dimensions being more independent and
827 better approximating the normal distribution.
829 When training the classifier for $\vec O$ element $o_i$
830 of class $c = \lfloor o_i/k \rfloor$,
831 we assume the $\vec R$ elements are normally distributed and
832 feed the classifier information in the form
833 $$ \vec R \mid c $$
834 estimating the mean $\mu_c$ and standard deviation $\sigma_c$
835 of each $\vec R$ element for each encountered $c$
836 (see algorithm \ref{alg:tnb}).
838 Then, we can query the built probability model on
839 $$ \max_c P(c \mid \vec R) $$
840 obtaining the most probable class $c$ for an arbitrary $\vec R$.
841 Each probability is obtained using the normal distribution formula:
842 $$ P(c \mid x) = {1\over \sqrt{2\pi\sigma_c^2}}\exp\left(-{(x-\mu_c)^2\over2\sigma_c^2}\right) $$
844 \begin{algorithm}
845 \caption{Training Naive Bayes}
846 \begin{algorithmic}
847 \label{alg:tnb}
848 \REQUIRE{Train set $T = (\mathit{R, c})$}
849 \FORALL{$(R, c) \in T$}
850 \STATE $\mathit{RbyC}_c \leftarrow \{\mathit{RbyC}_c, R\}$
851 \ENDFOR
852 \FORALL{$c$}
853 \STATE $\mu_c \leftarrow {1 \over |\mathit{RbyC}_c|} \sum_{R \in \mathit{RbyC}_c} R$
854 \ENDFOR
855 \FORALL{$c$}
856 \STATE $\sigma_c \leftarrow \sqrt{{1 \over |\mathit{RbyC}_c|} \sum_{R \in \mathit{RbyC}_c} (R-\mu_c)^2} $
857 \ENDFOR
858 \end{algorithmic}
859 \end{algorithm}
861 \subsection{Implementation}
863 We have implemented the data mining methods as the
864 ``gostyle'' open-source framework \cite{GoStyle},
865 made available under the GNU GPL licence.
867 The majority of our basic processing and \rv{analysis
868 is} implemented in the Python \cite{Python25} programming language.
869 We use several external libraries, most notably the MDP library \cite{MDP} \rv{for the PCA analysis}.
870 The neural network \rv{component} is written using the libfann C library. \cite{Nissen2003}
871 The Naive Bayes Classifier \rv{is built around} the {\tt AI::NaiveBayes1} Perl module. \cite{NaiveBayes1}
873 The sociomap has been visualised using the Team Profile Analyzer \cite{TPA}
874 which is part of the Sociomap suite \cite{SociomapSite}.
877 \section{Strength Analysis}
878 \label{strength-analysis}
880 \begin{figure*}[!t]
881 \centering
882 \includegraphics[width=7in]{strength-pca}
883 \caption{PCA of by-strength vectors}
884 \label{fig:strength_pca}
885 \end{figure*}
887 First, we have used our framework to analyse correlations of pattern vectors
888 and playing strength. Like in other competitively played board games, Go players
889 receive real-world {\em rating number} based on tournament games,
890 and {\em rank} based on their rating.%
891 \footnote{Elo-type rating system \cite{GoR} is usually used,
892 corresponding to even win chances for game of two players with the same rank,
893 and about 2:3 win chance for the stronger in case of one rank difference.}
894 \footnote{Professional ranks and dan ranks in some Asian countries may
895 be assigned differently.}
896 The amateur ranks range from 30-kyu (beginner) to 1-kyu (intermediate)
897 followed by 1-dan to 9-dan\footnote{7-dan in some systems.} (top-level player).
898 Multiple independent real-world ranking scales exist
899 (geographically based), \rv{while} online servers \rv{also} maintain their own user rank \rv{list};
900 the difference between scales can be up to several ranks and the rank
901 distributions also differ. \cite{RankComparison}
903 \subsection{Data source}
904 As the source game collection, we use Go Teaching Ladder reviews archive%
905 \footnote{The reviews contain comments and variations --- we consider only the main
906 variation with the actual played game.}
907 \cite{GTL} --- this collection contains 7700 games of players with strength ranging
908 from 30-kyu to 4-dan; we consider only even games with clear rank information.
909 Since the rank information is provided by the users and may not be consistent,
910 we are forced to take a simplified look at the ranks,
911 discarding the differences between various systems and thus somewhat
912 increasing error in our model.\footnote{Since our results seem satisfying,
913 we did not pursue trying another collection;
914 one could e.g. look at game archives of some Go server to work within
915 single more-or-less consistent rank model.}
917 We represent the rank in our dataset \rv{as an integer in the range} $[-3,30]$ with positive
918 numbers representing the kyu ranks and numbers smaller than 1 representing the dan
919 ranks: 4-dan maps to $-3$, 1-dan to $0$, etc.
921 \subsection{Strength PCA analysis}
922 First, we have created a single pattern vector for each rank between 30-kyu to 4-dan;
923 we have performed PCA analysis on the pattern vectors, achieving near-perfect
924 rank correspondence in the first PCA dimension%
925 \footnote{The eigenvalue of the second dimension was four times smaller,
926 with no discernible structure revealed within the lower-order eigenvectors.}
927 (figure \ref{fig:strength_pca}).
929 We measure the accuracy of the strength approximation by the first PCA dimension
930 using Pearson's $r$ (see \ref{pearson}), yielding a~very satisfying value of $r=0.979$
931 implying extremely strong correlation.\footnote{Extended vector normalization (sec. \ref{xnorm})
932 produced noticeably less clear-cut results.}
934 \rv{This reflects the trivial fact that the most important ``defining characteristic''
935 of a set of players grouped by strength is indeed their strength and confirms
936 that our methodology is correct.
937 At the same time, this result suggests that it is possible to accurately estimate
938 player's strength from a sample of his games,%
939 \footnote{The point is of course that the pattern analysis can be done even if we do not know the opponent's strength, or even the game result.}
940 as we confirm below.}
942 \rv{When investigating a player's $\vec p$, the PCA decomposition could be also
943 useful for study suggestions --- a program could examine the pattern gradient at the
944 player's position on the PCA dimensions and suggest patterns to avoid and patterns
945 to play more often. Of course, such advice alone is certainly not enough and it
946 must be used only as a basis of a more thorough analysis of reasons behind the fact
947 that the player plays other patterns than they ``should''.}
949 \subsection{Strength Classification}
950 \label{strength-class}
952 \rv{In line with results of the PCA analysis, we have tested the strength approximation ability
953 of $k$-NN (sec.}\ref{knn}\rv{), neural network (sec. }\ref{neural-net}\rv{),
954 and a simple PCA-based classifier (sec. }\ref{PCA}\rv{).}
956 \subsubsection{Reference (Training) Data}
957 \rv{We have trained the tested classifiers using one pattern vector per rank
958 (aggregate over all games played by some player declaring the given rank),
959 then performing PCA analysis to reduce the dimension of pattern vectors.}
961 We have explored the influence of different game sample sizes ($k$)
962 on the classification accuracy to \rv{determine the} practicality and scaling
963 abilities of the classifiers.
964 In order to reduce the diversity of patterns (negatively impacting accuracy
965 on small samples), we do not consider the contiguity pattern features.
967 The classifiers were compared by running a many-fold validation by repeatedly and
968 exhaustively taking disjoint $k$-game samples of the same rank from the collection%
969 \footnote{Arbitrary game numbers are approximated by pattern file sizes,
970 iteratively selecting all games of randomly selected player
971 of the required strength.}
972 and measuring the standard error of the classifier.
974 %We have randomly separated $10\%$ of the game database as a testing set,
975 %Using the most %of players within the test group yields MSE TODO, thus providing
976 %reasonably satisfying accuracy by itself.
978 %Using the Naive Bayes classifier yields MSE TODO.
980 \subsubsection{Results}
981 \rv{The results are shown in the table~}\ref{table-str-class}\rv{.
982 The $\sim \mathit{games}$ column describes the number of games in each sample,
983 $\mathit{MSE}$ column shows measured mean square error and $\sigma$ is the empirical standard deviation.
984 Methods are
985 compared (column $\mathit{Cmp}$) to the random classifier by the quotient of their~$\sigma$.}
987 \rv{From the table, it should be obvious that the $k$-NN is obtaining good
988 accuracy even on as few as 5 games as a sample.
989 For a large number of training vectors -- albeit not very accurate due to small
990 sample sizes -- the neural network classifier performs very similarly.
991 For samples of 2 games, the neural network is even slightly better on average.
992 However, due to the decreasing number of training vectors with increasing game sample sizes,
993 the neural network gets unusable for large sample sizes.
994 The table therefore only shows the neural network results for samples of 17 games and smaller.}
996 \rv{PCA-based classifier (the most significant PCA eigenvector position is simply directly taken as a~rank) and
997 a random classifier are listed mainly for the sake of comparison, because they do not perform
998 competitively.}
1000 \begin{table}[!t]
1001 % increase table row spacing, adjust to taste
1002 \renewcommand{\arraystretch}{1.3}
1003 \caption{Strength Classifier Performance}
1004 \label{table-str-class}
1005 \centering
1006 \begin{tabular}{|c|c||c|c||c|}
1007 \hline
1008 Method & $\sim$ games & MSE & $\sigma$ & Cmp \\ \hline
1009 $k$-NN&$85$ & $5.514$ & $2.348$ & $6.150$ \\
1010 &$43$ & $8.449$ & $2.907$ & $4.968$ \\
1011 &$17$ & $10.096$& $3.177$ & $4.545$ \\
1012 &$9$ & $21.343$& $4.620$ & $3.126$ \\
1013 &$2$ & $52.212$& $7.226$ & $1.998$ \\\hline
1015 \rv{Neural Network} & $17$ & $110.633$ & $10.518$ & $1.373$ \\
1016 &$9$ & $44.512$ & $6.672$ & $2.164$ \\
1017 &$2$ & $43.682$ & $6.609$ & $2.185$ \\ \hline
1018 %&$1$ & $58.051$ & $7.619$ & $1.895$ \\ \hline
1020 PCA & $85$ & $24.070$ & $4.906$ & $2.944$ \\
1021 &$43$ & $31.324$ & $5.597$ & $2.580$ \\
1022 &$17$ & $50.390$ & $7.099$ & $2.034$ \\
1023 &$9$ & $72.528$ & $8.516$ & $1.696$ \\
1024 &$2$ & $128.660$& $11.343$ & $1.273$ \\ \hline
1026 Rnd & N/A & $208.549$ & $14.441$ & $1.000$ \\ \hline
1027 \end{tabular}
1028 \end{table}
1030 \subsubsection{$k$-NN parameters}
1031 \rv{We used the $4$-Nearest Neighbors classifier with the weight function}
1032 \begin{equation}
1033 \mathit{Weight}(\vec x) = 0.9^{M\cdot\mathit{Distance}(\vec x)}
1034 \end{equation}
1035 (parameter $M$ ranging from $30$ to $6$).
1037 \subsubsection{Neural network's parameters}
1038 \rv{The neural network classifier had three-layered architecture (one hidden layer)
1039 comprising these numbers of neurons:}
1040 \vspace{4mm}
1041 \noindent
1042 %\begin{table}
1043 \begin{center}
1044 %\caption{Styles}
1045 \begin{tabular}{|c|c|c|}
1046 \hline
1047 \multicolumn{3}{|c|}{Layer} \\\hline
1048 Input & Hidden & Output \\ \hline
1049 119 & 35 & 1 \\ \hline
1050 \end{tabular}
1051 \end{center}
1052 %\end{table}
1053 \vspace{4mm}
1055 \rv{The network was trained until the square error on the training set was smaller than $0.0005$.
1056 Due to the small number of input vectors,
1057 this only took about $20$ iterations of RPROP learning algorithm on average.}
1060 %#Finally, we used $8$-fold cross validation on one-file-per-rank data,
1061 %yielding a MSE $0.085$ which is equivalent to standard deviation of $15\%$.
1063 \section{Style Analysis}
1064 \label{style-analysis}
1066 As a~second case study for our pattern analysis,
1067 we investigate pattern vectors $\vec p$ of various well-known players,
1068 their relationships in-between and to prior knowledge
1069 in order to explore the correlation of prior knowledge with extracted patterns.
1070 We look for relationships between pattern vectors and perceived
1071 ``playing style'' and attempt to use our classifiers to transform
1072 the pattern vector $\vec p$ to a style vector $\vec s$.
1074 \subsection{Data sources}
1075 \subsubsection{Game database}
1076 The source game collection is GoGoD Winter 2008 \cite{GoGoD} containing 55000
1077 professional games, dating from the early Go history 1500 years ago to the present.
1078 We consider only games of a small subset of players (table \ref{fig:style_marks});
1079 we have chosen them for being well-known within the players community,
1080 having a~large number of played games in our collection and not playing too long
1081 ago.\footnote{Over time, many commonly used sequences get altered, adopted and
1082 dismissed; usual playing conditions can also differ significantly.}
1084 \subsubsection{Expert-based knowledge}
1085 \label{style-vectors}
1086 In order to provide a reference frame for our style analysis,
1087 we have gathered some information from game experts about various
1088 traditionally perceived style aspects to use as a prior knowledge.
1089 This expert-based knowledge allows us to predict styles of unknown players
1090 based on the similarity of their pattern vectors,
1091 as well as discover correlations between styles and \rv{particular}
1092 move patterns.
1094 Experts were asked to mark four style aspects of each of the given players
1095 on the scale from 1 to 10. The style aspects are defined as shown:
1097 \vspace{4mm}
1098 \noindent
1099 %\begin{table}
1100 \begin{center}
1101 %\caption{Styles}
1102 \begin{tabular}{|c|c|c|}
1103 \hline
1104 Style & 1 & 10\\ \hline
1105 Territoriality $\tau$ & Moyo & Territory \\
1106 Orthodoxity $\omega$ & Classic & Novel \\
1107 Aggressivity $\alpha$ & Calm & Fighting \\
1108 Thickness $\theta$ & Safe & Shinogi \\ \hline
1109 \end{tabular}
1110 \end{center}
1111 %\end{table}
1112 \vspace{4mm}
1114 We have devised these four style aspects based on our own Go experience
1115 and consultations with other experts.
1116 The used terminology has quite
1117 clear meaning to any experienced Go player and there is not too much
1118 room for confusion, except possibly in the case of ``thickness'' ---
1119 but the concept is not easy to pin-point succinctly and we also did not
1120 add extra comments on the style aspects to the questionnaire deliberately
1121 to accurately reflect any diversity in understanding of the terms.
1123 Averaging this expert based evaluation yields \emph{reference style vector}
1124 $\vec s_r$ (of dimension $4$) for each player $r$
1125 from the set of \emph{reference players} $R$.
1127 Throughout our research, we have experimentally found that playing era
1128 is also a major factor differentiating between patterns. Thus, we have
1129 further extended the $\vec s_r$ by median year over all games played
1130 by the player.
1132 \begin{table}[!t]
1133 % increase table row spacing, adjust to taste
1134 \renewcommand{\arraystretch}{1.3}
1135 \caption{Covariance Measure of Prior Information Dimensions}
1136 \label{fig:style_marks_r}
1137 \centering
1138 % Some packages, such as MDW tools, offer better commands for making tables
1139 % than the plain LaTeX2e tabular which is used here.
1140 \begin{tabular}{|r||r||r||r||r||r|}
1141 \hline
1142 & $\tau$ & $\omega$ & $\alpha$ & $\theta$ & year \\
1143 \hline
1144 $\tau$ &$1.000$&$\mathbf{-0.438}$&$\mathbf{-0.581}$&$\mathbf{ 0.721}$&$ 0.108$\\
1145 $\omega$& &$ 1.000$&$\mathbf{ 0.682}$&$ 0.014$&$-0.021$\\
1146 $\alpha$& & &$ 1.000$&$-0.081$&$ 0.030$\\
1147 $\theta$& &\multicolumn{1}{c||}{---}
1148 & &$ 1.000$&$-0.073$\\
1149 y. & & & & &$ 1.000$\\
1150 \hline
1151 \end{tabular}
1152 \end{table}
1154 Three high-level Go players (Alexander Dinerstein 3-pro, Motoki Noguchi
1155 7-dan and V\'{i}t Brunner 4-dan) have judged the style of the reference
1156 players.
1157 The complete list of answers is in table \ref{fig:style_marks}.
1158 Standard error of the answers is 0.952, making the data reasonably reliable,
1159 though a~much larger sample would of course be more desirable
1160 (but beyond our means to collect).
1161 We have also found a~significant correlation between the various
1162 style aspects, as shown by the Pearson's $r$ values
1163 in table \ref{fig:style_marks_r}.
1165 \rv{We have made few manual adjustments in the dataset, disregarding some
1166 players or portions of their games. This was done to achieve better
1167 consistency of the games (e.g. considering only games of roughly the
1168 same age) and to consider only sets of games that can be reasonably
1169 rated as a whole by human experts (who can give a clear feedback in this
1170 effect). This filtering methodology can be easily reproduced
1171 and such arbitrary decisions are necessary only
1172 for processing the training dataset, not for using it (either for exploration
1173 or classification).}
1175 \begin{table}[!t]
1176 % increase table row spacing, adjust to taste
1177 \renewcommand{\arraystretch}{1.4}
1178 \begin{threeparttable}
1179 \caption{Expert-Based Style Aspects of Selected Professionals\tnote{1} \tnote{2}}
1180 \label{fig:style_marks}
1181 \centering
1182 % Some packages, such as MDW tools, offer better commands for making tables
1183 % than the plain LaTeX2e tabular which is used here.
1184 \begin{tabular}{|c||c||c||c||c|}
1185 \hline
1186 {Player} & $\tau$ & $\omega$ & $\alpha$ & $\theta$ \\
1187 \hline
1188 Go Seigen\tnote{3} & $6.0 \pm 2.0$ & $9.0 \pm 1.0$ & $8.0 \pm 1.0$ & $5.0 \pm 1.0$ \\
1189 Ishida Yoshio\tnote{4}&$8.0 \pm 1.4$ & $5.0 \pm 1.4$ & $3.3 \pm 1.2$ & $5.3 \pm 0.5$ \\
1190 Miyazawa Goro & $1.5 \pm 0.5$ & $10 \pm 0 $ & $9.5 \pm 0.5$ & $4.0 \pm 1.0$ \\
1191 Yi Ch'ang-ho\tnote{5}& $7.0 \pm 0.8$ & $5.0 \pm 1.4$ & $2.6 \pm 0.9$ & $2.6 \pm 1.2$ \\
1192 Sakata Eio & $7.6 \pm 1.7$ & $4.6 \pm 0.5$ & $7.3 \pm 0.9$ & $8.0 \pm 1.6$ \\
1193 Fujisawa Hideyuki & $3.5 \pm 0.5$ & $9.0 \pm 1.0$ & $7.0 \pm 0.0$ & $4.0 \pm 0.0$ \\
1194 Otake Hideo & $4.3 \pm 0.5$ & $3.0 \pm 0.0$ & $4.6 \pm 1.2$ & $3.6 \pm 0.9$ \\
1195 Kato Masao & $2.5 \pm 0.5$ & $4.5 \pm 1.5$ & $9.5 \pm 0.5$ & $4.0 \pm 0.0$ \\
1196 Takemiya Masaki\tnote{4}&$1.3\pm 0.5$& $6.3 \pm 2.1$ & $7.0 \pm 0.8$ & $1.3 \pm 0.5$ \\
1197 Kobayashi Koichi & $9.0 \pm 1.0$ & $2.5 \pm 0.5$ & $2.5 \pm 0.5$ & $5.5 \pm 0.5$ \\
1198 Cho Chikun & $9.0 \pm 0.8$ & $7.6 \pm 0.9$ & $6.6 \pm 1.2$ & $9.0 \pm 0.8$ \\
1199 Ma Xiaochun & $8.0 \pm 2.2$ & $6.3 \pm 0.5$ & $5.6 \pm 1.9$ & $8.0 \pm 0.8$ \\
1200 Yoda Norimoto & $6.3 \pm 1.7$ & $4.3 \pm 2.1$ & $4.3 \pm 2.1$ & $3.3 \pm 1.2$ \\
1201 Luo Xihe & $7.3 \pm 0.9$ & $7.3 \pm 2.5$ & $7.6 \pm 0.9$ & $6.0 \pm 1.4$ \\
1202 O Meien & $2.6 \pm 1.2$ & $9.6 \pm 0.5$ & $8.3 \pm 1.7$ & $3.6 \pm 1.2$ \\
1203 Rui Naiwei & $4.6 \pm 1.2$ & $5.6 \pm 0.5$ & $9.0 \pm 0.8$ & $3.3 \pm 1.2$ \\
1204 Yuki Satoshi & $3.0 \pm 1.0$ & $8.5 \pm 0.5$ & $9.0 \pm 1.0$ & $4.5 \pm 0.5$ \\
1205 Hane Naoki & $7.5 \pm 0.5$ & $2.5 \pm 0.5$ & $4.0 \pm 0.0$ & $4.5 \pm 1.5$ \\
1206 Takao Shinji & $5.0 \pm 1.0$ & $3.5 \pm 0.5$ & $5.5 \pm 1.5$ & $4.5 \pm 0.5$ \\
1207 Yi Se-tol & $5.3 \pm 0.5$ & $6.6 \pm 2.5$ & $9.3 \pm 0.5$ & $6.6 \pm 1.2$ \\
1208 Yamashita Keigo\tnote{4}&$2.0\pm 0.0$& $9.0 \pm 1.0$ & $9.5 \pm 0.5$ & $3.0 \pm 1.0$ \\
1209 Cho U & $7.3 \pm 2.4$ & $6.0 \pm 0.8$ & $5.3 \pm 1.7$ & $6.3 \pm 1.7$ \\
1210 Gu Li & $5.6 \pm 0.9$ & $7.0 \pm 0.8$ & $9.0 \pm 0.8$ & $4.0 \pm 0.8$ \\
1211 Chen Yaoye & $6.0 \pm 1.0$ & $4.0 \pm 1.0$ & $6.0 \pm 1.0$ & $5.5 \pm 0.5$ \\
1212 \hline
1213 \end{tabular}
1214 \begin{tablenotes}
1215 \item [1] Including standard deviation. Only players where we received at least two out of three answers are included.
1216 \item [2] Since the playing era column does not fit into the table, we at least sort the players ascending by their median year.
1217 \item [3] We do not consider games of Go Seigen due to him playing across several distinct eras and also being famous for radical opening experiments throughout the time, and thus featuring especially high diversity in patterns.
1218 \item [4] We do not consider games of Ishida Yoshio and Yamashita Keigo for the PCA analysis since they are significant outliers, making high-order dimensions much like purely ``similarity to this player''. Takemiya Masaki has the similar effect for the first dimension, but that case corresponds to common knowledge of him being an extreme proponent of anti-territorial (``moyo'') style.
1219 \item [5] We consider games only up to year 2004, since Yi Ch'ang-ho was prominent representative of a balanced, careful player until then and still has this reputation in minds of many players, but is regarded to have altered his style significantly afterwards.
1220 \end{tablenotes}
1221 \end{threeparttable}
1222 \end{table}
1224 \subsection{Style PCA analysis}
1226 \begin{figure}[!t]
1227 \centering
1228 \includegraphics[width=3in]{style-pca}
1229 \caption{Columns with the most significant PCA dimensions of the dataset.}
1230 \label{fig:style_pca}
1231 \end{figure}
1233 We have looked at the ten most significant dimensions of the pattern data
1234 yielded by the PCA analysis of the reference player set%
1235 \footnote{We also tried to observe PCA effect of removing outlying Takemiya
1236 Masaki. That way, the second dimension strongly
1237 correlated to territoriality and the third dimension strongly correlated to era,
1238 however the first dimension remained mysteriously uncorrelated and with no
1239 obvious interpretation.}
1240 (fig. \ref{fig:style_pca} shows the first three).
1241 We have again computed the Pearson's $r$ for all combinations of PCA dimensions
1242 and dimensions of the prior knowledge style vectors to find correlations.
1244 \begin{table}[!t]
1245 % increase table row spacing, adjust to taste
1246 \renewcommand{\arraystretch}{1.4}
1247 \caption{Covariance Measure of PCA and Prior Information}
1248 \label{fig:style_r}
1249 \centering
1250 % Some packages, such as MDW tools, offer better commands for making tables
1251 % than the plain LaTeX2e tabular which is used here.
1252 \begin{tabular}{|c||r||r||r||r||r|}
1253 \hline
1254 Eigenval. & $\tau$ & $\omega$ & $\alpha$ & $\theta$ & Year \\
1255 \hline
1256 $0.4473697$ & $\mathbf{-0.530}$ & $ 0.323$ & $ 0.298$ & $\mathbf{-0.554}$ & $ 0.090$ \\
1257 $0.1941057$ & $\mathbf{-0.547}$ & $ 0.215$ & $ 0.249$ & $-0.293$ & $\mathbf{-0.630}$ \\
1258 $0.0463189$ & $ 0.131$ & $-0.002$ & $-0.128$ & $ 0.242$ & $\mathbf{-0.630}$ \\
1259 $0.0280301$ & $-0.011$ & $ 0.225$ & $ 0.186$ & $ 0.131$ & $ 0.067$ \\
1260 $0.0243231$ & $-0.181$ & $ 0.174$ & $-0.032$ & $-0.216$ & $ 0.352$ \\
1261 $0.0180875$ & $-0.364$ & $ 0.226$ & $ 0.339$ & $-0.136$ & $ 0.113$ \\
1262 $0.0138478$ & $-0.194$ & $-0.048$ & $-0.099$ & $-0.333$ & $ 0.055$ \\
1263 $0.0110575$ & $-0.040$ & $-0.254$ & $-0.154$ & $-0.054$ & $-0.089$ \\
1264 $0.0093587$ & $-0.199$ & $-0.115$ & $ 0.358$ & $-0.234$ & $-0.028$ \\
1265 $0.0084930$ & $ 0.046$ & $ 0.190$ & $ 0.305$ & $ 0.176$ & $ 0.089$ \\
1266 \hline
1267 \end{tabular}
1268 \end{table}
1270 \begin{table}[!t]
1271 % increase table row spacing, adjust to taste
1272 \renewcommand{\arraystretch}{1.6}
1273 \begin{threeparttable}
1274 \caption{Characteristic Patterns of PCA$_{1,2}$ Dimensions \tnote{1}}
1275 \label{fig:style_patterns}
1276 \centering
1277 % Some packages, such as MDW tools, offer better commands for making tables
1278 % than the plain LaTeX2e tabular which is used here.
1279 \begin{tabular}{|p{2.3cm}p{2.4cm}p{2.4cm}p{0cm}|}
1280 % The virtual last column is here because otherwise we get random syntax errors.
1282 \hline \multicolumn{4}{|c|}{PCA$_1$ --- Moyo-oriented, thin-playing player} \\
1283 \centering \begin{psgopartialboard*}{(8,1)(12,6)}
1284 \stone[\marktr]{black}{k}{4}
1285 \end{psgopartialboard*} &
1286 \centering \begin{psgopartialboard*}{(1,2)(5,6)}
1287 \stone{white}{d}{3}
1288 \stone[\marktr]{black}{d}{5}
1289 \end{psgopartialboard*} &
1290 \centering \begin{psgopartialboard*}{(5,1)(10,6)}
1291 \stone{white}{f}{3}
1292 \stone[\marktr]{black}{j}{4}
1293 \end{psgopartialboard*} & \\
1294 \centering $0.274$ & \centering $0.086$ & \centering $0.083$ & \\
1295 \centering high corner/side opening \tnote{2} & \centering high corner approach & \centering high distant pincer & \\
1297 \hline \multicolumn{4}{|c|}{PCA$_1$ --- Territorial, thick-playing player} \\
1298 \centering \begin{psgopartialboard*}{(3,1)(7,6)}
1299 \stone{white}{d}{4}
1300 \stone[\marktr]{black}{f}{3}
1301 \end{psgopartialboard*} &
1302 \centering \begin{psgopartialboard*}{(3,1)(7,6)}
1303 \stone{white}{c}{6}
1304 \stone{black}{d}{4}
1305 \stone[\marktr]{black}{f}{3}
1306 \end{psgopartialboard*} &
1307 \centering \begin{psgopartialboard*}{(3,1)(7,6)}
1308 \stone{black}{d}{4}
1309 \stone[\marktr]{black}{f}{3}
1310 \end{psgopartialboard*} & \\
1311 \centering $-0.399$ & \centering $-0.399$ & \centering $-0.177$ & \\
1312 \centering low corner approach & \centering low corner reply & \centering low corner enclosure & \\
1314 \hline \multicolumn{4}{|c|}{PCA$_2$ --- Territorial, current player \tnote{3}} \\
1315 \centering \begin{psgopartialboard*}{(3,1)(7,6)}
1316 \stone{white}{c}{6}
1317 \stone{black}{d}{4}
1318 \stone[\marktr]{black}{f}{3}
1319 \end{psgopartialboard*} &
1320 \centering \begin{psgopartialboard*}{(3,1)(8,6)}
1321 \stone{white}{d}{4}
1322 \stone[\marktr]{black}{g}{4}
1323 \end{psgopartialboard*} &
1324 \centering \begin{psgopartialboard*}{(4,1)(9,6)}
1325 \stone{black}{d}{4}
1326 \stone{white}{f}{3}
1327 \stone[\marktr]{black}{h}{3}
1328 \end{psgopartialboard*} & \\
1329 \centering $-0.193$ & \centering $-0.139$ & \centering $-0.135$ & \\
1330 \centering low corner reply \tnote{4} & \centering high distant approach/pincer & \centering near low pincer & \\
1332 \hline
1333 \end{tabular}
1334 \begin{tablenotes}
1335 \item [1] We present the patterns in a simplified compact form;
1336 in reality, they are usually somewhat larger and always circle-shaped
1337 (centered on the triangled move).
1338 We omit only pattern segments that are entirely empty.
1339 \item [2] We give some textual interpretation of the patterns, especially
1340 since some of them may not be obvious unless seen in game context; we choose
1341 the descriptions based on the most frequently observed contexts, but of course
1342 the pattern can also be matched in other positions and situations.
1343 \item [3] In the second PCA dimension, we find no correlated patterns;
1344 only uncorrelated and anti-correlated ones.
1345 \item [4] As the second most significant pattern,
1346 we skip a slide follow-up pattern to this move.
1347 \end{tablenotes}
1348 \end{threeparttable}
1349 \end{table}
1351 \begin{table}[!t]
1352 % increase table row spacing, adjust to taste
1353 \renewcommand{\arraystretch}{1.8}
1354 \begin{threeparttable}
1355 \caption{Characteristic Patterns of PCA$_3$ Dimension \tnote{1}}
1356 \label{fig:style_patterns3}
1357 \centering
1358 % Some packages, such as MDW tools, offer better commands for making tables
1359 % than the plain LaTeX2e tabular which is used here.
1360 \begin{tabular}{|p{2.4cm}p{2.4cm}p{2.4cm}p{0cm}|}
1361 % The virtual last column is here because otherwise we get random syntax errors.
1363 \hline \multicolumn{4}{|c|}{PCA$_3$ --- Old-time player} \\
1364 \centering \begin{psgopartialboard*}{(1,3)(5,7)}
1365 \stone{white}{d}{4}
1366 \stone[\marktr]{black}{c}{6}
1367 \end{psgopartialboard*} &
1368 \centering \begin{psgopartialboard*}{(8,1)(12,5)}
1369 \stone[\marktr]{black}{k}{3}
1370 \end{psgopartialboard*} &
1371 \centering \begin{psgopartialboard*}{(1,1)(5,5)}
1372 \stone[\marktr]{black}{c}{3}
1373 \end{psgopartialboard*} & \\
1374 \centering $0.515$ & \centering $0.264$ & \centering $0.258$ & \\
1375 \centering low corner approach & \centering low side or mokuhazushi opening & \centering san-san opening & \\
1377 \hline \multicolumn{4}{|c|}{PCA$_3$ --- Current player} \\
1378 \centering \begin{psgopartialboard*}{(3,1)(7,5)}
1379 \stone{black}{d}{4}
1380 \stone[\marktr]{black}{f}{3}
1381 \end{psgopartialboard*} &
1382 \centering \begin{psgopartialboard*}{(1,1)(5,5)}
1383 \stone[\marktr]{black}{c}{4}
1384 \end{psgopartialboard*} &
1385 \centering \begin{psgopartialboard*}{(1,2)(5,6)}
1386 \stone{black}{d}{3}
1387 \stone{white}{d}{5}
1388 \stone[\marktr]{black}{c}{5}
1389 \end{psgopartialboard*} & \\
1390 \centering $-0.276$ & \centering $-0.273$ & \centering $-0.116$ & \\
1391 \centering low corner enclosure & \centering 3-4 corner opening \tnote{2} & \centering high approach reply & \\
1393 \hline
1394 \end{tabular}
1395 \begin{tablenotes}
1396 \item [1] We cannot use the terms ``classic'' and ``modern'' in case of PCA$_3$
1397 since the current patterns are commonplace in games of past centuries
1398 (not included in our training set) and many would call a lot of the old-time patterns
1399 modern inventions. Perhaps we can infer that the latest 21st-century play trends abandon
1400 many of the 20th-century experiments (lower echelon of our by-year samples)
1401 to return to the more ordinary but effective classic patterns.
1402 \item [2] At this point, we skip two patterns already shown elsewhere:
1403 {\em high side/corner opening} and {\em low corner reply}.
1404 \end{tablenotes}
1405 \end{threeparttable}
1406 \end{table}
1408 It is immediately
1409 obvious both from the measured $r$ and visual observation
1410 that by far the most significant vector corresponds very well
1411 to the territoriality of the players,
1412 confirming the intuitive notion that this aspect of style
1413 is the one easiest to pin-point and also
1414 most obvious in the played shapes and sequences
1415 (that can obviously aim directly at taking secure territory
1416 or building center-oriented framework). Thick (solid) play also plays
1417 a role, but these two style dimensions are already
1418 correlated in the prior data.
1420 The other PCA dimensions are somewhat harder to interpret, but there
1421 certainly is significant influence of the styles on the patterns;
1422 the correlations are all presented in Table~\ref{fig:style_r}.
1423 (Larger absolute value means better linear correspondence.)
1425 We also list the characteristic spatial patterns of the PCA dimension
1426 extremes (Tables~\ref{fig:style_patterns} and \ref{fig:style_patterns3}), determined by their coefficients
1427 in the PCA projection matrix --- however, such a naive approach
1428 has limited reliability; better methods will have to be researched.%
1429 \footnote{For example, as one of the highly ranked ``Takemiya's'' PCA$_1$ patterns,
1430 the 3,3 corner opening was generated, completely inappropriately;
1431 it reflects some weak ordering in the bottom half of the dimension,
1432 not a global ordering within the dimension.}
1433 We do not show the other pattern features since they carry no useful
1434 information in the opening stage.%
1435 \footnote{The board distance feature can be useful in some cases,
1436 but here all the spatial patterns are wide enough to reach to the edge
1437 on their own.}
1439 \begin{table}[!t]
1440 % increase table row spacing, adjust to taste
1441 \renewcommand{\arraystretch}{1.4}
1442 \caption{Covariance Measure of Extended-Normalization PCA and~Prior Information}
1443 \label{fig:style_normr}
1444 \centering
1445 % Some packages, such as MDW tools, offer better commands for making tables
1446 % than the plain LaTeX2e tabular which is used here.
1447 \begin{tabular}{|c||r||r||r||r||r|}
1448 \hline
1449 Eigenval. & $\tau$ & $\omega$ & $\alpha$ & $\theta$ & Year \\
1450 \hline
1451 $6.3774289$ & $ \mathbf{0.436}$ & $-0.220$ & $-0.289$ & $ \mathbf{0.404}$ & $\mathbf{-0.576}$ \\
1452 $1.7269775$ & $\mathbf{-0.690}$ & $ 0.340$ & $ 0.315$ & $\mathbf{-0.445}$ & $\mathbf{-0.639}$ \\
1453 $1.1747101$ & $-0.185$ & $ 0.156$ & $ 0.107$ & $-0.315$ & $ 0.320$ \\
1454 $0.8452797$ & $ 0.064$ & $-0.102$ & $-0.189$ & $ 0.032$ & $ 0.182$ \\
1455 $0.8038992$ & $-0.185$ & $ 0.261$ & $ \mathbf{0.620}$ & $ 0.120$ & $ 0.056$ \\
1456 $0.6679533$ & $-0.027$ & $ 0.055$ & $ 0.147$ & $-0.198$ & $ 0.155$ \\
1457 $0.5790000$ & $ 0.079$ & $ \mathbf{0.509}$ & $ 0.167$ & $ 0.294$ & $-0.019$ \\
1458 $0.4971474$ & $ 0.026$ & $-0.119$ & $-0.071$ & $ 0.049$ & $ 0.043$ \\
1459 $0.4938777$ & $-0.061$ & $ 0.061$ & $ 0.104$ & $-0.168$ & $ 0.015$ \\
1460 $0.4888848$ & $ 0.203$ & $-0.283$ & $-0.120$ & $ 0.083$ & $-0.220$ \\
1461 \hline
1462 \end{tabular}
1463 \end{table}
1465 The PCA results presented above do not show much correlation between
1466 the significant PCA dimensions and the $\omega$ and $\alpha$ style dimensions.
1467 However, when we applied the extended vector normalization
1468 (sec. \ref{xnorm}; see Table~\ref{fig:style_normr}),
1469 some less significant PCA dimensions exhibited clear correlations.%
1470 \footnote{We have found that $c=6$ in the post-processing logistic function
1471 produces the most instructive PCA output on our particular game collection.}
1472 While we do not use the extended normalization results elsewhere since
1473 they produced noticeably less accurate classifiers in all dimensions
1474 (including $\omega$ and $\alpha$), it is instructive to look at the PCA dimensions.
1476 \break
1478 \rv{In contrast with the emphasis of opening patterns in the $\tau$ and $\theta$
1479 dimensions, the most contributing patterns of the $\omega$ and $\alpha$
1480 dimensions are the middle-game patterns that occur less frequently and require
1481 the extended normalization not to be over-shadowed by the opening patterns.}%
1482 \footnote{In the middle game, \rv{basic areas of influence have been staked
1483 out and invasions and group attacks are being played out}.
1484 Notably, the board is much more filled \rv{than in the opening} and thus
1485 particular specific-shape patterns repeat less often.}
1486 E.g. the most characteristic patterns
1487 on the aggressiveness dimension represent moves that make life with small,
1488 unstable groups (connecting kosumi on second line or mouth-shape eyespace
1489 move), while the novel-ranked players seem to like the (in)famous tsuke-nobi
1490 joseki sequence.%
1491 \footnote{\rv{Tsuke-nobi is a well-known joseki popular among beginners,
1492 but professionals usually play it only in special contexts.}}
1493 \rv{This may either mean that novel players like to play the joseki more,
1494 or (more likely, in our opinion) that novel players are more likely to
1495 get into unorthodox situations that require resorting to the tsuke-nobi
1496 sequence.}
1498 We believe that the next step in interpreting our analytical results
1499 will be more refined prior information input
1500 and precise analysis of the outputs by Go experts.
1502 \begin{figure}[!t]
1503 \centering
1504 \includegraphics[width=3.5in,angle=-90]{sociomap}
1505 \caption{Sociomap visualisation. The spatial positioning of players
1506 is based on the expert knowledge, while the node heights (depicted by
1507 contour lines) represent the pattern vectors.}%
1508 %The light lines denote coherence-based hierarchical clusters.
1510 \label{fig:sociomap}
1511 \end{figure}
1513 Fig.~\ref{fig:sociomap} shows the Sociomap visualisation
1514 as an alternate view of the player relationships and similarity,
1515 as well as correlation between the expert-given style marks
1516 and the PCA decomposition. The four-dimensional style vectors
1517 are used as input for the Sociomap renderer and determine the
1518 spatial positions of players. The height of a node is then
1519 determined using first two PCA dimensions $R_1,R_2$ and their
1520 eigenvalues $\lambda_1,\lambda_2$ as their linear combination:
1521 \[ h = \lambda_1 R_1 + \lambda_2 R_2 . \]
1523 We can observe that the terrain of the sociomap is reasonably
1524 ``smooth'', again demonstrating some level of connection between
1525 the style vectors and data-mined information. High contour density
1526 indicates some discrepancy; in case of Takemiya Masaki and Yi Ch'ang-ho,
1527 this seems to be merely an issue of scale,
1528 while the Rui Naiwei --- Gu Li cliff suggests a genuine problem;
1529 we cannot say now whether it is because of imprecise prior information
1530 or lacking approximation abilities of our model.
1532 \subsection{Style Classification}
1533 \label{style-class}
1535 %TODO vsude zcheckovat jestli pouzivame stejny cas "we perform, we apply" X "we have performed, ..."
1537 Similarly to the Strength classification (section \ref{strength-class}), we have tested the style inference ability
1538 of $k$-NN (sec. \ref{knn}), neural network (sec. \ref{neural-net}), and Bayes (sec. \ref{naive-bayes}) classifiers.
1540 \subsubsection{Reference (Training) Data}
1541 As the~reference data, we use expert-based knowledge presented in section \ref{style-vectors}.
1542 For each reference player, that gives $4$-dimensional \emph{style vector} (each component in the
1543 range of $[1,10]$).\footnote{Since the neural network has activation function with range $[-1,1]$, we
1544 have linearly rescaled the \emph{style vectors} from interval $[1,10]$ to $[-1,1]$ before using the training
1545 data. The network's output was afterwards rescaled back to allow for MSE comparison.}
1547 All input (pattern) vectors were preprocessed using PCA, reducing the input dimension from $400$ to $23$.
1548 We measure the performance on the same reference data using $5$-fold cross validation.
1549 To put our measurements in scale, we also include a~random classifier in our results.
1551 \subsubsection{Results}
1552 The results are shown in Table~\ref{crossval-cmp}. The second to fifth columns in the table represent
1553 the mean square error (MSE) of the examined styles, $\mathit{Mean}$ is the
1554 mean square error across the styles and finally, the last column $\mathit{Cmp}$
1555 represents $\mathit{Mean}(\mathit{Random\ classifier}) / \mathit{Mean}(\mathit{X})$ --- the comparison of the mean square error
1556 of each method with the random classifier. To minimize the
1557 effect of random variables, all numbers were taken as an average of $200$ runs of the cross validation.
1559 Analysis of the performance of $k$-NN classifier for different $k$-values has shown that different
1560 $k$-values are suitable to approximate different styles. Combining the $k$-NN classifiers with the
1561 neural network (so that each style is approximated by the method with lowest MSE in that style)
1562 results in \emph{Joint classifier}, which outperforms all other methods.
1563 The \emph{Joint classifier} has outstanding MSE $3.960$, which is equivalent to standard error
1564 of $\sigma = 1.99$ per style.%
1565 \footnote{We should note that the pattern vector for each tested player
1566 was generated over at least few tens of games.}
1568 \begin{table}[!t]
1569 \renewcommand{\arraystretch}{1.4}
1570 \begin{threeparttable}
1571 \caption{Comparison of style classifiers}
1572 \label{crossval-cmp}
1573 \begin{tabular}{|c|c|c|c|c|c|c|}
1574 \hline
1575 %Classifier & $\sigma_\tau$ & $\sigma_\omega$ & $\sigma_\alpha$ & $\sigma_\theta$ & Tot $\sigma$ & $\mathit{RndC}$\\ \hline
1576 %Neural network & 0.420 & 0.488 & 0.365 & 0.371 & 0.414 & 1.82 \\
1577 %$k$-NN ($k=4$) & 0.394 & 0.507 & 0.457 & 0.341 & 0.429 & 1.76 \\
1578 %Random classifier & 0.790 & 0.773 & 0.776 & 0.677 & 0.755 & 1.00 \\ \hline
1579 &\multicolumn{5}{|c|}{MSE}& \\ \hline
1580 {Classifier} & $\tau$ & $\omega$ & $\alpha$ & $\theta$ & {\bf Mean} & {\bf Cmp}\\ \hline
1581 Joint classifier\tnote{1} & 4.04 & {\bf 5.25} & {\bf 3.52} & {\bf 3.05} & {\bf 3.960}& 2.97 \\\hline
1582 Neural network & {\bf 4.03} & 6.15 & {\bf 3.58} & 3.79 & 4.388 & 2.68 \\
1583 $k$-NN ($k=2$) & 4.08 & 5.40 & 4.77 & 3.37 & 4.405 & 2.67 \\
1584 $k$-NN ($k=3$) & 4.05 & 5.58 & 5.06 & 3.41 & 4.524 & 2.60 \\
1585 $k$-NN ($k=1$) & 4.52 & {\bf 5.26} & 5.36 & {\bf 3.09} & 4.553 & 2.59 \\
1586 $k$-NN ($k=4$) & 4.10 & 5.88 & 5.16 & 3.60 & 4.684 & 2.51 \\
1587 Naive Bayes & 4.48 & 6.90 & 5.48 & 3.70 & 5.143 & 2.29 \\
1588 Random class. & 12.26 & 12.33 & 12.40 & 10.11 & 11.776 & 1.00 \\\hline
1590 \end{tabular}
1591 \begin{tablenotes}
1592 \item [1] Note that these measurements have a certain variance.
1593 Since the Joint classifier performance was measured from scratch,
1594 the precise values may not match appropriate cells of the used methods.
1595 \end{tablenotes}
1596 \end{threeparttable}
1597 \end{table}
1599 % TODO presunout konkretni parametry do Appendixu? (neni jich tolik, mozna ne)
1600 \subsubsection{$k$-NN parameters}
1601 All three variants of $k$-NN classifier ($k=2,3,4$) had the weight function
1602 \begin{equation}
1603 \mathit{Weight}(\vec x) = 0.8^{10\cdot\mathit{Distance}(\vec x)}
1604 \end{equation}
1605 The parameters were chosen empirically to minimize the MSE.
1607 \subsubsection{Neural network's parameters}
1608 The neural network classifier had a three-layered architecture (one hidden layer)
1609 comprising the following numbers of neurons:
1610 \vspace{4mm}
1611 \noindent
1612 %\begin{table}
1613 \begin{center}
1614 %\caption{Styles}
1615 \begin{tabular}{|c|c|c|}
1616 \hline
1617 \multicolumn{3}{|c|}{Layer} \\\hline
1618 Input & Hidden & Output \\ \hline
1619 23 & 30 & 4 \\ \hline
1620 \end{tabular}
1621 \end{center}
1622 %\end{table}
1623 \vspace{4mm}
1625 The network was trained until the square error on the training set was smaller than $0.0003$.
1626 Due to a small number of input vectors, this only took $20$ iterations of RPROP learning algorithm on average.
1628 \subsubsection{Naive Bayes parameters}
1630 We have chosen $k = 10/7$ as our discretization parameter;
1631 ideally, we would use $k = 1$ to fully cover the style marks
1632 domain, however our training sample \rv{turns out to be} too small for
1633 that.
1635 \section{Proposed Applications}
1636 %\section{Proposed Applications and Discussion}
1637 \label{proposed-apps-and-discussion}
1639 %\rv{TODO Discussion}
1641 We believe that our findings might be useful for many applications
1642 in the area of Go support software as well as Go-playing computer engines.
1644 \rv{However, our foremost aim is to use the style analysis as an excellent
1645 teaching aid} --- classifying style
1646 dimensions based on player's pattern vector, many study recommendations
1647 can be given, e.g. about the professional games to replay, the goal being
1648 balancing understanding of various styles to achieve well-rounded skill set.%
1649 \footnote{\rv{The strength analysis could be also used in a similar fashion,
1650 but the lesson learned cannot simply be ``play pattern $X$ more often'';
1651 instead, the insight lays in the underlying reason of disproportionate
1652 frequency of some patterns.}}
1653 \rv{A user-friendly tool based on our work is currently in development.}
1655 \rv{Another promising application we envision is helping to}
1656 determine initial real-world rating of a player before their
1657 first tournament based on a sample of their games played on the internet;
1658 some players especially in less populated areas could get fairly strong
1659 before playing in their first real tournament.
1660 \rv{Similarly, a computer Go program can quickly} classify the level of its
1661 \rv{human opponent} based on the pattern vector from \rv{their previous games}
1662 and auto-adjust its difficulty settings accordingly
1663 to provide more even games for beginners.%
1664 \footnote{The program can also do this based on win-loss statistics,
1665 but pattern vector analysis \rv{should} converge faster \rv{initially,
1666 providing much better user experience}.}
1668 We hope that more strong players will look into the style dimensions found
1669 by our statistical analysis --- analysis of most played patterns of prospective
1670 opponents might prepare for \rv{a tournament} game, but we especially hope that new insights
1671 on strategic purposes of various shapes and general human understanding
1672 of the game might be \rv{improved} by investigating the style-specific patterns.
1674 \rv{Of course, it is challenging to predict all possible uses of our work by others.
1675 Some less obvious applications might include}
1676 analysis of pattern vectors extracted from games of Go-playing programs:
1677 the strength and style \rv{classification} might help to highlight some weaknesses
1678 and room for improvements.%
1679 \footnote{Of course, correlation does not imply causation \rv{and we certainly do not
1680 suggest simply optimizing Go-playing programs according to these vectors.
1681 However, they could hint on general shortcomings of the playing engines if the
1682 actual cause of e.g. surprisingly low strength prediction is investigated.}}
1683 Also, new historical game records are still \rv{occasionally} being discovered;
1684 pattern-based classification might help to suggest \rv{or verify} origin of the games
1685 in Go Archeology.
1688 % An example of a floating figure using the graphicx package.
1689 % Note that \label must occur AFTER (or within) \caption.
1690 % For figures, \caption should occur after the \includegraphics.
1691 % Note that IEEEtran v1.7 and later has special internal code that
1692 % is designed to preserve the operation of \label within \caption
1693 % even when the captionsoff option is in effect. However, because
1694 % of issues like this, it may be the safest practice to put all your
1695 % \label just after \caption rather than within \caption{}.
1697 % Reminder: the "draftcls" or "draftclsnofoot", not "draft", class
1698 % option should be used if it is desired that the figures are to be
1699 % displayed while in draft mode.
1701 %\begin{figure}[!t]
1702 %\centering
1703 %\includegraphics[width=2.5in]{myfigure}
1704 % where an .eps filename suffix will be assumed under latex,
1705 % and a .pdf suffix will be assumed for pdflatex; or what has been declared
1706 % via \DeclareGraphicsExtensions.
1707 %\caption{Simulation Results}
1708 %\label{fig_sim}
1709 %\end{figure}
1711 % Note that IEEE typically puts floats only at the top, even when this
1712 % results in a large percentage of a column being occupied by floats.
1715 % An example of a double column floating figure using two subfigures.
1716 % (The subfig.sty package must be loaded for this to work.)
1717 % The subfigure \label commands are set within each subfloat command, the
1718 % \label for the overall figure must come after \caption.
1719 % \hfil must be used as a separator to get equal spacing.
1720 % The subfigure.sty package works much the same way, except \subfigure is
1721 % used instead of \subfloat.
1723 %\begin{figure*}[!t]
1724 %\centerline{\subfloat[Case I]\includegraphics[width=2.5in]{subfigcase1}%
1725 %\label{fig_first_case}}
1726 %\hfil
1727 %\subfloat[Case II]{\includegraphics[width=2.5in]{subfigcase2}%
1728 %\label{fig_second_case}}}
1729 %\caption{Simulation results}
1730 %\label{fig_sim}
1731 %\end{figure*}
1733 % Note that often IEEE papers with subfigures do not employ subfigure
1734 % captions (using the optional argument to \subfloat), but instead will
1735 % reference/describe all of them (a), (b), etc., within the main caption.
1738 % An example of a floating table. Note that, for IEEE style tables, the
1739 % \caption command should come BEFORE the table. Table text will default to
1740 % \footnotesize as IEEE normally uses this smaller font for tables.
1741 % The \label must come after \caption as always.
1743 %\begin{table}[!t]
1744 %% increase table row spacing, adjust to taste
1745 %\renewcommand{\arraystretch}{1.3}
1746 % if using array.sty, it might be a good idea to tweak the value of
1747 % \extrarowheight as needed to properly center the text within the cells
1748 %\caption{An Example of a Table}
1749 %\label{table_example}
1750 %\centering
1751 %% Some packages, such as MDW tools, offer better commands for making tables
1752 %% than the plain LaTeX2e tabular which is used here.
1753 %\begin{tabular}{|c||c|}
1754 %\hline
1755 %One & Two\\
1756 %\hline
1757 %Three & Four\\
1758 %\hline
1759 %\end{tabular}
1760 %\end{table}
1763 % Note that IEEE does not put floats in the very first column - or typically
1764 % anywhere on the first page for that matter. Also, in-text middle ("here")
1765 % positioning is not used. Most IEEE journals use top floats exclusively.
1766 % Note that, LaTeX2e, unlike IEEE journals, places footnotes above bottom
1767 % floats. This can be corrected via the \fnbelowfloat command of the
1768 % stfloats package.
1772 \section{Future Research}
1773 \label{future-research}
1775 Since we are not aware of any previous research on this topic and we
1776 are limited by space and time constraints, plenty of research remains
1777 to be done in all parts of our analysis --- we have already noted
1778 many in the text above. Most significantly, different methods of generating
1779 and normalizing the $\vec p$ vectors can be explored
1780 and other data mining methods could be investigated.
1781 Better ways of visualising the relationships would be desirable,
1782 together with thorough expert dissemination of internal structure
1783 of the player pattern vectors space:
1784 more professional players should be consulted on the findings
1785 and for style scales calibration.
1787 It can be argued that many players adjust their style by game conditions
1788 (Go development era, handicap, komi and color, time limits, opponent)
1789 or that styles might express differently in various game stages;
1790 these factors should be explored by building pattern vectors more
1791 carefully than by simply considering all moves in all games of a player.
1792 Impact of handicap and uneven games on by-strength
1793 $\vec p$ distribution should be also investigated.
1795 % TODO: Future research --- Sparse PCA
1797 \section{Conclusion}
1798 We have proposed a way to extract summary pattern information from
1799 game collections and combined this with various data mining methods
1800 to show correspondence of our pattern summaries with various player
1801 meta-information like playing strength, era of play or playing style
1802 as ranked by expert players. We have implemented and measured our
1803 proposals in two case studies: per-rank characteristics of amateur
1804 players and per-player style/era characteristics of well-known
1805 professionals.
1807 While many details remain to be worked out,
1808 we have demonstrated that many significant correlations \rv{doubtlessly}
1809 do exist and
1810 it is practically viable to infer the player meta-information from
1811 extracted pattern summaries \rv{and we have proposed applications}
1812 for such inference. Finally, we outlined some of the many possible
1813 directions of future work in this newly staked research field
1814 on the boundary of Computer Go, Data Mining and Go Theory.
1817 % if have a single appendix:
1818 %\appendix[Proof of the Zonklar Equations]
1819 % or
1820 %\appendix % for no appendix heading
1821 % do not use \section anymore after \appendix, only \section*
1822 % is possibly needed
1824 % use appendices with more than one appendix
1825 % then use \section to start each appendix
1826 % you must declare a \section before using any
1827 % \subsection or using \label (\appendices by itself
1828 % starts a section numbered zero.)
1832 %\appendices
1833 %\section{Proof of the First Zonklar Equation}
1834 %Appendix one text goes here.
1836 %% you can choose not to have a title for an appendix
1837 %% if you want by leaving the argument blank
1838 %\section{}
1839 %Appendix two text goes here.
1842 % use section* for acknowledgement
1843 \section*{Acknowledgment}
1844 \label{acknowledgement}
1846 Foremostly, we are very grateful for detailed input on specific go styles
1847 by Alexander Dinerstein, Motoki Noguchi and V\'{i}t Brunner.
1848 We appreciate helpful comments on our general methodology
1849 by John Fairbairn, T. M. Hall, Cyril H\"oschl, Robert Jasiek, Franti\v{s}ek Mr\'{a}z
1850 and several GoDiscussions.com users. \cite{GoDiscThread}
1851 Finally, we would like to thank Radka ``chidori'' Hane\v{c}kov\'{a}
1852 for the original research idea and acknowledge major inspiration
1853 by R\'{e}mi Coulom's paper \cite{PatElo} on the extraction of pattern information.
1856 % Can use something like this to put references on a page
1857 % by themselves when using endfloat and the captionsoff option.
1858 \ifCLASSOPTIONcaptionsoff
1859 \newpage
1864 % trigger a \newpage just before the given reference
1865 % number - used to balance the columns on the last page
1866 % adjust value as needed - may need to be readjusted if
1867 % the document is modified later
1868 %\IEEEtriggeratref{8}
1869 % The "triggered" command can be changed if desired:
1870 %\IEEEtriggercmd{\enlargethispage{-5in}}
1872 % references section
1874 % can use a bibliography generated by BibTeX as a .bbl file
1875 % BibTeX documentation can be easily obtained at:
1876 % http://www.ctan.org/tex-archive/biblio/bibtex/contrib/doc/
1877 % The IEEEtran BibTeX style support page is at:
1878 % http://www.michaelshell.org/tex/ieeetran/bibtex/
1879 \bibliographystyle{IEEEtran}
1880 % argument is your BibTeX string definitions and bibliography database(s)
1881 \bibliography{gostyle}
1883 % <OR> manually copy in the resultant .bbl file
1884 % set second argument of \begin to the number of references
1885 % (used to reserve space for the reference number labels box)
1886 %\begin{thebibliography}{1}
1888 %\bibitem{MasterMCTS}
1890 %\end{thebibliography}
1892 % biography section
1894 % If you have an EPS/PDF photo (graphicx package needed) extra braces are
1895 % needed around the contents of the optional argument to biography to prevent
1896 % the LaTeX parser from getting confused when it sees the complicated
1897 % \includegraphics command within an optional argument. (You could create
1898 % your own custom macro containing the \includegraphics command to make things
1899 % simpler here.)
1900 %\begin{biography}[{\includegraphics[width=1in,height=1.25in,clip,keepaspectratio]{mshell}}]{Michael Shell}
1901 % or if you just want to reserve a space for a photo:
1903 %\begin{IEEEbiography}{Petr Baudi\v{s}}
1904 %Biography text here.
1905 %\end{IEEEbiography}
1907 % if you will not have a photo at all:
1908 \begin{IEEEbiographynophoto}{Petr Baudi\v{s}}
1909 Received BSc degree in Informatics at Charles University, Prague in 2009,
1910 currently a graduate student.
1911 Doing research in the fields of Computer Go, Monte Carlo Methods
1912 and Version Control Systems.
1913 Plays Go with the rank of 2-kyu on European tournaments
1914 and 2-dan on the KGS Go Server.
1915 \end{IEEEbiographynophoto}
1917 \begin{IEEEbiographynophoto}{Josef Moud\v{r}\'{i}k}
1918 Received BSc degree in Informatics at Charles University, Prague in 2009,
1919 currently a graduate student.
1920 Doing research in the fields of Neural Networks and Cognitive Sciences.
1921 His Go skills are not worth mentioning.
1922 \end{IEEEbiographynophoto}
1924 % insert where needed to balance the two columns on the last page with
1925 % biographies
1926 %\newpage
1928 %\begin{IEEEbiographynophoto}{Jane Doe}
1929 %Biography text here.
1930 %\end{IEEEbiographynophoto}
1932 % You can push biographies down or up by placing
1933 % a \vfill before or after them. The appropriate
1934 % use of \vfill depends on what kind of text is
1935 % on the last page and whether or not the columns
1936 % are being equalized.
1938 %\vfill
1940 % Can be used to pull up biographies so that the bottom of the last one
1941 % is flush with the other column.
1942 %\enlargethispage{-5in}
1946 % that's all folks
1947 \end{document}