blob | 2347cc30f55b69b93d919c1ce51980e6e8d2abae |

4 % cite.sty was written by Donald Arseneau

5 % V1.6 and later of IEEEtran pre-defines the format of the cite.sty package

6 % \cite{} output to follow that of IEEE. Loading the cite package will

7 % result in citation numbers being automatically sorted and properly

8 % "compressed/ranged". e.g., [1], [9], [2], [7], [5], [6] without using

9 % cite.sty will become [1], [2], [5]--[7], [9] using cite.sty. cite.sty's

10 % \cite will automatically add leading space, if needed. Use cite.sty's

11 % noadjust option (cite.sty V3.8 and later) if you want to turn this off.

12 % cite.sty is already installed on most LaTeX systems. Be sure and use

13 % version 4.0 (2003-05-27) and later if using hyperref.sty. cite.sty does

14 % not currently provide for hyperlinked citations.

15 % The latest version can be obtained at:

16 % http://www.ctan.org/tex-archive/macros/latex/contrib/cite/

17 % The documentation is contained in the cite.sty file itself.

20 % *** GRAPHICS RELATED PACKAGES ***

21 %

22 \ifCLASSINFOpdf

23 % \usepackage[pdftex]{graphicx}

24 % declare the path(s) where your graphic files are

25 % \graphicspath{{../pdf/}{../jpeg/}}

26 % and their extensions so you won't have to specify these with

27 % every instance of \includegraphics

28 % \DeclareGraphicsExtensions{.pdf,.jpeg,.png}

29 \else

30 % or other class option (dvipsone, dvipdf, if not using dvips). graphicx

31 % will default to the driver specified in the system graphics.cfg if no

32 % driver is specified.

33 % \usepackage[dvips]{graphicx}

35 % declare the path(s) where your graphic files are

36 % \graphicspath{{../eps/}}

37 % and their extensions so you won't have to specify these with

38 % every instance of \includegraphics

39 % \DeclareGraphicsExtensions{.eps}

40 \fi

49 %\usepackage{algpseudocode}

50 % WICKED: nefunguje ani jedno???

51 % algorithmic.sty can be obtained at:

52 % http://www.ctan.org/tex-archive/macros/latex/contrib/algorithms/

53 % There is also a support site at:

54 % http://algorithms.berlios.de/index.html

55 % Also of interest may be the (relatively newer and more customizable)

56 % algorithmicx.sty package by Szasz Janos:

57 % http://www.ctan.org/tex-archive/macros/latex/contrib/algorithmicx/

59 % *** ALIGNMENT PACKAGES ***

60 %

61 %\usepackage{array}

62 % http://www.ctan.org/tex-archive/macros/latex/required/tools/

66 %\usepackage{mdwtab}

67 % http://www.ctan.org/tex-archive/macros/latex/contrib/mdwtools/

70 % IEEEtran contains the IEEEeqnarray family of commands that can be used to

71 % generate multiline equations as well as matrices, tables, etc., of high

72 % quality.

74 %\usepackage{eqparbox}

75 % Also of notable interest is Scott Pakin's eqparbox package for creating

76 % (automatically sized) equal width boxes - aka "natural width parboxes".

77 % Available at:

78 % http://www.ctan.org/tex-archive/macros/latex/contrib/eqparbox/

82 % *** SUBFIGURE PACKAGES ***

83 %\usepackage[tight,footnotesize]{subfigure}

84 % subfigure.sty was written by Steven Douglas Cochran. This package makes it

85 % easy to put subfigures in your figures. e.g., "Figure 1a and 1b". For IEEE

86 % work, it is a good idea to load it with the tight package option to reduce

87 % the amount of white space around the subfigures. subfigure.sty is already

88 % installed on most LaTeX systems. The latest version and documentation can

89 % be obtained at:

90 % http://www.ctan.org/tex-archive/obsolete/macros/latex/contrib/subfigure/

91 % subfigure.sty has been superceeded by subfig.sty.

95 %\usepackage[caption=false]{caption}

96 %\usepackage[font=footnotesize]{subfig}

97 % subfig.sty, also written by Steven Douglas Cochran, is the modern

98 % replacement for subfigure.sty. However, subfig.sty requires and

99 % automatically loads Axel Sommerfeldt's caption.sty which will override

100 % IEEEtran.cls handling of captions and this will result in nonIEEE style

101 % figure/table captions. To prevent this problem, be sure and preload

102 % caption.sty with its "caption=false" package option. This is will preserve

103 % IEEEtran.cls handing of captions. Version 1.3 (2005/06/28) and later

104 % (recommended due to many improvements over 1.2) of subfig.sty supports

105 % the caption=false option directly:

106 %\usepackage[caption=false,font=footnotesize]{subfig}

107 %

108 % The latest version and documentation can be obtained at:

109 % http://www.ctan.org/tex-archive/macros/latex/contrib/subfig/

110 % The latest version and documentation of caption.sty can be obtained at:

111 % http://www.ctan.org/tex-archive/macros/latex/contrib/caption/

115 % *** FLOAT PACKAGES ***

116 %

117 %\usepackage{fixltx2e}

118 % fixltx2e, the successor to the earlier fix2col.sty, was written by

119 % Frank Mittelbach and David Carlisle. This package corrects a few problems

120 % in the LaTeX2e kernel, the most notable of which is that in current

121 % LaTeX2e releases, the ordering of single and double column floats is not

122 % guaranteed to be preserved. Thus, an unpatched LaTeX2e can allow a

123 % single column figure to be placed prior to an earlier double column

124 % figure. The latest version and documentation can be found at:

125 % http://www.ctan.org/tex-archive/macros/latex/base/

129 %\usepackage{stfloats}

130 % stfloats.sty was written by Sigitas Tolusis. This package gives LaTeX2e

131 % the ability to do double column floats at the bottom of the page as well

132 % as the top. (e.g., "\begin{figure*}[!b]" is not normally possible in

133 % LaTeX2e). It also provides a command:

134 %\fnbelowfloat

135 % to enable the placement of footnotes below bottom floats (the standard

136 % LaTeX2e kernel puts them above bottom floats). This is an invasive package

137 % which rewrites many portions of the LaTeX2e float routines. It may not work

138 % with other packages that modify the LaTeX2e float routines. The latest

139 % version and documentation can be obtained at:

140 % http://www.ctan.org/tex-archive/macros/latex/contrib/sttools/

141 % Documentation is contained in the stfloats.sty comments as well as in the

142 % presfull.pdf file. Do not use the stfloats baselinefloat ability as IEEE

143 % does not allow \baselineskip to stretch. Authors submitting work to the

144 % IEEE should note that IEEE rarely uses double column equations and

145 % that authors should try to avoid such use. Do not be tempted to use the

146 % cuted.sty or midfloat.sty packages (also by Sigitas Tolusis) as IEEE does

147 % not format its papers in such ways.

150 %\ifCLASSOPTIONcaptionsoff

151 % \usepackage[nomarkers]{endfloat}

152 % \let\MYoriglatexcaption\caption

153 % \renewcommand{\caption}[2][\relax]{\MYoriglatexcaption[#2]{#2}}

154 %\fi

155 % endfloat.sty was written by James Darrell McCauley and Jeff Goldberg.

156 % This package may be useful when used in conjunction with IEEEtran.cls'

157 % captionsoff option. Some IEEE journals/societies require that submissions

158 % have lists of figures/tables at the end of the paper and that

159 % figures/tables without any captions are placed on a page by themselves at

160 % the end of the document. If needed, the draftcls IEEEtran class option or

161 % \CLASSINPUTbaselinestretch interface can be used to increase the line

162 % spacing as well. Be sure and use the nomarkers option of endfloat to

163 % prevent endfloat from "marking" where the figures would have been placed

164 % in the text. The two hack lines of code above are a slight modification of

165 % that suggested by in the endfloat docs (section 8.3.1) to ensure that

166 % the full captions always appear in the list of figures/tables - even if

167 % the user used the short optional argument of \caption[]{}.

168 % IEEE papers do not typically make use of \caption[]'s optional argument,

169 % so this should not be an issue. A similar trick can be used to disable

170 % captions of packages such as subfig.sty that lack options to turn off

171 % the subcaptions:

172 % For subfig.sty:

173 % \let\MYorigsubfloat\subfloat

174 % \renewcommand{\subfloat}[2][\relax]{\MYorigsubfloat[]{#2}}

175 % For subfigure.sty:

176 % \let\MYorigsubfigure\subfigure

177 % \renewcommand{\subfigure}[2][\relax]{\MYorigsubfigure[]{#2}}

178 % However, the above trick will not work if both optional arguments of

179 % the \subfloat/subfig command are used. Furthermore, there needs to be a

180 % description of each subfigure *somewhere* and endfloat does not add

181 % subfigure captions to its list of figures. Thus, the best approach is to

182 % avoid the use of subfigure captions (many IEEE journals avoid them anyway)

183 % and instead reference/explain all the subfigures within the main caption.

184 % The latest version of endfloat.sty and its documentation can obtained at:

185 % http://www.ctan.org/tex-archive/macros/latex/contrib/endfloat/

186 %

187 % The IEEEtran \ifCLASSOPTIONcaptionsoff conditional can also be used

188 % later in the document, say, to conditionally put the References on a

189 % page by themselves.

191 % *** PDF, URL AND HYPERLINK PACKAGES ***

192 %

194 % url.sty was written by Donald Arseneau. It provides better support for

195 % handling and breaking URLs. url.sty is already installed on most LaTeX

196 % systems. The latest version can be obtained at:

197 % http://www.ctan.org/tex-archive/macros/latex/contrib/misc/

198 % Read the url.sty source comments for usage information. Basically,

199 % \url{my_url_here}.

202 % *** Do not adjust lengths that control margins, column widths, etc. ***

203 % *** Do not use packages that alter fonts (such as pslatex). ***

204 % There should be no need to do such things with IEEEtran.cls V1.6 and later.

205 % (Unless specifically asked to do so by the journal or conference you plan

206 % to submit to, of course. )

208 % correct bad hyphenation here

213 %

214 % paper title

215 % can use linebreaks \\ within to get better formatting as desired

218 % use \thanks{} to gain access to the first footnote area

219 % a separate \thanks must be used for each paragraph as LaTeX2e's \thanks

220 % was not built to handle multiple paragraphs

222 \thanks{P. Baudi\v{s} is student at the Faculty of Math and Physics, Charles University, Prague, CZ, and also does some of his Computer Go research as an employee of SUSE Labs Prague, Novell CZ.}% <-this % stops a space

223 \thanks{J. Moud\v{r}\'{i}k is student at the Faculty of Math and Physics, Charles University, Prague, CZ.}}

225 % note the % following the last \IEEEmembership and also \thanks -

226 % these prevent an unwanted space from occurring between the last author name

227 % and the end of the author line. i.e., if you had this:

228 %

229 % \author{....lastname \thanks{...} \thanks{...} }

230 % ^------------^------------^----Do not want these spaces!

231 %

232 % a space would be appended to the last name and could cause every name on that

233 % line to be shifted left slightly. This is one of those "LaTeX things". For

234 % instance, "\textbf{A} \textbf{B}" will typeset as "A B" not "AB". To get

235 % "AB" then you have to do: "\textbf{A}\textbf{B}"

236 % \thanks is no different in this regard, so shield the last } of each \thanks

237 % that ends a line with a % and do not let a space in before the next \thanks.

238 % Spaces after \IEEEmembership other than the last one are OK (and needed) as

239 % you are supposed to have spaces between the names. For what it is worth,

240 % this is a minor point as most people would not even notice if the said evil

241 % space somehow managed to creep in.

244 % The paper headers

247 % The only time the second header will appear is for the odd numbered pages

248 % after the title page when using the twoside option.

249 %

250 % *** Note that you probably will NOT want to include the author's ***

251 % *** name in the headers of peer review papers. ***

252 % You can use \ifCLASSOPTIONpeerreview for conditional compilation here if

253 % you desire.

258 % If you want to put a publisher's ID mark on the page you can do it like

259 % this:

260 %\IEEEpubid{0000--0000/00\$00.00~\copyright~2007 IEEE}

261 % Remember, if you use this you must call \IEEEpubidadjcol in the second

262 % column for its text to clear the IEEEpubid mark.

266 % use for special paper notices

267 %\IEEEspecialpapernotice{(Invited Paper)}

272 % make the title area

273 \maketitle

277 %\boldmath

279 We process a~large corpus of game records of the board game of Go and

280 propose a~way to extract summary information on played moves.

281 We then apply several basic data-mining methods on the summary

282 information to identify the most differentiating features within the

283 summary information, and discuss their correspondence with traditional

284 Go knowledge. We show mappings of the features to player attributes

285 like playing strength or informally perceived ``playing style'' (such as

286 territoriality or aggressivity), and propose applications including

287 seeding real-work ranks of internet players, aiding in Go study, or

288 contribution to Go-theoretical discussion on the scope of ``playing

289 style''.

292 % IEEEtran.cls defaults to using nonbold math in the Abstract.

293 % This preserves the distinction between vectors and scalars. However,

294 % if the journal you are submitting to favors bold math in the abstract,

295 % then you can use LaTeX's standard command \boldmath at the very start

296 % of the abstract to achieve this. Many IEEE journals frown on math

297 % in the abstract anyway.

299 % Note that keywords are not normally used for peerreview papers.

301 board games, go, computer go, data mining, go theory,

302 pattern recongition, player strength, playing style,

303 neural networks, Kohonen maps, principal component analysis

311 % For peer review papers, you can put extra information on the cover

312 % page as needed:

313 % \ifCLASSOPTIONpeerreview

314 % \begin{center} \bfseries EDICS Category: 3-BBND \end{center}

315 % \fi

316 %

317 % For peerreview papers, this IEEEtran command inserts a page break and

318 % creates the second title. It will be ignored for other modes.

319 \IEEEpeerreviewmaketitle

324 % The very first letter is a 2 line initial drop letter followed

325 % by the rest of the first word in caps.

326 %

327 % form to use if the first word consists of a single letter:

328 % \IEEEPARstart{A}{demo} file is ....

329 %

330 % form to use if you need the single drop letter followed by

331 % normal text (unknown if ever used by IEEE):

332 % \IEEEPARstart{A}{}demo file is ....

333 %

334 % Some journals put the first two words in caps:

335 % \IEEEPARstart{T}{his demo} file is ....

336 %

337 % Here we have the typical use of a "T" for an initial drop letter

338 % and "HIS" in caps to complete the first word.

340 of creating a~program to play the game, finding the best move from a~given

342 We will make use of one method developed in the course

343 of such research and apply it to the analysis of existing game records

344 with the aim of helping humans to play and understand the game better

345 instead.

347 Go is a~two-player full-information board game played

349 stones; the goal of the game is to surround the most territory and

350 capture enemy stones. We assume basic familiarity with the game.

352 Many Go players are eager to play using computers (usually over

353 the internet) and review games played by others on computers as well.

354 This means that large amounts of game records are collected and digitally

355 stored, enabling easy processing of such collections. However, so far

356 only little has been done with the available data --- we are aware

360 We present a~more in-depth approach --- from all played moves, we devise

361 a~compact evaluation of each player. We then explore correlations between

362 evaluations of various players in light of externally given information.

363 This way, we can discover similarity between moves characteristics of

364 players with the same playing strength, or discuss the meaning of the

365 "playing style" concept on the assumption that similar playing styles

366 should yield similar moves characteristics.

372 As the input of our analysis, we use large collections of game records%

374 grouped by the primary object of analysis (player name, player rank, etc.).

375 We process the games by object, generating a description for each

379 We keep track of the most

383 (the mapping from patterns to vector elements is common for all objects).

384 We can then process and compare just the pattern vectors.

387 When deciding how to compose the patterns we use to describe moves,

388 we need to consider a specificity tradeoff --- overly general descriptions carry too few

389 information to discern various player attributes; too specific descriptions

390 gather too few specimen over the games sample and the vector differences are

391 not statistically significant.

393 We have chosen an intuitive and simple approach inspired by pattern features

394 used when computing Elo ratings for candidate patterns in Computer Go play.

396 (name--value pairs) matched at the position of the played move.

397 We use these features:

400 \item capture move flag

401 \item atari move flag

402 \item atari escape flag

403 \item contiguity-to-last flag --- whether the move has been played in one of 8 neighbors of the last move

404 \item contiguity-to-second-last flag

406 \item spatial pattern --- configuration of stones around the played move

409 The spatial patterns are normalized (using a dictionary) to be always

410 black-to-play and maintain translational and rotational symmetry.

415 are matched.

417 Pattern vectors representing these features contain information on

418 played shape as well as basic representation of tactical dynamics

419 --- threats to capture stones, replying to last move, or ignoring

420 opponent's move elsewhere to return to an urgent local situation.

421 The shapes most frequently correspond to opening moves

423 --- commonly played sequences) characteristic for a certain

424 strategic aim. In the opening, even a single-line difference

425 in the distance from the border can have dramatic impact on

426 further local and global development.

430 The pattern vector elements can have diverse values since for each object,

431 we consider different number of games (and thus patterns).

433 the most frequent pattern having the value of $1$ and the least occuring

434 one being $-1$.

435 Thus, we obtain vectors describing relative frequency of played patterns

436 independent on number of gathered patterns.

437 But there are multiple ways to approach the normalization.

441 One is simply to linearly re-scale the values using:

443 This is the default approach; we have used data processed by only this

444 computation unless we note otherwise.

446 by the few most-occuring patterns (describing mostly large-diameter

447 shapes from the game opening). This means that most patterns will be

448 always represented by only very small values near the lower bound.

453 To alleviate this problem, we have also tried to modify the linear

455 the raw counts using

459 However, we have found that this method is not universally beneficial.

461 produced PCA decomposition with significant dimensions corresponding

462 better to some of the prior knowledge and more instructive for manual

463 inspection, but ultimately worsened accuracy of our classifiers.

467 We have implemented the data extraction by making use of the pattern

468 features matching implementation%

472 We extract information on players by converting the SGF game

475 of the particular pattern features combination) per move. Of course,

476 only moves played by the appropriate color in the game are collected.

481 To assess the properties of gathered pattern vectors

482 and their influence on playing styles,

483 we process the data by several basic data minining techniques.

486 from the game collection

487 and serve to show internal structure and correlations within the data set.

489 Principal Component Analysis finds orthogonal vector components that

490 have the largest variance.

491 Reversing the process can indicate which patterns correlate with each component.

492 Additionally, PCA can be used as vector preprocessing for methods

493 that are negatively sensitive to pattern vector component correlations.

495 The~second method of Kohonen Maps

496 is based on the theory of self-organizing maps of abstract units (neurons) that

497 compete against each other for the representation of the input space.

498 Because neurons in the network are organized in a two-dimensional plane,

499 the trained network spreads the vectors on a 2D plane,

500 allowing for visualization of clusters of players with similar properties.

505 representing e.g.~information about styles, player's strength or even

506 meta-information like the player's era or a country of origin.

507 Initially, the methods must be calibrated (trained) on some prior knowledge,

509 and the associated output vectors.

511 Moreover, the reference set can be divided into training and testing pairs

512 and the methods can be compared by the mean square error on testing data set

513 (difference of output vectors approximated by the method and their real desired value).

515 %\footnote{However, note that dicrete characteristics such as country of origin are

516 %not very feasible to use here, since WHAT??? is that even true?? }

519 approximates $\vec O$ by composing the output vectors

520 of $k$ reference pattern vectors closest to $\vec P$.

522 The other classifier is a~multi-layer feed-forward Artificial Neural Network:

523 the neural network can learn correlations between input and output vectors

524 and generalize the ``knowledge'' to unknown vectors; it can be more flexible

525 in the interpretation of different pattern vector elements and discern more

526 complex relations than the kNN classifier,

527 but may not be as stable and requires larger training sample.

532 to reduce the dimensions of the pattern vectors while preserving

533 as much information as possible, assuming inter-dependencies between

534 pattern vector dimensions are linear.

536 Briefly, PCA is an eigenvalue decomposition of a~covariance matrix of centered pattern vectors,

537 producing a~linear mapping $o$ from $n$-dimensional vector space

538 to a~reduced $m$-dimensional vector space.

539 The $m$ eigenvectors of the original vectors' covariance matrix

540 with the largest eigenvalues are used as the base of the reduced vector space;

541 the eigenvectors form projection matrix $W$.

543 For each original pattern vector $\vec p_i$,

544 we obtain its new representation $\vec r_i$ in the PCA base

545 as shown in the following equation:

560 \ENDFOR

563 \ENDFOR

565 \STATE Get $m$ largest eigenvalues

566 \STATE Most significant eigenvectors ordered by decreasing eigenvalues form the rows of matrix $W$

569 \ENDFOR

574 We want to find correlations between PCA dimensions and

575 some prior knowledge (player rank, style vector).

576 For this purpose, we compute the well-known

578 measuring the strength of the linear dependence%

580 of the vectors.}

581 between the dimensions:

587 Kohonen map is a self-organizing network with neurons spread evenly over a~two-dimensional plane.

588 Neurons $\vec n$ in the map compete for representation of portions of the input vector space,

589 each vector being represented by some neuron.

590 The network is trained so that the neurons

591 that are topologically close tend to represent vectors that are close in suitable metric as well.

593 First, a~randomly initialized network is sequentially trained;

594 in each iteration, we choose a~random training vector $\vec t$

597 We then adapt neurons $n$ from the neighborhood of $\vec w$ employing the equation

601 where $\alpha$ is a learning parameter, usually decreasing in time.

602 $Influence()$ is a function that forces neurons to spread.

603 Such function is usually realised using a mexican hat function or a difference-of-gaussians

605 The state of the network can be evaluated by calculating mean square difference

619 \REPEAT

624 \ENDFOR

627 \STATE $\vec n \leftarrow \vec n + \alpha(It) \cdot \mathit{Influence}(\vec w, \vec n) \cdot ( \vec t - \vec n ) $

628 \ENDFOR

636 Our goal is to approximate player's output vector $\vec O$;

637 we know his pattern vector $\vec P$.

638 We further assume that similarities in players' pattern vectors

639 uniformly correlate with similarities in players' output vectors.

647 Note that the weight is a function of distance and is not explicitly defined in Algorithm \ref{alg:knn}.

648 During our research, exponentially decreasing weight has proven to be sufficient.

657 \ENDFOR

662 \ENDFOR

670 and find correlations between input patterns and output classifications.

671 Before use, the network is iteratively trained on the training data

672 until the error on the training set is reasonably small.

674 %Neural network is an adaptive system that must undergo a training

675 %period similarly to the requirement

676 %of reference vectors for the k-Nearest Neighbors algorithm above.

679 Technically, the neural network is a network of interconnected

680 computational units called neurons.

681 A feedforward neural network has a layered topology;

685 Each neuron $i$ is connected to all neurons in the previous layer and each connection has its weight $w_{ij}$

687 The computation proceeds in discrete time steps.

690 Then, we iteratively compute output of each neuron in the next layer

691 until the output layer is reached.

692 The activity of output layer is then presented as the result.

694 The activation $y_i$ of neuron $i$ from the layer $I$ is computed as

698 where $J$ is the previous layer, while $y_j$ is the activation for neurons from $J$ layer.

700 and its purpose is to bound the outputs of neurons.

701 A typical example of an activation function is the sigmoid function.%

704 and the x-position ($k$).}

707 Training of the feed-forward neural network usually involves some

708 modification of supervised Backpropagation learning algorithm.

711 %Because the \emph{reference set} is usually not very large,

712 %we have devised a simple method for its extension.

713 %This enhancement is based upon adding random linear combinations

714 %of \emph{style and pattern vectors} to the training set.

716 As outlined above, the training set $T$ consists of

727 \REPEAT

731 %\FORALL{$(\overrightarrow{Input}, \overrightarrow{DesiredOutput}) \in T$}

732 %\STATE $\overrightarrow{Output} \leftarrow Result(N, \overrightarrow{Input})$

733 %\STATE $E \leftarrow |\overrightarrow{DesiredOutput} - \overrightarrow{Output}|$

739 \ENDFOR

747 We have implemented the data mining methods as the

749 made available under the GNU GPL licence.

751 The majority of our basic processing and the analysis parts

753 We use several external libraries, most notably the MDP library \cite{MDP} (used for PCA analysis)

761 \centering

767 First, we have used our framework to analyse correlations of pattern vectors

768 and playing strength. Like in other competitively played board games, Go players

772 corresponding to even win chances for game of two players with the same rank,

775 be assigned differently.}

778 Multiple independent real-world ranking scales exist

779 (geographically based), also online servers maintain their own user ranking;

780 the difference between scales can be up to several ranks and the rank

783 As the source game collection, we use Go Teaching Ladder reviews archive%

785 variation with the actual played game.}

788 and then randomly separate 770 games as a testing set.

789 Since the rank information is provided by the users and may not be consistent,

790 we are forced to take a simplified look at the ranks,

791 discarding the differences between various systems and thus somewhat

793 we did not pursue to try another collection;

794 one could e.g. look at game archives of some Go server.}

797 we have performed PCA analysis on the pattern vectors, achieving near-perfect

798 rank correspondence in the first PCA dimension%

800 with no discernable structure revealed within the lower-order eigenvectors.}

803 We measure the accuracy of strength approximation by the first dimension

805 implying extremely strong correlation.

806 Using the eigenvector position directly for classification

807 of players within the test group yields MSE TODO, thus providing

808 reasonably satisfying accuracy by itself.%

810 produced noticeably less clear-cut results.}

812 To further enhance the strength estimator accuracy,

813 we have tried to train a NN classifier on our train set, consisting

815 for activation of input neurons and rank number as result of the output

816 neuron. We then proceeded to test the NN on per-player pattern vectors built

817 from the games in the test set, yielding MSE of TODO with TODO games per player

818 on average.

824 As a~second case study for our pattern analysis,

825 we investigate pattern vectors $\vec p$ of various well-known players,

826 their relationships in-between and to prior knowledge

827 in order to explore the correlation of prior knowledge with extracted patterns.

828 We look for relationships between pattern vectors and perceived

829 ``playing style'' and attempt to use our classifiers to transform

833 professional games, dating from the early Go history 1500 years ago to the present.

835 we have chosen them for being well-known within the players community,

836 having large number of played games in our collection and not playing too long

838 dismissed; usual playing conditions can also differ significantly.}

842 In order to provide a reference frame for our style analysis,

843 we have gathered some expert-based information about various

844 traditionally perceived style aspects to use as a prior knowledge.

845 This expert-based knowledge allows us to predict styles of unknown players

846 based on the similarity of their pattern vectors,

847 as well as discover correlations between styles and proportions

848 of played patterns.

850 Experts were asked to mark four style aspects of each of the given players

854 \noindent

855 %\begin{table}

857 %\caption{Styles}

859 \hline

861 Territoriality $\tau$ & Moyo & Territory \\

862 Orthodoxity $\omega$ & Classic & Novel \\

863 Aggressivity $\alpha$ & Calm & Figting \\

867 %\end{table}

870 We have devised these four style aspects based on our own Go experience

871 and consultations with other experts.

872 The used terminology has quite

873 clear meaning to any experienced Go player and there is not too much

874 room for confusion, except possibly in the case of ``thickness'' ---

875 but the concept is not easy to pin-point succintly and we also did not

876 add extra comments on the style aspects to the questionnaire deliberately

877 to accurately reflect any diversity in understanding of the terms.

883 Throughout our research, we have experimentally found that playing era

884 is also a major factor differentiating between patterns. Thus, we have

885 further extended the $\vec s_r$ by median year over all games played

886 by the player.

889 % increase table row spacing, adjust to taste

893 \centering

894 % Some packages, such as MDW tools, offer better commands for making tables

895 % than the plain LaTeX2e tabular which is used here.

897 \hline

899 \hline

905 y. & & & & &$ 1.000$\\

906 \hline

910 Three high-level Go players (Alexander Dinerstein 3-pro, Motoki Noguchi

912 players.

914 Mean standard deviation of the answers is 0.952,

915 making the data reasonably reliable,

916 though much larger sample would of course be more desirable.

917 We have also found significant correlation between the various

918 style aspects, as shown by the Pearson's $r$ values

922 % increase table row spacing, adjust to taste

927 \centering

928 % Some packages, such as MDW tools, offer better commands for making tables

929 % than the plain LaTeX2e tabular which is used here.

931 \hline

933 \hline

958 \hline

961 \item [1] Including standard deviation. Only players where we received at least two out of three answers are included.

962 \item [2] Since the playing era column does not fit into the table, we at least sort the players ascending by their median year.

963 \item [3] We do not consider games of Go Seigen due to him playing across several distinct eras and also being famous for radical opening experiments throughout the time, and thus featuring especially high diversity in patterns.

964 \item [4] We do not consider games of Ishida Yoshio and Yamashita Keigo for the PCA analysis since they are significant outliers, making high-order dimensions much like purely ``similarity to this player''. Takemiya Masaki has the similar effect for the first dimension, but that case corresponds to common knowledge of him being an extreme proponent of anti-territorial (``moyo'') style.

965 \item [5] We consider games only up to year 2004, since Yi Ch'ang-ho was prominent representative of a balanced, careful player until then and still has this reputation in minds of many players, but is regarded to have altered his style significantly afterwards.

973 \centering

979 We have looked at the five most significant dimensions of the pattern data

980 yielded by the PCA analysis of the reference player set%

982 Masaki. That way, the second dimension strongly

983 correlated to territoriality and third dimension strongly correlacted to era,

984 however the first dimension remained mysteriously uncorrelated and with no

985 obvious interpretation.}

987 We have again computed the Pearson's $r$ for all combinations of PCA dimensions

988 and dimensions of the prior knowledge style vectors to find correlations.

991 % increase table row spacing, adjust to taste

995 \centering

996 % Some packages, such as MDW tools, offer better commands for making tables

997 % than the plain LaTeX2e tabular which is used here.

999 \hline

1001 \hline

1007 \hline

1012 % increase table row spacing, adjust to taste

1017 \centering

1018 % Some packages, such as MDW tools, offer better commands for making tables

1019 % than the plain LaTeX2e tabular which is used here.

1021 % The virtual last column is here because otherwise we get random syntax errors.

1036 \centering high corner/side opening \tnote{2} & \centering high corner approach & \centering high distant pincer & \\

1053 \centering low corner approach & \centering low corner reply & \centering low corner enclosure & \\

1071 \centering low corner reply \tnote{4} & \centering high distant approach/pincer & \centering near low pincer & \\

1073 \hline

1077 in reality, they are usually somewhat larger and always circle-shaped

1078 (centered on the triangled move).

1079 We omit only pattern segments that are entirely empty.

1081 since some of them may not be obvious unless seen in game context; we choose

1082 the descriptions based on the most frequently observer contexts, but of course

1083 the pattern can be also matched in other positions and situations.

1085 only uncorrelated and anti-correlated ones.

1087 we skip a slide follow-up pattern to this move.

1093 % increase table row spacing, adjust to taste

1098 \centering

1099 % Some packages, such as MDW tools, offer better commands for making tables

1100 % than the plain LaTeX2e tabular which is used here.

1102 % The virtual last column is here because otherwise we get random syntax errors.

1116 \centering low corner approach & \centering low side or mokuhazushi opening & \centering san-san opening & \\

1132 \centering low corner enclosure & \centering 3-4 corner opening \tnote{2} & \centering high approach reply & \\

1134 \hline

1138 since the current patterns are commonplace in games of past centuries

1139 (not included in our training set) and many would call a lot of the old-time patterns

1140 modern inventions. Perhaps we can infer that the latest 21th-century play trends abandon

1141 many of the 20th-century experiments (lower echelon of our by-year samples)

1142 to return to the more ordinary but effective classic patterns.

1149 It is immediately

1150 obvious both from the measured $r$ and visual observation

1151 that by far the most significant vector corresponds very well

1152 to the territoriality of the players,%

1154 territorial player, is not well visible in the cluster, but he is

1156 confirming the intuitive notion that this aspect of style

1157 is the one easiest to pin-point and also

1158 most obvious in the played shapes and sequences

1159 (that can obviously aim directly at taking secure territory

1160 or building center-oriented framework). Thick (solid) play also plays

1161 a role, but these two style dimensions are already

1162 correlated in the prior data.

1164 The other PCA dimensions are somewhat harder to interpret, but there

1165 certainly is significant influence of the styles on the patterns;

1167 (Larger absolute value means better linear correspondence.)

1169 We also list the characteristic spatial patterns of the PCA dimension

1170 extremes (tables \ref{fig:style_patterns}, \ref{fig:style_patterns3}), determined by their coefficients

1171 in the PCA projection matrix --- however, such naive approach

1172 has limited reliability, better methods will have to be researched.%

1175 it reflects some weak ordering in bottom half of the dimension,

1176 not global ordering within the dimension.}

1177 We do not show the other pattern features since they carry no useful

1178 information in the opening stage.%

1180 but here all the spatial patterns are big enough to reach to the edge

1181 on their own.}

1183 The PCA results presented above do not show much correlation between

1185 However, when we applied the extended vector normalization

1187 some less significant PCA dimensions exhibited clear correlations.%

1189 produces the most instructive PCA output on our particular game collection.}

1190 It appears that less-frequent patterns that appear only in the middle-game

1192 particular specific-shape patterns repeat less often.} are defining

1193 for these dimensions, and these are not represented in the pattern vectors

1194 as well as the common opening patterns.

1195 However, we do not use the extended normalization results since

1196 they produced noticeably less accurate classifiers in all dimensions,

1199 We believe that the next step

1200 in interpreting our results will be more refined prior information input

1201 and precise analysis by Go experts.

1203 TODO: Kohonen map view. Possibly a Sociomap view.

1207 %TODO vsude zcheckovat jestli pouzivame stejny cas "we perform, we apply" X "we have performed, ..."

1209 Apart from the PCA-based analysis, we tested the style inference ability

1212 To compare and evaluate both methods, we have performed $5$-fold cross validation

1213 and compared their performance with a~random classificator.

1214 In the $5$-fold cross-validation, we randomly divide the training set

1215 (organized by players) into $5$ distinct parts with comparable

1216 sizes and then iteratively use each part as a~testing set (yielding square error value), while

1217 the rest (remaining $4$ parts) is taken as a~training set. The square errors across all $5$ iterations are

1218 averaged, yielding mean square error.

1220 The results are shown in table \ref{crossval-cmp}. Second to fifth columns in the table represent

1223 represents $\mathit{Mean}_\mathit{RND} / \mathit{X}$ -- comparison of mean square error (across styles)

1224 with random classificator. To minimize the

1225 effect of random variables, all numbers were taken as an average of $30$ runs of the cross validation.

1233 \hline

1234 %Classifier & $\sigma_\tau$ & $\sigma_\omega$ & $\sigma_\alpha$ & $\sigma_\theta$ & Tot $\sigma$ & $\mathit{RndC}$\\ \hline

1235 %Neural network & 0.420 & 0.488 & 0.365 & 0.371 & 0.414 & 1.82 \\

1236 %$k$-NN ($k=4$) & 0.394 & 0.507 & 0.457 & 0.341 & 0.429 & 1.76 \\

1237 %Random classifier & 0.790 & 0.773 & 0.776 & 0.677 & 0.755 & 1.00 \\ \hline

1249 For both methods to yield comparable errors, we have rescaled the style vectors from $[1,10]$ to $[-1,1]$

1250 (this is also the range of our neuron activation function).

1252 % TODO presunout konkretni parametry do Appendixu? (neni jich tolik, mozna ne)

1262 We believe that our findings might be useful for many applications

1263 in the area of Go support software as well as Go-playing computer engines.

1265 The style analysis can be an excellent teaching aid --- classifying style

1266 dimensions based on player's pattern vector, many study recommendations

1267 can be given, e.g. about the professional games to replay, the goal being

1268 balancing understanding of various styles to achieve well-rounded skill set.

1269 This was also our original aim when starting the research and a user-friendly

1270 tool based on our work is now being created.

1272 We hope that more strong players will look into the style dimensions found

1273 by our statistical analysis --- analysis of most played patterns of prospective

1274 opponents might prepare for the game, but we especially hope that new insights

1275 on strategic purposes of various shapes and general human understanding

1276 of the game might be achieved by investigating the style-specific patterns.

1277 Time by time, new historical game records are still being discovered;

1278 pattern-based classification might help to suggest origin of the games

1279 in Go Archeology.

1281 Classifying playing strength of a pattern vector of a player can be used

1282 e.g. to help determine initial real-world rating of a player before their

1283 first tournament based on games played on the internet; some players especially

1284 in less populated areas could get fairly strong before playing their first

1285 real tournament.

1287 Analysis of pattern vectors extracted from games of Go-playing programs

1288 in light of the shown strength and style distributions might help to

1289 highlight some weaknesses and room for improvements. (However, since

1290 correlation does not imply causation, simply optimizing Go-playing programs

1291 according to these vectors is unlikely to yield good results.)

1292 Another interesting applications in Go-playing programs might be strength

1293 adjustment; the program can classify the player's level based on the pattern

1294 vector from its previous games and auto-adjust its difficulty settings

1295 accordingly to provide more even games for beginners.

1298 % An example of a floating figure using the graphicx package.

1299 % Note that \label must occur AFTER (or within) \caption.

1300 % For figures, \caption should occur after the \includegraphics.

1301 % Note that IEEEtran v1.7 and later has special internal code that

1302 % is designed to preserve the operation of \label within \caption

1303 % even when the captionsoff option is in effect. However, because

1304 % of issues like this, it may be the safest practice to put all your

1305 % \label just after \caption rather than within \caption{}.

1306 %

1307 % Reminder: the "draftcls" or "draftclsnofoot", not "draft", class

1308 % option should be used if it is desired that the figures are to be

1309 % displayed while in draft mode.

1310 %

1311 %\begin{figure}[!t]

1312 %\centering

1313 %\includegraphics[width=2.5in]{myfigure}

1314 % where an .eps filename suffix will be assumed under latex,

1315 % and a .pdf suffix will be assumed for pdflatex; or what has been declared

1316 % via \DeclareGraphicsExtensions.

1317 %\caption{Simulation Results}

1318 %\label{fig_sim}

1319 %\end{figure}

1321 % Note that IEEE typically puts floats only at the top, even when this

1322 % results in a large percentage of a column being occupied by floats.

1325 % An example of a double column floating figure using two subfigures.

1326 % (The subfig.sty package must be loaded for this to work.)

1327 % The subfigure \label commands are set within each subfloat command, the

1328 % \label for the overall figure must come after \caption.

1329 % \hfil must be used as a separator to get equal spacing.

1330 % The subfigure.sty package works much the same way, except \subfigure is

1331 % used instead of \subfloat.

1332 %

1333 %\begin{figure*}[!t]

1334 %\centerline{\subfloat[Case I]\includegraphics[width=2.5in]{subfigcase1}%

1335 %\label{fig_first_case}}

1336 %\hfil

1337 %\subfloat[Case II]{\includegraphics[width=2.5in]{subfigcase2}%

1338 %\label{fig_second_case}}}

1339 %\caption{Simulation results}

1340 %\label{fig_sim}

1341 %\end{figure*}

1342 %

1343 % Note that often IEEE papers with subfigures do not employ subfigure

1344 % captions (using the optional argument to \subfloat), but instead will

1345 % reference/describe all of them (a), (b), etc., within the main caption.

1348 % An example of a floating table. Note that, for IEEE style tables, the

1349 % \caption command should come BEFORE the table. Table text will default to

1350 % \footnotesize as IEEE normally uses this smaller font for tables.

1351 % The \label must come after \caption as always.

1352 %

1353 %\begin{table}[!t]

1354 %% increase table row spacing, adjust to taste

1355 %\renewcommand{\arraystretch}{1.3}

1356 % if using array.sty, it might be a good idea to tweak the value of

1357 % \extrarowheight as needed to properly center the text within the cells

1358 %\caption{An Example of a Table}

1359 %\label{table_example}

1360 %\centering

1361 %% Some packages, such as MDW tools, offer better commands for making tables

1362 %% than the plain LaTeX2e tabular which is used here.

1363 %\begin{tabular}{|c||c|}

1364 %\hline

1365 %One & Two\\

1366 %\hline

1367 %Three & Four\\

1368 %\hline

1369 %\end{tabular}

1370 %\end{table}

1373 % Note that IEEE does not put floats in the very first column - or typically

1374 % anywhere on the first page for that matter. Also, in-text middle ("here")

1375 % positioning is not used. Most IEEE journals use top floats exclusively.

1376 % Note that, LaTeX2e, unlike IEEE journals, places footnotes above bottom

1377 % floats. This can be corrected via the \fnbelowfloat command of the

1378 % stfloats package.

1384 Since we are not aware of any previous research on this topic and we

1385 are limited by space and time constraints, plenty of research remains

1386 to be done, in all parts of our analysis --- we have already noted

1387 many in the text above. Most significantly, different methods of generating

1388 and normalizing the $\vec p$ vectors can be explored

1389 and other data mining methods could be investigated.

1390 Better ways of visualising the relationships would be desirable,

1391 together with thorough dissemination of internal structure

1392 of the player pattern vectors space.

1394 It can be argued that many players adjust their style by game conditions

1395 (Go development era, handicap, komi and color, time limits, opponent)

1396 or styles might express differently in various game stages.

1397 More professional players could be consulted on the findings

1398 and for style scales calibration.

1399 Impact of handicap games on by-strength

1400 $\vec p$ distribution should be also investigated.

1402 % TODO: Future research --- Sparse PCA

1405 We have proposed a way to extract summary pattern information from

1406 game collections and combined this with various data mining methods

1407 to show correspondence of our pattern summaries with various player

1408 meta-information like playing strength, era of play or playing style

1409 as ranked by expert players. We have implemented and measured our

1410 proposals in two case studies: per-rank characteristics of amateur

1411 players and per-player style/era characteristics of well-known

1412 professionals.

1414 While many details remain to be worked out,

1415 we have demonstrated that many significant correlations do exist and

1416 it is practically viable to infer the player meta-information from

1417 extracted pattern summaries. We proposed wide range of applications

1418 for such inference. Finally, we outlined some of the many possible

1419 directions of future work in this newly staked research field

1420 on the boundary of Computer Go, Data Mining and Go Theory.

1423 % if have a single appendix:

1424 %\appendix[Proof of the Zonklar Equations]

1425 % or

1426 %\appendix % for no appendix heading

1427 % do not use \section anymore after \appendix, only \section*

1428 % is possibly needed

1430 % use appendices with more than one appendix

1431 % then use \section to start each appendix

1432 % you must declare a \section before using any

1433 % \subsection or using \label (\appendices by itself

1434 % starts a section numbered zero.)

1435 %

1438 %\appendices

1439 %\section{Proof of the First Zonklar Equation}

1440 %Appendix one text goes here.

1441 %

1442 %% you can choose not to have a title for an appendix

1443 %% if you want by leaving the argument blank

1444 %\section{}

1445 %Appendix two text goes here.

1448 % use section* for acknowledgement

1452 Foremostly, we are very grateful for detailed input on specific go styles

1454 We appreciate X reviewing our paper, and helpful comments on our general methodology

1458 for the original research idea and acknowledge major inspiration

1462 % Can use something like this to put references on a page

1463 % by themselves when using endfloat and the captionsoff option.

1464 \ifCLASSOPTIONcaptionsoff

1465 \newpage

1466 \fi

1470 % trigger a \newpage just before the given reference

1471 % number - used to balance the columns on the last page

1472 % adjust value as needed - may need to be readjusted if

1473 % the document is modified later

1474 %\IEEEtriggeratref{8}

1475 % The "triggered" command can be changed if desired:

1476 %\IEEEtriggercmd{\enlargethispage{-5in}}

1478 % references section

1480 % can use a bibliography generated by BibTeX as a .bbl file

1481 % BibTeX documentation can be easily obtained at:

1482 % http://www.ctan.org/tex-archive/biblio/bibtex/contrib/doc/

1483 % The IEEEtran BibTeX style support page is at:

1484 % http://www.michaelshell.org/tex/ieeetran/bibtex/

1486 % argument is your BibTeX string definitions and bibliography database(s)

1488 %

1489 % <OR> manually copy in the resultant .bbl file

1490 % set second argument of \begin to the number of references

1491 % (used to reserve space for the reference number labels box)

1492 %\begin{thebibliography}{1}

1493 %

1494 %\bibitem{MasterMCTS}

1495 %

1496 %\end{thebibliography}

1498 % biography section

1499 %

1500 % If you have an EPS/PDF photo (graphicx package needed) extra braces are

1501 % needed around the contents of the optional argument to biography to prevent

1502 % the LaTeX parser from getting confused when it sees the complicated

1503 % \includegraphics command within an optional argument. (You could create

1504 % your own custom macro containing the \includegraphics command to make things

1505 % simpler here.)

1506 %\begin{biography}[{\includegraphics[width=1in,height=1.25in,clip,keepaspectratio]{mshell}}]{Michael Shell}

1507 % or if you just want to reserve a space for a photo:

1509 %\begin{IEEEbiography}{Petr Baudi\v{s}}

1510 %Biography text here.

1511 %\end{IEEEbiography}

1513 % if you will not have a photo at all:

1515 Received BSc degree in Informatics at Charles University, Prague in 2009,

1516 currently a graduate student.

1517 Doing research in the fields of Computer Go, Monte Carlo Methods

1518 and Version Control Systems.

1519 Plays Go with the rank of 2-kyu on European tournaments

1520 and 2-dan on the KGS Go Server.

1524 Received BSc degree in Informatics at Charles University, Prague in 2009,

1525 currently a graduate student.

1526 Doing research in the fields of Genetic Algorithms and Cognitive Sciences.

1527 TODO TODO TODO

1530 % insert where needed to balance the two columns on the last page with

1531 % biographies

1532 %\newpage

1534 %\begin{IEEEbiographynophoto}{Jane Doe}

1535 %Biography text here.

1536 %\end{IEEEbiographynophoto}

1538 % You can push biographies down or up by placing

1539 % a \vfill before or after them. The appropriate

1540 % use of \vfill depends on what kind of text is

1541 % on the last page and whether or not the columns

1542 % are being equalized.

1544 %\vfill

1546 % Can be used to pull up biographies so that the bottom of the last one

1547 % is flush with the other column.

1548 %\enlargethispage{-5in}

1552 % that's all folks