From cbfac9b44e5cbcc600aa9e76010e9db2596ae857 Mon Sep 17 00:00:00 2001
From: Joe Moudrik
Date: Fri, 24 May 2013 17:45:18 +0200
Subject: [PATCH] clanek: features, ml

---
 clanek_go_congress/clanek.bib |  17 ++++
 clanek_go_congress/clanek.tex | 208 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 223 insertions(+), 2 deletions(-)

diff --git a/clanek_go_congress/clanek.bib b/clanek_go_congress/clanek.bib
index 94c9ef8..d296b22 100644
--- a/clanek_go_congress/clanek.bib
+++ b/clanek_go_congress/clanek.bib
@@ -9,6 +9,23 @@
   owner = "jm"
 }
 
+@MastersThesis{Moudrik13,
+  author = "Josef Moud{\v{r}\'{i}}k",
+  title = "Meta-learning methods for analyzing Go playing trends",
+  school = "Charles University, Faculty of Mathematics and Physics",
+  address = "Prague, Czech Republic",
+  year = "2013",
+}
+
+@ONLINE{GoAncientChina,
+  author = "John Fairbairn",
+  year = "1995",
+  title = "Go in Ancient China",
+  url = "http://www.pandanet.co.jp/English/essay/goancientchina.html",
+  cited = "15.\,1.\,2013",
+  owner = "jm"
+}
+
 @ONLINE{ wiki:Go,
   author = "Wikipedia",
   title = "Go (game) --- Wikipedia{,} The Free Encyclopedia",
diff --git a/clanek_go_congress/clanek.tex b/clanek_go_congress/clanek.tex
index 2bdd5ee..a92c8e5 100644
--- a/clanek_go_congress/clanek.tex
+++ b/clanek_go_congress/clanek.tex
@@ -1,5 +1,7 @@
 \documentclass[12pt,a4paper,notitlepage]{article}
 
+\usepackage[a4paper,vmargin={20mm,20mm},hmargin={20mm,20mm}]{geometry}
+
 %% Character encoding used: usually latin2, cp1250 or utf8:
 \usepackage[utf8]{inputenc}
@@ -118,17 +120,216 @@ Section~\ref{sec:disc} discusses applications and future work.
 
 \section{Feature Extraction}
 \label{sec:feat}
+This section presents the methods for extracting the evaluation
+vector (call it $ev$) from a set of games. Because we must
+distinguish between the two players in any particular game, each game
+in the set is accompanied by the color which specifies our player of
+interest. The sample is therefore regarded as a \emph{set of colored
+games}, $GC = \{ (game_1, color_1), \ldots\}$; for example, $color_1$
+specifies the player of interest in $game_1$.
+
+The evaluation vector $ev$ is composed by concatenating several
+sub-vectors we call \emph{features} -- examples include the
+aforementioned local patterns or statistics of sente and gote
+sequences, detailed in the rest of this section. Some of the
+explanations are simplified to fit the size of the paper; please see
+\citep{Moudrik13} for precise details.
+
+\subsection{Raw Game Processing}
+First, we specify how we process the games.\footnote{
+We use the standard \emph{.sgf} file format as input \cite{SGF}.
+}
+We have used the Pachi Go Engine~\citep{Pachi}, which -- apart from
+being a strong Go-playing program -- allows us to extract raw
+information from each game on a per-move basis.
+For each move,
+Pachi outputs a list of key-value pairs regarding the current move:
 
-\subsection{Game Processing}
+\begin{itemize}
+  \item \textbf{atari flag} --- whether the move put enemy stones in atari,
+  \item \textbf{atari escape flag} --- whether the move saved the player's own stones from atari,
+  \item \textbf{capture} --- the number of enemy stones the move captured,
+  \item \textbf{contiguity to last move} --- the gridcular distance (presented
+        below) from the last move,
+  \item \textbf{board edge distance} --- the distance from the nearest edge of the board,
+  \item \textbf{spatial pattern} --- configuration of stones around the played move.
+\end{itemize}
+
+We use this information to compute the higher-level features described
+below. The spatial pattern records the positions of stones around the
+current move up to a certain distance.\footnote{
+  The distance is given by the {\em gridcular} metric
+$d(x,y) = |\delta x| + |\delta y| + \max(|\delta x|, |\delta y|)$, which produces
+a circle-like structure on the Go board square grid \cite{SpatPat}.
+Spatial patterns of sizes 2 to 6 are considered.
+}
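+
+To illustrate, the following is a minimal sketch (in Python) of how
+the gridcular distance and the enumeration of a pattern neighborhood
+might look; the helper names \texttt{gridcular} and
+\texttt{neighborhood} are our illustrative choices, not part of
+Pachi's interface:
+\begin{verbatim}
+def gridcular(dx, dy):
+    """Gridcular distance: d = |dx| + |dy| + max(|dx|, |dy|)."""
+    return abs(dx) + abs(dy) + max(abs(dx), abs(dy))
+
+def neighborhood(x, y, d_max, board_size=19):
+    """Yield board points within gridcular distance d_max of (x, y)."""
+    for nx in range(board_size):
+        for ny in range(board_size):
+            if 0 < gridcular(nx - x, ny - y) <= d_max:
+                yield (nx, ny)
+\end{verbatim}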
 
 \subsection{Patterns}
-\subsection{Local Sente and Gote Sequences}
+The first feature collects statistics of the $N = 400$ most
+frequently occurring spatial patterns (together with both atari
+flags). The list of the $N$ most frequently played patterns is
+computed beforehand from the whole database of games.
+
+Given a set of colored games $GC$, we then count how many times each
+of the $N$ patterns was played -- thus obtaining a vector $c$,
+$|c| = 400$, of counts. Defined this way, however, the counts $c_i$
+grow proportionally to the number of games in $GC$. To maintain
+invariance with respect to the number of games in the sample, a
+normalization is needed. We do this by dividing $c$ by $|GC|$, though
+other schemes are possible, see \citep{Moudrik13}.
+
+\subsection{$\omega$-local Sente and Gote Sequences}
+Because the concept of sente and gote is very important in real
+games, we devised a statistic which tries to capture the distribution
+of sente and gote plays in the games from the sample. Because
+deciding which moves are sente or gote can be hard even for human
+players, we restricted ourselves to what we call $\omega$-local
+(sente and gote) sequences. This simplification rests on a clear
+assumption -- that the responses to a sente move are always local. We
+say that a move is $\omega$-local (with respect to the previous move)
+if its gridcular distance from the previous move is smaller than a
+fixed number $\omega$. Of course, this assumption might not always
+hold, but the feature proves to be useful nonetheless.
+
+We then partition each game into $\omega$-local sequences (that is,
+each move in the sequence is $\omega$-local with respect to the move
+directly preceding it) and observe whether the player who started the
+sequence differs from the player who ended it. If so, the
+$\omega$-local sequence is said to be sente for the player who
+started it, because he gets to play somewhere else first (tenuki).
+Similarly, if the player who started the sequence also had to respond
+last, we say that the sequence is gote for him. Based on this
+partitioning, we count the average number of sente and gote sequences
+per game in the sample $GC$; a sketch of the procedure is given
+below. These two numbers, along with their difference, form the
+second feature.
+
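+A minimal sketch of the partitioning (again Python, reusing the
+\texttt{gridcular} helper above; it assumes moves are given as board
+coordinates of strictly alternating players, with no passes):
+\begin{verbatim}
+def partition_omega_local(moves, omega):
+    """Split a game (a list of (x, y) coordinates) into
+    omega-local sequences of move indices."""
+    sequences, current = [], [0]
+    for i in range(1, len(moves)):
+        (px, py), (qx, qy) = moves[i - 1], moves[i]
+        if gridcular(qx - px, qy - py) < omega:
+            current.append(i)   # still local: same sequence
+        else:
+            sequences.append(current)
+            current = [i]       # non-local move: new sequence
+    sequences.append(current)
+    return sequences
+
+def sente_gote_counts(moves, omega):
+    """A sequence is sente for its starting player if the opponent
+    ended it (the starter got to tenuki first), gote otherwise."""
+    sente = gote = 0
+    for seq in partition_omega_local(moves, omega):
+        if seq[0] % 2 != seq[-1] % 2:   # parity = which player moved
+            sente += 1
+        else:
+            gote += 1
+    return sente, gote
+\end{verbatim}
+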
 \subsection{Border Distance}
+The third feature is a two-dimensional histogram counting the average
+number of moves in the sample played low or high in different game
+stages. The original idea was to help distinguish between territorial
+and influence-oriented moves in the opening.
+
+The first dimension is specified by the move's border distance, the
+second one by the number of the current move. The size of each
+dimension is given by intervals dividing the domains. We use
+$$ByMoves = \{ \langle 1, 10\rangle, \langle 11, 64\rangle, \langle 65, 200\rangle, \langle 201, \infty)\}$$
+for the move coordinate -- the motivation is to (very roughly)
+distinguish between the opening (say the first ten moves), the early
+middle game (moves 11--64), the middle game, and the endgame.
+The border distance dimension is given by
+$$ByDist = \{ \langle 1, 2\rangle, \langle 3, 3\rangle, \langle 4, 4\rangle, \langle 5, \infty)\}$$
+(distinguishing between the first two lines, the third line of
+territory, the fourth line of influence, and higher plays for the
+rest).
+
+Using the $ByMoves$ and $ByDist$ intervals to divide the domains, we
+obtain a histogram of $|ByMoves| \times |ByDist| = 16$ fields in
+total. For each move, we increase the count in the appropriate
+histogram field. In the end, the whole histogram is normalized to
+establish invariance with respect to the number of games scanned by
+dividing its elements by $|GC|$. These 16 numbers form the third
+feature.
+
 \subsection{Captured Stones}
+Alongside the border distance feature, we implemented a
+two-dimensional histogram which counts the numbers of captured stones
+in different game stages. The motivation is simple -- beginners
+especially tend to capture stones because ``they could'' instead of
+because it is the ``best move''. In the opening, for example, such a
+capture might be a grave mistake.
+
+As before, one of the dimensions is given by intervals
+$$ByMoves = \{ \langle 1, 60\rangle, \langle 61, 240\rangle, \langle 241, \infty)\}$$
+which try to capture the game stages (opening, middle game, endgame).
+The second dimension has a fixed size of three bins. Along with the
+number of captives of the player of interest (the first bin), we also
+count the number of his opponent's captives (the second bin) and the
+difference between the two numbers (the third bin). Together, we
+obtain a histogram of $|ByMoves| \times 3 = 9$ elements. These 9
+numbers (again normalized by dividing by $|GC|$) are the output of
+the fourth feature.
+
 \subsection{Win/Loss Statistics}
+Finally, we devised a simple feature which gathers statistics of wins
+and losses and whether they were by points or by resignation.\footnote{
+  We disregard forfeited, unfinished or jigo games in this feature
+  because the frequency of these events is so small that it would
+  require a very large dataset to utilize them reliably.
+}
+For example, quite a lot of weak players continue playing games that
+are already lost until the very end, mainly because their counting is
+not very good (they do not know there is no way to win), while
+professionals do not hesitate to resign if they think that nothing
+can be done.
+
+For the colored games of $GC$, we count how many times the player of
+interest:
+\begin{itemize}
+  \item won by points,
+  \item won by resignation,
+  \item lost by points,
+  \item and lost by resignation.
+\end{itemize}
+Again, we divide these four numbers by $|GC|$ to maintain invariance
+with respect to the number of games in $GC$. Furthermore, for the
+games won or lost by points, we count:
+\begin{itemize}
+  \item the average number of points the player won by in won games,
+  \item the average number of points he lost by in lost games.
+\end{itemize}
+These six numbers form the last feature.
 
 \section{Machine Learning}
 \label{sec:mach}
+So far, we have described how to turn a set of colored games $GC$
+into an evaluation. Now, we study how to utilize this evaluation. If
+we are to predict various player attributes, we need some input data
+to learn from. Suppose we have a dataset $D$ consisting of pairs
+$D = \{ (GC_i, y_i), \ldots\}$, where $GC_i$ corresponds to a set of
+colored games of the $i$-th player and $y_i$ is the target attribute.
+The $y_i$ might be fairly arbitrary, as long as it has \emph{some}
+relation to $GC_i$. For example, $y_i$ might be the $i$-th player's
+strength.
+
+Let us denote the evaluation process presented above as $eval$, and
+let $ev_i$ be the evaluation of the $i$-th player,
+$ev_i = eval(GC_i)$. We can then transform $D$ into
+$D_{ev} = \{(ev_i, y_i), \ldots\}$, which forms our training data.
+The task of our machine learning algorithm is to generalize the
+knowledge from the dataset $D_{ev}$ to predict the correct $y_X$ even
+for previously unseen $GC_X$. In the case of strength, we might
+therefore be able to predict the strength $y_X$ of an unknown player
+$X$ given a set of his games $GC_X$ (from which we can compute the
+evaluation $ev_X$).
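+
+As an illustration, the evaluation and the construction of $D_{ev}$
+might be assembled as follows (Python with NumPy; the extractor
+functions stand in for the features of Section~\ref{sec:feat} and are
+hypothetical names, not a fixed interface):
+\begin{verbatim}
+import numpy as np
+
+def eval_player(colored_games, extractors):
+    """eval: concatenate the feature sub-vectors (patterns,
+    omega-local sente/gote, border distance, captures, win/loss)
+    into a single evaluation vector ev."""
+    return np.concatenate([f(colored_games) for f in extractors])
+
+def make_training_data(D, extractors):
+    """Turn D = [(GC_i, y_i), ...] into D_ev as arrays (X, y)."""
+    X = np.array([eval_player(GC_i, extractors) for GC_i, _ in D])
+    y = np.array([y_i for _, y_i in D])
+    return X, y
+\end{verbatim}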
+
+In this work, we have used a bagged artificial neural network to
+learn this dependency. Neural networks are a standard technique in
+machine learning. The network is composed of simple computational
+units organized in a layered topology. Please see the monograph by
+\citet{haykin_nn} to learn more. We have used a simple feedforward
+neural network with 20 hidden units, trained using the RPROP
+algorithm \citep{Riedmiller1993}.
+
+Bagging \citep{breimanbag96} is a method which combines an ensemble
+of $N$ models (trained on differently sampled data) to improve their
+performance and robustness. In this work, we used $N = 20$. Please
+refer to the original paper to learn more about bagging.
+
+\subsection{Measuring the Performance}
+Given a dataset $D_{ev}$, we need to estimate the performance of a
+given machine learning algorithm (in our case, the bagged neural
+network). A performance measure allows us to compare different
+algorithms and gives an estimate of the method's precision on unseen
+inputs. A standard way to do this is to divide $D_{ev}$ into training
+and testing parts and compute the error of the method on the testing
+part.
+
+A commonly used measure is the mean square error ($MSE$), which
+estimates the variance of the error distribution. We use its square
+root, $RMSE = \sqrt{MSE}$, which estimates the standard deviation of
+the prediction error:
+$$ MSE = \frac{1}{|Ts|} \sum_{(ev, y) \in Ts}{ (predict(ev) - y)^2},$$
+where the machine learning model $predict$ is trained on the training
+data $Tr$, and $Ts$ denotes the testing data. We now describe how we
+split the data into the testing and training parts so that the error
+estimate is robust.
+
+\subsubsection*{Cross-Validation}
+
+Cross-validation is a standard statistical technique for the
+estimation of parameters. The idea is to split the data into $k$
+disjoint subsets (called \emph{folds}) and then iteratively compose
+the training and testing sets and measure errors. In the $i$-th of
+the $k$ iterations, the $i$-th fold is chosen as the testing data,
+and all the remaining $k-1$ folds form the training data. The
+division into folds is done randomly, so that the folds have
+approximately the same size. Refer to~\citep{crossval} to learn more.
+In this work, we have used 10-fold cross-validation.
 
 \section{Experiments and Results}
 \label{sec:expe}
@@ -139,6 +340,9 @@ Section~\ref{sec:disc} discusses applications and future work.
 
 \section{Conclusion}
 \label{sec:conc}
 
+\section{Implementation}
+\label{sec:impl}
+
 \bibliographystyle{abbrvnat}
 \bibliography{clanek}
-- 
2.11.4.GIT