From 2b001f98ebed4031008682ef3c097df2a9ff81ce Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Sun, 14 Mar 2010 03:17:35 +0100 Subject: [PATCH] tex: Sociomaps - details --- tex/gostyle.bib | 10 +++++++++- tex/gostyle.tex | 49 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/tex/gostyle.bib b/tex/gostyle.bib index 11ea184..eb5011b 100644 --- a/tex/gostyle.bib +++ b/tex/gostyle.bib @@ -107,6 +107,15 @@ url = {http://www.sociomap.com/} } +@book{Sociomaps, + author = {{H}\"{o}schl, {C}yril}, + title = {Visualization of Sociomaps}, + note = {{Bachelor Thesis}}, + year = {2006}, + location = {Praha}, + publisher = {MFF UK}, +} + @inproceedings{SpatPat, author = {Stern, David and Herbrich, Ralf and Graepel, Thore}, title = {Bayesian pattern ranking for move prediction in the game of Go}, @@ -124,7 +133,6 @@ title = { {C}omputing {E}lo {R}atings of {M}ove {P}atterns in the {G}ame of {G}o}, author = {{C}oulom, {R}{\'e}mi}, abstract = {{M}ove patterns are an essential method to incorporate domain knowledge into {G}o-playing programs. {T}his paper presents a new {B}ayesian technique for supervised learning of such patterns from game records, based on a generalization of {E}lo ratings. {E}ach sample move in the training data is considered as a victory of a team of pattern features. {E}lo ratings of individual pattern features are computed from these victories, and can be used in previously unseen positions to compute a probability distribution over legal moves. {I}n this approach, several pattern features may be combined, without an exponential cost in the number of features. {D}espite a very small number of training games (652), this algorithm outperforms most previous pattern-learning algorithms, both in terms of mean log-evidence (−2.69), and prediction rate (34.9%). {A} 19x19 {M}onte-{C}arlo program improved with these patterns reached the level of the strongest classical programs.}, - language = {{A}nglais}, affiliation = {{SEQUEL} - {INRIA} {F}uturs - {INRIA} - {CNRS} : {UMR}8022 - {CNRS} : {UMR}8146 - {U}niversit{\'e} des {S}ciences et {T}echnologies de {L}ille - {L}ille {I} - {U}niversit{\'e} {C}harles de {G}aulle - {L}ille {III} - {E}cole {C}entrale de {L}ille }, booktitle = {{C}omputer {G}ames {W}orkshop }, address = {{A}msterdam {P}ays-{B}as }, diff --git a/tex/gostyle.tex b/tex/gostyle.tex index 6f36a02..c5d5028 100644 --- a/tex/gostyle.tex +++ b/tex/gostyle.tex @@ -493,8 +493,7 @@ To assess the properties of gathered pattern vectors and their influence on playing styles, we process the data by several basic data minining techniques. -The first two methods {\em (analytic)} rely purely on data gathered -from the game collection +The first two methods {\em (analytic)} rely purely on single data set and serve to show internal structure and correlations within the data set. Principal Component Analysis finds orthogonal vector components that @@ -503,9 +502,10 @@ Reversing the process can indicate which patterns correlate with each component. Additionally, PCA can be used as vector preprocessing for methods that are negatively sensitive to pattern vector component correlations. -The~second method of Sociomaps -is TODO TODO TODO. - +The~second method of Sociomaps \cite{Sociomaps} creates spatial +representation of the data set elements (e.g. players) based on +similarity of their data set features; we can then project other +information on the map to illutrate its connection to the data set. Furthermore, we test several \emph{classification} methods that assign each pattern vector $\vec P$ an \emph{output vector} $\vec O$, @@ -595,7 +595,27 @@ $$ r_{X,Y} = {{\rm cov}(X,Y) \over \sigma_X \sigma_Y} $$ \subsection{Sociomaps} \label{soc} -TODO TODO. +Sociomaps are a general mechanism for visualising possibly assymetric +relationships on a 2D plane such that ordering of the maximum possible +object distances in the dataset is preserved in distances on the plane. + +In our particular case,% +\footnote{A special case of the {\em Subject-to-Object Relation Mapping (STORM)} indirect sociomap.} +we will consider a dataset $\vec S$ of small-dimensional +vectors $\vec s_i$ and determine projection $\varphi$ of all the $\vec s_i$ +to spatial coordinates of an Euclidean plane. +The $\varphi$ projection shall maximize the {\em three-way ordering} criterion: +ordering of any three members in the dataset and on the plane +(by Euclidean metric) must be the same. + +$$ \min_\varphi \sum_{i\ne j\ne k} \Phi(\varphi, i, j, k) $$ +$$ \Phi(\varphi, i, j, k) = \begin{cases} + 1 & \delta(s_i,s_j,s_k) = \delta(\varphi(i),\varphi(j),\varphi(k)) \\ + 0 & \hbox{otherwise} \end{cases} $$ +$$ \delta(a, b, c) = \begin{cases} + 1 & |a-b| > |a-c| \\ + 0 & |a-b| = |a-c| \\ + -1 & |a-b| < |a-c| \end{cases} $$ \subsection{k-nearest Neighbors Classifier} \label{knn} @@ -1249,6 +1269,10 @@ However, we do not use the extended normalization results since they produced noticeably less accurate classifiers in all dimensions, including $\omega$ and $\alpha$. +We believe that the next step in interpreting our analytical results +will be more refined prior information input +and precise analysis of the outputs by Go experts. + \begin{figure}[!t] \centering \includegraphics[width=3.5in,angle=-90]{sociomap} @@ -1270,9 +1294,14 @@ determined using first two PCA dimensions $R_1,R_2$ and their eigenvalues $\lambda_1,\lambda_2$ as their linear combination: $$ h=\lambda_1R_1 + \lambda_2R_2 $$ -We believe that the next step -in interpreting our results will be more refined prior information input -and precise analysis of the outputs by Go experts. +We can observe that the terrain of the sociomap is reasonably +``smooth'', again demonstrating some level of connection between +the style vectors and data-mined information. High countour density +indicates some discrepancy; in case of Takemiya Masaki and Yi Ch'ang-ho, +this seems to be merely an issue of scale, +while the Rui Naiwei --- Gu Li cliff suggests a genuine problem; +we cannot say now whether it is because of imprecise prior information +or bad approximation abilities of our model. \subsection{Style Classification} @@ -1291,7 +1320,7 @@ data. The network's output was afterwards rescaled back to allow for MSE compari All input vectors were preprocessed using PCA, reducing the input dimension from $400$ to $23$. \subsubsection{Cross-validation} -To compare and evaluate both methods, we have performed $5$-fold cross validation \cite{TODO} +To compare and evaluate both methods, we have performed $5$-fold cross validation and compared their performance with a~random classifier. In the $5$-fold cross-validation, we randomly divide the training set (organized by players) into $5$ distinct parts with comparable -- 2.11.4.GIT