From 4c9915c294d766294207f8373b8e77c5e031d94f Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Wed, 10 Mar 2010 22:01:27 +0100 Subject: [PATCH] tex: Chi-squared -> Pearson r --- tex/gostyle.bib | 15 ++++++++------- tex/gostyle.tex | 41 ++++++++++++++++++++++------------------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/tex/gostyle.bib b/tex/gostyle.bib index 34acba7..5e74177 100644 --- a/tex/gostyle.bib +++ b/tex/gostyle.bib @@ -55,14 +55,15 @@ } @ARTICLE{Pearson, - author = {R. L. Plackett}, - title = {Karl Pearson and the Chi-Squared Test}, - journal = {International Statistical Review}, - year = {1983}, - volume = {51}, - pages = {59--72}, + author = {J. L. Rodgers and W. A. Nicewander}, + title = {Thirteen ways to look at the correlation coefficient}, + journal = {The American Statistician}, + year = {1988}, + month = {Feb}, + volume = {42}, number = {1}, - issn = {03067734}, + pages = {59--66}, + issn = {}, owner = {pasky} } diff --git a/tex/gostyle.tex b/tex/gostyle.tex index 9c4cafd..409a31d 100644 --- a/tex/gostyle.tex +++ b/tex/gostyle.tex @@ -511,14 +511,16 @@ The whole process is described in the Algorithm \ref{alg:pca}. \end{algorithmic} \end{algorithm} -We will want to find dependencies between PCA dimensions and dimensions -of some prior knowledge (player rank, style vector). For this, we use -the well-known {\em Pearson's $\chi^2$ test} \cite{Pearson}; the test -yields the probability of a null hypothesis that two distributions -are statistically independent, we will instead use the probability -of the alternative hypothesis that they are in fact dependent. +\label{pearson} +We will want to find correlations between PCA dimensions and +some prior knowledge (player rank, style vector). 
+We compute the well-known {\em Pearson product-moment correlation coefficient} (PMCC) \cite{Pearson} +values for this purpose, measuring the strength of the linear dependence% +\footnote{A desirable property of PMCC is that it is invariant to translations and rescaling +of the vectors.} +between the dimensions: -TODO: Chi-square computation. +$$ r_{X,Y} = {{\rm cov}(X,Y) \over \sigma_X \sigma_Y} $$ \subsection{Kohonen Maps} \label{koh} @@ -747,9 +749,8 @@ second dimension was four orders of magnitude smaller, with no discernable structure revealed within the lower-order eigenvectors.} (figure \ref{fig:strength_pca}). -In order to measure the accuracy of approximation of strength by the first dimension, -we have used the $\chi^2$ test, yielding probability $p=TODO$ that it is dependent -on the player strength. +We measure the accuracy of strength approximation by the first dimension +using Pearson's $r$ (see \ref{pearson}), yielding a satisfying value $r=TODO$. Using the eigenvector position directly for classification of players within the test group yields MSE TODO, thus providing reasonably satisfying accuracy. @@ -874,16 +875,14 @@ Chen Yaoye & $6.0 \pm 1.0$ & $4.0 \pm 1.0$ & $6.0 \pm 1.0$ & $5.5 \pm \label{fig:style_pca} \end{figure} -We have looked at the three most significant dimensions of the pattern data -yielded by the PCA analysis (fig. \ref{fig:style_pca}). We have again -performend $\chi^2$--test between the three most significant PCA dimensions -and dimensions of the prior knowledge style vectors to find correlations; -the found correlations are presented in table \ref{fig:style_chisq}. -We also list the characteristic spatial patterns of the PCA dimension -extremes (table \ref{fig:style_patterns}). +We have looked at the five most significant dimensions of the pattern data +yielded by the PCA analysis (fig. \ref{fig:style_pca} shows three). 
+We have again computed Pearson's $r$ for all combinations of PCA dimensions +and dimensions of the prior knowledge style vectors to find correlations. It is immediately -obvious that by far the most significant vector corresponds very well +obvious both from the measured $r$ and visual observation +that by far the most significant vector corresponds very well to the player territoriality,\footnote{Cho Chikun, perhaps the best-known super-territorial player, is not well visible in the cluster, but he is positioned just below $-0.5$ on the first dimension.} @@ -893,7 +892,11 @@ most obvious in the played shapes and sequences (that can obviously aim directly at taking secure territory or building center-oriented framework). -The other PCA dimensions are far less obvious --- TODO. +Other PCA dimensions are far less easy to identify and name, but there +certainly is some influence of the styles on the patterns; +the found correlations are presented in table \ref{fig:style_chisq}. +We also list the characteristic spatial patterns of the PCA dimension +extremes (table \ref{fig:style_patterns}). Kohonen map view. -- 2.11.4.GIT