From 1fce0ff0fda9be973dc9d414fa8a5153e87569f8 Mon Sep 17 00:00:00 2001 From: AJ Rossini Date: Mon, 6 Jul 2009 04:56:59 +0200 Subject: [PATCH] Tony's DSC talk, initial useless version --- Doc/talks/Rossini-DSC-July2009.tex | 1096 ++++++++++++++++++++++++++++++++++++ 1 file changed, 1096 insertions(+) create mode 100644 Doc/talks/Rossini-DSC-July2009.tex diff --git a/Doc/talks/Rossini-DSC-July2009.tex b/Doc/talks/Rossini-DSC-July2009.tex new file mode 100644 index 0000000..f1e36fe --- /dev/null +++ b/Doc/talks/Rossini-DSC-July2009.tex @@ -0,0 +1,1096 @@ +\documentclass{beamer} + +\mode +{ + \usetheme{classic} + \setbeamercovered{transparent} +} + +\usepackage[english]{babel} +\usepackage[latin1]{inputenc} +\usepackage{times} +\usepackage[T1]{fontenc} + + +\title[CLS]{Common Lisp Statistics} +\subtitle{Using History to design better data analysis environments} +\author[Rossini]{Anthony~(Tony)~Rossini} + +\institute[Novartis and University of Washington] % (optional, but mostly needed) +{ + Group Head, Modeling and Simulation\\ + Novartis Pharma AG, Switzerland + \and + Affiliate Assoc Prof, Biomedical and Health Informatics\\ + University of Washington, USA} + +\date[DSC2009]{DSC 2009, Copenhagen} +\subject{Statistical Computing Environments} + +\begin{document} + +\begin{frame} + \titlepage +\end{frame} + +\begin{frame}{Outline} + \tableofcontents +\end{frame} + +% Structuring a talk is a difficult task and the following structure +% may not be suitable. Here are some rules that apply for this +% solution: + +% - Exactly two or three sections (other than the summary). +% - At *most* three subsections per section. +% - Talk about 30s to 2min per frame. So there should be between about +% 15 and 30 frames, all told. + +% - A conference audience is likely to know very little of what you +% are going to talk about. So *simplify*! +% - In a 20min talk, getting the main ideas across is hard +% enough. Leave out details, even if it means being less precise than +% you think necessary. +% - If you omit details that are vital to the proof/implementation, +% just say so once. Everybody will be happy with that. + +\section{What Works?} + +\begin{frame}{Silly Visualization Example} +\includegraphics[width=3in,height=3in]{/home/tony/test1.png} +\end{frame} + + +\begin{frame}{Linear Regression} + +\end{frame} + + +\begin{frame}{Descriptives} + +\end{frame} + +\begin{frame}{Data Management} + +\end{frame} + + +\begin{frame}{Workflow Management} + +\end{frame} + + +\section{The Practical} +\label{sec:practice} + + +\begin{frame}[fragile]{Graphics Device} +\begin{verbatim} +(defparameter *frame2* + (as-frame + (create-xlib-image-context 200 200) + :background-color +white+)) +(bind ((#2A((f1 f2) (f3 f4)) + (split-frame *frame2* + (percent 50) (percent 50)))) + (defparameter *f1* f1) ; bottom left + (defparameter *f2* f2) ; bottom right 3 4 + (defparameter *f3* f3) ; top left 1 2 + (defparameter *f4* f4)) ; top right +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Functions to Plot} +\begin{verbatim} +(plot-function *f1* #'sin + (interval-of 0 2) + :x-title "x" :y-title "sin(x)") +(plot-function *f2* #'cos (interval-of 0 2) + :x-title "x" :y-title "cos(x)") +(plot-function *f3* #'tan (interval-of 0 2) + :x-title "x" :y-title "tan(x)") +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Things to Plot} +\begin{verbatim} +(let* ((n 500) + (xs (num-sequence + :from 0 :to 10 :length n)) + (ys (map 'vector + #'(lambda (x) (+ x 8 (random 4.0))) + xs)) + (weights + (replicate #'(lambda () (1+ (random 10))) + n 'fixnum)) + (da (plot-simple *f4* + (interval-of 0 10) (interval-of 10 20) + :x-title "x" :y-title "y"))) + (draw-symbols da xs ys :weights weights)) +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Copying existing graphics} + And we generated the figure on the first page by: +\begin{verbatim} +(xlib-image-context-to-png + (context *f1*) + "/home/tony/test1.png") +\end{verbatim} +\end{frame} + + +\section{Common Lisp Statistics} + +\begin{frame}{Why CLS?} + \begin{itemize} + \item a component-based structure for statistical computing + \item ability to leverage non-statisticians interested in computing + technologies (compilers, protocols, interfaces) + \end{itemize} +\end{frame} + +\begin{frame}{Current Functionality} + \begin{itemize} + \item dataframes (similar to R) + \item Basic regression (similar to XLispStat) + \item matrix storage both in foreign and lisp-centric areas. + \item LAPACK (small percentage, increasing), working with both + matrix storage types + \item static graphics (X11) including preliminary grid functionality based + on CAIRO. Generation of PNG files from graphics windows. + \item CSV file support + \item Common Lisp! + \end{itemize} +\end{frame} + +\begin{frame}{Computational Environment Supported} + \begin{itemize} + \item Should work on Linux, with recent SBCL versions + \item Definitely works on bleeding edge Debian (unstable). + \item Has worked on 4 different people's environments (not quite, + but sort of requires a \verb+/home/tony/+ !) + \item + \end{itemize} +\end{frame} + +\begin{frame}{Common Lisp} + advanced iteration +\end{frame} + + +\begin{frame}[fragile]{Finding out things} + \begin{itemize} + \item CL-NUMLIB + num-sequence :from LOW to: HIGH :length SEQ-LENGTH + seq(from,to,by/length) + \item +\begin{verbatim} +(documentation + 'cl-numlib:num-sequence + 'function) +\end{verbatim} + \item This + \end{itemize} +\end{frame} + + +\section{Computable Statistics} + +\begin{frame}{Why are we doing this?} + Computable and Executable Statistics +\end{frame} + +\begin{frame}{Can we compute with them?} + 3 Examples: + \begin{itemize} + \item Research + \item Consulting + \item Reimplementation + \end{itemize} + Consider whether one can ``compute'' with the information given? +\end{frame} + +\begin{frame}[fragile]{Example 1: Theory\ldots} + \label{example1} + Let $f(x;\theta)$ describe the likelihood of XX under the following + assumptions. + \begin{enumerate} + \item assumption-1 + \item assumption-2 + \end{enumerate} + Then if we use the following algorithm: + \begin{enumerate} + \item step-1 + \item step-2 + \end{enumerate} + then $\hat{\theta}$ should be $N(0,\hat\sigma^2)$ with the following + characteristics\ldots +\end{frame} + +\begin{frame} + \frametitle{Can we compute, using this description?} + Given the information at hand: + \begin{itemize} + \item we ought to have a framework for initial coding for the + actual simulations (test-first!) + \item the implementation is somewhat clear + \item We should ask: what theorems have similar assumptions? + \item We should ask: what theorems have similar conclusions but + different assumptions? + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Realizing Theory} + \small{ +\begin{verbatim} +(define-theorem my-proposed-theorem + (:theorem-type '(distribution-properties + frequentist + likelihood)) + (:assumes '(assumption-1 assumption-2)) + (:likelihood-form + (defun likelihood (data theta gamma) + (exponential-family theta gamma))) + (:compute-by + '(progn + (compute-starting-values thetahat gammahat) + (until (convergence) + (setf convergence + (or (step-1 thetahat) + (step-2 gammahat)))))) + (:claim (assert + (and (equal-distribution thetahat 'normal) + (equal-distribution gammahat 'normal))))) +\end{verbatim} + } +\end{frame} + +\begin{frame}[fragile]{It would be nice to have} +\begin{verbatim} + (theorem-veracity 'my-proposed-theorem) +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{and why not...?} +\begin{verbatim} + (when (theorem-veracity + 'my-proposed-theorem) + (write-paper 'my-proposed-theorem + :style :JASA + :output-format + '(LaTeX MSWord))) +\end{verbatim} +\end{frame} + +\begin{frame}{Comments} + \begin{itemize} + \item The general problem is very difficult + \item Some progress has been made in small areas of basic + statistics: currently working on linear regression (LS-based, + Normal-bayesian) and the T-test. + \item Areas targetted for medium-term future: resampling methods and + similar algorithms. + \end{itemize} + +\end{frame} + +\begin{frame} + \frametitle{Example 2: Practice\ldots} + \label{example2} + The dataset comes from a series of clinical trials. We model the + primary endpoint, ``relief'', as a binary random variable. There is + a random trial effect on relief as well as severity due to + differences in recruitment and inclusion/exclusion criteria. +\end{frame} + +\begin{frame} + \frametitle{Can we compute, using this description?} + \begin{itemize} + \item With a real such description, it is clear what some of the + potential models might be for this dataset + \item It should be clear how to start thinking of a data dictionary + for this problem. + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Can we compute?} +\begin{verbatim} + (dataset-metadata paper-1 + :context 'clinical-trials + :variables '((relief :model-type dependent + :distribution binary) + (trial :model-type independent + :distribution categorical) + (disease-severity)) + :metadata '(inclusion-criteria + exclusion-criteria + recruitment-rate)) + (propose-analysis paper-1) + ; => '(tables + ; (logistic regression)) +\end{verbatim} +\end{frame} + +\begin{frame}{Example 3: The Round-trip\ldots} + \label{example3} + The first examples describe ``ideas $\rightarrow$ code'' + + Consider the last time you read someone else's implementation of a + statistical procedure (i.e. R package code). When you read the + code, could you see: + \begin{itemize} + \item the assumptions used? + \item the algorithm implemented? + \item practical guidance for when you might select the algorithm + over others? + \item practical guidance for when you might select the + implementation over others? + \end{itemize} + These are usually components of any reasonable journal article. + \textit{(Q: have you actually read an R package that wasn't yours?)} +\end{frame} + +\begin{frame}{Exercise left to the reader!} + + (aside: I have been looking at the \textbf{stats} and \textbf{lme4} + packages recently -- \textit{for me}, very clear numerically, much + less so statistically) +\end{frame} + + + +\section{Discussion} + +\begin{frame}{Conclusion} + \begin{itemize} + \item Numerics: Linear algebra basics done -- full development + \end{itemize} +\end{frame} + + +\end{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +\section{Preliminaries} + +\subsection{Context} + +\begin{frame}{Goals for this Talk}{(define, strategic approach, + justify)} + + \begin{itemize} + \item To describe the concept of \alert{computable and executable + statistics}, placing it in a historical context. + + \item To demonstrate that \alert{a research program} + implemented through simple steps can increase the efficiency of + statistical computing approaches by clearly describing both: + \begin{itemize} + \item numerical characteristics of procedures, + \item statistical concepts driving them. + \end{itemize} + + \item To justify that the \alert{approach is worthwhile} and + represents a staged effort towards \alert{increased use of best + practices}. + \end{itemize} + (unfortunately, the last is still incomplete) +\end{frame} + + +\begin{frame}{Historical Computing Languages} + \begin{itemize} + \item FORTRAN : FORmula TRANslator. Original numerical computing + language, designed for clean implementation of numerical + algorithms + \item LISP : LISt Processor. Associated with symbolic + manipulation, AI, and knowledge approaches + \end{itemize} + + They represent the 2 generalized needs of statistical computing, + which could be summarized as + \begin{itemize} + \item algorithms/numerics, + \item elicitation, communication, and generation of knowledge (``data + analysis'') + \end{itemize} +\end{frame} + +\begin{frame}{Statistical Computing Environments} + + Past: + \begin{itemize} + \item SPSS / BMDP / SAS + \item S ( S, S-PLUS, R) + \item LispStat ( XLispStat, ViSta, ARC , CommonLispStat ) ; QUAIL + \item XGobi (Orca / GGobi / Statistical Reality Engine) + \item MiniTab + \item Stata + \item DataDesk + \item Augsburg Impressionist series (MANET, + \item Excel + \end{itemize} + many others... + +\end{frame} + +\begin{frame}{How many are left?} + + \begin{itemize} + \item R + \item SAS + \item SPSS + \item Stata + \item Minitab + \item very few others... + \end{itemize} + ``R is the Microsoft of the statistical computing world'' -- anonymous. +\end{frame} + +\begin{frame}{Selection Pressure} + \begin{itemize} + \item the R user population is growing rapidly, fueled by critical + mass, quality, and value + \item R is a great system for applied data analysis + \item R is not such a great system for research into statistical + computing (backwards compatibility, inertia due to user population) + \end{itemize} + There is a need for alternative experiments for developing new + approaches/ideas/concepts. +\end{frame} + +\begin{frame}{Philosophically, why Common Lisp?} + Philosophically: + \begin{itemize} + \item Lisp can cleanly present computational intentions, both + symbolically and numerically. + \item Semantics and context are important: well supported by Lisp + paradigms. + \item Lisp's parentheses describe singular, multi-scale, + \alert{complete thoughts}. + \end{itemize} + +\end{frame} + +\begin{frame}{Technically, why Common Lisp?} + \begin{itemize} + \item interactive COMPILED language (``R with a compiler'') + \item CLOS is R's S4 object system ``done right''. + \item clean semantics: modality, typing, can be expressed the way + one wants it. + \item programs are data, data are programs, leading to + \item Most modern computing tools available (XML, WWW technologies) + \item ``executable XML'' + \end{itemize} + Common Lisp is very close in usage to how people currently use R + (mostly interactive, some batch, and a wish for compilation efficiency). +\end{frame} + +\subsection{Background} + +\begin{frame} + \frametitle{Desire: Semantics and Statistics} + \begin{itemize} + \item The semantic web (content which is self-descriptive) is an + interesting and potentially useful idea. + + \item + Biological informatics support (GO, Entrez) has allowed for + precise definitions of concepts in biology. + + \item It is a shame that a field like statistics, requiring such + precision, has less than an imprecise and temporally instable + field such as biology\ldots + \end{itemize} + + How can we express statistical work (research, applied work) which + is both human and computer readable (perhaps subject to + transformations first)? +\end{frame} + + +% \subsection{Context} + +% \begin{frame}{Context}{(where I'm coming from, my ``priors'')} +% \begin{itemize} +% \item Pharmaceutical Industry +% \item Modeling and Simulation uses mathematical models/constructs to +% record beliefs (biology, pharmacology, clinical science) for +% explication, clinical team alignment, decision support, and +% quality. +% \item My work at Novartis is at the intersection of biomedical +% informatics, statistics, and mathematical modeling. +% \item As manager: I need a mix of applications and novel research development to +% solve our challenges better, faster, more efficiently. +% \item Data analysis is a specialized approach to computer +% programming, \alert{different} than applications programming or +% systems programming. +% \end{itemize} +% \end{frame} + + +\subsection{Literate Programming is insufficient} + +\begin{frame}{Literate Statistical Practice.} + \begin{enumerate} + \item Literate Programming applied to data analysis (Rossini, 1997/2001) + \item among the \alert{most annoying} techniques to integrate into + work-flow if one is not perfectly methodological. + \item Some tools: + \begin{itemize} + \item ESS: supports interactive creation of literate programs. + \item Sweave: tool which exemplifies reporting context; odfWeave + primarily simplifies reporting. + \item Roxygen: primarily supports a literate programming + documentation style, not a literate data analysis programming + style. + \end{itemize} + \item ROI demonstrated in specialized cases: BioConductor. + \item \alert{usually done after the fact} (final step of work-flow) + as a documentation/computational reproducibility technique, rarely + integrated into work-flow. + \end{enumerate} + Many contributors: + Knuth, Claerbout, Carey, de Leeuw, Leisch, Gentleman, Temple-Lang, + \ldots{} +\end{frame} + +\begin{frame} + \frametitle{Literate Programming} + \framesubtitle{Why isn't it enough for Data Analysis?} + + Only 2 contexts: (executable) code and documentation. Fine for + application programming, but for data analysis, we could benefit + from: + \begin{itemize} + \item classification of statistical procedures + \item descriptions of assumptions + \item pragmatic recommendations + \item inheritance of structure through the work-flow of a + statistical methodology or data analysis project + \item datasets and metadata + \end{itemize} + Concept: ontologies describing mathematical assumptions, applications + of methods, work-flow, and statistical data structures can enable + machine communication. + + (i.e. informatics framework ala biology) +\end{frame} + + +\begin{frame}{Communication in Statistical Practice}{\ldots is essential for \ldots} + \begin{itemize} + \item finding + \item explanations + \item agreement + \item receiving information + \end{itemize} + \alert{``machine-readable'' communication/computation lets the + computer help} \\ + Semantic Web is about ``machine-enabled computability''. +\end{frame} + +\begin{frame} \frametitle{Semantics} + \framesubtitle{One definition: description and context} + + Interoperability is the key, with respect to + \begin{itemize} + \item ``Finding things'' + \item Applications and activities with related functionality + \begin{itemize} + \item moving information from one state to another (paper, journal + article, computer program) + \item computer programs which implement solutions to similar tasks + \end{itemize} + \end{itemize} +\end{frame} + + +\begin{frame}{Statistical Practice is somewhat restricted} + {...but in a good sense, enabling potential for semantics...} + + There is a restrictable set of intended actions for what can be done + -- the critical goal is to be able to make a difference by + accelerating activities that should be ``computable'': + \begin{itemize} + \item restricted natural language processing + \item mathematical translation + \item common description of activities for simpler programming/data + analysis (S approach to objects and methods) + \end{itemize} + R is a good basic start (model formulation approach, simple + ``programming with data'' paradigm); we should see if we can do + better! +\end{frame} + +\begin{frame}{Computable and Executable Statistics requires} + + \begin{itemize} + \item approaches to describe data and metadata (``data'') + \begin{itemize} + \item semantic WWW + \item metadata management and integration, driving + \item data integration + \end{itemize} + \item approaches to describe data analysis methods (``models'') + \begin{itemize} + \item quantitatively: many ontologies (AMS, etc), few meeting + statistical needs. + \item many substantive fields have implementations + (bioinformatics, etc) but not well focused. + \end{itemize} + \item approaches to describe the specific form of interaction + (``instances of models'') + \begin{itemize} + \item Original idea behind ``Literate Statistical Analysis''. + \item That idea is suboptimal, more structure needed (not + necessarily built upon existing...). + \end{itemize} + \end{itemize} +\end{frame} + +\subsection{Common Lisp Statistics} + +\begin{frame} + \frametitle{Interactive Programming} + \framesubtitle{Everything goes back to being Lisp-like} + \begin{itemize} + \item Interactive programming (as originating with Lisp): works + extremely well for data analysis (Lisp being the original + ``programming with data'' language). + \item Theories/methods for how to do this are reflected in styles + for using R. + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Lisp} + + Lisp (LISt Processor) is different than most high-level computing + languages, and is very old (1956). Lisp is built on lists of things + which are evaluatable. +\begin{verbatim} +(functionName data1 data2 data3) +\end{verbatim} + or ``quoted'': +\begin{verbatim} +'(functionName data1 data2 data3) +\end{verbatim} + which is shorthand for +\begin{verbatim} +(list functionName data1 data2 data3) +\end{verbatim} + The difference is important -- lists of data (the second/third) are + not (yet?!) functions applied to (unencapsulated lists of) data (the first). +\end{frame} + +\begin{frame} + \frametitle{Features} + \begin{itemize} + \item Data and Functions semantically the same + \item Natural interactive use through functional programming with + side effects + \item Batch is a simplification of interactive -- not a special mode! + \end{itemize} +\end{frame} + + + +\begin{frame}[fragile]{Representation: XML and Lisp}{executing your data} + Many people are familiar with XML: +\begin{verbatim} +Tony Rossini +\end{verbatim} + which is shorter in Lisp: +\begin{verbatim} +(name "Tony Rossini" :phone "+41613674557") +\end{verbatim} + \begin{itemize} + \item Lisp ``parens'', universally hated by unbelievers, are + wonderful for denoting when a ``concept is complete''. + \item Why can't your data self-execute? + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Numerics with Lisp} + \begin{itemize} + \item addition of rational numbers and arithmetic + \item example for mean +\begin{verbatim} + (defun mean (x) + (checktype x 'vector-like) + (/ (loop for i from 0 to (- (nelts *x*) 1) + summing (vref *x* i)) + (nelts *x*))) +\end{verbatim} + \item example for variance +\begin{verbatim} +(defun variance (x) + (let ((meanx (mean x)) + (nm1 (1- (nelts x)))) + (/ (loop for i from 0 to nm1 + summing (power (- (vref *x* i) meanx) 2) + nm1)))) +\end{verbatim} + \item But through macros, \verb+(vref *x* i)+ could be + \verb+#V(X[i])+ or your favorite syntax. + \end{itemize} + +\end{frame} + + +\begin{frame}{Common Lisp Statistics 1} + \begin{itemize} + \item Originally based on LispStat (reusability) + \item Re-factored structure (some numerics worked with a 1990-era code base). + \item Current activities: + \begin{enumerate} + \item numerics redone using CFFI-based BLAS/LAPLACK (cl-blapack) + \item matrix interface based on MatLisp + \item starting design of a user interface system (interfaces, + visuals). + \item general framework for model specification (regression, + likelihood, ODEs) + \item general framework for algorithm specification (bootstrap, + MLE, algorithmic data anaylsis methods). + \end{enumerate} + \end{itemize} +\end{frame} + +\begin{frame}{Common Lisp Statistics 2} + + \begin{itemize} + \item Implemented using SBCL. Contributed fixes for + Clozure/OpenMCL. Goal to target CLISP + \item Supports LispStat prototype object system + \item Package-based design -- only use the components you need, or + the components whose API you like. + \end{itemize} +\end{frame} + +\section{Discussion} + +\begin{frame} + \frametitle{Outlook} + \begin{itemize} + \item Semantics and Computability have captured a great deal of + attention in the informatics and business computing R\&D worlds + \item Statistically-driven Decision Making and Knowledge Discovery + is, with high likelihood, the next challenging stage after data + integration. + \item Statistical practice (theory and application) can be enhanced, + made more efficient, providing increased benefit to organizations + and groups using appropriate methods. + \item Lisp as a language, shares characteristics of both Latin + (difficult dead language useful for classical training) and German + (difficult living language useful for general life). Of course, + for some people, they are not difficult. + \end{itemize} + +\end{frame} + +\begin{frame} + The research program described in this talk is currently driving the + design of CommonLisp Stat, which leverages concepts and approaches + from the dead and moribund LispStat project. + + \begin{itemize} + \item \url{http://repo.or.cz/w/CommonLispStat.git/} + \item \url{http://www.github.com/blindglobe/} + \end{itemize} + +\end{frame} +\begin{frame}{Final Comment} + + \begin{itemize} + \item In the Pharma industry, it is all about getting the right + drugs to the patient faster. Data analysis systems seriously + impact this process, being potentially an impediment or an + accelerator. + + \begin{itemize} + \item \alert{Information technologies can increase the efficiency + of statistical practice}, though innovation change management + must be taking into account. (i.e. Statistical practice, while + considered by some an ``art form'', can benefit from + industrialization). + \item \alert{Lisp's features match the basic requirements we need} + (dichotomy: programs as data, data as programs). Sales pitch, + though... + \item Outlook: Lots of work and experimentation to do! + \end{itemize} + \item {\tiny Gratuitous Advert: We are hiring, have student + internships (undergrad, grad students), and a visiting faculty + program. Talk with me if possibly interested.} + \end{itemize} +\end{frame} + + +% % All of the following is optional and typically not needed. +% \appendix + + +% \section*{\appendixname} + + +% \begin{frame} \frametitle{Complements and Backup} +% No more, stop here. Questions? (now or later). +% \end{frame} + +% \begin{frame}{The Industrial Challenge.}{Getting the Consulting Right.} +% % - A title should summarize the slide in an understandable fashion +% % for anyone how does not follow everything on the slide itself. + +% \begin{itemize} +% \item Recording assumptions for the next data analyst, reviewer. +% Use \texttt{itemize} a lot. +% \item +% Use very short sentences or short phrases. +% \end{itemize} +% \end{frame} + + +% \begin{frame}{The Industrial Challenge.}{Getting the Right Research Fast.} +% % - A title should summarize the slide in an understandable fashion +% % for anyone how does not follow everything on the slide itself. + +% \begin{itemize} +% \item +% Use \texttt{itemize} a lot. +% \item +% Use very short sentences or short phrases. +% \end{itemize} +% \end{frame} + + +% \begin{frame}{Explicating the Work-flow}{QA/QC-based improvements.} + + +% \end{frame} + +% \section{Motivation} + +% \subsection{IT Can Speed up Deliverables in Statistical Practice} + +% \begin{frame}{Our Generic Work-flow and Life-cycle} +% {describing most data analytic activities} +% Workflow: +% \begin{enumerate} +% \item Scope out the problem +% \item Sketch out a potential solution +% \item Implement until road-blocks appear +% \item Deliver results +% \end{enumerate} + +% Lifecycle: +% \begin{enumerate} +% \item paper sketch +% \item 1st e-draft of text/code/date (iterate to \#1, discarding) +% \item cycle through work +% \item publish +% \item ``throw-away'' +% \end{enumerate} +% but there is valuble information that could enable the next +% generation! +% \end{frame} + +% \begin{frame}[fragile]{Paper $\rightarrow$ Computer $\rightarrow$ Article $\rightarrow$ Computer}{Cut and Paste makes for large errors.} +% \begin{itemize} +% \item Problems in a regulatory setting +% \item Regulatory issues are just ``best practices'' +% \end{itemize} + +% Why do we ``copy/paste'', or analogously, restart our work? + +% pro: +% \begin{itemize} +% \item every time we repeat, we reinforce the idea in our brain +% \item review of ideas can help improve them +% \end{itemize} +% con: +% \begin{itemize} +% \item inefficiency +% \item introduction of mistakes +% \item loss of historical context +% \item changes to earlier work (on a different development branch) +% can not propagate. +% \end{itemize} +% \end{frame} + +% \section{Semantics and Statistical Practice} + + +% \begin{frame} +% \frametitle{Statistical Activity Leads to Reports} +% \framesubtitle{You read what you know, do you understand it?} + +% How can we improve the communication of the ideas we have? + +% Precision of communication? + +% \end{frame} + + + +% \begin{frame} \frametitle{Communication Requires Context} +% \framesubtitle{Intentions imply more than one might like...} + +% \begin{itemize} +% \item Consideration of what we might do +% \item Applications with related functionality +% \end{itemize} +% \end{frame} + + + +% \begin{frame} +% \frametitle{Design Patterns} +% \framesubtitle{Supporting Work-flow Transitions} + +% (joint work with H Wickham): The point of this research program is +% not to describe what to do at any particular stage of work, but to +% encourage researchers and practitioners to consider how the +% translation and transfer of information between stages so that work +% is not lost. + +% Examples of stages in a work-flow: +% \begin{itemize} +% \item planning, execution, reporting; +% \item scoping, illustrative examples or counter examples, algorithmic construction, +% article writing. +% \item descriptive statistics, preliminary inferential analysis, +% model/assumption checking, final inferential analysis, +% communication of scientific results +% \end{itemize} +% Description of work-flows is essential to initiating discussions on +% quality/efficiency of approaches to work. +% \end{frame} + +% \section{Design Challenges} + +% \begin{frame} +% \frametitle{Activities are enhanced by support} + +% \begin{itemize} +% \item Mathematical manipulation can be enhanced by symbolic +% computation +% \item Statistical programming can be enabled by examples and related +% algorithm implementation +% \item Datasets, to a limited extent, can self-describe. +% \end{itemize} +% \end{frame} + +% \begin{frame} +% \frametitle{Executable and Computable Science} + +% Use of algorithms and construction to describe how things work. + +% Support for agent-based approaches +% \end{frame} + + +% \begin{frame} +% \frametitle{What is Data? Metadata?} + +% Data: what we've observed + +% MetaData: context for observations, enables semantics. +% \end{frame} + + + + +% % \begin{frame}[fragile] +% % \frametitle{Defining Variables} +% % \framesubtitle{Setting variables} +% % \begin{verbatim} +% % (setq ) +% % \end{verbatim} +% % Example: +% % \begin{verbatim} +% % (setq ess-source-directory +% % "/home/rossini/R-src") +% % \end{verbatim} +% % \end{frame} + +% % \begin{frame}[fragile] +% % \frametitle{Defining on the fly} +% % \begin{verbatim} +% % (setq ess-source-directory +% % (lambda () (file-name-as-directory +% % (expand-file-name +% % (concat (default-directory) +% % ess-suffix "-src"))))) +% % \end{verbatim} +% % (Lambda-expressions are anonymous functions, i.e. ``instant-functions'') +% % \end{frame} + + +% % \begin{frame}[fragile] +% % \frametitle{Function Reuse} +% % By naming the function, we could make the previous example reusable +% % (if possible): +% % \begin{verbatim} +% % (defun my-src-directory () +% % (file-name-as-directory +% % (expand-file-name +% % (concat (default-directory) +% % ess-suffix "-src")))) +% % \end{verbatim} +% % Example: +% % \begin{verbatim} +% % (setq ess-source-directory (my-src-directory)) +% % \end{verbatim} +% % \end{frame} + + +% % \begin{frame} +% % \frametitle{Equality Among Packages} +% % \begin{itemize} +% % \item more/less equal can be described specifically through +% % overriding imports. +% % \end{itemize} +% % \end{frame} + + +% \subsection*{For Further Reading} + +% \begin{frame}[allowframebreaks] +% \frametitle{Related Material} + +% \begin{thebibliography}{10} + +% \beamertemplatebookbibitems +% % Start with overview books. + +% \bibitem{LispStat1990} +% L.~Tierney +% \newblock {\em LispStat}. + +% \beamertemplatearticlebibitems +% % Followed by interesting articles. Keep the list short. + +% \bibitem{Rossini2001} +% AJ.~Rossini +% \newblock Literate Statistical Practice +% \newblock {\em Proceedings of the Conference on Distributed +% Statistical Computing}, 2001. + +% \bibitem{RossiniLeisch2003} +% AJ.~Rossini and F.~Leisch +% \newblock Literate Statistical Practice +% \newblock {\em Technical Report Series, University of Washington +% Department of Biostatistics}, 2003. + +% \beamertemplatearrowbibitems +% % Followed by interesting articles. Keep the list short. + +% \bibitem{CLS} +% Common Lisp Stat, 2008. +% \newblock \url{http://repo.or.cz/CommonLispStat.git/} + +% \end{thebibliography} +% \end{frame} -- 2.11.4.GIT