infrastructure for checking out other packages for inclusion.
[CommonLispStat.git] / Doc / talks / Rossini-RiceU-2009Mar.tex
blob51542303a7293f20d3813e854c9d72c8860224d9
1 \documentclass{beamer}
3 \mode<presentation>
5 \usetheme{classic}
6 \setbeamercovered{transparent}
9 \usepackage[english]{babel}
10 \usepackage[latin1]{inputenc}
11 \usepackage{times}
12 \usepackage[T1]{fontenc}
14 \title[CLS]{Common Lisp Statistics}
15 \subtitle{Using History to design better data analysis environments}
16 \author[Rossini]{Anthony~(Tony)~Rossini}
18 \institute[Novartis and University of Washington]{% (optional, but mostly needed)
20 Group Head, Modeling and Simulation\\
21 Novartis Pharma AG, Switzerland
22 \and
23 Affiliate Assoc Prof, Biomedical and Health Informatics\\
24 University of Washington, USA}
26 \date[Rice 09]{Rice, Mar 2009}
27 \subject{Statistical Computing Environments}
29 \begin{document}
31 \begin{frame}
32 \titlepage
33 \end{frame}
35 \begin{frame}{Outline}
36 \tableofcontents
37 \end{frame}
39 % Structuring a talk is a difficult task and the following structure
40 % may not be suitable. Here are some rules that apply for this
41 % solution:
43 % - Exactly two or three sections (other than the summary).
44 % - At *most* three subsections per section.
45 % - Talk about 30s to 2min per frame. So there should be between about
46 % 15 and 30 frames, all told.
48 % - A conference audience is likely to know very little of what you
49 % are going to talk about. So *simplify*!
50 % - In a 20min talk, getting the main ideas across is hard
51 % enough. Leave out details, even if it means being less precise than
52 % you think necessary.
53 % - If you omit details that are vital to the proof/implementation,
54 % just say so once. Everybody will be happy with that.
56 \section{Preliminaries}
58 \subsection{Context}
60 \begin{frame}{Goals for this Talk}{(define, strategic approach,
61 justify)}
63 \begin{itemize}
64 \item To describe the concept of \alert{computable and executable
65 statistics}, placing it in a historical context.
67 \item To demonstrate that \alert{a research program}
68 implemented through simple steps can increase the efficiency of
69 statistical computing approaches by clearly describing both:
70 \begin{itemize}
71 \item numerical characteristics of procedures,
72 \item statistical concepts driving them.
73 \end{itemize}
75 \item To justify that the \alert{approach is worthwhile} and
76 represents a staged effort towards \alert{increased use of best
77 practices}.
78 \end{itemize}
79 (unfortunately, the last is still incomplete)
80 \end{frame}
83 \begin{frame}{Historical Computing Languages}
84 \begin{itemize}
85 \item FORTRAN : FORmula TRANslator. Original numerical computing
86 language, designed for clean implementation of numerical
87 algorithms
88 \item LISP : LISt Processor. Associated with symbolic
89 manipulation, AI, and knowledge approaches
90 \end{itemize}
92 They represent the 2 generalized needs of statistical computing,
93 which could be summarized as
94 \begin{itemize}
95 \item algorithms/numerics,
96 \item elicitation, communication, and generation of knowledge (``data
97 analysis'')
98 \end{itemize}
99 \end{frame}
101 \begin{frame}{Statistical Computing Environments}
103 Past:
104 \begin{itemize}
105 \item SPSS / BMDP / SAS
106 \item S ( S, S-PLUS, R)
107 \item LispStat ( XLispStat, ViSta, ARC , CommonLispStat ) ; QUAIL
108 \item XGobi (Orca / GGobi / Statistical Reality Engine)
109 \item MiniTab
110 \item Stata
111 \item DataDesk
112 \item Augsburg Impressionist series (MANET, \ldots)
113 \item Excel
114 \end{itemize}
115 many others...
117 \end{frame}
119 \begin{frame}{How many are left?}
121 \begin{itemize}
122 \item R
123 \item SAS
124 \item SPSS
125 \item Stata
126 \item Minitab
127 \item very few others...
128 \end{itemize}
129 ``R is the Microsoft of the statistical computing world'' -- anonymous.
130 \end{frame}
132 \begin{frame}{Selection Pressure}
133 \begin{itemize}
134 \item the R user population is growing rapidly, fueled by critical
135 mass, quality, and value
136 \item R is a great system for applied data analysis
137 \item R is not such a great system for research into statistical
138 computing (backwards compatibility, inertia due to user population)
139 \end{itemize}
140 There is a need for alternative experiments for developing new
141 approaches/ideas/concepts.
142 \end{frame}
144 \begin{frame}{Philosophically, why Common Lisp?}
145 Philosophically:
146 \begin{itemize}
147 \item Lisp can cleanly present computational intentions, both
148 symbolically and numerically.
149 \item Semantics and context are important: well supported by Lisp
150 paradigms.
151 \item Lisp's parentheses describe singular, multi-scale,
152 \alert{complete thoughts}.
153 \end{itemize}
155 \end{frame}
157 \begin{frame}{Technically, why Common Lisp?}
158 \begin{itemize}
159 \item interactive COMPILED language (``R with a compiler'')
160 \item CLOS is R's S4 object system ``done right''.
161 \item clean semantics: modality, typing, can be expressed the way
162 one wants it.
163 \item programs are data, data are programs, leading to
164 \item Most modern computing tools available (XML, WWW technologies)
165 \item ``executable XML''
166 \end{itemize}
167 Common Lisp is very close in usage to how people currently use R
168 (mostly interactive, some batch, and a wish for compilation efficiency).
169 \end{frame}
171 \subsection{Background}
173 \begin{frame}
174 \frametitle{Desire: Semantics and Statistics}
175 \begin{itemize}
176 \item The semantic web (content which is self-descriptive) is an
177 interesting and potentially useful idea.
179 \item
180 Biological informatics support (GO, Entrez) has allowed for
181 precise definitions of concepts in biology.
183 \item It is a shame that a field like statistics, requiring such
184 precision, has less such support than an imprecise and temporally
185 unstable field such as biology\ldots
186 \end{itemize}
188 How can we express statistical work (research, applied work) which
189 is both human and computer readable (perhaps subject to
190 transformations first)?
191 \end{frame}
194 % \subsection{Context}
196 % \begin{frame}{Context}{(where I'm coming from, my ``priors'')}
197 % \begin{itemize}
198 % \item Pharmaceutical Industry
199 % \item Modeling and Simulation uses mathematical models/constructs to
200 % record beliefs (biology, pharmacology, clinical science) for
201 % explication, clinical team alignment, decision support, and
202 % quality.
203 % \item My work at Novartis is at the intersection of biomedical
204 % informatics, statistics, and mathematical modeling.
205 % \item As manager: I need a mix of applications and novel research development to
206 % solve our challenges better, faster, more efficiently.
207 % \item Data analysis is a specialized approach to computer
208 % programming, \alert{different} than applications programming or
209 % systems programming.
210 % \end{itemize}
211 % \end{frame}
213 \section{Computable and Executable Statistics}
215 \begin{frame}{Can we compute with them?}
216 3 Examples:
217 \begin{itemize}
218 \item Research
219 \item Consulting
220 \item Reimplementation
221 \end{itemize}
222 Consider whether one can ``compute'' with the information given.
223 \end{frame}
225 \begin{frame}[fragile]{Example 1: Theory\ldots}
226 \label{example1}
227 Let $f(x;\theta)$ describe the likelihood of XX under the following
228 assumptions.
229 \begin{enumerate}
230 \item assumption-1
231 \item assumption-2
232 \end{enumerate}
233 Then if we use the following algorithm:
234 \begin{enumerate}
235 \item step-1
236 \item step-2
237 \end{enumerate}
238 then $\hat{\theta}$ should be $N(0,\hat\sigma^2)$ with the following
239 characteristics\ldots
240 \end{frame}
242 \begin{frame}
243 \frametitle{Can we compute, using this description?}
244 Given the information at hand:
245 \begin{itemize}
246 \item we ought to have a framework for initial coding for the
247 actual simulations (test-first!)
248 \item the implementation is somewhat clear
249 \item We should ask: what theorems have similar assumptions?
250 \item We should ask: what theorems have similar conclusions but
251 different assumptions?
252 \end{itemize}
253 \end{frame}
255 \begin{frame}[fragile]{Realizing Theory}
256 \small
257 \begin{verbatim}
258 (define-theorem my-proposed-theorem
259 (:theorem-type '(distribution-properties
260 frequentist
261 likelihood))
262 (:assumes '(assumption-1 assumption-2))
263 (:likelihood-form
264 (defun likelihood (data theta gamma)
265 (exponential-family theta gamma)))
266 (:compute-by
267 '(progn
268 (compute-starting-values thetahat gammahat)
269 (until (convergence)
270 (setf convergence
271 (or (step-1 thetahat)
272 (step-2 gammahat))))))
273 (:claim (assert
274 (and (equal-distribution thetahat 'normal)
275 (equal-distribution gammahat 'normal)))))
276 \end{verbatim}
278 \end{frame}
280 \begin{frame}[fragile]{It would be nice to have}
281 \begin{verbatim}
282 (theorem-veracity 'my-proposed-theorem)
283 \end{verbatim}
284 \end{frame}
286 \begin{frame}[fragile]{and why not...?}
287 \begin{verbatim}
288 (when (theorem-veracity
289 'my-proposed-theorem)
290 (write-paper 'my-proposed-theorem
291 :style :JASA
292 :output-format
293 '(LaTeX MSWord)))
294 \end{verbatim}
295 \end{frame}
297 \begin{frame}{Comments}
298 \begin{itemize}
299 \item The general problem is very difficult
300 \item Some progress has been made in small areas of basic
301 statistics: currently working on linear regression (LS-based,
302 Normal-Bayesian) and the $t$-test.
303 \item Areas targeted for medium-term future: resampling methods and
304 similar algorithms.
305 \end{itemize}
307 \end{frame}
309 \begin{frame}
310 \frametitle{Example 2: Practice\ldots}
311 \label{example2}
312 The dataset comes from a series of clinical trials. We model the
313 primary endpoint, ``relief'', as a binary random variable. There is
314 a random trial effect on relief as well as severity due to
315 differences in recruitment and inclusion/exclusion criteria.
316 \end{frame}
318 \begin{frame}
319 \frametitle{Can we compute, using this description?}
320 \begin{itemize}
321 \item With a real such description, it is clear what some of the
322 potential models might be for this dataset
323 \item It should be clear how to start thinking of a data dictionary
324 for this problem.
325 \end{itemize}
326 \end{frame}
328 \begin{frame}[fragile]{Can we compute?}
329 \begin{verbatim}
330 (dataset-metadata paper-1
331 :context 'clinical-trials
332 :variables '((relief :model-type dependent
333 :distribution binary)
334 (trial :model-type independent
335 :distribution categorical)
336 (disease-severity))
337 :metadata '(inclusion-criteria
338 exclusion-criteria
339 recruitment-rate))
340 (propose-analysis paper-1)
341 ; => '(tables
342 ; (logistic regression))
343 \end{verbatim}
344 \end{frame}
346 \begin{frame}{Example 3: The Round-trip\ldots}
347 \label{example3}
348 The first examples describe ``ideas $\rightarrow$ code''
350 Consider the last time you read someone else's implementation of a
351 statistical procedure (e.g.\ R package code). When you read the
352 code, could you see:
353 \begin{itemize}
354 \item the assumptions used?
355 \item the algorithm implemented?
356 \item practical guidance for when you might select the algorithm
357 over others?
358 \item practical guidance for when you might select the
359 implementation over others?
360 \end{itemize}
361 These are usually components of any reasonable journal article.
362 \textit{(Q: have you actually read an R package that wasn't yours?)}
363 \end{frame}
365 \begin{frame}{Exercise left to the reader!}
367 (aside: I have been looking at the \textbf{stats} and \textbf{lme4}
368 packages recently -- \textit{for me}, very clear numerically, much
369 less so statistically)
370 \end{frame}
374 \subsection{Literate Programming is insufficient}
376 \begin{frame}{Literate Statistical Practice.}
377 \begin{enumerate}
378 \item Literate Programming applied to data analysis (Rossini, 1997/2001)
379 \item among the \alert{most annoying} techniques to integrate into
380 work-flow if one is not perfectly methodical.
381 \item Some tools:
382 \begin{itemize}
383 \item ESS: supports interactive creation of literate programs.
384 \item Sweave: tool which exemplifies reporting context; odfWeave
385 primarily simplifies reporting.
386 \item Roxygen: primarily supports a literate programming
387 documentation style, not a literate data analysis programming
388 style.
389 \end{itemize}
390 \item ROI demonstrated in specialized cases: BioConductor.
391 \item \alert{usually done after the fact} (final step of work-flow)
392 as a documentation/computational reproducibility technique, rarely
393 integrated into work-flow.
394 \end{enumerate}
395 Many contributors:
396 Knuth, Claerbout, Carey, de Leeuw, Leisch, Gentleman, Temple-Lang,
397 \ldots{}
398 \end{frame}
400 \begin{frame}
401 \frametitle{Literate Programming}
402 \framesubtitle{Why isn't it enough for Data Analysis?}
404 Only 2 contexts: (executable) code and documentation. Fine for
405 application programming, but for data analysis, we could benefit
406 from:
407 \begin{itemize}
408 \item classification of statistical procedures
409 \item descriptions of assumptions
410 \item pragmatic recommendations
411 \item inheritance of structure through the work-flow of a
412 statistical methodology or data analysis project
413 \item datasets and metadata
414 \end{itemize}
415 Concept: ontologies describing mathematical assumptions, applications
416 of methods, work-flow, and statistical data structures can enable
417 machine communication.
419 (i.e.\ an informatics framework \`a la biology)
420 \end{frame}
423 \begin{frame}{Communication in Statistical Practice}{\ldots is essential for \ldots}
424 \begin{itemize}
425 \item finding
426 \item explanations
427 \item agreement
428 \item receiving information
429 \end{itemize}
430 \alert{``machine-readable'' communication/computation lets the
431 computer help} \\
432 Semantic Web is about ``machine-enabled computability''.
433 \end{frame}
435 \begin{frame} \frametitle{Semantics}
436 \framesubtitle{One definition: description and context}
438 Interoperability is the key, with respect to
439 \begin{itemize}
440 \item ``Finding things''
441 \item Applications and activities with related functionality
442 \begin{itemize}
443 \item moving information from one state to another (paper, journal
444 article, computer program)
445 \item computer programs which implement solutions to similar tasks
446 \end{itemize}
447 \end{itemize}
448 \end{frame}
451 \begin{frame}{Statistical Practice is somewhat restricted}
452 {...but in a good sense, enabling potential for semantics...}
454 There is a restrictable set of intended actions for what can be done
455 -- the critical goal is to be able to make a difference by
456 accelerating activities that should be ``computable'':
457 \begin{itemize}
458 \item restricted natural language processing
459 \item mathematical translation
460 \item common description of activities for simpler programming/data
461 analysis (S approach to objects and methods)
462 \end{itemize}
463 R is a good basic start (model formulation approach, simple
464 ``programming with data'' paradigm); we should see if we can do
465 better!
466 \end{frame}
468 \begin{frame}{Computable and Executable Statistics requires}
470 \begin{itemize}
471 \item approaches to describe data and metadata (``data'')
472 \begin{itemize}
473 \item semantic WWW
474 \item metadata management and integration, driving
475 \item data integration
476 \end{itemize}
477 \item approaches to describe data analysis methods (``models'')
478 \begin{itemize}
479 \item quantitatively: many ontologies (AMS, etc), few meeting
480 statistical needs.
481 \item many substantive fields have implementations
482 (bioinformatics, etc) but not well focused.
483 \end{itemize}
484 \item approaches to describe the specific form of interaction
485 (``instances of models'')
486 \begin{itemize}
487 \item Original idea behind ``Literate Statistical Analysis''.
488 \item That idea is suboptimal, more structure needed (not
489 necessarily built upon existing...).
490 \end{itemize}
491 \end{itemize}
492 \end{frame}
494 \subsection{Common Lisp Statistics}
496 \begin{frame}
497 \frametitle{Interactive Programming}
498 \framesubtitle{Everything goes back to being Lisp-like}
499 \begin{itemize}
500 \item Interactive programming (as originating with Lisp): works
501 extremely well for data analysis (Lisp being the original
502 ``programming with data'' language).
503 \item Theories/methods for how to do this are reflected in styles
504 for using R.
505 \end{itemize}
506 \end{frame}
508 \begin{frame}[fragile]
509 \frametitle{Lisp}
511 Lisp (LISt Processor) is different from most high-level computing
512 languages, and is very old (1958). Lisp is built on lists of things
513 which are evaluatable.
514 \begin{verbatim}
515 (functionName data1 data2 data3)
516 \end{verbatim}
517 or ``quoted'':
518 \begin{verbatim}
519 '(functionName data1 data2 data3)
520 \end{verbatim}
521 which is shorthand for
522 \begin{verbatim}
523 (list 'functionName 'data1 'data2 'data3)
524 \end{verbatim}
525 The difference is important -- lists of data (the second/third) are
526 not (yet?!) functions applied to (unencapsulated lists of) data (the first).
527 \end{frame}
529 \begin{frame}
530 \frametitle{Features}
531 \begin{itemize}
532 \item Data and Functions semantically the same
533 \item Natural interactive use through functional programming with
534 side effects
535 \item Batch is a simplification of interactive -- not a special mode!
536 \end{itemize}
537 \end{frame}
541 \begin{frame}[fragile]{Representation: XML and Lisp}{executing your data}
542 Many people are familiar with XML:
543 \begin{verbatim}
544 <name phone="+41793674557">Tony Rossini</name>
545 \end{verbatim}
546 which is shorter in Lisp:
547 \begin{verbatim}
548 (name "Tony Rossini" :phone "+41793674557")
549 \end{verbatim}
550 \begin{itemize}
551 \item Lisp ``parens'', universally hated by unbelievers, are
552 wonderful for denoting when a ``concept is complete''.
553 \item Why can't your data self-execute?
554 \end{itemize}
555 \end{frame}
557 \begin{frame}[fragile]{Numerics with Lisp}
558 \begin{itemize}
559 \item addition of rational numbers and arithmetic
560 \item example for mean
561 \begin{verbatim}
562 (defun mean (x)
563 (checktype x 'vector-like)
564 (/ (loop for i from 0 to (- (nelts *x*) 1)
565 summing (vref *x* i))
566 (nelts *x*)))
567 \end{verbatim}
568 \item example for variance
569 \begin{verbatim}
570 (defun variance (x)
571 (let ((meanx (mean x))
572 (nm1 (1- (nelts x))))
573 (/ (loop for i from 0 to nm1
574 summing (power (- (vref *x* i) meanx) 2))
575 nm1)))
576 \end{verbatim}
577 \item But through macros, \verb+(vref *x* i)+ could be
578 \verb+#V(X[i])+ or your favorite syntax.
579 \end{itemize}
581 \end{frame}
584 \begin{frame}{Common Lisp Statistics 1}
585 \begin{itemize}
586 \item Originally based on LispStat (reusability)
587 \item Re-factored structure (some numerics worked with a 1990-era code base).
588 \item Current activities:
589 \begin{enumerate}
590 \item numerics redone using CFFI-based BLAS/LAPACK (cl-blapack)
591 \item matrix interface based on MatLisp
592 \item starting design of a user interface system (interfaces,
593 visuals).
594 \item general framework for model specification (regression,
595 likelihood, ODEs)
596 \item general framework for algorithm specification (bootstrap,
597 MLE, algorithmic data analysis methods).
598 \end{enumerate}
599 \end{itemize}
600 \end{frame}
602 \begin{frame}{Common Lisp Statistics 2}
604 \begin{itemize}
605 \item Implemented using SBCL. Contributed fixes for
606 Clozure/OpenMCL. Goal to target CLISP
607 \item Supports LispStat prototype object system
608 \item Package-based design -- only use the components you need, or
609 the components whose API you like.
610 \end{itemize}
611 \end{frame}
613 \section{Discussion}
615 \begin{frame}
616 \frametitle{Outlook}
617 \begin{itemize}
618 \item Semantics and Computability have captured a great deal of
619 attention in the informatics and business computing R\&D worlds
620 \item Statistically-driven Decision Making and Knowledge Discovery
621 is, with high likelihood, the next challenging stage after data
622 integration.
623 \item Statistical practice (theory and application) can be enhanced,
624 made more efficient, providing increased benefit to organizations
625 and groups using appropriate methods.
626 \item Lisp, as a language, shares characteristics of both Latin
627 (difficult dead language useful for classical training) and German
628 (difficult living language useful for general life). Of course,
629 for some people, they are not difficult.
630 \end{itemize}
632 \end{frame}
634 \begin{frame}
635 The research program described in this talk is currently driving the
636 design of CommonLisp Stat, which leverages concepts and approaches
637 from the dead and moribund LispStat project.
639 \begin{itemize}
640 \item \url{http://repo.or.cz/w/CommonLispStat.git/}
641 \item \url{http://www.github.com/blindglobe/}
642 \end{itemize}
644 \end{frame}
645 \begin{frame}{Final Comment}
647 \begin{itemize}
648 \item In the Pharma industry, it is all about getting the right
649 drugs to the patient faster. Data analysis systems seriously
650 impact this process, being potentially an impediment or an
651 accelerator.
653 \begin{itemize}
654 \item \alert{Information technologies can increase the efficiency
655 of statistical practice}, though innovation change management
656 must be taken into account (i.e.\ statistical practice, while
657 considered by some an ``art form'', can benefit from
658 industrialization).
659 \item \alert{Lisp's features match the basic requirements we need}
660 (dichotomy: programs as data, data as programs). Sales pitch,
661 though...
662 \item Outlook: Lots of work and experimentation to do!
663 \end{itemize}
664 \item {\tiny Gratuitous Advert: We are hiring, have student
665 internships (undergrad, grad students), and a visiting faculty
666 program. Talk with me if possibly interested.}
667 \end{itemize}
668 \end{frame}
671 % % All of the following is optional and typically not needed.
672 % \appendix
675 % \section<presentation>*{\appendixname}
678 % \begin{frame} \frametitle{Complements and Backup}
679 % No more, stop here. Questions? (now or later).
680 % \end{frame}
682 % \begin{frame}{The Industrial Challenge.}{Getting the Consulting Right.}
683 % % - A title should summarize the slide in an understandable fashion
684 % % for anyone who does not follow everything on the slide itself.
686 % \begin{itemize}
687 % \item Recording assumptions for the next data analyst, reviewer.
688 % Use \texttt{itemize} a lot.
689 % \item
690 % Use very short sentences or short phrases.
691 % \end{itemize}
692 % \end{frame}
695 % \begin{frame}{The Industrial Challenge.}{Getting the Right Research Fast.}
696 % % - A title should summarize the slide in an understandable fashion
697 % % for anyone who does not follow everything on the slide itself.
699 % \begin{itemize}
700 % \item
701 % Use \texttt{itemize} a lot.
702 % \item
703 % Use very short sentences or short phrases.
704 % \end{itemize}
705 % \end{frame}
708 % \begin{frame}{Explicating the Work-flow}{QA/QC-based improvements.}
711 % \end{frame}
713 % \section{Motivation}
715 % \subsection{IT Can Speed up Deliverables in Statistical Practice}
717 % \begin{frame}{Our Generic Work-flow and Life-cycle}
718 % {describing most data analytic activities}
719 % Workflow:
720 % \begin{enumerate}
721 % \item Scope out the problem
722 % \item Sketch out a potential solution
723 % \item Implement until road-blocks appear
724 % \item Deliver results
725 % \end{enumerate}
727 % Lifecycle:
728 % \begin{enumerate}
729 % \item paper sketch
730 % \item 1st e-draft of text/code/date (iterate to \#1, discarding)
731 % \item cycle through work
732 % \item publish
733 % \item ``throw-away''
734 % \end{enumerate}
735 % but there is valuable information that could enable the next
736 % generation!
737 % \end{frame}
739 % \begin{frame}[fragile]{Paper $\rightarrow$ Computer $\rightarrow$ Article $\rightarrow$ Computer}{Cut and Paste makes for large errors.}
740 % \begin{itemize}
741 % \item Problems in a regulatory setting
742 % \item Regulatory issues are just ``best practices''
743 % \end{itemize}
745 % Why do we ``copy/paste'', or analogously, restart our work?
747 % pro:
748 % \begin{itemize}
749 % \item every time we repeat, we reinforce the idea in our brain
750 % \item review of ideas can help improve them
751 % \end{itemize}
752 % con:
753 % \begin{itemize}
754 % \item inefficiency
755 % \item introduction of mistakes
756 % \item loss of historical context
757 % \item changes to earlier work (on a different development branch)
758 % can not propagate.
759 % \end{itemize}
760 % \end{frame}
762 % \section{Semantics and Statistical Practice}
765 % \begin{frame}
766 % \frametitle{Statistical Activity Leads to Reports}
767 % \framesubtitle{You read what you know, do you understand it?}
769 % How can we improve the communication of the ideas we have?
771 % Precision of communication?
773 % \end{frame}
777 % \begin{frame} \frametitle{Communication Requires Context}
778 % \framesubtitle{Intentions imply more than one might like...}
780 % \begin{itemize}
781 % \item Consideration of what we might do
782 % \item Applications with related functionality
783 % \end{itemize}
784 % \end{frame}
788 % \begin{frame}
789 % \frametitle{Design Patterns}
790 % \framesubtitle{Supporting Work-flow Transitions}
792 % (joint work with H Wickham): The point of this research program is
793 % not to describe what to do at any particular stage of work, but to
794 % encourage researchers and practitioners to consider how to handle the
795 % translation and transfer of information between stages so that work
796 % is not lost.
798 % Examples of stages in a work-flow:
799 % \begin{itemize}
800 % \item planning, execution, reporting;
801 % \item scoping, illustrative examples or counter examples, algorithmic construction,
802 % article writing.
803 % \item descriptive statistics, preliminary inferential analysis,
804 % model/assumption checking, final inferential analysis,
805 % communication of scientific results
806 % \end{itemize}
807 % Description of work-flows is essential to initiating discussions on
808 % quality/efficiency of approaches to work.
809 % \end{frame}
811 % \section{Design Challenges}
813 % \begin{frame}
814 % \frametitle{Activities are enhanced by support}
816 % \begin{itemize}
817 % \item Mathematical manipulation can be enhanced by symbolic
818 % computation
819 % \item Statistical programming can be enabled by examples and related
820 % algorithm implementation
821 % \item Datasets, to a limited extent, can self-describe.
822 % \end{itemize}
823 % \end{frame}
825 % \begin{frame}
826 % \frametitle{Executable and Computable Science}
828 % Use of algorithms and construction to describe how things work.
830 % Support for agent-based approaches
831 % \end{frame}
834 % \begin{frame}
835 % \frametitle{What is Data? Metadata?}
837 % Data: what we've observed
839 % MetaData: context for observations, enables semantics.
840 % \end{frame}
845 % % \begin{frame}[fragile]
846 % % \frametitle{Defining Variables}
847 % % \framesubtitle{Setting variables}
848 % % \begin{verbatim}
849 % % (setq <variable> <value>)
850 % % \end{verbatim}
851 % % Example:
852 % % \begin{verbatim}
853 % % (setq ess-source-directory
854 % % "/home/rossini/R-src")
855 % % \end{verbatim}
856 % % \end{frame}
858 % % \begin{frame}[fragile]
859 % % \frametitle{Defining on the fly}
860 % % \begin{verbatim}
861 % % (setq ess-source-directory
862 % % (lambda () (file-name-as-directory
863 % % (expand-file-name
864 % % (concat (default-directory)
865 % % ess-suffix "-src")))))
866 % % \end{verbatim}
867 % % (Lambda-expressions are anonymous functions, i.e. ``instant-functions'')
868 % % \end{frame}
871 % % \begin{frame}[fragile]
872 % % \frametitle{Function Reuse}
873 % % By naming the function, we could make the previous example reusable
874 % % (if possible):
875 % % \begin{verbatim}
876 % % (defun my-src-directory ()
877 % % (file-name-as-directory
878 % % (expand-file-name
879 % % (concat (default-directory)
880 % % ess-suffix "-src"))))
881 % % \end{verbatim}
882 % % Example:
883 % % \begin{verbatim}
884 % % (setq ess-source-directory (my-src-directory))
885 % % \end{verbatim}
886 % % \end{frame}
889 % % \begin{frame}
890 % % \frametitle{Equality Among Packages}
891 % % \begin{itemize}
892 % % \item more/less equal can be described specifically through
893 % % overriding imports.
894 % % \end{itemize}
895 % % \end{frame}
898 % \subsection<presentation>*{For Further Reading}
900 % \begin{frame}[allowframebreaks]
901 % \frametitle<presentation>{Related Material}
903 % \begin{thebibliography}{10}
905 % \beamertemplatebookbibitems
906 % % Start with overview books.
908 % \bibitem{LispStat1990}
909 % L.~Tierney
910 % \newblock {\em LispStat}.
912 % \beamertemplatearticlebibitems
913 % % Followed by interesting articles. Keep the list short.
915 % \bibitem{Rossini2001}
916 % AJ.~Rossini
917 % \newblock Literate Statistical Practice
918 % \newblock {\em Proceedings of the Conference on Distributed
919 % Statistical Computing}, 2001.
921 % \bibitem{RossiniLeisch2003}
922 % AJ.~Rossini and F.~Leisch
923 % \newblock Literate Statistical Practice
924 % \newblock {\em Technical Report Series, University of Washington
925 % Department of Biostatistics}, 2003.
927 % \beamertemplatearrowbibitems
928 % % Followed by interesting articles. Keep the list short.
930 % \bibitem{CLS}
931 % Common Lisp Stat, 2008.
932 % \newblock \url{http://repo.or.cz/CommonLispStat.git/}
934 % \end{thebibliography}
935 % \end{frame}
937 \end{document}