clean up, insert first version of Lisp stuff.
[CommonLispStat.git] / Doc / Rossini-RiceU-2009Mar.tex
blob6ee64b5f0b70609ac2023fabe143272ee24b2c5b
1 \documentclass{beamer}
3 \mode<presentation>
5 \usetheme{classic}
6 \setbeamercovered{transparent}
9 \usepackage[english]{babel}
10 \usepackage[latin1]{inputenc}
11 \usepackage{times}
12 \usepackage[T1]{fontenc}
14 \title[CLS]{Common Lisp Statistics}
15 \subtitle{Using History to design better data analysis environments}
16 \author[Rossini]{Anthony~(Tony)~Rossini}
18 \institute[Novartis and University of Washington] % (optional, but mostly needed)
19 {
20 Group Head, Modeling and Simulation\\
21 Novartis Pharma AG, Switzerland
22 \and
23 Affiliate Assoc Prof, Biomedical and Health Informatics\\
24 University of Washington, USA}
26 \date[Rice 09]{Rice, Mar 2009}
27 \subject{Statistical Computing Environments}
29 \begin{document}
31 \begin{frame}
32 \titlepage
33 \end{frame}
35 \begin{frame}{Outline}
36 \tableofcontents
37 \end{frame}
39 % Structuring a talk is a difficult task and the following structure
40 % may not be suitable. Here are some rules that apply for this
41 % solution:
43 % - Exactly two or three sections (other than the summary).
44 % - At *most* three subsections per section.
45 % - Talk about 30s to 2min per frame. So there should be between about
46 % 15 and 30 frames, all told.
48 % - A conference audience is likely to know very little of what you
49 % are going to talk about. So *simplify*!
50 % - In a 20min talk, getting the main ideas across is hard
51 % enough. Leave out details, even if it means being less precise than
52 % you think necessary.
53 % - If you omit details that are vital to the proof/implementation,
54 % just say so once. Everybody will be happy with that.
56 \section{Preliminaries}
58 \begin{frame}{Goals for this Talk}{(define, strategic approach,
59 justify)}
61 \begin{itemize}
62 \item To describe the concept of \alert{computable and executable
63 statistics}.
65 \item To demonstrate that \alert{there exists a research program}
66 consisting of simple steps which can increase the use of
67 statistical computing approaches to allow for clear description
68 not only of the numerical characteristics of procedures, but the
69 statistical concepts behind them.
71 \item To justify that the \alert{approach is worthwhile} and
72 represents a staged effort towards \alert{increased use of best
73 practices}.
74 \end{itemize}
75 (unfortunately, the last is still incomplete)
76 \end{frame}
78 \begin{frame}{Philosophically, why Common Lisp?}
79 Philosophically:
80 \begin{itemize}
81 \item Lisp as an ancient ``AI'' language; Statistics as ``artificial
82 intelligence'' (not real intelligence, \alert{humans are too
83 flawed and inconsistent} for Bayesian work to be anything but
84 AI).
85 \item Semantics and context are important: well supported by Lisp
86 paradigms.
87 \item Lisp's parentheses describe a single, multi-scale,
88 \alert{complete thought}. See \#1 for why that could make it
89 difficult.
90 \end{itemize}
92 \end{frame}
94 \begin{frame}{Technically, why Common Lisp?}
95 \begin{itemize}
96 \item interactive COMPILED language (``R with a compiler'')
97 \item CLOS is R's S4 object system ``done right''.
98 \item clean semantics
99 \item programs are data, data are programs, leading to
100 \item Most modern computing tools available (XML, WWW technologies)
101 \item ``executable XML''
102 \end{itemize}
103 \end{frame}
105 \subsection{Background}
107 \begin{frame}{Many systems existed concurrently for statistical
108 computing}
110 \begin{itemize}
111 \item LispStat (ViSta, ARC)
112 \item SPSS (BMDP)
113 \item MiniTab
114 \item Stata
115 \item SAS
116 \item Quail
117 \item XGobi (Orca, GGobi, Statistical Reality Engine)
118 \item DataDesk
119 \item R
120 \item Excel
121 \end{itemize}
122 \end{frame}
125 \begin{frame}
126 \frametitle{Semantics and Statistics}
127 \begin{itemize}
128 \item
129 There have been many wonderful talks about the semantic web which \\
130 \alert{demonstrated its coolness} \\
131 while completely \\
132 \alert{failing to demonstrate its usefulness}.\\
133 This talk follows in the tradition of such giants\ldots{}
135 \item
136 Biological informatics support (GO, Entrez) has allowed for
137 precise definitions of concepts in biology.
139 \item It is a shame that a field like statistics, requiring such
140 precision, has less such support than an imprecise and temporally
141 unstable field such as biology\ldots
142 \end{itemize}
143 \end{frame}
146 \subsection{Context}
148 \begin{frame}{Context}{(where I'm coming from, my ``priors'')}
149 \begin{itemize}
150 \item Pharmaceutical Industry
151 \item Modeling and Simulation uses mathematical models/constructs to
152 record beliefs for explication, clinical team alignment, decision
153 support, and quality management.
154 \item My major role at Novartis is to work at the intersection of
155 biomedical informatics, statistics, and mathematical modeling.
156 \item I need a mix of applications and novel research development to
157 solve challenges better, faster, more efficiently.
158 \item Data analysis is a specialized approach to computer
159 programming, \alert{different} than applications programming or
160 systems programming.
161 \item \alert{Nearly all of the research challenges I face today
162 existed for me in academia, and vice-versa.}
163 \end{itemize}
164 \end{frame}
166 \section{Computable and Executable Statistics}
168 \begin{frame}{Can we compute with them?}
170 For the following examples, consider whether one can ``compute''
171 with the information given.
172 \end{frame}
175 \begin{frame}[fragile]{Example 1: Theory\ldots}
176 \label{example1}
177 Let $f(x;\theta)$ describe the likelihood of XX under the following
178 assumptions.
179 \begin{enumerate}
180 \item assumption-1
181 \item assumption-2
182 \end{enumerate}
183 Then if we use the following algorithm:
184 \begin{enumerate}
185 \item step-1
186 \item step-2
187 \end{enumerate}
188 then $\hat{\theta}$ should be $N(0,\hat\sigma^2)$ with the following
189 characteristics\ldots
190 \end{frame}
192 \begin{frame}
193 \frametitle{Can we compute, using this description?}
194 Given the information at hand:
195 \begin{itemize}
196 \item we ought to have a framework for initial coding for the
197 actual simulations (test-first!)
198 \item the implementation is somewhat clear
199 \end{itemize}
200 \end{frame}
202 \begin{frame}[fragile]{Realizing Theory}
203 \small{
204 \begin{verbatim}
205 (define-theorem my-proposed-theorem
206 (:theorem-type '(distribution-properties
207 frequentist
208 likelihood))
209 (:assumes '(assumption-1 assumption-2))
210 (:likelihood-form
211 (defun likelihood (data theta gamma)
212 (exponential-family theta gamma)))
213 (:compute-by
214 '(progn
215 (compute-starting-values thetahat gammahat
216 (until (convergence)
217 (setf convergence
218 (or (step-1 thetahat)
219 (step-2 gammahat)))))))
220 (:claim (assert
221 (and (equal-distribution thetahat normal)
222 (equal-distribution gammahat normal)))))
223 \end{verbatim}
224 }
225 \end{frame}
227 \begin{frame}[fragile]{It would be nice to have}
228 \begin{verbatim}
229 (theorem-veracity 'my-proposed-theorem)
230 \end{verbatim}
231 \end{frame}
233 \begin{frame}[fragile]{and why not...?}
234 \begin{verbatim}
235 (when (theorem-veracity
236 'my-proposed-theorem)
237 (write-paper 'my-proposed-theorem
238 :style :JASA
239 :output-format
240 '(LaTeX MSWord)))
241 \end{verbatim}
242 \end{frame}
244 \begin{frame}{Comments}
245 \begin{itemize}
246 \item The general problem is very difficult
247 \item Some progress has been made in small areas of basic statistics
248 \item Areas targeted for medium-term future: resampling methods,
249 likelihood theory and algorithms.
250 \end{itemize}
252 \end{frame}
254 \begin{frame}
255 \frametitle{Example 2: Practice\ldots}
256 \label{example2}
257 The dataset comes from a series of clinical trials. We model the
258 primary endpoint, ``relief'', as a binary random variable. There is
259 a random trial effect on relief as well as severity due to
260 differences in recruitment and inclusion/exclusion criteria.
261 \end{frame}
263 \begin{frame}
264 \frametitle{Can we compute, using this description?}
265 \begin{itemize}
266 \item With a real such description, it is clear what some of the
267 potential models might be for this dataset
268 \item It should be clear how to start thinking of a data dictionary
269 for this problem.
270 \end{itemize}
271 \end{frame}
273 \begin{frame}[fragile]{Can we compute?}
274 \begin{verbatim}
275 (dataset paper-1
276 :context 'clinical-trials
277 :variables '((relief :model-type dependent
278 :distribution binary)
279 (trial :model-type independent
280 :distribution categorical)
281 (disease-severity))
282 :metadata '(inclusion-criteria
283 exclusion-criteria
284 recruitment-rate))
285 (propose-analysis paper-1) ; => '(tables
286 ; (logistic regression))
287 \end{verbatim}
288 \end{frame}
290 \begin{frame}{Example 3: The Round-trip\ldots}
291 \label{example3}
292 The first examples describe ``ideas $\rightarrow$ code''
294 Consider the last time you read someone else's implementation of a
295 statistical procedure (e.g.\ R package code). When you read the
296 code, could you see:
297 \begin{itemize}
298 \item the assumptions used?
299 \item the algorithm implemented?
300 \item practical guidance for when you might select the algorithm
301 over others?
302 \item practical guidance for when you might select the
303 implementation over others?
304 \end{itemize}
305 These are usually components of any reasonable journal article.
306 \textit{(Q: have you actually read an R package that wasn't yours?)}
307 \end{frame}
309 \begin{frame}{Exercise left to the reader!}
311 (aside: I have been looking at the \textbf{stats} and \textbf{lme4}
312 packages recently -- \textit{for me}, very clear numerically, much
313 less so statistically)
314 \end{frame}
318 \subsection{Literate Programming is insufficient}
320 \begin{frame}{Literate Statistical Practice.}
321 \begin{enumerate}
322 \item Literate Programming applied to data analysis
323 \item among the \alert{most annoying} techniques to integrate into
324 work-flow if one is not perfectly methodical.
325 \item Some tools:
326 \begin{itemize}
327 \item ESS: supports interactive creation of literate programs.
328 \item Sweave: tool which exemplifies reporting context; odfWeave
329 primarily simplifies reporting.
330 \item Roxygen: primarily supports a literate programming
331 documentation style, not a literate data analysis programming
332 style.
333 \end{itemize}
334 \item ROI demonstrated in specialized cases: BioConductor.
335 \item \alert{usually done after the fact} (final step of work-flow)
336 as a documentation/computational reproducibility technique, rarely
337 integrated into work-flow.
338 \end{enumerate}
339 Many contributors to this general theory/approach:
340 Knuth, Claerbout, de Leeuw, Leisch, Gentleman, Temple-Lang,
341 \ldots{}
342 \end{frame}
344 % \begin{frame}
345 % \frametitle{Literate Programming}
346 % \framesubtitle{Why is it not enough?}
348 % Claim: it isn't
349 % \begin{enumerate}
350 % \item used for statistics since mid 90s (Emacs/ESS support in 1997)
351 % \item active popular use with R (Leisch, 2001)
352 % \end{enumerate}
354 % but it provides a work-flow which is difficult and unnatural for many
355 % people (no perceived ROI).
356 % \end{frame}
358 \begin{frame}{Related work}
360 Mathematica Workbooks for mathematics concepts
361 \begin{itemize}
362 \item Mathematical storage and reproducibility, what about Statistical
363 Concepts?
364 \item Not open, but freely reproducible.
365 \item Some semantics, hopefully this will improve.
366 \end{itemize}
368 Electronic Lab Notebooks for data and the data/data analytics
369 interaction (but not quantitative methodological development).
370 \end{frame}
372 \section{Results/Contribution}
374 \subsection{Claims}
376 % \begin{frame}{Semantic Web}{How do we communicate "things"?}
377 % Recall Monday evening talk: What kinds of communication problems can we have?
378 % \begin{itemize}
379 % \item I say "reinigung", you say "waschen"
380 % \item I say "clean", you say "sauber"
381 % \end{itemize}
382 % In the context of our work, how do we communicate what we've done?
383 % \end{frame}
385 \begin{frame}{Communication in Statistical Practice}{\ldots is essential for \ldots}
386 \begin{itemize}
387 \item finding
388 \item explanations
389 \item agreement
390 \item receiving information
391 \end{itemize}
392 \alert{``machine-readable'' communication/computation lets the
393 computer help} \\
394 Semantic Web is about ``machine-enabled computability''.
395 \end{frame}
397 \begin{frame}
398 \frametitle{Literate Programming}
399 \framesubtitle{Why isn't it enough for Data Analysis?}
401 Only 2 contexts: (executable) code and documentation. Fine for
402 application programming, but for data analysis, we could benefit
403 from:
404 \begin{itemize}
405 \item classification of statistical procedures
406 \item descriptions of assumptions
407 \item pragmatic recommendations
408 \item inheritance of structure through the work-flow of a
409 statistical methodology or data analysis project
410 \item datasets and metadata
411 \end{itemize}
412 Concept: ontologies describing mathematical assumptions, applications
413 of methods, work-flow, and statistical data structures can enable
414 machine communication.
416 (i.e. informatics framework ala biology)
417 \end{frame}
419 \begin{frame} \frametitle{Semantics}
420 \framesubtitle{One definition: description and context}
422 Interoperability is the key, with respect to
423 \begin{itemize}
424 \item ``Finding things''
425 \item Applications and activities with related functionality
426 \begin{itemize}
427 \item moving information from one state to another (paper, journal
428 article, computer program)
429 \item computer programs which implement solutions to similar tasks
430 \end{itemize}
431 \end{itemize}
432 \end{frame}
434 \begin{frame}{Statistical Practice is somewhat restricted}
435 {\ldots but in a good sense, enabling potential for semantics\ldots}
437 There is a restrictable set of intended actions for what can be done
438 -- the critical goal is to be able to make a difference by
439 accelerating activities that should be ``computable'':
440 \begin{itemize}
441 \item restricted natural language processing
442 \item mathematical translation
443 \item common description of activities for simpler programming/data
444 analysis (S approach to objects and methods)
445 \end{itemize}
446 R is a good primitive start (model formulation approach, simple
447 ``programming with data'' paradigm); we should see if we can do
448 better!
449 \end{frame}
452 % \begin{frame}{Semantics}{Capturing Ideas, Concepts, Proposals.}
453 % \begin{itemize}
454 % \item Capturing the historical state and corresponding decisions is
455 % essential for developing improved approaches. A common problem in
456 % ``product development'' (stat research, drug development) is
457 % cycling through the same issues repeatedly.
458 % \item These should be captured semantically
459 % \item Conversion of concepts to computable semantics is sensible
460 % when you need it, difficult without a compelling reason
461 % \end{itemize}
462 % \end{frame}
465 % \begin{frame}{Lowering the bounds to interactive work.}
466 % \begin{enumerate}
467 % \item Limitations of object-orientation and information-hiding
468 % routines: require context in order to keep the context.
469 % \item Statistical and Data analysis: context is central and obvious.
470 % \end{enumerate}
471 % \end{frame}
474 \subsection{Common Lisp Statistics}
476 \begin{frame}
477 \frametitle{Interactive Programming}
478 \framesubtitle{Everything goes back to being Lisp-like}
479 \begin{itemize}
480 \item Interactive programming (as originating with Lisp): works
481 extremely well for data analysis (Lisp being the original
482 ``programming with data'' language).
483 \item Theories/methods for how to do this are reflected in styles
484 for using R.
485 \end{itemize}
486 \end{frame}
488 \begin{frame}[fragile]
489 \frametitle{Lisp}
491 Lisp (LISt Processor) is different than most high-level computing
492 languages, and is very old (1958). Lisp is built on lists of things
493 which are evaluatable.
494 \begin{verbatim}
495 (functionName data1 data2 data3)
496 \end{verbatim}
497 or ``quoted'':
498 \begin{verbatim}
499 '(functionName data1 data2 data3)
500 \end{verbatim}
501 which is shorthand for
502 \begin{verbatim}
503 (quote (functionName data1 data2 data3))
504 \end{verbatim}
505 The difference is important -- lists of data (the second/third) are
506 not (yet?!) functions applied to (unencapsulated lists of) data (the first).
507 \end{frame}
509 \begin{frame}
510 \frametitle{Features}
511 \begin{itemize}
512 \item Data and Functions semantically the same
513 \item Natural interactive use through functional programming with
514 side effects
515 \item Batch is a simplification of interactive -- not a special mode!
516 \end{itemize}
517 \end{frame}
520 \subsection{Current Approach / Implementation}
525 \begin{frame}{Computable and Executable Statistics requires}
527 \begin{itemize}
528 \item approaches to describe data and metadata (``data'')
529 \begin{itemize}
530 \item semantic WWW
531 \item metadata management and integration, driving
532 \item data integration
533 \end{itemize}
534 \item approaches to describe data analysis methods (``models'')
535 \begin{itemize}
536 \item quantitatively: many ontologies (AMS, etc), few meeting
537 statistical needs.
538 \item many substantive fields have implementations
539 (bioinformatics, etc) but not well focused.
540 \end{itemize}
541 \item approaches to describe the specific form of interaction
542 (``instances of models'')
543 \begin{itemize}
544 \item Original idea behind ``Literate Statistical Analysis''.
545 \item That idea is suboptimal, more structure needed (not
546 necessarily built upon existing...).
547 \end{itemize}
548 \end{itemize}
549 \end{frame}
551 \begin{frame}[fragile]{Representation: XML and Lisp}{executing your data}
552 Many people are familiar with XML:
553 \begin{verbatim}
554 <name phone="+41793674557">Tony Rossini</name>
555 \end{verbatim}
556 which is shorter in Lisp:
557 \begin{verbatim}
558 (name "Tony Rossini" :phone "+41613674557")
559 \end{verbatim}
560 \begin{itemize}
561 \item Lisp ``parens'', universally hated by unbelievers, are
562 wonderful for denoting when a ``concept is complete''.
563 \item Why can't your data self-execute?
564 \end{itemize}
565 \end{frame}
567 \begin{frame}{Common Lisp Statistics}
568 Ross talked about Lisp. I generally agree. My current
569 research program dates back over 3 years, and:
570 \begin{itemize}
571 \item Originally based on LispStat (reusability)
572 \item Re-factored structure (some numerics worked with a 1990-era code base).
573 \item Current activities:
574 \begin{enumerate}
575 \item numerics redone using CFFI-based BLAS/LAPACK (cl-blapack)
576 \item matrix interface based on MatLisp
577 \item starting design of a user interface system (interfaces,
578 visuals).
579 \item general framework for model specification (regression,
580 likelihood, ODEs)
581 \item general framework for algorithm specification (bootstrap,
582 MLE, algorithmic data analysis methods).
583 \end{enumerate}
584 \end{itemize}
585 \end{frame}
587 \begin{frame}{Common Lisp Statistics}
589 \begin{itemize}
590 \item Implemented using SBCL. Contributed fixes for
591 Clozure/OpenMCL. Goal to target CLISP.
592 \item Supports LispStat prototype object system
593 \item Package-based design -- only use the components you need, or
594 the components whose API you like.
595 \end{itemize}
596 \end{frame}
597 \section*{Summary}
599 % \begin{frame}{Delivering Better Data Analyses Faster}
600 % Industrial settings:
601 % \begin{enumerate}
602 % \item Pharmaceutical companies
603 % \item Academic departments
604 % \item Review-centric organizations (Health Authorities, Regulators)
605 % \end{enumerate}
606 % \end{frame}
609 \section{Discussion}
611 \begin{frame}
612 \frametitle{Outlook}
613 \begin{itemize}
614 \item Semantics and Computability have captured a great deal of
615 attention in the informatics and business computing R\&D worlds
616 \item Statistically-driven Decision Making and Knowledge Discovery
617 is, with high likelihood, the next challenging stage after data
618 integration.
619 \item Statistical practice (theory and application) can be enhanced,
620 made more efficient, providing increased benefit to organizations
621 and groups using appropriate methods.
622 \item Lisp, as a language, shares characteristics of both Latin
623 (difficult dead language useful for classical training) and German
624 (difficult living language useful for general life). Of course,
625 for some people, they are not difficult.
626 \end{itemize}
628 The research program described in this talk is currently driving the
629 design of CommonLisp Stat, which leverages concepts and approaches
630 from the dead and moribund XLisp-Stat project.
632 \url{http://repo.or.cz/w/CommonLispStat.git/}
634 \url{http://www.github.com/blindglobe/}
636 \end{frame}
638 \begin{frame}{Summary}
640 \begin{itemize}
641 \item In the Pharma industry, it is all about getting the right
642 drugs to the patient faster. Data analysis systems seriously
643 impact this process, being potentially an impediment or an
644 accelerator.
646 \begin{itemize}
647 \item \alert{Information technologies can increase the efficiency
648 of statistical practice}, though innovation change management
649 must be taken into account. (i.e. Statistical practice, while
650 considered by some an ``art form'', can benefit from
651 industrialization).
652 \item \alert{Lisp's features match the basic requirements we need}
653 (dichotomy: programs as data, data as programs). Sales pitch,
654 though...
655 \item Outlook: Lots of work and experimentation to do!
656 \end{itemize}
657 \item {\tiny Gratuitous Advert: We are hiring, have student
658 internships (undergrad, grad students), and a visiting faculty
659 program. Talk with me if possibly interested.}
660 \end{itemize}
661 \end{frame}
664 % % All of the following is optional and typically not needed.
665 % \appendix
668 % \section<presentation>*{\appendixname}
671 % \begin{frame} \frametitle{Complements and Backup}
672 % No more, stop here. Questions? (now or later).
673 % \end{frame}
675 % \begin{frame}{The Industrial Challenge.}{Getting the Consulting Right.}
676 % % - A title should summarize the slide in an understandable fashion
677 % % for anyone who does not follow everything on the slide itself.
679 % \begin{itemize}
680 % \item Recording assumptions for the next data analyst, reviewer.
681 % Use \texttt{itemize} a lot.
682 % \item
683 % Use very short sentences or short phrases.
684 % \end{itemize}
685 % \end{frame}
688 % \begin{frame}{The Industrial Challenge.}{Getting the Right Research Fast.}
689 % % - A title should summarize the slide in an understandable fashion
690 % % for anyone who does not follow everything on the slide itself.
692 % \begin{itemize}
693 % \item
694 % Use \texttt{itemize} a lot.
695 % \item
696 % Use very short sentences or short phrases.
697 % \end{itemize}
698 % \end{frame}
701 % \begin{frame}{Explicating the Work-flow}{QA/QC-based improvements.}
704 % \end{frame}
706 % \section{Motivation}
708 % \subsection{IT Can Speed up Deliverables in Statistical Practice}
710 % \begin{frame}{Our Generic Work-flow and Life-cycle}
711 % {describing most data analytic activities}
712 % Workflow:
713 % \begin{enumerate}
714 % \item Scope out the problem
715 % \item Sketch out a potential solution
716 % \item Implement until road-blocks appear
717 % \item Deliver results
718 % \end{enumerate}
720 % Lifecycle:
721 % \begin{enumerate}
722 % \item paper sketch
723 % \item 1st e-draft of text/code/data (iterate to \#1, discarding)
724 % \item cycle through work
725 % \item publish
726 % \item ``throw-away''
727 % \end{enumerate}
728 % but there is valuable information that could enable the next
729 % generation!
730 % \end{frame}
732 % \begin{frame}[fragile]{Paper $\rightarrow$ Computer $\rightarrow$ Article $\rightarrow$ Computer}{Cut and Paste makes for large errors.}
733 % \begin{itemize}
734 % \item Problems in a regulatory setting
735 % \item Regulatory issues are just ``best practices''
736 % \end{itemize}
738 % Why do we ``copy/paste'', or analogously, restart our work?
740 % pro:
741 % \begin{itemize}
742 % \item every time we repeat, we reinforce the idea in our brain
743 % \item review of ideas can help improve them
744 % \end{itemize}
745 % con:
746 % \begin{itemize}
747 % \item inefficiency
748 % \item introduction of mistakes
749 % \item loss of historical context
750 % \item changes to earlier work (on a different development branch)
751 % can not propagate.
752 % \end{itemize}
753 % \end{frame}
755 % \section{Semantics and Statistical Practice}
758 % \begin{frame}
759 % \frametitle{Statistical Activity Leads to Reports}
760 % \framesubtitle{You read what you know, do you understand it?}
762 % How can we improve the communication of the ideas we have?
764 % Precision of communication?
766 % \end{frame}
770 % \begin{frame} \frametitle{Communication Requires Context}
771 % \framesubtitle{Intentions imply more than one might like...}
773 % \begin{itemize}
774 % \item Consideration of what we might do
775 % \item Applications with related functionality
776 % \end{itemize}
777 % \end{frame}
781 % \begin{frame}
782 % \frametitle{Design Patterns}
783 % \framesubtitle{Supporting Work-flow Transitions}
785 % (joint work with H Wickham): The point of this research program is
786 % not to describe what to do at any particular stage of work, but to
787 % encourage researchers and practitioners to consider how the
788 % translation and transfer of information between stages so that work
789 % is not lost.
791 % Examples of stages in a work-flow:
792 % \begin{itemize}
793 % \item planning, execution, reporting;
794 % \item scoping, illustrative examples or counter examples, algorithmic construction,
795 % article writing.
796 % \item descriptive statistics, preliminary inferential analysis,
797 % model/assumption checking, final inferential analysis,
798 % communication of scientific results
799 % \end{itemize}
800 % Description of work-flows is essential to initiating discussions on
801 % quality/efficiency of approaches to work.
802 % \end{frame}
804 % \section{Design Challenges}
806 % \begin{frame}
807 % \frametitle{Activities are enhanced by support}
809 % \begin{itemize}
810 % \item Mathematical manipulation can be enhanced by symbolic
811 % computation
812 % \item Statistical programming can be enabled by examples and related
813 % algorithm implementation
814 % \item Datasets, to a limited extent, can self-describe.
815 % \end{itemize}
816 % \end{frame}
818 % \begin{frame}
819 % \frametitle{Executable and Computable Science}
821 % Use of algorithms and construction to describe how things work.
823 % Support for agent-based approaches
824 % \end{frame}
827 % \begin{frame}
828 % \frametitle{What is Data? Metadata?}
830 % Data: what we've observed
832 % MetaData: context for observations, enables semantics.
833 % \end{frame}
838 % % \begin{frame}[fragile]
839 % % \frametitle{Defining Variables}
840 % % \framesubtitle{Setting variables}
841 % % \begin{verbatim}
842 % % (setq <variable> <value>)
843 % % \end{verbatim}
844 % % Example:
845 % % \begin{verbatim}
846 % % (setq ess-source-directory
847 % % "/home/rossini/R-src")
848 % % \end{verbatim}
849 % % \end{frame}
851 % % \begin{frame}[fragile]
852 % % \frametitle{Defining on the fly}
853 % % \begin{verbatim}
854 % % (setq ess-source-directory
855 % % (lambda () (file-name-as-directory
856 % % (expand-file-name
857 % % (concat (default-directory)
858 % % ess-suffix "-src")))))
859 % % \end{verbatim}
860 % % (Lambda-expressions are anonymous functions, i.e. ``instant-functions'')
861 % % \end{frame}
864 % % \begin{frame}[fragile]
865 % % \frametitle{Function Reuse}
866 % % By naming the function, we could make the previous example reusable
867 % % (if possible):
868 % % \begin{verbatim}
869 % % (defun my-src-directory ()
870 % % (file-name-as-directory
871 % % (expand-file-name
872 % % (concat (default-directory)
873 % % ess-suffix "-src"))))
874 % % \end{verbatim}
875 % % Example:
876 % % \begin{verbatim}
877 % % (setq ess-source-directory (my-src-directory))
878 % % \end{verbatim}
879 % % \end{frame}
882 % % \begin{frame}
883 % % \frametitle{Equality Among Packages}
884 % % \begin{itemize}
885 % % \item more/less equal can be described specifically through
886 % % overriding imports.
887 % % \end{itemize}
888 % % \end{frame}
891 % \subsection<presentation>*{For Further Reading}
893 % \begin{frame}[allowframebreaks]
894 % \frametitle<presentation>{Related Material}
896 % \begin{thebibliography}{10}
898 % \beamertemplatebookbibitems
899 % % Start with overview books.
901 % \bibitem{LispStat1990}
902 % L.~Tierney
903 % \newblock {\em LispStat}.
905 % \beamertemplatearticlebibitems
906 % % Followed by interesting articles. Keep the list short.
908 % \bibitem{Rossini2001}
909 % AJ.~Rossini
910 % \newblock Literate Statistical Practice
911 % \newblock {\em Proceedings of the Conference on Distributed
912 % Statistical Computing}, 2001.
914 % \bibitem{RossiniLeisch2003}
915 % AJ.~Rossini and F.~Leisch
916 % \newblock Literate Statistical Practice
917 % \newblock {\em Technical Report Series, University of Washington
918 % Department of Biostatistics}, 2003.
920 % \beamertemplatearrowbibitems
921 % % Followed by interesting articles. Keep the list short.
923 % \bibitem{CLS}
924 % Common Lisp Stat, 2008.
925 % \newblock \url{http://repo.or.cz/CommonLispStat.git/}
927 % \end{thebibliography}
928 % \end{frame}
930 \end{document}