Doc/Rossini-RiceU-2009Mar.tex

   1 \documentclass{beamer}
   2
   3 \mode<presentation>
   4 {
   5   \usetheme{classic}
   6   \setbeamercovered{transparent}
   7 }
   8
   9 \usepackage[english]{babel}
  10 \usepackage[latin1]{inputenc}
  11 \usepackage{times}
  12 \usepackage[T1]{fontenc}
  13
  14 \title[CLS]{Common Lisp Statistics}
  15 \subtitle{Using History to design better data analysis environments}
  16 \author[Rossini]{Anthony~(Tony)~Rossini}
  17
  18 \institute[Novartis and University of Washington] % (optional, but mostly needed)
  19 {
  20   Group Head, Modeling and Simulation\\
  21   Novartis Pharma AG, Switzerland
  22   \and
  23   Affiliate Assoc Prof, Biomedical and Health Informatics\\
  24   University of Washington, USA}
  25
  26 \date[Rice 09]{Rice, Mar 2009}
  27 \subject{Statistical Computing Environments}
  28
  29 \begin{document}
  30
  31 \begin{frame}
  32   \titlepage
  33 \end{frame}
  34
  35 \begin{frame}{Outline}
  36   \tableofcontents
  37 \end{frame}
  38
  39 % Structuring a talk is a difficult task and the following structure
  40 % may not be suitable. Here are some rules that apply for this
  41 % solution:
  42
  43 % - Exactly two or three sections (other than the summary).
  44 % - At *most* three subsections per section.
  45 % - Talk about 30s to 2min per frame. So there should be between about
  46 %   15 and 30 frames, all told.
  47
  48 % - A conference audience is likely to know very little of what you
  49 %   are going to talk about. So *simplify*!
  50 % - In a 20min talk, getting the main ideas across is hard
  51 %   enough. Leave out details, even if it means being less precise than
  52 %   you think necessary.
  53 % - If you omit details that are vital to the proof/implementation,
  54 %   just say so once. Everybody will be happy with that.
  55
  56 \section{Preliminaries}
  57
  58 \begin{frame}{Goals for this Talk}{(define, strategic approach,
  59     justify)}
  60
  61   \begin{itemize}
  62   \item To describe the concept of \alert{computable and executable
  63       statistics}.
  64
  65   \item To demonstrate that \alert{there exists a research program}
  66     consisting of simple steps which can increase the use of
  67     statistical computing approaches to allow for clear description
  68     not only of the numerical characteristics of procedures, but the
  69     statistical concepts behind them.
  70
  71   \item To justify that the \alert{approach is worthwhile} and
  72     represents a staged effort towards \alert{increased use of best
  73       practices}.
  74   \end{itemize}
  75   (unfortunately, the last is still incomplete)
  76 \end{frame}
  77
  78 \begin{frame}{Philosophically, why Common Lisp?}
  79   Philosophically:
  80   \begin{itemize}
  81   \item Lisp as an ancient ``AI'' language; Statistics as ``artificial
  82     intelligence'' (not real intelligence, \alert{humans are too
  83       flawed and inconsistent} for Bayesian work to be anything but
  84     AI).
  85   \item Semantics and context are important: well supported by Lisp
  86     paradigms.
  87   \item Lisp's parentheses describe a single, multi-scale,
  88     \alert{complete thought}.  See \#1 for why that could make it
  89     difficult.
  90   \end{itemize}
  91
  92 \end{frame}
  93
  94 \begin{frame}{Technically, why Common Lisp?}
  95   \begin{itemize}
  96   \item interactive COMPILED language (``R with a compiler'')
  97   \item CLOS is R's S4 object system ``done right''.
  98   \item clean semantics
  99   \item programs are data, data are programs, leading to
 100   \item Most modern computing tools available (XML, WWW technologies)
 101   \item ``executable XML''
 102   \end{itemize}
 103 \end{frame}
 104
 105 \subsection{Background}
 106
 107 \begin{frame}{Many systems existed concurrently for statistical
 108     computing}
 109
 110   \begin{itemize}
 111   \item LispStat (ViSta, ARC)
 112   \item SPSS (BMDP)
 113   \item MiniTab
 114   \item Stata
 115   \item SAS
 116   \item Quail
 117   \item XGobi (Orca, GGobi, Statistical Reality Engine)
 118   \item DataDesk
 119   \item R
 120   \item Excel
 121   \end{itemize}
 122 \end{frame}
 123
 124
 125 \begin{frame}
 126   \frametitle{Semantics and Statistics}
 127   \begin{itemize}
 128   \item
 129     There have been many wonderful talks about the semantic web which \\
 130     \alert{demonstrated its coolness} \\
 131     while completely \\
 132     \alert{failing to demonstrate its usefulness}.\\
 133     This talk follows in the tradition of such giants\ldots{}
 134
 135   \item
 136     Biological informatics support (GO, Entrez) has allowed for
 137     precise definitions of concepts in biology.
 138
 139   \item It is a shame that a field like statistics, requiring such
 140     precision, has less such support than an imprecise and temporally
 141     unstable field such as biology\ldots
 142   \end{itemize}
 143 \end{frame}
 144
 145
 146 \subsection{Context}
 147
 148 \begin{frame}{Context}{(where I'm coming from, my ``priors'')}
 149   \begin{itemize}
 150   \item Pharmaceutical Industry
 151   \item Modeling and Simulation uses mathematical models/constructs to
 152     record beliefs for explication, clinical team alignment, decision
 153     support, and quality management.
 154   \item My major role at Novartis is to work at the intersection of
 155     biomedical informatics, statistics, and mathematical modeling.
 156   \item I need a mix of applications and novel research development to
 157     solve challenges better, faster, more efficiently.
 158   \item Data analysis is a specialized approach to computer
 159     programming, \alert{different} than applications programming or
 160     systems programming.
 161   \item \alert{Nearly all of the research challenges I face today
 162       existed for me in academia, and vice-versa.}
 163   \end{itemize}
 164 \end{frame}
 165
 166 \section{Computable and Executable Statistics}
 167
 168 \begin{frame}{Can we compute with them?}
 169
 170   For the following examples, consider whether one can ``compute''
 171   with the information given.
 172 \end{frame}
 173
 174
 175 \begin{frame}[fragile]{Example 1: Theory\ldots}
 176   \label{example1}
 177   Let $f(x;\theta)$ describe the likelihood of XX under the following
 178   assumptions.
 179   \begin{enumerate}
 180   \item assumption-1
 181   \item assumption-2
 182   \end{enumerate}
 183   Then if we use the following algorithm:
 184   \begin{enumerate}
 185   \item step-1
 186   \item step-2
 187   \end{enumerate}
 188   then $\hat{\theta}$ should be $N(0,\hat\sigma^2)$ with the following
 189   characteristics\ldots
 190 \end{frame}
 191
 192 \begin{frame}
 193   \frametitle{Can we compute, using this description?}
 194   Given the information at hand:
 195   \begin{itemize}
 196   \item we ought to have a framework for initial coding for the
 197     actual simulations (test-first!)
 198   \item the implementation is somewhat clear
 199   \end{itemize}
 200 \end{frame}
 201
 202 \begin{frame}[fragile]{Realizing Theory}
 203   \small{
 204 \begin{verbatim}
 205 (define-theorem my-proposed-theorem
 206    (:theorem-type '(distribution-properties
 207                     frequentist
 208                     likelihood))
 209    (:assumes '(assumption-1 assumption-2))
 210    (:likelihood-form
 211       (defun likelihood (data theta gamma)
 212         (exponential-family theta gamma)))
 213    (:compute-by
 214       '(progn
 215          (compute-starting-values thetahat gammahat)
 216          (until (convergence)
 217            (setf convergence
 218                  (or (step-1 thetahat)
 219                      (step-2 gammahat))))))
 220    (:claim (assert
 221              (and (equal-distribution thetahat normal)
 222                   (equal-distribution gammahat normal)))))
 223 \end{verbatim}
 224   }
 225 \end{frame}
 226
 227 \begin{frame}[fragile]{It would be nice to have}
 228 \begin{verbatim}
 229    (theorem-veracity 'my-proposed-theorem)
 230 \end{verbatim}
 231 \end{frame}
 232
 233 \begin{frame}[fragile]{and why not...?}
 234 \begin{verbatim}
 235    (when (theorem-veracity
 236               'my-proposed-theorem)
 237       (write-paper 'my-proposed-theorem
 238                    :style :JASA
 239                    :output-format
 240                          '(LaTeX MSWord)))
 241 \end{verbatim}
 242 \end{frame}
 243
 244 \begin{frame}{Comments}
 245   \begin{itemize}
 246   \item The general problem is very difficult
 247   \item Some progress has been made in small areas of basic statistics
 248   \item Areas targeted for medium-term future: resampling methods,
 249     likelihood theory and algorithms.
 250   \end{itemize}
 251
 252 \end{frame}
 253
 254 \begin{frame}
 255   \frametitle{Example 2: Practice\ldots}
 256   \label{example2}
 257   The dataset comes from a series of clinical trials.  We model the
 258   primary endpoint, ``relief'', as a binary random variable.  There is
 259   a random trial effect on relief as well as severity due to
 260   differences in recruitment and inclusion/exclusion criteria.
 261 \end{frame}
 262
 263 \begin{frame}
 264   \frametitle{Can we compute, using this description?}
 265   \begin{itemize}
 266   \item With a real such description, it is clear what some of the
 267     potential models might be for this dataset
 268   \item It should be clear how to start thinking of a data dictionary
 269     for this problem.
 270   \end{itemize}
 271 \end{frame}
 272
 273 \begin{frame}[fragile]{Can we compute?}
 274 \begin{verbatim}
 275   (dataset paper-1
 276     :context 'clinical-trials
 277     :variables '((relief :model-type dependent
 278                          :distribution binary)
 279                  (trial  :model-type independent
 280                          :distribution categorical)
 281                  (disease-severity))
 282     :metadata '(inclusion-criteria
 283                 exclusion-criteria
 284                 recruitment-rate))
 285   (propose-analysis paper-1) ; => '(tables
 286                              ;      (logistic regression))
 287 \end{verbatim}
 288 \end{frame}
 289
 290 \begin{frame}{Example 3: The Round-trip\ldots}
 291   \label{example3}
 292   The first examples describe ``ideas $\rightarrow$ code''
 293
 294   Consider the last time you read someone else's implementation of a
 295   statistical procedure (e.g., R package code).  When you read the
 296   code, could you see:
 297   \begin{itemize}
 298   \item the assumptions used?
 299   \item the algorithm implemented?
 300   \item practical guidance for when you might select the algorithm
 301     over others?
 302   \item practical guidance for when you might select the
 303     implementation over others?
 304   \end{itemize}
 305   These are usually components of any reasonable journal article.
 306   \textit{(Q: have you actually read an R package that wasn't yours?)}
 307 \end{frame}
 308
 309 \begin{frame}{Exercise left to the reader!}
 310
 311   (aside: I have been looking at the \textbf{stats} and \textbf{lme4}
 312   packages recently -- \textit{for me}, very clear numerically, much
 313   less so statistically)
 314 \end{frame}
 315
 316
 317
 318 \subsection{Literate Programming is insufficient}
 319
 320 \begin{frame}{Literate Statistical Practice.}
 321   \begin{enumerate}
 322   \item Literate Programming applied to data analysis
 323   \item among the \alert{most annoying} techniques to integrate into
 324     work-flow if one is not perfectly methodical.
 325   \item Some tools:
 326     \begin{itemize}
 327     \item ESS: supports interactive creation of literate programs.
 328     \item Sweave: tool which exemplifies reporting context; odfWeave
 329       primarily simplifies reporting.
 330     \item Roxygen: primarily supports a literate programming
 331       documentation style, not a literate data analysis programming
 332       style.
 333   \end{itemize}
 334   \item ROI demonstrated in specialized cases: BioConductor.
 335   \item \alert{usually done after the fact} (final step of work-flow)
 336     as a documentation/computational reproducibility technique, rarely
 337     integrated into work-flow.
 338   \end{enumerate}
 339   Many contributors to this general theory/approach:
 340   Knuth, Claerbout, de Leeuw, Leisch, Gentleman, Temple-Lang,
 341   \ldots{}
 342 \end{frame}
 343
 344 % \begin{frame}
 345 %   \frametitle{Literate Programming}
 346 %   \framesubtitle{Why is it not enough?}
 347
 348 %   Claim: it isn't
 349 %   \begin{enumerate}
 350 %   \item used for statistics since mid 90s (Emacs/ESS support in 1997)
 351 %   \item active popular use with R  (Leisch, 2001)
 352 %   \end{enumerate}
 353
 354 %   but it provides a work-flow which is difficult and unnatural for many
 355 %   people (no perceived ROI).
 356 % \end{frame}
 357
 358 \begin{frame}{Related work}
 359
 360   Mathematica Workbooks for mathematics concepts
 361   \begin{itemize}
 362   \item Mathematical storage and reproducibility, what about Statistical
 363     Concepts?
 364   \item Not open, but freely reproducible.
 365   \item Some semantics, hopefully this will improve.
 366   \end{itemize}
 367
 368   Electronic Lab Notebooks for data and the data/data analytics
 369   interaction (but not quantitative methodological development).
 370 \end{frame}
 371
 372 \section{Results/Contribution}
 373
 374 \subsection{Claims}
 375
 376 % \begin{frame}{Semantic Web}{How do we communicate "things"?}
 377 %   Recall Monday evening talk:   What kinds of communication problems can we have?
 378 %   \begin{itemize}
 379 %   \item I say "reinigung", you say "waschen"
 380 %   \item I say "clean", you say "sauber"
 381 %   \end{itemize}
 382 %   In the context of our work, how do we communicate what we've done?
 383 % \end{frame}
 384
 385 \begin{frame}{Communication in Statistical Practice}{\ldots is essential for \ldots}
 386   \begin{itemize}
 387   \item finding
 388   \item explanations
 389   \item agreement
 390   \item receiving information
 391   \end{itemize}
 392   \alert{``machine-readable'' communication/computation lets the
 393     computer help} \\
 394   Semantic Web is about ``machine-enabled computability''.
 395 \end{frame}
 396
 397 \begin{frame}
 398   \frametitle{Literate Programming}
 399   \framesubtitle{Why isn't it enough for Data Analysis?}
 400
 401   Only 2 contexts: (executable) code and documentation.  Fine for
 402   application programming,  but for data analysis, we could benefit
 403   from:
 404   \begin{itemize}
 405   \item classification of statistical procedures
 406   \item descriptions of assumptions
 407   \item pragmatic recommendations
 408   \item inheritance of structure through the work-flow of a
 409     statistical methodology or data analysis project
 410   \item datasets and metadata
 411   \end{itemize}
 412   Concept: ontologies describing mathematical assumptions, applications
 413   of methods, work-flow, and statistical data structures can enable
 414   machine communication.
 415
 416   (i.e. informatics framework ala biology)
 417 \end{frame}
 418
 419 \begin{frame}  \frametitle{Semantics}
 420   \framesubtitle{One definition: description and context}
 421
 422   Interoperability is the key, with respect to
 423   \begin{itemize}
 424   \item ``Finding things''
 425   \item Applications and activities with related functionality
 426     \begin{itemize}
 427     \item moving information from one state to another (paper, journal
 428       article, computer program)
 429     \item computer programs which implement solutions to similar tasks
 430     \end{itemize}
 431   \end{itemize}
 432 \end{frame}
 433
 434 \begin{frame}{Statistical Practice is somewhat restricted}
 435   {...but in a good sense, enabling potential for semantics...}
 436
 437   There is a restrictable set of intended actions for what can be done
 438   -- the critical goal is to be able to make a difference by
 439   accelerating activities that should be ``computable'':
 440   \begin{itemize}
 441   \item restricted natural language processing
 442   \item mathematical translation
 443   \item common description of activities for simpler programming/data
 444     analysis (S approach to objects and methods)
 445   \end{itemize}
 446   R is a good primitive start (model formulation approach, simple
 447   ``programming with data'' paradigm); we should see if we can do
 448   better!
 449 \end{frame}
 450
 451
 452 % \begin{frame}{Semantics}{Capturing Ideas, Concepts, Proposals.}
 453 %   \begin{itemize}
 454 %   \item Capturing the historical state and corresponding decisions is
 455 %     essential for developing improved approaches.  A common problem in
 456 %     ``product development'' (stat research, drug development) is
 457 %     cycling through the same issues repeatedly.
 458 %   \item These should be captured semantically
 459 %   \item Conversion of concepts to computable semantics is sensible
 460 %     when you need it, difficult without a compelling reasons
 461 %   \end{itemize}
 462 % \end{frame}
 463
 464
 465 % \begin{frame}{Lowering the bounds to interactive work.}
 466 %   \begin{enumerate}
 467 %   \item Limitations of object-orientation and information-hiding
 468 %     routines: require context in order to keep the context.
 469 %   \item Statistical and Data analysis: context is central and obvious.
 470 %   \end{enumerate}
 471 % \end{frame}
 472
 473
 474 \subsection{Common Lisp Statistics}
 475
 476 \begin{frame}
 477   \frametitle{Interactive Programming}
 478   \framesubtitle{Everything goes back to being Lisp-like}
 479   \begin{itemize}
 480   \item Interactive programming (as originating with Lisp): works
 481     extremely well for data analysis (Lisp being the original
 482     ``programming with data'' language).
 483   \item Theories/methods for how to do this are reflected in styles
 484     for using R.
 485   \end{itemize}
 486 \end{frame}
 487
 488 \begin{frame}[fragile]
 489   \frametitle{Lisp}
 490
 491   Lisp (LISt Processor) is different than most high-level computing
 492   languages, and is very old (1956).  Lisp is built on lists of things
 493   which are evaluatable.
 494 \begin{verbatim}
 495 (functionName data1 data2 data3)
 496 \end{verbatim}
 497   or ``quoted'':
 498 \begin{verbatim}
 499 '(functionName data1 data2 data3)
 500 \end{verbatim}
 501   which is shorthand for
 502 \begin{verbatim}
 503 (quote (functionName data1 data2 data3))
 504 \end{verbatim}
 505   The difference is important -- lists of data (the second/third) are
 506   not (yet?!) functions applied to (unencapsulated lists of) data (the first).
 507 \end{frame}
 508
 509 \begin{frame}
 510   \frametitle{Features}
 511   \begin{itemize}
 512   \item Data and Functions semantically the same
 513   \item Natural interactive use through functional programming with
 514     side effects
 515   \item Batch is a simplification of interactive -- not a special mode!
 516   \end{itemize}
 517 \end{frame}
 518
 519
 520 \subsection{Current Approach / Implementation}
 521
 522
 523
 524
 525 \begin{frame}{Computable and Executable Statistics requires}
 526
 527   \begin{itemize}
 528   \item approaches to describe data and metadata (``data'')
 529     \begin{itemize}
 530     \item semantic WWW
 531     \item metadata management and integration, driving
 532     \item data integration
 533     \end{itemize}
 534   \item approaches to describe data analysis methods (``models'')
 535     \begin{itemize}
 536     \item quantitatively: many ontologies (AMS, etc), few meeting
 537       statistical needs.
 538     \item many substantive fields have implementations
 539       (bioinformatics, etc) but not well focused.
 540     \end{itemize}
 541   \item approaches to describe the specific form of interaction
 542     (``instances of models'')
 543     \begin{itemize}
 544     \item Original idea behind ``Literate Statistical Analysis''.
 545     \item That idea is suboptimal, more structure needed (not
 546       necessarily built upon existing...).
 547     \end{itemize}
 548   \end{itemize}
 549 \end{frame}
 550
 551 \begin{frame}[fragile]{Representation: XML and Lisp}{executing your data}
 552   Many people are familiar with XML:
 553 \begin{verbatim}
 554 <name phone="+41793674557">Tony Rossini</name>
 555 \end{verbatim}
 556   which is shorter in Lisp:
 557 \begin{verbatim}
 558 (name "Tony Rossini" :phone "+41613674557")
 559 \end{verbatim}
 560   \begin{itemize}
 561   \item Lisp ``parens'', universally hated by unbelievers, are
 562     wonderful for denoting when a ``concept is complete''.
 563   \item Why can't your data self-execute?
 564   \end{itemize}
 565 \end{frame}
 566
 567 \begin{frame}{Common Lisp Statistics}
 568   Ross talked about Lisp.   I generally agree.  My current
 569   research program dates back over 3 years, and:
 570   \begin{itemize}
 571   \item Originally based on LispStat (reusability)
 572   \item Re-factored structure (some numerics worked with a 1990-era code base).
 573   \item Current activities:
 574     \begin{enumerate}
 575     \item numerics redone using CFFI-based BLAS/LAPACK (cl-blapack)
 576     \item matrix interface based on MatLisp
 577     \item starting design of a user interface system (interfaces,
 578       visuals).
 579     \item general framework for model specification (regression,
 580       likelihood, ODEs)
 581     \item general framework for algorithm specification (bootstrap,
 582       MLE, algorithmic data analysis methods).
 583     \end{enumerate}
 584   \end{itemize}
 585 \end{frame}
 586
 587 \begin{frame}{Common Lisp Statistics}
 588
 589   \begin{itemize}
 590   \item Implemented using SBCL.  Contributed fixes for
 591     Clozure/OpenMCL. Goal to target CLISP
 592   \item Supports LispStat prototype object system
 593   \item Package-based design -- only use the components you need, or
 594     the components whose API you like.
 595   \end{itemize}
 596 \end{frame}
 597 \section*{Summary}
 598
 599 % \begin{frame}{Delivering Better Data Analyses Faster}
 600 %   Industrial settings:
 601 %   \begin{enumerate}
 602 %   \item Pharmaceutical companies
 603 %   \item Academic departments
 604 %   \item Review-centric organizations (Health Authorities, Regulators)
 605 %   \end{enumerate}
 606 % \end{frame}
 607
 608
 609 \section{Discussion}
 610
 611 \begin{frame}
 612   \frametitle{Outlook}
 613   \begin{itemize}
 614   \item Semantics and Computability have captured a great deal of
 615     attention in the informatics and business computing R\&D worlds
 616   \item Statistically-driven Decision Making and Knowledge Discovery
 617     is, with high likelihood, the next challenging stage after data
 618     integration.
 619   \item Statistical practice (theory and application) can be enhanced,
 620     made more efficient, providing  increased benefit to organizations
 621     and groups using appropriate methods.
 622   \item Lisp, as a language, shares characteristics of both Latin
 623     (difficult dead language useful for classical training) and German
 624     (difficult living language useful for general life).  Of course,
 625     for some people, they are not difficult.
 626   \end{itemize}
 627
 628   The research program described in this talk is currently driving the
 629   design of CommonLisp Stat, which leverages concepts and approaches
 630   from the dead and moribund XLisp-Stat project.
 631
 632   \url{http://repo.or.cz/w/CommonLispStat.git/}
 633
 634   \url{http://www.github.com/blindglobe/}
 635
 636 \end{frame}
 637
 638 \begin{frame}{Summary}
 639
 640   \begin{itemize}
 641   \item In the Pharma industry, it is all about getting the right
 642     drugs to the patient faster.  Data analysis systems seriously
 643     impact this process, being potentially an impediment or an
 644     accelerator.
 645
 646     \begin{itemize}
 647     \item \alert{Information technologies can increase the efficiency
 648         of statistical practice}, though innovation change management
 649       must be taken into account.  (i.e. Statistical practice, while
 650       considered by some an ``art form'', can benefit from
 651       industrialization).
 652     \item \alert{Lisp's features match the basic requirements we need}
 653       (dichotomy: programs as data, data as programs).  Sales pitch,
 654       though...
 655     \item Outlook: Lots of work and experimentation to do!
 656     \end{itemize}
 657   \item {\tiny Gratuitous Advert: We are hiring, have student
 658       internships (undergrad, grad students), and a visiting faculty
 659       program.  Talk with me if possibly interested.}
 660   \end{itemize}
 661 \end{frame}
 662
 663
 664 % % All of the following is optional and typically not needed.
 665 % \appendix
 666
 667
 668 % \section<presentation>*{\appendixname}
 669
 670
 671 % \begin{frame} \frametitle{Complements and Backup}
 672 %   No more, stop here.  Questions?  (now or later).
 673 % \end{frame}
 674
 675 % \begin{frame}{The Industrial Challenge.}{Getting the Consulting Right.}
 676 %   % - A title should summarize the slide in an understandable fashion
 677 %   %   for anyone who does not follow everything on the slide itself.
 678
 679 %   \begin{itemize}
 680 %   \item Recording assumptions for the next data analyst, reviewer.
 681 %     Use \texttt{itemize} a lot.
 682 %   \item
 683 %     Use very short sentences or short phrases.
 684 %   \end{itemize}
 685 % \end{frame}
 686
 687
 688 % \begin{frame}{The Industrial Challenge.}{Getting the Right Research Fast.}
 689 %   % - A title should summarize the slide in an understandable fashion
 690 %   %   for anyone who does not follow everything on the slide itself.
 691
 692 %   \begin{itemize}
 693 %   \item
 694 %     Use \texttt{itemize} a lot.
 695 %   \item
 696 %     Use very short sentences or short phrases.
 697 %   \end{itemize}
 698 % \end{frame}
 699
 700
 701 % \begin{frame}{Explicating the Work-flow}{QA/QC-based improvements.}
 702
 703
 704 % \end{frame}
 705
 706 % \section{Motivation}
 707
 708 % \subsection{IT Can Speed up Deliverables in Statistical Practice}
 709
 710 % \begin{frame}{Our Generic Work-flow and Life-cycle}
 711 %   {describing most data analytic activities}
 712 %   Workflow:
 713 %   \begin{enumerate}
 714 %   \item Scope out the problem
 715 %   \item Sketch out a potential solution
 716 %   \item Implement until road-blocks appear
 717 %   \item Deliver results
 718 %   \end{enumerate}
 719
 720 %   Lifecycle:
 721 %   \begin{enumerate}
 722 %   \item paper sketch
 723 %   \item 1st e-draft of text/code/date (iterate to \#1, discarding)
 724 %   \item cycle through work
 725 %   \item publish
 726 %   \item ``throw-away''
 727 %   \end{enumerate}
 728 %   but there is valuable information that could enable the next
 729 %   generation!
 730 % \end{frame}
 731
 732 % \begin{frame}[fragile]{Paper $\rightarrow$ Computer  $\rightarrow$ Article $\rightarrow$ Computer}{Cut and Paste makes for large errors.}
 733 %   \begin{itemize}
 734 %   \item Problems in a regulatory setting
 735 %   \item Regulatory issues are just ``best practices''
 736 %   \end{itemize}
 737
 738 %   Why do we ``copy/paste'', or analogously, restart our work?
 739
 740 %   pro:
 741 %   \begin{itemize}
 742 %   \item every time we repeat, we reinforce the idea in our brain
 743 %   \item review of ideas can help improve them
 744 %   \end{itemize}
 745 %   con:
 746 %   \begin{itemize}
 747 %   \item inefficiency
 748 %   \item introduction of mistakes
 749 %   \item loss of historical context
 750 %   \item changes to earlier work (on a different development branch)
 751 %     can not propagate.
 752 %   \end{itemize}
 753 % \end{frame}
 754
 755 % \section{Semantics and Statistical Practice}
 756
 757
 758 % \begin{frame}
 759 %   \frametitle{Statistical Activity Leads to Reports}
 760 %   \framesubtitle{You read what you know, do you understand it?}
 761
 762 %   How can we improve the communication of the ideas we have?
 763
 764 %   Precision of communication?
 765
 766 % \end{frame}
 767
 768
 769
 770 % \begin{frame}  \frametitle{Communication Requires Context}
 771 %   \framesubtitle{Intentions imply more than one might like...}
 772
 773 %   \begin{itemize}
 774 %   \item Consideration of what we might do
 775 %   \item Applications with related functionality
 776 %   \end{itemize}
 777 % \end{frame}
 778
 779
 780
 781 % \begin{frame}
 782 %   \frametitle{Design Patterns}
 783 %   \framesubtitle{Supporting Work-flow Transitions}
 784
 785 %   (joint work with H Wickham): The point of this research program is
 786 %   not to describe what to do at any particular stage of work, but to
 787 %   encourage researchers and practitioners to consider how the
 788 %   translation and transfer of information between stages so that work
 789 %   is not lost.
 790
 791 %   Examples of stages in a work-flow:
 792 %   \begin{itemize}
 793 %   \item planning, execution, reporting;
 794 %   \item scoping, illustrative examples or counter examples, algorithmic construction,
 795 %     article writing.
 796 %   \item descriptive statistics, preliminary inferential analysis,
 797 %     model/assumption checking, final inferential analysis,
 798 %     communication of scientific results
 799 %   \end{itemize}
 800 %   Description of work-flows is essential to initiating discussions on
 801 %   quality/efficiency of approaches to work.
 802 % \end{frame}
 803
 804 % \section{Design Challenges}
 805
 806 % \begin{frame}
 807 %   \frametitle{Activities are enhanced by support}
 808
 809 %   \begin{itemize}
 810 %   \item Mathematical manipulation can be enhanced by symbolic
 811 %     computation
 812 %   \item Statistical programming can be enabled by examples and related
 813 %     algorithm implementation
 814 %   \item Datasets, to a limited extent, can self-describe.
 815 %   \end{itemize}
 816 % \end{frame}
 817
 818 % \begin{frame}
 819 %   \frametitle{Executable and Computable Science}
 820
 821 %   Use of algorithms and construction to describe how things work.
 822
 823 %   Support for agent-based approaches
 824 % \end{frame}
 825
 826
 827 % \begin{frame}
 828 %   \frametitle{What is Data?  Metadata?}
 829
 830 %   Data: what we've observed
 831
 832 %   MetaData: context for observations, enables semantics.
 833 % \end{frame}
 834
 835
 836
 837
 838 % % \begin{frame}[fragile]
 839 % %   \frametitle{Defining Variables}
 840 % %   \framesubtitle{Setting variables}
 841 % % \begin{verbatim}
 842 % % (setq <variable> <value>)
 843 % % \end{verbatim}
 844 % %   Example:
 845 % % \begin{verbatim}
 846 % % (setq ess-source-directory
 847 % %       "/home/rossini/R-src")
 848 % % \end{verbatim}
 849 % % \end{frame}
 850
 851 % % \begin{frame}[fragile]
 852 % %   \frametitle{Defining on the fly}
 853 % % \begin{verbatim}
 854 % % (setq ess-source-directory
 855 % %    (lambda () (file-name-as-directory
 856 % %          (expand-file-name
 857 % %            (concat (default-directory)
 858 % %                    ess-suffix "-src")))))
 859 % % \end{verbatim}
 860 % %   (Lambda-expressions are anonymous functions, i.e. ``instant-functions'')
 861 % % \end{frame}
 862
 863
 864 % % \begin{frame}[fragile]
 865 % %   \frametitle{Function Reuse}
 866 % %   By naming the function, we could make the previous example reusable
 867 % %   (if possible):
 868 % % \begin{verbatim}
 869 % % (defun my-src-directory ()
 870 % %       (file-name-as-directory
 871 % %          (expand-file-name
 872 % %            (concat (default-directory)
 873 % %                    ess-suffix "-src"))))
 874 % % \end{verbatim}
 875 % %   Example:
 876 % % \begin{verbatim}
 877 % % (setq ess-source-directory (my-src-directory))
 878 % % \end{verbatim}
 879 % % \end{frame}
 880
 881
 882 % % \begin{frame}
 883 % %   \frametitle{Equality Among Packages}
 884 % %   \begin{itemize}
 885 % %   \item more/less equal can be described specifically through
 886 % %     overriding imports.
 887 % %   \end{itemize}
 888 % % \end{frame}
 889
 890
 891 % \subsection<presentation>*{For Further Reading}
 892
 893 % \begin{frame}[allowframebreaks]
 894 %   \frametitle<presentation>{Related Material}
 895
 896 %   \begin{thebibliography}{10}
 897
 898 %   \beamertemplatebookbibitems
 899 %   % Start with overview books.
 900
 901 %   \bibitem{LispStat1990}
 902 %     L.~Tierney
 903 %     \newblock {\em LispStat}.
 904
 905 %   \beamertemplatearticlebibitems
 906 %   % Followed by interesting articles. Keep the list short.
 907
 908 %   \bibitem{Rossini2001}
 909 %     AJ.~Rossini
 910 %     \newblock Literate Statistical Practice
 911 %     \newblock {\em Proceedings of the Conference on Distributed
 912 %       Statistical Computing}, 2001.
 913
 914 %   \bibitem{RossiniLeisch2003}
 915 %     AJ.~Rossini and F.~Leisch
 916 %     \newblock Literate Statistical Practice
 917 %     \newblock {\em Technical Report Series, University of Washington
 918 %       Department of Biostatistics}, 2003.
 919
 920 %   \beamertemplatearrowbibitems
 921 %   % Followed by interesting articles. Keep the list short.
 922
 923 %   \bibitem{CLS}
 924 %     Common Lisp Stat, 2008.
 925 %     \newblock \url{http://repo.or.cz/CommonLispStat.git/}
 926
 927 %   \end{thebibliography}
 928 % \end{frame}
 929
 930 \end{document}