Doc/talks/Rossini-RiceU-2009Mar.tex

   1 \documentclass{beamer}
   2
   3 \mode<presentation>
   4 {
   5   \usetheme{classic}
   6   \setbeamercovered{transparent}
   7 }
   8
   9 \usepackage[english]{babel}
  10 \usepackage[latin1]{inputenc}
  11 \usepackage{times}
  12 \usepackage[T1]{fontenc}
  13
  14 \title[CLS]{Common Lisp Statistics}
  15 \subtitle{Using History to design better data analysis environments}
  16 \author[Rossini]{Anthony~(Tony)~Rossini}
  17
  18 \institute[Novartis and University of Washington] % (optional, but mostly needed)
  19 {
  20   Group Head, Modeling and Simulation\\
  21   Novartis Pharma AG, Switzerland
  22   \and
  23   Affiliate Assoc Prof, Biomedical and Health Informatics\\
  24   University of Washington, USA}
  25
  26 \date[Rice 09]{Rice, Mar 2009}
  27 \subject{Statistical Computing Environments}
  28
  29 \begin{document}
  30
  31 \begin{frame}
  32   \titlepage
  33 \end{frame}
  34
  35 \begin{frame}{Outline}
  36   \tableofcontents
  37 \end{frame}
  38
  39 % Structuring a talk is a difficult task and the following structure
  40 % may not be suitable. Here are some rules that apply for this
  41 % solution:
  42
  43 % - Exactly two or three sections (other than the summary).
  44 % - At *most* three subsections per section.
  45 % - Talk about 30s to 2min per frame. So there should be between about
  46 %   15 and 30 frames, all told.
  47
  48 % - A conference audience is likely to know very little of what you
  49 %   are going to talk about. So *simplify*!
  50 % - In a 20min talk, getting the main ideas across is hard
  51 %   enough. Leave out details, even if it means being less precise than
  52 %   you think necessary.
  53 % - If you omit details that are vital to the proof/implementation,
  54 %   just say so once. Everybody will be happy with that.
  55
  56 \section{Preliminaries}
  57
  58 \subsection{Context}
  59
  60 \begin{frame}{Goals for this Talk}{(define, strategic approach,
  61     justify)}
  62
  63   \begin{itemize}
  64   \item To describe the concept of \alert{computable and executable
  65       statistics}, placing it in a historical context.
  66
  67   \item To demonstrate that \alert{a research program}
  68     implemented through  simple steps can increase the efficiency  of
  69     statistical computing approaches by  clearly describing both:
  70     \begin{itemize}
  71     \item numerical characteristics of procedures,
  72     \item statistical concepts driving them.
  73     \end{itemize}
  74
  75   \item To justify that the \alert{approach is worthwhile} and
  76     represents a staged effort towards \alert{increased use of best
  77       practices}.
  78   \end{itemize}
  79   (unfortunately, the last is still incomplete)
  80 \end{frame}
  81
  82
  83 \begin{frame}{Historical Computing Languages}
  84   \begin{itemize}
  85   \item FORTRAN : FORmula TRANslator.  Original numerical computing
  86     language, designed for clean implementation of numerical
  87     algorithms
  88   \item LISP : LISt Processor.  Associated with symbolic
  89     manipulation, AI, and knowledge approaches
  90   \end{itemize}
  91
  92   They represent the 2 generalized needs of statistical computing,
  93   which could be summarized as
  94   \begin{itemize}
  95   \item algorithms/numerics,
  96   \item elicitation, communication, and generation of knowledge (``data
  97     analysis'')
  98   \end{itemize}
  99 \end{frame}
 100
 101 \begin{frame}{Statistical Computing Environments}
 102
 103   Past:
 104   \begin{itemize}
 105   \item SPSS / BMDP / SAS
 106   \item S ( S, S-PLUS, R)
 107   \item LispStat ( XLispStat,  ViSta, ARC , CommonLispStat ) ; QUAIL
 108   \item XGobi (Orca / GGobi / Statistical Reality Engine)
 109   \item MiniTab
 110   \item Stata
 111   \item DataDesk
 112   \item Augsburg Impressionist series (MANET, Mondrian, \ldots)
 113   \item Excel
 114   \end{itemize}
 115   many others...
 116
 117 \end{frame}
 118
 119 \begin{frame}{How many are left?}
 120
 121   \begin{itemize}
 122   \item R
 123   \item SAS
 124   \item SPSS
 125   \item Stata
 126   \item Minitab
 127   \item very few others...
 128   \end{itemize}
 129   ``R is the Microsoft of the statistical computing world'' -- anonymous.
 130 \end{frame}
 131
 132 \begin{frame}{Selection Pressure}
 133   \begin{itemize}
 134   \item the R user population is growing rapidly, fueled by critical
 135     mass, quality, and value
 136   \item R is a great system for applied data analysis
 137   \item R is not such a great system for research into statistical
 138     computing (backwards compatibility, inertia due to user population)
 139   \end{itemize}
 140   There is a need for alternative experiments for developing new
 141   approaches/ideas/concepts.
 142 \end{frame}
 143
 144 \begin{frame}{Philosophically, why Common Lisp?}
 145   Philosophically:
 146   \begin{itemize}
 147   \item Lisp can cleanly present computational intentions, both
 148     symbolically and numerically.
 149   \item Semantics and context are important: well supported by Lisp
 150     paradigms.
 151   \item Lisp's parentheses describe singular, multi-scale,
 152     \alert{complete thoughts}.
 153   \end{itemize}
 154
 155 \end{frame}
 156
 157 \begin{frame}{Technically, why Common Lisp?}
 158   \begin{itemize}
 159   \item interactive COMPILED language (``R with a compiler'')
 160   \item CLOS is R's S4 object system ``done right''.
 161   \item clean semantics: modality, typing, can be expressed the way
 162     one wants it.
 163   \item programs are data, data are programs, leading to
 164   \item Most modern computing tools available (XML, WWW technologies)
 165   \item ``executable XML''
 166   \end{itemize}
 167   Common Lisp is very close in usage to how people currently use R
 168   (mostly interactive, some batch, and a wish for compilation efficiency).
 169 \end{frame}
 170
 171 \subsection{Background}
 172
 173 \begin{frame}
 174   \frametitle{Desire: Semantics and Statistics}
 175   \begin{itemize}
 176   \item The semantic web (content which is self-descriptive) is an
 177     interesting and potentially useful idea.
 178
 179   \item
 180     Biological informatics support (GO, Entrez) has allowed for
 181     precise definitions of concepts in biology.
 182
 183   \item It is a shame that a field like statistics, requiring such
 184     precision, has less precise definitions of its concepts than an
 185     imprecise and temporally unstable field such as biology\ldots
 186   \end{itemize}
 187
 188   How can we express statistical work (research, applied work) which
 189   is both human and computer readable (perhaps subject to
 190   transformations first)?
 191 \end{frame}
 192
 193
 194 % \subsection{Context}
 195
 196 % \begin{frame}{Context}{(where I'm coming from, my ``priors'')}
 197 %   \begin{itemize}
 198 %   \item Pharmaceutical Industry
 199 %   \item Modeling and Simulation uses mathematical models/constructs to
 200 %     record beliefs (biology, pharmacology, clinical science) for
 201 %     explication, clinical team alignment, decision support, and
 202 %     quality.
 203 %   \item My work at Novartis is at the intersection of biomedical
 204 %     informatics, statistics, and mathematical modeling.
 205 %   \item As manager: I need a mix of applications and novel research development to
 206 %     solve our challenges better, faster, more efficiently.
 207 %   \item Data analysis is a specialized approach to computer
 208 %     programming, \alert{different} than applications programming or
 209 %     systems programming.
 210 %   \end{itemize}
 211 % \end{frame}
 212
 213 \section{Computable and Executable Statistics}
 214
 215 \begin{frame}{Can we compute with them?}
 216   3 Examples:
 217   \begin{itemize}
 218   \item Research
 219   \item Consulting
 220   \item Reimplementation
 221   \end{itemize}
 222   Consider whether one can ``compute'' with the information given?
 223 \end{frame}
 224
 225 \begin{frame}[fragile]{Example 1: Theory\ldots}
 226   \label{example1}
 227   Let $f(x;\theta)$ describe the likelihood of XX under the following
 228   assumptions.
 229   \begin{enumerate}
 230   \item assumption-1
 231   \item assumption-2
 232   \end{enumerate}
 233   Then if we use the following algorithm:
 234   \begin{enumerate}
 235   \item step-1
 236   \item step-2
 237   \end{enumerate}
 238   then $\hat{\theta}$ should be $N(0,\hat\sigma^2)$ with the following
 239   characteristics\ldots
 240 \end{frame}
 241
 242 \begin{frame}
 243   \frametitle{Can we compute, using this description?}
 244   Given the information at hand:
 245   \begin{itemize}
 246   \item we ought to have a framework for initial coding for the
 247     actual simulations (test-first!)
 248   \item the implementation is somewhat clear
 249   \item We should ask: what theorems have similar assumptions?
 250   \item We should ask: what theorems have similar conclusions but
 251     different assumptions?
 252   \end{itemize}
 253 \end{frame}
 254
 255 \begin{frame}[fragile]{Realizing Theory}
 256   \small{
 257 \begin{verbatim}
 258 (define-theorem my-proposed-theorem
 259    (:theorem-type '(distribution-properties
 260                     frequentist
 261                     likelihood))
 262    (:assumes '(assumption-1 assumption-2))
 263    (:likelihood-form
 264       (defun likelihood (data theta gamma)
 265         (exponential-family theta gamma)))
 266    (:compute-by
 267       '(progn
 268          (compute-starting-values thetahat gammahat)
 269          (until (convergence)
 270            (setf convergence
 271                  (or (step-1 thetahat)
 272                      (step-2 gammahat))))))
 273    (:claim (assert
 274              (and (equal-distribution thetahat 'normal)
 275                   (equal-distribution gammahat 'normal)))))
 276 \end{verbatim}
 277   }
 278 \end{frame}
 279
 280 \begin{frame}[fragile]{It would be nice to have}
 281 \begin{verbatim}
 282    (theorem-veracity 'my-proposed-theorem)
 283 \end{verbatim}
 284 \end{frame}
 285
 286 \begin{frame}[fragile]{and why not...?}
 287 \begin{verbatim}
 288    (when (theorem-veracity
 289               'my-proposed-theorem)
 290       (write-paper 'my-proposed-theorem
 291                    :style :JASA
 292                    :output-format
 293                          '(LaTeX MSWord)))
 294 \end{verbatim}
 295 \end{frame}
 296
 297 \begin{frame}{Comments}
 298   \begin{itemize}
 299   \item The general problem is very difficult
 300   \item Some progress has been made in small areas of basic
 301     statistics: currently working on linear regression (LS-based,
 302     Normal-Bayesian) and the $t$-test.
 303   \item Areas targeted for medium-term future: resampling methods and
 304     similar algorithms.
 305   \end{itemize}
 306
 307 \end{frame}
 308
 309 \begin{frame}
 310   \frametitle{Example 2: Practice\ldots}
 311   \label{example2}
 312   The dataset comes from a series of clinical trials.  We model the
 313   primary endpoint, ``relief'', as a binary random variable.  There is
 314   a random trial effect on relief as well as severity due to
 315   differences in recruitment and inclusion/exclusion criteria.
 316 \end{frame}
 317
 318 \begin{frame}
 319   \frametitle{Can we compute, using this description?}
 320   \begin{itemize}
 321   \item With a real description of this kind, it is clear what some of the
 322     potential models might be for this dataset
 323   \item It should be clear how to start thinking of a data dictionary
 324     for this problem.
 325   \end{itemize}
 326 \end{frame}
 327
 328 \begin{frame}[fragile]{Can we compute?}
 329 \begin{verbatim}
 330   (dataset-metadata paper-1
 331     :context 'clinical-trials
 332     :variables '((relief :model-type dependent
 333                          :distribution binary)
 334                  (trial  :model-type independent
 335                          :distribution categorical)
 336                  (disease-severity))
 337     :metadata '(inclusion-criteria
 338                 exclusion-criteria
 339                 recruitment-rate))
 340   (propose-analysis paper-1)
 341      ; => '(tables
 342      ;      (logistic regression))
 343 \end{verbatim}
 344 \end{frame}
 345
 346 \begin{frame}{Example 3: The Round-trip\ldots}
 347   \label{example3}
 348   The first examples describe ``ideas $\rightarrow$ code''
 349
 350   Consider the last time you read someone else's implementation of a
 351   statistical procedure (e.g., R package code).  When you read the
 352   code, could you see:
 353   \begin{itemize}
 354   \item the assumptions used?
 355   \item the algorithm implemented?
 356   \item practical guidance for when you might select the algorithm
 357     over others?
 358   \item practical guidance for when you might select the
 359     implementation over others?
 360   \end{itemize}
 361   These are usually components of any reasonable journal article.
 362   \textit{(Q: have you actually read an R package that wasn't yours?)}
 363 \end{frame}
 364
 365 \begin{frame}{Exercise left to the reader!}
 366
 367   (aside: I have been looking at the \textbf{stats} and \textbf{lme4}
 368   packages recently -- \textit{for me}, very clear numerically, much
 369   less so statistically)
 370 \end{frame}
 371
 372
 373
 374 \subsection{Literate Programming is insufficient}
 375
 376 \begin{frame}{Literate Statistical Practice.}
 377   \begin{enumerate}
 378   \item Literate Programming applied to data analysis (Rossini, 1997/2001)
 379   \item among the \alert{most annoying} techniques to integrate into
 380     work-flow if one is not perfectly methodical.
 381   \item Some tools:
 382     \begin{itemize}
 383     \item ESS: supports interactive creation of literate programs.
 384     \item Sweave: tool which exemplifies reporting context; odfWeave
 385       primarily simplifies reporting.
 386     \item Roxygen: primarily supports a literate programming
 387       documentation style, not a literate data analysis programming
 388       style.
 389   \end{itemize}
 390   \item ROI demonstrated in specialized cases: BioConductor.
 391   \item \alert{usually done after the fact} (final step of work-flow)
 392     as a documentation/computational reproducibility technique, rarely
 393     integrated into work-flow.
 394   \end{enumerate}
 395   Many contributors:
 396   Knuth, Claerbout, Carey, de Leeuw, Leisch, Gentleman, Temple-Lang,
 397   \ldots{}
 398 \end{frame}
 399
 400 \begin{frame}
 401   \frametitle{Literate Programming}
 402   \framesubtitle{Why isn't it enough for Data Analysis?}
 403
 404   Only 2 contexts: (executable) code and documentation.  Fine for
 405   application programming,  but for data analysis, we could benefit
 406   from:
 407   \begin{itemize}
 408   \item classification of statistical procedures
 409   \item descriptions of assumptions
 410   \item pragmatic recommendations
 411   \item inheritance of structure through the work-flow of a
 412     statistical methodology or data analysis project
 413   \item datasets and metadata
 414   \end{itemize}
 415   Concept: ontologies describing mathematical assumptions, applications
 416   of methods, work-flow, and statistical data structures can enable
 417   machine communication.
 418
 419   (i.e., an informatics framework \`a la biology)
 420 \end{frame}
 421
 422
 423 \begin{frame}{Communication in Statistical Practice}{\ldots is essential for \ldots}
 424   \begin{itemize}
 425   \item finding
 426   \item explanations
 427   \item agreement
 428   \item receiving information
 429   \end{itemize}
 430   \alert{``machine-readable'' communication/computation lets the
 431     computer help} \\
 432   Semantic Web is about ``machine-enabled computability''.
 433 \end{frame}
 434
 435 \begin{frame}  \frametitle{Semantics}
 436   \framesubtitle{One definition: description and context}
 437
 438   Interoperability is the key, with respect to
 439   \begin{itemize}
 440   \item ``Finding things''
 441   \item Applications and activities with related functionality
 442     \begin{itemize}
 443     \item moving information from one state to another (paper, journal
 444       article, computer program)
 445     \item computer programs which implement solutions to similar tasks
 446     \end{itemize}
 447   \end{itemize}
 448 \end{frame}
 449
 450
 451 \begin{frame}{Statistical Practice is somewhat restricted}
 452   {...but in a good sense, enabling potential for semantics...}
 453
 454   There is a restrictable set of intended actions for what can be done
 455   -- the critical goal is to be able to make a difference by
 456   accelerating activities that should be ``computable'':
 457   \begin{itemize}
 458   \item restricted natural language processing
 459   \item mathematical translation
 460   \item common description of activities for simpler programming/data
 461     analysis (S approach to objects and methods)
 462   \end{itemize}
 463   R is a good basic start (model formulation approach, simple
 464   ``programming with data'' paradigm); we should see if we can do
 465   better!
 466 \end{frame}
 467
 468 \begin{frame}{Computable and Executable Statistics requires}
 469
 470   \begin{itemize}
 471   \item approaches to describe data and metadata (``data'')
 472     \begin{itemize}
 473     \item semantic WWW
 474     \item metadata management and integration, driving
 475     \item data integration
 476     \end{itemize}
 477   \item approaches to describe data analysis methods (``models'')
 478     \begin{itemize}
 479     \item quantitatively: many ontologies (AMS, etc), few meeting
 480       statistical needs.
 481     \item many substantive fields have implementations
 482       (bioinformatics, etc) but not well focused.
 483     \end{itemize}
 484   \item approaches to describe the specific form of interaction
 485     (``instances of models'')
 486     \begin{itemize}
 487     \item Original idea behind ``Literate Statistical Analysis''.
 488     \item That idea is suboptimal, more structure needed (not
 489       necessarily built upon existing...).
 490     \end{itemize}
 491   \end{itemize}
 492 \end{frame}
 493
 494 \subsection{Common Lisp Statistics}
 495
 496 \begin{frame}
 497   \frametitle{Interactive Programming}
 498   \framesubtitle{Everything goes back to being Lisp-like}
 499   \begin{itemize}
 500   \item Interactive programming (as originating with Lisp): works
 501     extremely well for data analysis (Lisp being the original
 502     ``programming with data'' language).
 503   \item Theories/methods for how to do this are reflected in styles
 504     for using R.
 505   \end{itemize}
 506 \end{frame}
 507
 508 \begin{frame}[fragile]
 509   \frametitle{Lisp}
 510
 511   Lisp (LISt Processor) is different from most high-level computing
 512   languages, and is very old (1958).  Lisp is built on lists of things
 513   which are evaluatable.
 514 \begin{verbatim}
 515 (functionName data1 data2 data3)
 516 \end{verbatim}
 517   or ``quoted'':
 518 \begin{verbatim}
 519 '(functionName data1 data2 data3)
 520 \end{verbatim}
 521   which is shorthand for
 522 \begin{verbatim}
 523 (list functionName data1 data2 data3)
 524 \end{verbatim}
 525   The difference is important -- lists of data (the second/third) are
 526   not (yet?!) functions applied to (unencapsulated lists of) data (the first).
 527 \end{frame}
 528
 529 \begin{frame}
 530   \frametitle{Features}
 531   \begin{itemize}
 532   \item Data and Functions semantically the same
 533   \item Natural interactive use through functional programming with
 534     side effects
 535   \item Batch is a simplification of interactive -- not a special mode!
 536   \end{itemize}
 537 \end{frame}
 538
 539
 540
 541 \begin{frame}[fragile]{Representation: XML and Lisp}{executing your data}
 542   Many people are familiar with XML:
 543 \begin{verbatim}
 544 <name phone="+41793674557">Tony Rossini</name>
 545 \end{verbatim}
 546   which is shorter in Lisp:
 547 \begin{verbatim}
 548 (name "Tony Rossini" :phone "+41613674557")
 549 \end{verbatim}
 550   \begin{itemize}
 551   \item Lisp ``parens'', universally hated by unbelievers, are
 552     wonderful for denoting when a ``concept is complete''.
 553   \item Why can't your data self-execute?
 554   \end{itemize}
 555 \end{frame}
 556
 557 \begin{frame}[fragile]{Numerics with Lisp}
 558   \begin{itemize}
 559   \item addition of rational numbers and arithmetic
 560   \item example for mean
 561 \begin{verbatim}
 562  (defun mean (x)
 563     (checktype x 'vector-like)
 564     (/ (loop for i from 0 to (- (nelts *x*) 1)
 565           summing (vref *x* i))
 566        (nelts *x*)))
 567 \end{verbatim}
 568   \item example for variance
 569 \begin{verbatim}
 570 (defun variance (x)
 571   (let ((meanx (mean x))
 572         (nm1 (1- (nelts x))))
 573      (/ (loop for i from 0 to nm1
 574            summing (power (- (vref *x* i) meanx) 2)
 575         nm1))))
 576 \end{verbatim}
 577   \item But through macros, \verb+(vref *x* i)+ could be
 578     \verb+#V(X[i])+ or your favorite syntax.
 579   \end{itemize}
 580
 581 \end{frame}
 582
 583
 584 \begin{frame}{Common Lisp Statistics 1}
 585   \begin{itemize}
 586   \item Originally based on LispStat (reusability)
 587   \item Re-factored structure (some numerics worked with a 1990-era code base).
 588   \item Current activities:
 589     \begin{enumerate}
 590     \item numerics redone using CFFI-based BLAS/LAPACK (cl-blapack)
 591     \item matrix interface based on MatLisp
 592     \item starting design of a user interface system (interfaces,
 593       visuals).
 594     \item general framework for model specification (regression,
 595       likelihood, ODEs)
 596     \item general framework for algorithm specification (bootstrap,
 597       MLE, algorithmic data analysis methods).
 598     \end{enumerate}
 599   \end{itemize}
 600 \end{frame}
 601
 602 \begin{frame}{Common Lisp Statistics 2}
 603
 604   \begin{itemize}
 605   \item Implemented using SBCL.  Contributed fixes for
 606     Clozure/OpenMCL. Goal to target CLISP
 607   \item Supports LispStat prototype object system
 608   \item Package-based design -- only use the components you need, or
 609     the components whose API you like.
 610   \end{itemize}
 611 \end{frame}
 612
 613 \section{Discussion}
 614
 615 \begin{frame}
 616   \frametitle{Outlook}
 617   \begin{itemize}
 618   \item Semantics and Computability have captured a great deal of
 619     attention in the informatics and business computing R\&D worlds
 620   \item Statistically-driven Decision Making and Knowledge Discovery
 621     is, with high likelihood, the next challenging stage after data
 622     integration.
 623   \item Statistical practice (theory and application) can be enhanced,
 624     made more efficient, providing  increased benefit to organizations
 625     and groups using appropriate methods.
 626   \item Lisp, as a language, shares characteristics of both Latin
 627     (difficult dead language useful for classical training) and German
 628     (difficult living language useful for general life).  Of course,
 629     for some people, they are not difficult.
 630   \end{itemize}
 631
 632 \end{frame}
 633
 634 \begin{frame}
 635   The research program described in this talk is currently driving the
 636   design of CommonLisp Stat, which leverages concepts and approaches
 637   from the dead and moribund LispStat project.
 638
 639   \begin{itemize}
 640   \item \url{http://repo.or.cz/w/CommonLispStat.git/}
 641   \item \url{http://www.github.com/blindglobe/}
 642   \end{itemize}
 643
 644 \end{frame}
 645 \begin{frame}{Final Comment}
 646
 647   \begin{itemize}
 648   \item In the Pharma industry, it is all about getting the right
 649     drugs to the patient faster.  Data analysis systems seriously
 650     impact this process, being potentially an impediment or an
 651     accelerator.
 652
 653     \begin{itemize}
 654     \item \alert{Information technologies can increase the efficiency
 655         of statistical practice}, though innovation change management
 656       must be taken into account (i.e., statistical practice, while
 657       considered by some an ``art form'', can benefit from
 658       industrialization).
 659     \item \alert{Lisp's features match the basic requirements we need}
 660       (dichotomy: programs as data, data as programs).  Sales pitch,
 661       though...
 662     \item Outlook: Lots of work and experimentation to do!
 663     \end{itemize}
 664   \item {\tiny Gratuitous Advert: We are hiring, have student
 665       internships (undergrad, grad students), and a visiting faculty
 666       program.  Talk with me if possibly interested.}
 667   \end{itemize}
 668 \end{frame}
 669
 670
 671 % % All of the following is optional and typically not needed.
 672 % \appendix
 673
 674
 675 % \section<presentation>*{\appendixname}
 676
 677
 678 % \begin{frame} \frametitle{Complements and Backup}
 679 %   No more, stop here.  Questions?  (now or later).
 680 % \end{frame}
 681
 682 % \begin{frame}{The Industrial Challenge.}{Getting the Consulting Right.}
 683 %   % - A title should summarize the slide in an understandable fashion
 684 %   %   for anyone who does not follow everything on the slide itself.
 685
 686 %   \begin{itemize}
 687 %   \item Recording assumptions for the next data analyst, reviewer.
 688 %     Use \texttt{itemize} a lot.
 689 %   \item
 690 %     Use very short sentences or short phrases.
 691 %   \end{itemize}
 692 % \end{frame}
 693
 694
 695 % \begin{frame}{The Industrial Challenge.}{Getting the Right Research Fast.}
 696 %   % - A title should summarize the slide in an understandable fashion
 697 %   %   for anyone who does not follow everything on the slide itself.
 698
 699 %   \begin{itemize}
 700 %   \item
 701 %     Use \texttt{itemize} a lot.
 702 %   \item
 703 %     Use very short sentences or short phrases.
 704 %   \end{itemize}
 705 % \end{frame}
 706
 707
 708 % \begin{frame}{Explicating the Work-flow}{QA/QC-based improvements.}
 709
 710
 711 % \end{frame}
 712
 713 % \section{Motivation}
 714
 715 % \subsection{IT Can Speed up Deliverables in Statistical Practice}
 716
 717 % \begin{frame}{Our Generic Work-flow and Life-cycle}
 718 %   {describing most data analytic activities}
 719 %   Workflow:
 720 %   \begin{enumerate}
 721 %   \item Scope out the problem
 722 %   \item Sketch out a potential solution
 723 %   \item Implement until road-blocks appear
 724 %   \item Deliver results
 725 %   \end{enumerate}
 726
 727 %   Lifecycle:
 728 %   \begin{enumerate}
 729 %   \item paper sketch
 730 %   \item 1st e-draft of text/code/date (iterate to \#1, discarding)
 731 %   \item cycle through work
 732 %   \item publish
 733 %   \item ``throw-away''
 734 %   \end{enumerate}
 735 %   but there is valuable information that could enable the next
 736 %   generation!
 737 % \end{frame}
 738
 739 % \begin{frame}[fragile]{Paper $\rightarrow$ Computer  $\rightarrow$ Article $\rightarrow$ Computer}{Cut and Paste makes for large errors.}
 740 %   \begin{itemize}
 741 %   \item Problems in a regulatory setting
 742 %   \item Regulatory issues are just ``best practices''
 743 %   \end{itemize}
 744
 745 %   Why do we ``copy/paste'', or analogously, restart our work?
 746
 747 %   pro:
 748 %   \begin{itemize}
 749 %   \item every time we repeat, we reinforce the idea in our brain
 750 %   \item review of ideas can help improve them
 751 %   \end{itemize}
 752 %   con:
 753 %   \begin{itemize}
 754 %   \item inefficiency
 755 %   \item introduction of mistakes
 756 %   \item loss of historical context
 757 %   \item changes to earlier work (on a different development branch)
 758 %     can not propagate.
 759 %   \end{itemize}
 760 % \end{frame}
 761
 762 % \section{Semantics and Statistical Practice}
 763
 764
 765 % \begin{frame}
 766 %   \frametitle{Statistical Activity Leads to Reports}
 767 %   \framesubtitle{You read what you know, do you understand it?}
 768
 769 %   How can we improve the communication of the ideas we have?
 770
 771 %   Precision of communication?
 772
 773 % \end{frame}
 774
 775
 776
 777 % \begin{frame}  \frametitle{Communication Requires Context}
 778 %   \framesubtitle{Intentions imply more than one might like...}
 779
 780 %   \begin{itemize}
 781 %   \item Consideration of what we might do
 782 %   \item Applications with related functionality
 783 %   \end{itemize}
 784 % \end{frame}
 785
 786
 787
 788 % \begin{frame}
 789 %   \frametitle{Design Patterns}
 790 %   \framesubtitle{Supporting Work-flow Transitions}
 791
 792 %   (joint work with H Wickham): The point of this research program is
 793 %   not to describe what to do at any particular stage of work, but to
 794 %   encourage researchers and practitioners to consider how to
 795 %   translate and transfer information between stages so that work
 796 %   is not lost.
 797
 798 %   Examples of stages in a work-flow:
 799 %   \begin{itemize}
 800 %   \item planning, execution, reporting;
 801 %   \item scoping, illustrative examples or counter examples, algorithmic construction,
 802 %     article writing.
 803 %   \item descriptive statistics, preliminary inferential analysis,
 804 %     model/assumption checking, final inferential analysis,
 805 %     communication of scientific results
 806 %   \end{itemize}
 807 %   Description of work-flows is essential to initiating discussions on
 808 %   quality/efficiency of approaches to work.
 809 % \end{frame}
 810
 811 % \section{Design Challenges}
 812
 813 % \begin{frame}
 814 %   \frametitle{Activities are enhanced by support}
 815
 816 %   \begin{itemize}
 817 %   \item Mathematical manipulation can be enhanced by symbolic
 818 %     computation
 819 %   \item Statistical programming can be enabled by examples and related
 820 %     algorithm implementation
 821 %   \item Datasets, to a limited extent, can self-describe.
 822 %   \end{itemize}
 823 % \end{frame}
 824
 825 % \begin{frame}
 826 %   \frametitle{Executable and Computable Science}
 827
 828 %   Use of algorithms and construction to describe how things work.
 829
 830 %   Support for agent-based approaches
 831 % \end{frame}
 832
 833
 834 % \begin{frame}
 835 %   \frametitle{What is Data?  Metadata?}
 836
 837 %   Data: what we've observed
 838
 839 %   MetaData: context for observations, enables semantics.
 840 % \end{frame}
 841
 842
 843
 844
 845 % % \begin{frame}[fragile]
 846 % %   \frametitle{Defining Variables}
 847 % %   \framesubtitle{Setting variables}
 848 % % \begin{verbatim}
 849 % % (setq <variable> <value>)
 850 % % \end{verbatim}
 851 % %   Example:
 852 % % \begin{verbatim}
 853 % % (setq ess-source-directory
 854 % %       "/home/rossini/R-src")
 855 % % \end{verbatim}
 856 % % \end{frame}
 857
 858 % % \begin{frame}[fragile]
 859 % %   \frametitle{Defining on the fly}
 860 % % \begin{verbatim}
 861 % % (setq ess-source-directory
 862 % %    (lambda () (file-name-as-directory
 863 % %          (expand-file-name
 864 % %            (concat (default-directory)
 865 % %                    ess-suffix "-src")))))
 866 % % \end{verbatim}
 867 % %   (Lambda-expressions are anonymous functions, i.e. ``instant-functions'')
 868 % % \end{frame}
 869
 870
 871 % % \begin{frame}[fragile]
 872 % %   \frametitle{Function Reuse}
 873 % %   By naming the function, we could make the previous example reusable
 874 % %   (if possible):
 875 % % \begin{verbatim}
 876 % % (defun my-src-directory ()
 877 % %       (file-name-as-directory
 878 % %          (expand-file-name
 879 % %            (concat (default-directory)
 880 % %                    ess-suffix "-src"))))
 881 % % \end{verbatim}
 882 % %   Example:
 883 % % \begin{verbatim}
 884 % % (setq ess-source-directory (my-src-directory))
 885 % % \end{verbatim}
 886 % % \end{frame}
 887
 888
 889 % % \begin{frame}
 890 % %   \frametitle{Equality Among Packages}
 891 % %   \begin{itemize}
 892 % %   \item more/less equal can be described specifically through
 893 % %     overriding imports.
 894 % %   \end{itemize}
 895 % % \end{frame}
 896
 897
 898 % \subsection<presentation>*{For Further Reading}
 899
 900 % \begin{frame}[allowframebreaks]
 901 %   \frametitle<presentation>{Related Material}
 902
 903 %   \begin{thebibliography}{10}
 904
 905 %   \beamertemplatebookbibitems
 906 %   % Start with overview books.
 907
 908 %   \bibitem{LispStat1990}
 909 %     L.~Tierney
 910 %     \newblock {\em LispStat}.
 911
 912 %   \beamertemplatearticlebibitems
 913 %   % Followed by interesting articles. Keep the list short.
 914
 915 %   \bibitem{Rossini2001}
 916 %     AJ.~Rossini
 917 %     \newblock Literate Statistical Practice
 918 %     \newblock {\em Proceedings of the Conference on Distributed
 919 %       Statistical Computing}, 2001.
 920
 921 %   \bibitem{RossiniLeisch2003}
 922 %     AJ.~Rossini and F.~Leisch
 923 %     \newblock Literate Statistical Practice
 924 %     \newblock {\em Technical Report Series, University of Washington
 925 %       Department of Biostatistics}, 2003.
 926
 927 %   \beamertemplatearrowbibitems
 928 %   % Followed by interesting articles. Keep the list short.
 929
 930 %   \bibitem{CLS}
 931 %     Common Lisp Stat, 2008.
 932 %     \newblock \url{http://repo.or.cz/CommonLispStat.git/}
 933
 934 %   \end{thebibliography}
 935 % \end{frame}
 936
 937 \end{document}