Doc/Rossini-DSC-July2009.tex

   1 \documentclass{beamer}
   2
   3 \mode<presentation>
   4 {
   5   \usetheme{classic}
   6   \setbeamercovered{transparent}
   7 }
   8
   9 \usepackage[english]{babel}
  10 \usepackage[latin1]{inputenc}
  11 \usepackage{times}
  12 \usepackage[T1]{fontenc}
  13
  14 \title[CLS]{Common Lisp Statistics}
  15 \subtitle{Using History to design better data analysis environments}
  16 \author[Rossini]{Anthony~(Tony)~Rossini}
  17
  18 \institute[Novartis and University of Washington] % (optional, but mostly needed)
  19 {
  20   Group Head, Modeling and Simulation\\
  21   Novartis Pharma AG, Switzerland
  22   \and
  23   Affiliate Assoc Prof, Biomedical and Health Informatics\\
  24   University of Washington, USA}
  25
  26 \date[Rice 09]{Rice, Mar 2009}
  27 \subject{Statistical Computing Environments}
  28
  29 \begin{document}
  30
  31 \begin{frame}
  32   \titlepage
  33 \end{frame}
  34
  35 \begin{frame}{Outline}
  36   \tableofcontents
  37 \end{frame}
  38
  39 % Structuring a talk is a difficult task and the following structure
  40 % may not be suitable. Here are some rules that apply for this
  41 % solution:
  42
  43 % - Exactly two or three sections (other than the summary).
  44 % - At *most* three subsections per section.
  45 % - Talk about 30s to 2min per frame. So there should be between about
  46 %   15 and 30 frames, all told.
  47
  48 % - A conference audience is likely to know very little of what you
  49 %   are going to talk about. So *simplify*!
  50 % - In a 20min talk, getting the main ideas across is hard
  51 %   enough. Leave out details, even if it means being less precise than
  52 %   you think necessary.
  53 % - If you omit details that are vital to the proof/implementation,
  54 %   just say so once. Everybody will be happy with that.
  55
  56 \section{Motivation for CLS}
  57
  58 \section{Using CLS}
  59
  60 \section{Features}
  61
  62 \section{Discussion}
  63
  64
  65 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  66
  67
  68 \section{Preliminaries}
  69
  70 \subsection{Context}
  71
  72 \begin{frame}{Goals for this Talk}{(define, strategic approach,
  73     justify)}
  74
  75   \begin{itemize}
  76   \item To describe the concept of \alert{computable and executable
  77       statistics}, placing it in a historical context.
  78
  79   \item To demonstrate that \alert{a research program}
  80     implemented through  simple steps can increase the efficiency  of
  81     statistical computing approaches by  clearly describing both:
  82     \begin{itemize}
  83     \item numerical characteristics of procedures,
  84     \item statistical concepts driving them.
  85     \end{itemize}
  86
  87   \item To justify that the \alert{approach is worthwhile} and
  88     represents a staged effort towards \alert{increased use of best
  89       practices}.
  90   \end{itemize}
  91   (unfortunately, the last is still incomplete)
  92 \end{frame}
  93
  94
  95 \begin{frame}{Historical Computing Languages}
  96   \begin{itemize}
  97   \item FORTRAN : FORmula TRANslator.  Original numerical computing
  98     language, designed for clean implementation of numerical
  99     algorithms
 100   \item LISP : LISt Processor.  Associated with symbolic
 101     manipulation, AI, and knowledge approaches
 102   \end{itemize}
 103
 104   They represent the 2 generalized needs of statistical computing,
 105   which could be summarized as
 106   \begin{itemize}
 107   \item algorithms/numerics,
 108   \item elicitation, communication, and generation of knowledge (``data
 109     analysis'')
 110   \end{itemize}
 111 \end{frame}
 112
 113 \begin{frame}{Statistical Computing Environments}
 114
 115   Past:
 116   \begin{itemize}
 117   \item SPSS / BMDP / SAS
 118   \item S ( S, S-PLUS, R)
 119   \item LispStat ( XLispStat,  ViSta, ARC , CommonLispStat ) ; QUAIL
 120   \item XGobi (Orca / GGobi / Statistical Reality Engine)
 121   \item MiniTab
 122   \item Stata
 123   \item DataDesk
 124   \item Augsburg Impressionist series (MANET, \ldots)
 125   \item Excel
 126   \end{itemize}
 127   many others...
 128
 129 \end{frame}
 130
 131 \begin{frame}{How many are left?}
 132
 133   \begin{itemize}
 134   \item R
 135   \item SAS
 136   \item SPSS
 137   \item Stata
 138   \item Minitab
 139   \item very few others...
 140   \end{itemize}
 141   ``R is the Microsoft of the statistical computing world'' -- anonymous.
 142 \end{frame}
 143
 144 \begin{frame}{Selection Pressure}
 145   \begin{itemize}
 146   \item the R user population is growing rapidly, fueled by critical
 147     mass, quality, and value
 148   \item R is a great system for applied data analysis
 149   \item R is not such a great system for research into statistical
 150     computing (backwards compatibility, inertia due to user population)
 151   \end{itemize}
 152   There is a need for alternative experiments for developing new
 153   approaches/ideas/concepts.
 154 \end{frame}
 155
 156 \begin{frame}{Philosophically, why Common Lisp?}
 157   Philosophically:
 158   \begin{itemize}
 159   \item Lisp can cleanly present computational intentions, both
 160     symbolically and numerically.
 161   \item Semantics and context are important: well supported by Lisp
 162     paradigms.
 163   \item Lisp's parentheses describe singular, multi-scale,
 164     \alert{complete thoughts}.
 165   \end{itemize}
 166
 167 \end{frame}
 168
 169 \begin{frame}{Technically, why Common Lisp?}
 170   \begin{itemize}
 171   \item interactive COMPILED language (``R with a compiler'')
 172   \item CLOS is R's S4 object system ``done right''.
 173   \item clean semantics: modality, typing, can be expressed the way
 174     one wants it.
 175   \item programs are data, data are programs, leading to
 176   \item Most modern computing tools available (XML, WWW technologies)
 177   \item ``executable XML''
 178   \end{itemize}
 179   Common Lisp is very close in usage to how people currently use R
 180   (mostly interactive, some batch, and a wish for compilation efficiency).
 181 \end{frame}
 182
 183 \subsection{Background}
 184
 185 \begin{frame}
 186   \frametitle{Desire: Semantics and Statistics}
 187   \begin{itemize}
 188   \item The semantic web (content which is self-descriptive) is an
 189     interesting and potentially useful idea.
 190
 191   \item
 192     Biological informatics support (GO, Entrez) has allowed for
 193     precise definitions of concepts in biology.
 194
 195   \item It is a shame that a field like statistics, requiring such
 196     precision, has fewer precise definitions than an imprecise and
 197     temporally unstable field such as biology\ldots
 198   \end{itemize}
 199
 200   How can we express statistical work (research, applied work) which
 201   is both human and computer readable (perhaps subject to
 202   transformations first)?
 203 \end{frame}
 204
 205
 206 % \subsection{Context}
 207
 208 % \begin{frame}{Context}{(where I'm coming from, my ``priors'')}
 209 %   \begin{itemize}
 210 %   \item Pharmaceutical Industry
 211 %   \item Modeling and Simulation uses mathematical models/constructs to
 212 %     record beliefs (biology, pharmacology, clinical science) for
 213 %     explication, clinical team alignment, decision support, and
 214 %     quality.
 215 %   \item My work at Novartis is at the intersection of biomedical
 216 %     informatics, statistics, and mathematical modeling.
 217 %   \item As manager: I need a mix of applications and novel research development to
 218 %     solve our challenges better, faster, more efficiently.
 219 %   \item Data analysis is a specialized approach to computer
 220 %     programming, \alert{different} than applications programming or
 221 %     systems programming.
 222 %   \end{itemize}
 223 % \end{frame}
 224
 225 \section{Computable and Executable Statistics}
 226
 227 \begin{frame}{Can we compute with them?}
 228   3 Examples:
 229   \begin{itemize}
 230   \item Research
 231   \item Consulting
 232   \item Reimplementation
 233   \end{itemize}
 234   Consider whether one can ``compute'' with the information given?
 235 \end{frame}
 236
 237 \begin{frame}[fragile]{Example 1: Theory\ldots}
 238   \label{example1}
 239   Let $f(x;\theta)$ describe the likelihood of XX under the following
 240   assumptions.
 241   \begin{enumerate}
 242   \item assumption-1
 243   \item assumption-2
 244   \end{enumerate}
 245   Then if we use the following algorithm:
 246   \begin{enumerate}
 247   \item step-1
 248   \item step-2
 249   \end{enumerate}
 250   then $\hat{\theta}$ should be $N(0,\hat\sigma^2)$ with the following
 251   characteristics\ldots
 252 \end{frame}
 253
 254 \begin{frame}
 255   \frametitle{Can we compute, using this description?}
 256   Given the information at hand:
 257   \begin{itemize}
 258   \item we ought to have a framework for initial coding for the
 259     actual simulations (test-first!)
 260   \item the implementation is somewhat clear
 261   \item We should ask: what theorems have similar assumptions?
 262   \item We should ask: what theorems have similar conclusions but
 263     different assumptions?
 264   \end{itemize}
 265 \end{frame}
 266
 267 \begin{frame}[fragile]{Realizing Theory}
 268   {\small
 269 \begin{verbatim}
 270 (define-theorem my-proposed-theorem
 271    (:theorem-type '(distribution-properties
 272                     frequentist
 273                     likelihood))
 274    (:assumes '(assumption-1 assumption-2))
 275    (:likelihood-form
 276       (defun likelihood (data theta gamma)
 277         (exponential-family theta gamma)))
 278    (:compute-by
 279       '(progn
 280          (compute-starting-values thetahat gammahat)
 281          (until (convergence)
 282            (setf convergence
 283                  (or (step-1 thetahat)
 284                      (step-2 gammahat))))))
 285    (:claim (assert
 286              (and (equal-distribution thetahat 'normal)
 287                   (equal-distribution gammahat 'normal)))))
 288 \end{verbatim}
 289   }
 290 \end{frame}
 291
 292 \begin{frame}[fragile]{It would be nice to have}
 293 \begin{verbatim}
 294    (theorem-veracity 'my-proposed-theorem)
 295 \end{verbatim}
 296 \end{frame}
 297
 298 \begin{frame}[fragile]{and why not...?}
 299 \begin{verbatim}
 300    (when (theorem-veracity
 301               'my-proposed-theorem)
 302       (write-paper 'my-proposed-theorem
 303                    :style :JASA
 304                    :output-format
 305                          '(LaTeX MSWord)))
 306 \end{verbatim}
 307 \end{frame}
 308
 309 \begin{frame}{Comments}
 310   \begin{itemize}
 311   \item The general problem is very difficult
 312   \item Some progress has been made in small areas of basic
 313     statistics: currently working on linear regression (LS-based,
 314     Normal-bayesian) and the T-test.
 315   \item Areas targeted for medium-term future: resampling methods and
 316     similar algorithms.
 317   \end{itemize}
 318
 319 \end{frame}
 320
 321 \begin{frame}
 322   \frametitle{Example 2: Practice\ldots}
 323   \label{example2}
 324   The dataset comes from a series of clinical trials.  We model the
 325   primary endpoint, ``relief'', as a binary random variable.  There is
 326   a random trial effect on relief as well as severity due to
 327   differences in recruitment and inclusion/exclusion criteria.
 328 \end{frame}
 329
 330 \begin{frame}
 331   \frametitle{Can we compute, using this description?}
 332   \begin{itemize}
 333   \item With a real such description, it is clear what some of the
 334     potential models might be for this dataset
 335   \item It should be clear how to start thinking of a data dictionary
 336     for this problem.
 337   \end{itemize}
 338 \end{frame}
 339
 340 \begin{frame}[fragile]{Can we compute?}
 341 \begin{verbatim}
 342   (dataset-metadata paper-1
 343     :context 'clinical-trials
 344     :variables '((relief :model-type dependent
 345                          :distribution binary)
 346                  (trial  :model-type independent
 347                          :distribution categorical)
 348                  (disease-severity))
 349     :metadata '(inclusion-criteria
 350                 exclusion-criteria
 351                 recruitment-rate))
 352   (propose-analysis paper-1)
 353      ; => '(tables
 354      ;      (logistic regression))
 355 \end{verbatim}
 356 \end{frame}
 357
 358 \begin{frame}{Example 3: The Round-trip\ldots}
 359   \label{example3}
 360   The first examples describe ``ideas $\rightarrow$ code''
 361
 362   Consider the last time you read someone else's implementation of a
 363   statistical procedure (i.e. R package code).  When you read the
 364   code, could you see:
 365   \begin{itemize}
 366   \item the assumptions used?
 367   \item the algorithm implemented?
 368   \item practical guidance for when you might select the algorithm
 369     over others?
 370   \item practical guidance for when you might select the
 371     implementation over others?
 372   \end{itemize}
 373   These are usually components of any reasonable journal article.
 374   \textit{(Q: have you actually read an R package that wasn't yours?)}
 375 \end{frame}
 376
 377 \begin{frame}{Exercise left to the reader!}
 378
 379   (aside: I have been looking at the \textbf{stats} and \textbf{lme4}
 380   packages recently -- \textit{for me}, very clear numerically, much
 381   less so statistically)
 382 \end{frame}
 383
 384
 385
 386 \subsection{Literate Programming is insufficient}
 387
 388 \begin{frame}{Literate Statistical Practice.}
 389   \begin{enumerate}
 390   \item Literate Programming applied to data analysis (Rossini, 1997/2001)
 391   \item among the \alert{most annoying} techniques to integrate into
 392     work-flow if one is not perfectly methodical.
 393   \item Some tools:
 394     \begin{itemize}
 395     \item ESS: supports interactive creation of literate programs.
 396     \item Sweave: tool which exemplifies reporting context; odfWeave
 397       primarily simplifies reporting.
 398     \item Roxygen: primarily supports a literate programming
 399       documentation style, not a literate data analysis programming
 400       style.
 401   \end{itemize}
 402   \item ROI demonstrated in specialized cases: BioConductor.
 403   \item \alert{usually done after the fact} (final step of work-flow)
 404     as a documentation/computational reproducibility technique, rarely
 405     integrated into work-flow.
 406   \end{enumerate}
 407   Many contributors:
 408   Knuth, Claerbout, Carey, de Leeuw, Leisch, Gentleman, Temple-Lang,
 409   \ldots{}
 410 \end{frame}
 411
 412 \begin{frame}
 413   \frametitle{Literate Programming}
 414   \framesubtitle{Why isn't it enough for Data Analysis?}
 415
 416   Only 2 contexts: (executable) code and documentation.  Fine for
 417   application programming,  but for data analysis, we could benefit
 418   from:
 419   \begin{itemize}
 420   \item classification of statistical procedures
 421   \item descriptions of assumptions
 422   \item pragmatic recommendations
 423   \item inheritance of structure through the work-flow of a
 424     statistical methodology or data analysis project
 425   \item datasets and metadata
 426   \end{itemize}
 427   Concept: ontologies describing mathematical assumptions, applications
 428   of methods, work-flow, and statistical data structures can enable
 429   machine communication.
 430
 431   (i.e.\ informatics framework \`a la biology)
 432 \end{frame}
 433
 434
 435 \begin{frame}{Communication in Statistical Practice}{\ldots is essential for \ldots}
 436   \begin{itemize}
 437   \item finding
 438   \item explanations
 439   \item agreement
 440   \item receiving information
 441   \end{itemize}
 442   \alert{``machine-readable'' communication/computation lets the
 443     computer help} \\
 444   Semantic Web is about ``machine-enabled computability''.
 445 \end{frame}
 446
 447 \begin{frame}  \frametitle{Semantics}
 448   \framesubtitle{One definition: description and context}
 449
 450   Interoperability is the key, with respect to
 451   \begin{itemize}
 452   \item ``Finding things''
 453   \item Applications and activities with related functionality
 454     \begin{itemize}
 455     \item moving information from one state to another (paper, journal
 456       article, computer program)
 457     \item computer programs which implement solutions to similar tasks
 458     \end{itemize}
 459   \end{itemize}
 460 \end{frame}
 461
 462
 463 \begin{frame}{Statistical Practice is somewhat restricted}
 464   {...but in a good sense, enabling potential for semantics...}
 465
 466   There is a restrictable set of intended actions for what can be done
 467   -- the critical goal is to be able to make a difference by
 468   accelerating activities that should be ``computable'':
 469   \begin{itemize}
 470   \item restricted natural language processing
 471   \item mathematical translation
 472   \item common description of activities for simpler programming/data
 473     analysis (S approach to objects and methods)
 474   \end{itemize}
 475   R is a good basic start (model formulation approach, simple
 476   ``programming with data'' paradigm); we should see if we can do
 477   better!
 478 \end{frame}
 479
 480 \begin{frame}{Computable and Executable Statistics requires}
 481
 482   \begin{itemize}
 483   \item approaches to describe data and metadata (``data'')
 484     \begin{itemize}
 485     \item semantic WWW
 486     \item metadata management and integration, driving
 487     \item data integration
 488     \end{itemize}
 489   \item approaches to describe data analysis methods (``models'')
 490     \begin{itemize}
 491     \item quantitatively: many ontologies (AMS, etc), few meeting
 492       statistical needs.
 493     \item many substantive fields have implementations
 494       (bioinformatics, etc) but not well focused.
 495     \end{itemize}
 496   \item approaches to describe the specific form of interaction
 497     (``instances of models'')
 498     \begin{itemize}
 499     \item Original idea behind ``Literate Statistical Analysis''.
 500     \item That idea is suboptimal, more structure needed (not
 501       necessarily built upon existing...).
 502     \end{itemize}
 503   \end{itemize}
 504 \end{frame}
 505
 506 \subsection{Common Lisp Statistics}
 507
 508 \begin{frame}
 509   \frametitle{Interactive Programming}
 510   \framesubtitle{Everything goes back to being Lisp-like}
 511   \begin{itemize}
 512   \item Interactive programming (as originating with Lisp): works
 513     extremely well for data analysis (Lisp being the original
 514     ``programming with data'' language).
 515   \item Theories/methods for how to do this are reflected in styles
 516     for using R.
 517   \end{itemize}
 518 \end{frame}
 519
 520 \begin{frame}[fragile]
 521   \frametitle{Lisp}
 522
 523   Lisp (LISt Processor) is different than most high-level computing
 524   languages, and is very old (1956).  Lisp is built on lists of things
 525   which are evaluatable.
 526 \begin{verbatim}
 527 (functionName data1 data2 data3)
 528 \end{verbatim}
 529   or ``quoted'':
 530 \begin{verbatim}
 531 '(functionName data1 data2 data3)
 532 \end{verbatim}
 533   which is shorthand for
 534 \begin{verbatim}
 535 (list functionName data1 data2 data3)
 536 \end{verbatim}
 537   The difference is important -- lists of data (the second/third) are
 538   not (yet?!) functions applied to (unencapsulated lists of) data (the first).
 539 \end{frame}
 540
 541 \begin{frame}
 542   \frametitle{Features}
 543   \begin{itemize}
 544   \item Data and Functions semantically the same
 545   \item Natural interactive use through functional programming with
 546     side effects
 547   \item Batch is a simplification of interactive -- not a special mode!
 548   \end{itemize}
 549 \end{frame}
 550
 551
 552
 553 \begin{frame}[fragile]{Representation: XML and Lisp}{executing your data}
 554   Many people are familiar with XML:
 555 \begin{verbatim}
 556 <name phone="+41793674557">Tony Rossini</name>
 557 \end{verbatim}
 558   which is shorter in Lisp:
 559 \begin{verbatim}
 560 (name "Tony Rossini" :phone "+41613674557")
 561 \end{verbatim}
 562   \begin{itemize}
 563   \item Lisp ``parens'', universally hated by unbelievers, are
 564     wonderful for denoting when a ``concept is complete''.
 565   \item Why can't your data self-execute?
 566   \end{itemize}
 567 \end{frame}
 568
 569 \begin{frame}[fragile]{Numerics with Lisp}
 570   \begin{itemize}
 571   \item addition of rational numbers and arithmetic
 572   \item example for mean
 573 \begin{verbatim}
 574  (defun mean (x)
 575     (checktype x 'vector-like)
 576     (/ (loop for i from 0 to (- (nelts *x*) 1)
 577           summing (vref *x* i))
 578        (nelts *x*)))
 579 \end{verbatim}
 580   \item example for variance
 581 \begin{verbatim}
 582 (defun variance (x)
 583   (let ((meanx (mean x))
 584         (nm1 (1- (nelts x))))
 585      (/ (loop for i from 0 to nm1
 586            summing (power (- (vref *x* i) meanx) 2)
 587         nm1))))
 588 \end{verbatim}
 589   \item But through macros, \verb+(vref *x* i)+ could be
 590     \verb+#V(X[i])+ or your favorite syntax.
 591   \end{itemize}
 592
 593 \end{frame}
 594
 595
 596 \begin{frame}{Common Lisp Statistics 1}
 597   \begin{itemize}
 598   \item Originally based on LispStat (reusability)
 599   \item Re-factored structure (some numerics worked with a 1990-era code base).
 600   \item Current activities:
 601     \begin{enumerate}
 602     \item numerics redone using CFFI-based BLAS/LAPACK (cl-blapack)
 603     \item matrix interface based on MatLisp
 604     \item starting design of a user interface system (interfaces,
 605       visuals).
 606     \item general framework for model specification (regression,
 607       likelihood, ODEs)
 608     \item general framework for algorithm specification (bootstrap,
 609       MLE, algorithmic data analysis methods).
 610     \end{enumerate}
 611   \end{itemize}
 612 \end{frame}
 613
 614 \begin{frame}{Common Lisp Statistics 2}
 615
 616   \begin{itemize}
 617   \item Implemented using SBCL.  Contributed fixes for
 618     Clozure/OpenMCL. Goal to target CLISP
 619   \item Supports LispStat prototype object system
 620   \item Package-based design -- only use the components you need, or
 621     the components whose API you like.
 622   \end{itemize}
 623 \end{frame}
 624
 625 \section{Discussion}
 626
 627 \begin{frame}
 628   \frametitle{Outlook}
 629   \begin{itemize}
 630   \item Semantics and Computability have captured a great deal of
 631     attention in the informatics and business computing R\&D worlds
 632   \item Statistically-driven Decision Making and Knowledge Discovery
 633     is, with high likelihood, the next challenging stage after data
 634     integration.
 635   \item Statistical practice (theory and application) can be enhanced,
 636     made more efficient, providing  increased benefit to organizations
 637     and groups using appropriate methods.
 638   \item Lisp, as a language, shares characteristics of both Latin
 639     (difficult dead language useful for classical training) and German
 640     (difficult living language useful for general life).  Of course,
 641     for some people, they are not difficult.
 642   \end{itemize}
 643
 644 \end{frame}
 645
 646 \begin{frame}
 647   The research program described in this talk is currently driving the
 648   design of CommonLisp Stat, which leverages concepts and approaches
 649   from the dead and moribund LispStat project.
 650
 651   \begin{itemize}
 652   \item \url{http://repo.or.cz/w/CommonLispStat.git/}
 653   \item \url{http://www.github.com/blindglobe/}
 654   \end{itemize}
 655
 656 \end{frame}
 657 \begin{frame}{Final Comment}
 658
 659   \begin{itemize}
 660   \item In the Pharma industry, it is all about getting the right
 661     drugs to the patient faster.  Data analysis systems seriously
 662     impact this process, being potentially an impediment or an
 663     accelerator.
 664
 665     \begin{itemize}
 666     \item \alert{Information technologies can increase the efficiency
 667         of statistical practice}, though innovation change management
 668       must be taken into account.  (i.e. Statistical practice, while
 669       considered by some an ``art form'', can benefit from
 670       industrialization).
 671     \item \alert{Lisp's features match the basic requirements we need}
 672       (dichotomy: programs as data, data as programs).  Sales pitch,
 673       though...
 674     \item Outlook: Lots of work and experimentation to do!
 675     \end{itemize}
 676   \item {\tiny Gratuitous Advert: We are hiring, have student
 677       internships (undergrad, grad students), and a visiting faculty
 678       program.  Talk with me if possibly interested.}
 679   \end{itemize}
 680 \end{frame}
 681
 682
 683 % % All of the following is optional and typically not needed.
 684 % \appendix
 685
 686
 687 % \section<presentation>*{\appendixname}
 688
 689
 690 % \begin{frame} \frametitle{Complements and Backup}
 691 %   No more, stop here.  Questions?  (now or later).
 692 % \end{frame}
 693
 694 % \begin{frame}{The Industrial Challenge.}{Getting the Consulting Right.}
 695 %   % - A title should summarize the slide in an understandable fashion
 696 %   %   for anyone who does not follow everything on the slide itself.
 697
 698 %   \begin{itemize}
 699 %   \item Recording assumptions for the next data analyst, reviewer.
 700 %     Use \texttt{itemize} a lot.
 701 %   \item
 702 %     Use very short sentences or short phrases.
 703 %   \end{itemize}
 704 % \end{frame}
 705
 706
 707 % \begin{frame}{The Industrial Challenge.}{Getting the Right Research Fast.}
 708 %   % - A title should summarize the slide in an understandable fashion
 709 %   %   for anyone who does not follow everything on the slide itself.
 710
 711 %   \begin{itemize}
 712 %   \item
 713 %     Use \texttt{itemize} a lot.
 714 %   \item
 715 %     Use very short sentences or short phrases.
 716 %   \end{itemize}
 717 % \end{frame}
 718
 719
 720 % \begin{frame}{Explicating the Work-flow}{QA/QC-based improvements.}
 721
 722
 723 % \end{frame}
 724
 725 % \section{Motivation}
 726
 727 % \subsection{IT Can Speed up Deliverables in Statistical Practice}
 728
 729 % \begin{frame}{Our Generic Work-flow and Life-cycle}
 730 %   {describing most data analytic activities}
 731 %   Workflow:
 732 %   \begin{enumerate}
 733 %   \item Scope out the problem
 734 %   \item Sketch out a potential solution
 735 %   \item Implement until road-blocks appear
 736 %   \item Deliver results
 737 %   \end{enumerate}
 738
 739 %   Lifecycle:
 740 %   \begin{enumerate}
 741 %   \item paper sketch
 742 %   \item 1st e-draft of text/code/data (iterate to \#1, discarding)
 743 %   \item cycle through work
 744 %   \item publish
 745 %   \item ``throw-away''
 746 %   \end{enumerate}
 747 %   but there is valuable information that could enable the next
 748 %   generation!
 749 % \end{frame}
 750
 751 % \begin{frame}[fragile]{Paper $\rightarrow$ Computer  $\rightarrow$ Article $\rightarrow$ Computer}{Cut and Paste makes for large errors.}
 752 %   \begin{itemize}
 753 %   \item Problems in a regulatory setting
 754 %   \item Regulatory issues are just ``best practices''
 755 %   \end{itemize}
 756
 757 %   Why do we ``copy/paste'', or analogously, restart our work?
 758
 759 %   pro:
 760 %   \begin{itemize}
 761 %   \item every time we repeat, we reinforce the idea in our brain
 762 %   \item review of ideas can help improve them
 763 %   \end{itemize}
 764 %   con:
 765 %   \begin{itemize}
 766 %   \item inefficiency
 767 %   \item introduction of mistakes
 768 %   \item loss of historical context
 769 %   \item changes to earlier work (on a different development branch)
 770 %     can not propagate.
 771 %   \end{itemize}
 772 % \end{frame}
 773
 774 % \section{Semantics and Statistical Practice}
 775
 776
 777 % \begin{frame}
 778 %   \frametitle{Statistical Activity Leads to Reports}
 779 %   \framesubtitle{You read what you know, do you understand it?}
 780
 781 %   How can we improve the communication of the ideas we have?
 782
 783 %   Precision of communication?
 784
 785 % \end{frame}
 786
 787
 788
 789 % \begin{frame}  \frametitle{Communication Requires Context}
 790 %   \framesubtitle{Intentions imply more than one might like...}
 791
 792 %   \begin{itemize}
 793 %   \item Consideration of what we might do
 794 %   \item Applications with related functionality
 795 %   \end{itemize}
 796 % \end{frame}
 797
 798
 799
 800 % \begin{frame}
 801 %   \frametitle{Design Patterns}
 802 %   \framesubtitle{Supporting Work-flow Transitions}
 803
 804 %   (joint work with H Wickham): The point of this research program is
 805 %   not to describe what to do at any particular stage of work, but to
 806 %   encourage researchers and practitioners to consider the
 807 %   translation and transfer of information between stages so that work
 808 %   is not lost.
 809
 810 %   Examples of stages in a work-flow:
 811 %   \begin{itemize}
 812 %   \item planning, execution, reporting;
 813 %   \item scoping, illustrative examples or counter examples, algorithmic construction,
 814 %     article writing.
 815 %   \item descriptive statistics, preliminary inferential analysis,
 816 %     model/assumption checking, final inferential analysis,
 817 %     communication of scientific results
 818 %   \end{itemize}
 819 %   Description of work-flows is essential to initiating discussions on
 820 %   quality/efficiency of approaches to work.
 821 % \end{frame}
 822
 823 % \section{Design Challenges}
 824
 825 % \begin{frame}
 826 %   \frametitle{Activities are enhanced by support}
 827
 828 %   \begin{itemize}
 829 %   \item Mathematical manipulation can be enhanced by symbolic
 830 %     computation
 831 %   \item Statistical programming can be enabled by examples and related
 832 %     algorithm implementation
 833 %   \item Datasets, to a limited extent, can self-describe.
 834 %   \end{itemize}
 835 % \end{frame}
 836
 837 % \begin{frame}
 838 %   \frametitle{Executable and Computable Science}
 839
 840 %   Use of algorithms and construction to describe how things work.
 841
 842 %   Support for agent-based approaches
 843 % \end{frame}
 844
 845
 846 % \begin{frame}
 847 %   \frametitle{What is Data?  Metadata?}
 848
 849 %   Data: what we've observed
 850
 851 %   MetaData: context for observations, enables semantics.
 852 % \end{frame}
 853
 854
 855
 856
 857 % % \begin{frame}[fragile]
 858 % %   \frametitle{Defining Variables}
 859 % %   \framesubtitle{Setting variables}
 860 % % \begin{verbatim}
 861 % % (setq <variable> <value>)
 862 % % \end{verbatim}
 863 % %   Example:
 864 % % \begin{verbatim}
 865 % % (setq ess-source-directory
 866 % %       "/home/rossini/R-src")
 867 % % \end{verbatim}
 868 % % \end{frame}
 869
 870 % % \begin{frame}[fragile]
 871 % %   \frametitle{Defining on the fly}
 872 % % \begin{verbatim}
 873 % % (setq ess-source-directory
 874 % %    (lambda () (file-name-as-directory
 875 % %          (expand-file-name
 876 % %            (concat (default-directory)
 877 % %                    ess-suffix "-src")))))
 878 % % \end{verbatim}
 879 % %   (Lambda-expressions are anonymous functions, i.e. ``instant-functions'')
 880 % % \end{frame}
 881
 882
 883 % % \begin{frame}[fragile]
 884 % %   \frametitle{Function Reuse}
 885 % %   By naming the function, we could make the previous example reusable
 886 % %   (if possible):
 887 % % \begin{verbatim}
 888 % % (defun my-src-directory ()
 889 % %       (file-name-as-directory
 890 % %          (expand-file-name
 891 % %            (concat (default-directory)
 892 % %                    ess-suffix "-src"))))
 893 % % \end{verbatim}
 894 % %   Example:
 895 % % \begin{verbatim}
 896 % % (setq ess-source-directory (my-src-directory))
 897 % % \end{verbatim}
 898 % % \end{frame}
 899
 900
 901 % % \begin{frame}
 902 % %   \frametitle{Equality Among Packages}
 903 % %   \begin{itemize}
 904 % %   \item more/less equal can be described specifically through
 905 % %     overriding imports.
 906 % %   \end{itemize}
 907 % % \end{frame}
 908
 909
 910 % \subsection<presentation>*{For Further Reading}
 911
 912 % \begin{frame}[allowframebreaks]
 913 %   \frametitle<presentation>{Related Material}
 914
 915 %   \begin{thebibliography}{10}
 916
 917 %   \beamertemplatebookbibitems
 918 %   % Start with overview books.
 919
 920 %   \bibitem{LispStat1990}
 921 %     L.~Tierney
 922 %     \newblock {\em LispStat}.
 923
 924 %   \beamertemplatearticlebibitems
 925 %   % Followed by interesting articles. Keep the list short.
 926
 927 %   \bibitem{Rossini2001}
 928 %     AJ.~Rossini
 929 %     \newblock Literate Statistical Practice
 930 %     \newblock {\em Proceedings of the Conference on Distributed
 931 %       Statistical Computing}, 2001.
 932
 933 %   \bibitem{RossiniLeisch2003}
 934 %     AJ.~Rossini and F.~Leisch
 935 %     \newblock Literate Statistical Practice
 936 %     \newblock {\em Technical Report Series, University of Washington
 937 %       Department of Biostatistics}, 2003.
 938
 939 %   \beamertemplatearrowbibitems
 940 %   % Followed by interesting articles. Keep the list short.
 941
 942 %   \bibitem{CLS}
 943 %     Common Lisp Stat, 2008.
 944 %     \newblock \url{http://repo.or.cz/CommonLispStat.git/}
 945
 946 %   \end{thebibliography}
 947 % \end{frame}
 948
 949 \end{document}