1 #+OPTIONS: H:3 num:nil toc:2 \n:nil ::t |:t ^:{} -:t f:t *:t tex:t d:(HIDE) tags:not-in-toc
3 #+AUTHOR: Charles C. Berry
4 #+EMAIL: cberry@tajo.ucsd.edu
5 #+DATE: 2010-12-08 Thur
6 #+DESCRIPTION: R Package Development Helpers
8 #+PROPERTY: exports none
11 <div id="subtitle" style="float: center; text-align: center;">
13 Org-babel support for building
14 <a href="http://www.r-project.org/">R</a> packages
17 <a href="http://www.r-project.org/">
18 <img src="http://www.r-project.org/Rlogo.jpg"/>
27 This document contains
28 - tools useful for writing R extensions called /packages/
29 - source code to create a simple R package.
33 - The [[http://www.r-project.org][R language and environment]] for statistical computation and
34 graphics has a powerful system for developing and distributing
35 software enhancements and datasets called /packages/.
36 - A vast archive of such packages --- called [[http://cran.r-project.org/][CRAN]] --- is available.
37 - Users can create their own packages by following instructions in
38 [[http://cran.r-project.org/doc/manuals/R-exts.html][Writing R Extensions]].
40 * Some notes on this document and =org-babel=
42 - This document provides tools for R package development using org-mode.
44 - There are two somewhat contrary philosophies about how R packages are
45 managed using =org-babel=.
46 - One camp holds that all of the code for a package should be kept
47 in one master =*.org= document, which when tangled produces the source
48 directory files needed. The =.org= document also holds notes, utility
49 functions, navigation tools, and code snippets. A very simple R
50 package is included below, and it can be checked, installed, and
51 run from this =.org= document.
52 - The other camp leaves the =R= and =Rd= code and other package
53 files in the package directory subfolders and edits them
55 - The tools shown here support either approach.
57 - Some introductory tips at [[file:../languages/ob-doc-R.org][ob-doc-R]] show how to enable full editing
58 support for R code with ESS (http://ess.r-project.org/).
60 - This document is to be put in the top level source directory of an R
61 package (i.e. at the same level as the DESCRIPTION file). To try it
62 out using the built-in package, create a fresh directory named
63 =countRows= and just put it there.
65 - Version control blocks here use svn calls, and you may need to
66 replace these with the equivalent commands for your own system.
68 - =#+begin_src sh ... #+end_src= shell blocks work on systems that
69 support unix-like shells. On Windows systems these blocks would
70 likely need to be changed.
73 - [[https://git.sr.ht/~bzg/worg/tree/master/item/org-contrib/babel/examples/Rpackage.org][Download]] the =.org= version of this document
74 - Create a package directory (naming it like the package is convenient)
75 - Copy the =.org= version of this document into that directory
76 - Move point to the =set up .Rbuildignore= headline and execute it
78 - Create some package files, *or* create src blocks as outlined in
79 this document and run =org-babel-tangle= to create the package
83 - INSTALL the package[[howrun][1]] or
84 - check the package[[howrun][1]]
85 - Load some code (i.e. for a function) using ESS and try it out.
86 - Inspect a formatted help page
87 - Edit the code. Re-tangle as, and if, needed.
88 - Once the package is ready, build it or INSTALL it to a permanent
91 : 1. moving point to the corresponding headline, then
92 : typing 'C-c C-v C-s y' or
93 : 'M-x org-babel-execute-subtree'
94 : will execute each tool.
100 - Environment variables like these may be added in the next src block:
101 - =export R_LIBS=Rlib=
102 - =export R_ARCH=x86_64=
104 #+begin_src sh :results output
106 cd ..; R CMD check $CWD | sed 's/^*/ */'
109 : #+begin_src sh :results output
111 : cd ..; R CMD check $CWD | sed 's/^*/ */'
116 - customize the =rckopts= variable, possibly "rckopts="
117 - Variables may also be added in the next src block:
118 - =export R_ARCH=x86_64=
120 #+begin_src sh :results output :var rckopts="--library=./Rlib"
122 cd ..; R CMD INSTALL $rckopts $CWD
125 : #+begin_src sh :results output :var rckopts="--library=./Rlib"
127 : cd ..; R CMD INSTALL $rckopts $CWD
132 #+begin_src sh :results output
134 cd ..; R CMD build $CWD
137 : #+begin_src sh :results output
139 : cd ..; R CMD build $CWD
144 - The src block adds enough asterisks to the line listing each
145 filename to turn it into a headline at the next level down. This is
146 helpful if you have a lot of help pages and want to fold them up for
149 #+begin_src R :results output :var hdlev=(car (org-heading-components))
150 linestart <- paste( c( "\n", rep('*', hdlev+1 ) ), collapse='')
151 rd.files <- Sys.glob("man/*.Rd")
152 for ( ird in rd.files ){
153 hlp.txt <- capture.output(tools:::Rd2txt( ird ) )
154 hlp.txt <- gsub( "_\b","", hlp.txt)
155 headline <- paste( linestart, ird ,'\n' )
156 cat( headline, hlp.txt , sep='\n')
160 : #+begin_src R :results output :var hdlev=(car (org-heading-components))
161 : linestart <- paste( c( "\n", rep('*', hdlev+1 ) ), collapse='')
162 : rd.files <- Sys.glob("man/*.Rd")
163 : for ( ird in rd.files ){
164 : hlp.txt <- capture.output(tools:::Rd2txt( ird ) )
165 : hlp.txt <- gsub( "_\b","", hlp.txt)
166 : headline <- paste( linestart, ird ,'\n' )
167 : cat( headline, hlp.txt , sep='\n')
174 #+begin_src R :session :results output :var libname=(file-name-directory buffer-file-name)
175 ## customize the next line as needed:
176 .libPaths(new = file.path(getwd(),"Rlib") )
177 require( basename(libname), character.only=TRUE)
180 - this loads the library into an R session
181 - customize or delete the =.libPaths= line as desired
184 : #+begin_src R :session :var libname=(file-name-directory buffer-file-name)
185 : .libPaths(new = file.path(getwd(),"Rlib") )
186 : require( basename(libname), character.only=TRUE)
191 - if you keep all your source code in this =.org= document, then you do not
192 need to do this - instead just type =C-s require(=
193 - list package dependencies that might need to be dealt with
195 #+begin_src sh :results output
199 : #+begin_src sh :results output
200 : grep 'require(' R/*
203 ** set up .Rbuildignore and man, R, and Rlib directories
205 - This document sits in the top level source directory. So, ignore it
206 and its offspring when checking, installing and building.
207 - List all files to ignore under =#+results: rbi= (including this
208 one!). Regular expressions are allowed.
209 - Rlib is optional. If you want to INSTALL in the system directory,
216 You only need to run this once (unless you add more ignorable files).
218 #+begin_src R :results output silent :var rbld=rbi
219 cat(rbld,'\n', file=".Rbuildignore")
222 dir.create("../Rlib")
225 : #+begin_src R :results output silent :var rbld=rbi
226 : cat(rbld,'\n', file=".Rbuildignore")
229 : dir.create("../Rlib")
232 * Project Specific Entries
234 Package specific notes and blocks go here. It is a good idea to have
235 several second level headlines --- possibly including the package code
236 --- to group things by topic/idea, then a third level headline for
237 almost every src block and TODO item.
239 ** Package structure and src languages :ARCHIVE:noexport:
241 - The top level directory may contain these files (and others):
243 | filename | filetype |
244 |-------------+---------------|
246 | NAMESPACE | R-like script |
247 | configure | Bourne shell |
248 | cleanup | Bourne shell |
253 | DESCRIPTION | [[http://www.debian.org/doc/debian-policy/ch-controlfields.html][DCF]] |
254 |-------------+---------------|
259 | dirname  | types of files |
260 |----------+--------------------------------------------------|
268 | src | .c, .cc or .cpp, .f, .f90, .f95, .m, .mm, .M, .h |
270 |----------+--------------------------------------------------|
272 ** Example: The countRows package
274 - This example illustrates how to use the =.org= document as the source code
275 master. By navigating to the =INSTALL package= headline and entering
276 =C-c C-v C-s y=, the INSTALL command is run. Likewise for =check
277 package=, =help pages=, and the other tools.
279 - The =countRows= package implements a simple, but quick way to count the rows of
280 a =data.frame=. It is akin to =sort | uniq -c= in a Unix-alike shell.
282 - The package is based on a function that was posted in this [[https://stat.ethz.ch/pipermail/r-help/2008-January/151489.html][reply]] to
283 a [[https://stat.ethz.ch/pipermail/r-help/2008-January/151372.html][query]] on the R-help list.
285 *** The DESCRIPTION File
287 - The DESCRIPTION file is obligatory
288 - It follows the Debian Control File (DCF) format.
289 - Required and optional fields are described in [[http://cran.r-project.org/doc/manuals/R-exts.html][Writing R Extensions]].
291 #+begin_src sh :results silent :tangle DESCRIPTION :eval nil
294 Title: Count Rows of a data.frame
297 Author: Charles C. Berry
298 Maintainer: Charles Berry <cberry@tajo.ucsd.edu>
299 Description: One of many ways to count the rows of a data.frame.
300 Akin to 'sort | uniq -c' shell command
305 : #+begin_src sh :results silent :tangle DESCRIPTION :eval nil
308 : Title: Count Rows of a data.frame
311 : Author: Charles C. Berry
312 : Maintainer: Charles Berry <cberry@tajo.ucsd.edu>
313 : Description: One of many ways to count the rows of a data.frame.
314 : Akin to 'sort | uniq -c' shell command
321 - Each =#+begin_src R= block defines one or more functions.
322 - The =:tangle= header tells where to place the code
324 **** count.rows function
326 #+begin_src R :eval nil :tangle R/count.rows.R
330 order.x <- do.call( order, as.data.frame(x) )
332 rowSums( x[tail(order.x,-1),] != x[head(order.x,-1),] )==0
333 tf.runs <- rle(equal.to.previous)
335 unlist(mapply( function(x,y) if (y) x+1 else (rep(1,x)),
336 tf.runs$length, tf.runs$value )))
337 counts <- counts[ c( diff( counts ) <= 0, TRUE ) ]
338 unique.rows <- which( c(TRUE, !equal.to.previous ) )
339 cbind( counts, x[ order.x[ unique.rows ], ,drop=FALSE ] )
344 : #+begin_src R :eval nil :exports code :tangle R/count.rows.R
348 : order.x <- do.call( order, as.data.frame(x) )
349 : equal.to.previous <-
350 : rowSums( x[tail(order.x,-1),] != x[head(order.x,-1),] )==0
351 : tf.runs <- rle(equal.to.previous)
353 : unlist(mapply( function(x,y) if (y) x+1 else (rep(1,x)),
354 : tf.runs$length, tf.runs$value )))
355 : counts <- counts[ c( diff( counts ) <= 0, TRUE ) ]
356 : unique.rows <- which( c(TRUE, !equal.to.previous ) )
357 : cbind( counts, x[ order.x[ unique.rows ], ,drop=FALSE ] )
361 *** Rd help page markup
363 - There is usually one =#+begin_src Rd= block for each help page
364 - Usually one page covers the package as a whole and others cover the
365 functions and datasets it includes.
368 #+begin_src Rd :eval nil :tangle man/count.rows.Rd
371 \title{ Count \code{data.frame} rows }
372 \description{ Counts the unique rows of a \code{data.frame} }
373 \usage{ count.rows(x) }
376 Just a \code{data.frame} or \code{matrix}
380 Basically, this function tries to be smart about counting
381 rows. It relies on the \code{\link{order}} function and basic logic to
382 do the heavy lifting.
385 A \code{data.frame} with a column named \code{counts}, all the columns
386 of \code{x} and the rows that would appear in \code{unique( x )}.
389 Charles C. Berry \email{cberry@tajo.ucsd.edu}
392 hec.frame <- as.data.frame( HairEyeColor )
394 hec.frame[ rep(1:nrow(hec.frame), hec.frame$Freq ), ]
395 hec.counts <- count.rows( hec.frame )
396 all.equal( hec.counts$counts, hec.counts$Freq )
403 : #+begin_src Rd :eval nil :tangle man/count.rows.Rd
406 : \title{ Count \code{data.frame} rows }
407 : \description{ Counts the unique rows of a \code{data.frame} }
408 : \usage{ count.rows(x) }
411 : Just a \code{data.frame} or \code{matrix}
415 : Basically, this function tries to be smart about counting
416 : rows. It relies on the \code{\link{order}} function and basic logic to
417 : do the heavy lifting.
420 : A \code{data.frame} with a column named \code{counts}, all the columns
421 : of \code{x} and the rows that would appear in \code{unique( x )}.
424 : Charles C. Berry \email{cberry@tajo.ucsd.edu}
427 : hec.frame <- as.data.frame( HairEyeColor )
429 : hec.frame[ rep(1:nrow(hec.frame), hec.frame$Freq ), ]
430 : hec.counts <- count.rows( hec.frame )
431 : all.equal( hec.counts$counts, hec.counts$Freq )
438 **** countRows-package
440 #+begin_src Rd :eval nil :tangle man/countRows-package.Rd
441 \name{countRows-package}
442 \alias{countRows-package}
445 \title{Count \code{data.frame} rows }
446 \description{ Counts the unique rows of a \code{data.frame} }
449 Package: \tab countRows\cr
450 Type: \tab Package\cr
452 Date: \tab 2010-12-08\cr
453 License: \tab GPL-3\cr
454 LazyLoad: \tab yes\cr
457 There is only one function in this package, \code{count.rows} and it
461 Charles C. Berry \email{cberry@tajo.ucsd.edu}
466 : #+begin_src Rd :eval nil :tangle man/countRows-package.Rd
467 : \name{countRows-package}
468 : \alias{countRows-package}
471 : \title{Count \code{data.frame} rows }
472 : \description{ Counts the unique rows of a \code{data.frame} }
475 : Package: \tab countRows\cr
476 : Type: \tab Package\cr
477 : Version: \tab 1.0\cr
478 : Date: \tab 2010-12-08\cr
479 : License: \tab GPL-3\cr
480 : LazyLoad: \tab yes\cr
483 : There is only one function in this package, \code{count.rows} and it
487 : Charles C. Berry \email{cberry@tajo.ucsd.edu}
489 : \keyword{ package }
492 *** Tests and Tryouts
494 - As part of developing a package one must try out some code and
495 perhaps develop some tests to be sure it does what it is supposed to
497 - Here is an easy-to-read tryout of the =count.rows= function:
498 - You may need to edit or delete the =.libPaths= call to suit your
501 : #+begin_src R :session :results output :exports both
502 : .libPaths( new = "./Rlib")
503 : require( countRows )
504 : simple.df <- data.frame( diag(1:4), row.names=letters[ 1:4 ])
505 : repeated.df <- simple.df[ rep( 1:4, 4:1 ), ]
507 : count.rows( repeated.df )
510 #+begin_src R :session :results output :exports results
511 .libPaths( new = "./Rlib")
513 simple.df <- data.frame( diag(1:4), row.names=letters[ 1:4 ])
514 repeated.df <- simple.df[ rep( 1:4, 4:1 ), ]
516 count.rows( repeated.df )
521 Loading required package: countRows
534 * Version Control, Navigation, and setup tasks
535 ** list files for convenient navigation
537 - Use this if you do not use the =.org= document to keep the master for the
539 - It is useful when in a terminal window on a remote machine, and speedbar
540 is not a good option. =C-u C-c C-o= or =Mouse-1= will open the file
543 #+begin_src R :results output verbatim :var cwd="."
544 cat(paste("file:",list.files(cwd,".*",recursive=TRUE),sep=''),sep='\n')
547 : #+begin_src R :results output verbatim :var cwd="."
548 : cat(paste("file:",list.files(cwd,".*",recursive=TRUE),sep=''),sep='\n')
551 ** Speedbar navigation
553 - Use this if you do not use the =.org= document to keep the master for the
555 - Make speedbar stick to the package source directory by typing 't' in
556 its frame after executing this block:
558 #+begin_src emacs-lisp :results output silent
560 (ess-S-initialize-speedbar)
561 ;; uncomment this line if it isn't in ~/.emacs:
562 ;; (add-to-list 'auto-mode-alist '("\\.Rd\\'" . Rd-mode))
563 (speedbar-add-supported-extension ".Rd")
564 (speedbar-add-supported-extension "NAMESPACE")
565 (speedbar-add-supported-extension "DESCRIPTION")
571 : #+begin_src emacs-lisp :results output silent
572 : (require 'speedbar)
573 : (ess-S-initialize-speedbar)
574 : ;; uncomment this line if it isn't in ~/.emacs:
575 : ;; (add-to-list 'auto-mode-alist '("\\.Rd\\'" . Rd-mode))
576 : (speedbar-add-supported-extension ".Rd")
577 : (speedbar-add-supported-extension "NAMESPACE")
578 : (speedbar-add-supported-extension "DESCRIPTION")
585 - If you don't use svn, substitute the relevant version control
586 command in each block in this section
588 - Each of these can be run by putting point on the headline then
589 keying =C-c C-v C-s y=
591 - Possibly add ~--username=<> --password=<>~ to the svn commands
595 - Show what files are version controlled
597 #+begin_src sh :results output
601 : #+begin_src sh :results output
602 : svn list --recursive
607 - Use at the start of each session to sync changes from other machines
609 #+begin_src sh :results output
613 : #+begin_src sh :results output
619 - At the end of a day's work commit the changes
621 #+begin_src sh :results output
622 svn commit -m "edits"
625 : #+begin_src sh :results output
626 : svn commit -m "edits"