1 ######################################################################
2 ## Copyright (C) 2006, Roger D. Peng <rpeng@jhsph.edu>
4 ## This program is free software; you can redistribute it and/or modify
5 ## it under the terms of the GNU General Public License as published by
6 ## the Free Software Foundation; either version 2 of the License, or
7 ## (at your option) any later version.
9 ## This program is distributed in the hope that it will be useful,
10 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 ## GNU General Public License for more details.
14 ## You should have received a copy of the GNU General Public License
15 ## along with this program; if not, write to the Free Software
16 ## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 #####################################################################
20 ######################################################################
21 ## Class 'filehashDB1'
25 ## File format: [key] [nbytes data] [data]
26 ## serialized serialized raw bytes (serialized)
29 ######################################################################
31 ## 'meta' is a list with an element called 'metaEnv'. 'metaEnv' is an
32 ## environment that contains metadata for the database.
34 setClass("filehashDB1",
35 representation(datafile = "character",
36 meta = "list"), ## contains 'metaEnv' element
40 setValidity("filehashDB1",
42 if(!file.exists(object@datafile))
43 return(gettextf("datafile '%s' does not exist",
45 if(is.null(object@meta$metaEnv))
46 return(gettextf("object is missing 'metaEnv' element"))
50 createDB1 <- function(dbName) {
51 if(!hasWorkingFtell())
52 stop("need working 'ftell()' to use 'DB1' format")
53 if(file.exists(dbName)) {
54 message(gettextf("database '%s' already exists", dbName))
57 status <- file.create(dbName)
60 stop(gettextf("unable to create database file '%s'", dbName))
64 makeMetaEnv <- function(filename) {
65 ## Create database map and store in environment. Don't read map
66 ## until you need it; for example, it's not needed for *writing*
68 metaEnv <- new.env(parent = emptyenv())
69 metaEnv$map <- NULL ## 'NULL' indicates the map needs to be read
70 metaEnv$dbfilesize <- file.info(filename)$size
75 initializeDB1 <- function(dbName) {
76 if(!hasWorkingFtell())
77 stop("need working 'ftell()' to use DB1 format")
78 dbName <- normalizePath(dbName)
82 meta = list(metaEnv = makeMetaEnv(dbName)),
83 name = basename(dbName)
88 readKeyMap <- function(con, map = NULL, pos = 0) {
90 ## using 'hash = TRUE' is critical because it can have a major
91 ## impact on performance for large databases
92 map <- new.env(hash = TRUE, parent = emptyenv())
96 stop("'pos' cannot be negative")
97 filename <- path.expand(summary(con)$description)
98 filesize <- file.info(filename)$size
101 stop("'pos' cannot be greater than file size")
102 .Call("read_key_map", filename, map, filesize, pos)
105 readSingleKey <- function(con, map, key) {
109 stop(gettextf("unable to obtain value for key '%s'", key))
111 seek(con, start, rw = "read")
115 readKeys <- function(con, map, keys) {
116 r <- lapply(keys, function(key) readSingleKey(con, map, key))
121 gotoEndPos <- function(con) {
122 ## Move connection to the end
127 writeNullKeyValue <- function(con, key) {
128 writestart <- gotoEndPos(con)
130 handler <- function(cond) {
131 ## Rewind the file back to where writing began and truncate at
133 seek(con, writestart, "start", "write")
137 if(!createLockFile(con))
138 stop("cannot create lock file")
142 len <- as.integer(-1)
144 }, interrupt = handler, error = handler, finally = {
150 writeKeyValue <- function(con, key, value) {
151 writestart <- gotoEndPos(con)
153 handler <- function(cond) {
154 ## Rewind the file back to where writing began and
155 ## truncate at that position; this is probably a bad
156 ## idea for files > 2GB
157 seek(con, writestart, "start", "write")
161 if(!createLockFile(con))
162 stop("cannot create lock file")
166 byteData <- serialize(value, NULL)
167 len <- length(byteData)
170 writeBin(byteData, con)
171 }, interrupt = handler, error = handler, finally = {
177 lockFileName <- function(con) {
178 ## Use 3 underscores for lock file
179 sprintf("%s___LOCK", summary(con)$description)
182 createLockFile <- function(con) {
183 status <- .Call("lock_file", lockFileName(con))
187 deleteLockFile <- function(con) {
188 lockfile <- lockFileName(con)
189 file.remove(lockfile)
192 ######################################################################
193 ## Internal utilities
195 filesize <- gotoEndPos
197 setGeneric("checkMap", function(db, ...) standardGeneric("checkMap"))
199 setMethod("checkMap", "filehashDB1",
200 function(db, filecon, ...) {
201 old.size <- get("dbfilesize", db@meta$metaEnv)
202 cur.size <- tryCatch({
204 }, error = function(err) {
207 size.change <- old.size != cur.size
208 map.orig <- getMap(db)
210 map <- if(is.null(map.orig))
213 readKeyMap(filecon, map.orig, old.size)
216 if(!identical(map, map.orig)) {
217 assign("map", map, db@meta$metaEnv)
218 assign("dbfilesize", cur.size, db@meta$metaEnv)
224 setGeneric("getMap", function(db) standardGeneric("getMap"))
226 setMethod("getMap", "filehashDB1",
228 get("map", db@meta$metaEnv)
231 ######################################################################
232 ## Interface functions
234 setMethod("dbInsert",
235 signature(db = "filehashDB1", key = "character", value = "ANY"),
236 function(db, key, value, ...) {
237 filecon <- try(file(db@datafile, "ab"), silent = TRUE)
239 if(inherits(filecon, "try-error"))
240 stop("unable to open connection to database")
241 on.exit(close(filecon))
242 writeKeyValue(filecon, key, value)
246 signature(db = "filehashDB1", key = "character"),
247 function(db, key, ...) {
248 filecon <- try(file(db@datafile, "rb"), silent = TRUE)
250 if(inherits(filecon, "try-error"))
251 stop("unable to open connection to database")
252 on.exit(close(filecon))
254 checkMap(db, filecon)
257 r <- readKeys(filecon, map, key[1])
261 setMethod("dbMultiFetch",
262 signature(db = "filehashDB1", key = "character"),
263 function(db, key, ...) {
264 filecon <- file(db@datafile, "rb")
265 on.exit(close(filecon))
267 checkMap(db, filecon)
270 readKeys(filecon, map, key)
273 setMethod("[", signature(x = "filehashDB1", i = "character", j = "missing",
275 function(x, i , j, drop) {
279 setMethod("dbExists", signature(db = "filehashDB1", key = "character"),
280 function(db, key, ...) {
285 setMethod("dbList", "filehashDB1",
287 filecon <- file(db@datafile, "rb")
288 on.exit(close(filecon))
290 checkMap(db, filecon)
296 keys <- as.list(map, all.names = TRUE)
297 use <- !sapply(keys, is.null)
302 setMethod("dbDelete", signature(db = "filehashDB1", key = "character"),
303 function(db, key, ...) {
304 filecon <- file(db@datafile, "ab")
305 on.exit(close(filecon))
307 writeNullKeyValue(filecon, key)
310 setMethod("dbUnlink", "filehashDB1",
312 file.remove(db@datafile)
315 reorganizeDB <- function(db, ...) {
316 datafile <- db@datafile
318 ## Find a temporary file name
319 tempdata <- paste(datafile, "Tmp", sep = "")
321 while(file.exists(tempdata)) {
323 tempdata <- paste(datafile, "Tmp", i, sep = "")
325 if(!dbCreate(tempdata, type = "DB1")) {
326 warning("could not create temporary database")
329 on.exit(file.remove(tempdata))
331 tempdb <- dbInit(tempdata, type = "DB1")
334 ## Copy all keys to temporary database
335 nkeys <- length(keys)
336 cat("Reorganizing database: ")
338 for(i in seq_along(keys)) {
340 msg <- sprintf("%d%% (%d/%d)", round (100 * i / nkeys),
344 dbInsert(tempdb, key, dbFetch(db, key))
346 back <- paste(rep("\b", nchar(msg)), collapse = "")
350 status <- file.rename(tempdata, datafile)
352 if(!isTRUE(status)) {
354 warning("temporary database could not be renamed and is left in ",
359 message("Finished; reload database with 'dbInit'")
363 setMethod("dbReorganize", "filehashDB1", reorganizeDB)
366 ################################################################################
367 ## Test system's ftell()
369 hasWorkingFtell <- function() {
371 con <- file(tfile, "wb")
381 offset <- end - begin
389 ######################################################################