1 ######################################################################
2 ## Copyright (C) 2006, Roger D. Peng <rpeng@jhsph.edu>
4 ## This program is free software; you can redistribute it and/or modify
5 ## it under the terms of the GNU General Public License as published by
6 ## the Free Software Foundation; either version 2 of the License, or
7 ## (at your option) any later version.
9 ## This program is distributed in the hope that it will be useful,
10 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 ## GNU General Public License for more details.
14 ## You should have received a copy of the GNU General Public License
15 ## along with this program; if not, write to the Free Software
16 ## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 #####################################################################
20 ######################################################################
21 ## Class 'filehashDB1'
25 ## File format: [key] [nbytes data] [data]
26 ## serialized serialized raw bytes (serialized)
29 ######################################################################
33 ## 'meta' is a list with an element called 'metaEnv'. 'metaEnv' is an
34 ## environment that contains metadata for the database.
36 setClass("filehashDB1",
37 representation(datafile = "character",
39 meta = "list"), ## contains 'metaEnv' element
43 setValidity("filehashDB1",
45 if(!file.exists(object@datafile))
46 return(gettextf("datafile '%s' does not exist", datafile))
47 if(is.null(object@meta$metaEnv))
48 return(gettextf("object is missing 'metaEnv' element"))
52 createDB1 <- function(dbName) {
53 if(!hasWorkingFtell())
54 stop("need working 'ftell()' to use DB1 format")
55 if(!file.exists(dbName))
56 createEmptyFile(dbName)
58 message(gettextf("database '%s' already exists", dbName))
62 setConnectionFinalizer <- function(metaEnv, con) {
63 conList <- list(con = unclass(con))
64 metaEnv$conList <- conList
66 reg.finalizer(metaEnv, function(env) {
67 conList <- get("conList", env)
68 for(i in seq(along = conList)) {
69 con <- getConnection(conList[[i]])
71 if(!is.null(con) && isOpen(con))
77 makeMetaEnv <- function(con) {
78 ## Create database map and store in environment. Don't read map
79 ## until you need it; for example, it's not needed for *writing*
81 metaEnv <- new.env(parent = emptyenv())
82 metaEnv$map <- NULL ## 'NULL' indicates the map needs to be read
83 filename <- summary(con)$description
84 metaEnv$dbfilesize <- file.info(filename)$size
86 ## This list stores the connection number for the file connection.
87 ## Store the connection list in an environment and register a
88 ## finalizer to close the connection when the environment is
90 setConnectionFinalizer(metaEnv, con)
94 initializeDB1 <- function(dbName) {
95 if(!hasWorkingFtell())
96 stop("need working 'ftell()' to use DB1 format")
97 dbName <- normalizePath(dbName)
100 }, error = function(err) {
101 message("database will be opened with read-only access")
104 metaEnv <- makeMetaEnv(con)
106 new("filehashDB1", datafile = dbName,
108 meta = list(metaEnv = metaEnv),
109 name = basename(dbName)
114 findEndPos <- function(con) {
119 readKeyMap <- function(con, map = NULL, pos = 0) {
121 map <- new.env(hash = TRUE, parent = emptyenv())
124 seek(con, pos, "start", "read")
127 while(!inherits(status, "condition")) {
129 key <- unserialize(con)
130 datalen <- unserialize(con)
131 pos <- seek(con, rw = "read") ## Update position
134 ## Negative values of 'datalen' indicate deleted keys so only
135 ## record positive 'datalen' values
138 ## Fast forward to the next key
139 seek(con, datalen, "current", "read")
143 ## Key is deleted; there is no data after it
144 if(exists(key, map, inherits = FALSE))
145 remove(list = key, pos = map)
148 }, error = function(err) {
155 convertDB1 <- function(old, new) {
157 newdb <- dbInit(new, "DB1")
159 con <- file(old, "rb")
162 endpos <- findEndPos(con)
165 while(pos < endpos) {
166 keylen <- readBin(con, "numeric", endian = "little")
167 key <- rawToChar(readBin(con, "raw", keylen))
168 datalen <- readBin(con, "numeric", endian = "little")
169 value <- unserialize(con)
171 dbInsert(newdb, key, value)
177 readSingleKey <- function(con, map, key) {
181 stop(gettextf("'%s' not in database", key))
183 seek(con, start, rw = "read")
187 readKeys <- function(con, map, keys) {
188 r <- lapply(keys, function(key) readSingleKey(con, map, key))
193 writeNullKeyValue <- function(con, key) {
194 writestart <- findEndPos(con)
196 handler <- function(cond) {
197 ## Rewind the file back to where writing began and truncate at
199 seek(con, writestart, "start", "write")
212 }, interrupt = handler, error = handler, finally = {
223 writeKey <- function(con, key) {
228 writeKeyValue <- function(con, key, value) {
229 writestart <- findEndPos(con)
231 handler <- function(cond) {
232 ## Rewind the file back to where writing began and truncate at
233 ## that position; this is probably a bad idea for files > 2GB
234 seek(con, writestart, "start", "write")
245 ## Serialize data to raw bytes
246 byteData <- toBytes(value)
248 ## Write out length of data
249 len <- length(byteData)
253 writeBin(byteData, con)
254 }, interrupt = handler, error = handler, finally = {
265 lockFileName <- function(con) {
266 ## Use 3 underscores for lock file
267 paste(summary(con)$description, "LOCK", sep = "___")
270 createLockFile <- function(con) {
271 lockfile <- lockFileName(con)
272 createEmptyFile(lockfile)
275 deleteLockFile <- function(con) {
277 lockfile <- lockFileName(con)
278 file.remove(lockfile)
282 isLocked <- function(con) {
283 lockfile <- lockFileName(con)
284 isTRUE( file.exists(lockfile) )
287 ######################################################################
288 ## Internal utilities
290 filesize <- findEndPos
292 setGeneric("checkMap", function(db) standardGeneric("checkMap"))
294 setMethod("checkMap", "filehashDB1",
296 old.size <- get("dbfilesize", db@meta$metaEnv)
297 ## cur.size <- file.info(db@datafile)$size
298 cur.size <- tryCatch({
300 }, error = function(err) {
303 size.change <- old.size != cur.size
304 map.orig <- getMap(db)
306 map <- if(is.null(map.orig))
307 readKeyMap(db@filecon)
309 readKeyMap(db@filecon, map.orig, old.size)
313 if(!identical(map, map.orig)) {
314 assign("map", map, db@meta$metaEnv)
315 assign("dbfilesize", cur.size, db@meta$metaEnv)
321 setGeneric("getMap", function(db) standardGeneric("getMap"))
323 setMethod("getMap", "filehashDB1",
325 get("map", db@meta$metaEnv)
328 ######################################################################
329 ## Interface functions
331 setMethod("dbReconnect", "filehashDB1",
334 db@filecon <- tryCatch({
335 file(db@datafile, "a+b")
336 }, error = function(err) {
337 file(db@datafile, "rb")
339 db@meta <- list(metaEnv = makeMetaEnv(db@filecon))
343 setMethod("dbFirst", "filehashDB1",
345 seek(db@filecon, 0, origin = "start", rw = "read")
348 setMethod("dbNext", "filehashDB1",
350 pos <- seek(db@filecon, rw = "read")
354 ## Search for next (non-deleted) key/value pair
356 key <- unserialize(db@filecon)
357 size <- unserialize(db@filecon)
360 val <- unserialize(db@filecon)
365 }, error = function(err) {
366 seek(db@filecon, pos, "start", rw = "read")
372 setMethod("dbInsert",
373 signature(db = "filehashDB1", key = "character", value = "ANY"),
374 function(db, key, value, ...) {
375 writeKeyValue(db@filecon, key, value)
379 signature(db = "filehashDB1", key = "character"),
380 function(db, key, ...) {
383 r <- readKeys(db@filecon, map, key[1])
387 setMethod("dbMultiFetch",
388 signature(db = "filehashDB1", key = "character"),
389 function(db, key, ...) {
392 readKeys(db@filecon, map, key)
395 setMethod("[", signature(x = "filehashDB1", i = "character", j = "missing",
397 function(x, i , j, drop) {
401 setMethod("dbExists", signature(db = "filehashDB1", key = "character"),
402 function(db, key, ...) {
407 setMethod("dbList", "filehashDB1",
414 names(as.list(map, all.names = TRUE))
417 setMethod("dbDelete", signature(db = "filehashDB1", key = "character"),
418 function(db, key, ...) {
419 writeNullKeyValue(db@filecon, key)
422 setMethod("dbUnlink", "filehashDB1",
424 file.remove(db@datafile)
427 setMethod("dbDisconnect", "filehashDB1",
429 if(isOpen(db@filecon))
433 setMethod("dbReorganize", "filehashDB1",
435 datafile <- db@datafile
437 ## Find a temporary file name
438 tempdata <- paste(datafile, "Tmp", sep = "")
440 while(file.exists(tempdata)) {
442 tempdata <- paste(datafile, "Tmp", i, sep = "")
444 if(!dbCreate(tempdata, type = "DB1")) {
445 warning("could not create temporary database")
448 on.exit(file.remove(tempdata))
450 tempdb <- dbInit(tempdata, type = "DB1")
453 ## Copy all keys to temporary database
455 dbInsert(tempdb, key, dbFetch(db, key))
459 status <- file.rename(tempdata, datafile)
461 if(!isTRUE(status)) {
463 warning("temporary database could not be renamed and is left in ",
467 message("original database has been disconnected; ",
468 "reload with 'dbInit'")
473 ######################################################################
474 ## Test system's ftell()
476 hasWorkingFtell <- function() {
478 con <- file(tfile, "wb")
488 offset <- end - begin
496 ######################################################################