1 ######################################################################
2 ## Copyright (C) 2006, Roger D. Peng <rpeng@jhsph.edu>
4 ## This program is free software; you can redistribute it and/or modify
5 ## it under the terms of the GNU General Public License as published by
6 ## the Free Software Foundation; either version 2 of the License, or
7 ## (at your option) any later version.
9 ## This program is distributed in the hope that it will be useful,
10 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 ## GNU General Public License for more details.
14 ## You should have received a copy of the GNU General Public License
15 ## along with this program; if not, write to the Free Software
16 ## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 #####################################################################
20 ######################################################################
21 ## Class 'filehashDB1'
25 ## File format: [key] [nbytes data] [data]
26 ## serialized serialized raw bytes (serialized)
29 ######################################################################
31 ## 'meta' is a list with an element called 'metaEnv'. 'metaEnv' is an
32 ## environment that contains metadata for the database.
34 setClass("filehashDB1",
35 representation(datafile = "character",
36 meta = "list"), ## contains 'metaEnv' element
40 setValidity("filehashDB1",
42 if(!file.exists(object@datafile))
43 return(gettextf("datafile '%s' does not exist", datafile))
44 if(is.null(object@meta$metaEnv))
45 return(gettextf("object is missing 'metaEnv' element"))
49 createDB1 <- function(dbName) {
50 if(!hasWorkingFtell())
51 stop("need working 'ftell()' to use 'DB1' format")
52 if(!file.exists(dbName))
55 message(gettextf("database '%s' already exists", dbName))
59 makeMetaEnv <- function(filename) {
60 ## Create database map and store in environment. Don't read map
61 ## until you need it; for example, it's not needed for *writing*
63 metaEnv <- new.env(parent = emptyenv())
64 metaEnv$map <- NULL ## 'NULL' indicates the map needs to be read
65 metaEnv$dbfilesize <- file.info(filename)$size
70 initializeDB1 <- function(dbName) {
71 if(!hasWorkingFtell())
72 stop("need working 'ftell()' to use DB1 format")
73 dbName <- normalizePath(dbName)
77 meta = list(metaEnv = makeMetaEnv(dbName)),
78 name = basename(dbName)
83 findEndPos <- function(con) {
88 readKeyMap <- function(con, map = NULL, pos = 0) {
90 map <- new.env(hash = TRUE, parent = emptyenv())
93 seek(con, pos, "start", "read")
96 while(!inherits(status, "condition")) {
98 key <- unserialize(con)
99 datalen <- unserialize(con)
100 pos <- seek(con, rw = "read") ## Update position
103 ## Negative values of 'datalen' indicate deleted keys so only
104 ## record positive 'datalen' values
107 ## Fast forward to the next key
108 seek(con, datalen, "current", "read")
112 ## Key is deleted; there is no data after it
113 if(exists(key, map, inherits = FALSE))
114 remove(list = key, pos = map)
117 }, error = function(err) {
124 convertDB1 <- function(old, new) {
126 newdb <- dbInit(new, "DB1")
128 con <- file(old, "rb")
131 endpos <- findEndPos(con)
134 while(pos < endpos) {
135 keylen <- readBin(con, "numeric", endian = "little")
136 key <- rawToChar(readBin(con, "raw", keylen))
137 datalen <- readBin(con, "numeric", endian = "little")
138 value <- unserialize(con)
140 dbInsert(newdb, key, value)
146 readSingleKey <- function(con, map, key) {
150 stop(gettextf("'%s' not in database", key))
152 seek(con, start, rw = "read")
156 readKeys <- function(con, map, keys) {
157 r <- lapply(keys, function(key) readSingleKey(con, map, key))
162 writeNullKeyValue <- function(con, key) {
163 writestart <- findEndPos(con)
165 handler <- function(cond) {
166 ## Rewind the file back to where writing began and truncate at
168 seek(con, writestart, "start", "write")
179 len <- as.integer(-1)
181 }, interrupt = handler, error = handler, finally = {
192 writeKey <- function(con, key) {
197 writeKeyValue <- function(con, key, value) {
198 writestart <- findEndPos(con)
200 handler <- function(cond) {
201 ## Rewind the file back to where writing began and truncate at
202 ## that position; this is probably a bad idea for files > 2GB
203 seek(con, writestart, "start", "write")
214 ## Serialize data to raw bytes
215 byteData <- serialize(value, NULL)
217 ## Write out length of data
218 len <- length(byteData)
222 writeBin(byteData, con)
223 }, interrupt = handler, error = handler, finally = {
234 lockFileName <- function(con) {
235 ## Use 3 underscores for lock file
236 paste(summary(con)$description, "LOCK", sep = "___")
239 createLockFile <- function(con) {
240 lockfile <- lockFileName(con)
241 file.create(lockfile)
244 deleteLockFile <- function(con) {
246 lockfile <- lockFileName(con)
247 file.remove(lockfile)
251 isLocked <- function(con) {
252 lockfile <- lockFileName(con)
253 isTRUE( file.exists(lockfile) )
256 ######################################################################
257 ## Internal utilities
259 filesize <- findEndPos
261 setGeneric("checkMap", function(db, ...) standardGeneric("checkMap"))
263 setMethod("checkMap", "filehashDB1",
264 function(db, filecon, ...) {
265 old.size <- get("dbfilesize", db@meta$metaEnv)
266 cur.size <- tryCatch({
268 }, error = function(err) {
271 size.change <- old.size != cur.size
272 map.orig <- getMap(db)
274 map <- if(is.null(map.orig))
277 readKeyMap(filecon, map.orig, old.size)
281 if(!identical(map, map.orig)) {
282 assign("map", map, db@meta$metaEnv)
283 assign("dbfilesize", cur.size, db@meta$metaEnv)
289 setGeneric("getMap", function(db) standardGeneric("getMap"))
291 setMethod("getMap", "filehashDB1",
293 get("map", db@meta$metaEnv)
296 ######################################################################
297 ## Interface functions
299 setMethod("dbInsert",
300 signature(db = "filehashDB1", key = "character", value = "ANY"),
301 function(db, key, value, ...) {
302 filecon <- file(db@datafile, "ab")
303 on.exit(close(filecon))
304 writeKeyValue(filecon, key, value)
308 signature(db = "filehashDB1", key = "character"),
309 function(db, key, ...) {
310 filecon <- file(db@datafile, "rb")
311 on.exit(close(filecon))
313 checkMap(db, filecon)
316 r <- readKeys(filecon, map, key[1])
320 setMethod("dbMultiFetch",
321 signature(db = "filehashDB1", key = "character"),
322 function(db, key, ...) {
323 filecon <- file(db@datafile, "rb")
324 on.exit(close(filecon))
326 checkMap(db, filecon)
329 readKeys(filecon, map, key)
332 setMethod("[", signature(x = "filehashDB1", i = "character", j = "missing",
334 function(x, i , j, drop) {
338 setMethod("dbExists", signature(db = "filehashDB1", key = "character"),
339 function(db, key, ...) {
344 setMethod("dbList", "filehashDB1",
346 filecon <- file(db@datafile, "rb")
347 on.exit(close(filecon))
349 checkMap(db, filecon)
355 names(as.list(map, all.names = TRUE))
358 setMethod("dbDelete", signature(db = "filehashDB1", key = "character"),
359 function(db, key, ...) {
360 filecon <- file(db@datafile, "ab")
361 on.exit(close(filecon))
363 writeNullKeyValue(filecon, key)
366 setMethod("dbUnlink", "filehashDB1",
368 file.remove(db@datafile)
371 setMethod("dbReorganize", "filehashDB1",
373 datafile <- db@datafile
375 ## Find a temporary file name
376 tempdata <- paste(datafile, "Tmp", sep = "")
378 while(file.exists(tempdata)) {
380 tempdata <- paste(datafile, "Tmp", i, sep = "")
382 if(!dbCreate(tempdata, type = "DB1")) {
383 warning("could not create temporary database")
386 on.exit(file.remove(tempdata))
388 tempdb <- dbInit(tempdata, type = "DB1")
391 ## Copy all keys to temporary database
392 message("reorganizing database contents...")
394 dbInsert(tempdb, key, dbFetch(db, key))
396 ## dbDisconnect(tempdb)
398 status <- file.rename(tempdata, datafile)
400 if(!isTRUE(status)) {
402 warning("temporary database could not be renamed and is left in ",
406 ## message("original database has been disconnected; ",
407 ## "reload with 'dbInit'")
408 message("database reorganized; reload database with 'dbInit'")
413 ################################################################################
414 ## Test system's ftell()
416 hasWorkingFtell <- function() {
418 con <- file(tfile, "wb")
428 offset <- end - begin
436 ######################################################################