1 ######################################################################
2 ## Copyright (C) 2006--2008, Roger D. Peng <rpeng@jhsph.edu>
4 ## This program is free software; you can redistribute it and/or modify
5 ## it under the terms of the GNU General Public License as published by
6 ## the Free Software Foundation; either version 2 of the License, or
7 ## (at your option) any later version.
9 ## This program is distributed in the hope that it will be useful,
10 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 ## GNU General Public License for more details.
14 ## You should have received a copy of the GNU General Public License
15 ## along with this program; if not, write to the Free Software
16 ## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 #####################################################################
20 ######################################################################
21 ## Class 'filehashDB1'
25 ## File format: [key] [nbytes data] [data]
26 ## serialized serialized raw bytes (serialized)
29 ######################################################################
31 ## 'meta' is a list of functions for updating the file size of the
32 ## database and the file map.
34 setClass("filehashDB1",
35 representation(datafile = "character",
36 meta = "list"), ## contains 'metaEnv' element
40 setValidity("filehashDB1",
42 if(!file.exists(object@datafile))
43 return(gettextf("datafile '%s' does not exist",
48 createDB1 <- function(dbName) {
49 if(!hasWorkingFtell())
50 stop("need working 'ftell()' to use 'DB1' format")
51 if(file.exists(dbName)) {
52 message(gettextf("database '%s' already exists", dbName))
55 status <- file.create(dbName)
58 stop(gettextf("unable to create database file '%s'", dbName))
62 makeMetaEnv <- function(filename) {
63 dbmap <- NULL ## 'NULL' indicates the map needs to be read
64 dbfilesize <- file.info(filename)$size
66 updatesize <- function(size) {
69 updatemap <- function(map) {
72 getsize <- function() {
75 getmap <- function() {
78 list(updatesize = updatesize,
79 updatemap = updatemap,
84 initializeDB1 <- function(dbName) {
85 if(!hasWorkingFtell())
86 stop("need working 'ftell()' to use DB1 format")
87 dbName <- normalizePath(dbName)
91 meta = makeMetaEnv(dbName),
92 name = basename(dbName)
97 readKeyMap <- function(con, map = NULL, pos = 0) {
99 ## using 'hash = TRUE' is critical because it can have a major
100 ## impact on performance for large databases
101 map <- new.env(hash = TRUE, parent = emptyenv())
105 stop("'pos' cannot be negative")
106 filename <- path.expand(summary(con)$description)
107 filesize <- file.info(filename)$size
110 stop("'pos' cannot be greater than file size")
111 .Call("read_key_map", filename, map, filesize, pos)
114 readSingleKey <- function(con, map, key) {
118 stop(gettextf("unable to obtain value for key '%s'", key))
120 seek(con, start, rw = "read")
124 readKeys <- function(con, map, keys) {
125 r <- lapply(keys, function(key) readSingleKey(con, map, key))
130 gotoEndPos <- function(con) {
131 ## Move connection to the end
136 writeNullKeyValue <- function(con, key) {
137 writestart <- gotoEndPos(con)
139 handler <- function(cond) {
140 ## Rewind the file back to where writing began and truncate at
142 seek(con, writestart, "start", "write")
149 len <- as.integer(-1)
151 }, interrupt = handler, error = handler, finally = {
156 writeKeyValue <- function(con, key, value) {
157 writestart <- gotoEndPos(con)
159 handler <- function(cond) {
160 ## Rewind the file back to where writing began and
161 ## truncate at that position; this is probably a bad
162 ## idea for files > 2GB
163 seek(con, writestart, "start", "write")
170 byteData <- serialize(value, NULL)
171 len <- length(byteData)
174 writeBin(byteData, con)
175 }, interrupt = handler, error = handler, finally = {
180 setMethod("lockFile", "file", function(db, ...) {
181 ## Use 3 underscores for lock file
182 sprintf("%s___LOCK", summary(db)$description)
185 createLockFile <- function(name) {
186 status <- .Call("lock_file", name)
188 if(!isTRUE(status >= 0))
189 stop("cannot create lock file")
193 deleteLockFile <- function(name) {
194 if(!file.remove(name))
195 stop("cannot remove lock file")
199 ################################################################################
200 ## Internal utilities
202 filesize <- gotoEndPos
204 setGeneric("checkMap", function(db, ...) standardGeneric("checkMap"))
206 setMethod("checkMap", "filehashDB1",
207 function(db, filecon, ...) {
208 old.size <- db@meta$getsize()
209 cur.size <- tryCatch({
211 }, error = function(err) {
214 size.change <- old.size != cur.size
219 map <- readKeyMap(filecon)
220 else if(size.change) {
221 ## Modify 'map.old' directly
223 readKeyMap(filecon, map, old.size)
224 }, error = function(err) {
225 message(conditionMessage(err))
231 if(!identical(map, map0)) {
232 db@meta$updatemap(map)
233 db@meta$updatesize(cur.size)
239 setGeneric("getMap", function(db) standardGeneric("getMap"))
241 setMethod("getMap", "filehashDB1",
246 ################################################################################
247 ## Interface functions
249 openDBConn <- function(filename, mode) {
254 if(inherits(con, "try-error"))
255 stop("unable to open connection to database")
259 setMethod("dbInsert",
260 signature(db = "filehashDB1", key = "character", value = "ANY"),
261 function(db, key, value, ...) {
262 con <- openDBConn(db@datafile, "ab")
265 lockname <- lockFile(con)
266 createLockFile(lockname)
267 on.exit(deleteLockFile(lockname), add = TRUE)
269 writeKeyValue(con, key, value)
273 signature(db = "filehashDB1", key = "character"),
274 function(db, key, ...) {
275 con <- openDBConn(db@datafile, "rb")
278 lockname <- lockFile(con)
279 createLockFile(lockname)
280 on.exit(deleteLockFile(lockname), add = TRUE)
285 r <- readKeys(con, map, key[1])
289 setMethod("dbMultiFetch",
290 signature(db = "filehashDB1", key = "character"),
291 function(db, key, ...) {
292 con <- openDBConn(db@datafile, "rb")
295 lockname <- lockFile(con)
296 createLockFile(lockname)
297 on.exit(deleteLockFile(lockname), add = TRUE)
302 readKeys(con, map, key)
305 setMethod("[", signature(x = "filehashDB1", i = "character", j = "missing",
307 function(x, i , j, drop) {
311 setMethod("dbExists", signature(db = "filehashDB1", key = "character"),
312 function(db, key, ...) {
317 setMethod("dbList", "filehashDB1",
319 con <- openDBConn(db@datafile, "rb")
322 lockname <- lockFile(con)
323 createLockFile(lockname)
324 on.exit(deleteLockFile(lockname), add = TRUE)
332 keys <- as.list(map, all.names = TRUE)
333 use <- !sapply(keys, is.null)
338 setMethod("dbDelete", signature(db = "filehashDB1", key = "character"),
339 function(db, key, ...) {
340 con <- openDBConn(db@datafile, "ab")
343 lockname <- lockFile(con)
344 createLockFile(lockname)
345 on.exit(deleteLockFile(lockname), add = TRUE)
347 writeNullKeyValue(con, key)
350 setMethod("dbUnlink", "filehashDB1",
352 file.remove(db@datafile)
355 reorganizeDB <- function(db, ...) {
356 datafile <- db@datafile
358 ## Find a temporary file name
359 tempdata <- paste(datafile, "Tmp", sep = "")
361 while(file.exists(tempdata)) {
363 tempdata <- paste(datafile, "Tmp", i, sep = "")
365 if(!dbCreate(tempdata, type = "DB1")) {
366 warning("could not create temporary database")
369 on.exit(file.remove(tempdata))
371 tempdb <- dbInit(tempdata, type = "DB1")
374 ## Copy all keys to temporary database
375 nkeys <- length(keys)
376 cat("Reorganizing database: ")
378 for(i in seq_along(keys)) {
380 msg <- sprintf("%d%% (%d/%d)", round (100 * i / nkeys),
384 dbInsert(tempdb, key, dbFetch(db, key))
386 back <- paste(rep("\b", nchar(msg)), collapse = "")
390 status <- file.rename(tempdata, datafile)
392 if(!isTRUE(status)) {
394 warning("temporary database could not be renamed and is left in ",
399 message("Finished; reload database with 'dbInit'")
403 setMethod("dbReorganize", "filehashDB1", reorganizeDB)
406 ################################################################################
407 ## Test system's ftell()
409 hasWorkingFtell <- function() {
411 con <- file(tfile, "wb")
421 offset <- end - begin
429 ######################################################################