Add a 'tryCatch' when modifying 'map0' in 'checkMap'
[filehash.git] / R / filehash-DB1.R
blob4e7995964c319d52d26b0cbd79cc5951d00f9fef
1 ######################################################################
2 ## Copyright (C) 2006--2008, Roger D. Peng <rpeng@jhsph.edu>
3 ##     
4 ## This program is free software; you can redistribute it and/or modify
5 ## it under the terms of the GNU General Public License as published by
6 ## the Free Software Foundation; either version 2 of the License, or
7 ## (at your option) any later version.
8 ## 
9 ## This program is distributed in the hope that it will be useful,
10 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 ## GNU General Public License for more details.
13 ## 
14 ## You should have received a copy of the GNU General Public License
15 ## along with this program; if not, write to the Free Software
16 ## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 ## 02110-1301, USA
18 #####################################################################
20 ######################################################################
21 ## Class 'filehashDB1'
23 ## Database entries
25 ## File format: [key]        [nbytes data] [data]
26 ##              serialized   serialized    raw bytes (serialized)
29 ######################################################################
31 ## 'meta' is a list of functions for updating the file size of the
32 ## database and the file map.
34 setClass("filehashDB1",
35          representation(datafile = "character",
36                         meta = "list"),  ## contains 'metaEnv' element
37          contains = "filehash"
38          )
40 setValidity("filehashDB1",
41             function(object) {
42                     if(!file.exists(object@datafile))
43                             return(gettextf("datafile '%s' does not exist",
44                                             datafile))
45                     TRUE
46             })
48 createDB1 <- function(dbName) {
49         if(!hasWorkingFtell())
50                 stop("need working 'ftell()' to use 'DB1' format")
51         if(file.exists(dbName)) {
52                 message(gettextf("database '%s' already exists", dbName))
53                 return(TRUE)
54         }
55         status <- file.create(dbName)
57         if(!status)
58                 stop(gettextf("unable to create database file '%s'", dbName))
59         TRUE
62 makeMetaEnv <- function(filename) {
63         dbmap <- NULL  ## 'NULL' indicates the map needs to be read
64         dbfilesize <- file.info(filename)$size
66         updatesize <- function(size) {
67                 dbfilesize <<- size
68         }
69         updatemap <- function(map) {
70                 dbmap <<- map
71         }
72         getsize <- function() {
73                 dbfilesize
74         }
75         getmap <- function() {
76                 dbmap
77         }
78         list(updatesize = updatesize,
79              updatemap = updatemap,
80              getmap = getmap,
81              getsize = getsize)
84 initializeDB1 <- function(dbName) {
85         if(!hasWorkingFtell())
86                 stop("need working 'ftell()' to use DB1 format")
87         dbName <- normalizePath(dbName)
89         new("filehashDB1",
90             datafile = dbName,
91             meta = makeMetaEnv(dbName),
92             name = basename(dbName)
93             )
97 readKeyMap <- function(con, map = NULL, pos = 0) {
98         if(is.null(map)) {
99                 ## using 'hash = TRUE' is critical because it can have a major
100                 ## impact on performance for large databases
101                 map <- new.env(hash = TRUE, parent = emptyenv())
102                 pos <- 0
103         }
104         if(pos < 0)
105                 stop("'pos' cannot be negative")
106         filename <- path.expand(summary(con)$description)
107         filesize <- file.info(filename)$size
109         if(pos > filesize)
110                 stop("'pos' cannot be greater than file size")
111         .Call("read_key_map", filename, map, filesize, pos)
114 readSingleKey <- function(con, map, key) {
115         start <- map[[key]]
117         if(is.null(start))
118                 stop(gettextf("unable to obtain value for key '%s'", key))
120         seek(con, start, rw = "read")
121         unserialize(con)
124 readKeys <- function(con, map, keys) {
125         r <- lapply(keys, function(key) readSingleKey(con, map, key))
126         names(r) <- keys
127         r
130 gotoEndPos <- function(con) {
131         ## Move connection to the end
132         seek(con, 0, "end")
133         seek(con)
136 writeNullKeyValue <- function(con, key) {
137         writestart <- gotoEndPos(con)
139         handler <- function(cond) {
140                 ## Rewind the file back to where writing began and truncate at
141                 ## that position
142                 seek(con, writestart, "start", "write")
143                 truncate(con)
144                 cond
145         }
146         tryCatch({
147                 serialize(key, con)
149                 len <- as.integer(-1)
150                 serialize(len, con)
151         }, interrupt = handler, error = handler, finally = {
152                 flush(con)
153         })
156 writeKeyValue <- function(con, key, value) {
157         writestart <- gotoEndPos(con)
159         handler <- function(cond) {
160                 ## Rewind the file back to where writing began and
161                 ## truncate at that position; this is probably a bad
162                 ## idea for files > 2GB
163                 seek(con, writestart, "start", "write")
164                 truncate(con)
165                 cond
166         }
167         tryCatch({
168                 serialize(key, con)
170                 byteData <- serialize(value, NULL)
171                 len <- length(byteData)
172                 serialize(len, con)
174                 writeBin(byteData, con)
175         }, interrupt = handler, error = handler, finally = {
176                 flush(con)
177         })
180 setMethod("lockFile", "file", function(db, ...) {
181         ## Use 3 underscores for lock file
182         sprintf("%s___LOCK", summary(db)$description)
185 createLockFile <- function(name) {
186         status <- .Call("lock_file", name)
188         if(!isTRUE(status >= 0))
189                 stop("cannot create lock file")
190         TRUE
193 deleteLockFile <- function(name) {
194         if(!file.remove(name))
195                 stop("cannot remove lock file")
196         TRUE
199 ################################################################################
200 ## Internal utilities
202 filesize <- gotoEndPos
204 setGeneric("checkMap", function(db, ...) standardGeneric("checkMap"))
206 setMethod("checkMap", "filehashDB1",
207           function(db, filecon, ...) {
208                   old.size <- db@meta$getsize()
209                   cur.size <- tryCatch({
210                           filesize(filecon)
211                   }, error = function(err) {
212                           old.size
213                   })
214                   size.change <- old.size != cur.size
215                   map <- getMap(db)
216                   map0 <- map
218                   if(is.null(map))
219                           map <- readKeyMap(filecon)
220                   else if(size.change) {
221                           ## Modify 'map.old' directly
222                           map <- tryCatch({
223                                   readKeyMap(filecon, map, old.size)
224                           }, error = function(err) {
225                                   message(conditionMessage(err))
226                                   map0
227                           })
228                   }
229                   else
230                           map <- map0
231                   if(!identical(map, map0)) {
232                           db@meta$updatemap(map)
233                           db@meta$updatesize(cur.size)
234                   }
235                   invisible(db)
236           })
239 setGeneric("getMap", function(db) standardGeneric("getMap"))
241 setMethod("getMap", "filehashDB1",
242           function(db) {
243                   db@meta$getmap()
244           })
246 ################################################################################
247 ## Interface functions
249 openDBConn <- function(filename, mode) {
250         con <- try({
251                 file(filename, mode)
252         }, silent = TRUE)
254         if(inherits(con, "try-error"))
255                 stop("unable to open connection to database")
256         con
259 setMethod("dbInsert",
260           signature(db = "filehashDB1", key = "character", value = "ANY"),
261           function(db, key, value, ...) {
262                   con <- openDBConn(db@datafile, "ab")
263                   on.exit(close(con))
265                   lockname <- lockFile(con)
266                   createLockFile(lockname)
267                   on.exit(deleteLockFile(lockname), add = TRUE)
269                   writeKeyValue(con, key, value)
270           })
272 setMethod("dbFetch",
273           signature(db = "filehashDB1", key = "character"),
274           function(db, key, ...) {
275                   con <- openDBConn(db@datafile, "rb")
276                   on.exit(close(con))
278                   lockname <- lockFile(con)
279                   createLockFile(lockname)
280                   on.exit(deleteLockFile(lockname), add = TRUE)
282                   checkMap(db, con)
283                   map <- getMap(db)
285                   r <- readKeys(con, map, key[1])
286                   r[[1]]
287           })
289 setMethod("dbMultiFetch",
290           signature(db = "filehashDB1", key = "character"),
291           function(db, key, ...) {
292                   con <- openDBConn(db@datafile, "rb")
293                   on.exit(close(con))
295                   lockname <- lockFile(con)
296                   createLockFile(lockname)
297                   on.exit(deleteLockFile(lockname), add = TRUE)
299                   checkMap(db, con)
300                   map <- getMap(db)
302                   readKeys(con, map, key)
303           })
305 setMethod("[", signature(x = "filehashDB1", i = "character", j = "missing",
306                          drop = "missing"),
307           function(x, i , j, drop) {
308                   dbMultiFetch(x, i)
309           })
311 setMethod("dbExists", signature(db = "filehashDB1", key = "character"),
312           function(db, key, ...) {
313                   dbkeys <- dbList(db)
314                   key %in% dbkeys
315           })
317 setMethod("dbList", "filehashDB1",
318           function(db, ...) {
319                   con <- openDBConn(db@datafile, "rb")
320                   on.exit(close(con))
322                   lockname <- lockFile(con)
323                   createLockFile(lockname)
324                   on.exit(deleteLockFile(lockname), add = TRUE)
326                   checkMap(db, con)
327                   map <- getMap(db)
329                   if(length(map) == 0)
330                           character(0)
331                   else {
332                           keys <- as.list(map, all.names = TRUE)
333                           use <- !sapply(keys, is.null)
334                           names(keys[use])
335                   }
336           })
338 setMethod("dbDelete", signature(db = "filehashDB1", key = "character"),
339           function(db, key, ...) {
340                   con <- openDBConn(db@datafile, "ab")
341                   on.exit(close(con))
343                   lockname <- lockFile(con)
344                   createLockFile(lockname)
345                   on.exit(deleteLockFile(lockname), add = TRUE)
347                   writeNullKeyValue(con, key)
348           })
350 setMethod("dbUnlink", "filehashDB1",
351           function(db, ...) {
352                   file.remove(db@datafile)
353           })
355 reorganizeDB <- function(db, ...) {
356         datafile <- db@datafile
358         ## Find a temporary file name
359         tempdata <- paste(datafile, "Tmp", sep = "")
360         i <- 0
361         while(file.exists(tempdata)) {
362                 i <- i + 1
363                 tempdata <- paste(datafile, "Tmp", i, sep = "")
364         }
365         if(!dbCreate(tempdata, type = "DB1")) {
366                 warning("could not create temporary database")
367                 return(FALSE)
368         }
369         on.exit(file.remove(tempdata))
371         tempdb <- dbInit(tempdata, type = "DB1")
372         keys <- dbList(db)
374         ## Copy all keys to temporary database
375         nkeys <- length(keys)
376         cat("Reorganizing database: ")
378         for(i in seq_along(keys)) {
379                 key <- keys[i]
380                 msg <- sprintf("%d%% (%d/%d)", round (100 * i / nkeys),
381                                i, nkeys)
382                 cat(msg)
384                 dbInsert(tempdb, key, dbFetch(db, key))
386                 back <- paste(rep("\b", nchar(msg)), collapse = "")
387                 cat(back)
388         }
389         cat("\n")
390         status <- file.rename(tempdata, datafile)
392         if(!isTRUE(status)) {
393                 on.exit()
394                 warning("temporary database could not be renamed and is left in ",
395                         tempdata)
396                 return(FALSE)
397         }
398         on.exit()
399         message("Finished; reload database with 'dbInit'")
400         TRUE
403 setMethod("dbReorganize", "filehashDB1", reorganizeDB)
406 ################################################################################
407 ## Test system's ftell()
409 hasWorkingFtell <- function() {
410         tfile <- tempfile()
411         con <- file(tfile, "wb")
413         tryCatch({
414                 bytes <- raw(10)
415                 begin <- seek(con)
417                 if(begin != 0)
418                         return(FALSE)
419                 writeBin(bytes, con)
420                 end <- seek(con)
421                 offset <- end - begin
422                 isTRUE(offset == 10)
423         }, finally = {
424                 close(con)
425                 unlink(tfile)
426         })
429 ######################################################################