1 # frozen_string_literal: true
2 # = PStore -- Transactional File Storage for Ruby Objects
6 # documentation by Kev Jackson and James Edward Gray II
7 # improved by Hongli Lai
9 # See PStore for documentation.
13 # \PStore implements a file based persistence mechanism based on a Hash.
14 # User code can store hierarchies of Ruby objects (values)
15 # into the data store by name (keys).
16 # An object hierarchy may be just a single object.
17 # User code may later read values back from the data store
18 # or even update data, as needed.
20 # The transactional behavior ensures that any changes succeed or fail together.
21 # This can be used to ensure that the data store is not left in a transitory state,
22 # where some values were updated but others were not.
24 # Behind the scenes, Ruby objects are stored to the data store file with Marshal.
25 # That carries the usual limitations. Proc objects cannot be marshalled,
28 # There are three important concepts here (details at the links):
30 # - {Store}[rdoc-ref:PStore@The+Store]: a store is an instance of \PStore.
31 # - {Entries}[rdoc-ref:PStore@Entries]: the store is hash-like;
32 # each entry pairs a key with a stored object.
33 # - {Transactions}[rdoc-ref:PStore@Transactions]: each transaction is a collection
34 # of prospective changes to the store;
35 # a transaction is defined in the block given with a call
36 # to PStore#transaction.
38 # == About the Examples
40 # Examples on this page need a store that has known properties.
41 # They can get a new (and populated) store by calling thus:
43 # example_store do |store|
44 # # Example code using store goes here.
47 # All we really need to know about +example_store+
48 # is that it yields a fresh store with a known population of entries;
53 # # Yield a pristine store for use in examples.
55 # # Create the store in a temporary file.
56 # Tempfile.create do |file|
57 # store = PStore.new(file)
58 # # Populate the store.
59 # store.transaction do
70 # The contents of the store are maintained in a file whose path is specified
71 # when the store is created (see PStore.new).
72 # The objects are stored and retrieved using
73 # module Marshal, which means that certain objects cannot be added to the store;
74 # see {Marshal::dump}[rdoc-ref:Marshal.dump].
78 # A store may have any number of entries.
79 # Each entry has a key and a value, just as in a hash:
81 # - Key: as in a hash, the key can be (almost) any object;
82 # see {Hash Keys}[rdoc-ref:Hash@Hash+Keys].
83 # You may find it convenient to keep it simple by using only
84 # symbols or strings as keys.
85 # - Value: the value may be any object that can be marshalled by \Marshal
86 # (see {Marshal::dump}[rdoc-ref:Marshal.dump])
87 # and in fact may be a collection
88 # (e.g., an array, a hash, a set, a range, etc).
89 # That collection may in turn contain nested objects,
90 # including collections, to any depth;
91 # those objects must also be \Marshal-able.
92 # See {Hierarchical Values}[rdoc-ref:PStore@Hierarchical+Values].
96 # === The Transaction Block
98 # The block given with a call to method #transaction
99 # contains a _transaction_,
100 # which consists of calls to \PStore methods that
101 # read from or write to the store
102 # (that is, all \PStore methods except #transaction itself,
103 # #path, and PStore.new):
105 # example_store do |store|
106 # store.transaction do
107 # store.keys # => [:foo, :bar, :baz]
109 # store.keys # => [:foo, :bar, :baz, :bat]
113 # Execution of the transaction is deferred until the block exits,
114 # and is executed _atomically_ (all-or-nothing):
115 # either all transaction calls are executed, or none are.
116 # This maintains the integrity of the store.
118 # Other code in the block (including even calls to #path and PStore.new)
119 # is executed immediately, not deferred.
121 # The transaction block:
123 # - May not contain a nested call to #transaction.
124 # - Is the only context where methods that read from or write to
125 # the store are allowed.
127 # As seen above, changes in a transaction are made automatically
128 # when the block exits.
129 # The block may be exited early by calling method #commit or #abort.
131 # - Method #commit triggers the update to the store and exits the block:
133 # example_store do |store|
134 # store.transaction do
135 # store.keys # => [:foo, :bar, :baz]
138 # fail 'Cannot get here'
140 # store.transaction do
141 # # Update was completed.
142 # store.keys # => [:foo, :bar, :baz, :bat]
146 # - Method #abort discards the update to the store and exits the block:
148 # example_store do |store|
149 # store.transaction do
150 # store.keys # => [:foo, :bar, :baz]
153 # fail 'Cannot get here'
155 # store.transaction do
156 # # Update was not completed.
157 # store.keys # => [:foo, :bar, :baz]
161 # === Read-Only Transactions
163 # By default, a transaction allows both reading from and writing to
166 # store.transaction do
167 # # Read-write transaction.
168 # # Any code except a call to #transaction is allowed here.
171 # If argument +read_only+ is passed as +true+,
172 # only reading is allowed:
174 # store.transaction(true) do
175 # # Read-only transaction:
176 # # Calls to #transaction, #[]=, and #delete are not allowed here.
179 # == Hierarchical Values
181 # The value for an entry may be a simple object (as seen above).
182 # It may also be a hierarchy of objects nested to any depth:
184 # deep_store = PStore.new('deep.store')
185 # deep_store.transaction do
186 # array_of_hashes = [{}, {}, {}]
187 # deep_store[:array_of_hashes] = array_of_hashes
188 # deep_store[:array_of_hashes] # => [{}, {}, {}]
189 # hash_of_arrays = {foo: [], bar: [], baz: []}
190 # deep_store[:hash_of_arrays] = hash_of_arrays
191 # deep_store[:hash_of_arrays] # => {:foo=>[], :bar=>[], :baz=>[]}
192 # deep_store[:hash_of_arrays][:foo].push(:bat)
193 # deep_store[:hash_of_arrays] # => {:foo=>[:bat], :bar=>[], :baz=>[]}
196 # And recall that you can use
197 # {dig methods}[rdoc-ref:dig_methods.rdoc]
198 # in a returned hierarchy of objects.
200 # == Working with the Store
202 # === Creating a Store
204 # Use method PStore.new to create a store.
205 # The new store creates or opens its containing file:
207 # store = PStore.new('t.store')
209 # === Modifying the Store
211 # Use method #[]= to update or create an entry:
213 # example_store do |store|
214 # store.transaction do
215 # store[:foo] = 1 # Update.
216 # store[:bam] = 1 # Create.
220 # Use method #delete to remove an entry:
222 # example_store do |store|
223 # store.transaction do
225 # store[:foo] # => nil
229 # === Retrieving Values
231 # Use method #fetch (allows default) or #[] (defaults to +nil+)
232 # to retrieve an entry:
234 # example_store do |store|
235 # store.transaction do
237 # store[:nope] # => nil
238 # store.fetch(:baz) # => 2
239 # store.fetch(:nope, nil) # => nil
240 # store.fetch(:nope) # Raises exception.
244 # === Querying the Store
246 # Use method #key? to determine whether a given key exists:
248 # example_store do |store|
249 # store.transaction do
250 # store.key?(:foo) # => true
254 # Use method #keys to retrieve keys:
256 # example_store do |store|
257 # store.transaction do
258 # store.keys # => [:foo, :bar, :baz]
262 # Use method #path to retrieve the path to the store's underlying file;
263 # this method may be called from outside a transaction block:
265 # store = PStore.new('t.store')
266 # store.path # => "t.store"
268 # == Transaction Safety
270 # For transaction safety, see:
272 # - Optional argument +thread_safe+ at method PStore.new.
273 # - Attribute #ultra_safe.
275 # Needless to say, if you're storing valuable data with \PStore, then you should
276 # back up the \PStore file from time to time.
278 # == An Example Store
282 # # A mock wiki object.
285 # attr_reader :page_name
287 # def initialize(page_name, author, contents)
288 # @page_name = page_name
289 # @revisions = Array.new
290 # add_revision(author, contents)
293 # def add_revision(author, contents)
294 # @revisions << {created: Time.now,
296 # contents: contents}
299 # def wiki_page_references
300 # [@page_name] + @revisions.last[:contents].scan(/\b(?:[A-Z]+[a-z]+){2,}/)
305 # # Create a new wiki page.
306 # home_page = WikiPage.new("HomePage", "James Edward Gray II",
307 # "A page about the JoysOfDocumentation..." )
309 # wiki = PStore.new("wiki_pages.pstore")
310 # # Update page data and the index together, or not at all.
311 # wiki.transaction do
313 # wiki[home_page.page_name] = home_page
314 # # Create page index.
315 # wiki[:wiki_index] ||= Array.new
316 # # Update wiki index.
317 # wiki[:wiki_index].push(*home_page.wiki_page_references)
320 # # Read wiki data, setting argument read_only to true.
321 # wiki.transaction(true) do
322 # wiki.keys.each do |key|
331 RDWR_ACCESS = {mode: IO::RDWR | IO::CREAT | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze
332 RD_ACCESS = {mode: IO::RDONLY | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze
333 WR_ACCESS = {mode: IO::WRONLY | IO::CREAT | IO::TRUNC | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze
335 # The error type raised by all PStore methods.
336 class Error < StandardError
339 # Whether \PStore should do its best to prevent file corruptions,
340 # even when an unlikely error (such as memory-error or filesystem error) occurs:
342 # - +true+: changes are posted by creating a temporary file,
343 # writing the updated data to it, then renaming the file to the given #path.
344 # File integrity is maintained.
345 # Note: has effect only if the filesystem has atomic file rename
346 # (as do POSIX platforms Linux, macOS, FreeBSD and others).
348 # - +false+ (the default): changes are posted by rewinding the open file
349 # and writing the updated data.
350 # File integrity is maintained if the filesystem raises
351 # no unexpected I/O error;
352 # if such an error occurs during a write to the store,
353 # the file may become corrupted.
355 attr_accessor :ultra_safe
357 # Returns a new \PStore object.
359 # Argument +file+ is the path to the file in which objects are to be stored;
360 # if the file exists, it should be one that was written by \PStore.
363 # store = PStore.new(path)
365 # A \PStore object is
366 # {reentrant}[https://en.wikipedia.org/wiki/Reentrancy_(computing)].
367 # If argument +thread_safe+ is given as +true+,
368 # the object is also thread-safe (at the cost of a small performance penalty):
370 # store = PStore.new(path, true)
372 def initialize(file, thread_safe = false)
373 dir = File::dirname(file)
374 unless File::directory? dir
375 raise PStore::Error, format("directory %s does not exist", dir)
377 if File::exist? file and not File::readable? file
378 raise PStore::Error, format("file %s not readable", file)
383 @thread_safe = thread_safe
384 @lock = Thread::Mutex.new
387 # Raises PStore::Error if the calling code is not in a PStore#transaction.
389 raise PStore::Error, "not in transaction" unless @lock.locked?
392 # Raises PStore::Error if the calling code is not in a PStore#transaction or
393 # if the code is in a read-only PStore#transaction.
395 def in_transaction_wr
397 raise PStore::Error, "in read-only transaction" if @rdonly
399 private :in_transaction, :in_transaction_wr
401 # Returns the value for the given +key+ if the key exists.
403 # if not +nil+, the returned value is an object or a hierarchy of objects:
405 # example_store do |store|
406 # store.transaction do
408 # store[:nope] # => nil
412 # Returns +nil+ if there is no such key.
414 # See also {Hierarchical Values}[rdoc-ref:PStore@Hierarchical+Values].
416 # Raises an exception if called outside a transaction block.
422 # Like #[], except that it accepts a default value for the key.
423 # If the +key+ does not exist:
425 # - Raises an exception if +default+ is +PStore::Error+.
426 # - Returns the value of +default+ otherwise:
428 # example_store do |store|
429 # store.transaction do
430 # store.fetch(:nope, nil) # => nil
431 # store.fetch(:nope) # Raises an exception.
435 # Raises an exception if called outside a transaction block.
436 def fetch(key, default=PStore::Error)
438 unless @table.key? key
439 if default == PStore::Error
440 raise PStore::Error, format("undefined key '%s'", key)
448 # Creates or replaces the value for the given +key+:
450 # example_store do |store|
451 # store.transaction do
456 # See also {Hierarchical Values}[rdoc-ref:PStore@Hierarchical+Values].
458 # Raises an exception if called outside a transaction block.
464 # Removes and returns the value at +key+ if it exists:
466 # example_store do |store|
467 # store.transaction do
473 # Returns +nil+ if there is no such key.
475 # Raises an exception if called outside a transaction block.
481 # Returns an array of the existing keys:
483 # example_store do |store|
484 # store.transaction do
485 # store.keys # => [:foo, :bar, :baz]
489 # Raises an exception if called outside a transaction block.
496 # Returns +true+ if +key+ exists, +false+ otherwise:
498 # example_store do |store|
499 # store.transaction do
500 # store.key?(:foo) # => true
504 # Raises an exception if called outside a transaction block.
511 # Returns the string file path used to create the store:
513 # store.path # => "flat.store"
519 # Exits the current transaction block, committing any changes
521 # {transaction block}[rdoc-ref:PStore@The+Transaction+Block].
523 # Raises an exception if called outside a transaction block.
527 throw :pstore_abort_transaction
530 # Exits the current transaction block, discarding any changes
532 # {transaction block}[rdoc-ref:PStore@The+Transaction+Block].
534 # Raises an exception if called outside a transaction block.
538 throw :pstore_abort_transaction
541 # Opens a transaction block for the store.
542 # See {Transactions}[rdoc-ref:PStore@Transactions].
544 # With argument +read_only+ as +false+, the block may both read from
545 # and write to the store.
547 # With argument +read_only+ as +true+, the block may not include calls
548 # to #transaction, #[]=, or #delete.
550 # Raises an exception if called within a transaction block.
551 def transaction(read_only = false) # :yields: pstore
554 raise PStore::Error, "nested transaction" unless @lock.try_lock
559 raise PStore::Error, "nested transaction"
565 file = open_and_lock_file(@filename, read_only)
568 @table, checksum, original_data_size = load_data(file, read_only)
570 catch(:pstore_abort_transaction) do
574 if !@abort && !read_only
575 save_data(checksum, original_data_size, file)
581 # This can only occur if read_only == true.
583 catch(:pstore_abort_transaction) do
594 # Constant for relieving Ruby's garbage collector.
595 CHECKSUM_ALGO = %w[SHA512 SHA384 SHA256 SHA1 RMD160 MD5].each do |algo|
602 EMPTY_MARSHAL_DATA = Marshal.dump({})
603 EMPTY_MARSHAL_CHECKSUM = CHECKSUM_ALGO.digest(EMPTY_MARSHAL_DATA)
606 # Open the specified filename (either in read-only mode or in
607 # read-write mode) and lock it for reading or writing.
609 # The opened File object will be returned. If _read_only_ is true,
610 # and the file does not exist, then nil will be returned.
612 # All exceptions are propagated.
614 def open_and_lock_file(filename, read_only)
617 file = File.new(filename, **RD_ACCESS)
619 file.flock(File::LOCK_SH)
629 file = File.new(filename, **RDWR_ACCESS)
630 file.flock(File::LOCK_EX)
635 # Load the given PStore file.
636 # If +read_only+ is true, the unmarshalled Hash will be returned.
637 # If +read_only+ is false, a 3-tuple will be returned: the unmarshalled
638 # Hash, a checksum of the data, and the size of the data.
639 def load_data(file, read_only)
643 raise Error, "PStore file seems to be corrupted." unless table.is_a?(Hash)
645 # This seems to be a newly-created file.
652 # This seems to be a newly-created file.
654 checksum = empty_marshal_checksum
655 size = empty_marshal_data.bytesize
658 checksum = CHECKSUM_ALGO.digest(data)
660 raise Error, "PStore file seems to be corrupted." unless table.is_a?(Hash)
662 data.replace(EMPTY_STRING)
663 [table, checksum, size]
668 is_windows = RUBY_PLATFORM =~ /mswin|mingw|bccwin|wince/
669 self.class.__send__(:define_method, :on_windows?) do
675 def save_data(original_checksum, original_file_size, file)
676 new_data = dump(@table)
678 if new_data.bytesize != original_file_size || CHECKSUM_ALGO.digest(new_data) != original_checksum
679 if @ultra_safe && !on_windows?
680 # Windows doesn't support atomic file renames.
681 save_data_with_atomic_file_rename_strategy(new_data, file)
683 save_data_with_fast_strategy(new_data, file)
687 new_data.replace(EMPTY_STRING)
690 def save_data_with_atomic_file_rename_strategy(data, file)
691 temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}"
692 temp_file = File.new(temp_filename, **WR_ACCESS)
694 temp_file.flock(File::LOCK_EX)
695 temp_file.write(data)
697 File.rename(temp_filename, @filename)
699 File.unlink(temp_file) rescue nil
706 def save_data_with_fast_strategy(data, file)
709 file.truncate(data.bytesize)
713 # This method is just a wrapper around Marshal.dump
714 # to allow subclass overriding used in YAML::Store.
715 def dump(table) # :nodoc:
719 # This method is just a wrapper around Marshal.load
720 # to allow subclass overriding used in YAML::Store.
721 def load(content) # :nodoc:
722 Marshal::load(content)
725 def empty_marshal_data
728 def empty_marshal_checksum
729 EMPTY_MARSHAL_CHECKSUM