mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
4e7c8bbe03
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@33265 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
532 lines
16 KiB
Ruby
532 lines
16 KiB
Ruby
# = PStore -- Transactional File Storage for Ruby Objects
|
|
#
|
|
# pstore.rb -
|
|
# originally by matz
|
|
# documentation by Kev Jackson and James Edward Gray II
|
|
# improved by Hongli Lai
|
|
#
|
|
# See PStore for documentation.
|
|
|
|
|
|
require "fileutils"
|
|
require "digest/md5"
|
|
require "thread"
|
|
|
|
#
|
|
# PStore implements a file based persistence mechanism based on a Hash. User
|
|
# code can store hierarchies of Ruby objects (values) into the data store file
|
|
# by name (keys). An object hierarchy may be just a single object. User code
|
|
# may later read values back from the data store or even update data, as needed.
|
|
#
|
|
# The transactional behavior ensures that any changes succeed or fail together.
|
|
# This can be used to ensure that the data store is not left in a transitory
|
|
# state, where some values were updated but others were not.
|
|
#
|
|
# Behind the scenes, Ruby objects are stored to the data store file with
|
|
# Marshal. That carries the usual limitations. Proc objects cannot be
|
|
# marshalled, for example.
|
|
#
|
|
# == Usage example:
|
|
#
|
|
# require "pstore"
|
|
#
|
|
# # a mock wiki object...
|
|
# class WikiPage
|
|
# def initialize( page_name, author, contents )
|
|
# @page_name = page_name
|
|
# @revisions = Array.new
|
|
#
|
|
# add_revision(author, contents)
|
|
# end
|
|
#
|
|
# attr_reader :page_name
|
|
#
|
|
# def add_revision( author, contents )
|
|
# @revisions << { :created => Time.now,
|
|
# :author => author,
|
|
# :contents => contents }
|
|
# end
|
|
#
|
|
# def wiki_page_references
|
|
# [@page_name] + @revisions.last[:contents].scan(/\b(?:[A-Z]+[a-z]+){2,}/)
|
|
# end
|
|
#
|
|
# # ...
|
|
# end
|
|
#
|
|
# # create a new page...
|
|
# home_page = WikiPage.new( "HomePage", "James Edward Gray II",
|
|
# "A page about the JoysOfDocumentation..." )
|
|
#
|
|
# # then we want to update page data and the index together, or not at all...
|
|
# wiki = PStore.new("wiki_pages.pstore")
|
|
# wiki.transaction do # begin transaction; do all of this or none of it
|
|
# # store page...
|
|
# wiki[home_page.page_name] = home_page
|
|
# # ensure that an index has been created...
|
|
# wiki[:wiki_index] ||= Array.new
|
|
# # update wiki index...
|
|
# wiki[:wiki_index].push(*home_page.wiki_page_references)
|
|
# end # commit changes to wiki data store file
|
|
#
|
|
# ### Some time later... ###
|
|
#
|
|
# # read wiki data...
|
|
# wiki.transaction(true) do # begin read-only transaction, no changes allowed
|
|
# wiki.roots.each do |data_root_name|
|
|
# p data_root_name
|
|
# p wiki[data_root_name]
|
|
# end
|
|
# end
|
|
#
|
|
# == Transaction modes
|
|
#
|
|
# By default, file integrity is only ensured as long as the operating system
|
|
# (and the underlying hardware) doesn't raise any unexpected I/O errors. If an
|
|
# I/O error occurs while PStore is writing to its file, then the file will
|
|
# become corrupted.
|
|
#
|
|
# You can prevent this by setting <em>pstore.ultra_safe = true</em>.
|
|
# However, this results in a minor performance loss, and only works on platforms
|
|
# that support atomic file renames. Please consult the documentation for
|
|
# +ultra_safe+ for details.
|
|
#
|
|
# Needless to say, if you're storing valuable data with PStore, then you should
|
|
# backup the PStore files from time to time.
|
|
class PStore
|
|
RDWR_ACCESS = {mode: IO::RDWR | IO::CREAT | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze
|
|
RD_ACCESS = {mode: IO::RDONLY | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze
|
|
WR_ACCESS = {mode: IO::WRONLY | IO::CREAT | IO::TRUNC | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze
|
|
|
|
# The error type thrown by all PStore methods.
|
|
class Error < StandardError
|
|
end
|
|
|
|
# Whether PStore should do its best to prevent file corruptions, even when under
|
|
# unlikely-to-occur error conditions such as out-of-space conditions and other
|
|
# unusual OS filesystem errors. Setting this flag comes at the price in the form
|
|
# of a performance loss.
|
|
#
|
|
# This flag only has effect on platforms on which file renames are atomic (e.g.
|
|
# all POSIX platforms: Linux, MacOS X, FreeBSD, etc). The default value is false.
|
|
attr_accessor :ultra_safe
|
|
|
|
#
|
|
# To construct a PStore object, pass in the _file_ path where you would like
|
|
# the data to be stored.
|
|
#
|
|
# PStore objects are always reentrant. But if _thread_safe_ is set to true,
|
|
# then it will become thread-safe at the cost of a minor performance hit.
|
|
#
|
|
def initialize(file, thread_safe = false)
|
|
dir = File::dirname(file)
|
|
unless File::directory? dir
|
|
raise PStore::Error, format("directory %s does not exist", dir)
|
|
end
|
|
if File::exist? file and not File::readable? file
|
|
raise PStore::Error, format("file %s not readable", file)
|
|
end
|
|
@filename = file
|
|
@abort = false
|
|
@ultra_safe = false
|
|
@thread_safe = thread_safe
|
|
@lock = Mutex.new
|
|
end
|
|
|
|
# Raises PStore::Error if the calling code is not in a PStore#transaction.
|
|
def in_transaction
|
|
raise PStore::Error, "not in transaction" unless @lock.locked?
|
|
end
|
|
#
|
|
# Raises PStore::Error if the calling code is not in a PStore#transaction or
|
|
# if the code is in a read-only PStore#transaction.
|
|
#
|
|
def in_transaction_wr()
|
|
in_transaction()
|
|
raise PStore::Error, "in read-only transaction" if @rdonly
|
|
end
|
|
private :in_transaction, :in_transaction_wr
|
|
|
|
#
|
|
# Retrieves a value from the PStore file data, by _name_. The hierarchy of
|
|
# Ruby objects stored under that root _name_ will be returned.
|
|
#
|
|
# *WARNING*: This method is only valid in a PStore#transaction. It will
|
|
# raise PStore::Error if called at any other time.
|
|
#
|
|
def [](name)
|
|
in_transaction
|
|
@table[name]
|
|
end
|
|
#
|
|
# This method is just like PStore#[], save that you may also provide a
|
|
# _default_ value for the object. In the event the specified _name_ is not
|
|
# found in the data store, your _default_ will be returned instead. If you do
|
|
# not specify a default, PStore::Error will be raised if the object is not
|
|
# found.
|
|
#
|
|
# *WARNING*: This method is only valid in a PStore#transaction. It will
|
|
# raise PStore::Error if called at any other time.
|
|
#
|
|
def fetch(name, default=PStore::Error)
|
|
in_transaction
|
|
unless @table.key? name
|
|
if default == PStore::Error
|
|
raise PStore::Error, format("undefined root name `%s'", name)
|
|
else
|
|
return default
|
|
end
|
|
end
|
|
@table[name]
|
|
end
|
|
#
|
|
# Stores an individual Ruby object or a hierarchy of Ruby objects in the data
|
|
# store file under the root _name_. Assigning to a _name_ already in the data
|
|
# store clobbers the old data.
|
|
#
|
|
# == Example:
|
|
#
|
|
# require "pstore"
|
|
#
|
|
# store = PStore.new("data_file.pstore")
|
|
# store.transaction do # begin transaction
|
|
# # load some data into the store...
|
|
# store[:single_object] = "My data..."
|
|
# store[:obj_heirarchy] = { "Kev Jackson" => ["rational.rb", "pstore.rb"],
|
|
# "James Gray" => ["erb.rb", "pstore.rb"] }
|
|
# end # commit changes to data store file
|
|
#
|
|
# *WARNING*: This method is only valid in a PStore#transaction and it cannot
|
|
# be read-only. It will raise PStore::Error if called at any other time.
|
|
#
|
|
def []=(name, value)
|
|
in_transaction_wr()
|
|
@table[name] = value
|
|
end
|
|
#
|
|
# Removes an object hierarchy from the data store, by _name_.
|
|
#
|
|
# *WARNING*: This method is only valid in a PStore#transaction and it cannot
|
|
# be read-only. It will raise PStore::Error if called at any other time.
|
|
#
|
|
def delete(name)
|
|
in_transaction_wr()
|
|
@table.delete name
|
|
end
|
|
|
|
#
|
|
# Returns the names of all object hierarchies currently in the store.
|
|
#
|
|
# *WARNING*: This method is only valid in a PStore#transaction. It will
|
|
# raise PStore::Error if called at any other time.
|
|
#
|
|
def roots
|
|
in_transaction
|
|
@table.keys
|
|
end
|
|
#
|
|
# Returns true if the supplied _name_ is currently in the data store.
|
|
#
|
|
# *WARNING*: This method is only valid in a PStore#transaction. It will
|
|
# raise PStore::Error if called at any other time.
|
|
#
|
|
def root?(name)
|
|
in_transaction
|
|
@table.key? name
|
|
end
|
|
# Returns the path to the data store file.
|
|
def path
|
|
@filename
|
|
end
|
|
|
|
#
|
|
# Ends the current PStore#transaction, committing any changes to the data
|
|
# store immediately.
|
|
#
|
|
# == Example:
|
|
#
|
|
# require "pstore"
|
|
#
|
|
# store = PStore.new("data_file.pstore")
|
|
# store.transaction do # begin transaction
|
|
# # load some data into the store...
|
|
# store[:one] = 1
|
|
# store[:two] = 2
|
|
#
|
|
# store.commit # end transaction here, committing changes
|
|
#
|
|
# store[:three] = 3 # this change is never reached
|
|
# end
|
|
#
|
|
# *WARNING*: This method is only valid in a PStore#transaction. It will
|
|
# raise PStore::Error if called at any other time.
|
|
#
|
|
def commit
|
|
in_transaction
|
|
@abort = false
|
|
throw :pstore_abort_transaction
|
|
end
|
|
#
|
|
# Ends the current PStore#transaction, discarding any changes to the data
|
|
# store.
|
|
#
|
|
# == Example:
|
|
#
|
|
# require "pstore"
|
|
#
|
|
# store = PStore.new("data_file.pstore")
|
|
# store.transaction do # begin transaction
|
|
# store[:one] = 1 # this change is not applied, see below...
|
|
# store[:two] = 2 # this change is not applied, see below...
|
|
#
|
|
# store.abort # end transaction here, discard all changes
|
|
#
|
|
# store[:three] = 3 # this change is never reached
|
|
# end
|
|
#
|
|
# *WARNING*: This method is only valid in a PStore#transaction. It will
|
|
# raise PStore::Error if called at any other time.
|
|
#
|
|
def abort
|
|
in_transaction
|
|
@abort = true
|
|
throw :pstore_abort_transaction
|
|
end
|
|
|
|
#
|
|
# Opens a new transaction for the data store. Code executed inside a block
|
|
# passed to this method may read and write data to and from the data store
|
|
# file.
|
|
#
|
|
# At the end of the block, changes are committed to the data store
|
|
# automatically. You may exit the transaction early with a call to either
|
|
# PStore#commit or PStore#abort. See those methods for details about how
|
|
# changes are handled. Raising an uncaught Exception in the block is
|
|
# equivalent to calling PStore#abort.
|
|
#
|
|
# If _read_only_ is set to +true+, you will only be allowed to read from the
|
|
# data store during the transaction and any attempts to change the data will
|
|
# raise a PStore::Error.
|
|
#
|
|
# Note that PStore does not support nested transactions.
|
|
#
|
|
def transaction(read_only = false, &block) # :yields: pstore
|
|
value = nil
|
|
raise PStore::Error, "nested transaction" if !@thread_safe && @lock.locked?
|
|
@lock.synchronize do
|
|
@rdonly = read_only
|
|
@abort = false
|
|
file = open_and_lock_file(@filename, read_only)
|
|
if file
|
|
begin
|
|
@table, checksum, original_data_size = load_data(file, read_only)
|
|
|
|
catch(:pstore_abort_transaction) do
|
|
value = yield(self)
|
|
end
|
|
|
|
if !@abort && !read_only
|
|
save_data(checksum, original_data_size, file)
|
|
end
|
|
ensure
|
|
file.close if !file.closed?
|
|
end
|
|
else
|
|
# This can only occur if read_only == true.
|
|
@table = {}
|
|
catch(:pstore_abort_transaction) do
|
|
value = yield(self)
|
|
end
|
|
end
|
|
end
|
|
value
|
|
rescue ThreadError
|
|
raise PStore::Error, "nested transaction"
|
|
end
|
|
|
|
private
|
|
# Constant for relieving Ruby's garbage collector.
|
|
EMPTY_STRING = ""
|
|
EMPTY_MARSHAL_DATA = Marshal.dump({})
|
|
EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA)
|
|
|
|
#
|
|
# Open the specified filename (either in read-only mode or in
|
|
# read-write mode) and lock it for reading or writing.
|
|
#
|
|
# The opened File object will be returned. If _read_only_ is true,
|
|
# and the file does not exist, then nil will be returned.
|
|
#
|
|
# All exceptions are propagated.
|
|
#
|
|
def open_and_lock_file(filename, read_only)
|
|
if read_only
|
|
begin
|
|
file = File.new(filename, RD_ACCESS)
|
|
begin
|
|
file.flock(File::LOCK_SH)
|
|
return file
|
|
rescue
|
|
file.close
|
|
raise
|
|
end
|
|
rescue Errno::ENOENT
|
|
return nil
|
|
end
|
|
else
|
|
file = File.new(filename, RDWR_ACCESS)
|
|
file.flock(File::LOCK_EX)
|
|
return file
|
|
end
|
|
end
|
|
|
|
# Load the given PStore file.
|
|
# If +read_only+ is true, the unmarshalled Hash will be returned.
|
|
# If +read_only+ is false, a 3-tuple will be returned: the unmarshalled
|
|
# Hash, an MD5 checksum of the data, and the size of the data.
|
|
def load_data(file, read_only)
|
|
if read_only
|
|
begin
|
|
table = load(file)
|
|
if !table.is_a?(Hash)
|
|
raise Error, "PStore file seems to be corrupted."
|
|
end
|
|
rescue EOFError
|
|
# This seems to be a newly-created file.
|
|
table = {}
|
|
end
|
|
table
|
|
else
|
|
data = file.read
|
|
if data.empty?
|
|
# This seems to be a newly-created file.
|
|
table = {}
|
|
checksum = empty_marshal_checksum
|
|
size = empty_marshal_data.size
|
|
else
|
|
table = load(data)
|
|
checksum = Digest::MD5.digest(data)
|
|
size = data.size
|
|
if !table.is_a?(Hash)
|
|
raise Error, "PStore file seems to be corrupted."
|
|
end
|
|
end
|
|
data.replace(EMPTY_STRING)
|
|
[table, checksum, size]
|
|
end
|
|
end
|
|
|
|
def on_windows?
|
|
is_windows = RUBY_PLATFORM =~ /mswin/ ||
|
|
RUBY_PLATFORM =~ /mingw/ ||
|
|
RUBY_PLATFORM =~ /bccwin/ ||
|
|
RUBY_PLATFORM =~ /wince/
|
|
self.class.__send__(:define_method, :on_windows?) do
|
|
is_windows
|
|
end
|
|
is_windows
|
|
end
|
|
|
|
# Check whether Marshal.dump supports the 'canonical' option. This option
|
|
# makes sure that Marshal.dump always dumps data structures in the same order.
|
|
# This is important because otherwise, the checksums that we generate may differ.
|
|
def marshal_dump_supports_canonical_option?
|
|
begin
|
|
Marshal.dump(nil, -1, true)
|
|
result = true
|
|
rescue
|
|
result = false
|
|
end
|
|
self.class.instance_method(:marshal_dump_supports_canonical_option?)
|
|
self.class.__send__(:define_method, :marshal_dump_supports_canonical_option?) do
|
|
result
|
|
end
|
|
result
|
|
end
|
|
|
|
def save_data(original_checksum, original_file_size, file)
|
|
# We only want to save the new data if the size or checksum has changed.
|
|
# This results in less filesystem calls, which is good for performance.
|
|
if marshal_dump_supports_canonical_option?
|
|
new_data = Marshal.dump(@table, -1, true)
|
|
else
|
|
new_data = dump(@table)
|
|
end
|
|
new_checksum = Digest::MD5.digest(new_data)
|
|
|
|
if new_data.size != original_file_size || new_checksum != original_checksum
|
|
if @ultra_safe && !on_windows?
|
|
# Windows doesn't support atomic file renames.
|
|
save_data_with_atomic_file_rename_strategy(new_data, file)
|
|
else
|
|
save_data_with_fast_strategy(new_data, file)
|
|
end
|
|
end
|
|
|
|
new_data.replace(EMPTY_STRING)
|
|
end
|
|
|
|
def save_data_with_atomic_file_rename_strategy(data, file)
|
|
temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}"
|
|
temp_file = File.new(temp_filename, WR_ACCESS)
|
|
begin
|
|
temp_file.flock(File::LOCK_EX)
|
|
temp_file.write(data)
|
|
temp_file.flush
|
|
File.rename(temp_filename, @filename)
|
|
rescue
|
|
File.unlink(temp_file) rescue nil
|
|
raise
|
|
ensure
|
|
temp_file.close
|
|
end
|
|
end
|
|
|
|
def save_data_with_fast_strategy(data, file)
|
|
file.rewind
|
|
file.truncate(0)
|
|
file.write(data)
|
|
end
|
|
|
|
|
|
# This method is just a wrapped around Marshal.dump
|
|
# to allow subclass overriding used in YAML::Store.
|
|
def dump(table) # :nodoc:
|
|
Marshal::dump(table)
|
|
end
|
|
|
|
# This method is just a wrapped around Marshal.load.
|
|
# to allow subclass overriding used in YAML::Store.
|
|
def load(content) # :nodoc:
|
|
Marshal::load(content)
|
|
end
|
|
|
|
def empty_marshal_data
|
|
EMPTY_MARSHAL_DATA
|
|
end
|
|
def empty_marshal_checksum
|
|
EMPTY_MARSHAL_CHECKSUM
|
|
end
|
|
end
|
|
|
|
# :enddoc:
|
|
|
|
if __FILE__ == $0
|
|
db = PStore.new("/tmp/foo")
|
|
db.transaction do
|
|
p db.roots
|
|
ary = db["root"] = [1,2,3,4]
|
|
ary[1] = [1,1.5]
|
|
end
|
|
|
|
1000.times do
|
|
db.transaction do
|
|
db["root"][0] += 1
|
|
p db["root"][0]
|
|
end
|
|
end
|
|
|
|
db.transaction(true) do
|
|
p db["root"]
|
|
end
|
|
end
|