Does the Python shelve module have any protection built in to make sure two processes aren't writing to a file at the same time?
The shelve module uses an underlying database package (such as dbm, gdbm or bsddb).
The restrictions paragraph of the documentation says (my emphasis):
The shelve module does not support concurrent read/write access to shelved objects. (Multiple simultaneous read accesses are safe.) When a program has a shelf open for writing, no other program should have it open for reading or writing. Unix file locking can be used to solve this, but this differs across Unix versions and requires knowledge about the database implementation used.
Conclusion: it depends on the OS and on the underlying database. To keep things portable, do not build on concurrent access; if several workers of the same program need the shelf, serialize access yourself, as in the sketch below.
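A minimal, portable sketch of that advice, assuming all the writers belong to one program: guard every open/modify/close cycle with a single multiprocessing.Lock passed to each worker. The file name "example_shelf" and the helper set_key are illustrative, not part of shelve:

import multiprocessing
import shelve

def set_key(lock, key, value):
    # Serialize the whole open/write/close cycle behind the shared lock,
    # so only one process touches the shelf file at a time.
    with lock:
        with shelve.open("example_shelf") as db:
            db[key] = value

if __name__ == "__main__":
    lock = multiprocessing.Lock()  # passing it as an argument works under fork and spawn
    workers = [multiprocessing.Process(target=set_key, args=(lock, f"key{i}", i))
               for i in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()

Note that this only protects against writers within the same program, not against unrelated processes opening the same file.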
I've implemented Ivo's approach as a context manager, for anyone interested:
from contextlib import contextmanager
from fcntl import flock, LOCK_SH, LOCK_EX, LOCK_UN
import shelve

@contextmanager
def locking(lock_path, lock_mode):
    with open(lock_path, 'w') as lock:
        flock(lock.fileno(), lock_mode)  # block until lock is acquired
        try:
            yield
        finally:
            flock(lock.fileno(), LOCK_UN)  # release

class DBManager(object):
    def __init__(self, db_path):
        self.db_path = db_path

    def read(self):
        with locking("%s.lock" % self.db_path, LOCK_SH):
            with shelve.open(self.db_path, "r", 2) as db:
                return dict(db)

    def cas(self, old_db, new_db):
        with locking("%s.lock" % self.db_path, LOCK_EX):
            with shelve.open(self.db_path, "c", 2) as db:
                if old_db != dict(db):
                    return False
                db.clear()
                db.update(new_db)
                return True
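The compare-and-swap method above is typically driven by a retry loop. A hedged usage sketch (the key name "counter" and the retry loop are my illustration; the shelf file is assumed to already exist so that read() can open it with flag "r"):

manager = DBManager("test_database")
while True:
    snapshot = manager.read()  # consistent copy taken under a shared lock
    updated = dict(snapshot)
    updated["counter"] = updated.get("counter", 0) + 1
    if manager.cas(snapshot, updated):  # succeeds only if no other writer intervened
        break  # otherwise loop and retry with a fresh snapshot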
As per the top answer, it's not safe to have multiple writers to a shelf. My approach to making shelves safer is to write a wrapper class that takes care of opening and accessing shelf elements. The wrapper code looks something like this:
import fcntl
import pickle
import shelve
from fcntl import LOCK_EX, LOCK_SH, LOCK_NB, LOCK_UN

READONLY, READWRITE = object(), object()  # sentinel mode constants, compared by identity below

class LockedShelf(object):  # illustrative class name; the original answer showed only the methods
    def __init__(self, shelvefile):
        self.shelvefile = shelvefile

    def open(self, mode=READONLY):
        if mode is READWRITE:
            lockfilemode = "a"  # append mode creates the lock file if it is missing
            lockmode = LOCK_EX
            shelve_mode = 'c'
        else:
            lockfilemode = "r"  # read mode assumes the lock file already exists
            lockmode = LOCK_SH
            shelve_mode = 'r'
        self.lockfd = open(self.shelvefile + ".lck", lockfilemode)
        # LOCK_NB makes flock raise BlockingIOError instead of blocking if the lock is taken.
        fcntl.flock(self.lockfd.fileno(), lockmode | LOCK_NB)
        self.shelve = shelve.open(self.shelvefile, flag=shelve_mode, protocol=pickle.HIGHEST_PROTOCOL)

    def close(self):
        self.shelve.close()
        fcntl.flock(self.lockfd.fileno(), LOCK_UN)
        self.lockfd.close()
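A possible usage sketch (the class name LockedShelf and the file name are assumptions carried over from the fixed-up code above, not from the original answer):

db = LockedShelf("test_database")
db.open(READWRITE)  # takes an exclusive lock, or raises BlockingIOError if another writer holds it
db.shelve["foo"] = "bar"
db.close()  # closes the shelf first, then releases the lock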
Building on Ivo's and Samus_'s approaches, I've implemented an even simpler wrapper for shelve.open:
import fcntl
import shelve
import contextlib
import typing

@contextlib.contextmanager
def open_safe_shelve(db_path: str, flag: typing.Literal["r", "w", "c", "n"] = "c", protocol=None, writeback=False):
    if flag in ("w", "c", "n"):
        lockfile_lock_mode = fcntl.LOCK_EX
    elif flag == "r":
        lockfile_lock_mode = fcntl.LOCK_SH
    else:
        raise ValueError(f"Invalid flag: {flag!r}, only 'r', 'w', 'c', 'n' are allowed.")

    # According to https://docs.python.org/3/library/fcntl.html#fcntl.flock,
    # the file must be opened in write mode on some systems.
    with open(f"{db_path}.lock", "w") as lock:
        fcntl.flock(lock.fileno(), lockfile_lock_mode)  # block until the lock is acquired
        try:
            # Close the shelf before releasing the lock, so writes are
            # flushed while we still hold it.
            with shelve.open(db_path, flag=flag, protocol=protocol, writeback=writeback) as db:
                yield db
        finally:
            fcntl.flock(lock.fileno(), fcntl.LOCK_UN)  # release the lock
This avoids having to check whether the dict has changed since the last read, as in Samus_'s cas() method.
Note that this blocks until the lock can be obtained. If you would rather raise an exception when the lock is already taken, use lockfile_lock_mode | fcntl.LOCK_NB as the lock flag, as in the sketch below.
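A minimal sketch of the non-blocking variant (the BlockingIOError handling and the lock-file name are my illustration; flock raises that exception when LOCK_NB is set and the lock is held elsewhere):

import fcntl

with open("test_database.lock", "w") as lock:
    try:
        # LOCK_NB makes flock fail immediately instead of blocking.
        fcntl.flock(lock.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        print("lock is already taken, giving up")
    else:
        ...  # safe to open and use the shelf here
        fcntl.flock(lock.fileno(), fcntl.LOCK_UN)  # release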
It can be used in the same way shelve would normally be used. For example:
import time
import multiprocessing

def read(db_path: str):
    print("Reading wants lock")
    with open_safe_shelve(db_path, "r") as db:
        print("Reading has lock")
        print(f"foo: {db.get('foo', None)}")
        time.sleep(10)
        print(f"foo: {db.get('foo', None)}")
    print("Reading giving up lock")

def write(db_path: str):
    print("Writing wants lock")
    with open_safe_shelve(db_path) as db:
        print("Writing has lock")
        db["foo"] = "bar"
    print("Writing giving up lock")

if __name__ == "__main__":
    db_path = "test_database"
    read_process = multiprocessing.Process(target=read, args=(db_path,))
    write_process = multiprocessing.Process(target=write, args=(db_path,))
    read_process.start()
    time.sleep(1)  # give the reader a head start so it grabs the lock first
    write_process.start()
    read_process.join()
    write_process.join()
will output (assuming test_database.db already exists):
Reading wants lock
Reading has lock
foo: None
Writing wants lock
# (sleeps for around 9 seconds)
foo: None
Reading giving up lock
Writing has lock
Writing giving up lock