import pickle
import json
import csv
import os
import shutil
import base64
""" found on https://stackoverflow.com/a/36252257/8967590 """
|
|
class PythonObjectEncoder(json.JSONEncoder):
|
|
def default(self, obj):
|
|
if isinstance(obj, (list, dict, str, int, float, bool, type(None))):
|
|
return super().default(obj)
|
|
return {'_python_object': base64.b64encode(pickle.dumps(obj)).decode('utf-8')}


def as_python_object(dct):
    """Object hook for json.load: undo PythonObjectEncoder by unpickling any
    value stored under the '_python_object' marker key."""
    if '_python_object' in dct:
        return pickle.loads(base64.b64decode(dct['_python_object'].encode('utf-8')))
    return dct
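
# A minimal round-trip sketch for the two helpers above (illustrative only,
# not part of the original recipe; the sample data is arbitrary): a set is not
# natively JSON-serializable, so PythonObjectEncoder pickles it on dump and
# as_python_object restores it on load.
#
#     data = {'nums': [1, 2, 3], 'tags': {'spam', 'eggs'}}
#     text = json.dumps(data, cls=PythonObjectEncoder)
#     assert json.loads(text, object_hook=as_python_object) == data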


class PersistentDict(dict):
    """Persistent dictionary with an API compatible with shelve and anydbm.

    The dict is kept in memory, so dictionary operations run as fast as on a
    regular dictionary.

    Writing to disk is delayed until close or sync (similar to gdbm's fast
    mode).

    The input file format is discovered automatically; the output file format
    is selectable between pickle, json, and csv. All three serialization
    formats are backed by fast C implementations.
    """

    def __init__(self, filename, flag='c', mode=None, format='pickle', *args, **kwargs):
        self.flag = flag            # r=readonly, c=create, or n=new
        self.mode = mode            # None or an octal triple like 0o644
        self.format = format        # 'csv', 'json', or 'pickle'
        self.filename = filename
        if flag != 'n' and os.access(filename, os.R_OK):
            fileobj = open(filename, 'rb' if format == 'pickle' else 'r')
            with fileobj:
                self.load(fileobj)
        dict.__init__(self, *args, **kwargs)

    def sync(self):
        """Write the dict to disk."""
        if self.flag == 'r':
            return
        filename = self.filename
        tempname = filename + '.tmp'
        fileobj = open(tempname, 'wb' if self.format == 'pickle' else 'w')
        try:
            self.dump(fileobj)
        except (OSError, csv.Error, pickle.PickleError, TypeError, ValueError):
            # Clean up the temp file if serialization fails.
            fileobj.close()
            os.remove(tempname)
            raise
        finally:
            fileobj.close()
        shutil.move(tempname, self.filename)    # atomic commit
        if self.mode is not None:
            os.chmod(self.filename, self.mode)

    def close(self):
        self.sync()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    def dump(self, fileobj):
        """Serialize the dict to fileobj in the selected output format."""
        if self.format == 'csv':
            csv.writer(fileobj).writerows(self.items())
        elif self.format == 'json':
            json.dump(self, fileobj, separators=(',', ':'), sort_keys=True,
                      indent=4, cls=PythonObjectEncoder)
        elif self.format == 'pickle':
            pickle.dump(dict(self), fileobj, 2)
        else:
            raise NotImplementedError('Unknown format: ' + repr(self.format))

    def load(self, fileobj):
        """Load the dict from fileobj, detecting the file format automatically.

        Attempts pickle, json, then csv, in that order, so existing files can
        be read without knowing their format in advance.
        """
        # Try formats from most restrictive to least restrictive.
        for loader, kwargs in (
                (pickle.load, {}),
                (json.load, {'object_hook': as_python_object}),
                (csv.reader, {})):
            fileobj.seek(0)
            try:
                return self.update(loader(fileobj, **kwargs))
            except (KeyboardInterrupt, SystemExit):
                raise    # don't suppress these critical exceptions
            except Exception:
                pass     # wrong format, try the next loader
        raise ValueError('File not in a supported format')
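

if __name__ == '__main__':
    # Illustrative usage sketch, not part of the original recipe; the path and
    # keys below are arbitrary. Entries persist because close() (called by the
    # context manager) syncs the in-memory dict to disk atomically.
    path = '/tmp/persistent_dict_demo.json'

    with PersistentDict(path, 'c', format='json') as d:
        print('loaded:', d)
        d['greeting'] = 'hello'
        d['numbers'] = [1, 2, 3]

    # Reopen read-only: load() auto-detects that the file holds JSON.
    with PersistentDict(path, 'r', format='json') as d:
        print('reloaded:', d)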