import pickle
import json
import csv
import os
import shutil
import base64


# PythonObjectEncoder / as_python_object were found on
# https://stackoverflow.com/a/36252257/8967590
class PythonObjectEncoder(json.JSONEncoder):
    """JSON encoder that embeds non-JSON values as base64-encoded pickles."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        ``json`` only calls this hook for values it cannot encode natively.
        Such values are pickled, base64-encoded and wrapped in a dict under
        the ``'_python_object'`` key so that ``as_python_object`` can restore
        them when the file is read back.
        """
        if isinstance(obj, (list, dict, str, int, float, bool, type(None))):
            # Native JSON types: defer to the base class implementation.
            return super().default(obj)
        return {'_python_object': base64.b64encode(pickle.dumps(obj)).decode('utf-8')}


def as_python_object(dct):
    """``object_hook`` for ``json.load`` that reverses ``PythonObjectEncoder``.

    A dict containing the ``'_python_object'`` key is replaced by the
    unpickled object it carries; every other dict is returned unchanged.

    SECURITY: this unpickles data taken from the JSON document. Unpickling
    can execute arbitrary code, so only use it on files you trust.
    """
    if '_python_object' in dct:
        return pickle.loads(base64.b64decode(dct['_python_object'].encode('utf-8')))
    return dct


class PersistentDict(dict):
    """Persistent dictionary with an API compatible with shelve and anydbm.

    The dict is kept in memory, so the dictionary operations run as fast as
    a regular dictionary. Write to disk is delayed until close or sync
    (similar to gdbm's fast mode).

    On open, an existing file is probed as pickle, then json, then csv (see
    ``load``). Output file format is selectable between pickle, json, and
    csv. All three serialization formats are backed by fast C
    implementations.

    Instances can be used as context managers; leaving the ``with`` block
    calls ``close()``.
    """

    def __init__(self, filename, flag='c', mode=None, format='pickle', *args, **kwargs):
        """Open *filename* and load its contents if it is readable.

        filename -- path of the backing file.
        flag     -- 'r' = read-only (``sync`` does nothing), 'c' = create,
                    'n' = new (an existing file is not loaded).
        mode     -- None, or permission bits such as 0o644 that are applied
                    to the file with ``os.chmod`` after each write.
        format   -- 'csv', 'json', or 'pickle'; selects the output format and
                    whether the file is opened in binary or text mode.
        Remaining positional and keyword arguments are passed to ``dict``.
        """
        self.flag = flag                    # r=readonly, c=create, or n=new
        self.mode = mode                    # None or an octal triple like 0o644
        self.format = format                # 'csv', 'json', or 'pickle'
        self.filename = filename
        if flag != 'n' and os.access(filename, os.R_OK):
            if format == 'pickle':
                fileobj = open(filename, 'rb')
            else:
                # newline='' lets the csv module do its own newline handling.
                fileobj = open(filename, 'r', newline='')
            with fileobj:
                self.load(fileobj)
        # dict.__init__ on an already-populated dict behaves like update(),
        # so the loaded entries are kept and the arguments are layered on top.
        dict.__init__(self, *args, **kwargs)

    def sync(self):
        """Write dict to disk.

        The data is first written to ``<filename>.tmp`` and then moved over
        the real file, so a failed dump never touches the existing file. If
        writing fails, the temporary file is removed and the original
        exception is re-raised. Does nothing when opened with flag 'r'.
        """
        if self.flag == 'r':
            return
        tempname = self.filename + '.tmp'
        try:
            if self.format == 'pickle':
                fileobj = open(tempname, 'wb')
            else:
                # newline='' keeps the csv writer's own line terminators
                # intact instead of having them translated again.
                fileobj = open(tempname, 'w', newline='')
            with fileobj:
                self.dump(fileobj)
        except BaseException:
            # The file is already closed here (the with block has exited),
            # so it can be removed on every platform. Ignore a missing file:
            # open() itself may have been what failed.
            try:
                os.remove(tempname)
            except OSError:
                pass
            raise
        shutil.move(tempname, self.filename)    # commit
        if self.mode is not None:
            os.chmod(self.filename, self.mode)

    def close(self):
        """Flush the dictionary to disk (same as ``sync``)."""
        self.sync()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    def dump(self, fileobj):
        """Serialize the dictionary to *fileobj* using ``self.format``.

        Raises NotImplementedError if ``self.format`` is not 'csv', 'json'
        or 'pickle'.
        """
        if self.format == 'csv':
            csv.writer(fileobj).writerows(self.items())
        elif self.format == 'json':
            json.dump(self, fileobj, separators=(',', ':'), sort_keys=True,
                      indent=4, cls=PythonObjectEncoder)
        elif self.format == 'pickle':
            pickle.dump(dict(self), fileobj, 2)
        else:
            raise NotImplementedError('Unknown format: ' + repr(self.format))

    def load(self, fileobj):
        """Populate the dictionary from *fileobj*, probing several formats.

        Attempts pickle, json, then csv in that order and keeps the first
        result that can be turned into a dictionary. NOTE: pickle.load needs
        a binary stream and csv.reader a text stream, so which loaders can
        succeed also depends on how *fileobj* was opened.

        SECURITY: pickle.load (and the json object hook) unpickle file
        contents, which can execute arbitrary code. Only open trusted files.

        Raises ValueError if no loader accepts the file.
        """
        # try formats from most restrictive to least restrictive
        loaders = (
            (pickle.load, {}),
            (json.load, {"object_hook": as_python_object}),
            (csv.reader, {}),
        )
        for loader, options in loaders:
            fileobj.seek(0)
            try:
                # Stage into a plain dict first so that a loader failing
                # part-way (e.g. a malformed csv row) cannot leave this
                # dictionary partially updated.
                loaded = dict(loader(fileobj, **options))
            except Exception:
                # Expected for the wrong format: try the next loader.
                continue
            self.update(loaded)
            return
        raise ValueError('File not in a supported format')