Files
chrani-bot-tng/bot/modules/storage/persistent_dict.py
2025-11-21 07:26:02 +01:00

110 lines
3.7 KiB
Python

import pickle
import json
import csv
import os
import shutil
import base64
""" found on https://stackoverflow.com/a/36252257/8967590 """
class PythonObjectEncoder(json.JSONEncoder):
    """JSON encoder that falls back to pickle for non-JSON values.

    Any object the stock encoder cannot serialise is pickled, base64
    encoded and emitted as ``{"_python_object": "<base64 text>"}``.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Values of the basic JSON-compatible types are delegated to the
        base class (which raises ``TypeError``); everything else is
        wrapped in a single-key dict holding the base64-encoded pickle.
        """
        json_native = (list, dict, str, int, float, bool, type(None))
        if not isinstance(obj, json_native):
            pickled = pickle.dumps(obj)
            payload = base64.b64encode(pickled).decode('utf-8')
            return {'_python_object': payload}
        return super().default(obj)
def as_python_object(dct):
    """JSON ``object_hook`` that restores values wrapped under '_python_object'.

    Dicts without the ``'_python_object'`` key are returned unchanged.
    Dicts with it are replaced by the object obtained from base64-decoding
    and unpickling the stored text.

    WARNING: this calls ``pickle.loads`` on data read from the JSON
    document; unpickling can execute arbitrary code, so only use it on
    files from a trusted source.
    """
    if '_python_object' not in dct:
        return dct
    encoded_text = dct['_python_object']
    raw_pickle = base64.b64decode(encoded_text.encode('utf-8'))
    return pickle.loads(raw_pickle)
class PersistentDict(dict):
    """Persistent dictionary with an API compatible with shelve and anydbm.

    The dict is kept in memory, so the dictionary operations run as fast as
    a regular dictionary.

    Write to disk is delayed until close or sync (similar to gdbm's fast mode).

    Input file format is discovered by trying pickle, json, then csv.
    Output file format is selectable between pickle, json, and csv.

    All three serialization formats are backed by fast C implementations.

    NOTE: the existing file is opened in binary mode when ``format`` is
    'pickle' and in text mode otherwise, so discovery can only succeed for
    loaders that accept that kind of stream (``pickle.load`` needs bytes,
    ``csv.reader`` needs text).

    WARNING: loading uses ``pickle``; only open files from a trusted source.
    """

    def __init__(self, filename, flag='c', mode=None, format='pickle', *args, **kwargs):
        """Open *filename* and populate the dict from it when appropriate.

        Args:
            filename: Path of the backing file.
            flag: 'r' = read-only (sync is a no-op), 'c' = create if
                missing, 'n' = always start empty, ignoring existing data.
            mode: ``None`` or a permission value such as ``0o644`` applied
                with ``os.chmod`` after every successful sync.
            format: Output format: 'csv', 'json', or 'pickle'.
            *args, **kwargs: Forwarded to ``dict.__init__`` after loading,
                so they override values read from disk.

        Raises:
            ValueError: The existing file could not be parsed by any loader.
        """
        self.flag = flag
        self.mode = mode
        self.format = format
        self.filename = filename
        if flag != 'n' and os.access(filename, os.R_OK):
            with self._open(filename, write=False) as fileobj:
                self.load(fileobj)
        dict.__init__(self, *args, **kwargs)

    def _open(self, path, write):
        """Open *path* in the mode matching ``self.format``."""
        if self.format == 'pickle':
            return open(path, 'wb' if write else 'rb')
        # The csv module documents that its files must be opened with
        # newline='' so that it controls line endings itself.
        newline = '' if self.format == 'csv' else None
        return open(path, 'w' if write else 'r', newline=newline)

    def sync(self):
        """Write dict to disk.

        The data is first written to ``<filename>.tmp`` and then moved over
        the real file, so a failed dump never clobbers existing data. Does
        nothing when the dict was opened with flag 'r'.

        Raises:
            Whatever ``dump`` or the file operations raise; the temporary
            file is removed before the exception propagates.
        """
        if self.flag == 'r':
            return
        tempname = self.filename + '.tmp'
        try:
            # The ``with`` block closes the handle before any cleanup runs,
            # which is required for os.remove to succeed on Windows.
            with self._open(tempname, write=True) as fileobj:
                self.dump(fileobj)
        except BaseException:
            # Clean up the partial temp file for *any* failure, then let the
            # original exception propagate unchanged.
            try:
                os.remove(tempname)
            except OSError:
                pass  # temp file was never created or is already gone
            raise
        shutil.move(tempname, self.filename)  # commit: replace the old file
        if self.mode is not None:
            os.chmod(self.filename, self.mode)

    def close(self):
        """Flush the dict to disk (alias for ``sync``)."""
        self.sync()

    def __enter__(self):
        """Return the dict itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *exc_info):
        """Sync to disk on leaving the ``with`` block, even after an error."""
        self.close()

    def dump(self, fileobj):
        """Serialise the dict to *fileobj* using ``self.format``.

        Raises:
            NotImplementedError: ``self.format`` is not 'csv', 'json' or
                'pickle'.
        """
        if self.format == 'csv':
            csv.writer(fileobj).writerows(self.items())
        elif self.format == 'json':
            json.dump(self, fileobj, separators=(',', ':'), sort_keys=True, indent=4, cls=PythonObjectEncoder)
        elif self.format == 'pickle':
            # Copy into a plain dict so the pickle does not depend on this class.
            pickle.dump(dict(self), fileobj, 2)
        else:
            raise NotImplementedError('Unknown format: ' + repr(self.format))

    def load(self, fileobj):
        """Populate the dict from *fileobj*, auto-detecting the format.

        Attempts pickle, json, then csv in that order (most restrictive to
        least restrictive) and merges the first successful result into the
        dict.

        Raises:
            ValueError: None of the loaders could parse the file.
        """
        loaders = (
            (pickle.load, {}),
            (json.load, {"object_hook": as_python_object}),
            (csv.reader, {}),
        )
        for loader, options in loaders:
            fileobj.seek(0)
            try:
                # Build a complete dict first so a loader that fails half-way
                # (e.g. a malformed csv row) cannot leave partial data behind.
                parsed = dict(loader(fileobj, **options))
            except Exception:
                # Expected for the wrong format - fall through to the next
                # loader. KeyboardInterrupt/SystemExit are not Exception
                # subclasses and therefore still propagate.
                continue
            self.update(parsed)
            return
        raise ValueError('File not in a supported format')