import traceback
import stat
import tempfile
-import hashlib
class DiskCacheSlot(object):
__slots__ = ("locator", "ready", "content", "cachedir")
f = tempfile.NamedTemporaryFile(dir=blockdir, delete=False)
tmpfile = f.name
os.chmod(tmpfile, stat.S_IRUSR | stat.S_IWUSR)
+
f.write(value)
f.flush()
os.rename(tmpfile, final)
@staticmethod
def get_from_disk(locator, cachedir):
    """Map an existing cache file for ``locator`` into a ready slot.

    The block is looked up at ``<cachedir>/<locator[0:3]>/<locator>`` and
    mapped read-only.  The contents are used exactly as found on disk; no
    checksum is computed here.

    Returns a DiskCacheSlot whose ``content`` is the mmap and whose
    ``ready`` flag has been set, or None when the file does not exist or
    could not be mapped (in the latter case the traceback is printed).
    """
    blockdir = os.path.join(cachedir, locator[0:3])
    final = os.path.join(blockdir, locator)

    try:
        # mmap keeps its own duplicate of the file descriptor, so the
        # file object can be closed as soon as the mapping exists rather
        # than leaving one open handle behind per cached block.
        with open(final, "rb") as filehandle:
            content = mmap.mmap(filehandle.fileno(), 0, access=mmap.ACCESS_READ)

        dc = DiskCacheSlot(locator, cachedir)
        dc.content = content
        dc.ready.set()
        return dc
    except FileNotFoundError:
        # Not cached on disk: a normal miss, nothing to report.
        pass
    except Exception:
        # Best effort: report the problem and treat it as a miss.
        traceback.print_exc()

    return None
+
@staticmethod
def init_cache(cachedir, maxslots):
    """Load the blocks already present under ``cachedir``.

    Every file found below ``cachedir`` is treated as a candidate block
    named after its file name.  Candidates are mapped in from most to
    least recently accessed (by atime).  The first ``maxslots`` that map
    successfully are kept; ``evict()`` is called on any further ones.

    Returns the list of slots that were kept, most recently accessed
    first.
    """
    # map in all the files in the cache directory, up to max slots.
    # after max slots, try to delete the excess blocks.
    #
    # this gives the calling process ownership of all the blocks

    blocks = []
    for root, _dirs, files in os.walk(cachedir):
        for name in files:
            blockpath = os.path.join(root, name)
            try:
                res = os.stat(blockpath)
            except FileNotFoundError:
                # The file went away between the directory listing and
                # the stat; skip it instead of aborting the whole scan.
                continue
            blocks.append((name, res.st_atime))

    # sort by access time (atime), going from most recently
    # accessed (highest timestamp) to least recently accessed
    # (lowest timestamp).
    blocks.sort(key=lambda x: x[1], reverse=True)

    # Map in all the files we found, up to maxslots, if we exceed
    # maxslots, start throwing things out.
    cachelist = []
    for name, _atime in blocks:
        got = DiskCacheSlot.get_from_disk(name, cachedir)
        if got is None:
            continue
        if len(cachelist) < maxslots:
            cachelist.append(got)
        else:
            # we found more blocks than maxslots, try to
            # throw it out of the cache.
            got.evict()

    return cachelist