import argparse
import cgi
+import csv
import json
import logging
import math
mtime,
disk_size,
cumulative_disk_size,
- float(free_keep_space - cumulative_disk_size)/total_keep_space))
+ float(free_keep_space + cumulative_disk_size)/total_keep_space))
print 'The oldest Garbage Collection Candidates: '
pprint.pprint(garbage_collection_report[:20])
+def outputGarbageCollectionReport(filename):  # Dump garbage_collection_report as CSV to the path `filename`.
+ with open(filename, 'wb') as csvfile:  # binary mode: what the Python 2 csv module expects for its file object
+ gcwriter = csv.writer(csvfile)
+ gcwriter.writerow(['block uuid', 'latest mtime', 'disk size',  # header row; assumed to match the field order of each report row - TODO confirm
+ 'cumulative size', 'disk free'])
+ for line in garbage_collection_report:  # name defined outside this function; presumably filled by computeGarbageCollectionCandidates() - confirm
+ gcwriter.writerow(line)  # one CSV row per report entry, written in the report's existing order
+
+
def detectReplicationProblems():
blocks_not_in_any_collections.update(
set(block_to_replication.keys()).difference(block_to_collections.keys()))
default='user-storage-report',
help=('The event type to set when logging user '
'storage usage to workbench.'))
+parser.add_argument('--garbage-collection-file',
+ default='',  # empty string is falsy, so the `if args.garbage_collection_file:` check skips the report unless a path is given
+ help=('The file to write a garbage collection report, or '
+ 'leave empty for no report.'))
args = None
* cumulative disk size: The sum of this block's disk size and all the
blocks listed above it
* disk free: The proportion of our disk space that would be free if we
-deleted this block and all the above. So this is (free disk space -
+deleted this block and all the above. So this is (free disk space +
cumulative disk size) / total disk capacity
"""
computeReplication(keep_blocks)
- computeGarbageCollectionCandidates()
-
log.info('average replication level is %f',
(float(sum(block_to_replication.values())) /
len(block_to_replication)))
+ computeGarbageCollectionCandidates()
+
+ if args.garbage_collection_file:
+ log.info('Writing garbage Collection report to %s',
+ args.garbage_collection_file)
+ outputGarbageCollectionReport(args.garbage_collection_file)
+
detectReplicationProblems()
computeUserStorageUsage()