X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/0514b290f5ff9a2700b599bf6fb19a468a73c3fb..47a5c66948792602f657e5e1157bfd172d23c546:/services/datamanager/summary/summary.go?ds=sidebyside diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/summary.go deleted file mode 100644 index 9fb0316b73..0000000000 --- a/services/datamanager/summary/summary.go +++ /dev/null @@ -1,277 +0,0 @@ -// Summarizes Collection Data and Keep Server Contents. - -package summary - -// TODO(misha): Check size of blocks as well as their digest. - -import ( - "fmt" - "git.curoverse.com/arvados.git/sdk/go/blockdigest" - "git.curoverse.com/arvados.git/services/datamanager/collection" - "git.curoverse.com/arvados.git/services/datamanager/keep" - "sort" -) - -// BlockSet is a map of blocks -type BlockSet map[blockdigest.DigestWithSize]struct{} - -// Insert adds a single block to the set. -func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) { - bs[digest] = struct{}{} -} - -// Union adds a set of blocks to the set. -func (bs BlockSet) Union(obs BlockSet) { - for k, v := range obs { - bs[k] = v - } -} - -// CollectionIndexSet is used to save space. To convert to and from -// the uuid, use collection.ReadCollections' fields -// CollectionIndexToUUID and CollectionUUIDToIndex. -type CollectionIndexSet map[int]struct{} - -// Insert adds a single collection to the set. The collection is specified by -// its index. -func (cis CollectionIndexSet) Insert(collectionIndex int) { - cis[collectionIndex] = struct{}{} -} - -// ToCollectionIndexSet gets block to collection indices -func (bs BlockSet) ToCollectionIndexSet( - readCollections collection.ReadCollections, - collectionIndexSet *CollectionIndexSet) { - for block := range bs { - for _, collectionIndex := range readCollections.BlockToCollectionIndices[block] { - collectionIndexSet.Insert(collectionIndex) - } - } -} - -// ReplicationLevels struct -// Keeps track of the requested and actual replication levels. -// Currently this is only used for blocks but could easily be used for -// collections as well. -type ReplicationLevels struct { - // The requested replication level. - // For Blocks this is the maximum replication level among all the - // collections this block belongs to. - Requested int - - // The actual number of keep servers this is on. - Actual int -} - -// ReplicationLevelBlockSetMap maps from replication levels to their blocks. -type ReplicationLevelBlockSetMap map[ReplicationLevels]BlockSet - -// ReplicationLevelBlockCount is an individual entry from ReplicationLevelBlockSetMap -// which only reports the number of blocks, not which blocks. -type ReplicationLevelBlockCount struct { - Levels ReplicationLevels - Count int -} - -// ReplicationLevelBlockSetSlice is an ordered list of ReplicationLevelBlockCount useful for reporting. -type ReplicationLevelBlockSetSlice []ReplicationLevelBlockCount - -// ReplicationSummary sturct -type ReplicationSummary struct { - CollectionBlocksNotInKeep BlockSet - UnderReplicatedBlocks BlockSet - OverReplicatedBlocks BlockSet - CorrectlyReplicatedBlocks BlockSet - KeepBlocksNotInCollections BlockSet - - CollectionsNotFullyInKeep CollectionIndexSet - UnderReplicatedCollections CollectionIndexSet - OverReplicatedCollections CollectionIndexSet - CorrectlyReplicatedCollections CollectionIndexSet -} - -// ReplicationSummaryCounts struct counts the elements in each set in ReplicationSummary. -type ReplicationSummaryCounts struct { - CollectionBlocksNotInKeep int - UnderReplicatedBlocks int - OverReplicatedBlocks int - CorrectlyReplicatedBlocks int - KeepBlocksNotInCollections int - CollectionsNotFullyInKeep int - UnderReplicatedCollections int - OverReplicatedCollections int - CorrectlyReplicatedCollections int -} - -// GetOrCreate gets the BlockSet for a given set of ReplicationLevels, -// creating it if it doesn't already exist. -func (rlbs ReplicationLevelBlockSetMap) GetOrCreate( - repLevels ReplicationLevels) (bs BlockSet) { - bs, exists := rlbs[repLevels] - if !exists { - bs = make(BlockSet) - rlbs[repLevels] = bs - } - return -} - -// Insert adds a block to the set for a given replication level. -func (rlbs ReplicationLevelBlockSetMap) Insert( - repLevels ReplicationLevels, - block blockdigest.DigestWithSize) { - rlbs.GetOrCreate(repLevels).Insert(block) -} - -// Union adds a set of blocks to the set for a given replication level. -func (rlbs ReplicationLevelBlockSetMap) Union( - repLevels ReplicationLevels, - bs BlockSet) { - rlbs.GetOrCreate(repLevels).Union(bs) -} - -// Counts outputs a sorted list of ReplicationLevelBlockCounts. -func (rlbs ReplicationLevelBlockSetMap) Counts() ( - sorted ReplicationLevelBlockSetSlice) { - sorted = make(ReplicationLevelBlockSetSlice, len(rlbs)) - i := 0 - for levels, set := range rlbs { - sorted[i] = ReplicationLevelBlockCount{Levels: levels, Count: len(set)} - i++ - } - sort.Sort(sorted) - return -} - -// Implemented to meet sort.Interface -func (rlbss ReplicationLevelBlockSetSlice) Len() int { - return len(rlbss) -} - -// Implemented to meet sort.Interface -func (rlbss ReplicationLevelBlockSetSlice) Less(i, j int) bool { - return rlbss[i].Levels.Requested < rlbss[j].Levels.Requested || - (rlbss[i].Levels.Requested == rlbss[j].Levels.Requested && - rlbss[i].Levels.Actual < rlbss[j].Levels.Actual) -} - -// Implemented to meet sort.Interface -func (rlbss ReplicationLevelBlockSetSlice) Swap(i, j int) { - rlbss[i], rlbss[j] = rlbss[j], rlbss[i] -} - -// ComputeCounts returns ReplicationSummaryCounts -func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) { - // TODO(misha): Consider rewriting this method to iterate through - // the fields using reflection, instead of explictily listing the - // fields as we do now. - rsc.CollectionBlocksNotInKeep = len(rs.CollectionBlocksNotInKeep) - rsc.UnderReplicatedBlocks = len(rs.UnderReplicatedBlocks) - rsc.OverReplicatedBlocks = len(rs.OverReplicatedBlocks) - rsc.CorrectlyReplicatedBlocks = len(rs.CorrectlyReplicatedBlocks) - rsc.KeepBlocksNotInCollections = len(rs.KeepBlocksNotInCollections) - rsc.CollectionsNotFullyInKeep = len(rs.CollectionsNotFullyInKeep) - rsc.UnderReplicatedCollections = len(rs.UnderReplicatedCollections) - rsc.OverReplicatedCollections = len(rs.OverReplicatedCollections) - rsc.CorrectlyReplicatedCollections = len(rs.CorrectlyReplicatedCollections) - return rsc -} - -// PrettyPrint ReplicationSummaryCounts -func (rsc ReplicationSummaryCounts) PrettyPrint() string { - return fmt.Sprintf("Replication Block Counts:"+ - "\n Missing From Keep: %d, "+ - "\n Under Replicated: %d, "+ - "\n Over Replicated: %d, "+ - "\n Replicated Just Right: %d, "+ - "\n Not In Any Collection: %d. "+ - "\nReplication Collection Counts:"+ - "\n Missing From Keep: %d, "+ - "\n Under Replicated: %d, "+ - "\n Over Replicated: %d, "+ - "\n Replicated Just Right: %d.", - rsc.CollectionBlocksNotInKeep, - rsc.UnderReplicatedBlocks, - rsc.OverReplicatedBlocks, - rsc.CorrectlyReplicatedBlocks, - rsc.KeepBlocksNotInCollections, - rsc.CollectionsNotFullyInKeep, - rsc.UnderReplicatedCollections, - rsc.OverReplicatedCollections, - rsc.CorrectlyReplicatedCollections) -} - -// BucketReplication returns ReplicationLevelBlockSetMap -func BucketReplication(readCollections collection.ReadCollections, - keepServerInfo keep.ReadServers) (rlbs ReplicationLevelBlockSetMap) { - rlbs = make(ReplicationLevelBlockSetMap) - - for block, requestedReplication := range readCollections.BlockToDesiredReplication { - rlbs.Insert( - ReplicationLevels{ - Requested: requestedReplication, - Actual: len(keepServerInfo.BlockToServers[block])}, - block) - } - - for block, servers := range keepServerInfo.BlockToServers { - if 0 == readCollections.BlockToDesiredReplication[block] { - rlbs.Insert( - ReplicationLevels{Requested: 0, Actual: len(servers)}, - block) - } - } - return -} - -// SummarizeBuckets reads collections and summarizes -func (rlbs ReplicationLevelBlockSetMap) SummarizeBuckets( - readCollections collection.ReadCollections) ( - rs ReplicationSummary) { - rs.CollectionBlocksNotInKeep = make(BlockSet) - rs.UnderReplicatedBlocks = make(BlockSet) - rs.OverReplicatedBlocks = make(BlockSet) - rs.CorrectlyReplicatedBlocks = make(BlockSet) - rs.KeepBlocksNotInCollections = make(BlockSet) - - rs.CollectionsNotFullyInKeep = make(CollectionIndexSet) - rs.UnderReplicatedCollections = make(CollectionIndexSet) - rs.OverReplicatedCollections = make(CollectionIndexSet) - rs.CorrectlyReplicatedCollections = make(CollectionIndexSet) - - for levels, bs := range rlbs { - if levels.Actual == 0 { - rs.CollectionBlocksNotInKeep.Union(bs) - } else if levels.Requested == 0 { - rs.KeepBlocksNotInCollections.Union(bs) - } else if levels.Actual < levels.Requested { - rs.UnderReplicatedBlocks.Union(bs) - } else if levels.Actual > levels.Requested { - rs.OverReplicatedBlocks.Union(bs) - } else { - rs.CorrectlyReplicatedBlocks.Union(bs) - } - } - - rs.CollectionBlocksNotInKeep.ToCollectionIndexSet(readCollections, - &rs.CollectionsNotFullyInKeep) - // Since different collections can specify different replication - // levels, the fact that a block is under-replicated does not imply - // that all collections that it belongs to are under-replicated, but - // we'll ignore that for now. - // TODO(misha): Fix this and report the correct set of collections. - rs.UnderReplicatedBlocks.ToCollectionIndexSet(readCollections, - &rs.UnderReplicatedCollections) - rs.OverReplicatedBlocks.ToCollectionIndexSet(readCollections, - &rs.OverReplicatedCollections) - - for i := range readCollections.CollectionIndexToUUID { - if _, notInKeep := rs.CollectionsNotFullyInKeep[i]; notInKeep { - } else if _, underReplicated := rs.UnderReplicatedCollections[i]; underReplicated { - } else if _, overReplicated := rs.OverReplicatedCollections[i]; overReplicated { - } else { - rs.CorrectlyReplicatedCollections.Insert(i) - } - } - - return -}