1 // Summarizes Collection Data and Keep Server Contents.
5 // TODO(misha): Check size of blocks as well as their digest.
9 "git.curoverse.com/arvados.git/sdk/go/blockdigest"
10 "git.curoverse.com/arvados.git/services/datamanager/collection"
11 "git.curoverse.com/arvados.git/services/datamanager/keep"
15 // BlockSet is a map of blocks
16 type BlockSet map[blockdigest.DigestWithSize]struct{}
18 // Insert adds a single block to the set.
19 func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) {
20 bs[digest] = struct{}{}
23 // Union adds a set of blocks to the set.
24 func (bs BlockSet) Union(obs BlockSet) {
25 for k, v := range obs {
// CollectionIndexSet is a set of collection indices. Indices are used
// to save space. To convert to and from the uuid, use
// collection.ReadCollections' fields CollectionIndexToUUID and
// CollectionUUIDToIndex.
type CollectionIndexSet map[int]struct{}

// Insert adds a single collection to the set. The collection is
// specified by its index; inserting an index that is already present
// leaves the set unchanged.
func (cis CollectionIndexSet) Insert(collectionIndex int) {
	cis[collectionIndex] = struct{}{}
}
41 // ToCollectionIndexSet gets block to collection indices
42 func (bs BlockSet) ToCollectionIndexSet(
43 readCollections collection.ReadCollections,
44 collectionIndexSet *CollectionIndexSet) {
45 for block := range bs {
46 for _, collectionIndex := range readCollections.BlockToCollectionIndices[block] {
47 collectionIndexSet.Insert(collectionIndex)
// ReplicationLevels keeps track of the requested and actual replication
// levels. Currently this is only used for blocks but could easily be
// used for collections as well.
type ReplicationLevels struct {
	// The requested replication level.
	// For Blocks this is the maximum replication level among all the
	// collections this block belongs to.
	Requested int

	// The actual number of keep servers this is on.
	Actual int
}
66 // ReplicationLevelBlockSetMap maps from replication levels to their blocks.
67 type ReplicationLevelBlockSetMap map[ReplicationLevels]BlockSet
69 // ReplicationLevelBlockCount is an individual entry from ReplicationLevelBlockSetMap
70 // which only reports the number of blocks, not which blocks.
71 type ReplicationLevelBlockCount struct {
72 Levels ReplicationLevels
76 // ReplicationLevelBlockSetSlice is an ordered list of ReplicationLevelBlockCount useful for reporting.
77 type ReplicationLevelBlockSetSlice []ReplicationLevelBlockCount
79 // ReplicationSummary sturct
80 type ReplicationSummary struct {
81 CollectionBlocksNotInKeep BlockSet
82 UnderReplicatedBlocks BlockSet
83 OverReplicatedBlocks BlockSet
84 CorrectlyReplicatedBlocks BlockSet
85 KeepBlocksNotInCollections BlockSet
87 CollectionsNotFullyInKeep CollectionIndexSet
88 UnderReplicatedCollections CollectionIndexSet
89 OverReplicatedCollections CollectionIndexSet
90 CorrectlyReplicatedCollections CollectionIndexSet
// ReplicationSummaryCounts counts the elements in each set in
// ReplicationSummary. Each field holds the size of the
// ReplicationSummary set of the same name.
type ReplicationSummaryCounts struct {
	CollectionBlocksNotInKeep      int
	UnderReplicatedBlocks          int
	OverReplicatedBlocks           int
	CorrectlyReplicatedBlocks      int
	KeepBlocksNotInCollections     int
	CollectionsNotFullyInKeep      int
	UnderReplicatedCollections     int
	OverReplicatedCollections      int
	CorrectlyReplicatedCollections int
}
106 // GetOrCreate gets the BlockSet for a given set of ReplicationLevels,
107 // creating it if it doesn't already exist.
108 func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
109 repLevels ReplicationLevels) (bs BlockSet) {
110 bs, exists := rlbs[repLevels]
118 // Insert adds a block to the set for a given replication level.
119 func (rlbs ReplicationLevelBlockSetMap) Insert(
120 repLevels ReplicationLevels,
121 block blockdigest.DigestWithSize) {
122 rlbs.GetOrCreate(repLevels).Insert(block)
125 // Union adds a set of blocks to the set for a given replication level.
126 func (rlbs ReplicationLevelBlockSetMap) Union(
127 repLevels ReplicationLevels,
129 rlbs.GetOrCreate(repLevels).Union(bs)
132 // Counts outputs a sorted list of ReplicationLevelBlockCounts.
133 func (rlbs ReplicationLevelBlockSetMap) Counts() (
134 sorted ReplicationLevelBlockSetSlice) {
135 sorted = make(ReplicationLevelBlockSetSlice, len(rlbs))
137 for levels, set := range rlbs {
138 sorted[i] = ReplicationLevelBlockCount{Levels: levels, Count: len(set)}
145 // Implemented to meet sort.Interface
146 func (rlbss ReplicationLevelBlockSetSlice) Len() int {
150 // Implemented to meet sort.Interface
151 func (rlbss ReplicationLevelBlockSetSlice) Less(i, j int) bool {
152 return rlbss[i].Levels.Requested < rlbss[j].Levels.Requested ||
153 (rlbss[i].Levels.Requested == rlbss[j].Levels.Requested &&
154 rlbss[i].Levels.Actual < rlbss[j].Levels.Actual)
157 // Implemented to meet sort.Interface
158 func (rlbss ReplicationLevelBlockSetSlice) Swap(i, j int) {
159 rlbss[i], rlbss[j] = rlbss[j], rlbss[i]
162 // ComputeCounts returns ReplicationSummaryCounts
163 func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
164 // TODO(misha): Consider rewriting this method to iterate through
165 // the fields using reflection, instead of explictily listing the
166 // fields as we do now.
167 rsc.CollectionBlocksNotInKeep = len(rs.CollectionBlocksNotInKeep)
168 rsc.UnderReplicatedBlocks = len(rs.UnderReplicatedBlocks)
169 rsc.OverReplicatedBlocks = len(rs.OverReplicatedBlocks)
170 rsc.CorrectlyReplicatedBlocks = len(rs.CorrectlyReplicatedBlocks)
171 rsc.KeepBlocksNotInCollections = len(rs.KeepBlocksNotInCollections)
172 rsc.CollectionsNotFullyInKeep = len(rs.CollectionsNotFullyInKeep)
173 rsc.UnderReplicatedCollections = len(rs.UnderReplicatedCollections)
174 rsc.OverReplicatedCollections = len(rs.OverReplicatedCollections)
175 rsc.CorrectlyReplicatedCollections = len(rs.CorrectlyReplicatedCollections)
179 // PrettyPrint ReplicationSummaryCounts
180 func (rsc ReplicationSummaryCounts) PrettyPrint() string {
181 return fmt.Sprintf("Replication Block Counts:"+
182 "\n Missing From Keep: %d, "+
183 "\n Under Replicated: %d, "+
184 "\n Over Replicated: %d, "+
185 "\n Replicated Just Right: %d, "+
186 "\n Not In Any Collection: %d. "+
187 "\nReplication Collection Counts:"+
188 "\n Missing From Keep: %d, "+
189 "\n Under Replicated: %d, "+
190 "\n Over Replicated: %d, "+
191 "\n Replicated Just Right: %d.",
192 rsc.CollectionBlocksNotInKeep,
193 rsc.UnderReplicatedBlocks,
194 rsc.OverReplicatedBlocks,
195 rsc.CorrectlyReplicatedBlocks,
196 rsc.KeepBlocksNotInCollections,
197 rsc.CollectionsNotFullyInKeep,
198 rsc.UnderReplicatedCollections,
199 rsc.OverReplicatedCollections,
200 rsc.CorrectlyReplicatedCollections)
203 // BucketReplication returns ReplicationLevelBlockSetMap
204 func BucketReplication(readCollections collection.ReadCollections,
205 keepServerInfo keep.ReadServers) (rlbs ReplicationLevelBlockSetMap) {
206 rlbs = make(ReplicationLevelBlockSetMap)
208 for block, requestedReplication := range readCollections.BlockToDesiredReplication {
211 Requested: requestedReplication,
212 Actual: len(keepServerInfo.BlockToServers[block])},
216 for block, servers := range keepServerInfo.BlockToServers {
217 if 0 == readCollections.BlockToDesiredReplication[block] {
219 ReplicationLevels{Requested: 0, Actual: len(servers)},
226 // SummarizeBuckets reads collections and summarizes
227 func (rlbs ReplicationLevelBlockSetMap) SummarizeBuckets(
228 readCollections collection.ReadCollections) (
229 rs ReplicationSummary) {
230 rs.CollectionBlocksNotInKeep = make(BlockSet)
231 rs.UnderReplicatedBlocks = make(BlockSet)
232 rs.OverReplicatedBlocks = make(BlockSet)
233 rs.CorrectlyReplicatedBlocks = make(BlockSet)
234 rs.KeepBlocksNotInCollections = make(BlockSet)
236 rs.CollectionsNotFullyInKeep = make(CollectionIndexSet)
237 rs.UnderReplicatedCollections = make(CollectionIndexSet)
238 rs.OverReplicatedCollections = make(CollectionIndexSet)
239 rs.CorrectlyReplicatedCollections = make(CollectionIndexSet)
241 for levels, bs := range rlbs {
242 if levels.Actual == 0 {
243 rs.CollectionBlocksNotInKeep.Union(bs)
244 } else if levels.Requested == 0 {
245 rs.KeepBlocksNotInCollections.Union(bs)
246 } else if levels.Actual < levels.Requested {
247 rs.UnderReplicatedBlocks.Union(bs)
248 } else if levels.Actual > levels.Requested {
249 rs.OverReplicatedBlocks.Union(bs)
251 rs.CorrectlyReplicatedBlocks.Union(bs)
255 rs.CollectionBlocksNotInKeep.ToCollectionIndexSet(readCollections,
256 &rs.CollectionsNotFullyInKeep)
257 // Since different collections can specify different replication
258 // levels, the fact that a block is under-replicated does not imply
259 // that all collections that it belongs to are under-replicated, but
260 // we'll ignore that for now.
261 // TODO(misha): Fix this and report the correct set of collections.
262 rs.UnderReplicatedBlocks.ToCollectionIndexSet(readCollections,
263 &rs.UnderReplicatedCollections)
264 rs.OverReplicatedBlocks.ToCollectionIndexSet(readCollections,
265 &rs.OverReplicatedCollections)
267 for i := range readCollections.CollectionIndexToUUID {
268 if _, notInKeep := rs.CollectionsNotFullyInKeep[i]; notInKeep {
269 } else if _, underReplicated := rs.UnderReplicatedCollections[i]; underReplicated {
270 } else if _, overReplicated := rs.OverReplicatedCollections[i]; overReplicated {
272 rs.CorrectlyReplicatedCollections.Insert(i)