Adds sanity check on number of collections retrieved
[arvados.git] / services / datamanager / summary / summary.go
1 // Summarizes Collection Data and Keep Server Contents.
2
3 package summary
4
5 // TODO(misha): Check size of blocks as well as their digest.
6
7 import (
8         "fmt"
9         "git.curoverse.com/arvados.git/sdk/go/blockdigest"
10         "git.curoverse.com/arvados.git/services/datamanager/collection"
11         "git.curoverse.com/arvados.git/services/datamanager/keep"
12         "sort"
13 )
14
15 // BlockSet is a map of blocks
16 type BlockSet map[blockdigest.DigestWithSize]struct{}
17
18 // Insert adds a single block to the set.
19 func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) {
20         bs[digest] = struct{}{}
21 }
22
23 // Union adds a set of blocks to the set.
24 func (bs BlockSet) Union(obs BlockSet) {
25         for k, v := range obs {
26                 bs[k] = v
27         }
28 }
29
// CollectionIndexSet is a set of collections identified by integer index
// rather than by uuid, which saves space. To convert to and from the
// uuid, use collection.ReadCollections' fields CollectionIndexToUUID and
// CollectionUUIDToIndex.
type CollectionIndexSet map[int]struct{}

// Insert adds a single collection, specified by its index, to the set.
// Inserting an index that is already present leaves the set unchanged.
func (cis CollectionIndexSet) Insert(collectionIndex int) {
	var present struct{}
	cis[collectionIndex] = present
}
40
41 // ToCollectionIndexSet gets block to collection indices
42 func (bs BlockSet) ToCollectionIndexSet(
43         readCollections collection.ReadCollections,
44         collectionIndexSet *CollectionIndexSet) {
45         for block := range bs {
46                 for _, collectionIndex := range readCollections.BlockToCollectionIndices[block] {
47                         collectionIndexSet.Insert(collectionIndex)
48                 }
49         }
50 }
51
// ReplicationLevels pairs a requested replication level with the actual
// one. Currently this is only used for blocks but could easily be used
// for collections as well.
type ReplicationLevels struct {
	// Requested is the requested replication level. For blocks this is
	// the maximum replication level among all the collections the block
	// belongs to.
	Requested int

	// Actual is the number of keep servers this is actually on.
	Actual int
}
65
66 // ReplicationLevelBlockSetMap maps from replication levels to their blocks.
67 type ReplicationLevelBlockSetMap map[ReplicationLevels]BlockSet
68
69 // ReplicationLevelBlockCount is an individual entry from ReplicationLevelBlockSetMap
70 // which only reports the number of blocks, not which blocks.
71 type ReplicationLevelBlockCount struct {
72         Levels ReplicationLevels
73         Count  int
74 }
75
76 // ReplicationLevelBlockSetSlice is an ordered list of ReplicationLevelBlockCount useful for reporting.
77 type ReplicationLevelBlockSetSlice []ReplicationLevelBlockCount
78
79 // ReplicationSummary sturct
80 type ReplicationSummary struct {
81         CollectionBlocksNotInKeep  BlockSet
82         UnderReplicatedBlocks      BlockSet
83         OverReplicatedBlocks       BlockSet
84         CorrectlyReplicatedBlocks  BlockSet
85         KeepBlocksNotInCollections BlockSet
86
87         CollectionsNotFullyInKeep      CollectionIndexSet
88         UnderReplicatedCollections     CollectionIndexSet
89         OverReplicatedCollections      CollectionIndexSet
90         CorrectlyReplicatedCollections CollectionIndexSet
91 }
92
// ReplicationSummaryCounts holds the number of elements in each set of a
// ReplicationSummary. Every field is the size of the ReplicationSummary
// field with the same name.
type ReplicationSummaryCounts struct {
	// Block counts.
	CollectionBlocksNotInKeep  int
	UnderReplicatedBlocks      int
	OverReplicatedBlocks       int
	CorrectlyReplicatedBlocks  int
	KeepBlocksNotInCollections int

	// Collection counts.
	CollectionsNotFullyInKeep      int
	UnderReplicatedCollections     int
	OverReplicatedCollections      int
	CorrectlyReplicatedCollections int
}
105
106 // GetOrCreate gets the BlockSet for a given set of ReplicationLevels,
107 // creating it if it doesn't already exist.
108 func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
109         repLevels ReplicationLevels) (bs BlockSet) {
110         bs, exists := rlbs[repLevels]
111         if !exists {
112                 bs = make(BlockSet)
113                 rlbs[repLevels] = bs
114         }
115         return
116 }
117
118 // Insert adds a block to the set for a given replication level.
119 func (rlbs ReplicationLevelBlockSetMap) Insert(
120         repLevels ReplicationLevels,
121         block blockdigest.DigestWithSize) {
122         rlbs.GetOrCreate(repLevels).Insert(block)
123 }
124
125 // Union adds a set of blocks to the set for a given replication level.
126 func (rlbs ReplicationLevelBlockSetMap) Union(
127         repLevels ReplicationLevels,
128         bs BlockSet) {
129         rlbs.GetOrCreate(repLevels).Union(bs)
130 }
131
132 // Counts outputs a sorted list of ReplicationLevelBlockCounts.
133 func (rlbs ReplicationLevelBlockSetMap) Counts() (
134         sorted ReplicationLevelBlockSetSlice) {
135         sorted = make(ReplicationLevelBlockSetSlice, len(rlbs))
136         i := 0
137         for levels, set := range rlbs {
138                 sorted[i] = ReplicationLevelBlockCount{Levels: levels, Count: len(set)}
139                 i++
140         }
141         sort.Sort(sorted)
142         return
143 }
144
145 // Implemented to meet sort.Interface
146 func (rlbss ReplicationLevelBlockSetSlice) Len() int {
147         return len(rlbss)
148 }
149
150 // Implemented to meet sort.Interface
151 func (rlbss ReplicationLevelBlockSetSlice) Less(i, j int) bool {
152         return rlbss[i].Levels.Requested < rlbss[j].Levels.Requested ||
153                 (rlbss[i].Levels.Requested == rlbss[j].Levels.Requested &&
154                         rlbss[i].Levels.Actual < rlbss[j].Levels.Actual)
155 }
156
157 // Implemented to meet sort.Interface
158 func (rlbss ReplicationLevelBlockSetSlice) Swap(i, j int) {
159         rlbss[i], rlbss[j] = rlbss[j], rlbss[i]
160 }
161
162 // ComputeCounts returns ReplicationSummaryCounts
163 func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
164         // TODO(misha): Consider rewriting this method to iterate through
165         // the fields using reflection, instead of explictily listing the
166         // fields as we do now.
167         rsc.CollectionBlocksNotInKeep = len(rs.CollectionBlocksNotInKeep)
168         rsc.UnderReplicatedBlocks = len(rs.UnderReplicatedBlocks)
169         rsc.OverReplicatedBlocks = len(rs.OverReplicatedBlocks)
170         rsc.CorrectlyReplicatedBlocks = len(rs.CorrectlyReplicatedBlocks)
171         rsc.KeepBlocksNotInCollections = len(rs.KeepBlocksNotInCollections)
172         rsc.CollectionsNotFullyInKeep = len(rs.CollectionsNotFullyInKeep)
173         rsc.UnderReplicatedCollections = len(rs.UnderReplicatedCollections)
174         rsc.OverReplicatedCollections = len(rs.OverReplicatedCollections)
175         rsc.CorrectlyReplicatedCollections = len(rs.CorrectlyReplicatedCollections)
176         return rsc
177 }
178
179 // PrettyPrint ReplicationSummaryCounts
180 func (rsc ReplicationSummaryCounts) PrettyPrint() string {
181         return fmt.Sprintf("Replication Block Counts:"+
182                 "\n Missing From Keep: %d, "+
183                 "\n Under Replicated: %d, "+
184                 "\n Over Replicated: %d, "+
185                 "\n Replicated Just Right: %d, "+
186                 "\n Not In Any Collection: %d. "+
187                 "\nReplication Collection Counts:"+
188                 "\n Missing From Keep: %d, "+
189                 "\n Under Replicated: %d, "+
190                 "\n Over Replicated: %d, "+
191                 "\n Replicated Just Right: %d.",
192                 rsc.CollectionBlocksNotInKeep,
193                 rsc.UnderReplicatedBlocks,
194                 rsc.OverReplicatedBlocks,
195                 rsc.CorrectlyReplicatedBlocks,
196                 rsc.KeepBlocksNotInCollections,
197                 rsc.CollectionsNotFullyInKeep,
198                 rsc.UnderReplicatedCollections,
199                 rsc.OverReplicatedCollections,
200                 rsc.CorrectlyReplicatedCollections)
201 }
202
203 // BucketReplication returns ReplicationLevelBlockSetMap
204 func BucketReplication(readCollections collection.ReadCollections,
205         keepServerInfo keep.ReadServers) (rlbs ReplicationLevelBlockSetMap) {
206         rlbs = make(ReplicationLevelBlockSetMap)
207
208         for block, requestedReplication := range readCollections.BlockToDesiredReplication {
209                 rlbs.Insert(
210                         ReplicationLevels{
211                                 Requested: requestedReplication,
212                                 Actual:    len(keepServerInfo.BlockToServers[block])},
213                         block)
214         }
215
216         for block, servers := range keepServerInfo.BlockToServers {
217                 if 0 == readCollections.BlockToDesiredReplication[block] {
218                         rlbs.Insert(
219                                 ReplicationLevels{Requested: 0, Actual: len(servers)},
220                                 block)
221                 }
222         }
223         return
224 }
225
226 // SummarizeBuckets reads collections and summarizes
227 func (rlbs ReplicationLevelBlockSetMap) SummarizeBuckets(
228         readCollections collection.ReadCollections) (
229         rs ReplicationSummary) {
230         rs.CollectionBlocksNotInKeep = make(BlockSet)
231         rs.UnderReplicatedBlocks = make(BlockSet)
232         rs.OverReplicatedBlocks = make(BlockSet)
233         rs.CorrectlyReplicatedBlocks = make(BlockSet)
234         rs.KeepBlocksNotInCollections = make(BlockSet)
235
236         rs.CollectionsNotFullyInKeep = make(CollectionIndexSet)
237         rs.UnderReplicatedCollections = make(CollectionIndexSet)
238         rs.OverReplicatedCollections = make(CollectionIndexSet)
239         rs.CorrectlyReplicatedCollections = make(CollectionIndexSet)
240
241         for levels, bs := range rlbs {
242                 if levels.Actual == 0 {
243                         rs.CollectionBlocksNotInKeep.Union(bs)
244                 } else if levels.Requested == 0 {
245                         rs.KeepBlocksNotInCollections.Union(bs)
246                 } else if levels.Actual < levels.Requested {
247                         rs.UnderReplicatedBlocks.Union(bs)
248                 } else if levels.Actual > levels.Requested {
249                         rs.OverReplicatedBlocks.Union(bs)
250                 } else {
251                         rs.CorrectlyReplicatedBlocks.Union(bs)
252                 }
253         }
254
255         rs.CollectionBlocksNotInKeep.ToCollectionIndexSet(readCollections,
256                 &rs.CollectionsNotFullyInKeep)
257         // Since different collections can specify different replication
258         // levels, the fact that a block is under-replicated does not imply
259         // that all collections that it belongs to are under-replicated, but
260         // we'll ignore that for now.
261         // TODO(misha): Fix this and report the correct set of collections.
262         rs.UnderReplicatedBlocks.ToCollectionIndexSet(readCollections,
263                 &rs.UnderReplicatedCollections)
264         rs.OverReplicatedBlocks.ToCollectionIndexSet(readCollections,
265                 &rs.OverReplicatedCollections)
266
267         for i := range readCollections.CollectionIndexToUUID {
268                 if _, notInKeep := rs.CollectionsNotFullyInKeep[i]; notInKeep {
269                 } else if _, underReplicated := rs.UnderReplicatedCollections[i]; underReplicated {
270                 } else if _, overReplicated := rs.OverReplicatedCollections[i]; overReplicated {
271                 } else {
272                         rs.CorrectlyReplicatedCollections.Insert(i)
273                 }
274         }
275
276         return
277 }