71d1affec0d1343c19a1934cd6365c7b3d59d67c
[arvados.git] / services / datamanager / summary / summary.go
// Package summary summarizes collection data and keep server contents.
2 package summary
3
4 // TODO(misha): Check size of blocks as well as their digest.
5
6 import (
7         "fmt"
8         "git.curoverse.com/arvados.git/sdk/go/blockdigest"
9         "git.curoverse.com/arvados.git/services/datamanager/collection"
10         "git.curoverse.com/arvados.git/services/datamanager/keep"
11 )
12
// BlockSet is a set of block digests, represented as a map whose keys
// are the members and whose values are empty structs.
type BlockSet map[blockdigest.BlockDigest]struct{}
14
15 func (bs BlockSet) Insert(digest blockdigest.BlockDigest) {
16         bs[digest] = struct{}{}
17 }
18
19 func BlockSetFromSlice(digests []blockdigest.BlockDigest) (bs BlockSet) {
20         bs = make(BlockSet)
21         for _, digest := range digests {
22                 bs.Insert(digest)
23         }
24         return
25 }
26
// CollectionIndexSet is a set of collection indices, represented as a
// map whose keys are the members and whose values are empty structs.
//
// We use the collection index to save space. To convert to & from the
// uuid, use collection.ReadCollections' fields CollectionIndexToUuid
// and CollectionUuidToIndex.
type CollectionIndexSet map[int]struct{}
31
32 func (cis CollectionIndexSet) Insert(collectionIndex int) {
33         cis[collectionIndex] = struct{}{}
34 }
35
36 func CollectionIndexSetFromSlice(indices []int) (cis CollectionIndexSet) {
37         cis = make(CollectionIndexSet)
38         for _, index := range indices {
39                 cis.Insert(index)
40         }
41         return
42 }
43
44
45 func (bs BlockSet) ToCollectionIndexSet(
46         readCollections collection.ReadCollections,
47         collectionIndexSet *CollectionIndexSet) {
48         for block := range bs {
49                 for collectionIndex := range readCollections.BlockToCollectionIndices[block] {
50                         collectionIndexSet.Insert(collectionIndex)
51                 }
52         }
53 }
54
// ReplicationSummary groups blocks and collections according to how the
// number of keep servers holding a block compares with the replication
// level requested for it by collections. SummarizeReplication builds a
// fully populated value.
type ReplicationSummary struct {
	// Blocks requested by collections that no keep server holds.
	CollectionBlocksNotInKeep  BlockSet
	// Blocks held by at least one keep server, but by fewer servers than
	// the requested replication level.
	UnderReplicatedBlocks      BlockSet
	// Blocks held by more keep servers than the requested replication
	// level.
	OverReplicatedBlocks       BlockSet
	// Blocks held by exactly the requested number of keep servers.
	CorrectlyReplicatedBlocks  BlockSet
	// Blocks held by keep servers whose requested replication level is
	// zero.
	KeepBlocksNotInCollections BlockSet

	// Collections associated with at least one block in
	// CollectionBlocksNotInKeep.
	CollectionsNotFullyInKeep      CollectionIndexSet
	// Collections associated with at least one block in
	// UnderReplicatedBlocks.
	UnderReplicatedCollections     CollectionIndexSet
	// Collections associated with at least one block in
	// OverReplicatedBlocks.
	OverReplicatedCollections      CollectionIndexSet
	// Collections that are in none of the three collection sets above.
	CorrectlyReplicatedCollections CollectionIndexSet
}
67
// ReplicationSummaryCounts holds the sizes of the sets in a
// ReplicationSummary. Each field is the number of members in the
// ReplicationSummary field of the same name, as computed by
// ReplicationSummary.ComputeCounts.
type ReplicationSummaryCounts struct {
	// Block counts.
	CollectionBlocksNotInKeep      int
	UnderReplicatedBlocks          int
	OverReplicatedBlocks           int
	CorrectlyReplicatedBlocks      int
	KeepBlocksNotInCollections     int
	// Collection counts.
	CollectionsNotFullyInKeep      int
	UnderReplicatedCollections     int
	OverReplicatedCollections      int
	CorrectlyReplicatedCollections int
}
79
// serializedData bundles the two inputs of SummarizeReplication: the
// collection data and the keep server data.
// NOTE(review): nothing in the visible part of this file uses this type;
// presumably it exists so both inputs can be saved and loaded together —
// confirm against the rest of the package.
type serializedData struct {
	ReadCollections collection.ReadCollections
	KeepServerInfo  keep.ReadServers
}
84
85 func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
86         rsc.CollectionBlocksNotInKeep = len(rs.CollectionBlocksNotInKeep)
87         rsc.UnderReplicatedBlocks = len(rs.UnderReplicatedBlocks)
88         rsc.OverReplicatedBlocks = len(rs.OverReplicatedBlocks)
89         rsc.CorrectlyReplicatedBlocks = len(rs.CorrectlyReplicatedBlocks)
90         rsc.KeepBlocksNotInCollections = len(rs.KeepBlocksNotInCollections)
91         rsc.CollectionsNotFullyInKeep = len(rs.CollectionsNotFullyInKeep)
92         rsc.UnderReplicatedCollections = len(rs.UnderReplicatedCollections)
93         rsc.OverReplicatedCollections = len(rs.OverReplicatedCollections)
94         rsc.CorrectlyReplicatedCollections = len(rs.CorrectlyReplicatedCollections)
95         return rsc
96 }
97
98 func (rsc ReplicationSummaryCounts) PrettyPrint() string {
99         return fmt.Sprintf("Replication Block Counts:"+
100                 "\n Missing From Keep: %d, "+
101                 "\n Under Replicated: %d, "+
102                 "\n Over Replicated: %d, "+
103                 "\n Replicated Just Right: %d, "+
104                 "\n Not In Any Collection: %d. "+
105                 "\nReplication Collection Counts:"+
106                 "\n Missing From Keep: %d, "+
107                 "\n Under Replicated: %d, "+
108                 "\n Over Replicated: %d, "+
109                 "\n Replicated Just Right: %d.",
110                 rsc.CollectionBlocksNotInKeep,
111                 rsc.UnderReplicatedBlocks,
112                 rsc.OverReplicatedBlocks,
113                 rsc.CorrectlyReplicatedBlocks,
114                 rsc.KeepBlocksNotInCollections,
115                 rsc.CollectionsNotFullyInKeep,
116                 rsc.UnderReplicatedCollections,
117                 rsc.OverReplicatedCollections,
118                 rsc.CorrectlyReplicatedCollections)
119 }
120
121 func SummarizeReplication(readCollections collection.ReadCollections,
122         keepServerInfo keep.ReadServers) (rs ReplicationSummary) {
123         rs.CollectionBlocksNotInKeep = make(BlockSet)
124         rs.UnderReplicatedBlocks = make(BlockSet)
125         rs.OverReplicatedBlocks = make(BlockSet)
126         rs.CorrectlyReplicatedBlocks = make(BlockSet)
127         rs.KeepBlocksNotInCollections = make(BlockSet)
128         rs.CollectionsNotFullyInKeep = make(CollectionIndexSet)
129         rs.UnderReplicatedCollections = make(CollectionIndexSet)
130         rs.OverReplicatedCollections = make(CollectionIndexSet)
131         rs.CorrectlyReplicatedCollections = make(CollectionIndexSet)
132
133         for block, requestedReplication := range readCollections.BlockToReplication {
134                 actualReplication := len(keepServerInfo.BlockToServers[block])
135                 if actualReplication == 0 {
136                         rs.CollectionBlocksNotInKeep.Insert(block)
137                 } else if actualReplication < requestedReplication {
138                         rs.UnderReplicatedBlocks.Insert(block)
139                 } else if actualReplication > requestedReplication {
140                         rs.OverReplicatedBlocks.Insert(block)
141                 } else {
142                         rs.CorrectlyReplicatedBlocks.Insert(block)
143                 }
144         }
145
146         for block, _ := range keepServerInfo.BlockToServers {
147                 if 0 == readCollections.BlockToReplication[block] {
148                         rs.KeepBlocksNotInCollections.Insert(block)
149                 }
150         }
151
152         rs.CollectionBlocksNotInKeep.ToCollectionIndexSet(readCollections,
153                 &rs.CollectionsNotFullyInKeep)
154         // Since different collections can specify different replication
155         // levels, the fact that a block is under-replicated does not imply
156         // that all collections that it belongs to are under-replicated, but
157         // we'll ignore that for now.
158         // TODO(misha): Fix this and report the correct set of collections.
159         rs.UnderReplicatedBlocks.ToCollectionIndexSet(readCollections,
160                 &rs.UnderReplicatedCollections)
161         rs.OverReplicatedBlocks.ToCollectionIndexSet(readCollections,
162                 &rs.OverReplicatedCollections)
163
164         for i := range readCollections.CollectionIndexToUuid {
165                 if _, notInKeep := rs.CollectionsNotFullyInKeep[i]; notInKeep {
166                 } else if _, underReplicated := rs.UnderReplicatedCollections[i]; underReplicated {
167                 } else if _, overReplicated := rs.OverReplicatedCollections[i]; overReplicated {
168                 } else {
169                         rs.CorrectlyReplicatedCollections.Insert(i)
170                 }
171         }
172
173         return rs
174 }