17346: Don't try to rebalance the zero-length data block.
[arvados.git] / sdk / go / arvados / collection.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 package arvados
6
7 import (
8         "bufio"
9         "crypto/md5"
10         "fmt"
11         "regexp"
12         "strings"
13         "time"
14
15         "git.arvados.org/arvados.git/sdk/go/blockdigest"
16 )
17
// Collection is an arvados#collection resource.
//
// Each field is encoded to and decoded from the JSON key named in its
// struct tag. Pointer-typed fields encode as JSON null when nil.
type Collection struct {
	UUID      string     `json:"uuid"`
	Etag      string     `json:"etag"`
	OwnerUUID string     `json:"owner_uuid"`
	TrashAt   *time.Time `json:"trash_at"`

	ManifestText         string `json:"manifest_text"`
	UnsignedManifestText string `json:"unsigned_manifest_text"`
	Name                 string `json:"name"`

	CreatedAt            time.Time `json:"created_at"`
	ModifiedAt           time.Time `json:"modified_at"`
	ModifiedByClientUUID string    `json:"modified_by_client_uuid"`
	ModifiedByUserUUID   string    `json:"modified_by_user_uuid"`
	PortableDataHash     string    `json:"portable_data_hash"`

	ReplicationConfirmed   *int       `json:"replication_confirmed"`
	ReplicationConfirmedAt *time.Time `json:"replication_confirmed_at"`
	ReplicationDesired     *int       `json:"replication_desired"`

	StorageClassesDesired     []string   `json:"storage_classes_desired"`
	StorageClassesConfirmed   []string   `json:"storage_classes_confirmed"`
	StorageClassesConfirmedAt *time.Time `json:"storage_classes_confirmed_at"`

	DeleteAt   *time.Time             `json:"delete_at"`
	IsTrashed  bool                   `json:"is_trashed"`
	Properties map[string]interface{} `json:"properties"`

	// WritableBy is the only field left out of encoded JSON when it
	// is empty.
	WritableBy []string `json:"writable_by,omitempty"`

	FileCount     int   `json:"file_count"`
	FileSizeTotal int64 `json:"file_size_total"`

	Version            int    `json:"version"`
	PreserveVersion    bool   `json:"preserve_version"`
	CurrentVersionUUID string `json:"current_version_uuid"`
	Description        string `json:"description"`
}
49
50 func (c Collection) resourceName() string {
51         return "collection"
52 }
53
54 // SizedDigests returns the hash+size part of each data block
55 // referenced by the collection.
56 //
57 // Zero-length blocks are not included.
58 func (c *Collection) SizedDigests() ([]SizedDigest, error) {
59         manifestText := c.ManifestText
60         if manifestText == "" {
61                 manifestText = c.UnsignedManifestText
62         }
63         if manifestText == "" && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" {
64                 // TODO: Check more subtle forms of corruption, too
65                 return nil, fmt.Errorf("manifest is missing")
66         }
67         var sds []SizedDigest
68         scanner := bufio.NewScanner(strings.NewReader(manifestText))
69         scanner.Buffer(make([]byte, 1048576), len(manifestText))
70         for scanner.Scan() {
71                 line := scanner.Text()
72                 tokens := strings.Split(line, " ")
73                 if len(tokens) < 3 {
74                         return nil, fmt.Errorf("Invalid stream (<3 tokens): %q", line)
75                 }
76                 for _, token := range tokens[1:] {
77                         if !blockdigest.LocatorPattern.MatchString(token) {
78                                 // FIXME: ensure it's a file token
79                                 break
80                         }
81                         if strings.HasPrefix(token, "d41d8cd98f00b204e9800998ecf8427e+0") {
82                                 // Exclude "empty block" placeholder
83                                 continue
84                         }
85                         // FIXME: shouldn't assume 32 char hash
86                         if i := strings.IndexRune(token[33:], '+'); i >= 0 {
87                                 token = token[:33+i]
88                         }
89                         sds = append(sds, SizedDigest(token))
90                 }
91         }
92         return sds, scanner.Err()
93 }
94
95 type CollectionList struct {
96         Items          []Collection `json:"items"`
97         ItemsAvailable int          `json:"items_available"`
98         Offset         int          `json:"offset"`
99         Limit          int          `json:"limit"`
100 }
101
var (
	// blkRe matches, at the start of a token, a space followed by
	// 32 lowercase hex digits, a "+", and a decimal number. Anything
	// after the number is outside the match.
	blkRe = regexp.MustCompile(`^ [0-9a-f]{32}\+\d+`)
	// tokRe matches an optional space followed by a run of
	// characters other than space (newlines included).
	tokRe = regexp.MustCompile(` ?[^ ]*`)
)

// PortableDataHash computes the portable data hash of the given
// manifest, formatted as "<md5 hex digest>+<byte count>".
//
// The manifest is walked token by token. A token that blkRe matches
// contributes only the matched hash+size prefix; every other token
// contributes itself unchanged. The byte count is the number of bytes
// fed to the digest.
func PortableDataHash(mt string) string {
	digest := md5.New()
	hashed := 0

	// feed hashes one token. It always returns nil because the
	// replacement output of ReplaceAllFunc is not used; the call
	// serves only to visit each token in order.
	feed := func(tok []byte) []byte {
		if prefix := blkRe.Find(tok); prefix != nil {
			// write hash+size, ignore remaining block hints
			tok = prefix
		}
		written, err := digest.Write(tok)
		if err != nil {
			panic(err)
		}
		hashed += written
		return nil
	}
	tokRe.ReplaceAllFunc([]byte(mt), feed)

	return fmt.Sprintf("%x+%d", digest.Sum(nil), hashed)
}