1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: Apache-2.0
15 "git.arvados.org/arvados.git/sdk/go/blockdigest"
// UUIDMatch reports whether its argument is, in full, three
// hyphen-separated groups of 5, 5 and 15 characters from [a-z0-9].
19 UUIDMatch = regexp.MustCompile(`^[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}$`).MatchString
// PDHMatch reports whether its argument is, in full, 32 lowercase hex
// digits followed by '+' and one or more decimal digits.
20 PDHMatch = regexp.MustCompile(`^[0-9a-f]{32}\+\d+$`).MatchString
23 // Collection is an arvados#collection resource.
//
// Each field carries a struct tag naming its JSON key. The *time.Time
// and *int fields can be nil; presumably that corresponds to a JSON
// null in API responses -- TODO confirm.
24 type Collection struct {
25 UUID string `json:"uuid"`
26 Etag string `json:"etag"`
27 OwnerUUID string `json:"owner_uuid"`
28 TrashAt *time.Time `json:"trash_at"`
// ManifestText and UnsignedManifestText are both consulted by
// SizedDigests, which prefers ManifestText when it is non-empty.
29 ManifestText string `json:"manifest_text"`
30 UnsignedManifestText string `json:"unsigned_manifest_text"`
31 Name string `json:"name"`
32 CreatedAt time.Time `json:"created_at"`
33 ModifiedAt time.Time `json:"modified_at"`
34 ModifiedByUserUUID string `json:"modified_by_user_uuid"`
35 PortableDataHash string `json:"portable_data_hash"`
36 ReplicationConfirmed *int `json:"replication_confirmed"`
37 ReplicationConfirmedAt *time.Time `json:"replication_confirmed_at"`
38 ReplicationDesired *int `json:"replication_desired"`
39 StorageClassesDesired []string `json:"storage_classes_desired"`
40 StorageClassesConfirmed []string `json:"storage_classes_confirmed"`
41 StorageClassesConfirmedAt *time.Time `json:"storage_classes_confirmed_at"`
42 DeleteAt *time.Time `json:"delete_at"`
43 IsTrashed bool `json:"is_trashed"`
44 Properties map[string]interface{} `json:"properties"`
// WritableBy is the only field here tagged omitempty, so an empty
// slice is left out when the struct is encoded with encoding/json.
45 WritableBy []string `json:"writable_by,omitempty"`
46 FileCount int `json:"file_count"`
47 FileSizeTotal int64 `json:"file_size_total"`
48 Version int `json:"version"`
49 PreserveVersion bool `json:"preserve_version"`
50 CurrentVersionUUID string `json:"current_version_uuid"`
51 Description string `json:"description"`
// resourceName returns the name of this resource type as a string.
// NOTE(review): presumably a fixed identifier for collections -- confirm.
54 func (c Collection) resourceName() string {
58 // SizedDigests returns the hash+size part of each data block
59 // referenced by the collection.
61 // Zero-length blocks are not included.
//
// The manifest is read from ManifestText, or from
// UnsignedManifestText when ManifestText is empty. An error is
// returned when both are empty and PortableDataHash is anything other
// than "d41d8cd98f00b204e9800998ecf8427e+0", and for a stream
// reported as having fewer than 3 tokens.
62 func (c *Collection) SizedDigests() ([]SizedDigest, error) {
63 manifestText := []byte(c.ManifestText)
64 if len(manifestText) == 0 {
// Fall back to the unsigned form of the manifest.
65 manifestText = []byte(c.UnsignedManifestText)
// An empty manifest is accepted only alongside this specific
// PortableDataHash value (presumably the PDH of an empty manifest --
// confirm).
67 if len(manifestText) == 0 && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" {
68 // TODO: Check more subtle forms of corruption, too
69 return nil, fmt.Errorf("manifest is missing")
// Pre-size the result from the manifest length; the divisor of 40 is
// presumably a rough bytes-per-locator estimate -- confirm.
71 sds := make([]SizedDigest, 0, len(manifestText)/40)
// Each newline-separated line of the manifest is handled separately.
72 for _, line := range bytes.Split(manifestText, []byte{'\n'}) {
// Tokens within a line are separated by single spaces.
76 tokens := bytes.Split(line, []byte{' '})
78 return nil, fmt.Errorf("Invalid stream (<3 tokens): %q", line)
// The first token is skipped (presumably the stream name -- confirm);
// the remaining tokens are tested against the locator pattern.
80 for _, token := range tokens[1:] {
81 if !blockdigest.LocatorPattern.Match(token) {
82 // FIXME: ensure it's a file token
85 if bytes.HasPrefix(token, []byte("d41d8cd98f00b204e9800998ecf8427e+0")) {
86 // Exclude "empty block" placeholder
89 // FIXME: shouldn't assume 32 char hash
// token[33:] starts just past the 32-char hash and the '+' after it,
// so i is the offset (relative to index 33) of a later '+', if any.
90 if i := bytes.IndexRune(token[33:], '+'); i >= 0 {
// The appended element is a string copy of the token bytes.
93 sds = append(sds, SizedDigest(string(token)))
// CollectionList holds a slice of Collection values together with
// the items_available, offset and limit numbers carried in the same
// JSON object.
// NOTE(review): presumably the response shape of a collection list
// API call, with Offset/Limit describing the page -- confirm.
99 type CollectionList struct {
100 Items []Collection `json:"items"`
101 ItemsAvailable int `json:"items_available"`
102 Offset int `json:"offset"`
103 Limit int `json:"limit"`
106 // PortableDataHash computes the portable data hash of the given
108 func PortableDataHash(mt string) string {
109 // To calculate the PDH, we write the manifest to an md5 hash
110 // func, except we skip the "extra" part of block tokens that
111 // look like "abcdef0123456789abcdef0123456789+12345+extra".
113 // This code is simplified by the facts that (A) all block
114 // tokens -- even the first and last in a stream -- are
115 // preceded and followed by a space character; and (B) all
116 // non-block tokens either start with '.' or contain ':'.
118 // A regexp-based approach (like the one this replaced) would
119 // be more readable, but very slow.
124 // sp is the end of the current token (note that if
125 // the current token is the last file token in a
126 // stream, we'll also include the \n and the dirname
127 // token on the next line, which is perfectly fine for
129 sp := bytes.IndexByte(todo, ' ')
131 // Last token of the manifest, which is never
// Everything left in todo is written to the hash unchanged; the error
// result of Write is deliberately discarded.
133 n, _ := h.Write(todo)
// A token is treated as a block token only when the space is at index
// 34 or later, byte 32 is '+', the first 32 bytes contain no ':', and
// the first byte is not '.'.
137 if sp >= 34 && todo[32] == '+' && bytes.IndexByte(todo[:32], ':') == -1 && todo[0] != '.' {
138 // todo[:sp] is a block token.
// Look for a further '+' between index 33 and the end of the token.
// At this point sizeend is relative to index 33, or -1 if not found.
139 sizeend := bytes.IndexByte(todo[33:sp], '+')
// Hash only the leading todo[:sizeend] bytes of the token.
147 n, _ := h.Write(todo[:sizeend])
151 // todo[:sp] is not a block token.
// Hash the token together with the space that follows it.
152 n, _ := h.Write(todo[:sp+1])
// Result format: lowercase hex of h.Sum(nil), then '+', then size in
// decimal.
157 return fmt.Sprintf("%x+%d", h.Sum(nil), size)
160 // CollectionIDFromDNSName returns a UUID or PDH if s begins with a
161 // UUID or URL-encoded PDH; otherwise "".
162 func CollectionIDFromDNSName(s string) string {
// Locate the first '.' in s (presumably s is cut there so that only
// the leading DNS label is examined -- confirm).
164 if i := strings.IndexRune(s, '.'); i >= 0 {
167 // Names like {uuid}--collections.example.com serve the same
168 // purpose as {uuid}.collections.example.com but can reduce
169 // cost/effort of using [additional] wildcard certificates.
170 if i := strings.Index(s, "--"); i >= 0 {
// A PDH candidate is formed by replacing only the first '-' in s with
// '+', and is then validated with PDHMatch.
176 if pdh := strings.Replace(s, "-", "+", 1); PDHMatch(pdh) {