X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/b002129afda08bbb4fdbed6e629858a5c298c068..3b4bb3d393adc3bd3ddfb4442a65087275a5c5c3:/sdk/go/arvados/collection.go diff --git a/sdk/go/arvados/collection.go b/sdk/go/arvados/collection.go index 71f5247615..030665d77f 100644 --- a/sdk/go/arvados/collection.go +++ b/sdk/go/arvados/collection.go @@ -1,37 +1,70 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + package arvados import ( "bufio" + "crypto/md5" "fmt" + "regexp" "strings" "time" - "git.curoverse.com/arvados.git/sdk/go/manifest" + "git.arvados.org/arvados.git/sdk/go/blockdigest" ) // Collection is an arvados#collection resource. type Collection struct { - UUID string `json:"uuid,omitempty"` - ExpiresAt *time.Time `json:"expires_at,omitempty"` - ManifestText string `json:"manifest_text,omitempty"` - CreatedAt *time.Time `json:"created_at,omitempty"` - ModifiedAt *time.Time `json:"modified_at,omitempty"` - PortableDataHash string `json:"portable_data_hash,omitempty"` - ReplicationConfirmed *int `json:"replication_confirmed,omitempty"` - ReplicationConfirmedAt *time.Time `json:"replication_confirmed_at,omitempty"` - ReplicationDesired *int `json:"replication_desired,omitempty"` + UUID string `json:"uuid"` + Etag string `json:"etag"` + OwnerUUID string `json:"owner_uuid"` + TrashAt *time.Time `json:"trash_at"` + ManifestText string `json:"manifest_text"` + UnsignedManifestText string `json:"unsigned_manifest_text"` + Name string `json:"name"` + CreatedAt time.Time `json:"created_at"` + ModifiedAt time.Time `json:"modified_at"` + ModifiedByClientUUID string `json:"modified_by_client_uuid"` + ModifiedByUserUUID string `json:"modified_by_user_uuid"` + PortableDataHash string `json:"portable_data_hash"` + ReplicationConfirmed *int `json:"replication_confirmed"` + ReplicationConfirmedAt *time.Time `json:"replication_confirmed_at"` + ReplicationDesired *int `json:"replication_desired"` + StorageClassesDesired []string `json:"storage_classes_desired"` + StorageClassesConfirmed []string `json:"storage_classes_confirmed"` + StorageClassesConfirmedAt *time.Time `json:"storage_classes_confirmed_at"` + DeleteAt *time.Time `json:"delete_at"` + IsTrashed bool `json:"is_trashed"` + Properties map[string]interface{} `json:"properties"` + WritableBy []string `json:"writable_by,omitempty"` + FileCount int `json:"file_count"` + FileSizeTotal int64 `json:"file_size_total"` + Version int `json:"version"` + PreserveVersion bool `json:"preserve_version"` + CurrentVersionUUID string `json:"current_version_uuid"` + Description string `json:"description"` +} + +func (c Collection) resourceName() string { + return "collection" } // SizedDigests returns the hash+size part of each data block // referenced by the collection. func (c *Collection) SizedDigests() ([]SizedDigest, error) { - if c.ManifestText == "" && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" { + manifestText := c.ManifestText + if manifestText == "" { + manifestText = c.UnsignedManifestText + } + if manifestText == "" && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" { // TODO: Check more subtle forms of corruption, too return nil, fmt.Errorf("manifest is missing") } var sds []SizedDigest - scanner := bufio.NewScanner(strings.NewReader(c.ManifestText)) - scanner.Buffer(make([]byte, 1048576), len(c.ManifestText)) + scanner := bufio.NewScanner(strings.NewReader(manifestText)) + scanner.Buffer(make([]byte, 1048576), len(manifestText)) for scanner.Scan() { line := scanner.Text() tokens := strings.Split(line, " ") @@ -39,7 +72,7 @@ func (c *Collection) SizedDigests() ([]SizedDigest, error) { return nil, fmt.Errorf("Invalid stream (<3 tokens): %q", line) } for _, token := range tokens[1:] { - if !manifest.LocatorPattern.MatchString(token) { + if !blockdigest.LocatorPattern.MatchString(token) { // FIXME: ensure it's a file token break } @@ -53,10 +86,34 @@ func (c *Collection) SizedDigests() ([]SizedDigest, error) { return sds, scanner.Err() } -// CollectionList is an arvados#collectionList resource. type CollectionList struct { Items []Collection `json:"items"` ItemsAvailable int `json:"items_available"` Offset int `json:"offset"` Limit int `json:"limit"` } + +var ( + blkRe = regexp.MustCompile(`^ [0-9a-f]{32}\+\d+`) + tokRe = regexp.MustCompile(` ?[^ ]*`) +) + +// PortableDataHash computes the portable data hash of the given +// manifest. +func PortableDataHash(mt string) string { + h := md5.New() + size := 0 + _ = tokRe.ReplaceAllFunc([]byte(mt), func(tok []byte) []byte { + if m := blkRe.Find(tok); m != nil { + // write hash+size, ignore remaining block hints + tok = m + } + n, err := h.Write(tok) + if err != nil { + panic(err) + } + size += n + return nil + }) + return fmt.Sprintf("%x+%d", h.Sum(nil), size) +}