X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/f4f0614d06c4ac9abeb34dc6df7c88ecb5feebda..97d0e12a78e9245c8ed29c070ffe0399e5cd6cb4:/sdk/go/arvados/collection.go diff --git a/sdk/go/arvados/collection.go b/sdk/go/arvados/collection.go index 623a8d985f..cec20279d1 100644 --- a/sdk/go/arvados/collection.go +++ b/sdk/go/arvados/collection.go @@ -6,7 +6,9 @@ package arvados import ( "bufio" + "crypto/md5" "fmt" + "regexp" "strings" "time" @@ -24,8 +26,8 @@ type Collection struct { Name string `json:"name"` CreatedAt time.Time `json:"created_at"` ModifiedAt time.Time `json:"modified_at"` - ModifiedByClientUUID *string `json:"modified_by_client_uuid"` - ModifiedByUserUUID *string `json:"modified_by_user_uuid"` + ModifiedByClientUUID string `json:"modified_by_client_uuid"` + ModifiedByUserUUID string `json:"modified_by_user_uuid"` PortableDataHash string `json:"portable_data_hash"` ReplicationConfirmed *int `json:"replication_confirmed"` ReplicationConfirmedAt *time.Time `json:"replication_confirmed_at"` @@ -39,10 +41,10 @@ type Collection struct { WritableBy []string `json:"writable_by,omitempty"` FileCount int `json:"file_count"` FileSizeTotal int64 `json:"file_size_total"` - Version *int `json:"version"` + Version int `json:"version"` PreserveVersion bool `json:"preserve_version"` CurrentVersionUUID string `json:"current_version_uuid"` - Description *string `json:"description"` + Description string `json:"description"` } func (c Collection) resourceName() string { @@ -51,6 +53,8 @@ func (c Collection) resourceName() string { // SizedDigests returns the hash+size part of each data block // referenced by the collection. +// +// Zero-length blocks are not included. func (c *Collection) SizedDigests() ([]SizedDigest, error) { manifestText := c.ManifestText if manifestText == "" { @@ -74,6 +78,10 @@ func (c *Collection) SizedDigests() ([]SizedDigest, error) { // FIXME: ensure it's a file token break } + if strings.HasPrefix(token, "d41d8cd98f00b204e9800998ecf8427e+0") { + // Exclude "empty block" placeholder + continue + } // FIXME: shouldn't assume 32 char hash if i := strings.IndexRune(token[33:], '+'); i >= 0 { token = token[:33+i] @@ -90,3 +98,28 @@ type CollectionList struct { Offset int `json:"offset"` Limit int `json:"limit"` } + +var ( + blkRe = regexp.MustCompile(`^ [0-9a-f]{32}\+\d+`) + tokRe = regexp.MustCompile(` ?[^ ]*`) +) + +// PortableDataHash computes the portable data hash of the given +// manifest. +func PortableDataHash(mt string) string { + h := md5.New() + size := 0 + _ = tokRe.ReplaceAllFunc([]byte(mt), func(tok []byte) []byte { + if m := blkRe.Find(tok); m != nil { + // write hash+size, ignore remaining block hints + tok = m + } + n, err := h.Write(tok) + if err != nil { + panic(err) + } + size += n + return nil + }) + return fmt.Sprintf("%x+%d", h.Sum(nil), size) +}