Merge branch '17394-collectionfs-storage-class' into main
[arvados.git] / sdk / go / arvados / collection.go
index 623a8d985f6cbe93a903707635447a822c04f3a7..cec20279d1fdc81cb56936ed731dea6c6a4c0e8c 100644 (file)
@@ -6,7 +6,9 @@ package arvados
 
 import (
        "bufio"
+       "crypto/md5"
        "fmt"
+       "regexp"
        "strings"
        "time"
 
@@ -24,8 +26,8 @@ type Collection struct {
        Name                      string                 `json:"name"`
        CreatedAt                 time.Time              `json:"created_at"`
        ModifiedAt                time.Time              `json:"modified_at"`
-       ModifiedByClientUUID      *string                `json:"modified_by_client_uuid"`
-       ModifiedByUserUUID        *string                `json:"modified_by_user_uuid"`
+       ModifiedByClientUUID      string                 `json:"modified_by_client_uuid"`
+       ModifiedByUserUUID        string                 `json:"modified_by_user_uuid"`
        PortableDataHash          string                 `json:"portable_data_hash"`
        ReplicationConfirmed      *int                   `json:"replication_confirmed"`
        ReplicationConfirmedAt    *time.Time             `json:"replication_confirmed_at"`
@@ -39,10 +41,10 @@ type Collection struct {
        WritableBy                []string               `json:"writable_by,omitempty"`
        FileCount                 int                    `json:"file_count"`
        FileSizeTotal             int64                  `json:"file_size_total"`
-       Version                   *int                   `json:"version"`
+       Version                   int                    `json:"version"`
        PreserveVersion           bool                   `json:"preserve_version"`
        CurrentVersionUUID        string                 `json:"current_version_uuid"`
-       Description               *string                `json:"description"`
+       Description               string                 `json:"description"`
 }
 
 func (c Collection) resourceName() string {
@@ -51,6 +53,8 @@ func (c Collection) resourceName() string {
 
 // SizedDigests returns the hash+size part of each data block
 // referenced by the collection.
+//
+// Zero-length blocks are not included.
 func (c *Collection) SizedDigests() ([]SizedDigest, error) {
        manifestText := c.ManifestText
        if manifestText == "" {
@@ -74,6 +78,10 @@ func (c *Collection) SizedDigests() ([]SizedDigest, error) {
                                // FIXME: ensure it's a file token
                                break
                        }
+                       if strings.HasPrefix(token, "d41d8cd98f00b204e9800998ecf8427e+0") {
+                               // Exclude "empty block" placeholder
+                               continue
+                       }
                        // FIXME: shouldn't assume 32 char hash
                        if i := strings.IndexRune(token[33:], '+'); i >= 0 {
                                token = token[:33+i]
@@ -90,3 +98,28 @@ type CollectionList struct {
        Offset         int          `json:"offset"`
        Limit          int          `json:"limit"`
 }
+
+var (
+       blkRe = regexp.MustCompile(`^ [0-9a-f]{32}\+\d+`) // token start: one space, 32 lowercase hex digits, "+", decimal digits
+       tokRe = regexp.MustCompile(` ?[^ ]*`)             // one token: an optional space plus a (possibly empty) run of non-space characters
+)
+
+// PortableDataHash computes the portable data hash of the given
+// manifest.
+func PortableDataHash(mt string) string {
+       h := md5.New()
+       size := 0
+       _ = tokRe.ReplaceAllFunc([]byte(mt), func(tok []byte) []byte {
+               if m := blkRe.Find(tok); m != nil {
+                       // write hash+size, ignore remaining block hints
+                       tok = m
+               }
+               n, err := h.Write(tok)
+               if err != nil {
+                       panic(err)
+               }
+               size += n
+               return nil
+       })
+       return fmt.Sprintf("%x+%d", h.Sum(nil), size)
+}