18947: Refactor keep-web as arvados-server command.
[arvados.git] / sdk / go / arvados / collection.go
index aea0cc043f40f6460feaa6ecc3f26408cdc54e18..389fe4e4841b0006405aee66a697f607c414ce0d 100644 (file)
@@ -5,30 +5,51 @@
 package arvados
 
 import (
-       "bufio"
+       "bytes"
+       "crypto/md5"
        "fmt"
+       "regexp"
        "strings"
        "time"
 
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+       "git.arvados.org/arvados.git/sdk/go/blockdigest"
+)
+
+var (
+       UUIDMatch = regexp.MustCompile(`^[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}$`).MatchString
+       PDHMatch  = regexp.MustCompile(`^[0-9a-f]{32}\+\d+$`).MatchString
 )
 
 // Collection is an arvados#collection resource.
 type Collection struct {
-       UUID                   string     `json:"uuid,omitempty"`
-       OwnerUUID              string     `json:"owner_uuid,omitempty"`
-       TrashAt                *time.Time `json:"trash_at,omitempty"`
-       ManifestText           string     `json:"manifest_text,omitempty"`
-       UnsignedManifestText   string     `json:"unsigned_manifest_text,omitempty"`
-       Name                   string     `json:"name,omitempty"`
-       CreatedAt              *time.Time `json:"created_at,omitempty"`
-       ModifiedAt             *time.Time `json:"modified_at,omitempty"`
-       PortableDataHash       string     `json:"portable_data_hash,omitempty"`
-       ReplicationConfirmed   *int       `json:"replication_confirmed,omitempty"`
-       ReplicationConfirmedAt *time.Time `json:"replication_confirmed_at,omitempty"`
-       ReplicationDesired     *int       `json:"replication_desired,omitempty"`
-       DeleteAt               *time.Time `json:"delete_at,omitempty"`
-       IsTrashed              bool       `json:"is_trashed,omitempty"`
+       UUID                      string                 `json:"uuid"`
+       Etag                      string                 `json:"etag"`
+       OwnerUUID                 string                 `json:"owner_uuid"`
+       TrashAt                   *time.Time             `json:"trash_at"`
+       ManifestText              string                 `json:"manifest_text"`
+       UnsignedManifestText      string                 `json:"unsigned_manifest_text"`
+       Name                      string                 `json:"name"`
+       CreatedAt                 time.Time              `json:"created_at"`
+       ModifiedAt                time.Time              `json:"modified_at"`
+       ModifiedByClientUUID      string                 `json:"modified_by_client_uuid"`
+       ModifiedByUserUUID        string                 `json:"modified_by_user_uuid"`
+       PortableDataHash          string                 `json:"portable_data_hash"`
+       ReplicationConfirmed      *int                   `json:"replication_confirmed"`
+       ReplicationConfirmedAt    *time.Time             `json:"replication_confirmed_at"`
+       ReplicationDesired        *int                   `json:"replication_desired"`
+       StorageClassesDesired     []string               `json:"storage_classes_desired"`
+       StorageClassesConfirmed   []string               `json:"storage_classes_confirmed"`
+       StorageClassesConfirmedAt *time.Time             `json:"storage_classes_confirmed_at"`
+       DeleteAt                  *time.Time             `json:"delete_at"`
+       IsTrashed                 bool                   `json:"is_trashed"`
+       Properties                map[string]interface{} `json:"properties"`
+       WritableBy                []string               `json:"writable_by,omitempty"`
+       FileCount                 int                    `json:"file_count"`
+       FileSizeTotal             int64                  `json:"file_size_total"`
+       Version                   int                    `json:"version"`
+       PreserveVersion           bool                   `json:"preserve_version"`
+       CurrentVersionUUID        string                 `json:"current_version_uuid"`
+       Description               string                 `json:"description"`
 }
 
 func (c Collection) resourceName() string {
@@ -37,43 +58,95 @@ func (c Collection) resourceName() string {
 
 // SizedDigests returns the hash+size part of each data block
 // referenced by the collection.
+//
+// Zero-length blocks are not included.
 func (c *Collection) SizedDigests() ([]SizedDigest, error) {
-       manifestText := c.ManifestText
-       if manifestText == "" {
-               manifestText = c.UnsignedManifestText
+       manifestText := []byte(c.ManifestText)
+       if len(manifestText) == 0 {
+               manifestText = []byte(c.UnsignedManifestText)
        }
-       if manifestText == "" && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" {
+       if len(manifestText) == 0 && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" {
                // TODO: Check more subtle forms of corruption, too
                return nil, fmt.Errorf("manifest is missing")
        }
-       var sds []SizedDigest
-       scanner := bufio.NewScanner(strings.NewReader(manifestText))
-       scanner.Buffer(make([]byte, 1048576), len(manifestText))
-       for scanner.Scan() {
-               line := scanner.Text()
-               tokens := strings.Split(line, " ")
+       sds := make([]SizedDigest, 0, len(manifestText)/40)
+       for _, line := range bytes.Split(manifestText, []byte{'\n'}) {
+               if len(line) == 0 {
+                       continue
+               }
+               tokens := bytes.Split(line, []byte{' '})
                if len(tokens) < 3 {
                        return nil, fmt.Errorf("Invalid stream (<3 tokens): %q", line)
                }
                for _, token := range tokens[1:] {
-                       if !blockdigest.LocatorPattern.MatchString(token) {
+                       if !blockdigest.LocatorPattern.Match(token) {
                                // FIXME: ensure it's a file token
                                break
                        }
+                       if bytes.HasPrefix(token, []byte("d41d8cd98f00b204e9800998ecf8427e+0")) {
+                               // Exclude "empty block" placeholder
+                               continue
+                       }
                        // FIXME: shouldn't assume 32 char hash
-                       if i := strings.IndexRune(token[33:], '+'); i >= 0 {
+                       if i := bytes.IndexRune(token[33:], '+'); i >= 0 {
                                token = token[:33+i]
                        }
-                       sds = append(sds, SizedDigest(token))
+                       sds = append(sds, SizedDigest(string(token)))
                }
        }
-       return sds, scanner.Err()
+       return sds, nil
 }
 
-// CollectionList is an arvados#collectionList resource.
 type CollectionList struct {
        Items          []Collection `json:"items"`
        ItemsAvailable int          `json:"items_available"`
        Offset         int          `json:"offset"`
        Limit          int          `json:"limit"`
 }
+
+var (
+       // blkRe matches the leading " hash+size" of a block locator token.
+       blkRe = regexp.MustCompile(`^ [0-9a-f]{32}\+\d+`)
+       // tokRe matches one token, including its leading space if any.
+       tokRe = regexp.MustCompile(` ?[^ ]*`)
+)
+
+// PortableDataHash computes the portable data hash of the given
+// manifest: the MD5 of mt with everything after hash+size dropped
+// from each block locator, then "+" and the length of the hashed text.
+func PortableDataHash(mt string) string {
+       h := md5.New()
+       size := 0
+       // ReplaceAllFunc only visits each token; its result is discarded.
+       _ = tokRe.ReplaceAllFunc([]byte(mt), func(tok []byte) []byte {
+               if m := blkRe.Find(tok); m != nil {
+                       tok = m // write hash+size, ignore remaining block hints
+               }
+               n, _ := h.Write(tok) // hash.Hash.Write never returns an error
+               size += n
+               return nil
+       })
+       return fmt.Sprintf("%x+%d", h.Sum(nil), size)
+}
+
+// CollectionIDFromDNSName extracts a collection ID from the first
+// label of DNS name s: a UUID, or a PDH written with "-" in place of
+// "+". It returns "" if the label is neither.
+func CollectionIDFromDNSName(s string) string {
+       // Keep only the leftmost label, i.e., strip the domain.
+       label := strings.SplitN(s, ".", 2)[0]
+       // Names like {uuid}--collections.example.com serve the same
+       // purpose as {uuid}.collections.example.com but can reduce
+       // cost/effort of using [additional] wildcard certificates.
+       label = strings.SplitN(label, "--", 2)[0]
+       // A PDH appears with "-" in place of its "+"; restore it.
+       pdh := strings.Replace(label, "-", "+", 1)
+       switch {
+       case UUIDMatch(label):
+               return label
+       case PDHMatch(pdh):
+               return pdh
+       default:
+               return ""
+       }
+}