// Copyright (C) The Arvados Authors. All rights reserved.
//
// SPDX-License-Identifier: Apache-2.0

package arvados

import (
	"bytes"
	"crypto/md5"
	"fmt"
	"regexp"
	"strings"
	"time"

	"git.arvados.org/arvados.git/sdk/go/blockdigest"
)

var (
	// UUIDMatch reports whether its argument consists, in full, of
	// three dash-separated groups of 5, 5, and 15 characters drawn
	// from [a-z0-9].
	UUIDMatch = regexp.MustCompile(`^[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}$`).MatchString
	// PDHMatch reports whether its argument consists, in full, of 32
	// lowercase hex digits followed by '+' and one or more decimal
	// digits (the "hash+size" form returned by PortableDataHash).
	PDHMatch  = regexp.MustCompile(`^[0-9a-f]{32}\+\d+$`).MatchString
)

// Collection is an arvados#collection resource.
//
// Each field is encoded to / decoded from the JSON attribute named
// in its struct tag. Pointer-typed fields (TrashAt, DeleteAt, the
// Replication* and StorageClassesConfirmedAt fields) can be nil.
// WritableBy is tagged omitempty, so it is left out of encoded JSON
// when it is empty.
//
// SizedDigests reads ManifestText, falling back to
// UnsignedManifestText when ManifestText is empty, and consults
// PortableDataHash to decide whether an empty manifest is
// acceptable.
type Collection struct {
	UUID                      string                 `json:"uuid"`
	Etag                      string                 `json:"etag"`
	OwnerUUID                 string                 `json:"owner_uuid"`
	TrashAt                   *time.Time             `json:"trash_at"`
	ManifestText              string                 `json:"manifest_text"`
	UnsignedManifestText      string                 `json:"unsigned_manifest_text"`
	Name                      string                 `json:"name"`
	CreatedAt                 time.Time              `json:"created_at"`
	ModifiedAt                time.Time              `json:"modified_at"`
	ModifiedByClientUUID      string                 `json:"modified_by_client_uuid"`
	ModifiedByUserUUID        string                 `json:"modified_by_user_uuid"`
	PortableDataHash          string                 `json:"portable_data_hash"`
	ReplicationConfirmed      *int                   `json:"replication_confirmed"`
	ReplicationConfirmedAt    *time.Time             `json:"replication_confirmed_at"`
	ReplicationDesired        *int                   `json:"replication_desired"`
	StorageClassesDesired     []string               `json:"storage_classes_desired"`
	StorageClassesConfirmed   []string               `json:"storage_classes_confirmed"`
	StorageClassesConfirmedAt *time.Time             `json:"storage_classes_confirmed_at"`
	DeleteAt                  *time.Time             `json:"delete_at"`
	IsTrashed                 bool                   `json:"is_trashed"`
	Properties                map[string]interface{} `json:"properties"`
	WritableBy                []string               `json:"writable_by,omitempty"`
	FileCount                 int                    `json:"file_count"`
	FileSizeTotal             int64                  `json:"file_size_total"`
	Version                   int                    `json:"version"`
	PreserveVersion           bool                   `json:"preserve_version"`
	CurrentVersionUUID        string                 `json:"current_version_uuid"`
	Description               string                 `json:"description"`
}

// resourceName returns the fixed name "collection" for this
// resource type.
func (c Collection) resourceName() string {
	const name = "collection"
	return name
}

// SizedDigests lists the "hash+size" portion of every data block
// locator found in the collection's manifest, in manifest order.
// Any hints following the size (e.g. "+Asignature") are dropped.
//
// The manifest is taken from ManifestText, or from
// UnsignedManifestText when ManifestText is empty. If both are
// empty, an error is returned unless PortableDataHash is that of
// the empty manifest.
//
// Zero-length blocks are not included.
func (c *Collection) SizedDigests() ([]SizedDigest, error) {
	const emptyBlock = "d41d8cd98f00b204e9800998ecf8427e+0"

	manifest := c.ManifestText
	if manifest == "" {
		manifest = c.UnsignedManifestText
	}
	if manifest == "" && c.PortableDataHash != emptyBlock {
		// TODO: Check more subtle forms of corruption, too
		return nil, fmt.Errorf("manifest is missing")
	}

	raw := []byte(manifest)
	// Rough capacity guess: one locator per ~40 bytes of manifest.
	digests := make([]SizedDigest, 0, len(raw)/40)
	for _, stream := range bytes.Split(raw, []byte("\n")) {
		if len(stream) == 0 {
			continue
		}
		fields := bytes.Split(stream, []byte(" "))
		if len(fields) < 3 {
			return nil, fmt.Errorf("Invalid stream (<3 tokens): %q", stream)
		}
		// fields[0] is the stream name; locators follow it, and
		// the first field that is not a locator ends the scan of
		// this stream.
		for _, field := range fields[1:] {
			if !blockdigest.LocatorPattern.Match(field) {
				// FIXME: ensure it's a file token
				break
			}
			if bytes.HasPrefix(field, []byte(emptyBlock)) {
				// Exclude "empty block" placeholder
				continue
			}
			// Keep only "hash+size": cut at the first '+' after
			// the size, if there is one.
			// FIXME: shouldn't assume 32 char hash
			if hint := bytes.IndexByte(field[33:], '+'); hint >= 0 {
				field = field[:33+hint]
			}
			digests = append(digests, SizedDigest(string(field)))
		}
	}
	return digests, nil
}

// CollectionList is a list of Collection items together with the
// paging fields "items_available", "offset", and "limit", as named
// in the JSON tags.
//
// NOTE(review): presumably this mirrors the response of a
// collection "list" API call -- confirm against the API client.
type CollectionList struct {
	Items          []Collection `json:"items"`
	ItemsAvailable int          `json:"items_available"`
	Offset         int          `json:"offset"`
	Limit          int          `json:"limit"`
}

// PortableDataHash returns the portable data hash of the manifest
// text mt, formatted as "<md5 hex>+<size>".
//
// The digest and size are taken over the manifest with the "extra"
// part of every block token removed, i.e. a token that looks like
// "abcdef0123456789abcdef0123456789+12345+extra" is hashed and
// counted as "abcdef0123456789abcdef0123456789+12345".
func PortableDataHash(mt string) string {
	// The manifest is consumed one space-delimited chunk at a
	// time. This works because (A) every block token -- even the
	// first and last in a stream -- has a space on both sides, and
	// (B) every non-block token either starts with '.' or contains
	// ':'.
	//
	// A regexp-based approach would be more readable, but very
	// slow.
	const (
		hashLen     = 32          // hex digits in the hash part
		sizeStart   = hashLen + 1 // index of the first size digit
		minBlockLen = hashLen + 2 // shortest block token: hash, '+', one digit
	)
	space := []byte{' '}
	digest := md5.New()
	written := 0
	for rest := []byte(mt); len(rest) > 0; {
		end := bytes.IndexByte(rest, ' ')
		if end < 0 {
			// Final chunk of the manifest, which is never a
			// block token: hash it as is.
			n, _ := digest.Write(rest)
			written += n
			break
		}
		// chunk is everything up to the next space. After the last
		// file token of a stream it also spans the "\n" and the
		// next stream's name, which is harmless: such a chunk
		// contains ':' or starts with '.', so it is never
		// mistaken for a block token.
		chunk := rest[:end]
		rest = rest[end+1:]

		isBlock := len(chunk) >= minBlockLen &&
			chunk[hashLen] == '+' &&
			chunk[0] != '.' &&
			bytes.IndexByte(chunk[:hashLen], ':') < 0
		if isBlock {
			// Trim "hash+size+extra" down to "hash+size".
			if extra := bytes.IndexByte(chunk[sizeStart:], '+'); extra >= 0 {
				chunk = chunk[:sizeStart+extra]
			}
		}
		n, _ := digest.Write(chunk)
		digest.Write(space)
		written += n + 1
	}
	return fmt.Sprintf("%x+%d", digest.Sum(nil), written)
}

// CollectionIDFromDNSName extracts a collection ID from the leading
// part of the DNS name s. It returns the UUID if s begins with one,
// the PDH (with '+' restored) if s begins with a PDH written with
// '-' in place of '+', and "" otherwise.
func CollectionIDFromDNSName(s string) string {
	label := s
	// Keep only the first DNS label, dropping the domain.
	if dot := strings.IndexByte(label, '.'); dot >= 0 {
		label = label[:dot]
	}
	// {uuid}--collections.example.com is accepted as an
	// alternative to {uuid}.collections.example.com, which can
	// reduce the cost/effort of using [additional] wildcard
	// certificates; discard everything from the "--" onward.
	if sep := strings.Index(label, "--"); sep >= 0 {
		label = label[:sep]
	}
	// Only the first '-' is converted: it stands in for the '+'
	// between hash and size.
	pdh := strings.Replace(label, "-", "+", 1)
	switch {
	case UUIDMatch(label):
		return label
	case PDHMatch(pdh):
		return pdh
	default:
		return ""
	}
}