X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/79870ba994f0606c8ed13806f00cb8b23d9b2c83..3429718113f11e999dd745e94ff981a4f3b01c57:/sdk/go/arvados/vocabulary.go diff --git a/sdk/go/arvados/vocabulary.go b/sdk/go/arvados/vocabulary.go index 17b9441a6b..1df43b5fb8 100644 --- a/sdk/go/arvados/vocabulary.go +++ b/sdk/go/arvados/vocabulary.go @@ -7,8 +7,10 @@ package arvados import ( "bytes" "encoding/json" + "errors" "fmt" "reflect" + "strconv" "strings" ) @@ -24,17 +26,28 @@ type VocabularyTag struct { Values map[string]VocabularyTagValue `json:"values"` } -// Cannot have a constant map in Go, so we have to use a function +// Cannot have a constant map in Go, so we have to use a function. +// If you are adding a new system property, it SHOULD start with `arv:`, +// and Check will allow it. This map is for historical exceptions that +// predate standardizing on this prefix. func (v *Vocabulary) systemTagKeys() map[string]bool { return map[string]bool{ - "type": true, - "template_uuid": true, - "groups": true, - "username": true, - "image_timestamp": true, + // Collection keys - set by arvados-cwl-runner + "container_request": true, + "container_uuid": true, + "type": true, + // Collection keys - set by arv-keepdocker (on the way out) "docker-image-repo-tag": true, - "filters": true, - "container_request": true, + // Container request keys - set by arvados-cwl-runner + "cwl_input": true, + "cwl_output": true, + "template_uuid": true, + // Group keys + "filters": true, + // Link keys + "groups": true, + "image_timestamp": true, + "username": true, } } @@ -46,17 +59,41 @@ type VocabularyTagValue struct { Labels []VocabularyLabel `json:"labels"` } +// NewVocabulary creates a new Vocabulary from a JSON definition and a list +// of reserved tag keys that will get special treatment when strict mode is +// enabled. func NewVocabulary(data []byte, managedTagKeys []string) (voc *Vocabulary, err error) { if r := bytes.Compare(data, []byte("")); r == 0 { return &Vocabulary{}, nil } err = json.Unmarshal(data, &voc) if err != nil { - return nil, fmt.Errorf("invalid JSON format error: %q", err) + var serr *json.SyntaxError + if errors.As(err, &serr) { + offset := serr.Offset + errorMsg := string(data[:offset]) + line := 1 + strings.Count(errorMsg, "\n") + column := offset - int64(strings.LastIndex(errorMsg, "\n")+len("\n")) + return nil, fmt.Errorf("invalid JSON format: %q (line %d, column %d)", err, line, column) + } + return nil, fmt.Errorf("invalid JSON format: %q", err) } if reflect.DeepEqual(voc, &Vocabulary{}) { return nil, fmt.Errorf("JSON data provided doesn't match Vocabulary format: %q", data) } + + shouldReportErrors := false + errors := []string{} + + // json.Unmarshal() doesn't error out on duplicate keys. + dupedKeys := []string{} + err = checkJSONDupedKeys(json.NewDecoder(bytes.NewReader(data)), nil, &dupedKeys) + if err != nil { + shouldReportErrors = true + for _, dk := range dupedKeys { + errors = append(errors, fmt.Sprintf("duplicate JSON key %q", dk)) + } + } voc.reservedTagKeys = make(map[string]bool) for _, managedKey := range managedTagKeys { voc.reservedTagKeys[managedKey] = true @@ -64,59 +101,122 @@ func NewVocabulary(data []byte, managedTagKeys []string) (voc *Vocabulary, err e for systemKey := range voc.systemTagKeys() { voc.reservedTagKeys[systemKey] = true } - err = voc.Validate() + validationErrs, err := voc.validate() if err != nil { - return nil, err + shouldReportErrors = true + errors = append(errors, validationErrs...) + } + if shouldReportErrors { + return nil, fmt.Errorf("%s", strings.Join(errors, "\n")) } return voc, nil } -func (v *Vocabulary) Validate() error { - if v == nil { +func checkJSONDupedKeys(d *json.Decoder, path []string, errors *[]string) error { + t, err := d.Token() + if err != nil { + return err + } + delim, ok := t.(json.Delim) + if !ok { return nil } - tagKeys := map[string]bool{} + switch delim { + case '{': + keys := make(map[string]bool) + for d.More() { + t, err := d.Token() + if err != nil { + return err + } + key := t.(string) + + if keys[key] { + *errors = append(*errors, strings.Join(append(path, key), ".")) + } + keys[key] = true + + if err := checkJSONDupedKeys(d, append(path, key), errors); err != nil { + return err + } + } + // consume closing '}' + if _, err := d.Token(); err != nil { + return err + } + case '[': + i := 0 + for d.More() { + if err := checkJSONDupedKeys(d, append(path, strconv.Itoa(i)), errors); err != nil { + return err + } + i++ + } + // consume closing ']' + if _, err := d.Token(); err != nil { + return err + } + } + if len(path) == 0 && len(*errors) > 0 { + return fmt.Errorf("duplicate JSON key(s) found") + } + return nil +} + +func (v *Vocabulary) validate() ([]string, error) { + if v == nil { + return nil, nil + } + tagKeys := map[string]string{} // Checks for Vocabulary strictness if v.StrictTags && len(v.Tags) == 0 { - return fmt.Errorf("vocabulary is strict but no tags are defined") + return nil, fmt.Errorf("vocabulary is strict but no tags are defined") } - // Checks for duplicate tag keys + // Checks for collisions between tag keys, reserved tag keys + // and tag key labels. + errors := []string{} for key := range v.Tags { if v.reservedTagKeys[key] { - return fmt.Errorf("tag key %q is reserved", key) + errors = append(errors, fmt.Sprintf("tag key %q is reserved", key)) } - if tagKeys[key] { - return fmt.Errorf("duplicate tag key %q", key) + lcKey := strings.ToLower(key) + if tagKeys[lcKey] != "" { + errors = append(errors, fmt.Sprintf("duplicate tag key %q", key)) } - tagKeys[key] = true + tagKeys[lcKey] = key for _, lbl := range v.Tags[key].Labels { label := strings.ToLower(lbl.Label) - if tagKeys[label] { - return fmt.Errorf("tag label %q for key %q already seen as a tag key or label", label, key) + if tagKeys[label] != "" { + errors = append(errors, fmt.Sprintf("tag label %q for key %q already seen as a tag key or label", lbl.Label, key)) } - tagKeys[label] = true + tagKeys[label] = lbl.Label } // Checks for value strictness if v.Tags[key].Strict && len(v.Tags[key].Values) == 0 { - return fmt.Errorf("tag key %q is configured as strict but doesn't provide values", key) + errors = append(errors, fmt.Sprintf("tag key %q is configured as strict but doesn't provide values", key)) } - // Checks for value duplication within a key - tagValues := map[string]bool{} + // Checks for collisions between tag values and tag value labels. + tagValues := map[string]string{} for val := range v.Tags[key].Values { - if tagValues[val] { - return fmt.Errorf("duplicate tag value %q for tag %q", val, key) + lcVal := strings.ToLower(val) + if tagValues[lcVal] != "" { + errors = append(errors, fmt.Sprintf("duplicate tag value %q for tag %q", val, key)) } - tagValues[val] = true + // Checks for collisions between labels from different values. + tagValues[lcVal] = val for _, tagLbl := range v.Tags[key].Values[val].Labels { label := strings.ToLower(tagLbl.Label) - if tagValues[label] { - return fmt.Errorf("tag value label %q for pair (%q:%q) already seen as a value key or label", label, key, val) + if tagValues[label] != "" && tagValues[label] != val { + errors = append(errors, fmt.Sprintf("tag value label %q for pair (%q:%q) already seen on value %q", tagLbl.Label, key, val, tagValues[label])) } - tagValues[label] = true + tagValues[label] = val } } } - return nil + if len(errors) > 0 { + return errors, fmt.Errorf("invalid vocabulary") + } + return nil, nil } func (v *Vocabulary) getLabelsToKeys() (labels map[string]string) { @@ -140,6 +240,7 @@ func (v *Vocabulary) getLabelsToValues(key string) (labels map[string]string) { labels = make(map[string]string) if _, ok := v.Tags[key]; ok { for val := range v.Tags[key].Values { + labels[strings.ToLower(val)] = val for _, tagLbl := range v.Tags[key].Values[val].Labels { label := strings.ToLower(tagLbl.Label) labels[label] = val @@ -169,7 +270,7 @@ func (v *Vocabulary) Check(data map[string]interface{}) error { } for key, val := range data { // Checks for key validity - if v.reservedTagKeys[key] { + if strings.HasPrefix(key, "arv:") || v.reservedTagKeys[key] { // Allow reserved keys to be used even if they are not defined in // the vocabulary no matter its strictness. continue @@ -201,11 +302,11 @@ func (v *Vocabulary) Check(data map[string]interface{}) error { return err } default: - return fmt.Errorf("tag value of type %T for key %q is not a valid", singleVal, key) + return fmt.Errorf("value list element type for tag key %q was %T, but expected a string", key, singleVal) } } default: - return fmt.Errorf("tag value of type %T for key %q is not a valid", val, key) + return fmt.Errorf("value type for tag key %q was %T, but expected a string or list of strings", key, val) } } return nil