1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: Apache-2.0
// Vocabulary is the JSON-decodable description of the tags a vocabulary
// defines, plus the set of reserved tag keys that Check always accepts.
17 type Vocabulary struct {
// reservedTagKeys is never read from or written to JSON (tag "-").
// NewVocabulary fills it with the managed keys it is given plus the keys
// returned by systemTagKeys.
18 reservedTagKeys map[string]bool `json:"-"`
// StrictTags makes Check reject keys that are neither defined in Tags nor
// reserved; validate also requires at least one tag when it is set.
19 StrictTags bool `json:"strict_tags"`
// Tags maps each tag key to its definition.
20 Tags map[string]VocabularyTag `json:"tags"`
// VocabularyTag is the definition of a single tag key.
23 type VocabularyTag struct {
// Strict makes checkValue reject values that are not defined in Values;
// validate reports a strict tag that provides no values.
24 Strict bool `json:"strict"`
// Labels are alternative names for the tag key. Check reports a key that
// matches one of them as an alias rather than accepting it.
25 Labels []VocabularyLabel `json:"labels"`
// Values maps each value defined for this tag to its definition.
26 Values map[string]VocabularyTagValue `json:"values"`
// systemTagKeys returns the built-in tag keys that are always treated as
// reserved. A new map is built on every call; NewVocabulary copies its
// keys into reservedTagKeys.
//
29 // Cannot have a constant map in Go, so we have to use a function.
30 // If you are adding a new system property, it SHOULD start with `arv:`,
31 // and Check will allow it. This map is for historical exceptions that
32 // predate standardizing on this prefix.
33 func (v *Vocabulary) systemTagKeys() map[string]bool {
34 return map[string]bool{
35 // Collection keys - set by arvados-cwl-runner
36 "container_request": true,
37 "container_uuid": true,
39 // legacy Collection key, set by arvados-cwl-runner,
40 // was changed to container_uuid in Arvados 2.6.0 but
41 // still gets set if an older version of a-c-r is
45 // Set by several components to indicate the intended
46 // role of a collection
49 // Collection keys - set by arv-keepdocker (on the way out)
50 "docker-image-repo-tag": true,
52 // Container request keys - set by arvados-cwl-runner
56 // Container request key set alongside by Workbench 2
57 // to link to the Workflow definition used to launch
59 "template_uuid": true,
67 "image_timestamp": true,
// VocabularyLabel is a single label attached to a tag key (via
// VocabularyTag.Labels) or to a tag value (via VocabularyTagValue.Labels).
72 type VocabularyLabel struct {
// Label is the label text; the lookups in this file lower-case it before
// comparing.
73 Label string `json:"label"`
// VocabularyTagValue is the definition of one value of a tag.
76 type VocabularyTagValue struct {
// Labels are alternative names for the value. checkValue reports a value
// that matches one of them as an alias rather than accepting it.
77 Labels []VocabularyLabel `json:"labels"`
80 // NewVocabulary creates a new Vocabulary from a JSON definition and a list
81 // of reserved tag keys that will get special treatment when strict mode is
83 func NewVocabulary(data []byte, managedTagKeys []string) (voc *Vocabulary, err error) {
// Empty input is accepted: it yields an empty Vocabulary and no error.
84 if r := bytes.Compare(data, []byte("")); r == 0 {
85 return &Vocabulary{}, nil
87 err = json.Unmarshal(data, &voc)
// JSON syntax errors get a line/column position added to the message;
// any other unmarshal error is reported without one.
89 var serr *json.SyntaxError
90 if errors.As(err, &serr) {
// errorMsg is the part of the input that precedes offset.
92 errorMsg := string(data[:offset])
// Line is 1-based: one more than the number of newlines before offset.
93 line := 1 + strings.Count(errorMsg, "\n")
// Column is the distance from the start of the current line. With no
// preceding newline LastIndex returns -1, so column equals offset.
94 column := offset - int64(strings.LastIndex(errorMsg, "\n")+len("\n"))
95 return nil, fmt.Errorf("invalid JSON format: %q (line %d, column %d)", err, line, column)
97 return nil, fmt.Errorf("invalid JSON format: %q", err)
// Input that unmarshals without error but leaves voc equal to a
// zero-value Vocabulary is rejected as not being a vocabulary.
99 if reflect.DeepEqual(voc, &Vocabulary{}) {
100 return nil, fmt.Errorf("JSON data provided doesn't match Vocabulary format: %q", data)
// From here on problems are accumulated instead of returned one by one,
// so the caller gets duplicate-key and validation messages together.
103 shouldReportErrors := false
106 // json.Unmarshal() doesn't error out on duplicate keys.
107 dupedKeys := []string{}
108 err = checkJSONDupedKeys(json.NewDecoder(bytes.NewReader(data)), nil, &dupedKeys)
110 shouldReportErrors = true
111 for _, dk := range dupedKeys {
// Note: errors here is a local list of message strings, not the errors
// package used for errors.As above.
112 errors = append(errors, fmt.Sprintf("duplicate JSON key %q", dk))
// Reserved keys are the caller-supplied managed keys plus the built-in
// system keys. They are set before validate runs because validate
// consults reservedTagKeys.
115 voc.reservedTagKeys = make(map[string]bool)
116 for _, managedKey := range managedTagKeys {
117 voc.reservedTagKeys[managedKey] = true
119 for systemKey := range voc.systemTagKeys() {
120 voc.reservedTagKeys[systemKey] = true
122 validationErrs, err := voc.validate()
124 shouldReportErrors = true
125 errors = append(errors, validationErrs...)
// All accumulated messages are returned as one error, one message per
// line, and no Vocabulary is returned.
127 if shouldReportErrors {
128 return nil, fmt.Errorf("%s", strings.Join(errors, "\n"))
// checkJSONDupedKeys reads JSON tokens from d, calling itself for nested
// values, and appends entries to *errors describing duplicated keys.
//
// path holds the keys and array indices leading to the value currently
// being read; NewVocabulary passes nil for the top-level value. Entries
// added to *errors are those path elements joined with ".". The outermost
// call (empty path) returns an error when any entry was recorded.
133 func checkJSONDupedKeys(d *json.Decoder, path []string, errors *[]string) error {
138 delim, ok := t.(json.Delim)
// NOTE(review): keys presumably records the key names already seen in the
// object being read — confirm against the full function.
144 keys := make(map[string]bool)
// Record the key as a dot-separated path, e.g. parent.child.key.
153 *errors = append(*errors, strings.Join(append(path, key), "."))
// Descend into the value stored under key, extending the path with it.
157 if err := checkJSONDupedKeys(d, append(path, key), errors); err != nil {
161 // consume closing '}'
162 if _, err := d.Token(); err != nil {
// Array elements extend the path with their decimal index i.
168 if err := checkJSONDupedKeys(d, append(path, strconv.Itoa(i)), errors); err != nil {
173 // consume closing ']'
174 if _, err := d.Token(); err != nil {
// Only the outermost call turns recorded entries into an error, so
// nested calls keep collecting instead of stopping at the first one.
178 if len(path) == 0 && len(*errors) > 0 {
179 return fmt.Errorf("duplicate JSON key(s) found")
// validate checks the vocabulary definition for internal consistency.
// Problems found while walking the tags are collected as message strings.
// Keys, values and labels are lower-cased before being compared, so
// collisions are detected case-insensitively. A strict vocabulary with no
// tags is reported immediately with a nil message list.
184 func (v *Vocabulary) validate() ([]string, error) {
// tagKeys is shared by all tags: it is indexed by lower-cased name and is
// used to detect key/label collisions across the whole vocabulary.
188 tagKeys := map[string]string{}
189 // Checks for Vocabulary strictness
190 if v.StrictTags && len(v.Tags) == 0 {
191 return nil, fmt.Errorf("vocabulary is strict but no tags are defined")
193 // Checks for collisions between tag keys, reserved tag keys
194 // and tag key labels.
196 for key := range v.Tags {
// Reserved keys (managed or system) may not be redefined by the vocabulary.
197 if v.reservedTagKeys[key] {
198 errors = append(errors, fmt.Sprintf("tag key %q is reserved", key))
200 lcKey := strings.ToLower(key)
201 if tagKeys[lcKey] != "" {
202 errors = append(errors, fmt.Sprintf("duplicate tag key %q", key))
205 for _, lbl := range v.Tags[key].Labels {
206 label := strings.ToLower(lbl.Label)
207 if tagKeys[label] != "" {
208 errors = append(errors, fmt.Sprintf("tag label %q for key %q already seen as a tag key or label", lbl.Label, key))
// Remember the label under its lower-cased form, keeping the original
// spelling as the stored value.
210 tagKeys[label] = lbl.Label
212 // Checks for value strictness
213 if v.Tags[key].Strict && len(v.Tags[key].Values) == 0 {
214 errors = append(errors, fmt.Sprintf("tag key %q is configured as strict but doesn't provide values", key))
216 // Checks for collisions between tag values and tag value labels.
// tagValues is created anew for each tag key, so value and value-label
// collisions are only detected within a single tag.
217 tagValues := map[string]string{}
218 for val := range v.Tags[key].Values {
219 lcVal := strings.ToLower(val)
220 if tagValues[lcVal] != "" {
221 errors = append(errors, fmt.Sprintf("duplicate tag value %q for tag %q", val, key))
223 // Checks for collisions between labels from different values.
224 tagValues[lcVal] = val
225 for _, tagLbl := range v.Tags[key].Values[val].Labels {
226 label := strings.ToLower(tagLbl.Label)
// A label that maps back to the same value (for example one equal to the
// value itself) is fine; only a clash with a different value is reported.
227 if tagValues[label] != "" && tagValues[label] != val {
228 errors = append(errors, fmt.Sprintf("tag value label %q for pair (%q:%q) already seen on value %q", tagLbl.Label, key, val, tagValues[label]))
230 tagValues[label] = val
// The collected messages are handed back together with a generic error.
235 return errors, fmt.Errorf("invalid vocabulary")
// getLabelsToKeys builds a lookup map from the labels of every tag in
// v.Tags, lower-casing each label first. Check indexes the result with a
// lower-cased key to find the tag key that an alias stands for.
// NOTE(review): the map is rebuilt on every call — confirm whether callers
// on a hot path should cache it.
240 func (v *Vocabulary) getLabelsToKeys() (labels map[string]string) {
244 labels = make(map[string]string)
245 for key, val := range v.Tags {
246 for _, lbl := range val.Labels {
247 label := strings.ToLower(lbl.Label)
// getLabelsToValues builds a lookup map for the values of the tag named
// key. The map starts empty and is only filled when key is defined in
// v.Tags. checkValue indexes the result with a lower-cased value to find
// the value an alias stands for.
254 func (v *Vocabulary) getLabelsToValues(key string) (labels map[string]string) {
258 labels = make(map[string]string)
259 if _, ok := v.Tags[key]; ok {
260 for val := range v.Tags[key].Values {
// Each value is also registered under its own lower-cased form, so a
// value that differs only in letter case resolves to the defined value.
261 labels[strings.ToLower(val)] = val
262 for _, tagLbl := range v.Tags[key].Values[val].Labels {
263 label := strings.ToLower(tagLbl.Label)
// checkValue checks one value given for tag key against the values defined
// for that key. A value that appears verbatim in Values needs no further
// checks. Otherwise the lower-cased value is looked up in
// getLabelsToValues: a hit is reported as an alias together with the value
// that must be used instead, and a miss is an error only for strict tags.
271 func (v *Vocabulary) checkValue(key, val string) error {
272 if _, ok := v.Tags[key].Values[val]; !ok {
273 lcVal := strings.ToLower(val)
// The lookup map also contains lower-cased values, so a value that differs
// from a defined one only in letter case is reported as an alias too.
274 correctValue, ok := v.getLabelsToValues(key)[lcVal]
276 return fmt.Errorf("tag value %q for key %q is an alias, must be provided as %q", val, key, correctValue)
277 } else if v.Tags[key].Strict {
278 return fmt.Errorf("tag value %q is not valid for key %q", val, key)
284 // Check validates the given data against the vocabulary.
// Each entry of data is a tag key with either a string value or a list of
// string values. Problems are returned as soon as they are found while
// ranging over data; because Go does not specify map iteration order,
// which problem is reported when there are several is not deterministic.
285 func (v *Vocabulary) Check(data map[string]interface{}) error {
289 for key, val := range data {
290 // Checks for key validity
291 if strings.HasPrefix(key, "arv:") || v.reservedTagKeys[key] {
292 // Allow reserved keys to be used even if they are not defined in
293 // the vocabulary no matter its strictness.
296 if _, ok := v.Tags[key]; !ok {
// The key is not defined verbatim: see whether its lower-cased form is a
// known label, in which case the caller must use the real key instead.
297 lcKey := strings.ToLower(key)
298 correctKey, ok := v.getLabelsToKeys()[lcKey]
300 return fmt.Errorf("tag key %q is an alias, must be provided as %q", key, correctKey)
301 } else if v.StrictTags {
302 return fmt.Errorf("tag key %q is not defined in the vocabulary", key)
304 // If the key is not defined, we don't need to check the value
307 // Checks for value validity -- key is defined
// The value is checked according to its dynamic type; each string is
// passed to checkValue, both for a single value and for list elements.
308 switch val := val.(type) {
310 err := v.checkValue(key, val)
315 for _, singleVal := range val {
316 switch singleVal := singleVal.(type) {
318 err := v.checkValue(key, singleVal)
// A list element that is not a string is rejected.
323 return fmt.Errorf("value list element type for tag key %q was %T, but expected a string", key, singleVal)
// Anything other than a string or a list is rejected.
327 return fmt.Errorf("value type for tag key %q was %T, but expected a string or list of strings", key, val)