1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: Apache-2.0
// systemKeyPattern is the regular expression source for system tag keys: the
// literal prefix "arv:" at the start of the key, immediately followed by an
// ASCII letter. Check compiles it and treats every matching key as reserved.
18 const systemKeyPattern = `^arv:[a-zA-Z]`
// Vocabulary is the in-memory form of a vocabulary definition, decoded from
// JSON by NewVocabulary.
20 type Vocabulary struct {
// reservedTagKeys is the set of keys that validate refuses to see defined in
// Tags and that Check accepts as reserved. NewVocabulary fills it from the
// managed keys it receives plus systemTagKeys. It is unexported and tagged
// "-", so it takes no part in JSON encoding or decoding.
21 reservedTagKeys map[string]bool `json:"-"`
// StrictTags makes Check reject keys that are neither defined in Tags nor
// reserved. validate rejects a vocabulary that sets it without defining tags.
22 StrictTags bool `json:"strict_tags"`
// Tags maps each tag key to its definition.
23 Tags map[string]VocabularyTag `json:"tags"`
// VocabularyTag is the definition of a single tag key.
26 type VocabularyTag struct {
// Strict makes checkValue reject values that are not listed in Values.
// validate reports a strict tag that provides no values.
27 Strict bool `json:"strict"`
// Labels are alternative names for the tag key. They are compared
// case-insensitively and must not collide with other keys or labels.
28 Labels []VocabularyLabel `json:"labels"`
// Values maps each accepted tag value to its definition.
29 Values map[string]VocabularyTagValue `json:"values"`
32 // Cannot have a constant map in Go, so we have to use a function.
33 // If you are adding a new system property, it SHOULD match `systemKeyPattern`
34 // above, and Check will allow it. This map is for historical exceptions that
35 // predate standardizing on this prefix.
//
// A new map is built from the literal below on every call. NewVocabulary
// copies each key into reservedTagKeys.
36 func (v *Vocabulary) systemTagKeys() map[string]bool {
37 return map[string]bool{
38 // Collection keys - set by arvados-cwl-runner
39 "container_request": true,
40 "container_uuid": true,
42 // Collection keys - set by arv-keepdocker (on the way out)
43 "docker-image-repo-tag": true,
44 // Container request keys - set by arvados-cwl-runner
47 "template_uuid": true,
52 "image_timestamp": true,
// VocabularyLabel is one label attached to a tag key (VocabularyTag.Labels)
// or to a tag value (VocabularyTagValue.Labels).
57 type VocabularyLabel struct {
// Label is the label text. The validation and lookup code lower-cases it
// before comparing, so labels are effectively case-insensitive.
58 Label string `json:"label"`
// VocabularyTagValue is the definition of a single value of a tag key.
61 type VocabularyTagValue struct {
// Labels are alternative names for the value. checkValue reports their use
// as an alias and points at the value they stand for.
62 Labels []VocabularyLabel `json:"labels"`
65 // NewVocabulary creates a new Vocabulary from a JSON definition and a list
66 // of reserved tag keys that will get special treatment when strict mode is
//
// Outcomes:
//   - empty data returns an empty Vocabulary and a nil error;
//   - a json.Unmarshal failure is returned as "invalid JSON format", with a
//     line and column added when the failure is a *json.SyntaxError;
//   - input that decodes to the zero Vocabulary is rejected as not matching
//     the Vocabulary format;
//   - duplicate JSON keys and validation problems are collected and returned
//     together as a single error, one message per line.
//
// managedTagKeys and the keys from systemTagKeys are stored in
// reservedTagKeys before validation runs.
68 func NewVocabulary(data []byte, managedTagKeys []string) (voc *Vocabulary, err error) {
// bytes.Compare returns 0 only when data has no bytes (nil or zero length).
69 if r := bytes.Compare(data, []byte("")); r == 0 {
70 return &Vocabulary{}, nil
72 err = json.Unmarshal(data, &voc)
// Try to recover a *json.SyntaxError so the message can point at the
// offending position in the input.
74 var serr *json.SyntaxError
75 if errors.As(err, &serr) {
// errorMsg is the input up to the error offset; counting its newlines gives
// the 1-based line number.
77 errorMsg := string(data[:offset])
78 line := 1 + strings.Count(errorMsg, "\n")
// strings.LastIndex yields -1 when no newline precedes the error, so the
// subtracted term is 0 and column equals offset on the first line.
79 column := offset - int64(strings.LastIndex(errorMsg, "\n")+len("\n"))
80 return nil, fmt.Errorf("invalid JSON format: %q (line %d, column %d)", err, line, column)
// Any other unmarshalling error is reported without a position.
82 return nil, fmt.Errorf("invalid JSON format: %q", err)
// Well-formed JSON that sets none of Vocabulary's fields decodes to the zero
// value; treat that as "not a vocabulary".
// NOTE(review): the JSON literal `null` looks like it would leave voc nil,
// fail this comparison, and then be dereferenced below — confirm and guard.
84 if reflect.DeepEqual(voc, &Vocabulary{}) {
85 return nil, fmt.Errorf("JSON data provided doesn't match Vocabulary format: %q", data)
// Set once any duplicate-key or validation problem has been recorded.
88 shouldReportErrors := false
91 // json.Unmarshal() doesn't error out on duplicate keys.
92 dupedKeys := []string{}
// Second pass over the raw bytes with a token decoder; the decoded struct can
// no longer show which keys were repeated.
93 err = checkJSONDupedKeys(json.NewDecoder(bytes.NewReader(data)), nil, &dupedKeys)
95 shouldReportErrors = true
// errors is a local slice of messages at this point, not the errors package
// used by errors.As above.
96 for _, dk := range dupedKeys {
97 errors = append(errors, fmt.Sprintf("duplicate JSON key %q", dk))
// validate consults reservedTagKeys, so it must be filled before the call.
100 voc.reservedTagKeys = make(map[string]bool)
101 for _, managedKey := range managedTagKeys {
102 voc.reservedTagKeys[managedKey] = true
104 for systemKey := range voc.systemTagKeys() {
105 voc.reservedTagKeys[systemKey] = true
107 validationErrs, err := voc.validate()
109 shouldReportErrors = true
110 errors = append(errors, validationErrs...)
// Report everything that was collected, one message per line.
112 if shouldReportErrors {
113 return nil, fmt.Errorf("%s", strings.Join(errors, "\n"))
// checkJSONDupedKeys reads one JSON value from d, recursing into objects and
// arrays, and appends to *errors the dot-joined path of each object key it
// flags as duplicated.
//
// path lists the keys and array indexes leading to the value being read; the
// top-level caller passes nil. Only the invocation with an empty path turns a
// non-empty *errors into the "duplicate JSON key(s) found" error. Errors
// returned by nested invocations are checked at each recursive call site.
118 func checkJSONDupedKeys(d *json.Decoder, path []string, errors *[]string) error {
// Only delimiter tokens can open a nested object or array.
123 delim, ok := t.(json.Delim)
// Presumably the set of key names already seen in the current object —
// TODO confirm against the membership test.
129 keys := make(map[string]bool)
// Record the full dotted path of the flagged key.
138 *errors = append(*errors, strings.Join(append(path, key), "."))
// Descend into the value stored under key.
142 if err := checkJSONDupedKeys(d, append(path, key), errors); err != nil {
146 // consume closing '}'
147 if _, err := d.Token(); err != nil {
// Array elements are addressed by their index within the path.
153 if err := checkJSONDupedKeys(d, append(path, strconv.Itoa(i)), errors); err != nil {
158 // consume closing ']'
159 if _, err := d.Token(); err != nil {
// Only the outermost call reports the aggregate error.
163 if len(path) == 0 && len(*errors) > 0 {
164 return fmt.Errorf("duplicate JSON key(s) found")
// validate checks the vocabulary for internal consistency. It returns the
// list of problems found together with an error.
//
// A strict vocabulary without tags is rejected immediately with a nil list.
// Otherwise each tag key is checked for: being a reserved key, colliding
// case-insensitively with another key or key label, being strict without
// values, and having values or value labels that collide case-insensitively
// within the tag. v.Tags is a map, so the order of the reported problems can
// differ between runs.
169 func (v *Vocabulary) validate() ([]string, error) {
// tagKeys maps lower-cased names already seen to their original spelling.
173 tagKeys := map[string]string{}
174 // Checks for Vocabulary strictness
175 if v.StrictTags && len(v.Tags) == 0 {
176 return nil, fmt.Errorf("vocabulary is strict but no tags are defined")
178 // Checks for collisions between tag keys, reserved tag keys
179 // and tag key labels.
181 for key := range v.Tags {
182 if v.reservedTagKeys[key] {
183 errors = append(errors, fmt.Sprintf("tag key %q is reserved", key))
// Compare lower-cased so that keys differing only in case collide.
185 lcKey := strings.ToLower(key)
186 if tagKeys[lcKey] != "" {
187 errors = append(errors, fmt.Sprintf("duplicate tag key %q", key))
190 for _, lbl := range v.Tags[key].Labels {
191 label := strings.ToLower(lbl.Label)
192 if tagKeys[label] != "" {
193 errors = append(errors, fmt.Sprintf("tag label %q for key %q already seen as a tag key or label", lbl.Label, key))
195 tagKeys[label] = lbl.Label
197 // Checks for value strictness
198 if v.Tags[key].Strict && len(v.Tags[key].Values) == 0 {
199 errors = append(errors, fmt.Sprintf("tag key %q is configured as strict but doesn't provide values", key))
201 // Checks for collisions between tag values and tag value labels.
// tagValues is created per tag key, so values are only compared within the
// same tag. It maps a lower-cased value or label to the value that owns it.
202 tagValues := map[string]string{}
203 for val := range v.Tags[key].Values {
204 lcVal := strings.ToLower(val)
205 if tagValues[lcVal] != "" {
206 errors = append(errors, fmt.Sprintf("duplicate tag value %q for tag %q", val, key))
208 // Checks for collisions between labels from different values.
209 tagValues[lcVal] = val
210 for _, tagLbl := range v.Tags[key].Values[val].Labels {
211 label := strings.ToLower(tagLbl.Label)
// A label already registered for this same value is tolerated; only a label
// owned by a different value is a collision.
212 if tagValues[label] != "" && tagValues[label] != val {
213 errors = append(errors, fmt.Sprintf("tag value label %q for pair (%q:%q) already seen on value %q", tagLbl.Label, key, val, tagValues[label]))
215 tagValues[label] = val
// NOTE(review): presumably reached only when problems were collected —
// confirm against the guarding condition.
220 return errors, fmt.Errorf("invalid vocabulary")
// getLabelsToKeys builds a lookup table from the labels declared on every tag
// key in v.Tags. Labels are lower-cased before use, so callers must lower-case
// the string they look up. Check uses the table to find the key that a label
// stands for.
// NOTE(review): the table is rebuilt on every call, and Check calls this once
// per key that is not defined in Tags.
225 func (v *Vocabulary) getLabelsToKeys() (labels map[string]string) {
229 labels = make(map[string]string)
230 for key, val := range v.Tags {
231 for _, lbl := range val.Labels {
// Lower-case so that lookups are case-insensitive.
232 label := strings.ToLower(lbl.Label)
// getLabelsToValues builds a lookup table for the values of tag key. Each
// value is registered under its own lower-cased spelling, and the labels of
// each value are lower-cased as well. checkValue uses the table to find the
// value that a differently spelled input stands for.
// A key that is not in v.Tags contributes no entries.
239 func (v *Vocabulary) getLabelsToValues(key string) (labels map[string]string) {
243 labels = make(map[string]string)
244 if _, ok := v.Tags[key]; ok {
245 for val := range v.Tags[key].Values {
// The value itself is a valid lookup name, case-insensitively.
246 labels[strings.ToLower(val)] = val
247 for _, tagLbl := range v.Tags[key].Values[val].Labels {
248 label := strings.ToLower(tagLbl.Label)
// checkValue reports whether val may be used as a value of tag key.
//
// An exact match against the tag's Values skips both checks below. Otherwise:
//   - if the lower-cased val is found by getLabelsToValues, the "is an alias"
//     error names the spelling that must be used instead;
//   - failing that, a Strict tag yields the "is not valid" error.
//
// NOTE(review): presumably returns nil in every other case — confirm.
256 func (v *Vocabulary) checkValue(key, val string) error {
257 if _, ok := v.Tags[key].Values[val]; !ok {
// The lookup table is keyed by lower-cased names.
258 lcVal := strings.ToLower(val)
259 correctValue, ok := v.getLabelsToValues(key)[lcVal]
261 return fmt.Errorf("tag value %q for key %q is an alias, must be provided as %q", val, key, correctValue)
262 } else if v.Tags[key].Strict {
263 return fmt.Errorf("tag value %q is not valid for key %q", val, key)
269 // Check validates the given data against the vocabulary.
//
// For every key in data:
//   - keys matching systemKeyPattern or present in reservedTagKeys are
//     allowed whether or not the vocabulary defines them;
//   - a key missing from Tags is an error if it is a label of a defined key
//     (alias), or if StrictTags is set;
//   - for a defined key, the value must be a string or a list of strings, and
//     each string is checked with checkValue.
//
// data is a map, so when several entries are invalid the one reported can
// differ between runs.
270 func (v *Vocabulary) Check(data map[string]interface{}) error {
// NOTE(review): the pattern is a constant; compiling it once at package scope
// would avoid recompiling it on every call.
274 systemKeyRegexp, err := regexp.Compile(systemKeyPattern)
278 for key, val := range data {
279 // Checks for key validity
280 if systemKeyRegexp.MatchString(key) || v.reservedTagKeys[key] {
281 // Allow reserved keys to be used even if they are not defined in
282 // the vocabulary no matter its strictness.
285 if _, ok := v.Tags[key]; !ok {
// Labels are stored lower-cased, so look the key up the same way.
286 lcKey := strings.ToLower(key)
287 correctKey, ok := v.getLabelsToKeys()[lcKey]
289 return fmt.Errorf("tag key %q is an alias, must be provided as %q", key, correctKey)
290 } else if v.StrictTags {
291 return fmt.Errorf("tag key %q is not defined in the vocabulary", key)
293 // If the key is not defined, we don't need to check the value
296 // Checks for value validity -- key is defined
297 switch val := val.(type) {
299 err := v.checkValue(key, val)
// Every element of a list value is checked individually.
304 for _, singleVal := range val {
305 switch singleVal := singleVal.(type) {
307 err := v.checkValue(key, singleVal)
312 return fmt.Errorf("value list element type for tag key %q was %T, but expected a string", key, singleVal)
316 return fmt.Errorf("value type for tag key %q was %T, but expected a string or list of strings", key, val)