18487: Improves vocabulary error checking, with tests.

author Lucas Di Pentima <lucas.dipentima@curii.com>

Tue, 18 Jan 2022 14:29:49 +0000 (11:29 -0300)

committer Lucas Di Pentima <lucas.dipentima@curii.com>

Tue, 18 Jan 2022 20:23:39 +0000 (17:23 -0300)
author Lucas Di Pentima <lucas.dipentima@curii.com>
Tue, 18 Jan 2022 14:29:49 +0000 (11:29 -0300)
committer Lucas Di Pentima <lucas.dipentima@curii.com>
Tue, 18 Jan 2022 20:23:39 +0000 (17:23 -0300)
diff --git a/lib/controller/localdb/conn.go b/lib/controller/localdb/conn.go

index 323e660c6f1e75d79721466e513dc8c611a52833..104cfe28f5e0cccfb9cf785955229b4f3b297fdf 100644 (file)
--- a/lib/controller/localdb/conn.go
+++ b/lib/controller/localdb/conn.go
@@ -99,7 +99,7 @@ func (conn *Conn) maybeRefreshVocabularyCache(logger logrus.FieldLogger) error {
  func (conn *Conn) loadVocabularyFile() error {
         vf, err := os.ReadFile(conn.cluster.API.VocabularyPath)
         if err != nil {
-               return fmt.Errorf("couldn't reading the vocabulary file: %v", err)
+               return fmt.Errorf("while reading the vocabulary file: %v", err)
         }
         mk := make([]string, 0, len(conn.cluster.Collections.ManagedProperties))
         for k := range conn.cluster.Collections.ManagedProperties {
diff --git a/sdk/go/arvados/vocabulary.go b/sdk/go/arvados/vocabulary.go

index 150091b308505b51f1830d33351cc7a706161650..4e155f34aaee7453069ad09f4614600b2c8e4cd9 100644 (file)
--- a/sdk/go/arvados/vocabulary.go
+++ b/sdk/go/arvados/vocabulary.go
@@ -7,8 +7,10 @@ package arvados
  import (
         "bytes"
         "encoding/json"
+       "errors"
         "fmt"
         "reflect"
+       "strconv"
         "strings"
  )
  
@@ -55,7 +57,20 @@ func NewVocabulary(data []byte, managedTagKeys []string) (voc *Vocabulary, err e
         }
         err = json.Unmarshal(data, &voc)
         if err != nil {
-               return nil, fmt.Errorf("invalid JSON format error: %q", err)
+               var serr *json.SyntaxError
+               if errors.As(err, &serr) {
+                       offset := serr.Offset
+                       errorMsg := string(data[:offset])
+                       line := 1 + strings.Count(errorMsg, "\n")
+                       column := offset - int64(strings.LastIndex(errorMsg, "\n")+len("\n"))
+                       return nil, fmt.Errorf("invalid JSON format: %q (line %d, column %d)", err, line, column)
+               }
+               return nil, fmt.Errorf("invalid JSON format: %q", err)
+       }
+       // json.Unmarshal() doesn't error out on duplicate keys.
+       err = checkJSONDupedKeys(json.NewDecoder(bytes.NewReader(data)), nil, &[]string{})
+       if err != nil {
+               return nil, err
         }
         if reflect.DeepEqual(voc, &Vocabulary{}) {
                 return nil, fmt.Errorf("JSON data provided doesn't match Vocabulary format: %q", data)
@@ -74,6 +89,57 @@ func NewVocabulary(data []byte, managedTagKeys []string) (voc *Vocabulary, err e
         return voc, nil
  }
  
+func checkJSONDupedKeys(d *json.Decoder, path []string, errors *[]string) error {
+       t, err := d.Token()
+       if err != nil {
+               return err
+       }
+       delim, ok := t.(json.Delim)
+       if !ok {
+               return nil
+       }
+       switch delim {
+       case '{':
+               keys := make(map[string]bool)
+               for d.More() {
+                       t, err := d.Token()
+                       if err != nil {
+                               return err
+                       }
+                       key := t.(string)
+
+                       if keys[key] {
+                               *errors = append(*errors, strings.Join(append(path, key), "."))
+                       }
+                       keys[key] = true
+
+                       if err := checkJSONDupedKeys(d, append(path, key), errors); err != nil {
+                               return err
+                       }
+               }
+               // consume closing '}'
+               if _, err := d.Token(); err != nil {
+                       return err
+               }
+       case '[':
+               i := 0
+               for d.More() {
+                       if err := checkJSONDupedKeys(d, append(path, strconv.Itoa(i)), errors); err != nil {
+                               return err
+                       }
+                       i++
+               }
+               // consume closing ']'
+               if _, err := d.Token(); err != nil {
+                       return err
+               }
+       }
+       if len(path) == 0 && len(*errors) > 0 {
+               return fmt.Errorf("duplicate JSON key(s):\n%s", strings.Join(*errors, "\n"))
+       }
+       return nil
+}
+
  func (v *Vocabulary) validate() error {
         if v == nil {
                 return nil
@@ -85,44 +151,48 @@ func (v *Vocabulary) validate() error {
         }
         // Checks for collisions between tag keys, reserved tag keys
         // and tag key labels.
+       errors := []string{}
         for key := range v.Tags {
                 if v.reservedTagKeys[key] {
-                       return fmt.Errorf("tag key %q is reserved", key)
+                       errors = append(errors, fmt.Sprintf("tag key %q is reserved", key))
                 }
                 lcKey := strings.ToLower(key)
                 if tagKeys[lcKey] != "" {
-                       return fmt.Errorf("duplicate tag key %q", key)
+                       errors = append(errors, fmt.Sprintf("duplicate tag key %q", key))
                 }
                 tagKeys[lcKey] = key
                 for _, lbl := range v.Tags[key].Labels {
                         label := strings.ToLower(lbl.Label)
                         if tagKeys[label] != "" {
-                               return fmt.Errorf("tag label %q for key %q already seen as a tag key or label", lbl.Label, key)
+                               errors = append(errors, fmt.Sprintf("tag label %q for key %q already seen as a tag key or label", lbl.Label, key))
                         }
                         tagKeys[label] = lbl.Label
                 }
                 // Checks for value strictness
                 if v.Tags[key].Strict && len(v.Tags[key].Values) == 0 {
-                       return fmt.Errorf("tag key %q is configured as strict but doesn't provide values", key)
+                       errors = append(errors, fmt.Sprintf("tag key %q is configured as strict but doesn't provide values", key))
                 }
                 // Checks for collisions between tag values and tag value labels.
                 tagValues := map[string]string{}
                 for val := range v.Tags[key].Values {
                         lcVal := strings.ToLower(val)
                         if tagValues[lcVal] != "" {
-                               return fmt.Errorf("duplicate tag value %q for tag %q", val, key)
+                               errors = append(errors, fmt.Sprintf("duplicate tag value %q for tag %q", val, key))
                         }
                         // Checks for collisions between labels from different values.
                         tagValues[lcVal] = val
                         for _, tagLbl := range v.Tags[key].Values[val].Labels {
                                 label := strings.ToLower(tagLbl.Label)
                                 if tagValues[label] != "" && tagValues[label] != val {
-                                       return fmt.Errorf("tag value label %q for pair (%q:%q) already seen on value %q", tagLbl.Label, key, val, tagValues[label])
+                                       errors = append(errors, fmt.Sprintf("tag value label %q for pair (%q:%q) already seen on value %q", tagLbl.Label, key, val, tagValues[label]))
                                 }
                                 tagValues[label] = val
                         }
                 }
         }
+       if len(errors) > 0 {
+               return fmt.Errorf("invalid vocabulary:\n%s", strings.Join(errors, "\n"))
+       }
         return nil
  }
  
diff --git a/sdk/go/arvados/vocabulary_test.go b/sdk/go/arvados/vocabulary_test.go

index 5a5189de2b3e1c9d66d6c818bd1ce1f11554bf17..da62d5c25164c1042394432b6fd1172706aae590 100644 (file)
--- a/sdk/go/arvados/vocabulary_test.go
+++ b/sdk/go/arvados/vocabulary_test.go
@@ -257,15 +257,37 @@ func (s *VocabularySuite) TestNewVocabulary(c *check.C) {
                                 },
                         },
                 },
+               {
+                       "Invalid JSON error with line & column numbers",
+                       `{"tags":{
+                               "aKey":{
+                                       "labels": [,{"label": "A label"}]
+                               }
+                       }}`,
+                       false, `invalid JSON format:.*\(line \d+, column \d+\)`, nil,
+               },
+               {
+                       "Invalid JSON with duplicate keys",
+                       `{"tags":{
+                               "type":{
+                                       "strict": false,
+                                       "labels": [{"label": "Class", "label": "Type"}]
+                               },
+                               "type":{
+                                       "labels": []
+                               }
+                       }}`,
+                       false, "(?s).*tags.type.labels.0.label\ntags.type", nil,
+               },
                 {
                         "Valid data, but uses reserved key",
                         `{"tags":{
                                 "type":{
                                         "strict": false,
-                                       "labels": [{"label": "Type"}]
+                                       "labels": [{"label": "Class"}]
                                 }
                         }}`,
-                       false, "tag key.*is reserved", nil,
+                       false, "(?s).*tag key.*is reserved", nil,
                 },
         }
  
@@ -288,14 +310,14 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
         tests := []struct {
                 name       string
                 voc        *Vocabulary
-               errMatches string
+               errMatches []string
         }{
                 {
                         "Strict vocabulary, no keys",
                         &Vocabulary{
                                 StrictTags: true,
                         },
-                       "vocabulary is strict but no tags are defined",
+                       []string{"vocabulary is strict but no tags are defined"},
                 },
                 {
                         "Collision between tag key and tag key label",
@@ -312,7 +334,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
                                         },
                                 },
                         },
-                       "", // Depending on how the map is sorted, this could be one of two errors
+                       nil, // Depending on how the map is sorted, this could be one of two errors
                 },
                 {
                         "Collision between tag key and tag key label (case-insensitive)",
@@ -329,7 +351,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
                                         },
                                 },
                         },
-                       "", // Depending on how the map is sorted, this could be one of two errors
+                       nil, // Depending on how the map is sorted, this could be one of two errors
                 },
                 {
                         "Collision between tag key labels",
@@ -346,7 +368,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
                                         },
                                 },
                         },
-                       "tag label.*for key.*already seen.*",
+                       []string{"(?s).*tag label.*for key.*already seen.*"},
                 },
                 {
                         "Collision between tag value and tag value label",
@@ -367,7 +389,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
                                         },
                                 },
                         },
-                       "", // Depending on how the map is sorted, this could be one of two errors
+                       nil, // Depending on how the map is sorted, this could be one of two errors
                 },
                 {
                         "Collision between tag value and tag value label (case-insensitive)",
@@ -388,7 +410,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
                                         },
                                 },
                         },
-                       "", // Depending on how the map is sorted, this could be one of two errors
+                       nil, // Depending on how the map is sorted, this could be one of two errors
                 },
                 {
                         "Collision between tag value labels",
@@ -409,7 +431,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
                                         },
                                 },
                         },
-                       "tag value label.*for pair.*already seen.*on value.*",
+                       []string{"(?s).*tag value label.*for pair.*already seen.*on value.*"},
                 },
                 {
                         "Collision between tag value labels (case-insensitive)",
@@ -430,7 +452,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
                                         },
                                 },
                         },
-                       "tag value label.*for pair.*already seen.*on value.*",
+                       []string{"(?s).*tag value label.*for pair.*already seen.*on value.*"},
                 },
                 {
                         "Strict tag key, with no values",
@@ -443,15 +465,34 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
                                         },
                                 },
                         },
-                       "tag key.*is configured as strict but doesn't provide values",
+                       []string{"(?s).*tag key.*is configured as strict but doesn't provide values"},
+               },
+               {
+                       "Multiple errors reported",
+                       &Vocabulary{
+                               StrictTags: false,
+                               Tags: map[string]VocabularyTag{
+                                       "IDTAGANIMALS": {
+                                               Strict: true,
+                                               Labels: []VocabularyLabel{{Label: "Animal"}, {Label: "Creature"}},
+                                       },
+                                       "IDTAGSIZES": {
+                                               Labels: []VocabularyLabel{{Label: "Animal"}, {Label: "Size"}},
+                                       },
+                               },
+                       },
+                       []string{
+                               "(?s).*tag key.*is configured as strict but doesn't provide values.*",
+                               "(?s).*tag label.*for key.*already seen.*",
+                       },
                 },
         }
         for _, tt := range tests {
                 c.Log(c.TestName()+" ", tt.name)
                 err := tt.voc.validate()
                 c.Assert(err, check.NotNil)
-               if tt.errMatches != "" {
-                       c.Assert(err, check.ErrorMatches, tt.errMatches)
+               for _, errMatch := range tt.errMatches {
+                       c.Assert(err, check.ErrorMatches, errMatch)
                 }
         }
  }
author	Lucas Di Pentima <lucas.dipentima@curii.com>
	Tue, 18 Jan 2022 14:29:49 +0000 (11:29 -0300)
committer	Lucas Di Pentima <lucas.dipentima@curii.com>
	Tue, 18 Jan 2022 20:23:39 +0000 (17:23 -0300)
lib/controller/localdb/conn.go		patch \| blob \| history
sdk/go/arvados/vocabulary.go		patch \| blob \| history
sdk/go/arvados/vocabulary_test.go		patch \| blob \| history