[ARVADOS] created: 2.1.0-1830-gca63f8b53

Git user git at public.arvados.org
Tue Jan 18 16:04:11 UTC 2022


        at  ca63f8b53373c03444e1f2246ae991b41198dd50 (commit)


commit ca63f8b53373c03444e1f2246ae991b41198dd50
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Tue Jan 18 12:54:59 2022 -0300

    18487: Fixes error checking on nonexistant vocabulary file. Adds tests.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/lib/config/cmd.go b/lib/config/cmd.go
index e532a7e32..2f90bc80d 100644
--- a/lib/config/cmd.go
+++ b/lib/config/cmd.go
@@ -148,7 +148,7 @@ func (checkCommand) RunCommand(prog string, args []string, stdin io.Reader, stdo
 		if cc.API.VocabularyPath == "" {
 			continue
 		}
-		_, err = os.Stat(cc.API.VocabularyPath)
+		vd, err := os.ReadFile(cc.API.VocabularyPath)
 		if err != nil {
 			if errors.Is(err, os.ErrNotExist) {
 				// If the vocabulary path doesn't exist, it might mean that
@@ -156,11 +156,6 @@ func (checkCommand) RunCommand(prog string, args []string, stdin io.Reader, stdo
 				// error.
 				continue
 			}
-			fmt.Fprintf(stderr, "Error checking vocabulary path %q for cluster %s: %s\n", cc.API.VocabularyPath, id, err)
-			return 1
-		}
-		vd, err := os.ReadFile(cc.API.VocabularyPath)
-		if err != nil {
 			fmt.Fprintf(stderr, "Error reading vocabulary file %q for cluster %s: %s\n", cc.API.VocabularyPath, id, err)
 			return 1
 		}
diff --git a/lib/config/cmd_test.go b/lib/config/cmd_test.go
index a5cc28b80..6b5ca4e57 100644
--- a/lib/config/cmd_test.go
+++ b/lib/config/cmd_test.go
@@ -53,6 +53,7 @@ Clusters:
   SystemRootToken: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
   API:
     MaxItemsPerResponse: 1234
+    VocabularyPath: /this/path/does/not/exist
   Collections:
     BlobSigningKey: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
   PostgreSQL:
@@ -77,6 +78,60 @@ Clusters:
 	c.Check(stderr.String(), check.Equals, "")
 }
 
+func (s *CommandSuite) TestCheck_VocabularyErrors(c *check.C) {
+	tmpFile, err := ioutil.TempFile("", "")
+	c.Assert(err, check.IsNil)
+	defer os.Remove(tmpFile.Name())
+	_, err = tmpFile.WriteString(`
+{
+ "tags": {
+  "IDfoo": {
+   "labels": [
+    {"label": "foo"}
+   ]
+  },
+  "IDfoo": {
+   "labels": [
+    {"label": "baz"}
+   ]
+  }
+ }
+}`)
+	c.Assert(err, check.IsNil)
+	tmpFile.Close()
+	vocPath := tmpFile.Name()
+	var stdout, stderr bytes.Buffer
+	in := `
+Clusters:
+ z1234:
+  ManagementToken: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  SystemRootToken: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  API:
+    MaxItemsPerResponse: 1234
+    VocabularyPath: ` + vocPath + `
+  Collections:
+    BlobSigningKey: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  PostgreSQL:
+    Connection:
+      sslmode: require
+  Services:
+    RailsAPI:
+      InternalURLs:
+        "http://0.0.0.0:8000": {}
+  Workbench:
+    UserProfileFormFields:
+      color:
+        Type: select
+        Options:
+          fuchsia: {}
+    ApplicationMimetypesWithViewIcon:
+      whitespace: {}
+`
+	code := CheckCommand.RunCommand("arvados config-check", []string{"-config", "-"}, bytes.NewBufferString(in), &stdout, &stderr)
+	c.Check(code, check.Equals, 1)
+	c.Check(stderr.String(), check.Matches, `(?s).*Error loading vocabulary file.*duplicate JSON key\(s\).*`)
+}
+
 func (s *CommandSuite) TestCheck_DeprecatedKeys(c *check.C) {
 	var stdout, stderr bytes.Buffer
 	in := `

commit 935674c9b13a06308b085473bac0ed303d60adf5
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Tue Jan 18 11:29:49 2022 -0300

    18487: Improves vocabulary error checking, with tests.
    
    * Adds JSON duplicated keys checking (json.Unmarshall silently accepts them).
    * Adds line & column numbers to JSON syntax errors.
    * Reports all detected non-syntax errors at once instead of returning on the
    first one. (so that the site admin doesn't have to do multiple edit+check
    cycles)
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/lib/controller/localdb/conn.go b/lib/controller/localdb/conn.go
index 323e660c6..104cfe28f 100644
--- a/lib/controller/localdb/conn.go
+++ b/lib/controller/localdb/conn.go
@@ -99,7 +99,7 @@ func (conn *Conn) maybeRefreshVocabularyCache(logger logrus.FieldLogger) error {
 func (conn *Conn) loadVocabularyFile() error {
 	vf, err := os.ReadFile(conn.cluster.API.VocabularyPath)
 	if err != nil {
-		return fmt.Errorf("couldn't reading the vocabulary file: %v", err)
+		return fmt.Errorf("while reading the vocabulary file: %v", err)
 	}
 	mk := make([]string, 0, len(conn.cluster.Collections.ManagedProperties))
 	for k := range conn.cluster.Collections.ManagedProperties {
diff --git a/sdk/go/arvados/vocabulary.go b/sdk/go/arvados/vocabulary.go
index 150091b30..4e155f34a 100644
--- a/sdk/go/arvados/vocabulary.go
+++ b/sdk/go/arvados/vocabulary.go
@@ -7,8 +7,10 @@ package arvados
 import (
 	"bytes"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"reflect"
+	"strconv"
 	"strings"
 )
 
@@ -55,7 +57,20 @@ func NewVocabulary(data []byte, managedTagKeys []string) (voc *Vocabulary, err e
 	}
 	err = json.Unmarshal(data, &voc)
 	if err != nil {
-		return nil, fmt.Errorf("invalid JSON format error: %q", err)
+		var serr *json.SyntaxError
+		if errors.As(err, &serr) {
+			offset := serr.Offset
+			errorMsg := string(data[:offset])
+			line := 1 + strings.Count(errorMsg, "\n")
+			column := offset - int64(strings.LastIndex(errorMsg, "\n")+len("\n"))
+			return nil, fmt.Errorf("invalid JSON format: %q (line %d, column %d)", err, line, column)
+		}
+		return nil, fmt.Errorf("invalid JSON format: %q", err)
+	}
+	// json.Unmarshal() doesn't error out on duplicate keys.
+	err = checkJSONDupedKeys(json.NewDecoder(bytes.NewReader(data)), nil, &[]string{})
+	if err != nil {
+		return nil, err
 	}
 	if reflect.DeepEqual(voc, &Vocabulary{}) {
 		return nil, fmt.Errorf("JSON data provided doesn't match Vocabulary format: %q", data)
@@ -74,6 +89,57 @@ func NewVocabulary(data []byte, managedTagKeys []string) (voc *Vocabulary, err e
 	return voc, nil
 }
 
+func checkJSONDupedKeys(d *json.Decoder, path []string, errors *[]string) error {
+	t, err := d.Token()
+	if err != nil {
+		return err
+	}
+	delim, ok := t.(json.Delim)
+	if !ok {
+		return nil
+	}
+	switch delim {
+	case '{':
+		keys := make(map[string]bool)
+		for d.More() {
+			t, err := d.Token()
+			if err != nil {
+				return err
+			}
+			key := t.(string)
+
+			if keys[key] {
+				*errors = append(*errors, strings.Join(append(path, key), "."))
+			}
+			keys[key] = true
+
+			if err := checkJSONDupedKeys(d, append(path, key), errors); err != nil {
+				return err
+			}
+		}
+		// consume closing '}'
+		if _, err := d.Token(); err != nil {
+			return err
+		}
+	case '[':
+		i := 0
+		for d.More() {
+			if err := checkJSONDupedKeys(d, append(path, strconv.Itoa(i)), errors); err != nil {
+				return err
+			}
+			i++
+		}
+		// consume closing ']'
+		if _, err := d.Token(); err != nil {
+			return err
+		}
+	}
+	if len(path) == 0 && len(*errors) > 0 {
+		return fmt.Errorf("duplicate JSON key(s):\n%s", strings.Join(*errors, "\n"))
+	}
+	return nil
+}
+
 func (v *Vocabulary) validate() error {
 	if v == nil {
 		return nil
@@ -85,44 +151,48 @@ func (v *Vocabulary) validate() error {
 	}
 	// Checks for collisions between tag keys, reserved tag keys
 	// and tag key labels.
+	errors := []string{}
 	for key := range v.Tags {
 		if v.reservedTagKeys[key] {
-			return fmt.Errorf("tag key %q is reserved", key)
+			errors = append(errors, fmt.Sprintf("tag key %q is reserved", key))
 		}
 		lcKey := strings.ToLower(key)
 		if tagKeys[lcKey] != "" {
-			return fmt.Errorf("duplicate tag key %q", key)
+			errors = append(errors, fmt.Sprintf("duplicate tag key %q", key))
 		}
 		tagKeys[lcKey] = key
 		for _, lbl := range v.Tags[key].Labels {
 			label := strings.ToLower(lbl.Label)
 			if tagKeys[label] != "" {
-				return fmt.Errorf("tag label %q for key %q already seen as a tag key or label", lbl.Label, key)
+				errors = append(errors, fmt.Sprintf("tag label %q for key %q already seen as a tag key or label", lbl.Label, key))
 			}
 			tagKeys[label] = lbl.Label
 		}
 		// Checks for value strictness
 		if v.Tags[key].Strict && len(v.Tags[key].Values) == 0 {
-			return fmt.Errorf("tag key %q is configured as strict but doesn't provide values", key)
+			errors = append(errors, fmt.Sprintf("tag key %q is configured as strict but doesn't provide values", key))
 		}
 		// Checks for collisions between tag values and tag value labels.
 		tagValues := map[string]string{}
 		for val := range v.Tags[key].Values {
 			lcVal := strings.ToLower(val)
 			if tagValues[lcVal] != "" {
-				return fmt.Errorf("duplicate tag value %q for tag %q", val, key)
+				errors = append(errors, fmt.Sprintf("duplicate tag value %q for tag %q", val, key))
 			}
 			// Checks for collisions between labels from different values.
 			tagValues[lcVal] = val
 			for _, tagLbl := range v.Tags[key].Values[val].Labels {
 				label := strings.ToLower(tagLbl.Label)
 				if tagValues[label] != "" && tagValues[label] != val {
-					return fmt.Errorf("tag value label %q for pair (%q:%q) already seen on value %q", tagLbl.Label, key, val, tagValues[label])
+					errors = append(errors, fmt.Sprintf("tag value label %q for pair (%q:%q) already seen on value %q", tagLbl.Label, key, val, tagValues[label]))
 				}
 				tagValues[label] = val
 			}
 		}
 	}
+	if len(errors) > 0 {
+		return fmt.Errorf("invalid vocabulary:\n%s", strings.Join(errors, "\n"))
+	}
 	return nil
 }
 
diff --git a/sdk/go/arvados/vocabulary_test.go b/sdk/go/arvados/vocabulary_test.go
index 5a5189de2..da62d5c25 100644
--- a/sdk/go/arvados/vocabulary_test.go
+++ b/sdk/go/arvados/vocabulary_test.go
@@ -257,15 +257,37 @@ func (s *VocabularySuite) TestNewVocabulary(c *check.C) {
 				},
 			},
 		},
+		{
+			"Invalid JSON error with line & column numbers",
+			`{"tags":{
+				"aKey":{
+					"labels": [,{"label": "A label"}]
+				}
+			}}`,
+			false, `invalid JSON format:.*\(line \d+, column \d+\)`, nil,
+		},
+		{
+			"Invalid JSON with duplicate keys",
+			`{"tags":{
+				"type":{
+					"strict": false,
+					"labels": [{"label": "Class", "label": "Type"}]
+				},
+				"type":{
+					"labels": []
+				}
+			}}`,
+			false, "(?s).*tags.type.labels.0.label\ntags.type", nil,
+		},
 		{
 			"Valid data, but uses reserved key",
 			`{"tags":{
 				"type":{
 					"strict": false,
-					"labels": [{"label": "Type"}]
+					"labels": [{"label": "Class"}]
 				}
 			}}`,
-			false, "tag key.*is reserved", nil,
+			false, "(?s).*tag key.*is reserved", nil,
 		},
 	}
 
@@ -288,14 +310,14 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
 	tests := []struct {
 		name       string
 		voc        *Vocabulary
-		errMatches string
+		errMatches []string
 	}{
 		{
 			"Strict vocabulary, no keys",
 			&Vocabulary{
 				StrictTags: true,
 			},
-			"vocabulary is strict but no tags are defined",
+			[]string{"vocabulary is strict but no tags are defined"},
 		},
 		{
 			"Collision between tag key and tag key label",
@@ -312,7 +334,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
 					},
 				},
 			},
-			"", // Depending on how the map is sorted, this could be one of two errors
+			nil, // Depending on how the map is sorted, this could be one of two errors
 		},
 		{
 			"Collision between tag key and tag key label (case-insensitive)",
@@ -329,7 +351,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
 					},
 				},
 			},
-			"", // Depending on how the map is sorted, this could be one of two errors
+			nil, // Depending on how the map is sorted, this could be one of two errors
 		},
 		{
 			"Collision between tag key labels",
@@ -346,7 +368,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
 					},
 				},
 			},
-			"tag label.*for key.*already seen.*",
+			[]string{"(?s).*tag label.*for key.*already seen.*"},
 		},
 		{
 			"Collision between tag value and tag value label",
@@ -367,7 +389,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
 					},
 				},
 			},
-			"", // Depending on how the map is sorted, this could be one of two errors
+			nil, // Depending on how the map is sorted, this could be one of two errors
 		},
 		{
 			"Collision between tag value and tag value label (case-insensitive)",
@@ -388,7 +410,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
 					},
 				},
 			},
-			"", // Depending on how the map is sorted, this could be one of two errors
+			nil, // Depending on how the map is sorted, this could be one of two errors
 		},
 		{
 			"Collision between tag value labels",
@@ -409,7 +431,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
 					},
 				},
 			},
-			"tag value label.*for pair.*already seen.*on value.*",
+			[]string{"(?s).*tag value label.*for pair.*already seen.*on value.*"},
 		},
 		{
 			"Collision between tag value labels (case-insensitive)",
@@ -430,7 +452,7 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
 					},
 				},
 			},
-			"tag value label.*for pair.*already seen.*on value.*",
+			[]string{"(?s).*tag value label.*for pair.*already seen.*on value.*"},
 		},
 		{
 			"Strict tag key, with no values",
@@ -443,15 +465,34 @@ func (s *VocabularySuite) TestValidationErrors(c *check.C) {
 					},
 				},
 			},
-			"tag key.*is configured as strict but doesn't provide values",
+			[]string{"(?s).*tag key.*is configured as strict but doesn't provide values"},
+		},
+		{
+			"Multiple errors reported",
+			&Vocabulary{
+				StrictTags: false,
+				Tags: map[string]VocabularyTag{
+					"IDTAGANIMALS": {
+						Strict: true,
+						Labels: []VocabularyLabel{{Label: "Animal"}, {Label: "Creature"}},
+					},
+					"IDTAGSIZES": {
+						Labels: []VocabularyLabel{{Label: "Animal"}, {Label: "Size"}},
+					},
+				},
+			},
+			[]string{
+				"(?s).*tag key.*is configured as strict but doesn't provide values.*",
+				"(?s).*tag label.*for key.*already seen.*",
+			},
 		},
 	}
 	for _, tt := range tests {
 		c.Log(c.TestName()+" ", tt.name)
 		err := tt.voc.validate()
 		c.Assert(err, check.NotNil)
-		if tt.errMatches != "" {
-			c.Assert(err, check.ErrorMatches, tt.errMatches)
+		for _, errMatch := range tt.errMatches {
+			c.Assert(err, check.ErrorMatches, errMatch)
 		}
 	}
 }

commit 82dbf0170568659b4d61d1bcfa8ae016774a0317
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Mon Jan 17 16:20:01 2022 -0300

    18487: Adds vocabulary check to 'arvados-server config-check'
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/lib/config/cmd.go b/lib/config/cmd.go
index eeab6ac8c..e532a7e32 100644
--- a/lib/config/cmd.go
+++ b/lib/config/cmd.go
@@ -6,6 +6,7 @@ package config
 
 import (
 	"bytes"
+	"errors"
 	"flag"
 	"fmt"
 	"io"
@@ -13,6 +14,7 @@ import (
 	"os/exec"
 
 	"git.arvados.org/arvados.git/lib/cmd"
+	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"git.arvados.org/arvados.git/sdk/go/ctxlog"
 	"github.com/ghodss/yaml"
 	"github.com/sirupsen/logrus"
@@ -142,6 +144,36 @@ func (checkCommand) RunCommand(prog string, args []string, stdin io.Reader, stdo
 			return 1
 		}
 	}
+	for id, cc := range withDepr.Clusters {
+		if cc.API.VocabularyPath == "" {
+			continue
+		}
+		_, err = os.Stat(cc.API.VocabularyPath)
+		if err != nil {
+			if errors.Is(err, os.ErrNotExist) {
+				// If the vocabulary path doesn't exist, it might mean that
+				// the current node isn't the controller; so it's not an
+				// error.
+				continue
+			}
+			fmt.Fprintf(stderr, "Error checking vocabulary path %q for cluster %s: %s\n", cc.API.VocabularyPath, id, err)
+			return 1
+		}
+		vd, err := os.ReadFile(cc.API.VocabularyPath)
+		if err != nil {
+			fmt.Fprintf(stderr, "Error reading vocabulary file %q for cluster %s: %s\n", cc.API.VocabularyPath, id, err)
+			return 1
+		}
+		mk := make([]string, 0, len(cc.Collections.ManagedProperties))
+		for k := range cc.Collections.ManagedProperties {
+			mk = append(mk, k)
+		}
+		_, err = arvados.NewVocabulary(vd, mk)
+		if err != nil {
+			fmt.Fprintf(stderr, "Error loading vocabulary file %q for cluster %s: %s\n", cc.API.VocabularyPath, id, err)
+			return 1
+		}
+	}
 	return 0
 }
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list