[ARVADOS] created: 2.1.0-1499-g670026b66
Git user
git at public.arvados.org
Tue Oct 26 13:22:37 UTC 2021
at 670026b664b5e7de5346c863d07ee4b03b2fdaa3 (commit)
commit 670026b664b5e7de5346c863d07ee4b03b2fdaa3
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date: Mon Oct 25 22:29:48 2021 -0300
17944: Loads vocabulary, checks its validity.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>
diff --git a/lib/config/load.go b/lib/config/load.go
index 959bb7f5f..a5b626a06 100644
--- a/lib/config/load.go
+++ b/lib/config/load.go
@@ -27,6 +27,7 @@ var ErrNoClustersDefined = errors.New("config does not define any clusters")
type Loader struct {
Stdin io.Reader
Logger logrus.FieldLogger
+ LoadVocabulary bool // Load the vocabulary from API.VocabularyPath
SkipDeprecated bool // Don't load deprecated config keys
SkipLegacy bool // Don't load legacy config files
SkipAPICalls bool // Don't do checks that call RailsAPI/controller
@@ -269,6 +270,9 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
ldr.loadOldKeepBalanceConfig,
)
}
+ if ldr.LoadVocabulary {
+ loadFuncs = append(loadFuncs, ldr.loadVocabulary)
+ }
loadFuncs = append(loadFuncs, ldr.setImplicitStorageClasses)
for _, f := range loadFuncs {
err = f(&cfg)
@@ -389,9 +393,11 @@ func (ldr *Loader) checkStorageClasses(cc arvados.Cluster) error {
return nil
}
-// CheckVocabularyFile will be called only by interested components as the file
-// isn't expected to be present on every node.
-func (ldr *Loader) CheckVocabularyFile(cc arvados.Cluster) error {
+func (ldr *Loader) loadVocabulary(cfg *arvados.Config) error {
+ cc, err := cfg.GetCluster("")
+ if err != nil {
+ return err
+ }
if cc.API.VocabularyPath == "" {
return nil
}
@@ -399,11 +405,11 @@ func (ldr *Loader) CheckVocabularyFile(cc arvados.Cluster) error {
if err != nil {
return fmt.Errorf("couldn't read vocabulary file %q: %v", cc.API.VocabularyPath, err)
}
- var jsonData map[string]json.RawMessage
- err = json.Unmarshal(vf, &jsonData)
+ voc, err := arvados.NewVocabulary(vf)
if err != nil {
- return fmt.Errorf("invalid JSON data in vocabulary file %q", cc.API.VocabularyPath)
+ return fmt.Errorf("while loading vocabulary file %q: %s", cc.API.VocabularyPath, err)
}
+ cc.API.Vocabulary = voc
return nil
}
diff --git a/lib/service/cmd.go b/lib/service/cmd.go
index 9ab9371ab..71c4399f7 100644
--- a/lib/service/cmd.go
+++ b/lib/service/cmd.go
@@ -94,6 +94,10 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
// process _is_ the controller: we haven't started an
// http server yet.
loader.SkipAPICalls = true
+ // The vocabulary file is expected to be present only
+ // in the controller node, so it doesn't make sense to
+ // try loading it elsewhere.
+ loader.LoadVocabulary = true
}
cfg, err := loader.Load()
@@ -105,16 +109,6 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
return 1
}
- if strings.HasSuffix(prog, "controller") {
- // The vocabulary file is expected to be present only
- // in the controller node, so it doesn't make sense to
- // check it elsewhere.
- err = loader.CheckVocabularyFile(*cluster)
- if err != nil {
- return 1
- }
- }
-
// Now that we've read the config, replace the bootstrap
// logger with a new one according to the logging config.
log = ctxlog.New(stderr, cluster.SystemLogs.Format, cluster.SystemLogs.LogLevel)
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 558aa68a6..50babd5f3 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -102,6 +102,7 @@ type Cluster struct {
WebsocketClientEventQueue int
WebsocketServerEventQueue int
KeepServiceRequestTimeout Duration
+ Vocabulary *Vocabulary `json:"-"`
VocabularyPath string
}
AuditLogs struct {
diff --git a/sdk/go/arvados/vocabulary.go b/sdk/go/arvados/vocabulary.go
new file mode 100644
index 000000000..585e5932a
--- /dev/null
+++ b/sdk/go/arvados/vocabulary.go
@@ -0,0 +1,89 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvados
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "reflect"
+)
+
+type Vocabulary struct {
+ StrictTags bool `json:"strict_tags"`
+ Tags map[string]VocabularyTag `json:"tags"`
+}
+
+type VocabularyTag struct {
+ Strict bool `json:"strict"`
+ Labels []VocabularyLabel `json:"labels"`
+ Values map[string]VocabularyTagValue `json:"values"`
+}
+
+type VocabularyLabel struct {
+ Label string `json:"label"`
+}
+
+type VocabularyTagValue struct {
+ Labels []VocabularyLabel `json:"labels"`
+}
+
+func NewVocabulary(data []byte) (voc *Vocabulary, err error) {
+ if r := bytes.Compare(data, []byte("")); r == 0 {
+ return &Vocabulary{}, nil
+ }
+ err = json.Unmarshal(data, &voc)
+ if err != nil {
+ return nil, fmt.Errorf("invalid JSON format error: %q", err)
+ }
+ if reflect.DeepEqual(voc, &Vocabulary{}) {
+ return nil, fmt.Errorf("JSON data provided doesn't match Vocabulary format: %q", data)
+ }
+ err = voc.Validate()
+ if err != nil {
+ return nil, err
+ }
+ return voc, nil
+}
+
+func (v *Vocabulary) Validate() error {
+ tagKeys := map[string]bool{}
+ // Checks for Vocabulary strictness
+ if v.StrictTags && len(v.Tags) == 0 {
+ return fmt.Errorf("vocabulary is strict but no tags are defined")
+ }
+ // Checks for duplicate tag keys
+ for key := range v.Tags {
+ if tagKeys[key] {
+ return fmt.Errorf("duplicate tag key %q", key)
+ }
+ tagKeys[key] = true
+ for _, lbl := range v.Tags[key].Labels {
+ if tagKeys[lbl.Label] {
+ return fmt.Errorf("tag label %q for key %q already seen as a tag key or label", lbl.Label, key)
+ }
+ tagKeys[lbl.Label] = true
+ }
+ // Checks for value strictness
+ if v.Tags[key].Strict && len(v.Tags[key].Values) == 0 {
+ return fmt.Errorf("tag key %q is configured as strict but doesn't provide values", key)
+ }
+ // Checks for value duplication within a key
+ tagValues := map[string]bool{}
+ for val := range v.Tags[key].Values {
+ if tagValues[val] {
+ return fmt.Errorf("duplicate tag value %q for tag %q", val, key)
+ }
+ tagValues[val] = true
+ for _, tagLbl := range v.Tags[key].Values[val].Labels {
+ if tagValues[tagLbl.Label] {
+ return fmt.Errorf("tag value label %q for value %q[%q] already seen as a value key or label", tagLbl.Label, key, val)
+ }
+ tagValues[tagLbl.Label] = true
+ }
+ }
+ }
+ return nil
+}
diff --git a/sdk/go/arvados/vocabulary_test.go b/sdk/go/arvados/vocabulary_test.go
new file mode 100644
index 000000000..45ef3dbd1
--- /dev/null
+++ b/sdk/go/arvados/vocabulary_test.go
@@ -0,0 +1,146 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvados
+
+import (
+ check "gopkg.in/check.v1"
+)
+
+type VocabularySuite struct{}
+
+var _ = check.Suite(&VocabularySuite{})
+
+func (s *VocabularySuite) TestNewVocabulary(c *check.C) {
+ tests := []struct {
+ name string
+ data string
+ isValid bool
+ errMatches string
+ expect *Vocabulary
+ }{
+ {"Empty data", "", true, "", &Vocabulary{}},
+ {"Invalid JSON", "foo", false, "invalid JSON format.*", nil},
+ {"Valid, empty JSON", "{}", false, ".*doesn't match Vocabulary format.*", nil},
+ {"Valid JSON, wrong data", `{"foo":"bar"}`, false, ".*doesn't match Vocabulary format.*", nil},
+ {
+ "Simple valid example",
+ `{"tags":{
+ "IDTAGANIMALS":{
+ "strict": false,
+ "labels": [{"label": "Animal"}, {"label": "Creature"}],
+ "values": {
+ "IDVALANIMAL1":{"labels":[{"label":"Human"}, {"label":"Homo sapiens"}]},
+ "IDVALANIMAL2":{"labels":[{"label":"Elephant"}, {"label":"Loxodonta"}]}
+ }
+ }
+ }}`,
+ true, "",
+ &Vocabulary{
+ StrictTags: false,
+ Tags: map[string]VocabularyTag{
+ "IDTAGANIMALS": {
+ Strict: false,
+ Labels: []VocabularyLabel{{Label: "Animal"}, {Label: "Creature"}},
+ Values: map[string]VocabularyTagValue{
+ "IDVALANIMAL1": {
+ Labels: []VocabularyLabel{{Label: "Human"}, {Label: "Homo sapiens"}},
+ },
+ "IDVALANIMAL2": {
+ Labels: []VocabularyLabel{{Label: "Elephant"}, {Label: "Loxodonta"}},
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ c.Log(c.TestName()+" ", tt.name)
+ voc, err := NewVocabulary([]byte(tt.data))
+ if tt.isValid {
+ c.Assert(err, check.IsNil)
+ } else {
+ c.Assert(err, check.NotNil)
+ if tt.errMatches != "" {
+ c.Assert(err, check.ErrorMatches, tt.errMatches)
+ }
+ }
+ c.Assert(voc, check.DeepEquals, tt.expect)
+ }
+}
+
+func (s *VocabularySuite) TestValidationErrors(c *check.C) {
+ tests := []struct {
+ name string
+ voc *Vocabulary
+ errMatches string
+ }{
+ {
+ "Strict vocabulary, no keys",
+ &Vocabulary{
+ StrictTags: true,
+ },
+ "vocabulary is strict but no tags are defined",
+ },
+ {
+ "Duplicated tag keys",
+ &Vocabulary{
+ StrictTags: false,
+ Tags: map[string]VocabularyTag{
+ "IDTAGANIMALS": {
+ Strict: false,
+ Labels: []VocabularyLabel{{Label: "Animal"}, {Label: "Creature"}},
+ },
+ "IDTAGCOMMENT": {
+ Strict: false,
+ Labels: []VocabularyLabel{{Label: "Comment"}, {Label: "Animal"}},
+ },
+ },
+ },
+ "tag label.*for key.*already seen.*",
+ },
+ {
+ "Duplicated tag values",
+ &Vocabulary{
+ StrictTags: false,
+ Tags: map[string]VocabularyTag{
+ "IDTAGANIMALS": {
+ Strict: false,
+ Labels: []VocabularyLabel{{Label: "Animal"}, {Label: "Creature"}},
+ Values: map[string]VocabularyTagValue{
+ "IDVALANIMAL1": {
+ Labels: []VocabularyLabel{{Label: "Human"}, {Label: "Mammal"}},
+ },
+ "IDVALANIMAL2": {
+ Labels: []VocabularyLabel{{Label: "Elephant"}, {Label: "Mammal"}},
+ },
+ },
+ },
+ },
+ },
+ "tag value label.*for value.*already seen.*",
+ },
+ {
+ "Strict key, no values",
+ &Vocabulary{
+ StrictTags: false,
+ Tags: map[string]VocabularyTag{
+ "IDTAGANIMALS": {
+ Strict: true,
+ Labels: []VocabularyLabel{{Label: "Animal"}, {Label: "Creature"}},
+ },
+ },
+ },
+ "tag key.*is configured as strict but doesn't provide values",
+ },
+ }
+ for _, tt := range tests {
+ c.Log(c.TestName()+" ", tt.name)
+ err := tt.voc.Validate()
+ c.Assert(err, check.NotNil)
+ c.Assert(err, check.ErrorMatches, tt.errMatches)
+ }
+}
commit feb047cb5f86675d62e98132ef9676dc05407f14
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date: Mon Oct 25 13:05:31 2021 -0300
17944: Initial vocabulary validation on controller service.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>
diff --git a/lib/config/load.go b/lib/config/load.go
index 248960beb..959bb7f5f 100644
--- a/lib/config/load.go
+++ b/lib/config/load.go
@@ -389,6 +389,24 @@ func (ldr *Loader) checkStorageClasses(cc arvados.Cluster) error {
return nil
}
+// CheckVocabularyFile will be called only by interested components as the file
+// isn't expected to be present on every node.
+func (ldr *Loader) CheckVocabularyFile(cc arvados.Cluster) error {
+ if cc.API.VocabularyPath == "" {
+ return nil
+ }
+ vf, err := os.ReadFile(cc.API.VocabularyPath)
+ if err != nil {
+ return fmt.Errorf("couldn't read vocabulary file %q: %v", cc.API.VocabularyPath, err)
+ }
+ var jsonData map[string]json.RawMessage
+ err = json.Unmarshal(vf, &jsonData)
+ if err != nil {
+ return fmt.Errorf("invalid JSON data in vocabulary file %q", cc.API.VocabularyPath)
+ }
+ return nil
+}
+
func checkKeyConflict(label string, m map[string]string) error {
saw := map[string]bool{}
for k := range m {
diff --git a/lib/controller/federation.go b/lib/controller/federation.go
index 144d41c21..cd69727ec 100644
--- a/lib/controller/federation.go
+++ b/lib/controller/federation.go
@@ -121,8 +121,6 @@ func (h *Handler) setupProxyRemoteCluster(next http.Handler) http.Handler {
mux.ServeHTTP(w, req)
})
-
- return mux
}
type CurrentUser struct {
diff --git a/lib/service/cmd.go b/lib/service/cmd.go
index e67c24f65..9ab9371ab 100644
--- a/lib/service/cmd.go
+++ b/lib/service/cmd.go
@@ -105,6 +105,16 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
return 1
}
+ if strings.HasSuffix(prog, "controller") {
+ // The vocabulary file is expected to be present only
+ // in the controller node, so it doesn't make sense to
+ // check it elsewhere.
+ err = loader.CheckVocabularyFile(*cluster)
+ if err != nil {
+ return 1
+ }
+ }
+
// Now that we've read the config, replace the bootstrap
// logger with a new one according to the logging config.
log = ctxlog.New(stderr, cluster.SystemLogs.Format, cluster.SystemLogs.LogLevel)
commit 08992b7712ae83d235ffbad00897265d3ea17b5a
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date: Thu Oct 21 15:16:14 2021 -0300
17944: Updates config knobs and documentation.
Workbench.VocabularyURL will still be auto-exported, now pointing to the new
vocabulary endpoint, for backwards compatibility.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>
diff --git a/doc/_config.yml b/doc/_config.yml
index 6f1c90d9b..b7760e3b3 100644
--- a/doc/_config.yml
+++ b/doc/_config.yml
@@ -194,7 +194,7 @@ navbar:
- admin/keep-balance.html.textile.liquid
- admin/controlling-container-reuse.html.textile.liquid
- admin/logs-table-management.html.textile.liquid
- - admin/workbench2-vocabulary.html.textile.liquid
+ - admin/metadata-vocabulary.html.textile.liquid
- admin/storage-classes.html.textile.liquid
- admin/keep-recovering-data.html.textile.liquid
- admin/keep-measuring-deduplication.html.textile.liquid
diff --git a/doc/_includes/_wb2_vocabulary_example.liquid b/doc/_includes/_metadata_vocabulary_example.liquid
similarity index 90%
rename from doc/_includes/_wb2_vocabulary_example.liquid
rename to doc/_includes/_metadata_vocabulary_example.liquid
index ee2ac97ef..016b48c6a 100644
--- a/doc/_includes/_wb2_vocabulary_example.liquid
+++ b/doc/_includes/_metadata_vocabulary_example.liquid
@@ -1,3 +1,9 @@
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
{
"strict_tags": false,
"tags": {
diff --git a/doc/admin/workbench2-vocabulary.html.textile.liquid b/doc/admin/metadata-vocabulary.html.textile.liquid
similarity index 79%
rename from doc/admin/workbench2-vocabulary.html.textile.liquid
rename to doc/admin/metadata-vocabulary.html.textile.liquid
index 9a8d7fcd0..e615fa3c8 100644
--- a/doc/admin/workbench2-vocabulary.html.textile.liquid
+++ b/doc/admin/metadata-vocabulary.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: admin
-title: User properties vocabulary
+title: Metadata vocabulary
...
{% comment %}
@@ -12,17 +12,19 @@ SPDX-License-Identifier: CC-BY-SA-3.0
Many Arvados objects (like collections and projects) can store metadata as properties that in turn can be used in searches allowing a flexible way of organizing data inside the system.
-The Workbench2 user interface enables the site adminitrator to set up a properties vocabulary formal definition so that users can select from predefined key/value pairs of properties, offering the possibility to add different terms for the same concept.
+Arvados enables the site administrator to set up a formal metadata vocabulary definition so that users can select from predefined key/value pairs of properties, offering the possibility to add different terms for the same concept in client user interfaces such as Workbench2.
-h2. Workbench2 configuration
+The Controller service loads and caches the configured vocabulary file in memory at startup time, exporting it on a particular endpoint. Periodically, it checks the local copy for updates and refreshes its cache if the updated file passes validation.
-Workbench2 retrieves the vocabulary file URL from the cluster config as shown:
+h2. Configuration
+
+The site administrator should place the JSON vocabulary file on the same host as the controller service and set up the config file as follows:
<notextile>
<pre><code>Cluster:
zzzzz:
- Workbench:
- VocabularyURL: <span class="userinput">https://site.example.com/vocabulary.json</span>
+ API:
+ VocabularyPath: <span class="userinput">/etc/arvados/vocabulary.json</span>
</code></pre>
</notextile>
@@ -35,10 +37,10 @@ Keys and values are indexed by identifiers so that the concept of a term is pres
The following is an example of a vocabulary definition:
{% codeblock as json %}
-{% include 'wb2_vocabulary_example' %}
+{% include 'metadata_vocabulary_example' %}
{% endcodeblock %}
-If the @strict_tags@ flag at the root level is @true@, it will restrict the users from saving property keys other than the ones defined in the vocabulary. Take notice that this restriction is at the client level on Workbench2, it doesn't limit the user's ability to set any arbitrary property via other means (e.g. Python SDK or CLI commands)
+If the @strict_tags@ flag at the root level is @true@, it will restrict the users from saving property keys other than the ones defined in the vocabulary. This restriction is enforced at the backend level to ensure consistency across different clients.
Inside the @tags@ member, IDs are defined (@IDTAGANIMALS@, @IDTAGCOMMENT@, @IDTAGIMPORTANCES@) and can have any format that the current application requires. Every key will declare at least a @labels@ list with zero or more label objects.
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 4e2a0e26d..da08a2596 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -234,6 +234,12 @@ Clusters:
# Timeout on requests to internal Keep services.
KeepServiceRequestTimeout: 15s
+ # Vocabulary file path, local to the node running the controller.
+ # This JSON file should contain the description of what's allowed
+ # as object's metadata. Its format is described at:
+ # https://doc.arvados.org/admin/metadata-vocabulary.html
+ VocabularyPath: ""
+
Users:
# Config parameters to automatically setup new users. If enabled,
# this users will be able to self-activate. Enable this if you want
@@ -1521,7 +1527,6 @@ Clusters:
DefaultOpenIdPrefix: "https://www.google.com/accounts/o8/id"
# Workbench2 configs
- VocabularyURL: ""
FileViewersConfigURL: ""
# Idle time after which the user's session will be auto closed.
diff --git a/lib/config/export.go b/lib/config/export.go
index 92e2d7b4d..f2c15b0ee 100644
--- a/lib/config/export.go
+++ b/lib/config/export.go
@@ -72,6 +72,7 @@ var whitelist = map[string]bool{
"API.MaxTokenLifetime": false,
"API.RequestTimeout": true,
"API.SendTimeout": true,
+ "API.VocabularyPath": false,
"API.WebsocketClientEventQueue": false,
"API.WebsocketServerEventQueue": false,
"AuditLogs": false,
@@ -274,7 +275,6 @@ var whitelist = map[string]bool{
"Workbench.UserProfileFormFields.*.*": true,
"Workbench.UserProfileFormFields.*.*.*": true,
"Workbench.UserProfileFormMessage": true,
- "Workbench.VocabularyURL": true,
"Workbench.WelcomePageHTML": true,
}
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index 875939a3e..02f2e88c5 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -240,6 +240,12 @@ Clusters:
# Timeout on requests to internal Keep services.
KeepServiceRequestTimeout: 15s
+ # Vocabulary file path, local to the node running the controller.
+ # This JSON file should contain the description of what's allowed
+ # as object's metadata. Its format is described at:
+ # https://doc.arvados.org/admin/metadata-vocabulary.html
+ VocabularyPath: ""
+
Users:
# Config parameters to automatically setup new users. If enabled,
# this users will be able to self-activate. Enable this if you want
@@ -1527,7 +1533,6 @@ Clusters:
DefaultOpenIdPrefix: "https://www.google.com/accounts/o8/id"
# Workbench2 configs
- VocabularyURL: ""
FileViewersConfigURL: ""
# Idle time after which the user's session will be auto closed.
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index f1d27b8dc..558aa68a6 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -102,6 +102,7 @@ type Cluster struct {
WebsocketClientEventQueue int
WebsocketServerEventQueue int
KeepServiceRequestTimeout Duration
+ VocabularyPath string
}
AuditLogs struct {
MaxAge Duration
@@ -273,7 +274,6 @@ type Cluster struct {
Options map[string]struct{}
}
UserProfileFormMessage string
- VocabularyURL string
WelcomePageHTML string
InactivePageHTML string
SSHHelpPageHTML string
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list