[ARVADOS] updated: 1.3.0-350-g4659594d4
Git user
git at public.curoverse.com
Thu Feb 21 14:40:50 EST 2019
Summary of changes:
.gitignore | 3 +-
build/build-dev-docker-jobs-image.sh | 8 +-
build/run-build-packages.sh | 5 +-
build/run-library.sh | 5 +
build/run-tests.sh | 5 +-
...loud.service => arvados-dispatch-cloud.service} | 2 +-
doc/admin/upgrading.html.textile.liquid | 8 +
docker/jobs/Dockerfile | 3 +
lib/cloud/{ => azure}/azure.go | 228 ++++----
lib/cloud/{ => azure}/azure_test.go | 123 +++--
lib/cloud/gocheck_test.go | 16 -
lib/cloud/interfaces.go | 13 +-
lib/controller/federation_test.go | 24 +-
lib/controller/handler.go | 2 -
lib/dispatchcloud/container/queue.go | 85 ++-
lib/dispatchcloud/container/queue_test.go | 131 +++++
lib/dispatchcloud/dispatcher.go | 59 ++-
lib/dispatchcloud/dispatcher_test.go | 41 +-
lib/dispatchcloud/driver.go | 3 +-
lib/dispatchcloud/instance_set_proxy.go | 25 -
lib/dispatchcloud/readme_states.txt | 31 ++
lib/dispatchcloud/scheduler/fix_stale_locks.go | 47 +-
lib/dispatchcloud/scheduler/interfaces.go | 7 +-
lib/dispatchcloud/scheduler/run_queue.go | 13 +-
lib/dispatchcloud/scheduler/run_queue_test.go | 62 ++-
lib/dispatchcloud/ssh_executor/executor.go | 34 +-
lib/dispatchcloud/ssh_executor/executor_test.go | 86 ++-
lib/dispatchcloud/test/lame_instance_set.go | 118 -----
lib/dispatchcloud/test/logger.go | 19 +
lib/dispatchcloud/test/ssh_service.go | 27 +-
lib/dispatchcloud/test/stub_driver.go | 75 ++-
lib/dispatchcloud/worker/pool.go | 183 +++++--
lib/dispatchcloud/worker/pool_test.go | 193 ++++++-
lib/dispatchcloud/worker/throttle.go | 68 +++
lib/dispatchcloud/worker/throttle_test.go | 32 ++
lib/dispatchcloud/worker/worker.go | 288 +++++++---
lib/dispatchcloud/worker/worker_test.go | 239 +++++++++
sdk/cli/arvados-cli.gemspec | 2 +-
sdk/cli/bin/crunch-job | 1 +
sdk/cwl/arvados_cwl/__init__.py | 37 +-
sdk/cwl/arvados_cwl/arvcontainer.py | 13 +-
sdk/cwl/arvados_cwl/arvjob.py | 8 +-
sdk/cwl/arvados_cwl/arvtool.py | 2 +-
sdk/cwl/arvados_cwl/arvworkflow.py | 5 +-
sdk/cwl/arvados_cwl/crunch_script.py | 5 +-
sdk/cwl/arvados_cwl/done.py | 8 +-
sdk/cwl/arvados_cwl/executor.py | 30 +-
sdk/cwl/arvados_cwl/fsaccess.py | 30 +-
sdk/cwl/arvados_cwl/http.py | 24 +-
sdk/cwl/arvados_cwl/pathmapper.py | 20 +-
sdk/cwl/arvados_cwl/perf.py | 2 +
sdk/cwl/arvados_cwl/runner.py | 18 +-
sdk/cwl/arvados_cwl/task_queue.py | 15 +-
sdk/cwl/arvados_cwl/util.py | 7 +-
sdk/cwl/fpm-info.sh | 2 +
sdk/cwl/gittaggers.py | 3 +
sdk/cwl/setup.py | 6 +-
sdk/cwl/test_with_arvbox.sh | 44 +-
sdk/cwl/tests/12213-keepref-expr.cwl | 3 +-
sdk/cwl/tests/arvados-tests.yml | 4 +-
sdk/cwl/tests/federation/framework/check_exist.py | 1 +
sdk/cwl/tests/federation/framework/prepare.py | 1 +
sdk/cwl/tests/hw.py | 3 +-
sdk/cwl/tests/matcher.py | 2 +
sdk/cwl/tests/test_container.py | 3 +
sdk/cwl/tests/test_http.py | 11 +-
sdk/cwl/tests/test_job.py | 36 +-
sdk/cwl/tests/test_make_output.py | 7 +-
sdk/cwl/tests/test_submit.py | 581 ++++++++-------------
sdk/cwl/tests/test_urljoin.py | 22 +-
sdk/cwl/tests/test_util.py | 15 +-
sdk/cwl/tests/wf/check_mem.py | 5 +-
sdk/dev-jobs.dockerfile | 20 +-
sdk/go/arvados/client.go | 9 +-
sdk/go/arvados/config.go | 11 +-
sdk/go/arvados/container.go | 31 +-
sdk/go/arvados/duration.go | 10 +-
sdk/go/arvadostest/fixtures.go | 4 +
sdk/python/arvados/commands/keepdocker.py | 3 +-
sdk/python/setup.py | 4 +
.../api/app/models/api_client_authorization.rb | 6 +
services/api/app/models/collection.rb | 1 +
services/api/app/models/container.rb | 15 +-
services/api/config/application.default.yml | 4 +
.../20190214214814_add_container_lock_count.rb | 5 +
services/api/db/structure.sql | 5 +-
.../arvados/v1/collections_controller_test.rb | 25 +-
services/api/test/unit/container_test.rb | 46 ++
services/keepstore/azure_blob_volume.go | 3 +-
services/keepstore/config.go | 6 +-
services/keepstore/metrics.go | 191 +------
services/keepstore/s3_volume.go | 3 +-
services/keepstore/s3_volume_test.go | 14 +-
services/keepstore/stats_ticker.go | 27 -
services/keepstore/volume.go | 3 +-
services/keepstore/volume_test.go | 4 +-
services/keepstore/volume_unix.go | 108 ++--
services/keepstore/volume_unix_test.go | 5 +-
services/login-sync/arvados-login-sync.gemspec | 2 +-
vendor/vendor.json | 12 +-
100 files changed, 2426 insertions(+), 1438 deletions(-)
rename cmd/arvados-server/{crunch-dispatch-cloud.service => arvados-dispatch-cloud.service} (94%)
rename lib/cloud/{ => azure}/azure.go (73%)
rename lib/cloud/{ => azure}/azure_test.go (68%)
delete mode 100644 lib/cloud/gocheck_test.go
create mode 100644 lib/dispatchcloud/container/queue_test.go
delete mode 100644 lib/dispatchcloud/instance_set_proxy.go
create mode 100644 lib/dispatchcloud/readme_states.txt
delete mode 100644 lib/dispatchcloud/test/lame_instance_set.go
create mode 100644 lib/dispatchcloud/test/logger.go
create mode 100644 lib/dispatchcloud/worker/throttle.go
create mode 100644 lib/dispatchcloud/worker/throttle_test.go
create mode 100644 lib/dispatchcloud/worker/worker_test.go
create mode 100644 services/api/db/migrate/20190214214814_add_container_lock_count.rb
via 4659594d40987a2bab1ff75d015de5d46113692d (commit)
via 43ad590772de48fbc3a6a45654445bab79a0bdc1 (commit)
via 9acc7690b9c734b7e31476ce66ef668bf7eb4bbf (commit)
via c1e1a754670eadb154fba29817dcd9b7360a4e18 (commit)
via 8be73cb77f5f32d99f8448a66601e802714225da (commit)
via a4efc2178e0e59c4b32451365691e9100ff88186 (commit)
via 5f8001297500ccd0a2153ede4a28ba14daad0d42 (commit)
via 0f14b3456d2d3bdf95b78b65a1a41280a7416928 (commit)
via a4fa040579473b598a532ee38173b1a28c6b1694 (commit)
via 1bd522fddbb5a95a11601d0a1920f3c2d8733b70 (commit)
via c9127a21551ef7689637f1782ea40d0f75822af2 (commit)
via 43e8b1419491290d4df65b2ea9121910b7c766cd (commit)
via 9fa65d7621853b24b7fb13acd44fedb28543b261 (commit)
via b7f7154a7232e49b0e8ee5bc41d124ff2d15273c (commit)
via 8be52d226caaefc971492d6639d63761c1077c1f (commit)
via fba0cd4c6cd37f6034ddc862c945464c1aa58dd6 (commit)
via d3e1ff27ec14bcb8a7037cc72f54a6816efe5916 (commit)
via ff241c645b4f9c817739141016d6bb38ae46dc97 (commit)
via fb6840e9dc571044026e8d3d78079aed3bace897 (commit)
via 503aeeb11f4aec1a80c51d7a659e083d6a0dcf3c (commit)
via a4396e183ec11a7241cf5089c5ccfbca1ad8627f (commit)
via b56e2857bfa4f7d2094546ffa3407cede877fed1 (commit)
via 4f2aab840085126c11308553860888ce231c2a15 (commit)
via 0944ea36035a3b01ba5cadf6453a1424627c8ef7 (commit)
via 5f5b8373ded1bd2da81b52c06a47b9028ed30541 (commit)
via 5a2ee4443769b236001117a502bfef94bad54d16 (commit)
via 11ba2964a6cc44bd5c02fbb511bf413e52d03774 (commit)
via 60caf7233f1061f4cd3a9cd2be75caa64d0bec9e (commit)
via 817c574dbdad73298b46ba30afdc75b091e20f2b (commit)
via 7a9e57f98e2d5bad3dec7d17cfb9c86e5c5c927c (commit)
via a394a4df81fd51691df7742f0c71ba91afbacefc (commit)
via 15a809ab5d5cd892b7f3097f82ba271890721cd1 (commit)
via 74f35aa24778f06e05fd24c0ca7eea26a42ccaef (commit)
via 171b9ac5962c21dead1d04f4c53460a81b9b9993 (commit)
via 3fc84235dd2ce76eaf7ec140ac1754b5370305bb (commit)
via d38793d3af2b0563196607a4e52bf07737ec55df (commit)
via cdd31206b8c228ffcdc6f0ab7f0017d537836678 (commit)
via f7678076065723352c46600ce9f5780beca1cdaf (commit)
via 800139c8dee7d9a563a8a2dca9e45e283c55c22c (commit)
via 2c3a6a67bc01241f57e815f4f7e4678bd6eadb03 (commit)
via 60f3033a6ad0c4d155344b1d4922f3b2e1cb29bc (commit)
via 9e7c33b6653507da7977c638a0bc158d8dfeefaa (commit)
via 09939997947db0390e62e4b450fef43896d318aa (commit)
via bdbf7c4ee346fd1615f882255b3606d8d5360710 (commit)
via 41beca9c117fa9a7f46addd3a9e8a46cc7ffe328 (commit)
via e17f20cd7d10d81b0dcb5522a3e93305b318da8f (commit)
via d3cd31711ab889e9c2c4732a977515a5de3236eb (commit)
via ecd42339b5bca26552ad2525a7a9ceba621dd967 (commit)
via 612c183a11c615d453d1ce6aa51f32652be0ae02 (commit)
via e2293bfadfd307d7dd0cac7b70e9b5fa5258defd (commit)
via 9b712f30fe6784a5c8e747a8cf229c54cc02a509 (commit)
via 38b5a8e1debf8ee7a48bfe896a44089cb7f27453 (commit)
via d1993ffdca3bc9036b1915bae6360dee145ee9f2 (commit)
via caeb94881de75f2f8ee0156951c7f54006499e15 (commit)
via ee53a267ded17bc50eaf4dfebba5ff4a3273753c (commit)
via 71fd4da18b22100682ae7e2079aadfd66360d310 (commit)
via a641be48fcab06e2efd701fddb1b276c159f26cc (commit)
via affd909987cedc1d2418ff4d9cb031f01ffe4deb (commit)
via 138ea2112594804cbb2a775c13186208d3685dfd (commit)
via fced3381bd3e4cfba379a50107f5dc27dda93aea (commit)
via 1e8da3c17b221eba0288ec3b6fd5769df2598531 (commit)
via dd98e9fb19b413e649a2656513714e1bda81bfa0 (commit)
via 77d847ff11736567d67c91cb89aea9c9f3c0c83e (commit)
via 7e1c8ea79a19d5f756f8549474547da52137ce6d (commit)
via 7d4da75d3980d465053c44b4d5b16afe166912a6 (commit)
via 3511212cb81453e6c4c1cc14eca0320635861333 (commit)
via a27b2bf3e33a80213a42dcf1e01144209eb2603a (commit)
via 5fbcb53ad80972cbe0d8c46ff821ccd32c56c11f (commit)
via b105602902e38f18a48505e2091ffea77b2c7c89 (commit)
via c1aa581b3511b89527f185fd3fac7447aa33c9fc (commit)
via 8808efaa1c87688b6b89e60c0337b6f0589df779 (commit)
via 804ed25e843c38c4a5bf381f70dbaa0a61072a86 (commit)
via 9668d19ed127b01f986ae7defc657c0fd23e604b (commit)
via c88ffa1a163c929ffa963af3eb1bcdbca1f6b6f2 (commit)
via be8ed479042df4fdefe1fd18c1e2e984e1c99bc0 (commit)
via db09e5f787a1b2d0e02abb627bf25054a047f7db (commit)
via 630601173bda46a7c02b5fbf43eaf5422a95b7d7 (commit)
via 19b7075b60ef4252c85dca4a0a0f8b0d9e67498a (commit)
via de9d45e3a238df8e9f0b2833b86c5e54fec37c7a (commit)
via fa5e2a328b26c71194bf676d850b3971542123fc (commit)
via 88cc6dd9d342aa997bafb5201dc2b9a7609805d5 (commit)
via 262d59632f85b34ef4e2bcb1ee323a6e3b4435ed (commit)
via 332f82b60422b3c445b3fd2d69ddd45915d23ff8 (commit)
via 38cf2cccc4c3ec520d3d7ab85dbe27f427f6d394 (commit)
via 3300a5ebde5463808e098e489d47b756d8613774 (commit)
via 61381f4be1dbba9c56df342093ee614f0d5a28df (commit)
via 8052381fb4e7aceb52497e8378b596178cf5af7c (commit)
via 8c593d84f88aa5cd87de0acedffdf867deca51f7 (commit)
via 003fe66cd741d4ac7e841da56eda30d7ea88f392 (commit)
via f696f142eb5dcc2b5daac56ea38f457c4106a8a7 (commit)
via 57b450aee36a60b2ad0be0a073250e2badbbda8d (commit)
via 9d6a7e96d03d1468a992f1b4038de970d40f3c78 (commit)
via 45bc60373abee3185ccd9ad757e6e2a1d98fd5d2 (commit)
via 3eab0b129b4756a665b4ff0143ef6335295ed1cb (commit)
via 06b0ca6bdf1cb278f361d6eebcf9fe965c4f350f (commit)
via 3e0836a454bde7c61192700b5ba96a319e5fafe5 (commit)
via db376a8ddd3a803f4c9c85baf41cb57c6106452f (commit)
via 1d80809a16fc97d7351824d2c921578133a93f65 (commit)
via be57c52f8ea23852d9d00ab1981b41354f59f27d (commit)
via 8855c1dea7f258f61aa8815efdb582d0f096ce3c (commit)
via a8db9566d6375d92b606b9ca59dfd92f22c41866 (commit)
via 7360ecf9c5bd593834c45b1ad60bf0b6ac30d05e (commit)
via dd517b036d14a4e5d78c1fbde97f1760bf848004 (commit)
via df9d7f8d64a2a51dea5552641f44afd3e43c636b (commit)
via 2a018b15662ae5f0b30d1d11eb2d0ffa685964e0 (commit)
via efaa80ea0f18dfeaf344a5250fa37090451f0699 (commit)
via a7da936b6106bff17a7830124b1fd87ff01ad91e (commit)
via 5e66fb80a5053b1c51b58191f042dbc40839ba9d (commit)
via 550242c2d5fb0c26b18350573422d9199fcd1d93 (commit)
via f27fcfafc8f913780e59f830587aed447cb9a5ab (commit)
via f13d808554e0f64713c3d6636fbddf5e927e9e1a (commit)
via 07782f1743267638c996cb1d9bb3a85f24c8243e (commit)
via e17fe40c08651f39bc42468b1b2b56841bbaf223 (commit)
via 7feee6a3bbacc88a62faa35cf94cb0d9d1b04994 (commit)
via 78759074e0218ca9822e73a8ab4639d813658ef7 (commit)
via a2a531ec3c8fb543382c9520073954c886ee5434 (commit)
via bc1f8d47b8233b00a449edb38f4c4bde8f5d9163 (commit)
via aa95ea325564f844472c1278615ff083a33cb151 (commit)
via 471f143424d8d471a83469df1c34300e3692f70c (commit)
via 84956714286f5d9889d587c50d5bbe501baacc51 (commit)
via d52bf847478dbb92d068e27bcea36fdeee34a9a7 (commit)
via 03f5792d109cca57f9a6dd585461e3622a9c74c5 (commit)
via d6cccb3ea4e5f076a436d9935e3835d4b620b859 (commit)
via 523dbe6efe87983ad3f1ac93a5dedfa104302650 (commit)
via 91bc40ccde4d224f32b8863eec2363c717093f8d (commit)
from f50aff88ccf1ce6e590a3fe98689eabef4ad292a (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 4659594d40987a2bab1ff75d015de5d46113692d
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Thu Feb 21 16:38:53 2019 -0300
13937: Simplified volume specific metric handling (WIP)
Pending: implement them on s3 & azure
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/services/keepstore/azure_blob_volume.go b/services/keepstore/azure_blob_volume.go
index ab199d991..ce7063aa7 100644
--- a/services/keepstore/azure_blob_volume.go
+++ b/services/keepstore/azure_blob_volume.go
@@ -23,6 +23,7 @@ import (
"git.curoverse.com/arvados.git/sdk/go/arvados"
"github.com/Azure/azure-sdk-for-go/storage"
+ "github.com/prometheus/client_golang/prometheus"
)
const azureDefaultRequestTimeout = arvados.Duration(10 * time.Minute)
@@ -147,7 +148,7 @@ func (v *AzureBlobVolume) Type() string {
}
// Start implements Volume.
-func (v *AzureBlobVolume) Start(m *volumeMetrics) error {
+func (v *AzureBlobVolume) Start(opsCounters, errCounters, ioBytes *prometheus.CounterVec) error {
if v.ContainerName == "" {
return errors.New("no container name given")
}
diff --git a/services/keepstore/config.go b/services/keepstore/config.go
index 43b309916..0902b99eb 100644
--- a/services/keepstore/config.go
+++ b/services/keepstore/config.go
@@ -146,11 +146,7 @@ func (cfg *Config) Start(reg *prometheus.Registry) error {
}
vm := newVolumeMetricsVecs(reg)
for _, v := range cfg.Volumes {
- metrics := vm.curryWith(
- v.String(),
- v.Status().MountPoint,
- fmt.Sprintf("%d", v.Status().DeviceNum))
- if err := v.Start(metrics); err != nil {
+ if err := v.Start(vm.opsCounters, vm.errCounters, vm.ioBytes); err != nil {
return fmt.Errorf("volume %s: %s", v, err)
}
log.Printf("Using volume %v (writable=%v)", v, v.Writable())
diff --git a/services/keepstore/metrics.go b/services/keepstore/metrics.go
index 80194eb82..4a154bd7b 100644
--- a/services/keepstore/metrics.go
+++ b/services/keepstore/metrics.go
@@ -88,196 +88,43 @@ func (m *nodeMetrics) setupRequestMetrics(rc httpserver.RequestCounter) {
}
type volumeMetricsVecs struct {
- reg *prometheus.Registry
- BytesFree *prometheus.GaugeVec
- BytesUsed *prometheus.GaugeVec
- Errors *prometheus.CounterVec
- Ops *prometheus.CounterVec
- CompareOps *prometheus.CounterVec
- GetOps *prometheus.CounterVec
- PutOps *prometheus.CounterVec
- TouchOps *prometheus.CounterVec
- InBytes *prometheus.CounterVec
- OutBytes *prometheus.CounterVec
- ErrorCodes *prometheus.CounterVec
-}
-
-type volumeMetrics struct {
- reg *prometheus.Registry
- lbls []string
- internalCounters map[string]*prometheus.CounterVec
- BytesFree prometheus.Gauge
- BytesUsed prometheus.Gauge
- Errors prometheus.Counter
- Ops prometheus.Counter
- CompareOps prometheus.Counter
- GetOps prometheus.Counter
- PutOps prometheus.Counter
- TouchOps prometheus.Counter
- InBytes prometheus.Counter
- OutBytes prometheus.Counter
- ErrorCodes *prometheus.CounterVec
+ ioBytes *prometheus.CounterVec
+ errCounters *prometheus.CounterVec
+ opsCounters *prometheus.CounterVec
}
func newVolumeMetricsVecs(reg *prometheus.Registry) *volumeMetricsVecs {
- m := &volumeMetricsVecs{
- reg: reg,
- }
- m.BytesFree = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: "arvados",
- Subsystem: "keepstore",
- Name: "volume_bytes_free",
- Help: "Number of free bytes on the volume",
- },
- []string{"label", "mount_point", "device_number"},
- )
- reg.MustRegister(m.BytesFree)
- m.BytesUsed = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: "arvados",
- Subsystem: "keepstore",
- Name: "volume_bytes_used",
- Help: "Number of used bytes on the volume",
- },
- []string{"label", "mount_point", "device_number"},
- )
- reg.MustRegister(m.BytesUsed)
- m.Errors = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Namespace: "arvados",
- Subsystem: "keepstore",
- Name: "volume_io_errors",
- Help: "Number of volume I/O errors",
- },
- []string{"label", "mount_point", "device_number"},
- )
- reg.MustRegister(m.Errors)
- m.Ops = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Namespace: "arvados",
- Subsystem: "keepstore",
- Name: "volume_io_ops",
- Help: "Number of volume I/O operations",
- },
- []string{"label", "mount_point", "device_number"},
- )
- reg.MustRegister(m.Ops)
- m.CompareOps = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Namespace: "arvados",
- Subsystem: "keepstore",
- Name: "volume_io_compare_ops",
- Help: "Number of volume I/O compare operations",
- },
- []string{"label", "mount_point", "device_number"},
- )
- reg.MustRegister(m.CompareOps)
- m.GetOps = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Namespace: "arvados",
- Subsystem: "keepstore",
- Name: "volume_io_get_ops",
- Help: "Number of volume I/O get operations",
- },
- []string{"label", "mount_point", "device_number"},
- )
- reg.MustRegister(m.GetOps)
- m.PutOps = prometheus.NewCounterVec(
+ m := &volumeMetricsVecs{}
+ m.opsCounters = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "arvados",
Subsystem: "keepstore",
- Name: "volume_io_put_ops",
- Help: "Number of volume I/O put operations",
+ Name: "volume_operations",
+ Help: "Number of volume operations",
},
- []string{"label", "mount_point", "device_number"},
+ []string{"device_id", "operation"},
)
- reg.MustRegister(m.PutOps)
- m.TouchOps = prometheus.NewCounterVec(
+ reg.MustRegister(m.opsCounters)
+ m.errCounters = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "arvados",
Subsystem: "keepstore",
- Name: "volume_io_touch_ops",
- Help: "Number of volume I/O touch operations",
+ Name: "volume_errors",
+ Help: "Number of volume errors",
},
- []string{"label", "mount_point", "device_number"},
+ []string{"device_id", "error_type"},
)
- reg.MustRegister(m.TouchOps)
- m.InBytes = prometheus.NewCounterVec(
+ reg.MustRegister(m.errCounters)
+ m.ioBytes = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "arvados",
Subsystem: "keepstore",
- Name: "volume_io_in_bytes",
- Help: "Number of input bytes",
+ Name: "volume_io_bytes",
+ Help: "Volume I/O traffic in bytes",
},
- []string{"label", "mount_point", "device_number"},
+ []string{"device_id", "direction"},
)
- reg.MustRegister(m.InBytes)
- m.OutBytes = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Namespace: "arvados",
- Subsystem: "keepstore",
- Name: "volume_io_out_bytes",
- Help: "Number of output bytes",
- },
- []string{"label", "mount_point", "device_number"},
- )
- reg.MustRegister(m.OutBytes)
- m.ErrorCodes = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Namespace: "arvados",
- Subsystem: "keepstore",
- Name: "volume_io_error_codes",
- Help: "Number of I/O errors by error code",
- },
- []string{"label", "mount_point", "device_number", "error_code"},
- )
- reg.MustRegister(m.ErrorCodes)
+ reg.MustRegister(m.ioBytes)
return m
}
-
-func (m *volumeMetricsVecs) curryWith(lbl string, mnt string, dev string) *volumeMetrics {
- lbls := []string{lbl, mnt, dev}
- curried := &volumeMetrics{
- reg: m.reg,
- lbls: lbls,
- internalCounters: make(map[string]*prometheus.CounterVec),
- BytesFree: m.BytesFree.WithLabelValues(lbls...),
- BytesUsed: m.BytesUsed.WithLabelValues(lbls...),
- Errors: m.Errors.WithLabelValues(lbls...),
- Ops: m.Ops.WithLabelValues(lbls...),
- CompareOps: m.CompareOps.WithLabelValues(lbls...),
- GetOps: m.GetOps.WithLabelValues(lbls...),
- PutOps: m.PutOps.WithLabelValues(lbls...),
- TouchOps: m.TouchOps.WithLabelValues(lbls...),
- InBytes: m.InBytes.WithLabelValues(lbls...),
- OutBytes: m.OutBytes.WithLabelValues(lbls...),
- ErrorCodes: m.ErrorCodes.MustCurryWith(prometheus.Labels{
- "label": lbl,
- "mount_point": mnt,
- "device_number": dev,
- }),
- }
- return curried
-}
-
-// Returns a driver specific counter, creating it when needed. The 'name' argument
-// should include the driver prefix.
-func (m *volumeMetrics) getInternalCounter(name string, help string) prometheus.Counter {
- counterVec, ok := m.internalCounters[name]
- if !ok {
- counterVec = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Namespace: "arvados",
- Subsystem: "keepstore",
- Name: name,
- Help: help,
- },
- []string{"label", "mount_point", "device_number"},
- )
- m.reg.MustRegister(counterVec)
- m.internalCounters[name] = counterVec
- }
- return counterVec.WithLabelValues(m.lbls...)
-}
diff --git a/services/keepstore/s3_volume.go b/services/keepstore/s3_volume.go
index f281b363e..25bfa4ef0 100644
--- a/services/keepstore/s3_volume.go
+++ b/services/keepstore/s3_volume.go
@@ -25,6 +25,7 @@ import (
"git.curoverse.com/arvados.git/sdk/go/arvados"
"github.com/AdRoll/goamz/aws"
"github.com/AdRoll/goamz/s3"
+ "github.com/prometheus/client_golang/prometheus"
)
const (
@@ -198,7 +199,7 @@ func (*S3Volume) Type() string {
// Start populates private fields and verifies the configuration is
// valid.
-func (v *S3Volume) Start(m *volumeMetrics) error {
+func (v *S3Volume) Start(opsCounters, errCounters, ioBytes *prometheus.CounterVec) error {
region, ok := aws.Regions[v.Region]
if v.Endpoint == "" {
if !ok {
diff --git a/services/keepstore/s3_volume_test.go b/services/keepstore/s3_volume_test.go
index e88efffe4..baa9dda9e 100644
--- a/services/keepstore/s3_volume_test.go
+++ b/services/keepstore/s3_volume_test.go
@@ -171,9 +171,8 @@ func (s *StubbedS3Suite) testContextCancel(c *check.C, testFunc func(context.Con
vol := *v.S3Volume
vol.Endpoint = srv.URL
v = &TestableS3Volume{S3Volume: &vol}
- metrics := newVolumeMetricsVecs(prometheus.NewRegistry()).curryWith(
- v.String(), v.Status().MountPoint, fmt.Sprintf("%d", v.Status().DeviceNum))
- v.Start(metrics)
+ metrics := newVolumeMetricsVecs(prometheus.NewRegistry())
+ v.Start(metrics.opsCounters, metrics.errCounters, metrics.ioBytes)
ctx, cancel := context.WithCancel(context.Background())
@@ -433,9 +432,8 @@ func (s *StubbedS3Suite) newTestableVolume(c *check.C, raceWindow time.Duration,
server: srv,
serverClock: clock,
}
- metrics := newVolumeMetricsVecs(prometheus.NewRegistry()).curryWith(
- v.String(), v.Status().MountPoint, fmt.Sprintf("%d", v.Status().DeviceNum))
- v.Start(metrics)
+ metrics := newVolumeMetricsVecs(prometheus.NewRegistry())
+ v.Start(metrics.opsCounters, metrics.errCounters, metrics.ioBytes)
err = v.bucket.PutBucket(s3.ACL("private"))
c.Assert(err, check.IsNil)
return v
@@ -453,7 +451,7 @@ Volumes:
c.Check(cfg.Volumes[0].GetStorageClasses(), check.DeepEquals, []string{"class_a", "class_b"})
}
-func (v *TestableS3Volume) Start(m *volumeMetrics) error {
+func (v *TestableS3Volume) Start(opsCounters, errCounters, ioBytes *prometheus.CounterVec) error {
tmp, err := ioutil.TempFile("", "keepstore")
v.c.Assert(err, check.IsNil)
defer os.Remove(tmp.Name())
@@ -464,7 +462,7 @@ func (v *TestableS3Volume) Start(m *volumeMetrics) error {
v.S3Volume.AccessKeyFile = tmp.Name()
v.S3Volume.SecretKeyFile = tmp.Name()
- v.c.Assert(v.S3Volume.Start(m), check.IsNil)
+ v.c.Assert(v.S3Volume.Start(opsCounters, errCounters, ioBytes), check.IsNil)
return nil
}
diff --git a/services/keepstore/stats_ticker.go b/services/keepstore/stats_ticker.go
index a9f24744b..377a53675 100644
--- a/services/keepstore/stats_ticker.go
+++ b/services/keepstore/stats_ticker.go
@@ -7,8 +7,6 @@ package main
import (
"sync"
"sync/atomic"
-
- "github.com/prometheus/client_golang/prometheus"
)
type statsTicker struct {
@@ -16,23 +14,10 @@ type statsTicker struct {
InBytes uint64
OutBytes uint64
- // Prometheus metrics
- errors prometheus.Counter
- inBytes prometheus.Counter
- outBytes prometheus.Counter
- errCounters *prometheus.CounterVec
-
ErrorCodes map[string]uint64 `json:",omitempty"`
lock sync.Mutex
}
-func (s *statsTicker) setup(m *volumeMetrics) {
- s.errors = m.Errors
- s.errCounters = m.ErrorCodes
- s.inBytes = m.InBytes
- s.outBytes = m.OutBytes
-}
-
// Tick increments each of the given counters by 1 using
// atomic.AddUint64.
func (s *statsTicker) Tick(counters ...*uint64) {
@@ -48,9 +33,6 @@ func (s *statsTicker) TickErr(err error, errType string) {
if err == nil {
return
}
- if s.errors != nil {
- s.errors.Inc()
- }
s.Tick(&s.Errors)
s.lock.Lock()
@@ -59,23 +41,14 @@ func (s *statsTicker) TickErr(err error, errType string) {
}
s.ErrorCodes[errType]++
s.lock.Unlock()
- if s.errCounters != nil {
- s.errCounters.WithLabelValues(errType).Inc()
- }
}
// TickInBytes increments the incoming byte counter by n.
func (s *statsTicker) TickInBytes(n uint64) {
- if s.inBytes != nil {
- s.inBytes.Add(float64(n))
- }
atomic.AddUint64(&s.InBytes, n)
}
// TickOutBytes increments the outgoing byte counter by n.
func (s *statsTicker) TickOutBytes(n uint64) {
- if s.outBytes != nil {
- s.outBytes.Add(float64(n))
- }
atomic.AddUint64(&s.OutBytes, n)
}
diff --git a/services/keepstore/volume.go b/services/keepstore/volume.go
index 5c6d1f51a..39e2d5206 100644
--- a/services/keepstore/volume.go
+++ b/services/keepstore/volume.go
@@ -14,6 +14,7 @@ import (
"time"
"git.curoverse.com/arvados.git/sdk/go/arvados"
+ "github.com/prometheus/client_golang/prometheus"
)
type BlockWriter interface {
@@ -39,7 +40,7 @@ type Volume interface {
// Do whatever private setup tasks and configuration checks
// are needed. Return non-nil if the volume is unusable (e.g.,
// invalid config).
- Start(m *volumeMetrics) error
+ Start(opsCounters, errCounters, ioBytes *prometheus.CounterVec) error
// Get a block: copy the block data into buf, and return the
// number of bytes copied.
diff --git a/services/keepstore/volume_test.go b/services/keepstore/volume_test.go
index 72666638d..fd1a56c5e 100644
--- a/services/keepstore/volume_test.go
+++ b/services/keepstore/volume_test.go
@@ -15,6 +15,8 @@ import (
"strings"
"sync"
"time"
+
+ "github.com/prometheus/client_golang/prometheus"
)
// A TestableVolume allows test suites to manipulate the state of an
@@ -211,7 +213,7 @@ func (v *MockVolume) Type() string {
return "Mock"
}
-func (v *MockVolume) Start(m *volumeMetrics) error {
+func (v *MockVolume) Start(opsCounters, errCounters, ioBytes *prometheus.CounterVec) error {
return nil
}
diff --git a/services/keepstore/volume_unix.go b/services/keepstore/volume_unix.go
index 8d61f9619..50f19cd2f 100644
--- a/services/keepstore/volume_unix.go
+++ b/services/keepstore/volume_unix.go
@@ -121,7 +121,10 @@ type UnixVolume struct {
os osWithStats
- metrics *volumeMetrics
+ // Volume metrics
+ opsCounters *prometheus.CounterVec
+ errCounters *prometheus.CounterVec
+ ioBytes *prometheus.CounterVec
}
// DeviceID returns a globally unique ID for the volume's root
@@ -222,7 +225,7 @@ func (v *UnixVolume) Type() string {
}
// Start implements Volume
-func (v *UnixVolume) Start(m *volumeMetrics) error {
+func (v *UnixVolume) Start(opsCounters, errCounters, ioBytes *prometheus.CounterVec) error {
if v.Serialize {
v.locker = &sync.Mutex{}
}
@@ -235,24 +238,17 @@ func (v *UnixVolume) Start(m *volumeMetrics) error {
_, err := v.os.Stat(v.Root)
if err == nil {
// Set up prometheus metrics
- v.metrics = m
- v.os.stats.setup(v.metrics)
- // Periodically update free/used volume space
- go func() {
- for {
- v.metrics.BytesFree.Set(float64(v.Status().BytesFree))
- v.metrics.BytesUsed.Set(float64(v.Status().BytesUsed))
- time.Sleep(10 * time.Second)
- }
- }()
+ lbls := prometheus.Labels{"device_id": v.DeviceID()}
+ v.opsCounters = opsCounters.MustCurryWith(lbls)
+ v.errCounters = errCounters.MustCurryWith(lbls)
+ v.ioBytes = ioBytes.MustCurryWith(lbls)
+ v.os.promSetup(v.opsCounters, v.errCounters, v.ioBytes)
}
return err
}
// Touch sets the timestamp for the given locator to the current time
func (v *UnixVolume) Touch(loc string) error {
- v.metrics.Ops.Inc()
- v.metrics.TouchOps.Inc()
if v.ReadOnly {
return MethodDisabledError
}
@@ -271,7 +267,7 @@ func (v *UnixVolume) Touch(loc string) error {
}
defer v.unlockfile(f)
ts := syscall.NsecToTimespec(time.Now().UnixNano())
- v.os.stats.utimesOps.Inc()
+ v.os.opsCounters.With(prometheus.Labels{"operation": "utimes"}).Inc()
v.os.stats.Tick(&v.os.stats.UtimesOps)
err = syscall.UtimesNano(p, []syscall.Timespec{ts, ts})
v.os.stats.TickErr(err)
@@ -300,7 +296,12 @@ func (v *UnixVolume) getFunc(ctx context.Context, path string, fn func(io.Reader
return err
}
defer f.Close()
- return fn(NewCountingReader(ioutil.NopCloser(f), v.os.stats.TickInBytes))
+ return fn(NewCountingReader(
+ ioutil.NopCloser(f),
+ func(c uint64) {
+ v.os.stats.TickInBytes(c)
+ v.ioBytes.With(prometheus.Labels{"direction": "in"}).Add(float64(c))
+ }))
}
// stat is os.Stat() with some extra sanity checks.
@@ -319,8 +320,6 @@ func (v *UnixVolume) stat(path string) (os.FileInfo, error) {
// Get retrieves a block, copies it to the given slice, and returns
// the number of bytes copied.
func (v *UnixVolume) Get(ctx context.Context, loc string, buf []byte) (int, error) {
- v.metrics.Ops.Inc()
- v.metrics.GetOps.Inc()
return getWithPipe(ctx, loc, buf, v)
}
@@ -344,8 +343,6 @@ func (v *UnixVolume) ReadBlock(ctx context.Context, loc string, w io.Writer) err
// expect. It is functionally equivalent to Get() followed by
// bytes.Compare(), but uses less memory.
func (v *UnixVolume) Compare(ctx context.Context, loc string, expect []byte) error {
- v.metrics.Ops.Inc()
- v.metrics.CompareOps.Inc()
path := v.blockPath(loc)
if _, err := v.stat(path); err != nil {
return v.translateError(err)
@@ -360,8 +357,6 @@ func (v *UnixVolume) Compare(ctx context.Context, loc string, expect []byte) err
// returns a FullError. If the write fails due to some other error,
// that error is returned.
func (v *UnixVolume) Put(ctx context.Context, loc string, block []byte) error {
- v.metrics.Ops.Inc()
- v.metrics.PutOps.Inc()
return putWithPipe(ctx, loc, block, v)
}
@@ -393,6 +388,7 @@ func (v *UnixVolume) WriteBlock(ctx context.Context, loc string, rdr io.Reader)
}
defer v.unlock()
n, err := io.Copy(tmpfile, rdr)
+ v.os.ioBytes.With(prometheus.Labels{"direction": "out"}).Add(float64(n))
v.os.stats.TickOutBytes(uint64(n))
if err != nil {
log.Printf("%s: writing to %s: %s\n", v, bpath, err)
@@ -465,7 +461,7 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
return err
}
defer rootdir.Close()
- v.os.stats.readdirOps.Inc()
+ v.os.opsCounters.With(prometheus.Labels{"operation": "readdir"}).Inc()
v.os.stats.Tick(&v.os.stats.ReaddirOps)
for {
names, err := rootdir.Readdirnames(1)
@@ -488,7 +484,7 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
lastErr = err
continue
}
- v.os.stats.readdirOps.Inc()
+ v.os.opsCounters.With(prometheus.Labels{"operation": "readdir"}).Inc()
v.os.stats.Tick(&v.os.stats.ReaddirOps)
for {
fileInfo, err := blockdir.Readdir(1)
@@ -577,7 +573,7 @@ func (v *UnixVolume) Untrash(loc string) (err error) {
return MethodDisabledError
}
- v.os.stats.readdirOps.Inc()
+ v.os.opsCounters.With(prometheus.Labels{"operation": "readdir"}).Inc()
v.os.stats.Tick(&v.os.stats.ReaddirOps)
files, err := ioutil.ReadDir(v.blockDir(loc))
if err != nil {
@@ -724,7 +720,7 @@ func (v *UnixVolume) unlock() {
// lockfile and unlockfile use flock(2) to manage kernel file locks.
func (v *UnixVolume) lockfile(f *os.File) error {
- v.os.stats.flockOps.Inc()
+ v.os.opsCounters.With(prometheus.Labels{"operation": "flock"}).Inc()
v.os.stats.Tick(&v.os.stats.FlockOps)
err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
v.os.stats.TickErr(err)
@@ -829,27 +825,6 @@ type unixStats struct {
RenameOps uint64
UnlinkOps uint64
ReaddirOps uint64
- // Prometheus metrics -- Above ad-hoc counters will be eventually removed
- openOps prometheus.Counter
- statOps prometheus.Counter
- flockOps prometheus.Counter
- utimesOps prometheus.Counter
- createOps prometheus.Counter
- renameOps prometheus.Counter
- unlinkOps prometheus.Counter
- readdirOps prometheus.Counter
-}
-
-func (s *unixStats) setup(m *volumeMetrics) {
- s.statsTicker.setup(m)
- s.openOps = m.getInternalCounter("unix_open_ops", "Number of backend open operations")
- s.statOps = m.getInternalCounter("unix_stat_ops", "Number of backend stat operations")
- s.flockOps = m.getInternalCounter("unix_flock_ops", "Number of backend flock operations")
- s.utimesOps = m.getInternalCounter("unix_utimes_ops", "Number of backend utimes operations")
- s.createOps = m.getInternalCounter("unix_create_ops", "Number of backend create operations")
- s.renameOps = m.getInternalCounter("unix_rename_ops", "Number of backend rename operations")
- s.unlinkOps = m.getInternalCounter("unix_unlink_ops", "Number of backend unlink operations")
- s.readdirOps = m.getInternalCounter("unix_readdir_ops", "Number of backend readdir operations")
}
func (s *unixStats) TickErr(err error) {
@@ -860,56 +835,75 @@ func (s *unixStats) TickErr(err error) {
}
type osWithStats struct {
- stats unixStats
+ stats unixStats
+ opsCounters *prometheus.CounterVec
+ errCounters *prometheus.CounterVec
+ ioBytes *prometheus.CounterVec
+}
+
+func (o *osWithStats) tickErr(err error) {
+ if err == nil {
+ return
+ }
+ o.errCounters.With(prometheus.Labels{"type": fmt.Sprintf("%T", err)}).Inc()
+}
+
+func (o *osWithStats) promSetup(opsC, errC, ioB *prometheus.CounterVec) {
+ o.opsCounters = opsC
+ o.errCounters = errC
+ o.ioBytes = ioB
}
func (o *osWithStats) Open(name string) (*os.File, error) {
- o.stats.openOps.Inc()
+ o.opsCounters.With(prometheus.Labels{"operation": "open"}).Inc()
o.stats.Tick(&o.stats.OpenOps)
f, err := os.Open(name)
+ o.tickErr(err)
o.stats.TickErr(err)
return f, err
}
func (o *osWithStats) OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) {
- o.stats.openOps.Inc()
+ o.opsCounters.With(prometheus.Labels{"operation": "open"}).Inc()
o.stats.Tick(&o.stats.OpenOps)
f, err := os.OpenFile(name, flag, perm)
+ o.tickErr(err)
o.stats.TickErr(err)
return f, err
}
func (o *osWithStats) Remove(path string) error {
- o.stats.unlinkOps.Inc()
+ o.opsCounters.With(prometheus.Labels{"operation": "unlink"}).Inc()
o.stats.Tick(&o.stats.UnlinkOps)
err := os.Remove(path)
+ o.tickErr(err)
o.stats.TickErr(err)
return err
}
func (o *osWithStats) Rename(a, b string) error {
- o.stats.renameOps.Inc()
+ o.opsCounters.With(prometheus.Labels{"operation": "rename"}).Inc()
o.stats.Tick(&o.stats.RenameOps)
err := os.Rename(a, b)
+ o.tickErr(err)
o.stats.TickErr(err)
return err
}
func (o *osWithStats) Stat(path string) (os.FileInfo, error) {
- // Avoid segfaulting when called from vol.Status() on theConfig.Start()
- if o.stats.statOps != nil {
- o.stats.statOps.Inc()
- }
+ o.opsCounters.With(prometheus.Labels{"operation": "stat"}).Inc()
o.stats.Tick(&o.stats.StatOps)
fi, err := os.Stat(path)
+ o.tickErr(err)
o.stats.TickErr(err)
return fi, err
}
func (o *osWithStats) TempFile(dir, base string) (*os.File, error) {
- o.stats.createOps.Inc()
+ o.opsCounters.With(prometheus.Labels{"operation": "create"}).Inc()
o.stats.Tick(&o.stats.CreateOps)
f, err := ioutil.TempFile(dir, base)
+ o.tickErr(err)
o.stats.TickErr(err)
return f, err
}
diff --git a/services/keepstore/volume_unix_test.go b/services/keepstore/volume_unix_test.go
index 05c7a93ae..fe20f33d6 100644
--- a/services/keepstore/volume_unix_test.go
+++ b/services/keepstore/volume_unix_test.go
@@ -116,9 +116,8 @@ func TestReplicationDefault1(t *testing.T) {
Root: "/",
ReadOnly: true,
}
- metrics := newVolumeMetricsVecs(prometheus.NewRegistry()).curryWith(
- v.String(), v.Status().MountPoint, fmt.Sprintf("%d", v.Status().DeviceNum))
- if err := v.Start(metrics); err != nil {
+ metrics := newVolumeMetricsVecs(prometheus.NewRegistry())
+ if err := v.Start(metrics.opsCounters, metrics.errCounters, metrics.ioBytes); err != nil {
t.Error(err)
}
if got := v.Replication(); got != 1 {
commit 43ad590772de48fbc3a6a45654445bab79a0bdc1
Merge: f50aff88c 9acc7690b
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Tue Feb 19 18:32:03 2019 -0300
Merge branch 'master' into 13937-keepstore-prometheus
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list