[ARVADOS] created: 1.3.0-222-gf0553505e
Git user
git at public.curoverse.com
Mon Feb 11 10:26:42 EST 2019
at f0553505e32ee00999d1d680da14260a9a0f6b99 (commit)
commit f0553505e32ee00999d1d680da14260a9a0f6b99
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Mon Feb 11 12:24:15 2019 -0300
13937: Export stats as prometheus metrics. (WIP)
Includes common metrics and driver-specific (unix backend only, for now)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/services/keepstore/handlers.go b/services/keepstore/handlers.go
index e079b9678..e4f025d6b 100644
--- a/services/keepstore/handlers.go
+++ b/services/keepstore/handlers.go
@@ -20,11 +20,11 @@ import (
"sync"
"time"
- "github.com/gorilla/mux"
-
"git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/health"
"git.curoverse.com/arvados.git/sdk/go/httpserver"
+ "github.com/gorilla/mux"
+ "github.com/prometheus/client_golang/prometheus"
)
type router struct {
@@ -32,14 +32,17 @@ type router struct {
limiter httpserver.RequestCounter
cluster *arvados.Cluster
remoteProxy remoteProxy
+ registry *prometheus.Registry
+ metrics nodeMetrics
}
// MakeRESTRouter returns a new router that forwards all Keep requests
// to the appropriate handlers.
func MakeRESTRouter(cluster *arvados.Cluster) http.Handler {
rtr := &router{
- Router: mux.NewRouter(),
- cluster: cluster,
+ Router: mux.NewRouter(),
+ cluster: cluster,
+ registry: prometheus.NewRegistry(),
}
rtr.HandleFunc(
@@ -86,8 +89,13 @@ func MakeRESTRouter(cluster *arvados.Cluster) http.Handler {
rtr.NotFoundHandler = http.HandlerFunc(BadRequestHandler)
rtr.limiter = httpserver.NewRequestLimiter(theConfig.MaxRequests, rtr)
+ rtr.metrics = nodeMetrics{
+ reg: rtr.registry,
+ rc: rtr.limiter,
+ }
+ rtr.metrics.setup()
- instrumented := httpserver.Instrument(nil, nil,
+ instrumented := httpserver.Instrument(rtr.registry, nil,
httpserver.AddRequestIDs(httpserver.LogRequests(nil, rtr.limiter)))
return instrumented.ServeAPI(theConfig.ManagementToken, instrumented)
}
diff --git a/services/keepstore/metrics.go b/services/keepstore/metrics.go
new file mode 100644
index 000000000..f0815ae4a
--- /dev/null
+++ b/services/keepstore/metrics.go
@@ -0,0 +1,214 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+ "fmt"
+
+ "git.curoverse.com/arvados.git/sdk/go/httpserver"
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+// nodeMetrics holds what setup needs to publish keepstore's metrics.
+type nodeMetrics struct {
+	// reg is the Prometheus registry that setup registers gauges with.
+	reg *prometheus.Registry
+	// rc supplies the Current() and Max() values exported as the
+	// requests_current and requests_max gauges.
+	rc httpserver.RequestCounter
+}
+
+func (m *nodeMetrics) setup() {
+ m.reg.MustRegister(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "bufferpool_bytes_allocated",
+ Help: "Number of bytes allocated to buffers",
+ },
+ func() float64 { return float64(bufs.Alloc()) },
+ ))
+ m.reg.MustRegister(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "bufferpool_buffers_max",
+ Help: "Maximum number of buffers allowed",
+ },
+ func() float64 { return float64(bufs.Cap()) },
+ ))
+ m.reg.MustRegister(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "bufferpool_buffers_in_use",
+ Help: "Number of buffers in use",
+ },
+ func() float64 { return float64(bufs.Len()) },
+ ))
+ m.reg.MustRegister(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "pull_queue_in_progress",
+ Help: "Number of pull requests in progress",
+ },
+ func() float64 { return float64(getWorkQueueStatus(pullq).InProgress) },
+ ))
+ m.reg.MustRegister(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "pull_queue_queued",
+ Help: "Number of queued pull requests",
+ },
+ func() float64 { return float64(getWorkQueueStatus(pullq).Queued) },
+ ))
+ m.reg.MustRegister(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "trash_queue_in_progress",
+ Help: "Number of trash requests in progress",
+ },
+ func() float64 { return float64(getWorkQueueStatus(trashq).InProgress) },
+ ))
+ m.reg.MustRegister(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "trash_queue_queued",
+ Help: "Number of queued trash requests",
+ },
+ func() float64 { return float64(getWorkQueueStatus(trashq).Queued) },
+ ))
+ m.reg.MustRegister(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "requests_current",
+ Help: "Number of requests in progress",
+ },
+ func() float64 { return float64(m.rc.Current()) },
+ ))
+ m.reg.MustRegister(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "requests_max",
+ Help: "Maximum number of concurrent requests",
+ },
+ func() float64 { return float64(m.rc.Max()) },
+ ))
+ // Register individual volume's metrics
+ vols := KeepVM.AllReadable()
+ for _, vol := range vols {
+ labels := prometheus.Labels{
+ "label": vol.String(),
+ "mount_point": vol.Status().MountPoint,
+ "device_number": fmt.Sprintf("%d", vol.Status().DeviceNum),
+ }
+ if vol, ok := vol.(InternalMetricser); ok {
+ // Per-driver internal metrics
+ vol.SetupInternalMetrics(m.reg, labels)
+ }
+ m.reg.Register(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "volume_bytes_free",
+ Help: "Number of free bytes on the volume",
+ ConstLabels: labels,
+ },
+ func() float64 { return float64(vol.Status().BytesFree) },
+ ))
+ m.reg.Register(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "volume_bytes_used",
+ Help: "Number of used bytes on the volume",
+ ConstLabels: labels,
+ },
+ func() float64 { return float64(vol.Status().BytesUsed) },
+ ))
+ m.reg.Register(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "volume_io_errors",
+ Help: "Number of I/O errors",
+ ConstLabels: labels,
+ },
+ func() float64 { return float64(KeepVM.VolumeStats(vol).Errors) },
+ ))
+ m.reg.Register(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "volume_io_ops",
+ Help: "Number of I/O operations",
+ ConstLabels: labels,
+ },
+ func() float64 { return float64(KeepVM.VolumeStats(vol).Ops) },
+ ))
+ m.reg.Register(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "volume_io_compare_ops",
+ Help: "Number of I/O compare operations",
+ ConstLabels: labels,
+ },
+ func() float64 { return float64(KeepVM.VolumeStats(vol).CompareOps) },
+ ))
+ m.reg.Register(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "volume_io_get_ops",
+ Help: "Number of I/O get operations",
+ ConstLabels: labels,
+ },
+ func() float64 { return float64(KeepVM.VolumeStats(vol).GetOps) },
+ ))
+ m.reg.Register(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "volume_io_put_ops",
+ Help: "Number of I/O put operations",
+ ConstLabels: labels,
+ },
+ func() float64 { return float64(KeepVM.VolumeStats(vol).PutOps) },
+ ))
+ m.reg.Register(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "volume_io_touch_ops",
+ Help: "Number of I/O touch operations",
+ ConstLabels: labels,
+ },
+ func() float64 { return float64(KeepVM.VolumeStats(vol).TouchOps) },
+ ))
+ m.reg.Register(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "volume_io_input_bytes",
+ Help: "Number of input bytes",
+ ConstLabels: labels,
+ },
+ func() float64 { return float64(KeepVM.VolumeStats(vol).InBytes) },
+ ))
+ m.reg.Register(prometheus.NewGaugeFunc(
+ prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "keepstore",
+ Name: "volume_io_output_bytes",
+ Help: "Number of output bytes",
+ ConstLabels: labels,
+ },
+ func() float64 { return float64(KeepVM.VolumeStats(vol).OutBytes) },
+ ))
+ }
+}
diff --git a/services/keepstore/mounts_test.go b/services/keepstore/mounts_test.go
index 31b1a684f..588bb4299 100644
--- a/services/keepstore/mounts_test.go
+++ b/services/keepstore/mounts_test.go
@@ -131,7 +131,9 @@ func (s *MountsSuite) TestMetrics(c *check.C) {
}
json.NewDecoder(resp.Body).Decode(&j)
found := make(map[string]bool)
+ names := map[string]bool{}
for _, g := range j {
+ names[g.Name] = true
for _, m := range g.Metric {
if len(m.Label) == 2 && m.Label[0].Name == "code" && m.Label[0].Value == "200" && m.Label[1].Name == "method" && m.Label[1].Value == "put" {
c.Check(m.Summary.SampleCount, check.Equals, "2")
@@ -143,6 +145,24 @@ func (s *MountsSuite) TestMetrics(c *check.C) {
}
c.Check(found["request_duration_seconds"], check.Equals, true)
c.Check(found["time_to_status_seconds"], check.Equals, true)
+
+ metricsNames := []string{
+ "arvados_keepstore_bufferpool_buffers_in_use",
+ "arvados_keepstore_bufferpool_buffers_max",
+ "arvados_keepstore_bufferpool_bytes_allocated",
+ "arvados_keepstore_pull_queue_in_progress",
+ "arvados_keepstore_pull_queue_queued",
+ "arvados_keepstore_requests_current",
+ "arvados_keepstore_requests_max",
+ "arvados_keepstore_trash_queue_in_progress",
+ "arvados_keepstore_trash_queue_queued",
+ "request_duration_seconds",
+ "time_to_status_seconds",
+ }
+ for _, m := range metricsNames {
+ _, ok := names[m]
+ c.Check(ok, check.Equals, true)
+ }
}
func (s *MountsSuite) call(method, path, tok string, body []byte) *httptest.ResponseRecorder {
diff --git a/services/keepstore/stats_ticker.go b/services/keepstore/stats_ticker.go
index 377a53675..36fbcf98a 100644
--- a/services/keepstore/stats_ticker.go
+++ b/services/keepstore/stats_ticker.go
@@ -5,8 +5,11 @@
package main
import (
+ "fmt"
"sync"
"sync/atomic"
+
+ "github.com/prometheus/client_golang/prometheus"
)
type statsTicker struct {
@@ -18,6 +21,28 @@ type statsTicker struct {
lock sync.Mutex
}
+// setupPrometheus registers the counters common to all backends (errors,
+// input bytes, output bytes) with reg as gauges named
+// arvados_keepstore_<drv>_<counter>, each carrying lbl as constant labels.
+//
+// Each gauge holds a pointer to the live counter and reads it with
+// atomic.LoadUint64 whenever the metric is collected, so the exported
+// value follows the counter instead of a snapshot taken at setup time.
+// The error returned by reg.Register is discarded.
+func (s *statsTicker) setupPrometheus(drv string, reg *prometheus.Registry, lbl prometheus.Labels) {
+	metrics := []struct {
+		name string
+		help string
+		val  *uint64
+	}{
+		{"errors", "errors", &s.Errors},
+		{"in_bytes", "input bytes", &s.InBytes},
+		{"out_bytes", "output bytes", &s.OutBytes},
+	}
+	for _, m := range metrics {
+		// Per-iteration copy of the pointer for the closure to capture.
+		val := m.val
+		reg.Register(prometheus.NewGaugeFunc(
+			prometheus.GaugeOpts{
+				Namespace:   "arvados",
+				Subsystem:   "keepstore",
+				Name:        fmt.Sprintf("%s_%s", drv, m.name),
+				Help:        fmt.Sprintf("Number of %s backend %s", drv, m.help),
+				ConstLabels: lbl,
+			},
+			func() float64 { return float64(atomic.LoadUint64(val)) },
+		))
+	}
+}
+
// Tick increments each of the given counters by 1 using
// atomic.AddUint64.
func (s *statsTicker) Tick(counters ...*uint64) {
diff --git a/services/keepstore/volume.go b/services/keepstore/volume.go
index 6bce05bec..963804639 100644
--- a/services/keepstore/volume.go
+++ b/services/keepstore/volume.go
@@ -14,6 +14,7 @@ import (
"time"
"git.curoverse.com/arvados.git/sdk/go/arvados"
+ "github.com/prometheus/client_golang/prometheus"
)
type BlockWriter interface {
@@ -415,3 +416,9 @@ type ioStats struct {
type InternalStatser interface {
InternalStats() interface{}
}
+
+// InternalMetricser is implemented by volume drivers that register their
+// own driver-specific metrics. SetupInternalMetrics receives the registry
+// to register with and the labels to attach to the registered metrics.
+type InternalMetricser interface {
+	SetupInternalMetrics(*prometheus.Registry, prometheus.Labels)
+}
diff --git a/services/keepstore/volume_unix.go b/services/keepstore/volume_unix.go
index 23d675359..a80bb7bf4 100644
--- a/services/keepstore/volume_unix.go
+++ b/services/keepstore/volume_unix.go
@@ -21,6 +21,8 @@ import (
"sync/atomic"
"syscall"
"time"
+
+ "github.com/prometheus/client_golang/prometheus"
)
type unixVolumeAdder struct {
@@ -789,6 +791,42 @@ func (v *UnixVolume) EmptyTrash() {
log.Printf("EmptyTrash stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.String(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
}
+// SetupInternalMetrics registers the unix driver's stats with the given
+// Prometheus registry by delegating to the volume's stats object, passing
+// lbl through unchanged.
+// Implements InternalMetricser interface.
+func (v *UnixVolume) SetupInternalMetrics(reg *prometheus.Registry, lbl prometheus.Labels) {
+	v.os.stats.setupPrometheus(reg, lbl)
+}
+
+// setupPrometheus registers the unix backend's metrics with reg: first the
+// common counters via the embedded statsTicker, then one gauge per
+// unix-specific operation counter, named arvados_keepstore_unix_<counter>
+// and carrying lbl as constant labels.
+//
+// Each gauge holds a pointer to the live counter and reads it with
+// atomic.LoadUint64 whenever the metric is collected, so the exported
+// value follows the counter instead of a snapshot taken at setup time.
+// The error returned by reg.Register is discarded.
+func (s *unixStats) setupPrometheus(reg *prometheus.Registry, lbl prometheus.Labels) {
+	// Common backend metrics
+	s.statsTicker.setupPrometheus("unix", reg, lbl)
+	// Driver-specific backend metrics
+	metrics := []struct {
+		name string
+		help string
+		val  *uint64
+	}{
+		{"open_ops", "open operations", &s.OpenOps},
+		{"stat_ops", "stat operations", &s.StatOps},
+		{"flock_ops", "flock operations", &s.FlockOps},
+		{"utimes_ops", "utimes operations", &s.UtimesOps},
+		{"create_ops", "create operations", &s.CreateOps},
+		{"rename_ops", "rename operations", &s.RenameOps},
+		{"unlink_ops", "unlink operations", &s.UnlinkOps},
+		{"readdir_ops", "readdir operations", &s.ReaddirOps},
+	}
+	for _, m := range metrics {
+		// Per-iteration copy of the pointer for the closure to capture.
+		val := m.val
+		reg.Register(prometheus.NewGaugeFunc(
+			prometheus.GaugeOpts{
+				Namespace:   "arvados",
+				Subsystem:   "keepstore",
+				Name:        fmt.Sprintf("unix_%s", m.name),
+				Help:        fmt.Sprintf("Number of unix backend %s", m.help),
+				ConstLabels: lbl,
+			},
+			func() float64 { return float64(atomic.LoadUint64(val)) },
+		))
+	}
+}
+
type unixStats struct {
statsTicker
OpenOps uint64
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list