[ARVADOS] created: 1.3.0-222-gf0553505e

Git user git at public.curoverse.com
Mon Feb 11 10:26:42 EST 2019


        at  f0553505e32ee00999d1d680da14260a9a0f6b99 (commit)


commit f0553505e32ee00999d1d680da14260a9a0f6b99
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date:   Mon Feb 11 12:24:15 2019 -0300

    13937: Export stats as prometheus metrics. (WIP)
    
    Includes common metrics and driver-specific metrics (unix backend only, for now).
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>

diff --git a/services/keepstore/handlers.go b/services/keepstore/handlers.go
index e079b9678..e4f025d6b 100644
--- a/services/keepstore/handlers.go
+++ b/services/keepstore/handlers.go
@@ -20,11 +20,11 @@ import (
 	"sync"
 	"time"
 
-	"github.com/gorilla/mux"
-
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/health"
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+	"github.com/gorilla/mux"
+	"github.com/prometheus/client_golang/prometheus"
 )
 
 type router struct {
@@ -32,14 +32,17 @@ type router struct {
 	limiter     httpserver.RequestCounter
 	cluster     *arvados.Cluster
 	remoteProxy remoteProxy
+	registry    *prometheus.Registry
+	metrics     nodeMetrics
 }
 
 // MakeRESTRouter returns a new router that forwards all Keep requests
 // to the appropriate handlers.
 func MakeRESTRouter(cluster *arvados.Cluster) http.Handler {
 	rtr := &router{
-		Router:  mux.NewRouter(),
-		cluster: cluster,
+		Router:   mux.NewRouter(),
+		cluster:  cluster,
+		registry: prometheus.NewRegistry(),
 	}
 
 	rtr.HandleFunc(
@@ -86,8 +89,13 @@ func MakeRESTRouter(cluster *arvados.Cluster) http.Handler {
 	rtr.NotFoundHandler = http.HandlerFunc(BadRequestHandler)
 
 	rtr.limiter = httpserver.NewRequestLimiter(theConfig.MaxRequests, rtr)
+	rtr.metrics = nodeMetrics{
+		reg: rtr.registry,
+		rc:  rtr.limiter,
+	}
+	rtr.metrics.setup()
 
-	instrumented := httpserver.Instrument(nil, nil,
+	instrumented := httpserver.Instrument(rtr.registry, nil,
 		httpserver.AddRequestIDs(httpserver.LogRequests(nil, rtr.limiter)))
 	return instrumented.ServeAPI(theConfig.ManagementToken, instrumented)
 }
diff --git a/services/keepstore/metrics.go b/services/keepstore/metrics.go
new file mode 100644
index 000000000..f0815ae4a
--- /dev/null
+++ b/services/keepstore/metrics.go
@@ -0,0 +1,98 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+	"fmt"
+
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+// nodeMetrics publishes keepstore's node-wide and per-volume
+// statistics as Prometheus gauges.
+type nodeMetrics struct {
+	reg *prometheus.Registry      // registry every gauge is registered on
+	rc  httpserver.RequestCounter // source of current/max request counts
+}
+
+// setup registers every keepstore gauge on m.reg: buffer pool,
+// pull/trash queue and request counter gauges for the node, then
+// one set of gauges per readable volume carrying that volume's
+// label, mount point and device number as constant labels. Each
+// value is produced by a callback when the gauge is collected.
+//
+// A node-wide gauge that fails to register panics (MustRegister);
+// registration errors for per-volume gauges are discarded.
+func (m *nodeMetrics) setup() {
+	// gaugeDef is one gauge: name (without the arvados_keepstore_
+	// prefix), help text and value callback.
+	type gaugeDef struct {
+		name string
+		help string
+		fn   func() float64
+	}
+	// newGauge builds g in the arvados/keepstore namespace; labels
+	// may be nil.
+	newGauge := func(g gaugeDef, labels prometheus.Labels) prometheus.GaugeFunc {
+		return prometheus.NewGaugeFunc(
+			prometheus.GaugeOpts{
+				Namespace:   "arvados",
+				Subsystem:   "keepstore",
+				Name:        g.name,
+				Help:        g.help,
+				ConstLabels: labels,
+			},
+			g.fn,
+		)
+	}
+
+	// Node-wide metrics
+	for _, g := range []gaugeDef{
+		{"bufferpool_bytes_allocated", "Number of bytes allocated to buffers", func() float64 { return float64(bufs.Alloc()) }},
+		{"bufferpool_buffers_max", "Maximum number of buffers allowed", func() float64 { return float64(bufs.Cap()) }},
+		{"bufferpool_buffers_in_use", "Number of buffers in use", func() float64 { return float64(bufs.Len()) }},
+		{"pull_queue_in_progress", "Number of pull requests in progress", func() float64 { return float64(getWorkQueueStatus(pullq).InProgress) }},
+		{"pull_queue_queued", "Number of queued pull requests", func() float64 { return float64(getWorkQueueStatus(pullq).Queued) }},
+		{"trash_queue_in_progress", "Number of trash requests in progress", func() float64 { return float64(getWorkQueueStatus(trashq).InProgress) }},
+		{"trash_queue_queued", "Number of queued trash requests", func() float64 { return float64(getWorkQueueStatus(trashq).Queued) }},
+		{"requests_current", "Number of requests in progress", func() float64 { return float64(m.rc.Current()) }},
+		{"requests_max", "Maximum number of concurrent requests", func() float64 { return float64(m.rc.Max()) }},
+	} {
+		m.reg.MustRegister(newGauge(g, nil))
+	}
+
+	// Register individual volume's metrics
+	for _, vol := range KeepVM.AllReadable() {
+		// The callbacks below outlive this iteration. Before Go 1.22
+		// all iterations share one loop variable, so without this copy
+		// every volume's gauges would report on the last volume.
+		vol := vol
+		status := vol.Status()
+		labels := prometheus.Labels{
+			"label":         vol.String(),
+			"mount_point":   status.MountPoint,
+			"device_number": fmt.Sprintf("%d", status.DeviceNum),
+		}
+		if im, ok := vol.(InternalMetricser); ok {
+			// Per-driver internal metrics
+			im.SetupInternalMetrics(m.reg, labels)
+		}
+		for _, g := range []gaugeDef{
+			{"volume_bytes_free", "Number of free bytes on the volume", func() float64 { return float64(vol.Status().BytesFree) }},
+			{"volume_bytes_used", "Number of used bytes on the volume", func() float64 { return float64(vol.Status().BytesUsed) }},
+			{"volume_io_errors", "Number of I/O errors", func() float64 { return float64(KeepVM.VolumeStats(vol).Errors) }},
+			{"volume_io_ops", "Number of I/O operations", func() float64 { return float64(KeepVM.VolumeStats(vol).Ops) }},
+			{"volume_io_compare_ops", "Number of I/O compare operations", func() float64 { return float64(KeepVM.VolumeStats(vol).CompareOps) }},
+			{"volume_io_get_ops", "Number of I/O get operations", func() float64 { return float64(KeepVM.VolumeStats(vol).GetOps) }},
+			{"volume_io_put_ops", "Number of I/O put operations", func() float64 { return float64(KeepVM.VolumeStats(vol).PutOps) }},
+			{"volume_io_touch_ops", "Number of I/O touch operations", func() float64 { return float64(KeepVM.VolumeStats(vol).TouchOps) }},
+			{"volume_io_input_bytes", "Number of input bytes", func() float64 { return float64(KeepVM.VolumeStats(vol).InBytes) }},
+			{"volume_io_output_bytes", "Number of output bytes", func() float64 { return float64(KeepVM.VolumeStats(vol).OutBytes) }},
+		} {
+			m.reg.Register(newGauge(g, labels))
+		}
+	}
+}
diff --git a/services/keepstore/mounts_test.go b/services/keepstore/mounts_test.go
index 31b1a684f..588bb4299 100644
--- a/services/keepstore/mounts_test.go
+++ b/services/keepstore/mounts_test.go
@@ -131,7 +131,9 @@ func (s *MountsSuite) TestMetrics(c *check.C) {
 	}
 	json.NewDecoder(resp.Body).Decode(&j)
 	found := make(map[string]bool)
+	names := map[string]bool{}
 	for _, g := range j {
+		names[g.Name] = true
 		for _, m := range g.Metric {
 			if len(m.Label) == 2 && m.Label[0].Name == "code" && m.Label[0].Value == "200" && m.Label[1].Name == "method" && m.Label[1].Value == "put" {
 				c.Check(m.Summary.SampleCount, check.Equals, "2")
@@ -143,6 +145,24 @@ func (s *MountsSuite) TestMetrics(c *check.C) {
 	}
 	c.Check(found["request_duration_seconds"], check.Equals, true)
 	c.Check(found["time_to_status_seconds"], check.Equals, true)
+
+	metricsNames := []string{
+		"arvados_keepstore_bufferpool_buffers_in_use",
+		"arvados_keepstore_bufferpool_buffers_max",
+		"arvados_keepstore_bufferpool_bytes_allocated",
+		"arvados_keepstore_pull_queue_in_progress",
+		"arvados_keepstore_pull_queue_queued",
+		"arvados_keepstore_requests_current",
+		"arvados_keepstore_requests_max",
+		"arvados_keepstore_trash_queue_in_progress",
+		"arvados_keepstore_trash_queue_queued",
+		"request_duration_seconds",
+		"time_to_status_seconds",
+	}
+	for _, m := range metricsNames {
+		_, ok := names[m]
+		c.Check(ok, check.Equals, true)
+	}
 }
 
 func (s *MountsSuite) call(method, path, tok string, body []byte) *httptest.ResponseRecorder {
diff --git a/services/keepstore/stats_ticker.go b/services/keepstore/stats_ticker.go
index 377a53675..36fbcf98a 100644
--- a/services/keepstore/stats_ticker.go
+++ b/services/keepstore/stats_ticker.go
@@ -5,8 +5,11 @@
 package main
 
 import (
+	"fmt"
 	"sync"
 	"sync/atomic"
+
+	"github.com/prometheus/client_golang/prometheus"
 )
 
 type statsTicker struct {
@@ -18,6 +21,28 @@ type statsTicker struct {
 	lock       sync.Mutex
 }
 
+// setupPrometheus registers gauges on reg for the counters common to all
+// backends, named <drv>_<counter> with const labels lbl and read atomically.
+func (s *statsTicker) setupPrometheus(drv string, reg *prometheus.Registry, lbl prometheus.Labels) {
+	for _, c := range []struct {
+		name, help string
+		val        *uint64
+	}{
+		{"errors", "errors", &s.Errors},
+		{"in_bytes", "input bytes", &s.InBytes},
+		{"out_bytes", "output bytes", &s.OutBytes},
+	} {
+		val := c.val // per-iteration copy: the callback must see the live counter
+		reg.Register(prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+			Namespace:   "arvados",
+			Subsystem:   "keepstore",
+			Name:        fmt.Sprintf("%s_%s", drv, c.name),
+			Help:        fmt.Sprintf("Number of %s backend %s", drv, c.help),
+			ConstLabels: lbl,
+		}, func() float64 { return float64(atomic.LoadUint64(val)) }))
+	}
+}
+
 // Tick increments each of the given counters by 1 using
 // atomic.AddUint64.
 func (s *statsTicker) Tick(counters ...*uint64) {
diff --git a/services/keepstore/volume.go b/services/keepstore/volume.go
index 6bce05bec..963804639 100644
--- a/services/keepstore/volume.go
+++ b/services/keepstore/volume.go
@@ -14,6 +14,7 @@ import (
 	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"github.com/prometheus/client_golang/prometheus"
 )
 
 type BlockWriter interface {
@@ -415,3 +416,9 @@ type ioStats struct {
 type InternalStatser interface {
 	InternalStats() interface{}
 }
+
+// InternalMetricser is implemented by volume drivers that register their
+// own driver-specific metrics on a Prometheus registry.
+type InternalMetricser interface {
+	SetupInternalMetrics(*prometheus.Registry, prometheus.Labels)
+}
diff --git a/services/keepstore/volume_unix.go b/services/keepstore/volume_unix.go
index 23d675359..a80bb7bf4 100644
--- a/services/keepstore/volume_unix.go
+++ b/services/keepstore/volume_unix.go
@@ -21,6 +21,8 @@ import (
 	"sync/atomic"
 	"syscall"
 	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
 )
 
 type unixVolumeAdder struct {
@@ -789,6 +791,42 @@ func (v *UnixVolume) EmptyTrash() {
 	log.Printf("EmptyTrash stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.String(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
 }
 
+// SetupInternalMetrics registers the volume's driver stats on reg, passing
+// lbl through to the stats registration. Implements InternalMetricser.
+func (v *UnixVolume) SetupInternalMetrics(reg *prometheus.Registry, lbl prometheus.Labels) {
+	v.os.stats.setupPrometheus(reg, lbl)
+}
+
+// setupPrometheus registers the common statsTicker gauges plus one gauge per
+// unix driver operation counter on reg, with lbl; counters are read atomically.
+func (s *unixStats) setupPrometheus(reg *prometheus.Registry, lbl prometheus.Labels) {
+	// Common backend metrics
+	s.statsTicker.setupPrometheus("unix", reg, lbl)
+	// Driver-specific backend metrics
+	for _, c := range []struct {
+		name string
+		val  *uint64
+	}{
+		{"open", &s.OpenOps},
+		{"stat", &s.StatOps},
+		{"flock", &s.FlockOps},
+		{"utimes", &s.UtimesOps},
+		{"create", &s.CreateOps},
+		{"rename", &s.RenameOps},
+		{"unlink", &s.UnlinkOps},
+		{"readdir", &s.ReaddirOps},
+	} {
+		val := c.val // per-iteration copy: the callback must see the live counter
+		reg.Register(prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+			Namespace:   "arvados",
+			Subsystem:   "keepstore",
+			Name:        fmt.Sprintf("unix_%s_ops", c.name),
+			Help:        fmt.Sprintf("Number of unix backend %s operations", c.name),
+			ConstLabels: lbl,
+		}, func() float64 { return float64(atomic.LoadUint64(val)) }))
+	}
+}
+
 type unixStats struct {
 	statsTicker
 	OpenOps    uint64

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list