[ARVADOS] updated: a9497f8d2756104ba07d88d5c8c7b84790fd83f3

Git user git at public.curoverse.com
Tue Oct 10 16:05:52 EDT 2017


Summary of changes:
 sdk/go/arvados/config.go    |  26 ++++++++---
 sdk/go/health/aggregator.go | 105 ++++++++++++++++++++++++--------------------
 2 files changed, 79 insertions(+), 52 deletions(-)

       via  a9497f8d2756104ba07d88d5c8c7b84790fd83f3 (commit)
      from  9959bf0f5631daa84e8afa7de145154390259c67 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit a9497f8d2756104ba07d88d5c8c7b84790fd83f3
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Oct 5 23:22:09 2017 -0400

    12260: Add more services. Use existing package names.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 84e66b3..f1a7289 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -76,14 +76,30 @@ func (cc *Cluster) GetSystemNode(node string) (*SystemNode, error) {
 }
 
 type SystemNode struct {
-	Health    Health
-	Keepstore Keepstore
+	Health      SystemServiceInstance `json:"arvados-health"`
+	Keepproxy   SystemServiceInstance `json:"keepproxy"`
+	Keepstore   SystemServiceInstance `json:"keepstore"`
+	Keepweb     SystemServiceInstance `json:"keep-web"`
+	Nodemanager SystemServiceInstance `json:"arvados-node-manager"`
+	RailsAPI    SystemServiceInstance `json:"arvados-api-server"`
+	Websocket   SystemServiceInstance `json:"arvados-ws"`
+	Workbench   SystemServiceInstance `json:"arvados-workbench"`
 }
 
-type Health struct {
-	Listen string
+// ServicePorts returns the configured listening address (or "" if
+// disabled) for each service on the node.
+func (sn *SystemNode) ServicePorts() map[string]string {
+	return map[string]string{
+		"arvados-api-server":   sn.RailsAPI.Listen,
+		"arvados-node-manager": sn.Nodemanager.Listen,
+		"arvados-workbench":    sn.Workbench.Listen,
+		"arvados-ws":           sn.Websocket.Listen,
+		"keep-web":             sn.Keepweb.Listen,
+		"keepproxy":            sn.Keepproxy.Listen,
+		"keepstore":            sn.Keepstore.Listen,
+	}
 }
 
-type Keepstore struct {
+type SystemServiceInstance struct {
 	Listen string
 }
diff --git a/sdk/go/health/aggregator.go b/sdk/go/health/aggregator.go
index e881db8..334584b 100644
--- a/sdk/go/health/aggregator.go
+++ b/sdk/go/health/aggregator.go
@@ -69,82 +69,97 @@ func (agg *Aggregator) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
 	}
 }
 
-type ServiceHealth struct {
+type ClusterHealthResponse struct {
+	// "OK" if all needed services are OK, otherwise "ERROR".
 	Health string `json:"health"`
-	N      int    `json:"n"`
-}
 
-type ClusterHealthResponse struct {
-	Health   string                   `json:"health"`
-	Checks   map[string]CheckResponse `json:"checks"`
+	// An entry for each known health check of each known instance
+	// of each needed component: "instance of service S on node N
+	// reports health-check C is OK."
+	Checks map[string]CheckResult `json:"checks"`
+
+	// An entry for each service type: "service S is OK." This
+	// exposes problems that can't be expressed in Checks, like
+	// "service S is needed, but isn't configured to run
+	// anywhere."
 	Services map[string]ServiceHealth `json:"services"`
 }
 
-type CheckResponse struct {
-	Status       int         `json:"status"`
-	Health       string      `json:"health"`
-	Error        string      `json:"error,omitempty"`
-	ResponseTime json.Number `json:"responseTime"`
+type CheckResult struct {
+	Health         string                 `json:"health"`
+	Error          string                 `json:"error,omitempty"`
+	HTTPStatusCode int                    `json:",omitempty"`
+	HTTPStatusText string                 `json:",omitempty"`
+	Response       map[string]interface{} `json:"response"`
+	ResponseTime   json.Number            `json:"responseTime"`
 }
 
-func (r *CheckResponse) OK() bool {
-	return r.Health == "OK" && r.Status == http.StatusOK
+type ServiceHealth struct {
+	Health string `json:"health"`
+	N      int    `json:"n"`
 }
 
 func (agg *Aggregator) ClusterHealth(cluster *arvados.Cluster) ClusterHealthResponse {
 	resp := ClusterHealthResponse{
 		Health:   "OK",
-		Checks:   make(map[string]CheckResponse),
+		Checks:   make(map[string]CheckResult),
 		Services: make(map[string]ServiceHealth),
 	}
 
 	mtx := sync.Mutex{}
 	wg := sync.WaitGroup{}
 	for node, nodeConfig := range cluster.SystemNodes {
-		for svc, addr := range map[string]string{
-			"keepstore": nodeConfig.Keepstore.Listen,
-		} {
+		for svc, addr := range nodeConfig.ServicePorts() {
+			// Ensure svc is listed in resp.Services.
+			mtx.Lock()
+			if _, ok := resp.Services[svc]; !ok {
+				resp.Services[svc] = ServiceHealth{Health: "ERROR"}
+			}
+			mtx.Unlock()
+
 			if addr == "" {
+				// svc is not expected on this node.
 				continue
 			}
+
 			wg.Add(1)
-			go func(node string) {
+			go func(node, svc, addr string) {
 				defer wg.Done()
-				var pingResp CheckResponse
+				var result CheckResult
 				url, err := agg.pingURL(node, addr)
 				if err != nil {
-					pingResp = CheckResponse{
+					result = CheckResult{
 						Health: "ERROR",
 						Error:  err.Error(),
 					}
 				} else {
-					pingResp = agg.ping(url, cluster)
+					result = agg.ping(url, cluster)
 				}
 
 				mtx.Lock()
 				defer mtx.Unlock()
-				resp.Checks[svc+"+"+url] = pingResp
-				svHealth := resp.Services[svc]
-				if pingResp.OK() {
-					svHealth.N++
+				resp.Checks[svc+"+"+url] = result
+				if result.Health == "OK" {
+					h := resp.Services[svc]
+					h.N++
+					h.Health = "OK"
+					resp.Services[svc] = h
 				} else {
 					resp.Health = "ERROR"
 				}
-				resp.Services[svc] = svHealth
-			}(node)
+			}(node, svc, addr)
 		}
 	}
 	wg.Wait()
 
-	for svc, svHealth := range resp.Services {
-		if svHealth.N > 0 {
-			svHealth.Health = "OK"
-		} else {
-			svHealth.Health = "ERROR"
+	// Report ERROR if a needed service didn't fail any checks
+	// merely because it isn't configured to run anywhere.
+	for _, sh := range resp.Services {
+		if sh.Health != "OK" {
+			resp.Health = "ERROR"
+			break
 		}
-		resp.Services[svc] = svHealth
 	}
-
 	return resp
 }
 
@@ -153,7 +168,7 @@ func (agg *Aggregator) pingURL(node, addr string) (string, error) {
 	return "http://" + node + ":" + port + "/_health/ping", err
 }
 
-func (agg *Aggregator) ping(url string, cluster *arvados.Cluster) (result CheckResponse) {
+func (agg *Aggregator) ping(url string, cluster *arvados.Cluster) (result CheckResult) {
 	t0 := time.Now()
 
 	var err error
@@ -161,6 +176,8 @@ func (agg *Aggregator) ping(url string, cluster *arvados.Cluster) (result CheckR
 		result.ResponseTime = json.Number(fmt.Sprintf("%.6f", time.Since(t0).Seconds()))
 		if err != nil {
 			result.Health, result.Error = "ERROR", err.Error()
+		} else {
+			result.Health = "OK"
 		}
 	}()
 
@@ -170,26 +187,20 @@ func (agg *Aggregator) ping(url string, cluster *arvados.Cluster) (result CheckR
 	}
 	req.Header.Set("Authorization", "Bearer "+cluster.ManagementToken)
 
-	ctx, cancel := context.WithCancel(req.Context())
-	go func() {
-		select {
-		case <-time.After(time.Duration(agg.timeout)):
-			cancel()
-		case <-ctx.Done():
-		}
-	}()
+	ctx, cancel := context.WithTimeout(req.Context(), time.Duration(agg.timeout))
+	defer cancel()
 	req = req.WithContext(ctx)
 	resp, err := agg.httpClient.Do(req)
 	if err != nil {
 		return
 	}
-	result.Status = resp.StatusCode
-	err = json.NewDecoder(resp.Body).Decode(&result)
+	result.HTTPStatusCode = resp.StatusCode
+	result.HTTPStatusText = resp.Status
+	err = json.NewDecoder(resp.Body).Decode(&result.Response)
 	if err != nil {
 		err = fmt.Errorf("cannot decode response: %s", err)
 		return
-	}
-	if resp.StatusCode != http.StatusOK {
+	} else if resp.StatusCode != http.StatusOK {
 		err = fmt.Errorf("HTTP %d %s", resp.StatusCode, resp.Status)
 		return
 	}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list