[ARVADOS] created: 4be40a40a509d1eaad11c442d137f5ecdeb45e25

git at public.curoverse.com git at public.curoverse.com
Fri Oct 10 17:16:24 EDT 2014


        at  4be40a40a509d1eaad11c442d137f5ecdeb45e25 (commit)


commit 4be40a40a509d1eaad11c442d137f5ecdeb45e25
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Oct 10 17:15:51 2014 -0400

    3826: Print cumulative net io as well as interval io.

diff --git a/services/crunchstat/crunchstat.go b/services/crunchstat/crunchstat.go
index e8dc915..d47b039 100644
--- a/services/crunchstat/crunchstat.go
+++ b/services/crunchstat/crunchstat.go
@@ -149,10 +149,10 @@ func DoNetworkStats(stderr chan<- string, cgroup Cgroup, lastStat map[string]Net
 		nextSample.txBytes = tx
 		nextSample.rxBytes = rx
 		if lastSample, ok := lastStat[ifName]; ok {
-			stderr <- fmt.Sprintf("crunchstat: task net %s tx %d rx %d interval %.4f",
+			stderr <- fmt.Sprintf("crunchstat: net %s tx %d +%d rx %d +%d interval %.4f",
 				ifName,
-				nextSample.txBytes - lastSample.txBytes,
-				nextSample.rxBytes - lastSample.rxBytes,
+				tx, tx - lastSample.txBytes,
+				rx, rx - lastSample.rxBytes,
 				nextSample.sampleTime.Sub(lastSample.sampleTime).Seconds())
 		}
 		lastStat[ifName] = nextSample

commit 3e4c6eab6c4ed13071fd665aaec8a626a180e6f5
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Oct 10 17:05:38 2014 -0400

    3826: Use /proc/PID/net/dev to get container net stats.

diff --git a/services/crunchstat/crunchstat.go b/services/crunchstat/crunchstat.go
index cc7acf2..e8dc915 100644
--- a/services/crunchstat/crunchstat.go
+++ b/services/crunchstat/crunchstat.go
@@ -3,6 +3,7 @@ package main
 import (
 	"bufio"
 	"flag"
+	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -10,7 +11,6 @@ import (
 	"os"
 	"os/exec"
 	"os/signal"
-	"path/filepath"
 	"strings"
 	"syscall"
 	"time"
@@ -63,7 +63,7 @@ func OpenAndReadAll(filename string, log_chan chan<- string) ([]byte, error) {
 	}
 }
 
-func FindStat(stderr chan<- string, cgroup Cgroup, statgroup string, stat string) string {
+func FindStat(stderr chan<- string, cgroup Cgroup, statgroup string, stat string, verbose bool) string {
 	var path string
 	path = fmt.Sprintf("%s/%s/%s/%s/%s", cgroup.root, statgroup, cgroup.parent, cgroup.cid, stat)
 	if _, err := os.Stat(path); err != nil {
@@ -79,42 +79,32 @@ func FindStat(stderr chan<- string, cgroup Cgroup, statgroup string, stat string
 		stderr <- fmt.Sprintf("crunchstat: did not find stats file (root %s, parent %s, cid %s, statgroup %s, stat %s)", cgroup.root, cgroup.parent, cgroup.cid, statgroup, stat)
 		return ""
 	}
-	stderr <- fmt.Sprintf("crunchstat: reading stats from %s", path)
+	if verbose {
+		stderr <- fmt.Sprintf("crunchstat: reading stats from %s", path)
+	}
 	return path
 }
 
-func SetNetworkNamespace(stderr chan<- string, procsFilename string) (string) {
-	// Not supported yet -- we'll just report host-wide network stats.
-	return "host"
-
-	if procsFilename == "" { return "host" }
+func GetContainerNetStats(stderr chan<- string, cgroup Cgroup) (io.Reader, error) {
+	procsFilename := FindStat(stderr, cgroup, "cpuacct", "cgroup.procs", false)
 	procsFile, err := os.Open(procsFilename)
 	if err != nil {
 		stderr <- fmt.Sprintf("crunchstat: open %s: %s", procsFilename, err)
-		return "host"
+		return nil, err
 	}
 	defer procsFile.Close()
 	reader := bufio.NewScanner(procsFile)
 	for reader.Scan() {
 		taskPid := reader.Text()
-		netnsFilename := fmt.Sprintf("/proc/%s/ns/net", taskPid)
-		netnsFile, err := os.Open(netnsFilename)
+		statsFilename := fmt.Sprintf("/proc/%s/net/dev", taskPid)
+		stats, err := OpenAndReadAll(statsFilename, stderr)
 		if err != nil {
-			stderr <- fmt.Sprintf("crunchstat: open %s: %s", netnsFilename, err)
+			stderr <- fmt.Sprintf("crunchstat: open %s: %s", statsFilename, err)
 			continue
 		}
-		defer netnsFile.Close()
-
-		// syscall.Setns() doesn't exist yet, and doesn't work
-		// from a multithreaded program yet.
-		//
-		// if _, err2 := syscall.Setns(netnsFile.Fd()); err != nil {
-		// 	stderr <- fmt.Sprintf("crunchstat: Setns: %s", err2)
-		// 	continue
-		// }
-		return "task"
+		return strings.NewReader(string(stats)), nil
 	}
-	return "host"
+	return nil, errors.New("Could not read stats for any proc in container")
 }
 
 type NetSample struct {
@@ -123,31 +113,43 @@ type NetSample struct {
 	rxBytes    int64
 }
 
-func DoNetworkStats(stderr chan<- string, procsFilename string, lastStat map[string]NetSample) (map[string]NetSample) {
-	statScope := SetNetworkNamespace(stderr, procsFilename)
+func DoNetworkStats(stderr chan<- string, cgroup Cgroup, lastStat map[string]NetSample) (map[string]NetSample) {
+	stats, err := GetContainerNetStats(stderr, cgroup)
+	if err != nil { return lastStat }
 
-	ifDirs, err := filepath.Glob("/sys/class/net/*")
-	if err != nil {
-		stderr <- fmt.Sprintf("crunchstat: could not list interfaces", err)
-		return lastStat
-	}
 	if lastStat == nil {
 		lastStat = make(map[string]NetSample)
 	}
-	for _, ifDir := range ifDirs {
-		ifName := filepath.Base(ifDir)
-		tx_s, tx_err := OpenAndReadAll(fmt.Sprintf("/sys/class/net/%s/statistics/tx_bytes", ifName), stderr)
-		rx_s, rx_err := OpenAndReadAll(fmt.Sprintf("/sys/class/net/%s/statistics/rx_bytes", ifName), stderr)
-		if rx_err != nil || tx_err != nil {
-			return nil
+	scanner := bufio.NewScanner(stats)
+	Iface: for scanner.Scan() {
+		var ifName string
+		var rx, tx int64
+		words := bufio.NewScanner(strings.NewReader(scanner.Text()))
+		words.Split(bufio.ScanWords)
+		wordIndex := 0
+		for words.Scan() {
+			word := words.Text()
+			switch wordIndex {
+			case 0:
+				ifName = strings.TrimRight(word, ":")
+			case 1:
+				if _, err := fmt.Sscanf(word, "%d", &rx); err != nil {
+					continue Iface
+				}
+			case 9:
+				if _, err := fmt.Sscanf(word, "%d", &tx); err != nil {
+					continue Iface
+				}
+			}
+			wordIndex++
 		}
+		if ifName == "lo" || ifName == "" { continue }
 		nextSample := NetSample{}
 		nextSample.sampleTime = time.Now()
-		fmt.Sscanf(string(tx_s), "%d", &nextSample.txBytes)
-		fmt.Sscanf(string(rx_s), "%d", &nextSample.rxBytes)
+		nextSample.txBytes = tx
+		nextSample.rxBytes = rx
 		if lastSample, ok := lastStat[ifName]; ok {
-			stderr <- fmt.Sprintf("crunchstat: %s net %s tx %d rx %d interval %.4f",
-				statScope,
+			stderr <- fmt.Sprintf("crunchstat: task net %s tx %d rx %d interval %.4f",
 				ifName,
 				nextSample.txBytes - lastSample.txBytes,
 				nextSample.rxBytes - lastSample.rxBytes,
@@ -174,12 +176,11 @@ func PollCgroupStats(cgroup Cgroup, stderr chan string, poll int64, stop_poll_ch
 
 	user_hz := float64(C.sysconf(C._SC_CLK_TCK))
 
-	cpuacct_stat := FindStat(stderr, cgroup, "cpuacct", "cpuacct.stat")
-	blkio_io_service_bytes := FindStat(stderr, cgroup, "blkio", "blkio.io_service_bytes")
-	cpuset_cpus := FindStat(stderr, cgroup, "cpuset", "cpuset.cpus")
-	memory_stat := FindStat(stderr, cgroup, "memory", "memory.stat")
-	procs := FindStat(stderr, cgroup, "cpuacct", "cgroup.procs")
-	lastNetStat := DoNetworkStats(stderr, procs, nil)
+	cpuacct_stat := FindStat(stderr, cgroup, "cpuacct", "cpuacct.stat", true)
+	blkio_io_service_bytes := FindStat(stderr, cgroup, "blkio", "blkio.io_service_bytes", true)
+	cpuset_cpus := FindStat(stderr, cgroup, "cpuset", "cpuset.cpus", true)
+	memory_stat := FindStat(stderr, cgroup, "memory", "memory.stat", true)
+	lastNetStat := DoNetworkStats(stderr, cgroup, nil)
 
 	poll_chan := make(chan bool, 1)
 	go func() {
@@ -309,7 +310,7 @@ func PollCgroupStats(cgroup Cgroup, stderr chan string, poll int64, stop_poll_ch
 			c.Close()
 		}
 
-		lastNetStat = DoNetworkStats(stderr, procs, lastNetStat)
+		lastNetStat = DoNetworkStats(stderr, cgroup, lastNetStat)
 	}
 }
 

commit 7b2a3050d98153329e7a51c3b4fdfdde0859a555
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Oct 10 16:12:16 2014 -0400

    3826: Clean up cgroup id strings. Output stats for all interfaces.

diff --git a/services/crunchstat/crunchstat.go b/services/crunchstat/crunchstat.go
index 0d27906..cc7acf2 100644
--- a/services/crunchstat/crunchstat.go
+++ b/services/crunchstat/crunchstat.go
@@ -10,6 +10,7 @@ import (
 	"os"
 	"os/exec"
 	"os/signal"
+	"path/filepath"
 	"strings"
 	"syscall"
 	"time"
@@ -24,6 +25,12 @@ import (
 import "C"
 // The above block of magic allows us to look up user_hz via _SC_CLK_TCK.
 
+type Cgroup struct {
+	root   string
+	parent string
+	cid    string
+}
+
 func CopyPipeToChan(in io.Reader, out chan string, done chan<- bool) {
 	s := bufio.NewScanner(in)
 	for s.Scan() {
@@ -56,20 +63,20 @@ func OpenAndReadAll(filename string, log_chan chan<- string) ([]byte, error) {
 	}
 }
 
-func FindStat(stderr chan<- string, cgroup_root string, cgroup_parent string, container_id string, statgroup string, stat string) string {
+func FindStat(stderr chan<- string, cgroup Cgroup, statgroup string, stat string) string {
 	var path string
-	path = fmt.Sprintf("%s/%s/%s/%s/%s", cgroup_root, statgroup, cgroup_parent, container_id, stat)
+	path = fmt.Sprintf("%s/%s/%s/%s/%s", cgroup.root, statgroup, cgroup.parent, cgroup.cid, stat)
 	if _, err := os.Stat(path); err != nil {
-		path = fmt.Sprintf("%s/%s/%s/%s", cgroup_root, cgroup_parent, container_id, stat)
+		path = fmt.Sprintf("%s/%s/%s/%s", cgroup.root, cgroup.parent, cgroup.cid, stat)
 	}
 	if _, err := os.Stat(path); err != nil {
-		path = fmt.Sprintf("%s/%s/%s", cgroup_root, statgroup, stat)
+		path = fmt.Sprintf("%s/%s/%s", cgroup.root, statgroup, stat)
 	}
 	if _, err := os.Stat(path); err != nil {
-		path = fmt.Sprintf("%s/%s", cgroup_root, stat)
+		path = fmt.Sprintf("%s/%s", cgroup.root, stat)
 	}
 	if _, err := os.Stat(path); err != nil {
-		stderr <- fmt.Sprintf("crunchstat: did not find stats file (root %s, parent %s, cid %s, statgroup %s, stat %s)", cgroup_root, cgroup_parent, container_id, statgroup, stat)
+		stderr <- fmt.Sprintf("crunchstat: did not find stats file (root %s, parent %s, cid %s, statgroup %s, stat %s)", cgroup.root, cgroup.parent, cgroup.cid, statgroup, stat)
 		return ""
 	}
 	stderr <- fmt.Sprintf("crunchstat: reading stats from %s", path)
@@ -110,40 +117,48 @@ func SetNetworkNamespace(stderr chan<- string, procsFilename string) (string) {
 	return "host"
 }
 
-type NetStat struct {
-	tx_bytes int64
-	rx_bytes int64
+type NetSample struct {
+	sampleTime time.Time
+	txBytes    int64
+	rxBytes    int64
 }
-func DoNetworkStats(stderr chan<- string, procsFilename string, lastStat *NetStat, elapsed float64) (*NetStat) {
+
+func DoNetworkStats(stderr chan<- string, procsFilename string, lastStat map[string]NetSample) (map[string]NetSample) {
 	statScope := SetNetworkNamespace(stderr, procsFilename)
 
-	ifName := "eth0"
-	tx_s, tx_err := OpenAndReadAll(fmt.Sprintf("/sys/class/net/%s/statistics/tx_bytes", ifName), stderr)
-	rx_s, rx_err := OpenAndReadAll(fmt.Sprintf("/sys/class/net/%s/statistics/rx_bytes", ifName), stderr)
-	if rx_err != nil || tx_err != nil {
-		return nil
+	ifDirs, err := filepath.Glob("/sys/class/net/*")
+	if err != nil {
+		stderr <- fmt.Sprintf("crunchstat: could not list interfaces", err)
+		return lastStat
 	}
-	nextStat := new(NetStat)
-	fmt.Sscanf(string(tx_s), "%d", &nextStat.tx_bytes)
-	fmt.Sscanf(string(rx_s), "%d", &nextStat.rx_bytes)
-	if lastStat != nil {
-		stderr <- fmt.Sprintf("crunchstat: %s net %s tx %d rx %d interval %.4f",
-			statScope,
-			ifName,
-			nextStat.tx_bytes - lastStat.tx_bytes,
-			nextStat.rx_bytes - lastStat.rx_bytes,
-			elapsed)
+	if lastStat == nil {
+		lastStat = make(map[string]NetSample)
 	}
-	return nextStat
-}
-
-type Cgroup struct {
-	cgroup_root   string
-	cgroup_parent string
-	container_id  string
+	for _, ifDir := range ifDirs {
+		ifName := filepath.Base(ifDir)
+		tx_s, tx_err := OpenAndReadAll(fmt.Sprintf("/sys/class/net/%s/statistics/tx_bytes", ifName), stderr)
+		rx_s, rx_err := OpenAndReadAll(fmt.Sprintf("/sys/class/net/%s/statistics/rx_bytes", ifName), stderr)
+		if rx_err != nil || tx_err != nil {
+			return nil
+		}
+		nextSample := NetSample{}
+		nextSample.sampleTime = time.Now()
+		fmt.Sscanf(string(tx_s), "%d", &nextSample.txBytes)
+		fmt.Sscanf(string(rx_s), "%d", &nextSample.rxBytes)
+		if lastSample, ok := lastStat[ifName]; ok {
+			stderr <- fmt.Sprintf("crunchstat: %s net %s tx %d rx %d interval %.4f",
+				statScope,
+				ifName,
+				nextSample.txBytes - lastSample.txBytes,
+				nextSample.rxBytes - lastSample.rxBytes,
+				nextSample.sampleTime.Sub(lastSample.sampleTime).Seconds())
+		}
+		lastStat[ifName] = nextSample
+	}
+	return lastStat
 }
 
-func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id string, stderr chan string, poll int64, stop_poll_chan <-chan bool) {
+func PollCgroupStats(cgroup Cgroup, stderr chan string, poll int64, stop_poll_chan <-chan bool) {
 	var last_user int64 = -1
 	var last_sys int64 = -1
 	var last_cpucount int64 = 0
@@ -159,12 +174,12 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 
 	user_hz := float64(C.sysconf(C._SC_CLK_TCK))
 
-	cpuacct_stat := FindStat(stderr, cgroup_root, cgroup_parent, container_id, "cpuacct", "cpuacct.stat")
-	blkio_io_service_bytes := FindStat(stderr, cgroup_root, cgroup_parent, container_id, "blkio", "blkio.io_service_bytes")
-	cpuset_cpus := FindStat(stderr, cgroup_root, cgroup_parent, container_id, "cpuset", "cpuset.cpus")
-	memory_stat := FindStat(stderr, cgroup_root, cgroup_parent, container_id, "memory", "memory.stat")
-	procs := FindStat(stderr, cgroup_root, cgroup_parent, container_id, "cpuacct", "cgroup.procs")
-	lastNetStat := DoNetworkStats(stderr, procs, nil, 0)
+	cpuacct_stat := FindStat(stderr, cgroup, "cpuacct", "cpuacct.stat")
+	blkio_io_service_bytes := FindStat(stderr, cgroup, "blkio", "blkio.io_service_bytes")
+	cpuset_cpus := FindStat(stderr, cgroup, "cpuset", "cpuset.cpus")
+	memory_stat := FindStat(stderr, cgroup, "memory", "memory.stat")
+	procs := FindStat(stderr, cgroup, "cpuacct", "cgroup.procs")
+	lastNetStat := DoNetworkStats(stderr, procs, nil)
 
 	poll_chan := make(chan bool, 1)
 	go func() {
@@ -294,7 +309,7 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 			c.Close()
 		}
 
-		lastNetStat = DoNetworkStats(stderr, procs, lastNetStat, elapsed)
+		lastNetStat = DoNetworkStats(stderr, procs, lastNetStat)
 	}
 }
 
@@ -390,7 +405,8 @@ func run(logger *log.Logger) error {
 	}
 
 	stop_poll_chan := make(chan bool, 1)
-	go PollCgroupStats(cgroup_root, cgroup_parent, container_id, stderr_chan, poll, stop_poll_chan)
+	cgroup := Cgroup{cgroup_root, cgroup_parent, container_id}
+	go PollCgroupStats(cgroup, stderr_chan, poll, stop_poll_chan)
 
 	// When the child exits, tell the polling goroutine to stop.
 	defer func() { stop_poll_chan <- true }()

commit 1e43af7e01b61945d94ef26c25a1a14b231d605b
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Oct 10 15:12:18 2014 -0400

    3826: Add host-level network IO monitoring, and placeholder for task-level.

diff --git a/services/crunchstat/crunchstat.go b/services/crunchstat/crunchstat.go
index 9175cf0..0d27906 100644
--- a/services/crunchstat/crunchstat.go
+++ b/services/crunchstat/crunchstat.go
@@ -56,25 +56,91 @@ func OpenAndReadAll(filename string, log_chan chan<- string) ([]byte, error) {
 	}
 }
 
-func FindStat(cgroup_root string, cgroup_parent string, container_id string, statgroup string, stat string) string {
+func FindStat(stderr chan<- string, cgroup_root string, cgroup_parent string, container_id string, statgroup string, stat string) string {
 	var path string
-	path = fmt.Sprintf("%s/%s/%s/%s/%s.%s", cgroup_root, statgroup, cgroup_parent, container_id, statgroup, stat)
-	if _, err := os.Stat(path); err == nil {
-		return path
+	path = fmt.Sprintf("%s/%s/%s/%s/%s", cgroup_root, statgroup, cgroup_parent, container_id, stat)
+	if _, err := os.Stat(path); err != nil {
+		path = fmt.Sprintf("%s/%s/%s/%s", cgroup_root, cgroup_parent, container_id, stat)
 	}
-	path = fmt.Sprintf("%s/%s/%s/%s.%s", cgroup_root, cgroup_parent, container_id, statgroup, stat)
-	if _, err := os.Stat(path); err == nil {
-		return path
+	if _, err := os.Stat(path); err != nil {
+		path = fmt.Sprintf("%s/%s/%s", cgroup_root, statgroup, stat)
 	}
-	path = fmt.Sprintf("%s/%s/%s.%s", cgroup_root, statgroup, statgroup, stat)
-	if _, err := os.Stat(path); err == nil {
-		return path
+	if _, err := os.Stat(path); err != nil {
+		path = fmt.Sprintf("%s/%s", cgroup_root, stat)
 	}
-	path = fmt.Sprintf("%s/%s.%s", cgroup_root, statgroup, stat)
-	if _, err := os.Stat(path); err == nil {
-		return path
+	if _, err := os.Stat(path); err != nil {
+		stderr <- fmt.Sprintf("crunchstat: did not find stats file (root %s, parent %s, cid %s, statgroup %s, stat %s)", cgroup_root, cgroup_parent, container_id, statgroup, stat)
+		return ""
 	}
-	return ""
+	stderr <- fmt.Sprintf("crunchstat: reading stats from %s", path)
+	return path
+}
+
+func SetNetworkNamespace(stderr chan<- string, procsFilename string) (string) {
+	// Not supported yet -- we'll just report host-wide network stats.
+	return "host"
+
+	if procsFilename == "" { return "host" }
+	procsFile, err := os.Open(procsFilename)
+	if err != nil {
+		stderr <- fmt.Sprintf("crunchstat: open %s: %s", procsFilename, err)
+		return "host"
+	}
+	defer procsFile.Close()
+	reader := bufio.NewScanner(procsFile)
+	for reader.Scan() {
+		taskPid := reader.Text()
+		netnsFilename := fmt.Sprintf("/proc/%s/ns/net", taskPid)
+		netnsFile, err := os.Open(netnsFilename)
+		if err != nil {
+			stderr <- fmt.Sprintf("crunchstat: open %s: %s", netnsFilename, err)
+			continue
+		}
+		defer netnsFile.Close()
+
+		// syscall.Setns() doesn't exist yet, and doesn't work
+		// from a multithreaded program yet.
+		//
+		// if _, err2 := syscall.Setns(netnsFile.Fd()); err != nil {
+		// 	stderr <- fmt.Sprintf("crunchstat: Setns: %s", err2)
+		// 	continue
+		// }
+		return "task"
+	}
+	return "host"
+}
+
+type NetStat struct {
+	tx_bytes int64
+	rx_bytes int64
+}
+func DoNetworkStats(stderr chan<- string, procsFilename string, lastStat *NetStat, elapsed float64) (*NetStat) {
+	statScope := SetNetworkNamespace(stderr, procsFilename)
+
+	ifName := "eth0"
+	tx_s, tx_err := OpenAndReadAll(fmt.Sprintf("/sys/class/net/%s/statistics/tx_bytes", ifName), stderr)
+	rx_s, rx_err := OpenAndReadAll(fmt.Sprintf("/sys/class/net/%s/statistics/rx_bytes", ifName), stderr)
+	if rx_err != nil || tx_err != nil {
+		return nil
+	}
+	nextStat := new(NetStat)
+	fmt.Sscanf(string(tx_s), "%d", &nextStat.tx_bytes)
+	fmt.Sscanf(string(rx_s), "%d", &nextStat.rx_bytes)
+	if lastStat != nil {
+		stderr <- fmt.Sprintf("crunchstat: %s net %s tx %d rx %d interval %.4f",
+			statScope,
+			ifName,
+			nextStat.tx_bytes - lastStat.tx_bytes,
+			nextStat.rx_bytes - lastStat.rx_bytes,
+			elapsed)
+	}
+	return nextStat
+}
+
+type Cgroup struct {
+	cgroup_root   string
+	cgroup_parent string
+	container_id  string
 }
 
 func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id string, stderr chan string, poll int64, stop_poll_chan <-chan bool) {
@@ -93,23 +159,12 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 
 	user_hz := float64(C.sysconf(C._SC_CLK_TCK))
 
-	cpuacct_stat := FindStat(cgroup_root, cgroup_parent, container_id, "cpuacct", "stat")
-	blkio_io_service_bytes := FindStat(cgroup_root, cgroup_parent, container_id, "blkio", "io_service_bytes")
-	cpuset_cpus := FindStat(cgroup_root, cgroup_parent, container_id, "cpuset", "cpus")
-	memory_stat := FindStat(cgroup_root, cgroup_parent, container_id, "memory", "stat")
-
-	if cpuacct_stat != "" {
-		stderr <- fmt.Sprintf("crunchstat: reading stats from %s", cpuacct_stat)
-	}
-	if blkio_io_service_bytes != "" {
-		stderr <- fmt.Sprintf("crunchstat: reading stats from %s", blkio_io_service_bytes)
-	}
-	if cpuset_cpus != "" {
-		stderr <- fmt.Sprintf("crunchstat: reading stats from %s", cpuset_cpus)
-	}
-	if memory_stat != "" {
-		stderr <- fmt.Sprintf("crunchstat: reading stats from %s", memory_stat)
-	}
+	cpuacct_stat := FindStat(stderr, cgroup_root, cgroup_parent, container_id, "cpuacct", "cpuacct.stat")
+	blkio_io_service_bytes := FindStat(stderr, cgroup_root, cgroup_parent, container_id, "blkio", "blkio.io_service_bytes")
+	cpuset_cpus := FindStat(stderr, cgroup_root, cgroup_parent, container_id, "cpuset", "cpuset.cpus")
+	memory_stat := FindStat(stderr, cgroup_root, cgroup_parent, container_id, "memory", "memory.stat")
+	procs := FindStat(stderr, cgroup_root, cgroup_parent, container_id, "cpuacct", "cgroup.procs")
+	lastNetStat := DoNetworkStats(stderr, procs, nil, 0)
 
 	poll_chan := make(chan bool, 1)
 	go func() {
@@ -238,6 +293,8 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 			}
 			c.Close()
 		}
+
+		lastNetStat = DoNetworkStats(stderr, procs, lastNetStat, elapsed)
 	}
 }
 

commit 3b0de2173e005deacc22d083a4aea23f9a648865
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Oct 10 15:09:27 2014 -0400

    3826: Fix up CPU usage accounting.

diff --git a/services/crunchstat/crunchstat.go b/services/crunchstat/crunchstat.go
index 856eaaf..9175cf0 100644
--- a/services/crunchstat/crunchstat.go
+++ b/services/crunchstat/crunchstat.go
@@ -15,6 +15,15 @@ import (
 	"time"
 )
 
+/*
+#include <unistd.h>
+#include <sys/types.h>
+#include <pwd.h>
+#include <stdlib.h>
+*/
+import "C"
+// The above block of magic allows us to look up user_hz via _SC_CLK_TCK.
+
 func CopyPipeToChan(in io.Reader, out chan string, done chan<- bool) {
 	s := bufio.NewScanner(in)
 	for s.Scan() {
@@ -82,6 +91,8 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 
 	disk := make(map[string]*Disk)
 
+	user_hz := float64(C.sysconf(C._SC_CLK_TCK))
+
 	cpuacct_stat := FindStat(cgroup_root, cgroup_parent, container_id, "cpuacct", "stat")
 	blkio_io_service_bytes := FindStat(cgroup_root, cgroup_parent, container_id, "blkio", "io_service_bytes")
 	cpuset_cpus := FindStat(cgroup_root, cgroup_parent, container_id, "cpuset", "cpus")
@@ -118,8 +129,7 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 			// Emit stats, then select again.
 		}
 		morning := time.Now()
-		elapsed := morning.Sub(bedtime).Nanoseconds() / int64(time.Millisecond)
-		var cpus int64 = 0
+		elapsed := morning.Sub(bedtime).Seconds()
 		if cpuset_cpus != "" {
 			b, err := OpenAndReadAll(cpuset_cpus, stderr)
 			if err != nil {
@@ -127,6 +137,7 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 				continue
 			}
 			sp := strings.Split(string(b), ",")
+			cpus := int64(0)
 			for _, v := range sp {
 				var min, max int64
 				n, _ := fmt.Sscanf(v, "%d-%d", &min, &max)
@@ -136,15 +147,8 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 					cpus += 1
 				}
 			}
-
-			if cpus != last_cpucount {
-				stderr <- fmt.Sprintf("crunchstat: cpuset.cpus %v", cpus)
-			}
 			last_cpucount = cpus
 		}
-		if cpus == 0 {
-			cpus = 1
-		}
 		if cpuacct_stat != "" {
 			b, err := OpenAndReadAll(cpuacct_stat, stderr)
 			if err != nil {
@@ -163,19 +167,18 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 			if elapsed > 0 && last_user != -1 {
 				user_diff := next_user - last_user
 				sys_diff := next_sys - last_sys
-				// Assume we're reading stats based on 100
-				// jiffies per second.  Because the elapsed
-				// time is in milliseconds, we need to boost
-				// that to 1000 jiffies per second, then boost
-				// it by another 100x to get a percentage, then
-				// finally divide by the actual elapsed time
-				// and the number of cpus to get average load
-				// over the polling period.
-				user_pct := (user_diff * 10 * 100) / (elapsed * cpus)
-				sys_pct := (sys_diff * 10 * 100) / (elapsed * cpus)
-
-				stderr <- fmt.Sprintf("crunchstat: cpuacct.stat user %v", user_pct)
-				stderr <- fmt.Sprintf("crunchstat: cpuacct.stat sys %v", sys_pct)
+				// {*_diff} == {1/user_hz}-second
+				// ticks of CPU core consumed in an
+				// {elapsed}-second interval.
+				//
+				// We report this as CPU core usage
+				// (i.e., 1.0 == one pegged core). We
+				// also report the number of cores
+				// (maximum possible usage).
+				user := float64(user_diff) / elapsed / user_hz
+				sys := float64(sys_diff) / elapsed / user_hz
+
+				stderr <- fmt.Sprintf("crunchstat: cpuacct.stat user %.4f sys %.4f cpus %d interval %.4f", user, sys, last_cpucount, elapsed)
 			}
 
 			last_user = next_user

commit e69a5874b880b9b591e533e2c5f4bc4c3e2ff5d1
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Oct 10 10:36:36 2014 -0400

    3826: Remove old cruft. Dry up OpenAndReadAll.

diff --git a/services/crunchstat/crunchstat.go b/services/crunchstat/crunchstat.go
index 387f647..856eaaf 100644
--- a/services/crunchstat/crunchstat.go
+++ b/services/crunchstat/crunchstat.go
@@ -29,6 +29,24 @@ func CopyChanToPipe(in <-chan string, out io.Writer) {
 	}
 }
 
+func OpenAndReadAll(filename string, log_chan chan<- string) ([]byte, error) {
+	in, err := os.Open(filename)
+	if err != nil {
+		if log_chan != nil {
+			log_chan <- fmt.Sprintf("open %s: %s", filename, err)
+		}
+		return nil, err
+	}
+	defer in.Close()
+	{
+		content, err := ioutil.ReadAll(in)
+		if err != nil && log_chan != nil {
+			log_chan <- fmt.Sprintf("read %s: %s", filename, err)
+		}
+		return content, err
+	}
+}
+
 func FindStat(cgroup_root string, cgroup_parent string, container_id string, statgroup string, stat string) string {
 	var path string
 	path = fmt.Sprintf("%s/%s/%s/%s/%s.%s", cgroup_root, statgroup, cgroup_parent, container_id, statgroup, stat)
@@ -51,7 +69,6 @@ func FindStat(cgroup_root string, cgroup_parent string, container_id string, sta
 }
 
 func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id string, stderr chan string, poll int64, stop_poll_chan <-chan bool) {
-	//var last_usage int64 = 0
 	var last_user int64 = -1
 	var last_sys int64 = -1
 	var last_cpucount int64 = 0
@@ -65,7 +82,6 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 
 	disk := make(map[string]*Disk)
 
-	//cpuacct_usage := FindStat(cgroup_path, "cpuacct", "usage")
 	cpuacct_stat := FindStat(cgroup_root, cgroup_parent, container_id, "cpuacct", "stat")
 	blkio_io_service_bytes := FindStat(cgroup_root, cgroup_parent, container_id, "blkio", "io_service_bytes")
 	cpuset_cpus := FindStat(cgroup_root, cgroup_parent, container_id, "cpuset", "cpus")
@@ -103,27 +119,13 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 		}
 		morning := time.Now()
 		elapsed := morning.Sub(bedtime).Nanoseconds() / int64(time.Millisecond)
-		/*{
-			c, _ := os.Open(cpuacct_usage)
-			b, _ := ioutil.ReadAll(c)
-			var next int64
-			fmt.Sscanf(string(b), "%d", &next)
-			if last_usage != 0 {
-				stderr <- fmt.Sprintf("crunchstat: cpuacct.usage %v", (next-last_usage)/10000000)
-			}
-			//fmt.Printf("usage %d %d %d %d%%\n", last_usage, next, next-last_usage, (next-last_usage)/10000000)
-			last_usage = next
-			c.Close()
-		}*/
 		var cpus int64 = 0
 		if cpuset_cpus != "" {
-			c, err := os.Open(cpuset_cpus)
+			b, err := OpenAndReadAll(cpuset_cpus, stderr)
 			if err != nil {
-				stderr <- fmt.Sprintf("open %s: %s", cpuset_cpus, err)
 				// cgroup probably gone -- skip other stats too.
 				continue
 			}
-			b, _ := ioutil.ReadAll(c)
 			sp := strings.Split(string(b), ",")
 			for _, v := range sp {
 				var min, max int64
@@ -139,16 +141,13 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 				stderr <- fmt.Sprintf("crunchstat: cpuset.cpus %v", cpus)
 			}
 			last_cpucount = cpus
-
-			c.Close()
 		}
 		if cpus == 0 {
 			cpus = 1
 		}
 		if cpuacct_stat != "" {
-			c, err := os.Open(cpuacct_stat)
+			b, err := OpenAndReadAll(cpuacct_stat, stderr)
 			if err != nil {
-				stderr <- fmt.Sprintf("open %s: %s", cpuacct_stat, err)
 				// Next time around, last_user would
 				// be >1 interval old, so stats will
 				// be incorrect. Start over instead.
@@ -157,11 +156,9 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 				// cgroup probably gone -- skip other stats too.
 				continue
 			}
-			b, _ := ioutil.ReadAll(c)
 			var next_user int64
 			var next_sys int64
 			fmt.Sscanf(string(b), "user %d\nsystem %d", &next_user, &next_sys)
-			c.Close()
 
 			if elapsed > 0 && last_user != -1 {
 				user_diff := next_user - last_user
@@ -181,9 +178,6 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 				stderr <- fmt.Sprintf("crunchstat: cpuacct.stat sys %v", sys_pct)
 			}
 
-			/*fmt.Printf("user %d %d %d%%\n", last_user, next_user, next_user-last_user)
-			fmt.Printf("sys %d %d %d%%\n", last_sys, next_sys, next_sys-last_sys)
-			fmt.Printf("sum %d%%\n", (next_user-last_user)+(next_sys-last_sys))*/
 			last_user = next_user
 			last_sys = next_sys
 		}
@@ -194,31 +188,32 @@ func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id stri
 				// cgroup probably gone -- skip other stats too.
 				continue
 			}
+			defer c.Close()
 			b := bufio.NewScanner(c)
 			var device, op string
 			var next int64
 			for b.Scan() {
-				if _, err := fmt.Sscanf(string(b.Text()), "%s %s %d", &device, &op, &next); err == nil {
-					if disk[device] == nil {
-						disk[device] = new(Disk)
-					}
-					if op == "Read" {
-						disk[device].last_read = disk[device].next_read
-						disk[device].next_read = next
-						if disk[device].last_read > 0 && (disk[device].next_read != disk[device].last_read) {
-							stderr <- fmt.Sprintf("crunchstat: blkio.io_service_bytes %s read %v", device, disk[device].next_read-disk[device].last_read)
-						}
+				if _, err := fmt.Sscanf(string(b.Text()), "%s %s %d", &device, &op, &next); err != nil {
+					continue
+				}
+				if disk[device] == nil {
+					disk[device] = new(Disk)
+				}
+				if op == "Read" {
+					disk[device].last_read = disk[device].next_read
+					disk[device].next_read = next
+					if disk[device].last_read > 0 && (disk[device].next_read != disk[device].last_read) {
+						stderr <- fmt.Sprintf("crunchstat: blkio.io_service_bytes %s read %v", device, disk[device].next_read-disk[device].last_read)
 					}
-					if op == "Write" {
-						disk[device].last_write = disk[device].next_write
-						disk[device].next_write = next
-						if disk[device].last_write > 0 && (disk[device].next_write != disk[device].last_write) {
-							stderr <- fmt.Sprintf("crunchstat: blkio.io_service_bytes %s write %v", device, disk[device].next_write-disk[device].last_write)
-						}
+				}
+				if op == "Write" {
+					disk[device].last_write = disk[device].next_write
+					disk[device].next_write = next
+					if disk[device].last_write > 0 && (disk[device].next_write != disk[device].last_write) {
+						stderr <- fmt.Sprintf("crunchstat: blkio.io_service_bytes %s write %v", device, disk[device].next_write-disk[device].last_write)
 					}
 				}
 			}
-			c.Close()
 		}
 
 		if memory_stat != "" {
@@ -321,15 +316,11 @@ func run(logger *log.Logger) error {
 		ok := false
 		var i time.Duration
 		for i = 0; i < time.Duration(wait)*time.Second; i += (100 * time.Millisecond) {
-			f, err := os.Open(cgroup_cidfile)
-			if err == nil {
-				defer f.Close()
-				cid, err2 := ioutil.ReadAll(f)
-				if err2 == nil && len(cid) > 0 {
-					ok = true
-					container_id = string(cid)
-					break
-				}
+			cid, err := OpenAndReadAll(cgroup_cidfile, nil)
+			if err == nil && len(cid) > 0 {
+				ok = true
+				container_id = string(cid)
+				break
 			}
 			time.Sleep(100 * time.Millisecond)
 		}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list