[arvados] created: 2.6.0-537-gc03ce6b414
git repository hosting
git at public.arvados.org
Mon Aug 28 14:31:05 UTC 2023
at c03ce6b41430afbe6afea76c9448f6895fd18781 (commit)
commit c03ce6b41430afbe6afea76c9448f6895fd18781
Author: Tom Clegg <tom at curii.com>
Date: Mon Aug 28 10:23:34 2023 -0400
20612: Run diagnostics from inside the test container too.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/diagnostics/cmd.go b/lib/diagnostics/cmd.go
index 8d89b84d37..aade1086fc 100644
--- a/lib/diagnostics/cmd.go
+++ b/lib/diagnostics/cmd.go
@@ -8,6 +8,7 @@ import (
"archive/tar"
"bytes"
"context"
+ "crypto/sha256"
_ "embed"
"flag"
"fmt"
@@ -17,6 +18,7 @@ import (
"net/http"
"net/url"
"os"
+ "os/exec"
"strings"
"time"
@@ -33,9 +35,10 @@ type Command struct{}
func (Command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
var diag diagnoser
f := flag.NewFlagSet(prog, flag.ContinueOnError)
- f.StringVar(&diag.projectName, "project-name", "scratch area for diagnostics", "name of project to find/create in home project and use for temporary/test objects")
- f.StringVar(&diag.logLevel, "log-level", "info", "logging level (debug, info, warning, error)")
- f.StringVar(&diag.dockerImage, "docker-image", "", "image to use when running a test container (default: use embedded hello-world image)")
+ f.StringVar(&diag.projectName, "project-name", "scratch area for diagnostics", "`name` of project to find/create in home project and use for temporary/test objects")
+ f.StringVar(&diag.logLevel, "log-level", "info", "logging `level` (debug, info, warning, error)")
+ f.StringVar(&diag.dockerImage, "docker-image", "", "`image` (tag or portable data hash) to use when running a test container, or \"hello-world\" to use embedded hello-world image (default: build a custom image containing this executable, and run diagnostics inside the container too)")
+ f.StringVar(&diag.dockerImageFrom, "docker-image-from", "debian:stable-slim", "`base` image to use when building a custom image (use a debian-based image similar to this host's OS for best results)")
f.BoolVar(&diag.checkInternal, "internal-client", false, "check that this host is considered an \"internal\" client")
f.BoolVar(&diag.checkExternal, "external-client", false, "check that this host is considered an \"external\" client")
f.BoolVar(&diag.verbose, "v", false, "verbose: include more information in report")
@@ -44,6 +47,8 @@ func (Command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
if ok, code := cmd.ParseFlags(f, prog, args, "", stderr); !ok {
return code
}
+ diag.stdout = stdout
+ diag.stderr = stderr
diag.logger = ctxlog.New(stdout, "text", diag.logLevel)
diag.logger.SetFormatter(&logrus.TextFormatter{DisableTimestamp: true, DisableLevelTruncation: true, PadLevelText: true})
diag.runtests()
@@ -67,19 +72,20 @@ func (Command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
var HelloWorldDockerImage []byte
type diagnoser struct {
- stdout io.Writer
- stderr io.Writer
- logLevel string
- priority int
- projectName string
- dockerImage string
- checkInternal bool
- checkExternal bool
- verbose bool
- timeout time.Duration
- logger *logrus.Logger
- errors []string
- done map[int]bool
+ stdout io.Writer
+ stderr io.Writer
+ logLevel string
+ priority int
+ projectName string
+ dockerImage string
+ dockerImageFrom string
+ checkInternal bool
+ checkExternal bool
+ verbose bool
+ timeout time.Duration
+ logger *logrus.Logger
+ errors []string
+ done map[int]bool
}
func (diag *diagnoser) debugf(f string, args ...interface{}) {
@@ -444,18 +450,76 @@ func (diag *diagnoser) runtests() {
}()
}
- // Read hello-world.tar to find image ID, so we can upload it
- // as "sha256:{...}.tar"
+ tempdir, err := ioutil.TempDir("", "arvados-diagnostics")
+ if err != nil {
+ diag.errorf("error creating temp dir: %s", err)
+ return
+ }
+ defer os.RemoveAll(tempdir)
+
+ var dockerImageData []byte
+ if diag.dockerImage != "" || diag.priority < 1 {
+ // We won't be using the self-built docker image, so
+ // don't build it. But we will write the embedded
+ // "hello-world" image to our test collection to test
+ // upload/download, whether or not we're using it as a
+ // docker image.
+ dockerImageData = HelloWorldDockerImage
+ } else if selfbin, err := os.Readlink("/proc/self/exe"); err != nil {
+ diag.errorf("readlink /proc/self/exe: %s", err)
+ return
+ } else if selfbindata, err := os.ReadFile(selfbin); err != nil {
+ diag.errorf("error reading %s: %s", selfbin, err)
+ return
+ } else {
+ selfbinSha := fmt.Sprintf("%x", sha256.Sum256(selfbindata))
+ tag := "arvados-client-diagnostics:" + selfbinSha[:9]
+ err := os.WriteFile(tempdir+"/arvados-client", selfbindata, 0777)
+ if err != nil {
+ diag.errorf("error writing %s: %s", tempdir+"/arvados-client", err)
+ return
+ }
+
+ dockerfile := "FROM " + diag.dockerImageFrom + "\n"
+ dockerfile += "RUN apt-get update && apt-get install --yes --no-install-recommends libfuse2 ca-certificates && apt-get clean\n"
+ dockerfile += "COPY /arvados-client /arvados-client\n"
+ cmd := exec.Command("docker", "build", "--tag", tag, "-f", "-", tempdir)
+ cmd.Stdin = strings.NewReader(dockerfile)
+ cmd.Stdout = diag.stderr
+ cmd.Stderr = diag.stderr
+ err = cmd.Run()
+ if err != nil {
+ diag.errorf("error building docker image: %s", err)
+ return
+ }
+ checkversion, err := exec.Command("docker", "run", tag, "/arvados-client", "version").CombinedOutput()
+ if err != nil {
+ diag.errorf("docker image does not seem to work: %s", err)
+ return
+ }
+ diag.infof("arvados-client version: %s", checkversion)
+
+ buf, err := exec.Command("docker", "save", tag).Output()
+ if err != nil {
+ diag.errorf("docker save %s: %s", tag, err)
+ return
+ }
+ diag.infof("docker image size is %d", len(buf))
+ dockerImageData = buf
+ }
+
+ // Read image tarball to find image ID, so we can upload it as
+ // "sha256:{...}.tar"
var imageSHA2 string
{
- tr := tar.NewReader(bytes.NewReader(HelloWorldDockerImage))
+ tr := tar.NewReader(bytes.NewReader(dockerImageData))
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
- diag.errorf("internal error/bug: cannot read embedded docker image tar file: %s", err)
+ diag.errorf("internal error/bug: cannot read docker image tar file: %s", err)
return
}
if s := strings.TrimSuffix(hdr.Name, ".json"); len(s) == 64 && s != hdr.Name {
@@ -463,19 +527,26 @@ func (diag *diagnoser) runtests() {
}
}
if imageSHA2 == "" {
- diag.errorf("internal error/bug: cannot find {sha256}.json file in embedded docker image tar file")
+ diag.errorf("internal error/bug: cannot find {sha256}.json file in docker image tar file")
return
}
}
tarfilename := "sha256:" + imageSHA2 + ".tar"
diag.dotest(100, "uploading file via webdav", func() error {
- ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
+ timeout := diag.timeout
+ if len(dockerImageData) > 10<<20 && timeout < time.Minute {
+ // Extend the normal http timeout if we're
+ // uploading a substantial docker image.
+ timeout = time.Minute
+ }
+ ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(timeout))
defer cancel()
if collection.UUID == "" {
return fmt.Errorf("skipping, no test collection")
}
- req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/"+tarfilename, bytes.NewReader(HelloWorldDockerImage))
+ t0 := time.Now()
+ req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/"+tarfilename, bytes.NewReader(dockerImageData))
if err != nil {
return fmt.Errorf("BUG? http.NewRequest: %s", err)
}
@@ -488,12 +559,12 @@ func (diag *diagnoser) runtests() {
if resp.StatusCode != http.StatusCreated {
return fmt.Errorf("status %s", resp.Status)
}
- diag.debugf("ok, status %s", resp.Status)
+ diag.verbosef("upload ok, status %s, %f MB/s", resp.Status, float64(len(dockerImageData))/time.Since(t0).Seconds()/1000000)
err = client.RequestAndDecodeContext(ctx, &collection, "GET", "arvados/v1/collections/"+collection.UUID, nil, nil)
if err != nil {
return fmt.Errorf("get updated collection: %s", err)
}
- diag.debugf("ok, pdh %s", collection.PortableDataHash)
+ diag.verbosef("upload pdh %s", collection.PortableDataHash)
return nil
})
@@ -549,7 +620,7 @@ func (diag *diagnoser) runtests() {
if resp.StatusCode != trial.status {
return fmt.Errorf("unexpected response status: %s", resp.Status)
}
- if trial.status == http.StatusOK && !bytes.Equal(body, HelloWorldDockerImage) {
+ if trial.status == http.StatusOK && !bytes.Equal(body, dockerImageData) {
excerpt := body
if len(excerpt) > 128 {
excerpt = append([]byte(nil), body[:128]...)
@@ -662,13 +733,26 @@ func (diag *diagnoser) runtests() {
}
timestamp := time.Now().Format(time.RFC3339)
- ctrCommand := []string{"echo", timestamp}
- if diag.dockerImage == "" {
+
+ var ctrCommand []string
+ switch diag.dockerImage {
+ case "":
+ if collection.UUID == "" {
+ return fmt.Errorf("skipping, no test collection to use as docker image")
+ }
+ diag.dockerImage = collection.PortableDataHash
+ ctrCommand = []string{"/arvados-client", "diagnostics",
+ "-priority=0", // don't run a container
+ "-log-level=" + diag.logLevel,
+ "-internal-client=true"}
+ case "hello-world":
if collection.UUID == "" {
return fmt.Errorf("skipping, no test collection to use as docker image")
}
diag.dockerImage = collection.PortableDataHash
ctrCommand = []string{"/hello"}
+ default:
+ ctrCommand = []string{"echo", timestamp}
}
var cr arvados.ContainerRequest
@@ -692,15 +776,16 @@ func (diag *diagnoser) runtests() {
},
},
"runtime_constraints": arvados.RuntimeConstraints{
+ API: true,
VCPUs: 1,
- RAM: 1 << 26,
- KeepCacheRAM: 1 << 26,
+ RAM: 128 << 20,
+ KeepCacheRAM: 64 << 20,
},
}})
if err != nil {
return err
}
- diag.verbosef("container request uuid = %s", cr.UUID)
+ diag.infof("container request uuid = %s", cr.UUID)
diag.verbosef("container uuid = %s", cr.ContainerUUID)
timeout := 10 * time.Minute
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list