[ARVADOS] created: 1.3.0-1772-gb10e46af8
Git user
git at public.curoverse.com
Mon Oct 21 19:41:18 UTC 2019
at b10e46af8258b7b74ae77c3e0933f74cb798dba6 (commit)
commit b10e46af8258b7b74ae77c3e0933f74cb798dba6
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Mon Oct 21 15:40:24 2019 -0400
15734: Save dispatcher's InstanceType record in container log.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/runner.go b/lib/dispatchcloud/worker/runner.go
index c30ff9f2b..114765ec1 100644
--- a/lib/dispatchcloud/worker/runner.go
+++ b/lib/dispatchcloud/worker/runner.go
@@ -11,7 +11,6 @@ import (
"syscall"
"time"
- "git.curoverse.com/arvados.git/sdk/go/arvados"
"github.com/sirupsen/logrus"
)
@@ -20,7 +19,7 @@ import (
type remoteRunner struct {
uuid string
executor Executor
- arvClient *arvados.Client
+ envJSON json.RawMessage
remoteUser string
timeoutTERM time.Duration
timeoutSignal time.Duration
@@ -36,10 +35,28 @@ type remoteRunner struct {
// newRemoteRunner returns a new remoteRunner. Caller should ensure
// Close() is called to release resources.
func newRemoteRunner(uuid string, wkr *worker) *remoteRunner {
+ var instJSON bytes.Buffer
+ enc := json.NewEncoder(&instJSON)
+ enc.SetIndent("", " ")
+ if err := enc.Encode(wkr.instType); err != nil {
+ panic(err)
+ }
+ env := map[string]string{
+ "ARVADOS_API_HOST": wkr.wp.arvClient.APIHost,
+ "ARVADOS_API_TOKEN": wkr.wp.arvClient.AuthToken,
+ "ARVADOS_INSTANCE_TYPE_JSON": instJSON.String(),
+ }
+ if wkr.wp.arvClient.Insecure {
+ env["ARVADOS_API_HOST_INSECURE"] = "1"
+ }
+ envJSON, err := json.Marshal(env)
+ if err != nil {
+ panic(err)
+ }
rr := &remoteRunner{
uuid: uuid,
executor: wkr.executor,
- arvClient: wkr.wp.arvClient,
+ envJSON: envJSON,
remoteUser: wkr.instance.RemoteUser(),
timeoutTERM: wkr.wp.timeoutTERM,
timeoutSignal: wkr.wp.timeoutSignal,
@@ -57,22 +74,11 @@ func newRemoteRunner(uuid string, wkr *worker) *remoteRunner {
// assume the remote process _might_ have started, at least until it
// probes the worker and finds otherwise.
func (rr *remoteRunner) Start() {
- env := map[string]string{
- "ARVADOS_API_HOST": rr.arvClient.APIHost,
- "ARVADOS_API_TOKEN": rr.arvClient.AuthToken,
- }
- if rr.arvClient.Insecure {
- env["ARVADOS_API_HOST_INSECURE"] = "1"
- }
- envJSON, err := json.Marshal(env)
- if err != nil {
- panic(err)
- }
- stdin := bytes.NewBuffer(envJSON)
cmd := "crunch-run --detach --stdin-env '" + rr.uuid + "'"
if rr.remoteUser != "root" {
cmd = "sudo " + cmd
}
+ stdin := bytes.NewBuffer(rr.envJSON)
stdout, stderr, err := rr.executor.Execute(nil, cmd, stdin)
if err != nil {
rr.logger.WithField("stdout", string(stdout)).
diff --git a/lib/dispatchcloud/worker/worker_test.go b/lib/dispatchcloud/worker/worker_test.go
index 4f9ba911c..943fa7c71 100644
--- a/lib/dispatchcloud/worker/worker_test.go
+++ b/lib/dispatchcloud/worker/worker_test.go
@@ -25,6 +25,7 @@ func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) {
bootTimeout := time.Minute
probeTimeout := time.Second
+ ac := arvados.NewClientFromEnv()
is, err := (&test.StubDriver{}).InstanceSet(nil, "test-instance-set-id", nil, logger)
c.Assert(err, check.IsNil)
inst, err := is.Create(arvados.InstanceType{}, "", nil, "echo InitCommand", nil)
@@ -192,6 +193,7 @@ func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) {
"crunch-run --list": trial.respRun,
}
wp := &Pool{
+ arvClient: ac,
newExecutor: func(cloud.Instance) Executor { return exr },
bootProbeCommand: "bootprobe",
timeoutBooting: bootTimeout,
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 3261291b5..7184ac657 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -850,21 +850,43 @@ func (runner *ContainerRunner) LogContainerRecord() error {
return err
}
-// LogNodeRecord logs arvados#node record corresponding to the current host.
+// LogNodeRecord logs the InstanceType config entry (or the
+// arvados#node record) corresponding to the current host, as
+// applicable.
func (runner *ContainerRunner) LogNodeRecord() error {
- hostname := os.Getenv("SLURMD_NODENAME")
- if hostname == "" {
- hostname, _ = os.Hostname()
- }
- _, err := runner.logAPIResponse("node", "nodes", map[string]interface{}{"filters": [][]string{{"hostname", "=", hostname}}}, func(resp interface{}) {
- // The "info" field has admin-only info when obtained
- // with a privileged token, and should not be logged.
- node, ok := resp.(map[string]interface{})
- if ok {
- delete(node, "info")
- }
- })
- return err
+ if instJSON := os.Getenv("ARVADOS_INSTANCE_TYPE_JSON"); instJSON != "" {
+ // Dispatched via arvados-dispatch-cloud. Save
+ // InstanceType config fragment received from
+ // dispatcher on stdin.
+ w, err := runner.LogCollection.OpenFile("node.json", os.O_CREATE|os.O_WRONLY, 0666)
+ if err != nil {
+ return err
+ }
+ defer w.Close()
+ _, err = io.WriteString(w, instJSON)
+ if err != nil {
+ return err
+ }
+ return w.Close()
+ } else {
+ // Dispatched via crunch-dispatch-slurm. Look up
+ // apiserver's node record corresponding to
+ // $SLURMD_NODENAME.
+ hostname := os.Getenv("SLURMD_NODENAME")
+ if hostname == "" {
+ hostname, _ = os.Hostname()
+ }
+ _, err := runner.logAPIResponse("node", "nodes", map[string]interface{}{"filters": [][]string{{"hostname", "=", hostname}}}, func(resp interface{}) {
+ // The "info" field has admin-only info when
+ // obtained with a privileged token, and
+ // should not be logged.
+ node, ok := resp.(map[string]interface{})
+ if ok {
+ delete(node, "info")
+ }
+ })
+ return err
+ }
}
func (runner *ContainerRunner) logAPIResponse(label, path string, params map[string]interface{}, munge func(interface{})) (logged bool, err error) {
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list