[arvados] updated: 2.5.0-187-gae6fe3864
git repository hosting
git at public.arvados.org
Thu Feb 23 21:59:51 UTC 2023
Summary of changes:
doc/admin/spot-instances.html.textile.liquid | 12 +++++++++++-
doc/api/methods/containers.html.textile.liquid | 1 +
lib/crunchrun/crunchrun.go | 18 +++++++++++-------
lib/crunchrun/crunchrun_test.go | 6 +++++-
4 files changed, 28 insertions(+), 9 deletions(-)
via ae6fe3864ca6b254dfa3345985568c1cc94358fe (commit)
from 475dc10274ca275966aa6eefc25b8932cc4f3957 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit ae6fe3864ca6b254dfa3345985568c1cc94358fe
Author: Tom Clegg <tom at curii.com>
Date: Thu Feb 23 16:59:03 2023 -0500
19961: Save separate preemptionNotice key in runtime_status.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/doc/admin/spot-instances.html.textile.liquid b/doc/admin/spot-instances.html.textile.liquid
index aa640b9fd..b1c1ca41a 100644
--- a/doc/admin/spot-instances.html.textile.liquid
+++ b/doc/admin/spot-instances.html.textile.liquid
@@ -92,7 +92,7 @@ The account needs to have a service linked role created. This can be done by log
h3. Interruption notices
-When running a container on a spot instance, Arvados monitors the EC2 metadata endpoint for interruption notices. When an interruption notice is received, it is reported in a log entry in the @crunch-run.txt@ file as well as a @warning@ in the @runtime_status@ field of the affected container.
+When running a container on a spot instance, Arvados monitors the EC2 metadata endpoint for interruption notices. When an interruption notice is received, it is reported in a log entry in the @crunch-run.txt@ file as well as @warning@ and @preemptionNotice@ keys in the @runtime_status@ field of the affected container.
Example excerpt from @crunch-run.txt@:
@@ -100,6 +100,16 @@ Example excerpt from @crunch-run.txt@:
2023-02-21T21:12:42.350719824Z Cloud provider indicates instance action "stop" scheduled for time "2023-02-21T21:14:42Z"
</pre>
+Example @runtime_status@:
+
+<pre>
+{
+ "warning": "preemption notice",
+ "warningDetail": "Cloud provider indicates instance action \"stop\" scheduled for time \"2023-02-21T21:14:42Z\"",
+ "preemptionNotice": "Cloud provider indicates instance action \"stop\" scheduled for time \"2023-02-21T21:14:42Z\""
+}
+</pre>
+
h2. Preemptible instances on Azure
For general information, see "Use Spot VMs in Azure":https://docs.microsoft.com/en-us/azure/virtual-machines/spot-vms.
diff --git a/doc/api/methods/containers.html.textile.liquid b/doc/api/methods/containers.html.textile.liquid
index c29a91ba2..c4fa73466 100644
--- a/doc/api/methods/containers.html.textile.liquid
+++ b/doc/api/methods/containers.html.textile.liquid
@@ -93,6 +93,7 @@ table(table table-bordered table-condensed).
|activity|string|A message for the end user about what state the container is currently in.|Optional.|
|errorDetail|string|Additional structured error details.|Optional.|
|warningDetail|string|Additional structured warning details.|Optional.|
+|preemptionNotice|string|Indication that the preemptible instance where the container is running will be terminated soon.|Optional.|
h2(#scheduling_parameters). {% include 'container_scheduling_parameters' %}
diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go
index a9c65cca4..57eed84ba 100644
--- a/lib/crunchrun/crunchrun.go
+++ b/lib/crunchrun/crunchrun.go
@@ -321,7 +321,10 @@ func (runner *ContainerRunner) ArvMountCmd(cmdline []string, token string) (c *e
"Unhandled exception during FUSE operation",
},
ReportFunc: func(pattern, text string) {
- runner.updateRuntimeStatus("arv-mount: "+pattern, text)
+ runner.updateRuntimeStatus(arvadosclient.Dict{
+ "warning": "arv-mount: " + pattern,
+ "warningDetail": text,
+ })
},
}
c.Stdout = runner.arvMountLog
@@ -1290,7 +1293,11 @@ func (runner *ContainerRunner) checkSpotInterruptionNotices() {
lastmetadata = metadata
text := fmt.Sprintf("Cloud provider indicates instance action %q scheduled for time %q", metadata.Action, metadata.Time.UTC().Format(time.RFC3339))
runner.CrunchLog.Printf("%s", text)
- runner.updateRuntimeStatus("instance interruption", text)
+ runner.updateRuntimeStatus(arvadosclient.Dict{
+ "warning": "preemption notice",
+ "warningDetail": text,
+ "preemptionNotice": text,
+ })
if proc, err := os.FindProcess(os.Getpid()); err == nil {
// trigger updateLogs
proc.Signal(syscall.SIGUSR1)
@@ -1299,13 +1306,10 @@ func (runner *ContainerRunner) checkSpotInterruptionNotices() {
}
}
-func (runner *ContainerRunner) updateRuntimeStatus(warning, detail string) {
+func (runner *ContainerRunner) updateRuntimeStatus(status arvadosclient.Dict) {
err := runner.DispatcherArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{
"container": arvadosclient.Dict{
- "runtime_status": arvadosclient.Dict{
- "warning": warning,
- "warningDetail": detail,
- },
+ "runtime_status": status,
},
}, nil)
if err != nil {
diff --git a/lib/crunchrun/crunchrun_test.go b/lib/crunchrun/crunchrun_test.go
index 5b4c6827b..1f732c051 100644
--- a/lib/crunchrun/crunchrun_test.go
+++ b/lib/crunchrun/crunchrun_test.go
@@ -848,7 +848,11 @@ func (s *TestSuite) testSpotInterruptionNotice(c *C, failureRate float64) {
if failureRate == 1 {
c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Giving up on checking spot interruptions after too many consecutive failures.*`)
} else {
- c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Cloud provider indicates instance action "stop" scheduled for time "`+stoptime.Load().(time.Time).Format(time.RFC3339)+`".*`)
+ text := `Cloud provider indicates instance action "stop" scheduled for time "` + stoptime.Load().(time.Time).Format(time.RFC3339) + `"`
+ c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*`+text+`.*`)
+ c.Check(s.api.CalledWith("container.runtime_status.warning", "preemption notice"), NotNil)
+ c.Check(s.api.CalledWith("container.runtime_status.warningDetail", text), NotNil)
+ c.Check(s.api.CalledWith("container.runtime_status.preemptionNotice", text), NotNil)
}
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list