[ARVADOS] created: b887b0b6185d0c4f51db511b54306686ac17f6ce
Git user
git at public.curoverse.com
Thu Dec 1 16:40:43 EST 2016
at b887b0b6185d0c4f51db511b54306686ac17f6ce (commit)
commit b887b0b6185d0c4f51db511b54306686ac17f6ce
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Dec 1 12:56:17 2016 -0500
10649: Make errors emitted by squeue and scancel show up in logs.
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
index 3c4f281..2e5908c 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
@@ -302,12 +302,13 @@ func run(dispatcher *dispatch.Dispatcher,
// Mutex between squeue sync and running sbatch or scancel.
squeueUpdater.SlurmLock.Lock()
- err := scancelCmd(container).Run()
+ cmd := scancelCmd(container)
+ msg, err := cmd.CombinedOutput()
squeueUpdater.SlurmLock.Unlock()
if err != nil {
- log.Printf("Error stopping container %s with scancel: %v",
- container.UUID, err)
+ log.Printf("Error stopping container %s with %v %v: %v %v",
+ container.UUID, cmd.Path, cmd.Args, err, string(msg))
if squeueUpdater.CheckSqueue(container.UUID) {
log.Printf("Container %s is still in squeue after scancel.",
container.UUID)
diff --git a/services/crunch-dispatch-slurm/squeue.go b/services/crunch-dispatch-slurm/squeue.go
index 61decde..1f5d586 100644
--- a/services/crunch-dispatch-slurm/squeue.go
+++ b/services/crunch-dispatch-slurm/squeue.go
@@ -2,6 +2,7 @@ package main
import (
"bufio"
+ "io/ioutil"
"log"
"os/exec"
"sync"
@@ -45,31 +46,47 @@ func (squeue *Squeue) RunSqueue() {
log.Printf("Error creating stdout pipe for squeue: %v", err)
return
}
+
+ stderrReader, err := cmd.StderrPipe()
+ if err != nil {
+ log.Printf("Error creating stderr pipe for squeue: %v", err)
+ return
+ }
+
err = cmd.Start()
if err != nil {
log.Printf("Error running squeue: %v", err)
return
}
+
+ stderrChan := make(chan []byte)
+ go func() {
+ b, _ := ioutil.ReadAll(stderrReader)
+ stderrReader.Close()
+ stderrChan <- b
+ }()
+
scanner := bufio.NewScanner(sq)
for scanner.Scan() {
newSqueueContents = append(newSqueueContents, scanner.Text())
}
- if err := scanner.Err(); err != nil {
- cmd.Wait()
- log.Printf("Error reading from squeue pipe: %v", err)
- return
- }
err = cmd.Wait()
+ stderrmsg := <-stderrChan
+
+ if scanner.Err() != nil {
+ log.Printf("Error reading from squeue pipe: %v", err)
+ }
if err != nil {
- log.Printf("Error running squeue: %v", err)
- return
+ log.Printf("Error running %v %v: %v %q", cmd.Path, cmd.Args, err, string(stderrmsg))
}
- squeue.squeueCond.L.Lock()
- squeue.squeueContents = newSqueueContents
- squeue.squeueCond.Broadcast()
- squeue.squeueCond.L.Unlock()
+ if scanner.Err() == nil && err == nil {
+ squeue.squeueCond.L.Lock()
+ squeue.squeueContents = newSqueueContents
+ squeue.squeueCond.Broadcast()
+ squeue.squeueCond.L.Unlock()
+ }
}
// CheckSqueue checks if a given container UUID is in the slurm queue. This
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list