[ARVADOS] created: 1.3.0-1331-g726881f89
Git user
git at public.curoverse.com
Thu Jul 18 14:26:34 UTC 2019
at 726881f8950e4f361193a612c66c90ca9535e81a (commit)
commit 726881f8950e4f361193a612c66c90ca9535e81a
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Thu Jul 18 10:25:44 2019 -0400
14713: Fix handling default/missing keys, can't load default file
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/lib/config/deprecated.go b/lib/config/deprecated.go
index 614f5dcf9..8f3f3d7ed 100644
--- a/lib/config/deprecated.go
+++ b/lib/config/deprecated.go
@@ -108,19 +108,17 @@ type oldKeepstoreConfig struct {
Debug *bool
}
-func (ldr *Loader) loadOldConfigHelper(component, path, defaultPath string, target interface{}) error {
+func (ldr *Loader) loadOldConfigHelper(component, path string, target interface{}) error {
if path == "" {
return nil
}
buf, err := ioutil.ReadFile(path)
- if os.IsNotExist(err) && path == defaultPath {
- return nil
- } else if err != nil {
+ if err != nil {
return err
- } else {
- ldr.Logger.Warnf("you should remove the legacy %v config file (%s) after migrating all config keys to the cluster configuration file (%s)", component, path, ldr.Path)
}
+ ldr.Logger.Warnf("you should remove the legacy %v config file (%s) after migrating all config keys to the cluster configuration file (%s)", component, path, ldr.Path)
+
err = yaml.Unmarshal(buf, target)
if err != nil {
return fmt.Errorf("%s: %s", path, err)
@@ -131,8 +129,10 @@ func (ldr *Loader) loadOldConfigHelper(component, path, defaultPath string, targ
// update config using values from an old-style keepstore config file.
func (ldr *Loader) loadOldKeepstoreConfig(cfg *arvados.Config) error {
var oc oldKeepstoreConfig
- err := ldr.loadOldConfigHelper("keepstore", ldr.KeepstorePath, defaultKeepstoreConfigPath, &oc)
- if err != nil {
+ err := ldr.loadOldConfigHelper("keepstore", ldr.KeepstorePath, &oc)
+ if os.IsNotExist(err) && ldr.KeepstorePath == defaultKeepstoreConfigPath {
+ return nil
+ } else if err != nil {
return err
}
@@ -153,27 +153,27 @@ func (ldr *Loader) loadOldKeepstoreConfig(cfg *arvados.Config) error {
}
type oldCrunchDispatchSlurmConfig struct {
- Client arvados.Client
+ Client *arvados.Client
- SbatchArguments []string
- PollPeriod arvados.Duration
- PrioritySpread int64
+ SbatchArguments *[]string
+ PollPeriod *arvados.Duration
+ PrioritySpread *int64
// crunch-run command to invoke. The container UUID will be
// appended. If nil, []string{"crunch-run"} will be used.
//
// Example: []string{"crunch-run", "--cgroup-parent-subsystem=memory"}
- CrunchRunCommand []string
+ CrunchRunCommand *[]string
// Extra RAM to reserve (in Bytes) for SLURM job, in addition
// to the amount specified in the container's RuntimeConstraints
- ReserveExtraRAM int64
+ ReserveExtraRAM *int64
// Minimum time between two attempts to run the same container
- MinRetryPeriod arvados.Duration
+ MinRetryPeriod *arvados.Duration
// Batch size for container queries
- BatchSize int64
+ BatchSize *int64
}
const defaultCrunchDispatchSlurmConfigPath = "/etc/arvados/crunch-dispatch-slurm/crunch-dispatch-slurm.yml"
@@ -181,8 +181,10 @@ const defaultCrunchDispatchSlurmConfigPath = "/etc/arvados/crunch-dispatch-slurm
// update config using values from an crunch-dispatch-slurm config file.
func (ldr *Loader) loadOldCrunchDispatchSlurmConfig(cfg *arvados.Config) error {
var oc oldCrunchDispatchSlurmConfig
- err := ldr.loadOldConfigHelper("crunch-dispatch-slurm", ldr.CrunchDispatchSlurmPath, defaultCrunchDispatchSlurmConfigPath, &oc)
- if err != nil {
+ err := ldr.loadOldConfigHelper("crunch-dispatch-slurm", ldr.CrunchDispatchSlurmPath, &oc)
+ if os.IsNotExist(err) && ldr.CrunchDispatchSlurmPath == defaultCrunchDispatchSlurmConfigPath {
+ return nil
+ } else if err != nil {
return err
}
@@ -191,30 +193,45 @@ func (ldr *Loader) loadOldCrunchDispatchSlurmConfig(cfg *arvados.Config) error {
return err
}
- u := arvados.URL{}
- u.Host = oc.Client.APIHost
- if oc.Client.Scheme != "" {
- u.Scheme = oc.Client.Scheme
- } else {
- u.Scheme = "https"
+ if oc.Client != nil {
+ u := arvados.URL{}
+ u.Host = oc.Client.APIHost
+ if oc.Client.Scheme != "" {
+ u.Scheme = oc.Client.Scheme
+ } else {
+ u.Scheme = "https"
+ }
+ cluster.Services.Controller.ExternalURL = u
+ cluster.SystemRootToken = oc.Client.AuthToken
+ cluster.TLS.Insecure = oc.Client.Insecure
}
- cluster.Services.Controller.ExternalURL = u
- cluster.SystemRootToken = oc.Client.AuthToken
- cluster.TLS.Insecure = oc.Client.Insecure
- cluster.Containers.SLURM.SbatchArgumentsList = oc.SbatchArguments
- cluster.Containers.CloudVMs.PollInterval = oc.PollPeriod
- cluster.Containers.SLURM.PrioritySpread = oc.PrioritySpread
- if len(oc.CrunchRunCommand) >= 1 {
- cluster.Containers.CrunchRunCommand = oc.CrunchRunCommand[0]
+ if oc.SbatchArguments != nil {
+ cluster.Containers.SLURM.SbatchArgumentsList = *oc.SbatchArguments
}
- if len(oc.CrunchRunCommand) >= 2 {
- cluster.Containers.CrunchRunArgumentsList = oc.CrunchRunCommand[1:]
+ if oc.PollPeriod != nil {
+ cluster.Containers.CloudVMs.PollInterval = *oc.PollPeriod
+ }
+ if oc.PrioritySpread != nil {
+ cluster.Containers.SLURM.PrioritySpread = *oc.PrioritySpread
+ }
+ if oc.CrunchRunCommand != nil {
+ if len(*oc.CrunchRunCommand) >= 1 {
+ cluster.Containers.CrunchRunCommand = (*oc.CrunchRunCommand)[0]
+ }
+ if len(*oc.CrunchRunCommand) >= 2 {
+ cluster.Containers.CrunchRunArgumentsList = (*oc.CrunchRunCommand)[1:]
+ }
+ }
+ if oc.ReserveExtraRAM != nil {
+ cluster.Containers.ReserveExtraRAM = arvados.ByteSize(*oc.ReserveExtraRAM)
+ }
+ if oc.MinRetryPeriod != nil {
+ cluster.Containers.MinRetryPeriod = *oc.MinRetryPeriod
+ }
+ if oc.BatchSize != nil {
+ cluster.API.MaxItemsPerResponse = int(*oc.BatchSize)
}
- cluster.Containers.ReserveExtraRAM = arvados.ByteSize(oc.ReserveExtraRAM)
- cluster.Containers.MinRetryPeriod = oc.MinRetryPeriod
-
- cluster.API.MaxItemsPerResponse = int(oc.BatchSize)
cfg.Clusters[cluster.ClusterID] = *cluster
return nil
diff --git a/lib/config/export.go b/lib/config/export.go
index a050492b3..dbbaac127 100644
--- a/lib/config/export.go
+++ b/lib/config/export.go
@@ -101,7 +101,7 @@ var whitelist = map[string]bool{
"Containers.MaxDispatchAttempts": false,
"Containers.MaxRetryAttempts": true,
"Containers.MinRetryPeriod": true,
- "Containers.ReserveExtraRam": true,
+ "Containers.ReserveExtraRAM": true,
"Containers.SLURM": false,
"Containers.StaleLockTimeout": false,
"Containers.SupportedDockerImageFormats": true,
diff --git a/lib/config/load_test.go b/lib/config/load_test.go
index 340eb0a0a..fa04c0075 100644
--- a/lib/config/load_test.go
+++ b/lib/config/load_test.go
@@ -168,7 +168,9 @@ func (s *LoadSuite) TestSampleKeys(c *check.C) {
}
func (s *LoadSuite) TestMultipleClusters(c *check.C) {
- cfg, err := testLoader(c, `{"Clusters":{"z1111":{},"z2222":{}}}`, nil).Load()
+ ldr := testLoader(c, `{"Clusters":{"z1111":{},"z2222":{}}}`, nil)
+ ldr.SkipDeprecated = true
+ cfg, err := ldr.Load()
c.Assert(err, check.IsNil)
c1, err := cfg.GetCluster("z1111")
c.Assert(err, check.IsNil)
commit a88377d3b2fb2e4393dac984ad1c6e06aa0431f3
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Wed Jul 17 16:54:01 2019 -0400
14713: Migrate old crunch-dispatch-slurm config to new config
Update code to use new config.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/lib/config/deprecated.go b/lib/config/deprecated.go
index 0b0bb2668..614f5dcf9 100644
--- a/lib/config/deprecated.go
+++ b/lib/config/deprecated.go
@@ -108,29 +108,37 @@ type oldKeepstoreConfig struct {
Debug *bool
}
-// update config using values from an old-style keepstore config file.
-func (ldr *Loader) loadOldKeepstoreConfig(cfg *arvados.Config) error {
- path := ldr.KeepstorePath
+func (ldr *Loader) loadOldConfigHelper(component, path, defaultPath string, target interface{}) error {
if path == "" {
return nil
}
buf, err := ioutil.ReadFile(path)
- if os.IsNotExist(err) && path == defaultKeepstoreConfigPath {
+ if os.IsNotExist(err) && path == defaultPath {
return nil
} else if err != nil {
return err
} else {
- ldr.Logger.Warnf("you should remove the legacy keepstore config file (%s) after migrating all config keys to the cluster configuration file (%s)", path, ldr.Path)
+ ldr.Logger.Warnf("you should remove the legacy %v config file (%s) after migrating all config keys to the cluster configuration file (%s)", component, path, ldr.Path)
}
- cluster, err := cfg.GetCluster("")
+
+ err = yaml.Unmarshal(buf, target)
if err != nil {
- return err
+ return fmt.Errorf("%s: %s", path, err)
}
+ return nil
+}
+// update config using values from an old-style keepstore config file.
+func (ldr *Loader) loadOldKeepstoreConfig(cfg *arvados.Config) error {
var oc oldKeepstoreConfig
- err = yaml.Unmarshal(buf, &oc)
+ err := ldr.loadOldConfigHelper("keepstore", ldr.KeepstorePath, defaultKeepstoreConfigPath, &oc)
if err != nil {
- return fmt.Errorf("%s: %s", path, err)
+ return err
+ }
+
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ return err
}
if v := oc.Debug; v == nil {
@@ -143,3 +151,71 @@ func (ldr *Loader) loadOldKeepstoreConfig(cfg *arvados.Config) error {
cfg.Clusters[cluster.ClusterID] = *cluster
return nil
}
+
+type oldCrunchDispatchSlurmConfig struct {
+ Client arvados.Client
+
+ SbatchArguments []string
+ PollPeriod arvados.Duration
+ PrioritySpread int64
+
+ // crunch-run command to invoke. The container UUID will be
+ // appended. If nil, []string{"crunch-run"} will be used.
+ //
+ // Example: []string{"crunch-run", "--cgroup-parent-subsystem=memory"}
+ CrunchRunCommand []string
+
+ // Extra RAM to reserve (in Bytes) for SLURM job, in addition
+ // to the amount specified in the container's RuntimeConstraints
+ ReserveExtraRAM int64
+
+ // Minimum time between two attempts to run the same container
+ MinRetryPeriod arvados.Duration
+
+ // Batch size for container queries
+ BatchSize int64
+}
+
+const defaultCrunchDispatchSlurmConfigPath = "/etc/arvados/crunch-dispatch-slurm/crunch-dispatch-slurm.yml"
+
+// update config using values from an crunch-dispatch-slurm config file.
+func (ldr *Loader) loadOldCrunchDispatchSlurmConfig(cfg *arvados.Config) error {
+ var oc oldCrunchDispatchSlurmConfig
+ err := ldr.loadOldConfigHelper("crunch-dispatch-slurm", ldr.CrunchDispatchSlurmPath, defaultCrunchDispatchSlurmConfigPath, &oc)
+ if err != nil {
+ return err
+ }
+
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ return err
+ }
+
+ u := arvados.URL{}
+ u.Host = oc.Client.APIHost
+ if oc.Client.Scheme != "" {
+ u.Scheme = oc.Client.Scheme
+ } else {
+ u.Scheme = "https"
+ }
+ cluster.Services.Controller.ExternalURL = u
+ cluster.SystemRootToken = oc.Client.AuthToken
+ cluster.TLS.Insecure = oc.Client.Insecure
+
+ cluster.Containers.SLURM.SbatchArgumentsList = oc.SbatchArguments
+ cluster.Containers.CloudVMs.PollInterval = oc.PollPeriod
+ cluster.Containers.SLURM.PrioritySpread = oc.PrioritySpread
+ if len(oc.CrunchRunCommand) >= 1 {
+ cluster.Containers.CrunchRunCommand = oc.CrunchRunCommand[0]
+ }
+ if len(oc.CrunchRunCommand) >= 2 {
+ cluster.Containers.CrunchRunArgumentsList = oc.CrunchRunCommand[1:]
+ }
+ cluster.Containers.ReserveExtraRAM = arvados.ByteSize(oc.ReserveExtraRAM)
+ cluster.Containers.MinRetryPeriod = oc.MinRetryPeriod
+
+ cluster.API.MaxItemsPerResponse = int(oc.BatchSize)
+
+ cfg.Clusters[cluster.ClusterID] = *cluster
+ return nil
+}
diff --git a/lib/config/export.go b/lib/config/export.go
index b79dec4d9..a050492b3 100644
--- a/lib/config/export.go
+++ b/lib/config/export.go
@@ -83,6 +83,8 @@ var whitelist = map[string]bool{
"Collections.TrustAllContent": false,
"Containers": true,
"Containers.CloudVMs": false,
+ "Containers.CrunchRunCommand": false,
+ "Containers.CrunchRunArgumentsList": false,
"Containers.DefaultKeepCacheRAM": true,
"Containers.DispatchPrivateKey": false,
"Containers.JobsAPI": true,
@@ -98,6 +100,8 @@ var whitelist = map[string]bool{
"Containers.MaxComputeVMs": false,
"Containers.MaxDispatchAttempts": false,
"Containers.MaxRetryAttempts": true,
+ "Containers.MinRetryPeriod": true,
+ "Containers.ReserveExtraRam": true,
"Containers.SLURM": false,
"Containers.StaleLockTimeout": false,
"Containers.SupportedDockerImageFormats": true,
diff --git a/lib/config/load.go b/lib/config/load.go
index 168c1aa22..bce57d759 100644
--- a/lib/config/load.go
+++ b/lib/config/load.go
@@ -28,8 +28,9 @@ type Loader struct {
Logger logrus.FieldLogger
SkipDeprecated bool // Don't load legacy/deprecated config keys/files
- Path string
- KeepstorePath string
+ Path string
+ KeepstorePath string
+ CrunchDispatchSlurmPath string
configdata []byte
}
@@ -57,6 +58,7 @@ func NewLoader(stdin io.Reader, logger logrus.FieldLogger) *Loader {
func (ldr *Loader) SetupFlags(flagset *flag.FlagSet) {
flagset.StringVar(&ldr.Path, "config", arvados.DefaultConfigFile, "Site configuration `file` (default may be overridden by setting an ARVADOS_CONFIG environment variable)")
flagset.StringVar(&ldr.KeepstorePath, "legacy-keepstore-config", defaultKeepstoreConfigPath, "Legacy keepstore configuration `file`")
+ flagset.StringVar(&ldr.CrunchDispatchSlurmPath, "legacy-crunch-dispatch-slurm-config", defaultCrunchDispatchSlurmConfigPath, "Legacy crunch-dispatch-slurm configuration `file`")
}
// MungeLegacyConfigArgs checks args for a -config flag whose argument
@@ -205,6 +207,7 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
}
for _, err := range []error{
ldr.loadOldKeepstoreConfig(&cfg),
+ ldr.loadOldCrunchDispatchSlurmConfig(&cfg),
} {
if err != nil {
return nil, err
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index c8206c7da..12ec8e6b2 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -253,12 +253,16 @@ type InstanceType struct {
type ContainersConfig struct {
CloudVMs CloudVMsConfig
+ CrunchRunCommand string
+ CrunchRunArgumentsList []string
DefaultKeepCacheRAM ByteSize
DispatchPrivateKey string
LogReuseDecisions bool
MaxComputeVMs int
MaxDispatchAttempts int
MaxRetryAttempts int
+ MinRetryPeriod Duration
+ ReserveExtraRAM ByteSize
StaleLockTimeout Duration
SupportedDockerImageFormats []string
UsePreemptibleInstances bool
@@ -285,7 +289,9 @@ type ContainersConfig struct {
LogUpdateSize ByteSize
}
SLURM struct {
- Managed struct {
+ PrioritySpread int64
+ SbatchArgumentsList []string
+ Managed struct {
DNSServerConfDir string
DNSServerConfTemplate string
DNSServerReloadCommand string
diff --git a/sdk/go/dispatch/dispatch.go b/sdk/go/dispatch/dispatch.go
index fdb52e510..587c9999c 100644
--- a/sdk/go/dispatch/dispatch.go
+++ b/sdk/go/dispatch/dispatch.go
@@ -38,7 +38,7 @@ type Dispatcher struct {
Logger Logger
// Batch size for container queries
- BatchSize int64
+ BatchSize int
// Queue polling frequency
PollPeriod time.Duration
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
index 09e3d591a..1a7ad6fac 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
@@ -25,6 +25,7 @@ import (
"git.curoverse.com/arvados.git/sdk/go/dispatch"
"github.com/coreos/go-systemd/daemon"
"github.com/sirupsen/logrus"
+ "gopkg.in/yaml.v2"
)
type logger interface {
@@ -35,8 +36,7 @@ type logger interface {
const initialNiceValue int64 = 10000
var (
- version = "dev"
- defaultConfigPath = "/etc/arvados/crunch-dispatch-slurm/crunch-dispatch-slurm.yml"
+ version = "dev"
)
type Dispatcher struct {
@@ -74,16 +74,15 @@ func (disp *Dispatcher) Run(prog string, args []string) error {
// configure() loads config files. Tests skip this.
func (disp *Dispatcher) configure(prog string, args []string) error {
+ if disp.logger == nil {
+ disp.logger = logrus.StandardLogger()
+ }
flags := flag.NewFlagSet(prog, flag.ExitOnError)
flags.Usage = func() { usage(flags) }
- loader := config.NewLoader(stdin, log)
+ loader := config.NewLoader(nil, disp.logger)
loader.SetupFlags(flags)
- configPath := flags.String(
- "config",
- defaultConfigPath,
- "`path` to JSON or YAML configuration file")
dumpConfig := flag.Bool(
"dump-config",
false,
@@ -93,10 +92,10 @@ func (disp *Dispatcher) configure(prog string, args []string) error {
false,
"Print version information and exit.")
- args = loader.MungeLegacyConfigArgs(logrus.StandardLogger(), args, "-crunch-dispatch-slurm-config")
+ args = loader.MungeLegacyConfigArgs(logrus.StandardLogger(), args, "-legacy-crunch-dispatch-slurm-config")
// Parse args; omit the first arg which is the command name
- flags.Parse(args)
+ err := flags.Parse(args)
if err == flag.ErrHelp {
return nil
@@ -119,8 +118,7 @@ func (disp *Dispatcher) configure(prog string, args []string) error {
return fmt.Errorf("config error: %s", err)
}
- disp.Client.APIHost = fmt.Sprintf("%s:%d", disp.cluster.Services.Controller.ExternalURL.Host,
- disp.cluster.Services.Controller.ExternalURL.Port)
+ disp.Client.APIHost = disp.cluster.Services.Controller.ExternalURL.Host
disp.Client.AuthToken = disp.cluster.SystemRootToken
disp.Client.Insecure = disp.cluster.TLS.Insecure
@@ -141,7 +139,14 @@ func (disp *Dispatcher) configure(prog string, args []string) error {
}
if *dumpConfig {
- return config.DumpAndExit(cfg)
+ out, err := yaml.Marshal(cfg)
+ if err != nil {
+ return err
+ }
+ _, err = os.Stdout.Write(out)
+ if err != nil {
+ return err
+ }
}
return nil
@@ -149,9 +154,6 @@ func (disp *Dispatcher) configure(prog string, args []string) error {
// setup() initializes private fields after configure().
func (disp *Dispatcher) setup() {
- if disp.logger == nil {
- disp.logger = logrus.StandardLogger()
- }
arv, err := arvadosclient.MakeArvadosClient()
if err != nil {
disp.logger.Fatalf("Error making Arvados client: %v", err)
@@ -161,17 +163,17 @@ func (disp *Dispatcher) setup() {
disp.slurm = NewSlurmCLI()
disp.sqCheck = &SqueueChecker{
Logger: disp.logger,
- Period: time.Duration(disp.PollPeriod),
- PrioritySpread: disp.PrioritySpread,
+ Period: time.Duration(disp.cluster.Containers.CloudVMs.PollInterval),
+ PrioritySpread: disp.cluster.Containers.SLURM.PrioritySpread,
Slurm: disp.slurm,
}
disp.Dispatcher = &dispatch.Dispatcher{
Arv: arv,
Logger: disp.logger,
- BatchSize: disp.BatchSize,
+ BatchSize: disp.cluster.API.MaxItemsPerResponse,
RunContainer: disp.runContainer,
- PollPeriod: time.Duration(disp.PollPeriod),
- MinRetryPeriod: time.Duration(disp.MinRetryPeriod),
+ PollPeriod: time.Duration(disp.cluster.Containers.CloudVMs.PollInterval),
+ MinRetryPeriod: time.Duration(disp.cluster.Containers.MinRetryPeriod),
}
}
@@ -209,7 +211,9 @@ func (disp *Dispatcher) checkSqueueForOrphans() {
}
func (disp *Dispatcher) slurmConstraintArgs(container arvados.Container) []string {
- mem := int64(math.Ceil(float64(container.RuntimeConstraints.RAM+container.RuntimeConstraints.KeepCacheRAM+disp.ReserveExtraRAM) / float64(1048576)))
+ mem := int64(math.Ceil(float64(container.RuntimeConstraints.RAM+
+ container.RuntimeConstraints.KeepCacheRAM+
+ int64(disp.cluster.Containers.ReserveExtraRAM)) / float64(1048576)))
disk := dispatchcloud.EstimateScratchSpace(&container)
disk = int64(math.Ceil(float64(disk) / float64(1048576)))
@@ -222,7 +226,7 @@ func (disp *Dispatcher) slurmConstraintArgs(container arvados.Container) []strin
func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error) {
var args []string
- args = append(args, disp.SbatchArguments...)
+ args = append(args, disp.cluster.Containers.SLURM.SbatchArgumentsList...)
args = append(args, "--job-name="+container.UUID, fmt.Sprintf("--nice=%d", initialNiceValue), "--no-requeue")
if disp.cluster == nil {
@@ -270,7 +274,9 @@ func (disp *Dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
if ctr.State == dispatch.Locked && !disp.sqCheck.HasUUID(ctr.UUID) {
log.Printf("Submitting container %s to slurm", ctr.UUID)
- if err := disp.submit(ctr, disp.CrunchRunCommand); err != nil {
+ cmd := []string{disp.cluster.Containers.CrunchRunCommand}
+ cmd = append(cmd, disp.cluster.Containers.CrunchRunArgumentsList...)
+ if err := disp.submit(ctr, cmd); err != nil {
var text string
if err, ok := err.(dispatchcloud.ConstraintsNotSatisfiableError); ok {
var logBuf bytes.Buffer
@@ -361,12 +367,3 @@ func (disp *Dispatcher) scancel(ctr arvados.Container) {
time.Sleep(time.Second)
}
}
-
-func (disp *Dispatcher) readConfig(path string) error {
- err := config.LoadFile(disp, path)
- if err != nil && os.IsNotExist(err) && path == defaultConfigPath {
- log.Printf("Config not specified. Continue with default configuration.")
- err = nil
- }
- return err
-}
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
index eea102012..ca3944d76 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
@@ -11,6 +11,7 @@ import (
"fmt"
"io"
"io/ioutil"
+ "log"
"net/http"
"net/http/httptest"
"os"
@@ -45,6 +46,7 @@ func (s *IntegrationSuite) SetUpTest(c *C) {
arvadostest.StartAPI()
os.Setenv("ARVADOS_API_TOKEN", arvadostest.Dispatch1Token)
s.disp = Dispatcher{}
+ s.disp.cluster = &arvados.Cluster{}
s.disp.setup()
s.slurm = slurmFake{}
}
@@ -118,7 +120,7 @@ func (s *IntegrationSuite) integrationTest(c *C,
c.Check(err, IsNil)
c.Assert(len(containers.Items), Equals, 1)
- s.disp.CrunchRunCommand = []string{"echo"}
+ s.disp.cluster.Containers.CrunchRunCommand = "echo"
ctx, cancel := context.WithCancel(context.Background())
doneRun := make(chan struct{})
@@ -243,6 +245,7 @@ type StubbedSuite struct {
func (s *StubbedSuite) SetUpTest(c *C) {
s.disp = Dispatcher{}
+ s.disp.cluster = &arvados.Cluster{}
s.disp.setup()
}
@@ -272,7 +275,7 @@ func (s *StubbedSuite) testWithServerStub(c *C, apiStubResponses map[string]arva
logrus.SetOutput(io.MultiWriter(buf, os.Stderr))
defer logrus.SetOutput(os.Stderr)
- s.disp.CrunchRunCommand = []string{crunchCmd}
+ s.disp.cluster.Containers.CrunchRunCommand = "crunchCmd"
ctx, cancel := context.WithCancel(context.Background())
dispatcher := dispatch.Dispatcher{
@@ -302,51 +305,6 @@ func (s *StubbedSuite) testWithServerStub(c *C, apiStubResponses map[string]arva
c.Check(buf.String(), Matches, `(?ms).*`+expected+`.*`)
}
-func (s *StubbedSuite) TestNoSuchConfigFile(c *C) {
- err := s.disp.readConfig("/nosuchdir89j7879/8hjwr7ojgyy7")
- c.Assert(err, NotNil)
-}
-
-func (s *StubbedSuite) TestBadSbatchArgsConfig(c *C) {
- tmpfile, err := ioutil.TempFile(os.TempDir(), "config")
- c.Check(err, IsNil)
- defer os.Remove(tmpfile.Name())
-
- _, err = tmpfile.Write([]byte(`{"SbatchArguments": "oops this is not a string array"}`))
- c.Check(err, IsNil)
-
- err = s.disp.readConfig(tmpfile.Name())
- c.Assert(err, NotNil)
-}
-
-func (s *StubbedSuite) TestNoSuchArgInConfigIgnored(c *C) {
- tmpfile, err := ioutil.TempFile(os.TempDir(), "config")
- c.Check(err, IsNil)
- defer os.Remove(tmpfile.Name())
-
- _, err = tmpfile.Write([]byte(`{"NoSuchArg": "Nobody loves me, not one tiny hunk."}`))
- c.Check(err, IsNil)
-
- err = s.disp.readConfig(tmpfile.Name())
- c.Assert(err, IsNil)
- c.Check(0, Equals, len(s.disp.SbatchArguments))
-}
-
-func (s *StubbedSuite) TestReadConfig(c *C) {
- tmpfile, err := ioutil.TempFile(os.TempDir(), "config")
- c.Check(err, IsNil)
- defer os.Remove(tmpfile.Name())
-
- args := []string{"--arg1=v1", "--arg2", "--arg3=v3"}
- argsS := `{"SbatchArguments": ["--arg1=v1", "--arg2", "--arg3=v3"]}`
- _, err = tmpfile.Write([]byte(argsS))
- c.Check(err, IsNil)
-
- err = s.disp.readConfig(tmpfile.Name())
- c.Assert(err, IsNil)
- c.Check(args, DeepEquals, s.disp.SbatchArguments)
-}
-
func (s *StubbedSuite) TestSbatchArgs(c *C) {
container := arvados.Container{
UUID: "123",
@@ -360,7 +318,7 @@ func (s *StubbedSuite) TestSbatchArgs(c *C) {
{"--arg1=v1", "--arg2"},
} {
c.Logf("%#v", defaults)
- s.disp.SbatchArguments = defaults
+ s.disp.cluster.Containers.SLURM.SbatchArgumentsList = defaults
args, err := s.disp.sbatchArgs(container)
c.Check(args, DeepEquals, append(defaults, "--job-name=123", "--nice=10000", "--no-requeue", "--mem=239", "--cpus-per-task=2", "--tmp=0"))
@@ -432,3 +390,44 @@ func (s *StubbedSuite) TestSbatchPartition(c *C) {
})
c.Check(err, IsNil)
}
+
+func (s *StubbedSuite) TestLoadLegacyConfig(c *C) {
+ content := []byte(`
+Client:
+ APIHost: example.com
+ APIToken: abcdefg
+SbatchArguments: ["--foo", "bar"]
+PollPeriod: 12s
+PrioritySpread: 42
+CrunchRunCommand: ["x-crunch-run", "--cgroup-parent-subsystem=memory"]
+ReserveExtraRAM: 12345
+MinRetryPeriod: 13s
+BatchSize: 99
+`)
+ tmpfile, err := ioutil.TempFile("", "example")
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ defer os.Remove(tmpfile.Name()) // clean up
+
+ if _, err := tmpfile.Write(content); err != nil {
+ log.Fatal(err)
+ }
+ if err := tmpfile.Close(); err != nil {
+ log.Fatal(err)
+
+ }
+ err = s.disp.configure("crunch-dispatch-slurm", []string{"-config", tmpfile.Name()})
+ c.Check(err, IsNil)
+
+ c.Check(s.disp.cluster.Services.Controller.ExternalURL, Equals, arvados.URL{Scheme: "https", Host: "example.com"})
+ c.Check(s.disp.cluster.Containers.SLURM.SbatchArgumentsList, DeepEquals, []string{"--foo", "bar"})
+ c.Check(s.disp.cluster.Containers.CloudVMs.PollInterval, Equals, arvados.Duration(12*time.Second))
+ c.Check(s.disp.cluster.Containers.SLURM.PrioritySpread, Equals, int64(42))
+ c.Check(s.disp.cluster.Containers.CrunchRunCommand, Equals, "x-crunch-run")
+ c.Check(s.disp.cluster.Containers.CrunchRunArgumentsList, DeepEquals, []string{"--cgroup-parent-subsystem=memory"})
+ c.Check(s.disp.cluster.Containers.ReserveExtraRAM, Equals, arvados.ByteSize(12345))
+ c.Check(s.disp.cluster.Containers.MinRetryPeriod, Equals, arvados.Duration(13*time.Second))
+ c.Check(s.disp.cluster.API.MaxItemsPerResponse, Equals, 99)
+}
commit ebf72b207121ddba5781d5ccf5253cd204e49243
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Mon Jul 15 14:02:51 2019 -0400
Fix test
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 7e5b47191..15bae9af8 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -449,6 +449,20 @@ Clusters:
# stale locks from a previous dispatch process.
StaleLockTimeout: 1m
+ # The crunch-run command to manage the container on a node
+ CrunchRunCommand: "crunch-run"
+
+ # Extra arguments to add to crunch-run invocation
+ # Example: ["--cgroup-parent-subsystem=memory"]
+ CrunchRunArgumentsList: []
+
+ # Extra RAM to reserve on the node, in addition to
+ # the amount specified in the container's RuntimeConstraints
+ ReserveExtraRAM: 256MiB
+
+ # Minimum time between two attempts to run the same container
+ MinRetryPeriod: 0s
+
Logging:
# When you run the db:delete_old_container_logs task, it will find
# containers that have been finished for at least this many seconds,
@@ -492,6 +506,8 @@ Clusters:
LogUpdateSize: 32MiB
SLURM:
+ PrioritySpread: 0
+ SbatchArgumentsList: []
Managed:
# Path to dns server configuration directory
# (e.g. /etc/unbound.d/conf.d). If false, do not write any config
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index 0a9d7a5b6..58a7690f4 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -455,6 +455,20 @@ Clusters:
# stale locks from a previous dispatch process.
StaleLockTimeout: 1m
+ # The crunch-run command to manage the container on a node
+ CrunchRunCommand: "crunch-run"
+
+ # Extra arguments to add to crunch-run invocation
+ # Example: ["--cgroup-parent-subsystem=memory"]
+ CrunchRunArgumentsList: []
+
+ # Extra RAM to reserve on the node, in addition to
+ # the amount specified in the container's RuntimeConstraints
+ ReserveExtraRAM: 256MiB
+
+ # Minimum time between two attempts to run the same container
+ MinRetryPeriod: 0s
+
Logging:
# When you run the db:delete_old_container_logs task, it will find
# containers that have been finished for at least this many seconds,
@@ -498,6 +512,8 @@ Clusters:
LogUpdateSize: 32MiB
SLURM:
+ PrioritySpread: 0
+ SbatchArgumentsList: []
Managed:
# Path to dns server configuration directory
# (e.g. /etc/unbound.d/conf.d). If false, do not write any config
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
index 889e41095..09e3d591a 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
@@ -18,10 +18,10 @@ import (
"strings"
"time"
+ "git.curoverse.com/arvados.git/lib/config"
"git.curoverse.com/arvados.git/lib/dispatchcloud"
"git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "git.curoverse.com/arvados.git/sdk/go/config"
"git.curoverse.com/arvados.git/sdk/go/dispatch"
"github.com/coreos/go-systemd/daemon"
"github.com/sirupsen/logrus"
@@ -47,26 +47,6 @@ type Dispatcher struct {
slurm Slurm
Client arvados.Client
-
- SbatchArguments []string
- PollPeriod arvados.Duration
- PrioritySpread int64
-
- // crunch-run command to invoke. The container UUID will be
- // appended. If nil, []string{"crunch-run"} will be used.
- //
- // Example: []string{"crunch-run", "--cgroup-parent-subsystem=memory"}
- CrunchRunCommand []string
-
- // Extra RAM to reserve (in Bytes) for SLURM job, in addition
- // to the amount specified in the container's RuntimeConstraints
- ReserveExtraRAM int64
-
- // Minimum time between two attempts to run the same container
- MinRetryPeriod arvados.Duration
-
- // Batch size for container queries
- BatchSize int64
}
func main() {
@@ -97,6 +77,9 @@ func (disp *Dispatcher) configure(prog string, args []string) error {
flags := flag.NewFlagSet(prog, flag.ExitOnError)
flags.Usage = func() { usage(flags) }
+ loader := config.NewLoader(stdin, log)
+ loader.SetupFlags(flags)
+
configPath := flags.String(
"config",
defaultConfigPath,
@@ -109,9 +92,16 @@ func (disp *Dispatcher) configure(prog string, args []string) error {
"version",
false,
"Print version information and exit.")
+
+ args = loader.MungeLegacyConfigArgs(logrus.StandardLogger(), args, "-crunch-dispatch-slurm-config")
+
// Parse args; omit the first arg which is the command name
flags.Parse(args)
+ if err == flag.ErrHelp {
+ return nil
+ }
+
// Print version information if requested
if *getVersion {
fmt.Printf("crunch-dispatch-slurm %s\n", version)
@@ -120,18 +110,19 @@ func (disp *Dispatcher) configure(prog string, args []string) error {
disp.logger.Printf("crunch-dispatch-slurm %s started", version)
- err := disp.readConfig(*configPath)
+ cfg, err := loader.Load()
if err != nil {
return err
}
- if disp.CrunchRunCommand == nil {
- disp.CrunchRunCommand = []string{"crunch-run"}
+ if disp.cluster, err = cfg.GetCluster(""); err != nil {
+ return fmt.Errorf("config error: %s", err)
}
- if disp.PollPeriod == 0 {
- disp.PollPeriod = arvados.Duration(10 * time.Second)
- }
+ disp.Client.APIHost = fmt.Sprintf("%s:%d", disp.cluster.Services.Controller.ExternalURL.Host,
+ disp.cluster.Services.Controller.ExternalURL.Port)
+ disp.Client.AuthToken = disp.cluster.SystemRootToken
+ disp.Client.Insecure = disp.cluster.TLS.Insecure
if disp.Client.APIHost != "" || disp.Client.AuthToken != "" {
// Copy real configs into env vars so [a]
@@ -150,16 +141,7 @@ func (disp *Dispatcher) configure(prog string, args []string) error {
}
if *dumpConfig {
- return config.DumpAndExit(disp)
- }
-
- siteConfig, err := arvados.GetConfig(arvados.DefaultConfigFile)
- if os.IsNotExist(err) {
- disp.logger.Warnf("no cluster config (%s), proceeding with no node types defined", err)
- } else if err != nil {
- return fmt.Errorf("error loading config: %s", err)
- } else if disp.cluster, err = siteConfig.GetCluster(""); err != nil {
- return fmt.Errorf("config error: %s", err)
+ return config.DumpAndExit(cfg)
}
return nil
diff --git a/services/login-sync/Gemfile.lock b/services/login-sync/Gemfile.lock
index eff59d2f7..f0283b611 100644
--- a/services/login-sync/Gemfile.lock
+++ b/services/login-sync/Gemfile.lock
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
- arvados-login-sync (1.4.0.20190701162225)
+ arvados-login-sync (1.4.0.20190709140013)
arvados (~> 1.3.0, >= 1.3.0)
GEM
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list