[arvados] created: 2.7.0-6259-g75a99ed24a
git repository hosting
git at public.arvados.org
Fri Mar 29 20:33:25 UTC 2024
at 75a99ed24a198018b556a764d91619017da56aaa (commit)
commit 75a99ed24a198018b556a764d91619017da56aaa
Author: Tom Clegg <tom at curii.com>
Date: Fri Mar 29 16:33:02 2024 -0400
12430: Drop non-matching files from output if output_glob specified.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/doc/api/methods/container_requests.html.textile.liquid b/doc/api/methods/container_requests.html.textile.liquid
index 1c269fb3e6..66368848c6 100644
--- a/doc/api/methods/container_requests.html.textile.liquid
+++ b/doc/api/methods/container_requests.html.textile.liquid
@@ -49,6 +49,9 @@ table(table table-bordered table-condensed).
|cwd|string|Initial working directory, given as an absolute path (in the container) or a path relative to the WORKDIR given in the image's Dockerfile.|Required.|
|command|array of strings|Command to execute in the container.|Required. e.g., @["echo","hello"]@|
|output_path|string|Path to a directory or file inside the container that should be preserved as container's output when it finishes. This path must be one of the mount targets. For best performance, point output_path to a writable collection mount. See "Pre-populate output using Mount points":#pre-populate-output for details regarding optional output pre-population using mount points and "Symlinks in output":#symlinks-in-output for additional details.|Required.|
+|output_glob|array of strings|Glob patterns determining which files (of those present in the output directory when the container finishes) will be included in the output collection. If multiple patterns are given, files that match any pattern are included. If null or empty, all files will be included.|"Doublestar" patterns are accepted:
+ at foo/**@ matches the entire tree rooted at @foo@ in the top level of the output directory.
+@**/fo[og]@ matches all files named @foo@ or @fog@ anywhere in the tree.|
|output_name|string|Desired name for the output collection. If null or empty, a name will be assigned automatically.||
|output_ttl|integer|Desired lifetime for the output collection, in seconds. If zero, the output collection will not be deleted automatically.||
|priority|integer|Range 0-1000. Indicate scheduling order preference.|Clients are expected to submit container requests with zero priority in order to preview the container that will be used to satisfy it. Priority can be null if and only if state!="Committed". See "below for more details":#priority .|
diff --git a/doc/api/methods/containers.html.textile.liquid b/doc/api/methods/containers.html.textile.liquid
index 1d2fed768c..f6f59291da 100644
--- a/doc/api/methods/containers.html.textile.liquid
+++ b/doc/api/methods/containers.html.textile.liquid
@@ -30,9 +30,10 @@ table(table table-bordered table-condensed).
|finished_at|datetime|When this container finished.|Null if container has not yet finished.|
|log|string|Portable data hash of a collection containing the log messages produced when executing the container.|Null if container has not yet started. The Crunch system will periodically update this field for a running container.|
|environment|hash|Environment variables and values that should be set in the container environment (@docker run --env@). This augments and (when conflicts exist) overrides environment variables given in the image's Dockerfile.|Must be equal to a ContainerRequest's environment in order to satisfy the ContainerRequest.|
-|cwd|string|Initial working directory.|Must be equal to a ContainerRequest's cwd in order to satisfy the ContainerRequest|
+|cwd|string|Initial working directory.|Must be equal to a ContainerRequest's cwd in order to satisfy the ContainerRequest.|
|command|array of strings|Command to execute.| Must be equal to a ContainerRequest's command in order to satisfy the ContainerRequest.|
|output_path|string|Path to a directory or file inside the container that should be preserved as this container's output when it finishes.|Must be equal to a ContainerRequest's output_path in order to satisfy the ContainerRequest.|
+|output_glob|array of strings|Glob patterns determining which files will be included in the output collection. See corresponding attribute in the "container_requests resource":container_requests.html.|Must be equal to a ContainerRequest's output_glob in order to satisfy the ContainerRequest.|
|mounts|hash|Must contain the same keys as the ContainerRequest being satisfied. Each value must be within the range of values described in the ContainerRequest at the time the Container is assigned to the ContainerRequest.|See "Mount types":#mount_types for more details.|
|secret_mounts|hash|Must contain the same keys as the ContainerRequest being satisfied. Each value must be within the range of values described in the ContainerRequest at the time the Container is assigned to the ContainerRequest.|Not returned in API responses. Reset to empty when state is "Complete" or "Cancelled".|
|runtime_constraints|hash|Compute resources, and access to the outside world, that are / were available to the container.
diff --git a/go.mod b/go.mod
index 0011d7970f..62a31c1c58 100644
--- a/go.mod
+++ b/go.mod
@@ -65,6 +65,7 @@ require (
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bgentry/speakeasy v0.1.0 // indirect
+ github.com/bmatcuk/doublestar v1.3.4 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dimchansky/utfbom v1.1.1 // indirect
diff --git a/go.sum b/go.sum
index fb2fe5e3f0..fe6febb043 100644
--- a/go.sum
+++ b/go.sum
@@ -59,6 +59,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
+github.com/bmatcuk/doublestar v1.3.4 h1:gPypJ5xD31uhX6Tf54sDPUOBXTqKH4c9aPY66CyQrS0=
+github.com/bmatcuk/doublestar v1.3.4/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps=
github.com/bradleypeabody/godap v0.0.0-20170216002349-c249933bc092 h1:0Di2onNnlN5PAyWPbqlPyN45eOQ+QW/J9eqLynt4IV4=
github.com/bradleypeabody/godap v0.0.0-20170216002349-c249933bc092/go.mod h1:8IzBjZCRSnsvM6MJMG8HNNtnzMl48H22rbJL2kRUJ0Y=
diff --git a/lib/crunchrun/copier.go b/lib/crunchrun/copier.go
index a081c5d325..8d986d7066 100644
--- a/lib/crunchrun/copier.go
+++ b/lib/crunchrun/copier.go
@@ -9,6 +9,7 @@ import (
"errors"
"fmt"
"io"
+ "io/fs"
"os"
"path/filepath"
"sort"
@@ -17,6 +18,7 @@ import (
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/keepclient"
"git.arvados.org/arvados.git/sdk/go/manifest"
+ "github.com/bmatcuk/doublestar"
)
type printfer interface {
@@ -54,6 +56,7 @@ type copier struct {
keepClient IKeepClient
hostOutputDir string
ctrOutputDir string
+ globs []string
bindmounts map[string]bindmount
mounts map[string]arvados.Mount
secretMounts map[string]arvados.Mount
@@ -72,12 +75,17 @@ func (cp *copier) Copy() (string, error) {
if err != nil {
return "", fmt.Errorf("error scanning files to copy to output: %v", err)
}
- fs, err := (&arvados.Collection{ManifestText: cp.manifest}).FileSystem(cp.client, cp.keepClient)
+ collfs, err := (&arvados.Collection{ManifestText: cp.manifest}).FileSystem(cp.client, cp.keepClient)
if err != nil {
return "", fmt.Errorf("error creating Collection.FileSystem: %v", err)
}
+ err = cp.applyGlobsToCollectionFS(collfs)
+ if err != nil {
+ return "", fmt.Errorf("error while removing non-matching files from output collection: %w", err)
+ }
+ cp.applyGlobsToFilesAndDirs()
for _, d := range cp.dirs {
- err = fs.Mkdir(d, 0777)
+ err = collfs.Mkdir(d, 0777)
if err != nil && err != os.ErrExist {
return "", fmt.Errorf("error making directory %q in output collection: %v", d, err)
}
@@ -91,20 +99,118 @@ func (cp *copier) Copy() (string, error) {
// open so f's data can be packed with it).
dir, _ := filepath.Split(f.dst)
if dir != lastparentdir || unflushed > keepclient.BLOCKSIZE {
- if err := fs.Flush("/"+lastparentdir, dir != lastparentdir); err != nil {
+ if err := collfs.Flush("/"+lastparentdir, dir != lastparentdir); err != nil {
return "", fmt.Errorf("error flushing output collection file data: %v", err)
}
unflushed = 0
}
lastparentdir = dir
- n, err := cp.copyFile(fs, f)
+ n, err := cp.copyFile(collfs, f)
if err != nil {
return "", fmt.Errorf("error copying file %q into output collection: %v", f, err)
}
unflushed += n
}
- return fs.MarshalManifest(".")
+ return collfs.MarshalManifest(".")
+}
+
+func (cp *copier) matchGlobs(path string) bool {
+ // An entry in the top level of the output directory looks
+ // like "/foo", but globs look like "foo", so we strip the
+ // leading "/" before matching.
+ path = strings.TrimLeft(path, "/")
+ for _, glob := range cp.globs {
+ if match, _ := doublestar.Match(glob, path); match {
+ return true
+ }
+ }
+ return false
+}
+
+// Delete entries from cp.files that do not match cp.globs.
+//
+// Delete entries from cp.dirs that do not match cp.globs.
+//
+// Ensure parent/ancestor directories of remaining cp.files and
+// cp.dirs entries are still present in cp.dirs, even if they do not
+// match cp.globs themselves.
+func (cp *copier) applyGlobsToFilesAndDirs() {
+ if len(cp.globs) == 0 {
+ return
+ }
+ keepdirs := make(map[string]bool)
+ for _, path := range cp.dirs {
+ if cp.matchGlobs(path) {
+ keepdirs[path] = true
+ }
+ }
+ for path := range keepdirs {
+ for i, c := range path {
+ if i > 0 && c == '/' {
+ keepdirs[path[:i]] = true
+ }
+ }
+ }
+ var keepfiles []filetodo
+ for _, file := range cp.files {
+ if cp.matchGlobs(file.dst) {
+ keepfiles = append(keepfiles, file)
+ }
+ }
+ for _, file := range keepfiles {
+ for i, c := range file.dst {
+ if i > 0 && c == '/' {
+ keepdirs[file.dst[:i]] = true
+ }
+ }
+ }
+ cp.dirs = nil
+ for path := range keepdirs {
+ cp.dirs = append(cp.dirs, path)
+ }
+ sort.Strings(cp.dirs)
+ cp.files = keepfiles
+}
+
+// Delete files in collfs that do not match cp.globs. Also delete
+// directories that are empty (after deleting non-matching files) and
+// do not match cp.globs themselves.
+func (cp *copier) applyGlobsToCollectionFS(collfs arvados.CollectionFileSystem) error {
+ if len(cp.globs) == 0 {
+ return nil
+ }
+ include := make(map[string]bool)
+ err := fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
+ if cp.matchGlobs(path) {
+ for i, c := range path {
+ if i > 0 && c == '/' {
+ include[path[:i]] = true
+ }
+ }
+ include[path] = true
+ }
+ return nil
+ })
+ if err != nil {
+ return err
+ }
+ err = fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
+ if err != nil || path == "" {
+ return err
+ }
+ if !include[path] {
+ err := collfs.RemoveAll(path)
+ if err != nil {
+ return err
+ }
+ if ent.IsDir() {
+ return fs.SkipDir
+ }
+ }
+ return nil
+ })
+ return err
}
func (cp *copier) copyFile(fs arvados.CollectionFileSystem, f filetodo) (int64, error) {
diff --git a/lib/crunchrun/copier_test.go b/lib/crunchrun/copier_test.go
index c8936d1a9f..bf7e789e45 100644
--- a/lib/crunchrun/copier_test.go
+++ b/lib/crunchrun/copier_test.go
@@ -7,8 +7,10 @@ package crunchrun
import (
"bytes"
"io"
+ "io/fs"
"io/ioutil"
"os"
+ "sort"
"syscall"
"git.arvados.org/arvados.git/sdk/go/arvados"
@@ -222,3 +224,173 @@ func (s *copierSuite) writeFileInOutputDir(c *check.C, path, data string) {
c.Assert(err, check.IsNil)
c.Assert(f.Close(), check.IsNil)
}
+
+func (s *copierSuite) TestApplyGlobsToFilesAndDirs(c *check.C) {
+ dirs := []string{"dir1", "dir1/dir11", "dir1/dir12", "dir2"}
+ files := []string{"dir1/file11", "dir1/dir11/file111", "dir2/file2"}
+ for _, trial := range []struct {
+ globs []string
+ dirs []string
+ files []string
+ }{
+ {
+ globs: []string{},
+ dirs: append([]string{}, dirs...),
+ files: append([]string{}, files...),
+ },
+ {
+ globs: []string{"**"},
+ dirs: append([]string{}, dirs...),
+ files: append([]string{}, files...),
+ },
+ {
+ globs: []string{"**/file111"},
+ dirs: []string{"dir1", "dir1/dir11"},
+ files: []string{"dir1/dir11/file111"},
+ },
+ {
+ globs: []string{"nothing"},
+ dirs: nil,
+ files: nil,
+ },
+ {
+ globs: []string{"**/dir12"},
+ dirs: []string{"dir1", "dir1/dir12"},
+ files: nil,
+ },
+ {
+ globs: []string{"**/file*"},
+ dirs: []string{"dir1", "dir1/dir11", "dir2"},
+ files: append([]string{}, files...),
+ },
+ {
+ globs: []string{"**/dir1[12]"},
+ dirs: []string{"dir1", "dir1/dir11", "dir1/dir12"},
+ files: nil,
+ },
+ {
+ globs: []string{"**/dir1[^2]"},
+ dirs: []string{"dir1", "dir1/dir11"},
+ files: nil,
+ },
+ } {
+ c.Logf("=== globs: %q", trial.globs)
+ cp := copier{
+ globs: trial.globs,
+ dirs: dirs,
+ }
+ for _, path := range files {
+ cp.files = append(cp.files, filetodo{dst: path})
+ }
+ cp.applyGlobsToFilesAndDirs()
+ var gotFiles []string
+ for _, file := range cp.files {
+ gotFiles = append(gotFiles, file.dst)
+ }
+ c.Check(cp.dirs, check.DeepEquals, trial.dirs)
+ c.Check(gotFiles, check.DeepEquals, trial.files)
+ }
+}
+
+func (s *copierSuite) TestApplyGlobsToCollectionFS(c *check.C) {
+ for _, trial := range []struct {
+ globs []string
+ expect []string
+ }{
+ {
+ globs: nil,
+ expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
+ },
+ {
+ globs: []string{"foo"},
+ expect: []string{"foo"},
+ },
+ {
+ globs: []string{"baz/parent1/item1"},
+ expect: []string{"baz/parent1/item1"},
+ },
+ {
+ globs: []string{"**"},
+ expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
+ },
+ {
+ globs: []string{"**/*"},
+ expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
+ },
+ {
+ globs: []string{"*"},
+ expect: []string{"foo", "bar"},
+ },
+ {
+ globs: []string{"baz"},
+ expect: nil,
+ },
+ {
+ globs: []string{"b*/**"},
+ expect: []string{"baz/quux", "baz/parent1/item1"},
+ },
+ {
+ globs: []string{"baz"},
+ expect: nil,
+ },
+ {
+ globs: []string{"baz/**"},
+ expect: []string{"baz/quux", "baz/parent1/item1"},
+ },
+ {
+ globs: []string{"baz/*"},
+ expect: []string{"baz/quux"},
+ },
+ {
+ globs: []string{"baz/**/*uu?"},
+ expect: []string{"baz/quux"},
+ },
+ {
+ globs: []string{"**/*m1"},
+ expect: []string{"baz/parent1/item1"},
+ },
+ {
+ globs: []string{"*/*/*/**/*1"},
+ expect: nil,
+ },
+ {
+ globs: []string{"f*", "**/q*"},
+ expect: []string{"foo", "baz/quux"},
+ },
+ {
+ globs: []string{"\\"}, // invalid pattern matches nothing
+ expect: nil,
+ },
+ {
+ globs: []string{"\\", "foo"},
+ expect: []string{"foo"},
+ },
+ {
+ globs: []string{"foo/**"},
+ expect: nil,
+ },
+ {
+ globs: []string{"foo*/**"},
+ expect: nil,
+ },
+ } {
+ c.Logf("=== globs: %q", trial.globs)
+ collfs, err := (&arvados.Collection{ManifestText: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo 0:0:bar 0:0:baz/quux 0:0:baz/parent1/item1\n"}).FileSystem(nil, nil)
+ c.Assert(err, check.IsNil)
+ cp := copier{globs: trial.globs}
+ err = cp.applyGlobsToCollectionFS(collfs)
+ if !c.Check(err, check.IsNil) {
+ continue
+ }
+ var got []string
+ fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
+ if !ent.IsDir() {
+ got = append(got, path)
+ }
+ return nil
+ })
+ sort.Strings(got)
+ sort.Strings(trial.expect)
+ c.Check(got, check.DeepEquals, trial.expect)
+ }
+}
diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go
index bde13424dd..1f1c5675aa 100644
--- a/lib/crunchrun/crunchrun.go
+++ b/lib/crunchrun/crunchrun.go
@@ -1365,6 +1365,7 @@ func (runner *ContainerRunner) CaptureOutput(bindmounts map[string]bindmount) er
keepClient: runner.ContainerKeepClient,
hostOutputDir: runner.HostOutputDir,
ctrOutputDir: runner.Container.OutputPath,
+ globs: runner.Container.OutputGlob,
bindmounts: bindmounts,
mounts: runner.Container.Mounts,
secretMounts: runner.SecretMounts,
diff --git a/lib/crunchrun/integration_test.go b/lib/crunchrun/integration_test.go
index 4f0100b267..def5f07fc0 100644
--- a/lib/crunchrun/integration_test.go
+++ b/lib/crunchrun/integration_test.go
@@ -148,6 +148,7 @@ func (s *integrationSuite) setup(c *C) {
"state": s.cr.State,
"command": s.cr.Command,
"output_path": s.cr.OutputPath,
+ "output_glob": s.cr.OutputGlob,
"container_image": s.cr.ContainerImage,
"mounts": s.cr.Mounts,
"runtime_constraints": s.cr.RuntimeConstraints,
@@ -272,6 +273,19 @@ func (s *integrationSuite) TestRunTrivialContainerWithNoLocalKeepstore(c *C) {
c.Check(s.logFiles["crunch-run.txt"], Matches, `(?ms).*loaded config file \Q`+os.Getenv("ARVADOS_CONFIG")+`\E\n.*`)
}
+func (s *integrationSuite) TestRunTrivialContainerWithOutputGlob(c *C) {
+ s.cr.OutputGlob = []string{"js?n"}
+ s.testRunTrivialContainer(c)
+ fs, err := s.outputCollection.FileSystem(s.client, s.kc)
+ c.Assert(err, IsNil)
+ _, err = fs.Stat("json")
+ c.Check(err, IsNil)
+ _, err = fs.Stat("inputfile")
+ c.Check(err, Equals, os.ErrNotExist)
+ _, err = fs.Stat("emptydir")
+ c.Check(err, Equals, os.ErrNotExist)
+}
+
func (s *integrationSuite) testRunTrivialContainer(c *C) {
if err := exec.Command("which", s.engine).Run(); err != nil {
c.Skip(fmt.Sprintf("%s: %s", s.engine, err))
@@ -321,34 +335,37 @@ func (s *integrationSuite) testRunTrivialContainer(c *C) {
var output arvados.Collection
err = s.client.RequestAndDecode(&output, "GET", "arvados/v1/collections/"+s.cr.OutputUUID, nil, nil)
c.Assert(err, IsNil)
- fs, err = output.FileSystem(s.client, s.kc)
- c.Assert(err, IsNil)
- if f, err := fs.Open("inputfile"); c.Check(err, IsNil) {
- defer f.Close()
- buf, err := ioutil.ReadAll(f)
- c.Check(err, IsNil)
- c.Check(string(buf), Equals, "inputdata")
- }
- if f, err := fs.Open("json"); c.Check(err, IsNil) {
- defer f.Close()
- buf, err := ioutil.ReadAll(f)
- c.Check(err, IsNil)
- c.Check(string(buf), Equals, `["foo",{"foo":"bar"},null]`)
- }
- if fi, err := fs.Stat("emptydir"); c.Check(err, IsNil) {
- c.Check(fi.IsDir(), Equals, true)
- }
- if d, err := fs.Open("emptydir"); c.Check(err, IsNil) {
- defer d.Close()
- fis, err := d.Readdir(-1)
+ s.outputCollection = output
+
+ if len(s.cr.OutputGlob) == 0 {
+ fs, err = output.FileSystem(s.client, s.kc)
c.Assert(err, IsNil)
- // crunch-run still saves a ".keep" file to preserve
- // empty dirs even though that shouldn't be
- // necessary. Ideally we would do:
- // c.Check(fis, HasLen, 0)
- for _, fi := range fis {
- c.Check(fi.Name(), Equals, ".keep")
+ if f, err := fs.Open("inputfile"); c.Check(err, IsNil) {
+ defer f.Close()
+ buf, err := ioutil.ReadAll(f)
+ c.Check(err, IsNil)
+ c.Check(string(buf), Equals, "inputdata")
+ }
+ if f, err := fs.Open("json"); c.Check(err, IsNil) {
+ defer f.Close()
+ buf, err := ioutil.ReadAll(f)
+ c.Check(err, IsNil)
+ c.Check(string(buf), Equals, `["foo",{"foo":"bar"},null]`)
+ }
+ if fi, err := fs.Stat("emptydir"); c.Check(err, IsNil) {
+ c.Check(fi.IsDir(), Equals, true)
+ }
+ if d, err := fs.Open("emptydir"); c.Check(err, IsNil) {
+ defer d.Close()
+ fis, err := d.Readdir(-1)
+ c.Assert(err, IsNil)
+ // crunch-run still saves a ".keep" file to preserve
+ // empty dirs even though that shouldn't be
+ // necessary. Ideally we would do:
+ // c.Check(fis, HasLen, 0)
+ for _, fi := range fis {
+ c.Check(fi.Name(), Equals, ".keep")
+ }
}
}
- s.outputCollection = output
}
diff --git a/sdk/go/arvados/container.go b/sdk/go/arvados/container.go
index 91c8fbfe29..aefad9b5e5 100644
--- a/sdk/go/arvados/container.go
+++ b/sdk/go/arvados/container.go
@@ -23,6 +23,7 @@ type Container struct {
Mounts map[string]Mount `json:"mounts"`
Output string `json:"output"`
OutputPath string `json:"output_path"`
+ OutputGlob []string `json:"output_glob"`
Priority int64 `json:"priority"`
RuntimeConstraints RuntimeConstraints `json:"runtime_constraints"`
State ContainerState `json:"state"`
@@ -68,6 +69,7 @@ type ContainerRequest struct {
Cwd string `json:"cwd"`
Command []string `json:"command"`
OutputPath string `json:"output_path"`
+ OutputGlob []string `json:"output_glob"`
OutputName string `json:"output_name"`
OutputTTL int `json:"output_ttl"`
Priority int `json:"priority"`
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index ee338b81ff..880b18b663 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -30,6 +30,7 @@ class Container < ArvadosModel
serialize :runtime_constraints, Hash
serialize :command, Array
serialize :scheduling_parameters, Hash
+ serialize :output_glob, Array
after_find :fill_container_defaults_after_find
before_validation :fill_field_defaults, :if => :new_record?
@@ -73,6 +74,7 @@ class Container < ArvadosModel
t.add :mounts
t.add :output
t.add :output_path
+ t.add :output_glob
t.add :priority
t.add :progress
t.add :runtime_constraints
@@ -164,6 +166,7 @@ class Container < ArvadosModel
cwd: req.cwd,
environment: req.environment,
output_path: req.output_path,
+ output_glob: req.output_glob,
container_image: resolve_container_image(req.container_image),
mounts: resolve_mounts(req.mounts),
runtime_constraints: resolve_runtime_constraints(req.runtime_constraints),
@@ -263,6 +266,9 @@ class Container < ArvadosModel
candidates = candidates.where('output_path = ?', attrs[:output_path])
log_reuse_info(candidates) { "after filtering on output_path #{attrs[:output_path].inspect}" }
+ candidates = candidates.where('output_glob = ?', attrs[:output_glob])
+ log_reuse_info(candidates) { "after filtering on output_glob #{attrs[:output_glob].inspect}" }
+
image = resolve_container_image(attrs[:container_image])
candidates = candidates.where('container_image = ?', image)
log_reuse_info(candidates) { "after filtering on container_image #{image.inspect} (resolved from #{attrs[:container_image].inspect})" }
@@ -531,11 +537,11 @@ class Container < ArvadosModel
if self.new_record?
permitted.push(:owner_uuid, :command, :container_image, :cwd,
- :environment, :mounts, :output_path, :priority,
- :runtime_constraints, :scheduling_parameters,
- :secret_mounts, :runtime_token,
- :runtime_user_uuid, :runtime_auth_scopes,
- :output_storage_classes)
+ :environment, :mounts, :output_path, :output_glob,
+ :priority, :runtime_constraints,
+ :scheduling_parameters, :secret_mounts,
+ :runtime_token, :runtime_user_uuid,
+ :runtime_auth_scopes, :output_storage_classes)
end
case self.state
@@ -798,6 +804,7 @@ class Container < ArvadosModel
cwd: self.cwd,
environment: self.environment,
output_path: self.output_path,
+ output_glob: self.output_glob,
container_image: self.container_image,
mounts: self.mounts,
runtime_constraints: self.runtime_constraints,
diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb
index f5789f31f6..dd5a5bfc69 100644
--- a/services/api/app/models/container_request.rb
+++ b/services/api/app/models/container_request.rb
@@ -34,6 +34,7 @@ class ContainerRequest < ArvadosModel
serialize :runtime_constraints, Hash
serialize :command, Array
serialize :scheduling_parameters, Hash
+ serialize :output_glob, Array
after_find :fill_container_defaults_after_find
after_initialize { @state_was_when_initialized = self.state_was } # see finalize_if_needed
@@ -73,6 +74,7 @@ class ContainerRequest < ArvadosModel
t.add :name
t.add :output_name
t.add :output_path
+ t.add :output_glob
t.add :output_uuid
t.add :output_ttl
t.add :priority
@@ -104,7 +106,7 @@ class ContainerRequest < ArvadosModel
AttrsPermittedAlways = [:owner_uuid, :state, :name, :description, :properties]
AttrsPermittedBeforeCommit = [:command, :container_count_max,
:container_image, :cwd, :environment, :filters, :mounts,
- :output_path, :priority, :runtime_token,
+ :output_path, :output_glob, :priority, :runtime_token,
:runtime_constraints, :state, :container_uuid, :use_existing,
:scheduling_parameters, :secret_mounts, :output_name, :output_ttl,
:output_storage_classes, :output_properties]
@@ -442,6 +444,11 @@ class ContainerRequest < ArvadosModel
errors.add(:environment, "must be an map of String to String but has entry #{k.class} to #{v.class}")
end
end
+ output_glob.each do |g|
+ if !g.is_a? String
+ errors.add(:output_glob, "must be an array of strings but has entry #{g.class}")
+ end
+ end
[:mounts, :secret_mounts].each do |m|
self[m].each do |k, v|
if !k.is_a?(String) || !v.is_a?(Hash)
diff --git a/services/api/db/migrate/20240329173437_add_output_glob_to_containers.rb b/services/api/db/migrate/20240329173437_add_output_glob_to_containers.rb
new file mode 100644
index 0000000000..7c46f1ab25
--- /dev/null
+++ b/services/api/db/migrate/20240329173437_add_output_glob_to_containers.rb
@@ -0,0 +1,10 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class AddOutputGlobToContainers < ActiveRecord::Migration[7.0]
+ def change
+ add_column :containers, :output_glob, :json, default: []
+ add_column :container_requests, :output_glob, :json, default: []
+ end
+end
diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql
index c0d4263d97..766bf8fc44 100644
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -568,7 +568,8 @@ CREATE TABLE public.container_requests (
runtime_token text,
output_storage_classes jsonb DEFAULT '["default"]'::jsonb,
output_properties jsonb DEFAULT '{}'::jsonb,
- cumulative_cost double precision DEFAULT 0.0 NOT NULL
+ cumulative_cost double precision DEFAULT 0.0 NOT NULL,
+ output_glob json DEFAULT '[]'::json
);
@@ -634,7 +635,8 @@ CREATE TABLE public.containers (
output_storage_classes jsonb DEFAULT '["default"]'::jsonb,
output_properties jsonb DEFAULT '{}'::jsonb,
cost double precision DEFAULT 0.0 NOT NULL,
- subrequests_cost double precision DEFAULT 0.0 NOT NULL
+ subrequests_cost double precision DEFAULT 0.0 NOT NULL,
+ output_glob json DEFAULT '[]'::json
);
@@ -3316,4 +3318,5 @@ INSERT INTO "schema_migrations" (version) VALUES
('20230815160000'),
('20230821000000'),
('20230922000000'),
-('20231013000000');
+('20231013000000'),
+('20240329173437');
diff --git a/services/api/test/unit/container_request_test.rb b/services/api/test/unit/container_request_test.rb
index d25c08a579..abf19a2905 100644
--- a/services/api/test/unit/container_request_test.rb
+++ b/services/api/test/unit/container_request_test.rb
@@ -112,7 +112,11 @@ class ContainerRequestTest < ActiveSupport::TestCase
{"mounts" => {"FOO" => {}}},
{"mounts" => {"FOO" => {"kind" => "tmp", "capacity" => 42.222}}},
{"command" => ["echo", 55]},
- {"environment" => {"FOO" => 55}}
+ {"environment" => {"FOO" => 55}},
+ {"output_glob" => [false]},
+ {"output_glob" => [["bad"]]},
+ {"output_glob" => "bad"},
+ {"output_glob" => ["nope", -1]},
].each do |value|
test "Create with invalid #{value}" do
set_user_from_auth :active
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list