[arvados] created: 2.1.0-3009-g08ac4baf5
git repository hosting
git at public.arvados.org
Fri Nov 4 01:14:17 UTC 2022
at 08ac4baf572e0215f49f71c30defd86043664a98 (commit)
commit 08ac4baf572e0215f49f71c30defd86043664a98
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Thu Nov 3 21:14:00 2022 -0400
19699: Ensure collection name doesn't go over the limit
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index dcc2a5119..b061f44f9 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -151,6 +151,15 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
collectionname = "Downloaded from %s" % urllib.parse.quote(url, safe='')
+
+ # max length - space to add a timestamp used by ensure_unique_name
+ max_name_len = 254 - 28
+
+ if len(collectionname) > max_name_len:
+ over = len(collectionname) - max_name_len
+ split = int(max_name_len/2)
+ collectionname = collectionname[0:split] + "…" + collectionname[split+over:]
+
c.save_new(name=collectionname, owner_uuid=project_uuid, ensure_unique_name=True)
api.collections().update(uuid=c.manifest_locator(), body={"collection":{"properties": properties}}).execute()
commit af9e64edd245b96ce0f9fbee2edab5fcf051fb57
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Thu Nov 3 21:01:03 2022 -0400
19699: Add option to defer downloads
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 08a05d571..86f49fa18 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -213,6 +213,9 @@ def arg_parser(): # type: () -> argparse.ArgumentParser
parser.add_argument("--http-timeout", type=int,
default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).")
+ parser.add_argument("--defer-downloads", action="store_true", default=False,
+ help="When submitting a workflow, defer downloading HTTP URLs to workflow launch instead of downloading to Keep before submit.")
+
exgroup = parser.add_mutually_exclusive_group()
exgroup.add_argument("--enable-preemptible", dest="enable_preemptible", default=None, action="store_true", help="Use preemptible instances. Control individual steps with arv:UsePreemptible hint.")
exgroup.add_argument("--disable-preemptible", dest="enable_preemptible", default=None, action="store_false", help="Don't use preemptible instances.")
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index 64f85e207..ec473a04f 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -39,6 +39,7 @@ class ArvRuntimeContext(RuntimeContext):
self.match_local_docker = False
self.enable_preemptible = None
self.copy_deps = None
+ self.defer_downloads = False
super(ArvRuntimeContext, self).__init__(kwargs)
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 694f77baf..115c962d6 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -203,6 +203,8 @@ The 'jobs' API is no longer supported.
self.toplevel_runtimeContext.make_fs_access = partial(CollectionFsAccess,
collection_cache=self.collection_cache)
+ self.defer_downloads = arvargs.submit and arvargs.defer_downloads
+
validate_cluster_target(self, self.toplevel_runtimeContext)
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 64fdfa0d0..89364a905 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -105,9 +105,13 @@ class ArvPathMapper(PathMapper):
raise WorkflowException("Directory literal '%s' is missing `listing`" % src)
elif src.startswith("http:") or src.startswith("https:"):
try:
- keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src)
- logger.info("%s is %s", src, keepref)
- self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
+ if self.arvrunner.defer_downloads:
+ # passthrough, we'll download it later.
+ self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
+ else:
+ keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src)
+ logger.info("%s is %s", src, keepref)
+ self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
except Exception as e:
logger.warning(str(e))
else:
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list