[ARVADOS] created: c8bb5cc118575835aa38122c7035989eeebe759f

Git user <git@public.curoverse.com>
Fri Sep 30 11:25:15 EDT 2016


        at  c8bb5cc118575835aa38122c7035989eeebe759f (commit)


commit c8bb5cc118575835aa38122c7035989eeebe759f
Author: Peter Amstutz <peter.amstutz@curoverse.com>
Date:   Fri Sep 30 11:24:53 2016 -0400

    10032: Update schema salad and cwltool packages to get document validation optimizations.

diff --git a/build/run-build-packages.sh b/build/run-build-packages.sh
index 3acabab..53dcec4 100755
--- a/build/run-build-packages.sh
+++ b/build/run-build-packages.sh
@@ -452,8 +452,8 @@ fpm_build lockfile "" "" python 0.12.2 --epoch 1
 
 # schema_salad. This is a python dependency of arvados-cwl-runner,
 # but we can't use the usual PYTHONPACKAGES way to build this package due to the
-# intricacies of how version numbers get generated in setup.py: we need version
-# 1.7.20160316203940. If we don't explicitly list that version with the -v
+# intricacies of how version numbers get generated in setup.py: we need a specific version,
+# e.g. 1.7.20160316203940. If we don't explicitly list that version with the -v
 # argument to fpm, and instead specify it as schema_salad==1.7.20160316203940, we get
 # a package with version 1.7. That's because our gittagger hack is not being
 # picked up by self.distribution.get_version(), which is called from
@@ -465,7 +465,7 @@ fpm_build lockfile "" "" python 0.12.2 --epoch 1
 # So we build this thing separately.
 #
 # Ward, 2016-03-17
-fpm_build schema_salad "" "" python 1.18.20160907135919 --depends "${PYTHON2_PKG_PREFIX}-lockfile >= 1:0.12.2-2"
+fpm_build schema_salad "" "" python 1.18.20160930145650 --depends "${PYTHON2_PKG_PREFIX}-lockfile >= 1:0.12.2-2"
 
 # And schema_salad now depends on ruamel-yaml, which apparently has a braindead setup.py that requires special arguments to build (otherwise, it aborts with 'error: you have to install with "pip install ."'). Sigh.
 # Ward, 2016-05-26
@@ -476,7 +476,7 @@ fpm_build ruamel.yaml "" "" python 0.12.4 --python-setup-py-arguments "--single-
 fpm_build cwltest "" "" python 1.0.20160907111242
 
 # And for cwltool we have the same problem as for schema_salad. Ward, 2016-03-17
-fpm_build cwltool "" "" python 1.0.20160923180109
+fpm_build cwltool "" "" python 1.0.20160930152149
 
 # FPM eats the trailing .0 in the python-rdflib-jsonld package when built with 'rdflib-jsonld>=0.3.0'. Force the version. Ward, 2016-03-25
 fpm_build rdflib-jsonld "" "" python 0.3.0
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index fc5a52c..929011b 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -33,7 +33,7 @@ setup(name='arvados-cwl-runner',
       # Make sure to update arvados/build/run-build-packages.sh as well
       # when updating the cwltool version pin.
       install_requires=[
-          'cwltool==1.0.20160923180109',
+          'cwltool==1.0.20160930152149',
           'arvados-python-client>=0.1.20160826210445'
       ],
       data_files=[

commit 95f399d4fca0c4e36c6da4e98e4092106ebfdc6d
Author: Peter Amstutz <peter.amstutz@curoverse.com>
Date:   Fri Sep 30 11:24:15 2016 -0400

    Drill down on metrics on job submission.

diff --git a/sdk/cwl/arvados_cwl/arvjob.py b/sdk/cwl/arvados_cwl/arvjob.py
index abb41f4..0818d5d 100644
--- a/sdk/cwl/arvados_cwl/arvjob.py
+++ b/sdk/cwl/arvados_cwl/arvjob.py
@@ -38,21 +38,27 @@ class ArvadosJob(object):
         }
         runtime_constraints = {}
 
-        if self.generatefiles["listing"]:
-            vwd = arvados.collection.Collection()
-            script_parameters["task.vwd"] = {}
-            generatemapper = InitialWorkDirPathMapper([self.generatefiles], "", "",
-                                        separateDirs=False)
-            for f, p in generatemapper.items():
-                if p.type == "CreateFile":
-                    with vwd.open(p.target, "w") as n:
-                        n.write(p.resolved.encode("utf-8"))
-            vwd.save_new()
-            for f, p in generatemapper.items():
-                if p.type == "File":
-                    script_parameters["task.vwd"][p.target] = p.resolved
-                if p.type == "CreateFile":
-                    script_parameters["task.vwd"][p.target] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), p.target)
+        with Perf(metrics, "generatefiles %s" % self.name):
+            if self.generatefiles["listing"]:
+                vwd = arvados.collection.Collection()
+                script_parameters["task.vwd"] = {}
+                generatemapper = InitialWorkDirPathMapper([self.generatefiles], "", "",
+                                                          separateDirs=False)
+
+                with Perf(metrics, "createfiles %s" % self.name):
+                    for f, p in generatemapper.items():
+                        if p.type == "CreateFile":
+                            with vwd.open(p.target, "w") as n:
+                                n.write(p.resolved.encode("utf-8"))
+
+                with Perf(metrics, "generatefiles.save_new %s" % self.name):
+                    vwd.save_new()
+
+                for f, p in generatemapper.items():
+                    if p.type == "File":
+                        script_parameters["task.vwd"][p.target] = p.resolved
+                    if p.type == "CreateFile":
+                        script_parameters["task.vwd"][p.target] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), p.target)
 
         script_parameters["task.env"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
         if self.environment:
@@ -74,11 +80,12 @@ class ArvadosJob(object):
         if self.permanentFailCodes:
             script_parameters["task.permanentFailCodes"] = self.permanentFailCodes
 
-        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
-        if docker_req and kwargs.get("use_container") is not False:
-            runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image, self.arvrunner.project_uuid)
-        else:
-            runtime_constraints["docker_image"] = "arvados/jobs"
+        with Perf(metrics, "arv_docker_get_image %s" % self.name):
+            (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
+            if docker_req and kwargs.get("use_container") is not False:
+                runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image, self.arvrunner.project_uuid)
+            else:
+                runtime_constraints["docker_image"] = "arvados/jobs"
 
         resources = self.builder.resources
         if resources is not None:
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index f4ce446..8eb8fe6 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -13,8 +13,10 @@ import ruamel.yaml as yaml
 
 from .runner import upload_docker, upload_dependencies, trim_listing
 from .arvtool import ArvadosCommandTool
+from .perf import Perf
 
 logger = logging.getLogger('arvados.cwl-runner')
+metrics = logging.getLogger('arvados.cwl-runner.metrics')
 
 def upload_workflow(arvRunner, tool, job_order, project_uuid, update_uuid):
     upload_docker(arvRunner, tool)
@@ -62,40 +64,42 @@ class ArvadosWorkflow(Workflow):
         if req:
             document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
 
-            workflowobj["requirements"] = self.requirements + workflowobj.get("requirements", [])
-            workflowobj["hints"] = self.hints + workflowobj.get("hints", [])
-            packed = pack(document_loader, workflowobj, uri, self.metadata)
-
-            upload_dependencies(self.arvrunner,
-                                kwargs.get("name", ""),
-                                document_loader,
-                                packed,
-                                uri,
-                                False)
-
-            upload_dependencies(self.arvrunner,
-                                os.path.basename(joborder.get("id", "#")),
-                                document_loader,
-                                joborder,
-                                joborder.get("id", "#"),
-                                False)
-
-            joborder_keepmount = copy.deepcopy(joborder)
-
-            def keepmount(obj):
-                if obj["location"].startswith("keep:"):
-                    obj["location"] = "/keep/" + obj["location"][5:]
-                    if "listing" in obj:
-                        del obj["listing"]
-                elif obj["location"].startswith("_:"):
-                    del obj["location"]
-                else:
-                    raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])
-
-            adjustFileObjs(joborder_keepmount, keepmount)
-            adjustDirObjs(joborder_keepmount, keepmount)
-            adjustFileObjs(packed, keepmount)
-            adjustDirObjs(packed, keepmount)
+            with Perf(metrics, "subworkflow upload_deps"):
+                workflowobj["requirements"] = self.requirements + workflowobj.get("requirements", [])
+                workflowobj["hints"] = self.hints + workflowobj.get("hints", [])
+                packed = pack(document_loader, workflowobj, uri, self.metadata)
+
+                upload_dependencies(self.arvrunner,
+                                    kwargs.get("name", ""),
+                                    document_loader,
+                                    packed,
+                                    uri,
+                                    False)
+
+                upload_dependencies(self.arvrunner,
+                                    os.path.basename(joborder.get("id", "#")),
+                                    document_loader,
+                                    joborder,
+                                    joborder.get("id", "#"),
+                                    False)
+
+            with Perf(metrics, "subworkflow adjust"):
+                joborder_keepmount = copy.deepcopy(joborder)
+
+                def keepmount(obj):
+                    if obj["location"].startswith("keep:"):
+                        obj["location"] = "/keep/" + obj["location"][5:]
+                        if "listing" in obj:
+                            del obj["listing"]
+                    elif obj["location"].startswith("_:"):
+                        del obj["location"]
+                    else:
+                        raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])
+
+                adjustFileObjs(joborder_keepmount, keepmount)
+                adjustDirObjs(joborder_keepmount, keepmount)
+                adjustFileObjs(packed, keepmount)
+                adjustDirObjs(packed, keepmount)
 
             wf_runner = {
                 "class": "CommandLineTool",
@@ -104,7 +108,6 @@ class ArvadosWorkflow(Workflow):
                 "outputs": self.tool["outputs"],
                 "stdout": "cwl.output.json",
                 "requirements": workflowobj["requirements"]+[
-                    {"class": "InlineJavascriptRequirement"},
                     {
                     "class": "InitialWorkDirRequirement",
                     "listing": [{
@@ -116,7 +119,7 @@ class ArvadosWorkflow(Workflow):
                         }]
                 }],
                 "hints": workflowobj["hints"],
-                "arguments": ["--no-container", "--move-outputs", "workflow.cwl#main", "cwl.input.yml"]
+                "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"]
             }
             kwargs["loader"] = self.doc_loader
             kwargs["avsc_names"] = self.doc_schema

commit 9ea95168dc095e9547d0ae235e3986e428bf50c2
Author: Peter Amstutz <peter.amstutz@curoverse.com>
Date:   Thu Sep 29 10:30:30 2016 -0400

    10032: Add more metrics

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index cd38003..5262cb4 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -34,8 +34,10 @@ from cwltool.draft2tool import compute_checksums
 from arvados.api import OrderedJsonModel
 
 logger = logging.getLogger('arvados.cwl-runner')
+metrics = logging.getLogger('arvados.cwl-runner.metrics')
 logger.setLevel(logging.INFO)
 
+
 class ArvCwlRunner(object):
     """Execute a CWL tool or workflow, submit work (using either jobs or
     containers API), wait for them to complete, and report output.
@@ -103,7 +105,7 @@ class ArvCwlRunner(object):
                         self.cond.acquire()
                         j = self.processes[uuid]
                         logger.info("Job %s (%s) is %s", j.name, uuid, event["properties"]["new_attributes"]["state"])
-                        with Perf(logger, "done %s" % j.name):
+                        with Perf(metrics, "done %s" % j.name):
                             j.done(event["properties"]["new_attributes"])
                         self.cond.notify()
                     finally:
@@ -244,9 +246,12 @@ class ArvCwlRunner(object):
             # except when in cond.wait(), at which point on_message can update
             # job state and process output callbacks.
 
+            loopperf = Perf(metrics, "jobiter")
+            loopperf.__enter__()
             for runnable in jobiter:
+                loopperf.__exit__()
                 if runnable:
-                    with Perf(logger, "run"):
+                    with Perf(metrics, "run"):
                         runnable.run(**kwargs)
                 else:
                     if self.processes:
@@ -254,6 +259,8 @@ class ArvCwlRunner(object):
                     else:
                         logger.error("Workflow is deadlocked, no runnable jobs and not waiting on any pending jobs.")
                         break
+                loopperf.__enter__()
+            loopperf.__exit__()
 
             while self.processes:
                 self.cond.wait(1)
@@ -322,6 +329,8 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
     exgroup.add_argument("--quiet", action="store_true", help="Only print warnings and errors.")
     exgroup.add_argument("--debug", action="store_true", help="Print even more logging")
 
+    parser.add_argument("--metrics", action="store_true", help="Print timing metrics")
+
     parser.add_argument("--tool-help", action="store_true", help="Print command line help for tool")
 
     exgroup = parser.add_mutually_exclusive_group()
@@ -401,6 +410,10 @@ def main(args, stdout, stderr, api_client=None):
         logger.setLevel(logging.WARN)
         logging.getLogger('arvados.arv-run').setLevel(logging.WARN)
 
+    if arvargs.metrics:
+        metrics.setLevel(logging.DEBUG)
+        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)
+
     arvargs.conformance_test = None
     arvargs.use_container = True
 
diff --git a/sdk/cwl/arvados_cwl/arvjob.py b/sdk/cwl/arvados_cwl/arvjob.py
index 1098002..abb41f4 100644
--- a/sdk/cwl/arvados_cwl/arvjob.py
+++ b/sdk/cwl/arvados_cwl/arvjob.py
@@ -18,6 +18,7 @@ from .perf import Perf
 from . import done
 
 logger = logging.getLogger('arvados.cwl-runner')
+metrics = logging.getLogger('arvados.cwl-runner.metrics')
 
 tmpdirre = re.compile(r"^\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.tmpdir\)=(.*)")
 outdirre = re.compile(r"^\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.outdir\)=(.*)")
@@ -96,7 +97,7 @@ class ArvadosJob(object):
             filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])
 
         try:
-            with Perf(logger, "create %s" % self.name):
+            with Perf(metrics, "create %s" % self.name):
                 response = self.arvrunner.api.jobs().create(
                     body={
                         "owner_uuid": self.arvrunner.project_uuid,
@@ -118,7 +119,7 @@ class ArvadosJob(object):
             logger.info("Job %s (%s) is %s", self.name, response["uuid"], response["state"])
 
             if response["state"] in ("Complete", "Failed", "Cancelled"):
-                with Perf(logger, "done %s" % self.name):
+                with Perf(metrics, "done %s" % self.name):
                     self.done(response)
         except Exception as e:
             logger.error("Got error %s" % str(e))
@@ -127,7 +128,7 @@ class ArvadosJob(object):
     def update_pipeline_component(self, record):
         if self.arvrunner.pipeline:
             self.arvrunner.pipeline["components"][self.name] = {"job": record}
-            with Perf(logger, "update_pipeline_component %s" % self.name):
+            with Perf(metrics, "update_pipeline_component %s" % self.name):
                 self.arvrunner.pipeline = self.arvrunner.api.pipeline_instances().update(uuid=self.arvrunner.pipeline["uuid"],
                                                                                  body={
                                                                                     "components": self.arvrunner.pipeline["components"]
@@ -160,7 +161,7 @@ class ArvadosJob(object):
             outputs = {}
             try:
                 if record["output"]:
-                    with Perf(logger, "inspect log %s" % self.name):
+                    with Perf(metrics, "inspect log %s" % self.name):
                         logc = arvados.collection.Collection(record["log"])
                         log = logc.open(logc.keys()[0])
                         tmpdir = None
@@ -185,7 +186,7 @@ class ArvadosJob(object):
                             if g:
                                 keepdir = g.group(1)
 
-                    with Perf(logger, "output collection %s" % self.name):
+                    with Perf(metrics, "output collection %s" % self.name):
                         outputs = done.done(self, record, tmpdir, outdir, keepdir)
             except WorkflowException as e:
                 logger.error("Error while collecting job outputs:\n%s", e, exc_info=(e if self.arvrunner.debug else False))
diff --git a/sdk/cwl/arvados_cwl/perf.py b/sdk/cwl/arvados_cwl/perf.py
index 64fda59..a418ced 100644
--- a/sdk/cwl/arvados_cwl/perf.py
+++ b/sdk/cwl/arvados_cwl/perf.py
@@ -10,6 +10,6 @@ class Perf(object):
         self.time = time.time()
         self.logger.debug("ENTER %s %s", self.name, self.time)
 
-    def __exit__(self, exc_type, exc_value, traceback):
+    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
         now = time.time()
         self.logger.debug("EXIT %s %s %s", self.name, now, now - self.time)
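
Defaulting the __exit__ arguments to None, as in the hunk above, is what lets
the run loop in __init__.py call loopperf.__exit__() and loopperf.__enter__()
by hand, so that only the time spent pulling the next runnable out of jobiter
is measured, not the time spent running it. A sketch of that pattern with a
hypothetical generator, again assuming arvados_cwl.perf is importable:

    import logging

    from arvados_cwl.perf import Perf   # same helper as in the sketch above

    logging.basicConfig(level=logging.DEBUG)
    metrics = logging.getLogger('arvados.cwl-runner.metrics')

    def fake_jobiter():
        # Hypothetical stand-in for the real jobiter generator.
        yield "job-1"
        yield "job-2"

    jobiter = fake_jobiter()
    loopperf = Perf(metrics, "jobiter")
    loopperf.__enter__()
    for runnable in jobiter:
        loopperf.__exit__()          # stop the jobiter timer while the job runs
        with Perf(metrics, "run"):
            pass                     # runnable.run(**kwargs) in the real code
        loopperf.__enter__()         # resume timing the next jobiter step
    loopperf.__exit__()

In practice the same output is enabled by the new --metrics flag added in this
commit, which sets both the arvados.cwl-runner.metrics and cwltool.metrics
loggers to DEBUG.
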

-----------------------------------------------------------------------


hooks/post-receive
-- 



