[ARVADOS] updated: 7d4a10bcc197e909a7a9d5aeb4ba18c91a218976

Git user git at public.curoverse.com
Tue Jul 11 13:09:40 EDT 2017


Summary of changes:
 .licenseignore                                     |  2 +
 AUTHORS                                            | 18 ++++++++
 COPYING                                            | 28 ++++++------
 LICENSE-2.0.txt => apache-2.0.txt                  |  0
 .../app/controllers/collections_controller.rb      | 13 +++++-
 .../app/views/collections/_show_tags.html.erb      |  2 +-
 .../test/integration/collection_upload_test.rb     |  4 +-
 .../workbench/test/integration/collections_test.rb |  4 --
 apps/workbench/test/integration/download_test.rb   | 15 ++++---
 build/check-copyright-notices                      |  2 +-
 by-sa-3.0.txt => cc-by-sa-3.0.txt                  |  0
 sdk/cli/bin/arv-run-pipeline-instance              | 50 +++++++++++++++-------
 sdk/cli/bin/crunch-job                             |  2 +-
 sdk/cwl/arvados_cwl/__init__.py                    |  2 +-
 sdk/cwl/test_with_arvbox.sh                        |  4 +-
 sdk/cwl/tests/noreuse.cwl                          |  4 ++
 sdk/cwl/tests/stdout.cwl                           |  4 ++
 sdk/python/tests/run_test_server.py                | 17 ++++----
 services/api/Rakefile                              |  2 +-
 .../db/migrate/20170628185847_jobs_yaml_to_json.rb |  4 ++
 services/api/lib/migrate_yaml_to_json.rb           |  4 ++
 services/fuse/arvados_fuse/fusedir.py              | 35 +++++++++++++--
 services/keep-web/cache.go                         | 15 ++++---
 services/keep-web/cache_test.go                    |  6 +++
 services/nodemanager/arvnodeman/jobqueue.py        |  4 +-
 services/nodemanager/tests/integration_test.py     | 48 ++++++++++++++++++++-
 26 files changed, 222 insertions(+), 67 deletions(-)
 create mode 100644 AUTHORS
 rename LICENSE-2.0.txt => apache-2.0.txt (100%)
 rename by-sa-3.0.txt => cc-by-sa-3.0.txt (100%)

       via  7d4a10bcc197e909a7a9d5aeb4ba18c91a218976 (commit)
       via  17cd77ac947e2c8f4ca51aa930ffc235051d7f72 (commit)
       via  13d40db2d01ab306e171d2b5c540e51796bdfa44 (commit)
       via  0f650530237e998c0733d45ed8eadda0534f1c76 (commit)
       via  ad47ed8a7e7847a60ca775916a707d515ed256e9 (commit)
       via  1e31815d4a0d094633d4acb4f6265d6b8b6e3246 (commit)
       via  e2b3986edaa262ad49c3ef74a75518611776c2cf (commit)
       via  c5b9f2d232a7c5a4d85fea4f36b99eb0c5330cca (commit)
       via  446d2d8959fcd84169d8a0efb1288f8465b79f59 (commit)
       via  e914c48b16e6bb8199ebad89096a09e477a7efe8 (commit)
       via  7d751b9d0966b6e0ed544fe4d69f58511c14842f (commit)
       via  a5b458cad80a34a6e90745d47c7e522cbbeedb6b (commit)
       via  fcf3ca5baf89bdd944e3a7dcdc1b65f8ff4945ca (commit)
       via  c8cb8059605b3fa1f58dda40c5af3ad03a3c116e (commit)
       via  0eb72b526bf8bbb011551ecf019f604e17a534f1 (commit)
       via  9aeb8d558c48ccabc2e1f57f525a6515bc5169f5 (commit)
       via  ef8ff20b1ede6f3eb583d93646a8eacbee461bab (commit)
       via  ad77601bb4c911f4f02797ade7daafce8fcf16e3 (commit)
       via  94b75d06930701ccbc9b3100c6a2ae9dd6ca1f67 (commit)
       via  20dfea763e114e4be677e0ba698261fa349f47cd (commit)
       via  02b3b0a005156173a970728cf6a32b55c8ca67b1 (commit)
       via  7951799135bbc35b5e862886fa014b4cd951f72a (commit)
       via  e59c1d365d9b6e1eff9b5cb030a8b1a3aaf14353 (commit)
       via  1b8d669706fb952f0e5d1ad5e407073f4815907f (commit)
       via  048821c9c8dc25c4c78c3247fd15430cbadd6190 (commit)
       via  ff723f5f08e286df4c4c58a27a9db574ca604a6f (commit)
       via  6c51f11ab5affb4023762227ffb53a5be11a1003 (commit)
       via  014d7fdf7ce27777dae18588beb07e4e96625d26 (commit)
       via  26964109a599de6def966183b63b714e21084358 (commit)
      from  f507162f3974797b741a0f740b407daefceab0b6 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 7d4a10bcc197e909a7a9d5aeb4ba18c91a218976
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Tue Jul 11 14:08:39 2017 -0300

    7475: Added integration test that checks for scancel to be called and a log entry
    added to an unsatisfiable container.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas at curoverse.com>

diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index e60967a..fa1ea98 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -178,20 +178,20 @@ class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
         # Cancel any job/container with unsatisfiable requirements, emitting
         # a log explaining why.
         for job_uuid, reason in unsatisfiable_jobs.iteritems():
-            self._logger.debug("Cancelling unsatisfiable job '%s'", job_uuid)
             try:
                 self._client.logs().create(body={
                     'object_uuid': job_uuid,
                     'event_type': 'stderr',
                     'properties': {'text': reason},
                 }).execute()
-                # Cancel the job depending on it type
+                # Cancel the job depending on its type
                 if arvados.util.container_uuid_pattern.match(job_uuid):
                     subprocess.check_call(['scancel', '--name='+job_uuid])
                 elif arvados.util.job_uuid_pattern.match(job_uuid):
                     self._client.jobs().cancel(uuid=job_uuid).execute()
                 else:
                     raise Exception('Unknown job type')
+                self._logger.debug("Cancelled unsatisfiable job '%s'", job_uuid)
             except Exception as error:
                 self._logger.error("Trying to cancel job '%s': %s",
                                    job_uuid,
diff --git a/services/nodemanager/tests/integration_test.py b/services/nodemanager/tests/integration_test.py
index feba3ce..c4565fd 100755
--- a/services/nodemanager/tests/integration_test.py
+++ b/services/nodemanager/tests/integration_test.py
@@ -40,6 +40,7 @@ detail.addHandler(logging.StreamHandler(detail_content))
 fake_slurm = None
 compute_nodes = None
 all_jobs = None
+unsatisfiable_job_scancelled = os.path.join(tempfile.mkdtemp(), "scancel_called")
 
 def update_script(path, val):
     with open(path+"_", "w") as f:
@@ -54,6 +55,33 @@ def set_squeue(g):
                   "\n".join("echo '1|100|100|%s|%s'" % (v, k) for k,v in all_jobs.items()))
     return 0
 
+def set_queue_unsatisfiable(g):
+    global all_jobs, unsatisfiable_job_scancelled
+    # Simulate a job requesting a 99 core node.
+    update_script(os.path.join(fake_slurm, "squeue"), "#!/bin/sh\n" +
+                  "\n".join("echo '99|100|100|%s|%s'" % (v, k) for k,v in all_jobs.items()))
+    update_script(os.path.join(fake_slurm, "scancel"), "#!/bin/sh\n" +
+                  "\ntouch %s" % unsatisfiable_job_scancelled)
+    return 0
+
+def job_cancelled(g):
+    global unsatisfiable_job_scancelled
+    cancelled_job = g.group(1)
+    api = arvados.api('v1')
+    # Check that 'scancel' was called
+    if not os.path.isfile(unsatisfiable_job_scancelled):
+        return 1
+    # Check for the log entry
+    log_entry = api.logs().list(
+        filters=[
+            ['object_uuid', '=', cancelled_job],
+            ['event_type', '=', 'stderr'],
+        ]).execute()['items'][0]
+    if not re.match(
+            r"Requirements for a single node exceed the available cloud node size",
+            log_entry['properties']['text']):
+        return 1
+    return 0
 
 def node_paired(g):
     global compute_nodes
@@ -159,7 +187,7 @@ def run_test(name, actions, checks, driver_class, jobs, provider):
 
     # Test main loop:
     # - Read line
-    # - Apply negative checks (thinks that are not supposed to happen)
+    # - Apply negative checks (things that are not supposed to happen)
     # - Check timeout
     # - Check if the next action should trigger
     # - If all actions are exhausted, terminate with test success
@@ -213,6 +241,7 @@ def run_test(name, actions, checks, driver_class, jobs, provider):
         code = 1
 
     shutil.rmtree(fake_slurm)
+    shutil.rmtree(os.path.dirname(unsatisfiable_job_scancelled))
 
     if code == 0:
         logger.info("%s passed", name)
@@ -228,6 +257,23 @@ def main():
     # Test lifecycle.
 
     tests = {
+        "test_unsatisfiable_jobs" : (
+            # Actions (pattern -> action)
+            [
+                (r".*Daemon started", set_queue_unsatisfiable),
+                (r".*Cancelled unsatisfiable job '(\S+)'", job_cancelled),
+            ],
+            # Checks (things that shouldn't happen)
+            {
+                r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": fail,
+                r".*Trying to cancel job '(\S+)'": fail,
+            },
+            # Driver class
+            "arvnodeman.test.fake_driver.FakeDriver",
+            # Jobs
+            {"34t0i-dz642-h42bg3hq4bdfpf9": "ReqNodeNotAvail"},
+            # Provider
+            "azure"),
         "test_single_node_azure": (
             [
                 (r".*Daemon started", set_squeue),

commit 17cd77ac947e2c8f4ca51aa930ffc235051d7f72
Merge: f507162 13d40db
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Mon Jul 10 10:22:44 2017 -0300

    7475: Merge branch 'master' into 7475-nodemgr-unsatisfiable-job-comms
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas at curoverse.com>


-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list