[ARVADOS] created: 1.1.4-347-g32568bb

Git user git at public.curoverse.com
Fri Jun 1 14:54:42 EDT 2018


        at  32568bbde8ad549452ccb57f10bb54672fda6fa6 (commit)


commit 32568bbde8ad549452ccb57f10bb54672fda6fa6
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Jun 1 13:35:10 2018 -0400

    12061: Disable nodemanager integration tests.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/build/run-tests.sh b/build/run-tests.sh
index 8a8f5b6..2b280c7 100755
--- a/build/run-tests.sh
+++ b/build/run-tests.sh
@@ -270,6 +270,8 @@ declare -a failures
 declare -A skip
 declare -A testargs
 skip[apps/workbench_profile]=1
+# nodemanager_integration tests are not reliable, see #12061.
+skip[services/nodemanager_integration]=1
 
 while [[ -n "$1" ]]
 do

commit 6c5fc89c06338ea829f2e4f2a7e0529548890efd
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Jun 1 13:36:43 2018 -0400

    12061: Remove unreliable check.
    
    "Shutdown because state=down" is possible during a race, when
    nodemanager logs "Shutdown success" and polls sinfo again before the
    test suite has finished updating the sinfo stub.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/nodemanager/tests/integration_test.py b/services/nodemanager/tests/integration_test.py
index 3f616c6..a8429e1 100755
--- a/services/nodemanager/tests/integration_test.py
+++ b/services/nodemanager/tests/integration_test.py
@@ -113,7 +113,8 @@ def node_busy(g):
 
 def node_shutdown(g):
     global compute_nodes
-    del compute_nodes[g.group(1)]
+    if g.group(1) in compute_nodes:
+        del compute_nodes[g.group(1)]
     return 0
 
 def jobs_req(g):
@@ -321,7 +322,6 @@ def main():
             ],
             # Checks (things that shouldn't happen)
             {
-                r".*Suggesting shutdown because node state is \('down', .*\)": fail,
                 r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 4),
                 r".*Setting node quota.*": fail,
             },
@@ -347,7 +347,6 @@ def main():
             ],
             # Checks (things that shouldn't happen)
             {
-                r".*Suggesting shutdown because node state is \('down', .*\)": fail,
                 r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 2),
                 r".*Sending create_node request.*": partial(expect_count, 5)
             },
@@ -384,7 +383,6 @@ def main():
             ],
             # Checks (things that shouldn't happen)
             {
-                r".*Suggesting shutdown because node state is \('down', .*\)": fail,
                 r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 6),
                 r".*Sending create_node request.*": partial(expect_count, 9)
             },
@@ -444,7 +442,6 @@ def main():
             ],
             # Checks (things that shouldn't happen)
             {
-                r".*Suggesting shutdown because node state is \('down', .*\)": fail,
                 r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 1),
                 r".*Setting node quota.*": fail,
             },
@@ -465,7 +462,6 @@ def main():
             ],
             # Checks (things that shouldn't happen)
             {
-                r".*Suggesting shutdown because node state is \('down', .*\)": fail,
                 r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 1),
                 r".*Setting node quota.*": fail,
             },

commit 3108e6a627f4413819af8d2eb006016f1695b9a8
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Jun 1 12:50:02 2018 -0400

    12061: Reduce per-test timeout.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/nodemanager/tests/integration_test.py b/services/nodemanager/tests/integration_test.py
index 2de3769..3f616c6 100755
--- a/services/nodemanager/tests/integration_test.py
+++ b/services/nodemanager/tests/integration_test.py
@@ -175,8 +175,8 @@ def run_test(name, actions, checks, driver_class, jobs, provider):
                                       driver_class=driver_class,
                                       ssh_key=os.path.join(fake_slurm, "id_rsa.pub")))
 
-    # Tests must complete in less than 3 minutes.
-    timeout = time.time() + 180
+    # Tests must complete in less than 30 seconds.
+    timeout = time.time() + 30
     terminated = False
 
     # Now start node manager

commit e1ee43ee90b7f6e6189930cb031448014d0cf638
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Jun 1 12:49:42 2018 -0400

    12061: Fix nodes marked busy while expecting shutdown decision.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/nodemanager/tests/integration_test.py b/services/nodemanager/tests/integration_test.py
index 4aa8c9e..2de3769 100755
--- a/services/nodemanager/tests/integration_test.py
+++ b/services/nodemanager/tests/integration_test.py
@@ -106,18 +106,6 @@ def node_paired(g):
 
     return 0
 
-def remaining_jobs(g):
-    update_script(os.path.join(fake_slurm, "sinfo"), "#!/bin/sh\n" +
-                  "\n".join("echo '%s|alloc|(null)'" % (v) for k,v in compute_nodes.items()))
-
-    for k,v in all_jobs.items():
-        all_jobs[k] = "Running"
-
-    set_squeue(g)
-
-    return 0
-
-
 def node_busy(g):
     update_script(os.path.join(fake_slurm, "sinfo"), "#!/bin/sh\n" +
                   "\n".join("echo '%s|idle|(null)'" % (v) for k,v in compute_nodes.items()))
@@ -353,7 +341,7 @@ def main():
                 (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
                 (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
                 (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Not eligible for shut down because node state is \('busy', 'open', .*\)", node_busy),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", remaining_jobs),
+                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", noop),
                 (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
                 (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown)
             ],
@@ -379,7 +367,7 @@ def main():
                 (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
                 (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
                 (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Not eligible for shut down because node state is \('busy', 'open', .*\)", node_busy),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", remaining_jobs),
+                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", noop),
                 (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
                 (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
                 (r".*sending request", jobs_req),

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list