[ARVADOS] updated: 44203eefb793d8523e895d417ab4b726f63ec4c9

git at public.curoverse.com git at public.curoverse.com
Tue May 27 10:48:25 EDT 2014


Summary of changes:
 sdk/python/arvados/collection.py   | 21 +++++++++++------
 sdk/python/arvados/commands/put.py | 13 +++++------
 sdk/python/tests/test_arv-put.py   | 47 +++++++++++++++++++++++++++++++++++---
 3 files changed, 64 insertions(+), 17 deletions(-)

       via  44203eefb793d8523e895d417ab4b726f63ec4c9 (commit)
       via  c47f322cb4efa351808bc317e22ebaec3c260d44 (commit)
       via  78722718f1369094e4cc9216f14c846c3b614e7d (commit)
      from  54ae96cf01a16cc57f1b40bb2bfbbddcc95d7b5b (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 44203eefb793d8523e895d417ab4b726f63ec4c9
Author: Brett Smith <brett at curoverse.com>
Date:   Tue May 27 10:49:13 2014 -0400

    2752: Avoid hiccup on arv-put with short stdin.
    
    Trying to call os.stat(os.path.realpath('/dev/stdin')) usually fails
    when arv-put reads a short input from stdin.  Since we're never going
    to be able to resume from stdin anyway, skip that check whenever the
    opened source is not a regular file.

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 32c4528..41adc88 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -443,15 +443,22 @@ class ResumableCollectionWriter(CollectionWriter):
             raise errors.AssertionError("{} not a file path".format(source))
         try:
             path_stat = os.stat(src_path)
-        except OSError as error:
-            raise errors.AssertionError(
-                "could not stat {}: {}".format(source, error))
+        except OSError as stat_error:
+            path_stat = None
         super(ResumableCollectionWriter, self)._queue_file(source, filename)
         fd_stat = os.fstat(self._queued_file.fileno())
-        if path_stat.st_ino != fd_stat.st_ino:
+        if not S_ISREG(fd_stat.st_mode):
+            # We won't be able to resume from this cache anyway, so don't
+            # worry about further checks.
+            self._dependencies[source] = tuple(fd_stat)
+        elif path_stat is None:
+            raise errors.AssertionError(
+                "could not stat {}: {}".format(source, stat_error))
+        elif path_stat.st_ino != fd_stat.st_ino:
             raise errors.AssertionError(
                 "{} changed between open and stat calls".format(source))
-        self._dependencies[src_path] = tuple(fd_stat)
+        else:
+            self._dependencies[src_path] = tuple(fd_stat)
 
     def write(self, data):
         if self._queued_file is None:
diff --git a/sdk/python/tests/test_arv-put.py b/sdk/python/tests/test_arv-put.py
index cd0e965..2e57285 100644
--- a/sdk/python/tests/test_arv-put.py
+++ b/sdk/python/tests/test_arv-put.py
@@ -4,7 +4,10 @@
 import os
 import re
 import shutil
+import subprocess
+import sys
 import tempfile
+import time
 import unittest
 
 import arvados
@@ -319,6 +322,25 @@ class ArvadosPutTest(ArvadosKeepLocalStoreTestCase):
                                         '098f6bcd4621d373cade4e832627b4f6')),
             "did not find file stream in Keep store")
 
+    def test_short_put_from_stdin(self):
+        # Have to run this separately since arv-put can't read from the
+        # tests' stdin.
+        # arv-put usually can't stat(os.path.realpath('/dev/stdin')) in this
+        # case, because the /proc entry is already gone by the time it tries.
+        pipe = subprocess.Popen(
+            [sys.executable, arv_put.__file__, '--stream'],
+            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        pipe.stdin.write('stdin test\n')
+        pipe.stdin.close()
+        deadline = time.time() + 5
+        while (pipe.poll() is None) and (time.time() < deadline):
+            time.sleep(.1)
+        if pipe.returncode is None:
+            pipe.terminate()
+            self.fail("arv-put did not PUT from stdin within 5 seconds")
+        self.assertEquals(pipe.returncode, 0)
+        self.assertIn('4a9c8b735dce4b5fa3acf221a0b13628+11', pipe.stdout.read())
+
 
 if __name__ == '__main__':
     unittest.main()

commit c47f322cb4efa351808bc317e22ebaec3c260d44
Author: Brett Smith <brett at curoverse.com>
Date:   Tue May 27 10:20:50 2014 -0400

    2752: Improve arv-put initialization from cache.
    
    This puts ArvPutCollectionWriter's own instance variables, like the
    progress reporter function, in place before work is resumed.

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 7b38ccd..32c4528 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -385,8 +385,8 @@ class ResumableCollectionWriter(CollectionWriter):
         super(ResumableCollectionWriter, self).__init__()
 
     @classmethod
-    def from_state(cls, state):
-        writer = cls()
+    def from_state(cls, state, *init_args, **init_kwargs):
+        writer = cls(*init_args, **init_kwargs)
         for attr_name in cls.STATE_PROPS:
             attr_value = state[attr_name]
             attr_class = getattr(writer, attr_name).__class__
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 4398c5b..3926369 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -229,25 +229,21 @@ class ArvPutCollectionWriter(arvados.ResumableCollectionWriter):
 
     def __init__(self, cache=None, reporter=None, bytes_expected=None):
         self.bytes_written = 0
-        self.__init_locals__(cache, reporter, bytes_expected)
-        super(ArvPutCollectionWriter, self).__init__()
-
-    def __init_locals__(self, cache, reporter, bytes_expected):
         self.cache = cache
         self.report_func = reporter
         self.bytes_expected = bytes_expected
+        super(ArvPutCollectionWriter, self).__init__()
 
     @classmethod
     def from_cache(cls, cache, reporter=None, bytes_expected=None):
         try:
             state = cache.load()
             state['_data_buffer'] = [base64.decodestring(state['_data_buffer'])]
-            writer = cls.from_state(state)
+            writer = cls.from_state(state, cache, reporter, bytes_expected)
         except (TypeError, ValueError,
                 arvados.errors.StaleWriterStateError) as error:
             return cls(cache, reporter, bytes_expected)
         else:
-            writer.__init_locals__(cache, reporter, bytes_expected)
             return writer
 
     def checkpoint_state(self):

commit 78722718f1369094e4cc9216f14c846c3b614e7d
Author: Brett Smith <brett at curoverse.com>
Date:   Tue May 27 10:04:19 2014 -0400

    2752: arv-put saves and restores write progress information.
    
    This will make output less confusing to the user, and help them
    understand that an upload is resuming.

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 4568565..4398c5b 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -224,14 +224,17 @@ class ResumeCache(object):
 
 
 class ArvPutCollectionWriter(arvados.ResumableCollectionWriter):
+    STATE_PROPS = (arvados.ResumableCollectionWriter.STATE_PROPS +
+                   ['bytes_written'])
+
     def __init__(self, cache=None, reporter=None, bytes_expected=None):
+        self.bytes_written = 0
         self.__init_locals__(cache, reporter, bytes_expected)
         super(ArvPutCollectionWriter, self).__init__()
 
     def __init_locals__(self, cache, reporter, bytes_expected):
         self.cache = cache
         self.report_func = reporter
-        self.bytes_written = 0
         self.bytes_expected = bytes_expected
 
     @classmethod
diff --git a/sdk/python/tests/test_arv-put.py b/sdk/python/tests/test_arv-put.py
index a98eaa6..cd0e965 100644
--- a/sdk/python/tests/test_arv-put.py
+++ b/sdk/python/tests/test_arv-put.py
@@ -236,17 +236,36 @@ class ArvadosPutCollectionWriterTest(ArvadosKeepLocalStoreTestCase):
                 self.cache)
         self.assertEquals(cwriter.manifest_text(), new_writer.manifest_text())
 
+    def make_progress_tester(self):
+        progression = []
+        def record_func(written, expected):
+            progression.append((written, expected))
+        return progression, record_func
+
     def test_progress_reporting(self):
         for expect_count in (None, 8):
-            progression = []
+            progression, reporter = self.make_progress_tester()
             cwriter = arv_put.ArvPutCollectionWriter(
-                reporter=lambda *args: progression.append(args),
-                bytes_expected=expect_count)
+                reporter=reporter, bytes_expected=expect_count)
             with self.make_test_file() as testfile:
                 cwriter.write_file(testfile.name, 'test')
             cwriter.finish_current_stream()
             self.assertIn((4, expect_count), progression)
 
+    def test_resume_progress(self):
+        cwriter = arv_put.ArvPutCollectionWriter(self.cache, bytes_expected=4)
+        with self.make_test_file() as testfile:
+            # Set up a writer with some flushed bytes.
+            cwriter.write_file(testfile.name, 'test')
+            cwriter.finish_current_stream()
+            cwriter.checkpoint_state()
+            # Restore a writer from that state and check its progress report.
+            progression, reporter = self.make_progress_tester()
+            new_writer = arv_put.ArvPutCollectionWriter.from_cache(
+                self.cache, reporter, bytes_expected=4)
+            new_writer.flush_data()
+            self.assertIn((4, 4), progression)
+
 
 class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
     TEST_SIZE = os.path.getsize(__file__)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list