[ARVADOS] updated: 85378b8c8d216f2aacaf0486896f57886f3e5349

git at public.curoverse.com git at public.curoverse.com
Tue Apr 14 10:59:01 EDT 2015


Summary of changes:
 .../assets/images/getting_started/mouse-move.gif   | Bin 48854 -> 0 bytes
 .../app/assets/images/getting_started/pipeline.gif | Bin 128898 -> 0 bytes
 .../app/assets/stylesheets/application.css.scss    |   4 +++
 apps/workbench/app/views/layouts/body.html.erb     |  12 +++++++-
 doc/_config.yml                                    |  20 ++++++-------
 doc/_includes/_navbar_top.liquid                   |   6 ++--
 doc/_layouts/default.html.liquid                   |   2 +-
 doc/index.html.liquid                              |  32 +++++++++++++++++----
 doc/user/index.html.textile.liquid                 |   4 +--
 9 files changed, 56 insertions(+), 24 deletions(-)
 delete mode 100644 apps/workbench/app/assets/images/getting_started/mouse-move.gif
 delete mode 100644 apps/workbench/app/assets/images/getting_started/pipeline.gif

  discards  aefbec0fb20f18306c6926ab0b98dfbb9f0a77b2 (commit)
  discards  585859388bfcc7de1c6ea29b5ea607f80e66e8d8 (commit)
  discards  bb38e24b6f71cd051c6f43e2f7956347c7d341fa (commit)
  discards  0fdc60ffe22d3dcc16ee50c08cb336da2d3843c2 (commit)
       via  85378b8c8d216f2aacaf0486896f57886f3e5349 (commit)
       via  966585bc935190d025868134ae181e999c9738c5 (commit)
       via  99401ace106b84e150fae1d699ed1f9d803e1bb5 (commit)
       via  4c209418ed2140f4d2a1330b61916f41816b9b37 (commit)
       via  912464ad82bad38f1ce7984b6d4b19734a9816a9 (commit)
       via  c50acf62d8bc4a25f3fb432479fabad9b060f878 (commit)
       via  1f6a187a7011becd08bb3cef8bc5c4e253900590 (commit)
       via  08c4f5f03fd9afd86c4d9c43c7b8620ef9a0fade (commit)
       via  33d54abd1574c16685e2d631d367a1be4e969018 (commit)
       via  cdecff5125a3bc09720be4d210536a183e43ce7e (commit)
       via  18ac672a5c9b5416e21a3d45ed268f2edb509786 (commit)
       via  cd8db86a9178dd43f9582f55d9a14bde3f2e348a (commit)
       via  0bff4d895d0ce312524c9d689923c9590f81f36f (commit)
       via  be238c74d0d3835b0a384b712d7591dc95da8c07 (commit)
       via  a756a90a381473adcfe8cce6a022f1733395610a (commit)
       via  d743a56addcde9d92876d19201b9f46db42ea582 (commit)
       via  1b2afc5aa599eb452a1f30e706e19b964e26cae0 (commit)
       via  dbdd94518d916b89f8e3a3fa0901da21a2493962 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (aefbec0fb20f18306c6926ab0b98dfbb9f0a77b2)
            \
             N -- N -- N (85378b8c8d216f2aacaf0486896f57886f3e5349)

When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 85378b8c8d216f2aacaf0486896f57886f3e5349
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Apr 13 22:54:11 2015 -0400

    3126: Always act as current_user in Commit.find_commit_range.

diff --git a/services/api/app/controllers/arvados/v1/jobs_controller.rb b/services/api/app/controllers/arvados/v1/jobs_controller.rb
index 46b5099..d12f6c1 100644
--- a/services/api/app/controllers/arvados/v1/jobs_controller.rb
+++ b/services/api/app/controllers/arvados/v1/jobs_controller.rb
@@ -254,8 +254,7 @@ class Arvados::V1::JobsController < ApplicationController
       else
         raise ArgumentError.new("unknown attribute for git filter: #{attr}")
       end
-      revisions = Commit.find_commit_range(current_user,
-                                           filter["repository"],
+      revisions = Commit.find_commit_range(filter["repository"],
                                            filter["min_version"],
                                            filter["max_version"],
                                            filter["exclude_versions"])
diff --git a/services/api/app/models/commit.rb b/services/api/app/models/commit.rb
index 9e1176b..5d6e555 100644
--- a/services/api/app/models/commit.rb
+++ b/services/api/app/models/commit.rb
@@ -1,4 +1,6 @@
 class Commit < ActiveRecord::Base
+  extend CurrentApiClient
+
   class GitError < StandardError
     def http_status
       422
@@ -29,7 +31,7 @@ class Commit < ActiveRecord::Base
   # repository can be the name of a locally hosted repository or a git
   # URL (see git-fetch(1)). Currently http, https, and git schemes are
   # supported.
-  def self.find_commit_range(current_user, repository, minimum, maximum, exclude)
+  def self.find_commit_range repository, minimum, maximum, exclude
     if minimum and minimum.empty?
       minimum = nil
     end
@@ -109,14 +111,14 @@ class Commit < ActiveRecord::Base
   # The repo can be a remote url, but in this case sha1 must already
   # be present in our local cache for that repo: e.g., sha1 was just
   # returned by find_commit_range.
-  def self.tag_in_internal_repository repo, sha1, tag
+  def self.tag_in_internal_repository repo_name, sha1, tag
     unless git_check_ref_format tag
       raise ArgumentError.new "invalid tag #{tag}"
     end
     unless /^[0-9a-f]{40}$/ =~ sha1
       raise ArgumentError.new "invalid sha1 #{sha1}"
     end
-    src_gitdir, _ = git_dir_for repo
+    src_gitdir, _ = git_dir_for repo_name
     dst_gitdir = Rails.configuration.git_internal_dir
     must_pipe("echo #{sha1.shellescape}",
               "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout",
@@ -127,8 +129,8 @@ class Commit < ActiveRecord::Base
 
   protected
 
-  def self.remote_url? repository
-    /^(https?|git):\/\// =~ repository
+  def self.remote_url? repo_name
+    /^(https?|git):\/\// =~ repo_name
   end
 
   # Return [local_git_dir, is_remote]. If is_remote, caller must use
diff --git a/services/api/app/models/job.rb b/services/api/app/models/job.rb
index 423d93c..41a16bf 100644
--- a/services/api/app/models/job.rb
+++ b/services/api/app/models/job.rb
@@ -132,7 +132,7 @@ class Job < ArvadosModel
       return true
     end
     if new_record? or repository_changed? or script_version_changed?
-      sha1 = Commit.find_commit_range(current_user, repository,
+      sha1 = Commit.find_commit_range(repository,
                                       nil, script_version, nil).first
       if not sha1
         errors.add :script_version, "#{script_version} does not resolve to a commit"
@@ -189,7 +189,7 @@ class Job < ArvadosModel
   def find_arvados_sdk_version
     resolve_runtime_constraint("arvados_sdk_version",
                                :arvados_sdk_version) do |git_search|
-      commits = Commit.find_commit_range(current_user, "arvados",
+      commits = Commit.find_commit_range("arvados",
                                          nil, git_search, nil)
       if commits.empty?
         [false, "#{git_search} does not resolve to a commit"]
diff --git a/services/api/test/unit/commit_test.rb b/services/api/test/unit/commit_test.rb
index 7e348bd..1b82dcf 100644
--- a/services/api/test/unit/commit_test.rb
+++ b/services/api/test/unit/commit_test.rb
@@ -1,7 +1,7 @@
 require 'test_helper'
 require 'helpers/git_test_helper'
 
-# NOTE: calling Commit.find_commit_range(user, nil, nil, 'rev')
+# NOTE: calling Commit.find_commit_range(nil, nil, 'rev')
 # produces an error message "fatal: bad object 'rev'" on stderr if
 # 'rev' does not exist in a given repository.  Many of these tests
 # report such errors; their presence does not represent a fatal
@@ -11,9 +11,14 @@ class CommitTest < ActiveSupport::TestCase
   # See git_setup.rb for the commit log for test.git.tar
   include GitTestHelper
 
+  setup do
+    authorize_with :active
+  end
+
   test 'find_commit_range does not bypass permissions' do
+    authorize_with :inactive
     assert_raises ArgumentError do
-      c = Commit.find_commit_range users(:inactive), 'foo', nil, 'master', []
+      c = Commit.find_commit_range 'foo', nil, 'master', []
     end
   end
 
@@ -26,7 +31,7 @@ class CommitTest < ActiveSupport::TestCase
       fake_gitdir = repositories(:foo).server_path
       Commit.expects(:fetch_remote_repository).
         once.with(Commit.cache_dir_for(url), url).returns fake_gitdir
-      c = Commit.find_commit_range users(:active), url, nil, 'master', []
+      c = Commit.find_commit_range url, nil, 'master', []
       refute_empty c
     end
   end
@@ -42,7 +47,7 @@ class CommitTest < ActiveSupport::TestCase
     test "find_commit_range skips fetch_remote_repository for #{url}" do
       Commit.expects(:fetch_remote_repository).never
       assert_raises ArgumentError do
-        Commit.find_commit_range users(:active), url, nil, 'master', []
+        Commit.find_commit_range url, nil, 'master', []
       end
     end
   end
@@ -50,12 +55,12 @@ class CommitTest < ActiveSupport::TestCase
   test 'fetch_remote_repository does not leak commits across repositories' do
     url = "http://localhost:1/fake/fake.git"
     fetch_remote_from_local_repo url, :foo
-    c = Commit.find_commit_range users(:active), url, nil, 'master', []
+    c = Commit.find_commit_range url, nil, 'master', []
     assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57'], c
 
     url = "http://localhost:2/fake/fake.git"
     fetch_remote_from_local_repo url, 'file://' + File.expand_path('../../.git', Rails.root)
-    c = Commit.find_commit_range users(:active), url, nil, '077ba2ad3ea24a929091a9e6ce545c93199b8e57', []
+    c = Commit.find_commit_range url, nil, '077ba2ad3ea24a929091a9e6ce545c93199b8e57', []
     assert_equal [], c
   end
 
@@ -78,77 +83,77 @@ class CommitTest < ActiveSupport::TestCase
     authorize_with :active
 
     # single
-    a = Commit.find_commit_range(users(:active), 'active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    a = Commit.find_commit_range('active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
     assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
 
     #test "test_branch1" do
-    a = Commit.find_commit_range(users(:active), 'active/foo', nil, 'master', nil)
+    a = Commit.find_commit_range('active/foo', nil, 'master', nil)
     assert_includes(a, '077ba2ad3ea24a929091a9e6ce545c93199b8e57')
 
     #test "test_branch2" do
-    a = Commit.find_commit_range(users(:active), 'active/foo', nil, 'b1', nil)
+    a = Commit.find_commit_range('active/foo', nil, 'b1', nil)
     assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
 
     #test "test_branch3" do
-    a = Commit.find_commit_range(users(:active), 'active/foo', nil, 'HEAD', nil)
+    a = Commit.find_commit_range('active/foo', nil, 'HEAD', nil)
     assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
 
     #test "test_single_revision_repo" do
-    a = Commit.find_commit_range(users(:active), 'active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    a = Commit.find_commit_range('active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
     assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-    a = Commit.find_commit_range(users(:active), 'arvados', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    a = Commit.find_commit_range('arvados', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
     assert_equal [], a
 
     #test "test_multi_revision" do
     # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', nil)
+    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', nil)
     assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
 
     #test "test_tag" do
     # complains "fatal: ambiguous argument 'tag1': unknown revision or path
     # not in the working tree."
-    a = Commit.find_commit_range(users(:active), 'active/foo', 'tag1', 'master', nil)
+    a = Commit.find_commit_range('active/foo', 'tag1', 'master', nil)
     assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577'], a
 
     #test "test_multi_revision_exclude" do
-    a = Commit.find_commit_range(users(:active), 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['4fe459abe02d9b365932b8f5dc419439ab4e2577'])
+    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['4fe459abe02d9b365932b8f5dc419439ab4e2577'])
     assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
 
     #test "test_multi_revision_tagged_exclude" do
     # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['tag1'])
+    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['tag1'])
     assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
 
     Dir.mktmpdir do |touchdir|
       # invalid input to maximum
-      a = Commit.find_commit_range(users(:active), 'active/foo', nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
+      a = Commit.find_commit_range('active/foo', nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
       assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to maximum
-      a = Commit.find_commit_range(users(:active), 'active/foo', nil, "$(uname>#{touchdir}/uh_oh)", nil)
+      a = Commit.find_commit_range('active/foo', nil, "$(uname>#{touchdir}/uh_oh)", nil)
       assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to minimum
-      a = Commit.find_commit_range(users(:active), 'active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
+      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
       assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to minimum
-      a = Commit.find_commit_range(users(:active), 'active/foo', "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
+      a = Commit.find_commit_range('active/foo', "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
       assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to 'excludes'
       # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      a = Commit.find_commit_range(users(:active), 'active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
+      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
       assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to 'excludes'
       # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      a = Commit.find_commit_range(users(:active), 'active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
+      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
       assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
       assert_equal [], a
     end

commit 966585bc935190d025868134ae181e999c9738c5
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Apr 13 12:43:34 2015 -0400

    3126: Do not try to do anything with job.repository in crunch-dispatch if the commit is already in internal.git.

diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index 7b3ed9e..1002f91 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -279,26 +279,24 @@ class Dispatcher
     @authorizations[job.uuid]
   end
 
-  def get_commit(src_repo, commit_hash)
-    # @fetched_commits[V]==true if we know commit V exists in the
-    # arvados_internal git repository.
-    if !@fetched_commits[commit_hash]
-      # check if the commit needs to be fetched or not
-      commit_rev = stdout_s(git_cmd("rev-list", "-n1", commit_hash),
-                            err: "/dev/null")
-      unless $? == 0 and commit_rev == commit_hash
-        # commit does not exist in internal repository, so import the source repository using git fetch-pack
-        cmd = git_cmd("fetch-pack", "--no-progress", "--all", src_repo)
-        $stderr.puts "dispatch: #{cmd}"
-        $stderr.puts(stdout_s(cmd))
-        unless $? == 0
-          fail_job job, "git fetch-pack failed"
-          return nil
-        end
-      end
-      @fetched_commits[commit_hash] = true
+  def internal_repo_has_commit? sha1
+    if (not @fetched_commits[sha1] and
+        sha1 == stdout_s(git_cmd("rev-list", "-n1", sha1), err: "/dev/null") and
+        $? == 0)
+      @fetched_commits[sha1] = true
     end
-    @fetched_commits[commit_hash]
+    return @fetched_commits[sha1]
+  end
+
+  def get_commit src_repo, sha1
+    return true if internal_repo_has_commit? sha1
+
+    # commit does not exist in internal repository, so import the
+    # source repository using git fetch-pack
+    cmd = git_cmd("fetch-pack", "--no-progress", "--all", src_repo)
+    $stderr.puts "dispatch: #{cmd}"
+    $stderr.puts(stdout_s(cmd))
+    @fetched_commits[sha1] = ($? == 0)
   end
 
   def tag_commit(commit_hash, tag_name)
@@ -377,20 +375,42 @@ class Dispatcher
                          "GEM_PATH=#{ENV['GEM_PATH']}")
       end
 
-      repo = Repository.where(name: job.repository).first
-      if repo.nil? or repo.server_path.nil?
-        fail_job "Repository #{job.repository} not found under #{@repo_root}"
-        next
+      next unless get_authorization job
+
+      ready = internal_repo_has_commit? job.script_version
+
+      if not ready
+        # Import the commit from the specified repository into the
+        # internal repository. This should have been done already when
+        # the job was created/updated; this code is obsolete except to
+        # avoid deployment races. Failing the job would be a
+        # reasonable thing to do at this point.
+        repo = Repository.where(name: job.repository).first
+        if repo.nil? or repo.server_path.nil?
+          fail_job "Repository #{job.repository} not found under #{@repo_root}"
+          next
+        end
+        ready &&= get_commit repo.server_path, job.script_version
+        ready &&= tag_commit job.script_version, job.uuid
       end
 
-      ready = (get_authorization(job) and
-               get_commit(repo.server_path, job.script_version) and
-               tag_commit(job.script_version, job.uuid))
-      if ready and job.arvados_sdk_version
-        ready = (get_commit(@arvados_repo_path, job.arvados_sdk_version) and
-                 tag_commit(job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"))
+      # This should be unnecessary, because API server does it during
+      # job create/update, but it's still not a bad idea to verify the
+      # tag is correct before starting the job:
+      ready &&= tag_commit job.script_version, job.uuid
+
+      # The arvados_sdk_version doesn't support use of arbitrary
+      # remote URLs, so the requested version isn't necessarily copied
+      # into the internal repository yet.
+      if job.arvados_sdk_version
+        ready &&= get_commit @arvados_repo_path, job.arvados_sdk_version
+        ready &&= tag_commit job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"
+      end
+
+      if not ready
+        fail_job job, "commit not present in internal repository"
+        next
       end
-      next unless ready
 
       cmd_args += [@crunch_job_bin,
                    '--job-api-token', @authorizations[job.uuid].api_token,

commit 99401ace106b84e150fae1d699ed1f9d803e1bb5
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Apr 10 03:40:55 2015 -0400

    3126: Update API docs.

diff --git a/doc/api/methods/jobs.html.textile.liquid b/doc/api/methods/jobs.html.textile.liquid
index b57858e..90a3c4c 100644
--- a/doc/api/methods/jobs.html.textile.liquid
+++ b/doc/api/methods/jobs.html.textile.liquid
@@ -34,7 +34,7 @@ table(table table-bordered table-condensed).
 |minimum_script_version |string     |Git branch, tag, or commit hash specifying the minimum acceptable script version (earliest ancestor) to consider when deciding whether to re-use a past job.[1]|query|@"c3e86c9"@|
 |exclude_script_versions|array of strings|Git commit branches, tags, or hashes to exclude when deciding whether to re-use a past job.|query|@["8f03c71","8f03c71"]@
 @["badtag1","badtag2"]@|
-|filters|array|Conditions to find Jobs to reuse.|query||
+|filters|array of arrays|Conditions to find Jobs to reuse.|query||
 |find_or_create         |boolean    |Before creating, look for an existing job that has identical script, script_version, and script_parameters to those in the present job, has nondeterministic=false, and did not fail (it could be queued, running, or completed). If such a job exists, respond with the existing job instead of submitting a new one.|query|@false@|
 
 When a job is submitted to the queue using the **create** method, the @script_version@ attribute is updated to a full 40-character Git commit hash based on the current content of the specified repository. If @script_version@ cannot be resolved, the job submission is rejected.
@@ -60,8 +60,8 @@ Because Arvados records the exact version of the script, input parameters, and r
 notextile. <div class="spaced-out">
 
 # If @find_or_create@ is false or omitted, create a new job and skip the rest of these steps.
-# If @filters@ are specified, find jobs that match those filters.  Filters *must* be specified to limit the @repository@ and @script@ attributes.  An error is returned if they are missing.
-# If @filters@ are not specified, find jobs with the same @repository@ and @script@, with a @script_version@ between @minimum_script_version@ and @script_version@ (excluding @excluded_script_versions@), and a @docker_image_locator@ with the latest Collection that matches the submitted job's @docker_image@ constraint.  If the submitted job includes an @arvados_sdk_version@ constraint, jobs must have an @arvados_sdk_version@ between that refspec and HEAD to be found.
+# If @filters@ are specified, find jobs that match those filters. If any filters are given, there must be at least one filter on the @repository@ attribute and one on the @script@ attribute: otherwise an error is returned.
+# If @filters@ are not specified, find jobs with the same @repository@ and @script@, with a @script_version@ between @minimum_script_version@ and @script_version@ inclusively (excluding @excluded_script_versions@), and a @docker_image_locator@ with the latest Collection that matches the submitted job's @docker_image@ constraint.  If the submitted job includes an @arvados_sdk_version@ constraint, jobs must have an @arvados_sdk_version@ between that refspec and HEAD to be found. *This form is deprecated: use filters instead.*
 # If the found jobs include a completed job, and all found completed jobs have consistent output, return one of them.  Which specific job is returned is undefined.
 # If the found jobs only include incomplete jobs, return one of them.  Which specific job is returned is undefined.
 # If no job has been returned so far, create and return a new job.
diff --git a/doc/api/schema/Job.html.textile.liquid b/doc/api/schema/Job.html.textile.liquid
index 80f5de6..fd63503 100644
--- a/doc/api/schema/Job.html.textile.liquid
+++ b/doc/api/schema/Job.html.textile.liquid
@@ -23,7 +23,10 @@ table(table table-bordered table-condensed).
 |_. Attribute|_. Type|_. Description|_. Notes|
 |script|string|The filename of the job script.|This program will be invoked by Crunch for each job task. It is given as a path to an executable file, relative to the @/crunch_scripts@ directory in the Git tree specified by the _repository_ and _script_version_ attributes.|
 |script_parameters|hash|The input parameters for the job.|Conventionally, one of the parameters is called @"input"@. Typically, some parameter values are collection UUIDs. Ultimately, though, the significance of parameters is left entirely up to the script itself.|
-|repository|string|Git repository|Given as the name of a locally hosted Git repository.|
+|repository|string|Git repository name or URL.|Source of the repository where the given script_version is to be found. This can be given as the name of a locally hosted repository, or as a publicly accessible URL starting with @git://@, @http://@, or @https://@.
+Examples:
+ at yourusername/yourrepo@
+ at https://github.com/curoverse/arvados.git@|
 |script_version|string|Git commit|During a **create** transaction, this is the Git branch, tag, or hash supplied by the client. Before the job starts, Arvados updates it to the full 40-character SHA-1 hash of the commit used by the job.
 See "Specifying Git versions":#script_version below for more detail about acceptable ways to specify a commit.|
 |cancelled_by_client_uuid|string|API client ID|Is null if job has not been cancelled|

commit 4c209418ed2140f4d2a1330b61916f41816b9b37
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Apr 9 18:09:40 2015 -0400

    3126: Accept remote http/https/git url as repository attr in jobs.create/save.

diff --git a/services/api/Gemfile b/services/api/Gemfile
index 70f67d5..1ce85d8 100644
--- a/services/api/Gemfile
+++ b/services/api/Gemfile
@@ -14,6 +14,7 @@ group :test, :development do
   # still mandatory.
   gem 'simplecov', '~> 0.7.1', require: false
   gem 'simplecov-rcov', require: false
+  gem 'mocha', require: false
 end
 
 # This might not be needed in :test and :development, but we load it
diff --git a/services/api/Gemfile.lock b/services/api/Gemfile.lock
index a6a8326..49775d9 100644
--- a/services/api/Gemfile.lock
+++ b/services/api/Gemfile.lock
@@ -114,7 +114,10 @@ GEM
     mail (2.5.4)
       mime-types (~> 1.16)
       treetop (~> 1.4.8)
+    metaclass (0.0.4)
     mime-types (1.25.1)
+    mocha (1.1.0)
+      metaclass (~> 0.0.1)
     multi_json (1.10.1)
     multipart-post (1.2.0)
     net-scp (1.2.0)
@@ -232,6 +235,7 @@ DEPENDENCIES
   faye-websocket
   google-api-client (~> 0.6.3)
   jquery-rails
+  mocha
   multi_json
   oj
   omniauth (= 1.1.1)
diff --git a/services/api/app/controllers/arvados/v1/jobs_controller.rb b/services/api/app/controllers/arvados/v1/jobs_controller.rb
index ce8a05c..46b5099 100644
--- a/services/api/app/controllers/arvados/v1/jobs_controller.rb
+++ b/services/api/app/controllers/arvados/v1/jobs_controller.rb
@@ -254,18 +254,18 @@ class Arvados::V1::JobsController < ApplicationController
       else
         raise ArgumentError.new("unknown attribute for git filter: #{attr}")
       end
-      version_range = Commit.find_commit_range(current_user,
-                                               filter["repository"],
-                                               filter["min_version"],
-                                               filter["max_version"],
-                                               filter["exclude_versions"])
-      if version_range.nil?
+      revisions = Commit.find_commit_range(current_user,
+                                           filter["repository"],
+                                           filter["min_version"],
+                                           filter["max_version"],
+                                           filter["exclude_versions"])
+      if revisions.empty?
         raise ArgumentError.
           new("error searching #{filter['repository']} from " +
               "'#{filter['min_version']}' to '#{filter['max_version']}', " +
               "excluding #{filter['exclude_versions']}")
       end
-      @filters.append([attr, "in", version_range])
+      @filters.append([attr, "in", revisions])
     end
   end
 
diff --git a/services/api/app/models/commit.rb b/services/api/app/models/commit.rb
index 0d47b63..9e1176b 100644
--- a/services/api/app/models/commit.rb
+++ b/services/api/app/models/commit.rb
@@ -1,5 +1,9 @@
 class Commit < ActiveRecord::Base
-  require 'shellwords'
+  class GitError < StandardError
+    def http_status
+      422
+    end
+  end
 
   def self.git_check_ref_format(e)
     if !e or e.empty? or e[0] == '-' or e[0] == '$'
@@ -11,6 +15,20 @@ class Commit < ActiveRecord::Base
     end
   end
 
+  # Return an array of commits (each a 40-char sha1) satisfying the
+  # given criteria.
+  #
+  # Return [] if the revisions given in minimum/maximum are invalid or
+  # don't exist in the given repository.
+  #
+  # Raise ArgumentError if the given repository is invalid, does not
+  # exist, or cannot be read for any reason. (Any transient error that
+  # prevents commit ranges from resolving must raise rather than
+  # returning an empty array.)
+  #
+  # repository can be the name of a locally hosted repository or a git
+  # URL (see git-fetch(1)). Currently http, https, and git schemes are
+  # supported.
   def self.find_commit_range(current_user, repository, minimum, maximum, exclude)
     if minimum and minimum.empty?
       minimum = nil
@@ -18,135 +36,159 @@ class Commit < ActiveRecord::Base
 
     if minimum and !git_check_ref_format(minimum)
       logger.warn "find_commit_range called with invalid minimum revision: '#{minimum}'"
-      return nil
+      return []
     end
 
     if maximum and !git_check_ref_format(maximum)
       logger.warn "find_commit_range called with invalid maximum revision: '#{maximum}'"
-      return nil
+      return []
     end
 
     if !maximum
       maximum = "HEAD"
     end
 
-    # Get list of actual repository directories under management
-    on_disk_repos = repositories
-
-    # Get list of repository objects readable by user
-    readable = Repository.readable_by(current_user)
-
-    # filter repository objects on requested repository name
-    if repository
-      readable = readable.where(name: repository)
-    end
+    gitdir, is_remote = git_dir_for repository
+    fetch_remote_repository gitdir, repository if is_remote
+    ENV['GIT_DIR'] = gitdir
 
     commits = []
-    readable.each do |r|
-      if on_disk_repos[r.name]
-        ENV['GIT_DIR'] = on_disk_repos[r.name][:git_dir]
 
-        # We've filtered for invalid characters, so we can pass the contents of
-        # minimum and maximum safely on the command line
+    # Get the commit hash for the upper bound
+    max_hash = nil
+    IO.foreach("|git rev-list --max-count=1 #{maximum.shellescape} --") do |line|
+      max_hash = line.strip
+    end
 
-        # Get the commit hash for the upper bound
-        max_hash = nil
-        IO.foreach("|git rev-list --max-count=1 #{maximum.shellescape} --") do |line|
-          max_hash = line.strip
-        end
+    # If not found or string is invalid, nothing else to do
+    return [] if !max_hash or !git_check_ref_format(max_hash)
 
-        # If not found or string is invalid, nothing else to do
-        next if !max_hash or !git_check_ref_format(max_hash)
-
-        resolved_exclude = nil
-        if exclude
-          resolved_exclude = []
-          exclude.each do |e|
-            if git_check_ref_format(e)
-              IO.foreach("|git rev-list --max-count=1 #{e.shellescape} --") do |line|
-                resolved_exclude.push(line.strip)
-              end
-            else
-              logger.warn "find_commit_range called with invalid exclude invalid characters: '#{exclude}'"
-              return nil
-            end
+    resolved_exclude = nil
+    if exclude
+      resolved_exclude = []
+      exclude.each do |e|
+        if git_check_ref_format(e)
+          IO.foreach("|git rev-list --max-count=1 #{e.shellescape} --") do |line|
+            resolved_exclude.push(line.strip)
           end
+        else
+          logger.warn "find_commit_range called with invalid exclude invalid characters: '#{exclude}'"
+          return []
         end
+      end
+    end
 
-        if minimum
-          # Get the commit hash for the lower bound
-          min_hash = nil
-          IO.foreach("|git rev-list --max-count=1 #{minimum.shellescape} --") do |line|
-            min_hash = line.strip
-          end
-
-          # If not found or string is invalid, nothing else to do
-          next if !min_hash or !git_check_ref_format(min_hash)
+    if minimum
+      # Get the commit hash for the lower bound
+      min_hash = nil
+      IO.foreach("|git rev-list --max-count=1 #{minimum.shellescape} --") do |line|
+        min_hash = line.strip
+      end
 
-          # Now find all commits between them
-          IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
-            hash = line.strip
-            commits.push(hash) if !resolved_exclude or !resolved_exclude.include? hash
-          end
+      # If not found or string is invalid, nothing else to do
+      return [] if !min_hash or !git_check_ref_format(min_hash)
 
-          commits.push(min_hash) if !resolved_exclude or !resolved_exclude.include? min_hash
-        else
-          commits.push(max_hash) if !resolved_exclude or !resolved_exclude.include? max_hash
-        end
-      else
-        logger.warn "Repository #{r.name} exists in table but not found on disk"
+      # Now find all commits between them
+      IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
+        hash = line.strip
+        commits.push(hash) if !resolved_exclude or !resolved_exclude.include? hash
       end
-    end
 
-    if !commits or commits.empty?
-      nil
+      commits.push(min_hash) if !resolved_exclude or !resolved_exclude.include? min_hash
     else
-      commits
+      commits.push(max_hash) if !resolved_exclude or !resolved_exclude.include? max_hash
     end
+
+    commits
   end
 
-  # Import all commits from configured git directory into the commits
-  # database.
-
-  def self.import_all
-    repositories.each do |repo_name, repo|
-      stat = { true => 0, false => 0 }
-      ENV['GIT_DIR'] = repo[:git_dir]
-      IO.foreach("|git rev-list --format=oneline --all") do |line|
-        sha1, message = line.strip.split " ", 2
-        imported = false
-        Commit.find_or_create_by_repository_name_and_sha1_and_message(repo_name, sha1, message[0..254]) do
-          imported = true
-        end
-        stat[!!imported] += 1
-        if (stat[true] + stat[false]) % 100 == 0
-          if $stdout.tty? or ARGV[0] == '-v'
-            puts "#{$0} #{$$}: repo #{repo_name} add #{stat[true]} skip #{stat[false]}"
-          end
-        end
-      end
-      if $stdout.tty? or ARGV[0] == '-v'
-        puts "#{$0} #{$$}: repo #{repo_name} add #{stat[true]} skip #{stat[false]}"
-      end
+  # Given a repository (url, or name of hosted repo) and commit sha1,
+  # copy the commit into the internal git repo and tag it with the
+  # given tag (typically a job UUID).
+  #
+  # The repo can be a remote url, but in this case sha1 must already
+  # be present in our local cache for that repo: e.g., sha1 was just
+  # returned by find_commit_range.
+  def self.tag_in_internal_repository repo, sha1, tag
+    unless git_check_ref_format tag
+      raise ArgumentError.new "invalid tag #{tag}"
     end
+    unless /^[0-9a-f]{40}$/ =~ sha1
+      raise ArgumentError.new "invalid sha1 #{sha1}"
+    end
+    src_gitdir, _ = git_dir_for repo
+    dst_gitdir = Rails.configuration.git_internal_dir
+    must_pipe("echo #{sha1.shellescape}",
+              "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout",
+              "git --git-dir #{dst_gitdir.shellescape} unpack-objects -q")
+    must_git(dst_gitdir,
+             "tag --force #{tag.shellescape} #{sha1.shellescape}")
   end
 
-  def self.refresh_repositories
-    @repositories = nil
+  protected
+
+  def self.remote_url? repository
+    /^(https?|git):\/\// =~ repository
   end
 
-  protected
+  # Return [local_git_dir, is_remote]. If is_remote, caller must use
+  # fetch_remote_repository to ensure content is up-to-date.
+  #
+  # Raises an exception if the latest content could not be fetched for
+  # any reason.
+  def self.git_dir_for repo_name
+    if remote_url? repo_name
+      return [cache_dir_for(repo_name), true]
+    end
+    repos = Repository.readable_by(current_user).where(name: repo_name)
+    if repos.count == 0
+      raise ArgumentError.new "Repository not found: '#{repo_name}'"
+    elsif repos.count > 1
+      logger.error "Multiple repositories with name=='#{repo_name}'!"
+      raise ArgumentError.new "Name conflict"
+    else
+      return [repos.first.server_path, false]
+    end
+  end
+
+  def self.cache_dir_for git_url
+    Rails.root.join('tmp', 'git', Digest::SHA1.hexdigest(git_url) + ".git").
+      to_s
+  end
 
- def self.repositories
-   return @repositories if @repositories
+  def self.fetch_remote_repository gitdir, git_url
+    # Caller decides which protocols are worth using. This is just a
+    # safety check to ensure we never use urls like "--flag" or wander
+    # into git's hardlink features by using bare "/path/foo" instead
+    # of "file:///path/foo".
+    unless /^[a-z]+:\/\// =~ git_url
+      raise ArgumentError.new "invalid git url #{git_url}"
+    end
+    FileUtils.mkdir_p gitdir
+    must_git(gitdir,
+             "init",
+             "fetch --no-progress --tags --prune --force --update-head-ok #{git_url.shellescape} 'refs/heads/*:refs/heads/*'")
+  end
 
-   @repositories = {}
-   Repository.find_each do |repo|
-     if git_dir = repo.server_path
-       @repositories[repo.name] = {git_dir: git_dir}
-     end
-   end
+  def self.must_git gitdir, *cmds
+    # Clear token in case a git helper tries to use it as a password.
+    orig_token = ENV['ARVADOS_API_TOKEN']
+    ENV['ARVADOS_API_TOKEN'] = ''
+    begin
+      git = "git --git-dir #{gitdir.shellescape}"
+      cmds.each do |cmd|
+        must_pipe git+" "+cmd
+      end
+    ensure
+      ENV['ARVADOS_API_TOKEN'] = orig_token
+    end
+  end
 
-   @repositories
- end
+  def self.must_pipe *cmds
+    cmd = cmds.join(" 2>&1 |") + " 2>&1"
+    out = IO.read("| </dev/null #{cmd}")
+    if not $?.success?
+      raise GitError.new "#{cmd}: #{$?}: #{out}"
+    end
+  end
 end
diff --git a/services/api/app/models/job.rb b/services/api/app/models/job.rb
index 01df069..423d93c 100644
--- a/services/api/app/models/job.rb
+++ b/services/api/app/models/job.rb
@@ -16,6 +16,7 @@ class Job < ArvadosModel
   validate :validate_status
   validate :validate_state_change
   validate :ensure_no_collection_uuids_in_script_params
+  before_save :tag_version_in_internal_repository
   before_save :update_timestamps_when_state_changes
 
   has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version
@@ -130,15 +131,34 @@ class Job < ArvadosModel
       # instead of a commit-ish.
       return true
     end
-    if new_record? or script_version_changed?
-      sha1 = Commit.find_commit_range(current_user, self.repository, nil, self.script_version, nil)[0] rescue nil
-      if sha1
-        self.supplied_script_version = self.script_version if self.supplied_script_version.nil? or self.supplied_script_version.empty?
-        self.script_version = sha1
-      else
-        self.errors.add :script_version, "#{self.script_version} does not resolve to a commit"
+    if new_record? or repository_changed? or script_version_changed?
+      sha1 = Commit.find_commit_range(current_user, repository,
+                                      nil, script_version, nil).first
+      if not sha1
+        errors.add :script_version, "#{script_version} does not resolve to a commit"
         return false
       end
+      if supplied_script_version.nil? or supplied_script_version.empty?
+        self.supplied_script_version = script_version
+      end
+      self.script_version = sha1
+    end
+    true
+  end
+
+  def tag_version_in_internal_repository
+    if self.state == Running
+      # No point now. See ensure_script_version_is_commit.
+      true
+    elsif new_record? or repository_changed? or script_version_changed?
+      uuid_was = uuid
+      begin
+        assign_uuid
+        Commit.tag_in_internal_repository repository, script_version, uuid
+      rescue
+        uuid = uuid_was
+        raise
+      end
     end
   end
 
@@ -171,7 +191,7 @@ class Job < ArvadosModel
                                :arvados_sdk_version) do |git_search|
       commits = Commit.find_commit_range(current_user, "arvados",
                                          nil, git_search, nil)
-      if commits.nil? or commits.empty?
+      if commits.empty?
         [false, "#{git_search} does not resolve to a commit"]
       elsif not runtime_constraints["docker_image"]
         [false, "cannot be specified without a Docker image constraint"]
diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index 1696e2c..0d936b7 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -45,6 +45,8 @@ test:
   blob_signing_key: zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc
   user_profile_notification_address: arvados at example.com
   workbench_address: https://localhost:3001/
+  git_repositories_dir: <%= Rails.root.join 'tmp', 'git', 'test' %>
+  git_internal_dir: <%= Rails.root.join 'tmp', 'internal.git' %>
 
 common:
   # The prefix used for all database identifiers to identify the record as
diff --git a/services/api/test/functional/arvados/v1/commits_controller_test.rb b/services/api/test/functional/arvados/v1/commits_controller_test.rb
index 2b109b6..4af1c6e 100644
--- a/services/api/test/functional/arvados/v1/commits_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/commits_controller_test.rb
@@ -1,102 +1,4 @@
 require 'test_helper'
-require 'helpers/git_test_helper'
-
-# NOTE: calling Commit.find_commit_range(user, nil, nil, 'rev') will produce
-# an error message "fatal: bad object 'rev'" on stderr if 'rev' does not exist
-# in a given repository.  Many of these tests report such errors; their presence
-# does not represent a fatal condition.
-#
-# TODO(twp): consider better error handling of these messages, or
-# decide to abandon it.
 
 class Arvados::V1::CommitsControllerTest < ActionController::TestCase
-  fixtures :repositories, :users
-
-  # See git_setup.rb for the commit log for test.git.tar
-  include GitTestHelper
-
-  test "test_find_commit_range" do
-    authorize_with :active
-
-  # single
-    a = Commit.find_commit_range(users(:active), nil, nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
-    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-  #test "test_branch1" do
-    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), nil, nil, 'master', nil)
-    assert_includes(a, 'f35f99b7d32bac257f5989df02b9f12ee1a9b0d6')
-    assert_includes(a, '077ba2ad3ea24a929091a9e6ce545c93199b8e57')
-
-  #test "test_branch2" do
-    a = Commit.find_commit_range(users(:active), 'active/foo', nil, 'b1', nil)
-    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
-
-  #test "test_branch3" do
-    a = Commit.find_commit_range(users(:active), 'active/foo', nil, 'HEAD', nil)
-    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
-
-  #test "test_single_revision_repo" do
-    a = Commit.find_commit_range(users(:active), "active/foo", nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
-    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-    a = Commit.find_commit_range(users(:active), "arvados", nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
-    assert_equal nil, a
-
-  #test "test_multi_revision" do
-    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', nil)
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-  #test "test_tag" do
-    # complains "fatal: ambiguous argument 'tag1': unknown revision or path
-    # not in the working tree."
-    a = Commit.find_commit_range(users(:active), nil, 'tag1', 'master', nil)
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577'], a
-
-  #test "test_multi_revision_exclude" do
-    a = Commit.find_commit_range(users(:active), nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['4fe459abe02d9b365932b8f5dc419439ab4e2577'])
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-  #test "test_multi_revision_tagged_exclude" do
-    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['tag1'])
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-    Dir.mktmpdir do |touchdir|
-      # invalid input to maximum
-      a = Commit.find_commit_range(users(:active), nil, nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to maximum
-      a = Commit.find_commit_range(users(:active), nil, nil, "$(uname>#{touchdir}/uh_oh)", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to minimum
-      a = Commit.find_commit_range(users(:active), nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to minimum
-      a = Commit.find_commit_range(users(:active), nil, "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to 'excludes'
-      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      a = Commit.find_commit_range(users(:active), nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to 'excludes'
-      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      a = Commit.find_commit_range(users(:active), nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-    end
-
-  end
-
 end
diff --git a/services/api/test/functional/arvados/v1/jobs_controller_test.rb b/services/api/test/functional/arvados/v1/jobs_controller_test.rb
index b8b061f..1e1425e 100644
--- a/services/api/test/functional/arvados/v1/jobs_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/jobs_controller_test.rb
@@ -392,4 +392,45 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     post :lock, {id: jobs(:running).uuid}
     assert_response 403 # forbidden
   end
+
+  test 'reject invalid commit in remote repository' do
+    authorize_with :active
+    url = "http://localhost:1/fake/fake.git"
+    fetch_remote_from_local_repo url, :foo
+    post :create, job: {
+      script: "hash",
+      script_version: "abc123",
+      repository: url,
+      script_parameters: {}
+    }
+    assert_response 422
+  end
+
+  test 'tag remote commit in internal repository' do
+    authorize_with :active
+    url = "http://localhost:1/fake/fake.git"
+    fetch_remote_from_local_repo url, :foo
+    post :create, job: {
+      script: "hash",
+      script_version: "master",
+      repository: url,
+      script_parameters: {}
+    }
+    assert_response :success
+    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
+                 internal_tag(json_response['uuid']))
+  end
+
+  test 'tag local commit in internal repository' do
+    authorize_with :active
+    post :create, job: {
+      script: "hash",
+      script_version: "master",
+      repository: "active/foo",
+      script_parameters: {}
+    }
+    assert_response :success
+    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
+                 internal_tag(json_response['uuid']))
+  end
 end
diff --git a/services/api/test/helpers/git_test_helper.rb b/services/api/test/helpers/git_test_helper.rb
index 67e99c1..b4ac46d 100644
--- a/services/api/test/helpers/git_test_helper.rb
+++ b/services/api/test/helpers/git_test_helper.rb
@@ -17,12 +17,37 @@ module GitTestHelper
       @tmpdir = Dir.mktmpdir()
       system("tar", "-xC", @tmpdir, "-f", "test/test.git.tar")
       Rails.configuration.git_repositories_dir = "#{@tmpdir}/test"
-      Commit.refresh_repositories
+      intdir = Rails.configuration.git_internal_dir
+      if not File.exist? intdir
+        FileUtils.mkdir_p intdir
+        IO.read("|git --git-dir #{intdir.to_s.shellescape} init")
+        assert $?.success?
+      end
     end
 
     base.teardown do
       FileUtils.remove_entry @tmpdir, true
-      Commit.refresh_repositories
+    end
+  end
+
+  def internal_tag tag
+    IO.read "|git --git-dir #{Rails.configuration.git_internal_dir.shellescape} log --format=format:%H -n1 #{tag.shellescape}"
+  end
+
+  # Intercept fetch_remote_repository and fetch from a specified url
+  # or local fixture instead of the remote url requested. fakeurl can
+  # be a url (probably starting with file:///) or the name of a
+  # fixture (as a symbol)
+  def fetch_remote_from_local_repo url, fakeurl
+    if fakeurl.is_a? Symbol
+      fakeurl = 'file://' + repositories(fakeurl).server_path
+    end
+    Commit.expects(:fetch_remote_repository).once.with do |gitdir, giturl|
+      if giturl == url
+        Commit.unstub(:fetch_remote_repository)
+        Commit.fetch_remote_repository gitdir, fakeurl
+        true
+      end
     end
   end
 end
diff --git a/services/api/test/test_helper.rb b/services/api/test/test_helper.rb
index bf5afea..68d4bbf 100644
--- a/services/api/test/test_helper.rb
+++ b/services/api/test/test_helper.rb
@@ -22,6 +22,7 @@ end
 
 require File.expand_path('../../config/environment', __FILE__)
 require 'rails/test_help'
+require 'mocha/mini_test'
 
 module ArvadosTestSupport
   def json_response
diff --git a/services/api/test/unit/commit_test.rb b/services/api/test/unit/commit_test.rb
index 2424af3..7e348bd 100644
--- a/services/api/test/unit/commit_test.rb
+++ b/services/api/test/unit/commit_test.rb
@@ -1,7 +1,156 @@
 require 'test_helper'
+require 'helpers/git_test_helper'
+
+# NOTE: calling Commit.find_commit_range(user, nil, nil, 'rev')
+# produces an error message "fatal: bad object 'rev'" on stderr if
+# 'rev' does not exist in a given repository.  Many of these tests
+# report such errors; their presence does not represent a fatal
+# condition.
 
 class CommitTest < ActiveSupport::TestCase
-  # test "the truth" do
-  #   assert true
-  # end
+  # See git_setup.rb for the commit log for test.git.tar
+  include GitTestHelper
+
+  test 'find_commit_range does not bypass permissions' do
+    assert_raises ArgumentError do
+      c = Commit.find_commit_range users(:inactive), 'foo', nil, 'master', []
+    end
+  end
+
+  [
+   'https://github.com/curoverse/arvados.git',
+   'http://github.com/curoverse/arvados.git',
+   'git://github.com/curoverse/arvados.git',
+  ].each do |url|
+    test "find_commit_range uses fetch_remote_repository to get #{url}" do
+      fake_gitdir = repositories(:foo).server_path
+      Commit.expects(:fetch_remote_repository).
+        once.with(Commit.cache_dir_for(url), url).returns fake_gitdir
+      c = Commit.find_commit_range users(:active), url, nil, 'master', []
+      refute_empty c
+    end
+  end
+
+  [
+   'bogus/repo',
+   '/bogus/repo',
+   '/not/allowed/.git',
+   'file:///not/allowed.git',
+   'git.curoverse.com/arvados.git',
+   'github.com/curoverse/arvados.git',
+  ].each do |url|
+    test "find_commit_range skips fetch_remote_repository for #{url}" do
+      Commit.expects(:fetch_remote_repository).never
+      assert_raises ArgumentError do
+        Commit.find_commit_range users(:active), url, nil, 'master', []
+      end
+    end
+  end
+
+  test 'fetch_remote_repository does not leak commits across repositories' do
+    url = "http://localhost:1/fake/fake.git"
+    fetch_remote_from_local_repo url, :foo
+    c = Commit.find_commit_range users(:active), url, nil, 'master', []
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57'], c
+
+    url = "http://localhost:2/fake/fake.git"
+    fetch_remote_from_local_repo url, 'file://' + File.expand_path('../../.git', Rails.root)
+    c = Commit.find_commit_range users(:active), url, nil, '077ba2ad3ea24a929091a9e6ce545c93199b8e57', []
+    assert_equal [], c
+  end
+
+  test 'fetch_remote_repository finds versions' do
+    skip 'not written yet'
+  end
+
+  test 'tag_in_internal_repository creates and updates tags in internal.git' do
+    authorize_with :active
+    gitint = "git --git-dir #{Rails.configuration.git_internal_dir}"
+    IO.read("|#{gitint} tag -d testtag 2>/dev/null") # "no such tag", fine
+    assert_match /^fatal: /, IO.read("|#{gitint} show testtag 2>&1")
+    refute $?.success?
+    Commit.tag_in_internal_repository 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', 'testtag'
+    assert_match /^commit 31ce37f/, IO.read("|#{gitint} show testtag")
+    assert $?.success?
+  end
+
+  test "find_commit_range laundry list" do
+    authorize_with :active
+
+    # single
+    a = Commit.find_commit_range(users(:active), 'active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    #test "test_branch1" do
+    a = Commit.find_commit_range(users(:active), 'active/foo', nil, 'master', nil)
+    assert_includes(a, '077ba2ad3ea24a929091a9e6ce545c93199b8e57')
+
+    #test "test_branch2" do
+    a = Commit.find_commit_range(users(:active), 'active/foo', nil, 'b1', nil)
+    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
+
+    #test "test_branch3" do
+    a = Commit.find_commit_range(users(:active), 'active/foo', nil, 'HEAD', nil)
+    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
+
+    #test "test_single_revision_repo" do
+    a = Commit.find_commit_range(users(:active), 'active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+    a = Commit.find_commit_range(users(:active), 'arvados', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    assert_equal [], a
+
+    #test "test_multi_revision" do
+    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+    a = Commit.find_commit_range(users(:active), 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', nil)
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    #test "test_tag" do
+    # complains "fatal: ambiguous argument 'tag1': unknown revision or path
+    # not in the working tree."
+    a = Commit.find_commit_range(users(:active), 'active/foo', 'tag1', 'master', nil)
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577'], a
+
+    #test "test_multi_revision_exclude" do
+    a = Commit.find_commit_range(users(:active), 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['4fe459abe02d9b365932b8f5dc419439ab4e2577'])
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    #test "test_multi_revision_tagged_exclude" do
+    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+    a = Commit.find_commit_range(users(:active), 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['tag1'])
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    Dir.mktmpdir do |touchdir|
+      # invalid input to maximum
+      a = Commit.find_commit_range(users(:active), 'active/foo', nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to maximum
+      a = Commit.find_commit_range(users(:active), 'active/foo', nil, "$(uname>#{touchdir}/uh_oh)", nil)
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to minimum
+      a = Commit.find_commit_range(users(:active), 'active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to minimum
+      a = Commit.find_commit_range(users(:active), 'active/foo', "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to 'excludes'
+      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+      a = Commit.find_commit_range(users(:active), 'active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to 'excludes'
+      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+      a = Commit.find_commit_range(users(:active), 'active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+    end
+  end
 end
diff --git a/services/api/test/unit/job_test.rb b/services/api/test/unit/job_test.rb
index 1c8573e..3cffbb9 100644
--- a/services/api/test/unit/job_test.rb
+++ b/services/api/test/unit/job_test.rb
@@ -400,4 +400,15 @@ class JobTest < ActiveSupport::TestCase
     job = Job.create!(job_attrs(good_params))
     assert job.valid?
   end
+
+  test 'update job uuid tag in internal.git when version changes' do
+    authorize_with :active
+    j = jobs :queued
+    j.update_attributes repository: 'active/foo', script_version: 'b1'
+    assert_equal('1de84a854e2b440dc53bf42f8548afa4c17da332',
+                 internal_tag(j.uuid))
+    j.update_attributes repository: 'active/foo', script_version: 'master'
+    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
+                 internal_tag(j.uuid))
+  end
 end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list