[ARVADOS] updated: 79f5693886a4aae6202f5d4b76a9d0a6ec89bcb0

git at public.curoverse.com git at public.curoverse.com
Fri Oct 16 20:03:56 EDT 2015


Summary of changes:

  discards  85b8f04b72fe97f329cefc3e3bef48451d0fe085 (commit)
  discards  b943ec0bc162454befcb70a6682a6d05248c44cc (commit)
  discards  cdd6a89b654e3eb530793cc8a552f452fc359a92 (commit)
  discards  fbeaa254898528f11ba3e54edfd47d99bc534ba4 (commit)
  discards  d93cabc080f1bbba53b7fde50b96d14f6b900143 (commit)
  discards  7d0dfbb31312c2f22d698572d38216015f885ea8 (commit)
  discards  1367c2b1f4771dd3d877df5a943c9a859636da7d (commit)
  discards  1dba563361d3d9dce369d336d3c0d8ce48b9a819 (commit)
  discards  d36d775dd76467129a0c9dbd93878d5b6b583040 (commit)
  discards  1a6dbfaaea9520accfa8ca401887be61ed884420 (commit)
  discards  c5566fdadf1bb466b6eb2d5133445887520aae63 (commit)
  discards  432fff73232a661701552550e0cb4eec10ad791c (commit)
  discards  bb1d507519384faec1344b96d250d6e1cf3e0f36 (commit)
  discards  11633603c279de5904b07d0d05a47a6bd2897f78 (commit)
  discards  a9b1a3fb8bcfa92735a1f70c1e982d75ae325fa4 (commit)
  discards  9b712695237b01dcf19aba4a403f1beedd2485b8 (commit)
  discards  1fd3a57b1a4a91e70478282b882f6da55ccb43f0 (commit)
  discards  f60d6b260c77c03db86c3aaaeab13b863e18d9cf (commit)
  discards  5cde00c12a33398eda11e069aa5ba8b89419d72f (commit)
  discards  12c77e30a5d36fc24f6897944914248819b2b3f4 (commit)
  discards  3d5693f7261a52cfa6eca54f054a43e7f5d049f9 (commit)
       via  79f5693886a4aae6202f5d4b76a9d0a6ec89bcb0 (commit)
       via  6a07585bb5fa3b2974a2d795314d5c0d2fcdc40e (commit)
       via  bf159738719badbbe1e08ee4f839027892912ab2 (commit)
       via  3db04ce738a8e032eda2ebf8fe1acfa3ced41bbd (commit)
       via  ead2387d5dbbf15065d0ec07a3a4982628fae995 (commit)
       via  c8f07fc798c13ad8a45fb322eda4a922f664bb4c (commit)
       via  6e19c8ac3af42ad249d6e2b48b93b629e43dc79b (commit)
       via  e333aa8b481c3347a3543eb108a622dfef5b30b6 (commit)
       via  d8a8e73d39fb5fe7bd4f23b2da11e5384299489d (commit)
       via  a3b118be7cfb7df89d4b236a6fa4d70963fc4931 (commit)
       via  7f26842ce8dbc3933086a824648969d1b90555f3 (commit)
       via  bbc35c5bc1ae72ef2b635292f490f2b0cf3f5475 (commit)
       via  f9bd0dc5aabd8ec7d811688cfb06c275a296acd5 (commit)
       via  d4f30dc6f8ef842e716cf37b850ab9b4fa45869a (commit)
       via  6535e3ebbb702bc73a031b3b4a4cc4987729dedb (commit)
       via  4f245ddd7f4cb4e10dc0d844a78e809c0cd97af7 (commit)
       via  c47e155f5f14a6563151b3a41374a4f547d10f0b (commit)
       via  4f5a6df52559b90d2c9412624f3c4c7fbe467579 (commit)
       via  477ff5db1a0cae68c75b045f0597c12ee06f67c9 (commit)
       via  11890d971215ec44b7abb1e24ad748aefa1061a7 (commit)
       via  2d45dab88989fcaa157b44a014c889d664919672 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the history of the old revision is not a strict subset of the
history of the new revision.  This situation occurs when you force-push a
change (git push --force) and generate a repository containing something
like this:

 * -- * -- B -- O -- O -- O (85b8f04b72fe97f329cefc3e3bef48451d0fe085)
            \
             N -- N -- N (79f5693886a4aae6202f5d4b76a9d0a6ec89bcb0)

When this happens we assume that you have already received alert emails
for all of the O revisions, so this email reports only the revisions on
the N branch, starting from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 79f5693886a4aae6202f5d4b76a9d0a6ec89bcb0
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Oct 14 04:07:37 2015 -0400

    5824: Update bundle

diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock
index 20b8d61..8b2118c 100644
--- a/apps/workbench/Gemfile.lock
+++ b/apps/workbench/Gemfile.lock
@@ -74,7 +74,7 @@ GEM
       rack (>= 1.0.0)
       rack-test (>= 0.5.4)
       xpath (~> 2.0)
-    childprocess (0.5.5)
+    childprocess (0.5.6)
       ffi (~> 1.0, >= 1.0.11)
     cliver (0.3.2)
     coffee-rails (4.1.0)
@@ -98,7 +98,7 @@ GEM
     fast_stack (0.1.0)
       rake
       rake-compiler
-    ffi (1.9.6)
+    ffi (1.9.10)
     flamegraph (0.1.0)
       fast_stack
     google-api-client (0.6.4)
@@ -139,7 +139,7 @@ GEM
       metaclass (~> 0.0.1)
     morrisjs-rails (0.5.1)
       railties (> 3.1, < 5)
-    multi_json (1.11.1)
+    multi_json (1.11.2)
     multipart-post (1.2.0)
     net-scp (1.2.1)
       net-ssh (>= 2.6.5)
@@ -192,7 +192,7 @@ GEM
     ref (1.0.5)
     ruby-debug-passenger (0.2.0)
     ruby-prof (0.15.2)
-    rubyzip (1.1.6)
+    rubyzip (1.1.7)
     rvm-capistrano (1.5.5)
       capistrano (~> 2.15.4)
     sass (3.4.9)
@@ -202,7 +202,7 @@ GEM
       sprockets (>= 2.8, < 4.0)
       sprockets-rails (>= 2.0, < 4.0)
       tilt (~> 1.1)
-    selenium-webdriver (2.44.0)
+    selenium-webdriver (2.48.1)
       childprocess (~> 0.5)
       multi_json (~> 1.0)
       rubyzip (~> 1.0)
@@ -239,7 +239,7 @@ GEM
       execjs (>= 0.3.0)
       json (>= 1.8.0)
     uuidtools (2.1.5)
-    websocket (1.2.1)
+    websocket (1.2.2)
     websocket-driver (0.5.1)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.1)
@@ -294,3 +294,6 @@ DEPENDENCIES
   therubyracer
   uglifier (>= 1.0.3)
   wiselinks
+
+BUNDLED WITH
+   1.10.6

commit 6a07585bb5fa3b2974a2d795314d5c0d2fcdc40e
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Oct 13 10:52:06 2015 -0400

    5824: Use keep-web in Workbench integration tests

diff --git a/apps/workbench/test/helpers/download_helper.rb b/apps/workbench/test/helpers/download_helper.rb
new file mode 100644
index 0000000..21fb4cd
--- /dev/null
+++ b/apps/workbench/test/helpers/download_helper.rb
@@ -0,0 +1,21 @@
+module DownloadHelper
+  module_function
+
+  def path
+    Rails.root.join 'tmp', 'downloads'
+  end
+
+  def clear
+    FileUtils.rm_f path
+    begin
+      Dir.mkdir path
+    rescue Errno::EEXIST
+    end
+  end
+
+  def done
+    Dir[path.join '*'].reject do |f|
+      /\.part$/ =~ f
+    end
+  end
+end
diff --git a/apps/workbench/test/integration/collection_upload_test.rb b/apps/workbench/test/integration/collection_upload_test.rb
index 62efee4..5e407ce 100644
--- a/apps/workbench/test/integration/collection_upload_test.rb
+++ b/apps/workbench/test/integration/collection_upload_test.rb
@@ -7,9 +7,19 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
         io.write content
       end
     end
+    # Database reset doesn't restore KeepServices; we have to
+    # save/restore manually.
+    use_token :admin do
+      @keep_services = KeepService.all.to_a
+    end
   end
 
   teardown do
+    use_token :admin do
+      @keep_services.each do |ks|
+        KeepService.find(ks.uuid).update_attributes(ks.attributes)
+      end
+    end
     testfiles.each do |filename, _|
       File.unlink(testfile_path filename)
     end
@@ -64,10 +74,9 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
   test "Report mixed-content error" do
     skip 'Test suite does not use TLS'
     need_selenium "to make file uploads work"
-    begin
-      use_token :admin
-      proxy = KeepService.find(api_fixture('keep_services')['proxy']['uuid'])
-      proxy.update_attributes service_ssl_flag: false
+    use_token :admin do
+      KeepService.where(service_type: 'proxy').first.
+        update_attributes(service_ssl_flag: false)
     end
     visit page_with_token 'active', sandbox_path
     find('.nav-tabs a', text: 'Upload').click
@@ -82,11 +91,12 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
 
   test "Report network error" do
     need_selenium "to make file uploads work"
-    begin
-      use_token :admin
-      proxy = KeepService.find(api_fixture('keep_services')['proxy']['uuid'])
-      # Even if you somehow do port>2^16, surely nx.example.net won't respond
-      proxy.update_attributes service_host: 'nx.example.net', service_port: 99999
+    use_token :admin do
+      # Even if you somehow do port>2^16, surely nx.example.net won't
+      # respond
+      KeepService.where(service_type: 'proxy').first.
+        update_attributes(service_host: 'nx.example.net',
+                          service_port: 99999)
     end
     visit page_with_token 'active', sandbox_path
     find('.nav-tabs a', text: 'Upload').click
diff --git a/apps/workbench/test/integration/download_test.rb b/apps/workbench/test/integration/download_test.rb
new file mode 100644
index 0000000..9e4fd56
--- /dev/null
+++ b/apps/workbench/test/integration/download_test.rb
@@ -0,0 +1,45 @@
+require 'integration_helper'
+require 'helpers/download_helper'
+
+class DownloadTest < ActionDispatch::IntegrationTest
+  setup do
+    portfile = File.expand_path '../../../../../tmp/keep-web-ssl.port', __FILE__
+    @kwport = File.read portfile
+    Rails.configuration.keep_web_url = "https://localhost:#{@kwport}/c=%{uuid_or_pdh}"
+    CollectionsController.any_instance.expects(:file_enumerator).never
+
+    # Make sure Capybara can download files.
+    need_selenium 'for downloading', :selenium_with_download
+    DownloadHelper.clear
+
+    # Keep data isn't populated by fixtures, so we have to write any
+    # data we expect to read.
+    unless /^acbd/ =~ `echo -n foo | arv-put --no-progress --raw -` && $?.success?
+      raise $?.to_s
+    end
+  end
+
+  test "download from keep-web with a reader token" do
+    uuid = api_fixture('collections')['foo_file']['uuid']
+    token = api_fixture('api_client_authorizations')['active_all_collections']['api_token']
+    visit "/collections/download/#{uuid}/#{token}/"
+    within "#collection_files" do
+      click_link "foo"
+    end
+    data = nil
+    tries = 0
+    while tries < 20
+      sleep 0.1
+      tries += 1
+      data = File.read(DownloadHelper.path.join 'foo') rescue nil
+    end
+    assert_equal 'foo', data
+  end
+
+  # TODO(TC): test "view pages hosted by keep-web, using session
+  # token". We might persuade selenium to send
+  # "collection-uuid.dl.example" requests to localhost by configuring
+  # our test nginx server to work as its forward proxy. Until then,
+  # we're relying on the "Redirect to keep_web_url via #{id_type}"
+  # test in CollectionsControllerTest (and keep-web's tests).
+end
diff --git a/apps/workbench/test/integration_helper.rb b/apps/workbench/test/integration_helper.rb
index 39fdf4b..5750a1b 100644
--- a/apps/workbench/test/integration_helper.rb
+++ b/apps/workbench/test/integration_helper.rb
@@ -19,6 +19,17 @@ Capybara.register_driver :poltergeist_without_file_api do |app|
   Capybara::Poltergeist::Driver.new app, POLTERGEIST_OPTS.merge(extensions: [js])
 end
 
+Capybara.register_driver :selenium_with_download do |app|
+  profile = Selenium::WebDriver::Firefox::Profile.new
+  profile['browser.download.dir'] = DownloadHelper.path.to_s
+  profile['browser.download.downloadDir'] = DownloadHelper.path.to_s
+  profile['browser.download.defaultFolder'] = DownloadHelper.path.to_s
+  profile['browser.download.folderList'] = 2 # "save to user-defined location"
+  profile['browser.download.manager.showWhenStarting'] = false
+  profile['browser.helperApps.alwaysAsk.force'] = false
+  Capybara::Selenium::Driver.new app, profile: profile
+end
+
 module WaitForAjax
   Capybara.default_wait_time = 5
   def wait_for_ajax
@@ -73,8 +84,8 @@ module HeadlessHelper
     end
   end
 
-  def need_selenium reason=nil
-    Capybara.current_driver = :selenium
+  def need_selenium reason=nil, driver=:selenium
+    Capybara.current_driver = driver
     unless ENV['ARVADOS_TEST_HEADFUL'] or @headless
       @headless = HeadlessSingleton.get
       @headless.start
diff --git a/apps/workbench/test/test_helper.rb b/apps/workbench/test/test_helper.rb
index 89d15c6..41592af 100644
--- a/apps/workbench/test/test_helper.rb
+++ b/apps/workbench/test/test_helper.rb
@@ -176,7 +176,10 @@ class ApiServerForTests
       # though it doesn't need to start up a new server).
       env_script = check_output %w(python ./run_test_server.py start --auth admin)
       check_output %w(python ./run_test_server.py start_arv-git-httpd)
+      check_output %w(python ./run_test_server.py start_keep-web)
       check_output %w(python ./run_test_server.py start_nginx)
+      # This one isn't a no-op, even under run-tests.sh.
+      check_output %w(python ./run_test_server.py start_keep)
     end
     test_env = {}
     env_script.each_line do |line|
@@ -192,9 +195,11 @@ class ApiServerForTests
 
   def stop_test_server
     Dir.chdir PYTHON_TESTS_DIR do
+      check_output %w(python ./run_test_server.py stop_keep)
       # These are no-ops if we're running within run-tests.sh
       check_output %w(python ./run_test_server.py stop_nginx)
       check_output %w(python ./run_test_server.py stop_arv-git-httpd)
+      check_output %w(python ./run_test_server.py stop_keep-web)
       check_output %w(python ./run_test_server.py stop)
     end
     @@server_is_running = false
diff --git a/sdk/python/tests/nginx.conf b/sdk/python/tests/nginx.conf
index 6196605..885f84e 100644
--- a/sdk/python/tests/nginx.conf
+++ b/sdk/python/tests/nginx.conf
@@ -28,4 +28,18 @@ http {
       proxy_pass http://keepproxy;
     }
   }
+  upstream keep-web {
+    server localhost:{{KEEPWEBPORT}};
+  }
+  server {
+    listen *:{{KEEPWEBSSLPORT}} ssl default_server;
+    server_name ~^(?<request_host>.*)$;
+    ssl_certificate {{SSLCERT}};
+    ssl_certificate_key {{SSLKEY}};
+    location  / {
+      proxy_pass http://keep-web;
+      proxy_set_header Host $request_host:{{KEEPWEBPORT}};
+      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    }
+  }
 }
diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py
index d90d2ad..809cf40 100644
--- a/sdk/python/tests/run_test_server.py
+++ b/sdk/python/tests/run_test_server.py
@@ -345,7 +345,7 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
         token=os.environ['ARVADOS_API_TOKEN'],
         insecure=True)
 
-    for d in api.keep_services().list().execute()['items']:
+    for d in api.keep_services().list(filters=[['service_type','=','disk']]).execute()['items']:
         api.keep_services().delete(uuid=d['uuid']).execute()
     for d in api.keep_disks().list().execute()['items']:
         api.keep_disks().delete(uuid=d['uuid']).execute()
@@ -438,10 +438,35 @@ def stop_arv_git_httpd():
         return
     kill_server_pid(_pidfile('arv-git-httpd'), wait=0)
 
+def run_keep_web():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    stop_keep_web()
+
+    keepwebport = find_available_port()
+    env = os.environ.copy()
+    env.pop('ARVADOS_API_TOKEN', None)
+    keepweb = subprocess.Popen(
+        ['keep-web',
+         '-attachment-only-host=localhost:'+str(keepwebport),
+         '-address=:'+str(keepwebport)],
+        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    with open(_pidfile('keep-web'), 'w') as f:
+        f.write(str(keepweb.pid))
+    _setport('keep-web', keepwebport)
+    _wait_until_port_listens(keepwebport)
+
+def stop_keep_web():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('keep-web'), wait=0)
+
 def run_nginx():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
     nginxconf = {}
+    nginxconf['KEEPWEBPORT'] = _getport('keep-web')
+    nginxconf['KEEPWEBSSLPORT'] = find_available_port()
     nginxconf['KEEPPROXYPORT'] = _getport('keepproxy')
     nginxconf['KEEPPROXYSSLPORT'] = find_available_port()
     nginxconf['GITPORT'] = _getport('arv-git-httpd')
@@ -465,6 +490,7 @@ def run_nginx():
          '-g', 'pid '+_pidfile('nginx')+';',
          '-c', conffile],
         env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    _setport('keep-web-ssl', nginxconf['KEEPWEBSSLPORT'])
     _setport('keepproxy-ssl', nginxconf['KEEPPROXYSSLPORT'])
     _setport('arv-git-httpd-ssl', nginxconf['GITSSLPORT'])
 
@@ -564,7 +590,8 @@ class TestCaseWithServers(unittest.TestCase):
         for server_kwargs, start_func, stop_func in (
                 (cls.MAIN_SERVER, run, reset),
                 (cls.KEEP_SERVER, run_keep, stop_keep),
-                (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy)):
+                (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy),
+                (cls.KEEP_WEB_SERVER, run_keep_web, stop_keep_web)):
             if server_kwargs is not None:
                 start_func(**server_kwargs)
                 cls._cleanup_funcs.append(stop_func)
@@ -590,6 +617,7 @@ if __name__ == "__main__":
         'start', 'stop',
         'start_keep', 'stop_keep',
         'start_keep_proxy', 'stop_keep_proxy',
+        'start_keep-web', 'stop_keep-web',
         'start_arv-git-httpd', 'stop_arv-git-httpd',
         'start_nginx', 'stop_nginx',
     ]
@@ -629,6 +657,10 @@ if __name__ == "__main__":
         run_arv_git_httpd()
     elif args.action == 'stop_arv-git-httpd':
         stop_arv_git_httpd()
+    elif args.action == 'start_keep-web':
+        run_keep_web()
+    elif args.action == 'stop_keep-web':
+        stop_keep_web()
     elif args.action == 'start_nginx':
         run_nginx()
     elif args.action == 'stop_nginx':
diff --git a/services/api/app/controllers/database_controller.rb b/services/api/app/controllers/database_controller.rb
index 64818da..21c8e47 100644
--- a/services/api/app/controllers/database_controller.rb
+++ b/services/api/app/controllers/database_controller.rb
@@ -29,6 +29,10 @@ class DatabaseController < ApplicationController
     fixturesets = Dir.glob(Rails.root.join('test', 'fixtures', '*.yml')).
       collect { |yml| yml.match(/([^\/]*)\.yml$/)[1] }
 
+    # Don't reset keep_services: clients need to discover our
+    # integration-testing keepstores, not test fixtures.
+    fixturesets -= %w[keep_services]
+
     table_names = '"' + ActiveRecord::Base.connection.tables.join('","') + '"'
 
     attempts_left = 20
diff --git a/services/api/test/fixtures/api_client_authorizations.yml b/services/api/test/fixtures/api_client_authorizations.yml
index 9199d17..cb96295 100644
--- a/services/api/test/fixtures/api_client_authorizations.yml
+++ b/services/api/test/fixtures/api_client_authorizations.yml
@@ -87,7 +87,7 @@ active_all_collections:
   user: active
   api_token: activecollectionsabcdefghijklmnopqrstuvwxyz1234567
   expires_at: 2038-01-01 00:00:00
-  scopes: ["GET /arvados/v1/collections/", "GET /arvados/v1/keep_disks"]
+  scopes: ["GET /arvados/v1/collections/", "GET /arvados/v1/keep_services/accessible"]
 
 active_userlist:
   api_client: untrusted

commit bf159738719badbbe1e08ee4f839027892912ab2
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Oct 12 19:15:06 2015 -0400

    5824: Add option to redirect Workbench downloads to a keep-web service

diff --git a/apps/workbench/app/controllers/collections_controller.rb b/apps/workbench/app/controllers/collections_controller.rb
index e01151c..38b58a1 100644
--- a/apps/workbench/app/controllers/collections_controller.rb
+++ b/apps/workbench/app/controllers/collections_controller.rb
@@ -1,4 +1,6 @@
 require "arvados/keep"
+require "uri"
+require "cgi"
 
 class CollectionsController < ApplicationController
   include ActionController::Live
@@ -130,11 +132,27 @@ class CollectionsController < ApplicationController
     usable_token = find_usable_token(tokens) do
       coll = Collection.find(params[:uuid])
     end
+    if usable_token.nil?
+      # Response already rendered.
+      return
+    end
+
+    if Rails.configuration.keep_web_url
+      opts = {}
+      if usable_token == params[:reader_token]
+        opts[:path_token] = usable_token
+      elsif usable_token == Rails.configuration.anonymous_user_token
+        # Don't pass a token at all
+      else
+        # We pass the current user's real token only if it's necessary
+        # to read the collection.
+        opts[:query_token] = usable_token
+      end
+      return redirect_to keep_web_url(params[:uuid], params[:file], opts)
+    end
 
     file_name = params[:file].andand.sub(/^(\.\/|\/|)/, './')
-    if usable_token.nil?
-      return  # Response already rendered.
-    elsif file_name.nil? or not coll.manifest.has_file?(file_name)
+    if file_name.nil? or not coll.manifest.has_file?(file_name)
       return render_not_found
     end
 
@@ -305,6 +323,21 @@ class CollectionsController < ApplicationController
     return nil
   end
 
+  def keep_web_url(uuid_or_pdh, file, opts)
+    fmt = {uuid_or_pdh: uuid_or_pdh.sub('+', '-')}
+    uri = URI.parse(Rails.configuration.keep_web_url % fmt)
+    uri.path += '/' unless uri.path.end_with? '/'
+    if opts[:path_token]
+      uri.path += 't=' + opts[:path_token] + '/'
+    end
+    uri.path += '_/'
+    uri.path += CGI::escape(file)
+    if opts[:query_token]
+      uri.query = 'api_token=' + CGI::escape(opts[:query_token])
+    end
+    uri.to_s
+  end
+
   # Note: several controller and integration tests rely on stubbing
   # file_enumerator to return fake file content.
   def file_enumerator opts
diff --git a/apps/workbench/config/application.default.yml b/apps/workbench/config/application.default.yml
index 00959bb..5504fd2 100644
--- a/apps/workbench/config/application.default.yml
+++ b/apps/workbench/config/application.default.yml
@@ -225,3 +225,11 @@ common:
   # E.g., using a name-based proxy server to forward connections to shell hosts:
   # https://%{hostname}.webshell.uuid_prefix.arvadosapi.com/
   shell_in_a_box_url: false
+
+  # Format of download/preview links. If false, use Workbench's
+  # download facility.
+  #
+  # Examples:
+  # keep_web_url: https://%{uuid_or_pdh}.dl.zzzzz.your.domain
+  # keep_web_url: https://%{uuid_or_pdh}--dl.zzzzz.your.domain
+  keep_web_url: false
diff --git a/apps/workbench/test/controllers/collections_controller_test.rb b/apps/workbench/test/controllers/collections_controller_test.rb
index 13644e0..b4e7dd3 100644
--- a/apps/workbench/test/controllers/collections_controller_test.rb
+++ b/apps/workbench/test/controllers/collections_controller_test.rb
@@ -514,4 +514,55 @@ class CollectionsControllerTest < ActionController::TestCase
     get :show, {id: api_fixture('collections')['user_agreement']['uuid']}, session_for(:active)
     assert_not_includes @response.body, '<a href="#Upload"'
   end
+
+  def setup_for_keep_web cfg='https://%{uuid_or_pdh}.dl.zzzzz.example'
+    Rails.configuration.keep_web_url = cfg
+    @controller.expects(:file_enumerator).never
+  end
+
+  %w(uuid portable_data_hash).each do |id_type|
+    test "Redirect to keep_web_url via #{id_type}" do
+      setup_for_keep_web
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.dl.zzzzz.example/_/w+a+z?api_token=#{tok}", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with reader token" do
+      setup_for_keep_web
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z", reader_token: tok}, session_for(:expired)
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.dl.zzzzz.example/t=#{tok}/_/w+a+z", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with no token" do
+      setup_for_keep_web
+      Rails.configuration.anonymous_user_token =
+        api_fixture('api_client_authorizations')['anonymous']['api_token']
+      id = api_fixture('collections')['public_text_file'][id_type]
+      get :show_file, {uuid: id, file: "Hello World.txt"}
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.dl.zzzzz.example/_/Hello+World.txt", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} using -attachment-only-host mode" do
+      setup_for_keep_web 'https://dl.zzzzz.example/c=%{uuid_or_pdh}'
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+      assert_response :redirect
+      assert_equal "https://dl.zzzzz.example/c=#{id.sub '+', '-'}/_/w+a+z?api_token=#{tok}", @response.redirect_url
+    end
+  end
+
+  test "No redirect to keep_web_url if collection not found" do
+    setup_for_keep_web
+    id = api_fixture('collections')['w_a_z_file']['uuid']
+    get :show_file, {uuid: id, file: "w a z"}, session_for(:spectator)
+    assert_response 404
+  end
 end

commit 3db04ce738a8e032eda2ebf8fe1acfa3ced41bbd
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Oct 1 22:16:51 2015 -0400

    5824: gofmt

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index b39a941..9751cd1 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -21,9 +21,9 @@ import (
 type handler struct{}
 
 var (
-	clientPool      = arvadosclient.MakeClientPool()
-	trustAllContent = false
-	anonymousTokens []string
+	clientPool         = arvadosclient.MakeClientPool()
+	trustAllContent    = false
+	anonymousTokens    []string
 	attachmentOnlyHost = ""
 )
 
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index a64aeb5..9b5ab2a 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -16,7 +16,7 @@ import (
 
 var _ = check.Suite(&UnitSuite{})
 
-type UnitSuite struct {}
+type UnitSuite struct{}
 
 func mustParseURL(s string) *url.URL {
 	r, err := url.Parse(s)
@@ -34,7 +34,7 @@ func (s *IntegrationSuite) TestVhost404(c *check.C) {
 		resp := httptest.NewRecorder()
 		req := &http.Request{
 			Method: "GET",
-			URL: mustParseURL(testURL),
+			URL:    mustParseURL(testURL),
 		}
 		(&handler{}).ServeHTTP(resp, req)
 		c.Check(resp.Code, check.Equals, http.StatusNotFound)
@@ -52,7 +52,7 @@ func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
 	doVhostRequests(c, authzViaAuthzHeader)
 }
 func authzViaAuthzHeader(r *http.Request, tok string) int {
-	r.Header.Add("Authorization", "OAuth2 " + tok)
+	r.Header.Add("Authorization", "OAuth2 "+tok)
 	return http.StatusUnauthorized
 }
 
@@ -61,7 +61,7 @@ func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
 }
 func authzViaCookieValue(r *http.Request, tok string) int {
 	r.AddCookie(&http.Cookie{
-		Name: "api_token",
+		Name:  "api_token",
 		Value: auth.EncodeTokenCookie([]byte(tok)),
 	})
 	return http.StatusUnauthorized
@@ -120,8 +120,8 @@ func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string)
 		u := mustParseURL("http://" + hostPath)
 		req := &http.Request{
 			Method: "GET",
-			Host: u.Host,
-			URL: u,
+			Host:   u.Host,
+			URL:    u,
 			Header: http.Header{},
 		}
 		failCode := authz(req, tok)
@@ -157,8 +157,8 @@ func doReq(req *http.Request) *httptest.ResponseRecorder {
 	u, _ := req.URL.Parse(resp.Header().Get("Location"))
 	req = &http.Request{
 		Method: "GET",
-		Host: u.Host,
-		URL: u,
+		Host:   u.Host,
+		URL:    u,
 		Header: http.Header{},
 	}
 	for _, c := range cookies {
@@ -169,8 +169,8 @@ func doReq(req *http.Request) *httptest.ResponseRecorder {
 
 func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		arvadostest.FooCollection + ".example.com/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		arvadostest.FooCollection+".example.com/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusOK,
@@ -179,8 +179,8 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
 
 func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusBadRequest,
@@ -193,8 +193,8 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C
 	}(trustAllContent)
 	trustAllContent = true
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusOK,
@@ -208,16 +208,16 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *chec
 	attachmentOnlyHost = "example.com:1234"
 
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusBadRequest,
 	)
 
 	resp := s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com:1234/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com:1234/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusOK,
@@ -227,7 +227,7 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *chec
 
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
-		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection+".example.com/foo",
 		"",
 		"application/x-www-form-urlencoded",
 		url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
@@ -237,7 +237,7 @@ func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
-		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection+".example.com/foo",
 		"",
 		"application/x-www-form-urlencoded",
 		url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
@@ -249,10 +249,10 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 	u, _ := url.Parse(`http://` + hostPath + queryString)
 	req := &http.Request{
 		Method: method,
-		Host: u.Host,
-		URL: u,
+		Host:   u.Host,
+		URL:    u,
 		Header: http.Header{"Content-Type": {contentType}},
-		Body: ioutil.NopCloser(strings.NewReader(body)),
+		Body:   ioutil.NopCloser(strings.NewReader(body)),
 	}
 
 	resp := httptest.NewRecorder()
@@ -261,14 +261,14 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 		c.Assert(resp.Code, check.Equals, expectStatus)
 		return resp
 	}
-	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
+	c.Check(resp.Body.String(), check.Matches, `.*href="//`+regexp.QuoteMeta(html.EscapeString(hostPath))+`".*`)
 	cookies := (&http.Response{Header: resp.Header()}).Cookies()
 
 	u, _ = u.Parse(resp.Header().Get("Location"))
 	req = &http.Request{
 		Method: "GET",
-		Host: u.Host,
-		URL: u,
+		Host:   u.Host,
+		URL:    u,
 		Header: http.Header{},
 	}
 	for _, c := range cookies {
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index fdbb50e..740d243 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -105,14 +105,14 @@ func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
 	err = arv.Create("collections",
 		map[string]interface{}{
 			"collection": map[string]interface{}{
-				"name": fmt.Sprintf("testdata blocksize=%d", blocksize),
+				"name":          fmt.Sprintf("testdata blocksize=%d", blocksize),
 				"manifest_text": mtext,
 			},
 		}, &coll)
 	c.Assert(err, check.Equals, nil)
 	uuid := coll["uuid"].(string)
 
-	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid + ".dl.example.com", "/testdata.bin")
+	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid+".dl.example.com", "/testdata.bin")
 	c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
 	c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
 	c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
@@ -139,82 +139,82 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 	for _, spec := range []curlCase{
 		// My collection
 		{
-			auth: arvadostest.ActiveToken,
-			host: arvadostest.FooCollection + "--dl.example.com",
-			path: "/foo",
+			auth:    arvadostest.ActiveToken,
+			host:    arvadostest.FooCollection + "--dl.example.com",
+			path:    "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			host: strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
-			path: "/t=" + arvadostest.ActiveToken + "/foo",
+			host:    strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
+			path:    "/t=" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			path: "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+			path:    "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			path: "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+			path:    "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			auth: "tokensobogus",
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			auth:    "tokensobogus",
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			auth: arvadostest.ActiveToken,
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			auth:    arvadostest.ActiveToken,
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			auth: arvadostest.AnonymousToken,
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			auth:    arvadostest.AnonymousToken,
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 
 		// Anonymously accessible user agreement
 		{
-			path: "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			path:    "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			host: arvadostest.HelloWorldCollection + ".dl.example.com",
-			path: "/Hello%20world.txt",
+			host:    arvadostest.HelloWorldCollection + ".dl.example.com",
+			path:    "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			host: arvadostest.HelloWorldCollection + ".dl.example.com",
-			path: "/_/Hello%20world.txt",
+			host:    arvadostest.HelloWorldCollection + ".dl.example.com",
+			path:    "/_/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.ActiveToken,
-			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			auth:    arvadostest.ActiveToken,
+			path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.SpectatorToken,
-			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			auth:    arvadostest.SpectatorToken,
+			path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.SpectatorToken,
-			host: arvadostest.HelloWorldCollection + "--dl.example.com",
-			path: "/Hello%20world.txt",
+			auth:    arvadostest.SpectatorToken,
+			host:    arvadostest.HelloWorldCollection + "--dl.example.com",
+			path:    "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.SpectatorToken,
-			path: "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+			auth:    arvadostest.SpectatorToken,
+			path:    "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 	} {
@@ -238,7 +238,7 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
 	curlArgs := []string{"--silent", "--show-error", "--include"}
 	testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
-	curlArgs = append(curlArgs, "--resolve", host + ":" + testPort + ":" + testHost)
+	curlArgs = append(curlArgs, "--resolve", host+":"+testPort+":"+testHost)
 	if token != "" {
 		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
 	}

commit ead2387d5dbbf15065d0ec07a3a4982628fae995
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Sep 7 03:43:59 2015 -0400

    5824: Add -attachment-only-host feature.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 236820e..cc47ebe 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -173,6 +173,19 @@
 // (``https://dl.example.com/'') and upload it to some other site
 // chosen by the author of collection X.
 //
+// Attachment-Only host
+//
+// It is possible to serve untrusted content and accept user
+// credentials at the same origin as long as the content is only
+// downloaded, never executed by browsers. A single origin (hostname
+// and port) can be designated as an "attachment-only" origin: cookies
+// will be accepted and all responses will have a
+// "Content-Disposition: attachment" header. This behavior is invoked
+// only when the designated origin matches exactly the Host header
+// provided by the client or upstream proxy.
+//
+//   keep-web -attachment-only-host domain.example:9999
+//
 // Trust All Content mode
 //
 // In "trust all content" mode, Keep-web will accept credentials (API
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index c5d439a..b39a941 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -24,11 +24,14 @@ var (
 	clientPool      = arvadosclient.MakeClientPool()
 	trustAllContent = false
 	anonymousTokens []string
+	attachmentOnlyHost = ""
 )
 
 func init() {
 	flag.BoolVar(&trustAllContent, "trust-all-content", false,
 		"Serve non-public content from a single origin. Dangerous: read docs before using!")
+	flag.StringVar(&attachmentOnlyHost, "attachment-only-host", "",
+		"Accept credentials, and add \"Content-Disposition: attachment\" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.")
 }
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
@@ -111,8 +114,16 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
+	var attachment bool
 	credentialsOK := trustAllContent
 
+	if r.Host != "" && r.Host == attachmentOnlyHost {
+		credentialsOK = true
+		attachment = true
+	} else if r.FormValue("disposition") == "attachment" {
+		attachment = true
+	}
+
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
 		// http://ID.dl.example/PATH...
 		credentialsOK = true
@@ -293,6 +304,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		}
 	}
 	w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
+	if attachment {
+		w.Header().Set("Content-Disposition", "attachment")
+	}
 
 	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index e2f8edd..a64aeb5 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -201,6 +201,30 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C
 	)
 }
 
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
+	defer func(orig string) {
+		attachmentOnlyHost = orig
+	}(attachmentOnlyHost)
+	attachmentOnlyHost = "example.com:1234"
+
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusBadRequest,
+	)
+
+	resp := s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com:1234/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+	c.Check(resp.Header().Get("Content-Disposition"), check.Equals, "attachment")
+}
+
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
 		arvadostest.FooCollection + ".example.com/foo",
@@ -221,7 +245,7 @@ func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C)
 	)
 }
 
-func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) {
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) *httptest.ResponseRecorder {
 	u, _ := url.Parse(`http://` + hostPath + queryString)
 	req := &http.Request{
 		Method: method,
@@ -235,7 +259,7 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 	(&handler{}).ServeHTTP(resp, req)
 	if resp.Code != http.StatusSeeOther {
 		c.Assert(resp.Code, check.Equals, expectStatus)
-		return
+		return resp
 	}
 	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
 	cookies := (&http.Response{Header: resp.Header()}).Cookies()
@@ -258,4 +282,5 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 	if expectStatus == http.StatusOK {
 		c.Check(resp.Body.String(), check.Equals, "foo")
 	}
+	return resp
 }

commit c8f07fc798c13ad8a45fb322eda4a922f664bb4c
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Sep 7 02:39:10 2015 -0400

    5824: Implement "trust all content" mode.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index f6c71c4..236820e 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -49,7 +49,7 @@
 // The following "same origin" URL patterns are supported for public
 // collections (i.e., collections which can be served by keep-web
 // without making use of any credentials supplied by the client). See
-// "Same-origin mode" below.
+// "Same-origin URLs" below.
 //
 //   http://dl.example.com/c=uuid_or_pdh/path/file.txt
 //   http://dl.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
@@ -163,7 +163,7 @@
 // the local network -- the upstream proxy should be configured to return
 // 401 for all paths beginning with "/c=".
 //
-// Same-origin mode
+// Same-origin URLs
 //
 // Without the same-origin protection outlined above, a web page
 // stored in collection X could execute JavaScript code that uses the
@@ -173,19 +173,7 @@
 // (``https://dl.example.com/'') and upload it to some other site
 // chosen by the author of collection X.
 //
-package main
-
-// TODO(TC): Implement?
-//
-// Trusted content
-//
-// Normally, Keep-web is installed using a wildcard DNS entry and a
-// wildcard HTTPS certificate, serving data from collection X at
-// ``https://X--dl.example.com/path/file.ext''.
-//
-// It will also serve publicly accessible data at
-// ``https://dl.example.com/collections/X/path/file.txt'', but it does not
-// accept any kind of credentials at paths like these.
+// Trust All Content mode
 //
 // In "trust all content" mode, Keep-web will accept credentials (API
 // tokens) and serve any collection X at
@@ -198,4 +186,4 @@ package main
 //
 //   keep-web -trust-all-content [...]
 //
-// In the general case, this should not be enabled: 
+package main
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 600e685..c5d439a 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"flag"
 	"fmt"
 	"html"
 	"io"
@@ -19,8 +20,16 @@ import (
 
 type handler struct{}
 
-var clientPool = arvadosclient.MakeClientPool()
-var anonymousTokens []string
+var (
+	clientPool      = arvadosclient.MakeClientPool()
+	trustAllContent = false
+	anonymousTokens []string
+)
+
+func init() {
+	flag.BoolVar(&trustAllContent, "trust-all-content", false,
+		"Serve non-public content from a single origin. Dangerous: read docs before using!")
+}
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
 // otherwise return "".
@@ -102,7 +111,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
-	var credentialsOK bool
+	credentialsOK := trustAllContent
 
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
 		// http://ID.dl.example/PATH...
@@ -139,7 +148,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		if !credentialsOK {
 			// It is not safe to copy the provided token
 			// into a cookie unless the current vhost
-			// (origin) serves only a single collection.
+			// (origin) serves only a single collection or
+			// we are in trustAllContent mode.
 			statusCode = http.StatusBadRequest
 			return
 		}
@@ -160,7 +170,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 			Name:     "api_token",
 			Value:    auth.EncodeTokenCookie([]byte(t)),
 			Path:     "/",
-			Expires:  time.Now().AddDate(10,0,0),
+			Expires:  time.Now().AddDate(10, 0, 0),
 			HttpOnly: true,
 		})
 		redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 50fd717..e2f8edd 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -177,6 +177,30 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
 	)
 }
 
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusBadRequest,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
+	defer func(orig bool) {
+		trustAllContent = orig
+	}(trustAllContent)
+	trustAllContent = true
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+}
+
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
 		arvadostest.FooCollection + ".example.com/foo",
@@ -209,7 +233,10 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 
 	resp := httptest.NewRecorder()
 	(&handler{}).ServeHTTP(resp, req)
-	c.Assert(resp.Code, check.Equals, http.StatusSeeOther)
+	if resp.Code != http.StatusSeeOther {
+		c.Assert(resp.Code, check.Equals, expectStatus)
+		return
+	}
 	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
 	cookies := (&http.Response{Header: resp.Header()}).Cookies()
 

commit 6e19c8ac3af42ad249d6e2b48b93b629e43dc79b
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 30 02:45:58 2015 -0400

    5824: Add read-error and lots-of-blocks tests.

diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index 51710b7..94e41e2 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -2,21 +2,48 @@ package keepclient
 
 import (
 	"crypto/md5"
+	"crypto/rand"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"net/http"
 	"os"
+	"strconv"
+	"strings"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
 	check "gopkg.in/check.v1"
 )
 
-var _ = check.Suite(&IntegrationSuite{})
+var _ = check.Suite(&CollectionReaderUnit{})
 
-// IntegrationSuite tests need an API server
-type IntegrationSuite struct{}
+type CollectionReaderUnit struct {
+	arv     arvadosclient.ArvadosClient
+	kc      *KeepClient
+	handler SuccessHandler
+}
+
+func (s *CollectionReaderUnit) SetUpTest(c *check.C) {
+	var err error
+	s.arv, err = arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	s.arv.ApiToken = arvadostest.ActiveToken
+
+	s.kc, err = MakeKeepClient(&s.arv)
+	c.Assert(err, check.IsNil)
+
+	s.handler = SuccessHandler{
+		disk: make(map[string][]byte),
+		lock: make(chan struct{}, 1),
+		ops: new(int),
+	}
+	localRoots := make(map[string]string)
+	for i, k := range RunSomeFakeKeepServers(s.handler, 4) {
+		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+	}
+	s.kc.SetServiceRoots(localRoots, localRoots, nil)
+}
 
 type SuccessHandler struct {
 	disk map[string][]byte
@@ -64,33 +91,11 @@ type rdrTest struct {
 	want interface{} // error or string to expect
 }
 
-func StubWithFakeServers(kc *KeepClient, h http.Handler) {
-	localRoots := make(map[string]string)
-	for i, k := range RunSomeFakeKeepServers(h, 4) {
-		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
-	}
-	kc.SetServiceRoots(localRoots, localRoots, nil)
-}
-
-func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
-	arv, err := arvadosclient.MakeArvadosClient()
-	c.Assert(err, check.IsNil)
-	arv.ApiToken = arvadostest.ActiveToken
-
-	kc, err := MakeKeepClient(&arv)
-	c.Assert(err, check.IsNil)
-
-	{
-		h := SuccessHandler{
-			disk: make(map[string][]byte),
-			lock: make(chan struct{}, 1),
-		}
-		StubWithFakeServers(kc, h)
-		kc.PutB([]byte("foo"))
-		kc.PutB([]byte("bar"))
-		kc.PutB([]byte("Hello world\n"))
-		kc.PutB([]byte(""))
-	}
+func (s *CollectionReaderUnit) TestCollectionReaderContent(c *check.C) {
+	s.kc.PutB([]byte("foo"))
+	s.kc.PutB([]byte("bar"))
+	s.kc.PutB([]byte("Hello world\n"))
+	s.kc.PutB([]byte(""))
 
 	mt := arvadostest.PathologicalManifest
 
@@ -116,7 +121,7 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 		{mt: mt, f: "segmented/frob", want: "frob"},
 		{mt: mt, f: "segmented/oof", want: "oof"},
 	} {
-		rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+		rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
 		switch want := testCase.want.(type) {
 		case error:
 			c.Check(rdr, check.IsNil)
@@ -136,21 +141,34 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	}
 }
 
-func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
-	arv, err := arvadosclient.MakeArvadosClient()
-	c.Assert(err, check.IsNil)
-	arv.ApiToken = arvadostest.ActiveToken
-
-	kc, err := MakeKeepClient(&arv)
-	c.Assert(err, check.IsNil)
-
-	h := SuccessHandler{
-		disk: make(map[string][]byte),
-		lock: make(chan struct{}, 1),
-		ops: new(int),
+func (s *CollectionReaderUnit) TestCollectionReaderManyBlocks(c *check.C) {
+	h := md5.New()
+	buf := make([]byte, 4096)
+	locs := make([]string, len(buf))
+	filesize := 0
+	for i := 0; i < len(locs); i++ {
+		_, err := io.ReadFull(rand.Reader, buf[:i])
+		c.Assert(err, check.IsNil)
+		h.Write(buf[:i])
+		locs[i], _, err = s.kc.PutB(buf[:i])
+		c.Assert(err, check.IsNil)
+		filesize += i
 	}
-	StubWithFakeServers(kc, h)
-	kc.PutB([]byte("foo"))
+	manifest := "./random " + strings.Join(locs, " ") + " 0:" + strconv.Itoa(filesize) + ":bytes.bin\n"
+	dataMD5 := h.Sum(nil)
+
+	checkMD5 := md5.New()
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "random/bytes.bin")
+	c.Check(err, check.IsNil)
+	_, err = io.Copy(checkMD5, rdr)
+	c.Check(err, check.IsNil)
+	_, err = rdr.Read(make([]byte, 1))
+	c.Check(err, check.Equals, io.EOF)
+	c.Check(checkMD5.Sum(nil), check.DeepEquals, dataMD5)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderCloseEarly(c *check.C) {
+	s.kc.PutB([]byte("foo"))
 
 	mt := ". "
 	for i := 0; i < 1000; i++ {
@@ -161,23 +179,45 @@ func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
 	// Grab the stub server's lock, ensuring our cfReader doesn't
 	// get anything back from its first call to kc.Get() before we
 	// have a chance to call Close().
-	h.lock <- struct{}{}
-	opsBeforeRead := *h.ops
+	s.handler.lock <- struct{}{}
+	opsBeforeRead := *s.handler.ops
 
-	rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
 	c.Assert(err, check.IsNil)
+
+	firstReadDone := make(chan struct{})
+	go func() {
+		rdr.Read(make([]byte, 6))
+		firstReadDone <- struct{}{}
+	}()
 	err = rdr.Close()
 	c.Assert(err, check.IsNil)
 	c.Assert(rdr.Error(), check.IsNil)
 
 	// Release the stub server's lock. The first GET operation will proceed.
-	<-h.lock
+	<-s.handler.lock
+
+	// Make sure our first read operation consumes the data
+	// received from the first GET.
+	<-firstReadDone
 
 	// doGet() should close toRead before sending any more bufs to it.
-	if what, ok := <-rdr.toRead;  ok {
-		c.Errorf("Got %+v, expected toRead to be closed", what)
+	if what, ok := <-rdr.toRead; ok {
+		c.Errorf("Got %q, expected toRead to be closed", string(what))
 	}
 
 	// Stub should have handled exactly one GET request.
-	c.Assert(*h.ops, check.Equals, opsBeforeRead+1)
+	c.Assert(*s.handler.ops, check.Equals, opsBeforeRead+1)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderDataError(c *check.C) {
+	manifest := ". ffffffffffffffffffffffffffffffff+1 0:1:notfound.txt\n"
+	buf := make([]byte, 1)
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "notfound.txt")
+	c.Check(err, check.IsNil)
+	for i := 0; i < 2; i++ {
+		_, err = io.ReadFull(rdr, buf)
+		c.Check(err, check.Not(check.IsNil))
+		c.Check(err, check.Not(check.Equals), io.EOF)
+	}
 }

commit e333aa8b481c3347a3543eb108a622dfef5b30b6
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 11:08:16 2015 -0400

    5824: Fix up DNS docs.

diff --git a/doc/install/install-keep-web.html.textile.liquid b/doc/install/install-keep-web.html.textile.liquid
index 51b8d71..9282a8e 100644
--- a/doc/install/install-keep-web.html.textile.liquid
+++ b/doc/install/install-keep-web.html.textile.liquid
@@ -95,22 +95,20 @@ server {
 }
 </pre></notextile>
 
-h3. Tell the API server about the keep-web service
+h3. Configure DNS
 
-If your wildcard certificate is valid for <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file:
+Configure your DNS servers so the following names resolve to your Nginx proxy's public IP address.
+* @*--dl.uuid_prefix.your.domain@, if your DNS server allows this without interfering with other DNS names; or
+* @*.dl.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names; or
+* @dl.uuid_prefix.your.domain@, if neither of the above options is feasible. In this case, only unauthenticated requests will be served, i.e., public data and collection sharing links.
 
-<notextile>
-<pre><code>keep-web: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
-</code></pre>
-</notextile>
+h3. Tell the API server about the keep-web service
 
-If your wildcard certificate is valid for <code>*.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file. This approach requires a wildcard DNS entry covering <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, which might be difficult to do without affecting other services, depending on your DNS server software.
+Add *one* of the following entries to your API server's @config/application.yml@ file, depending on your DNS setup:
 
 <notextile>
 <pre><code>keep-web: https://%{uuid_or_pdh}--dl.<span class="userinput">uuid_prefix</span>.your.domain
+keep-web: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
+keep-web: https://dl.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>
-
-h3. Configure DNS
-
-Configure your DNS servers so <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code> or <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code> (according to the way you configured Workbench), as well as <code>dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, resolve to your Nginx proxy's public IP address.

commit d8a8e73d39fb5fe7bd4f23b2da11e5384299489d
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 02:30:45 2015 -0400

    5824: Log X-Forwarded-For header value if provided.

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 98dfdb3..600e685 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -62,6 +62,11 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var statusCode = 0
 	var statusText string
 
+	remoteAddr := r.RemoteAddr
+	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+		remoteAddr = xff + "," + remoteAddr
+	}
+
 	w := httpserver.WrapResponseWriter(wOrig)
 	defer func() {
 		if statusCode == 0 {
@@ -75,7 +80,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		if statusText == "" {
 			statusText = http.StatusText(statusCode)
 		}
-		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
+		httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
 	}()
 
 	if r.Method != "GET" && r.Method != "POST" {

commit a3b118be7cfb7df89d4b236a6fa4d70963fc4931
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:45:35 2015 -0400

    5824: Fail at startup if ARVADOS_API_HOST is not set.

diff --git a/services/keep-web/main.go b/services/keep-web/main.go
index d780cc3..751543e 100644
--- a/services/keep-web/main.go
+++ b/services/keep-web/main.go
@@ -17,6 +17,9 @@ func init() {
 
 func main() {
 	flag.Parse()
+	if os.Getenv("ARVADOS_API_HOST") == "" {
+		log.Fatal("ARVADOS_API_HOST environment variable must be set.")
+	}
 	srv := &server{}
 	if err := srv.Start(); err != nil {
 		log.Fatal(err)

commit 7f26842ce8dbc3933086a824648969d1b90555f3
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:28:43 2015 -0400

    5824: Accept anonymous tokens on command line.

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 7a2124a..98dfdb3 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -17,16 +17,10 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
-var clientPool = arvadosclient.MakeClientPool()
-
-var anonymousTokens []string
-
 type handler struct{}
 
-func init() {
-	// TODO(TC): Get anonymousTokens from flags
-	anonymousTokens = []string{}
-}
+var clientPool = arvadosclient.MakeClientPool()
+var anonymousTokens []string
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
 // otherwise return "".
diff --git a/services/keep-web/server.go b/services/keep-web/server.go
index 44da00f..2359f23 100644
--- a/services/keep-web/server.go
+++ b/services/keep-web/server.go
@@ -10,8 +10,8 @@ import (
 var address string
 
 func init() {
-	flag.StringVar(&address, "address", "0.0.0.0:80",
-		"Address to listen on, \"host:port\".")
+	flag.StringVar(&address, "address", ":80",
+		"Address to listen on: \"host:port\", or \":port\" to listen on all interfaces.")
 }
 
 type server struct {

commit bbc35c5bc1ae72ef2b635292f490f2b0cf3f5475
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:09:46 2015 -0400

    5824: Handle various combinations of c= and t= more consistently. Use vhosts in integration tests.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 8908560..f6c71c4 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -52,13 +52,12 @@
 // "Same-origin mode" below.
 //
 //   http://dl.example.com/c=uuid_or_pdh/path/file.txt
-//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
 //
 // The following "multiple origin" URL patterns are supported for all
 // collections:
 //
 //   http://uuid_or_pdh--dl.example.com/path/file.txt
-//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
 //   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
 //
 // In the "multiple origin" form, the string "--" can be replaced with
@@ -81,17 +80,35 @@
 // collection UUID or a portable data hash with the "+" character
 // replaced by "-".
 //
+// In all of the above forms, a top level directory called "_" is
+// skipped. In cases where the "path/file.txt" part might start with
+// "t=" or "c=" or "_/", links should be constructed with a leading
+// "_/" to ensure the top level directory is not interpreted as a
+// token or collection ID.
+//
 // Assuming there is a collection with UUID
 // zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
 // 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
 // interchangeable:
 //
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/_/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/_/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo
 //
+// An additional form is supported specifically to make it more
+// convenient to maintain support for existing Workbench download
+// links:
+//
+//   http://dl.example.com/collections/download/uuid_or_pdh/TOKEN/path/file.txt
+//
+// A regular Workbench "download" link is also accepted, but
+// credentials passed via cookie, header, etc. are ignored. Only
+// public data can be served this way:
+//
+//   http://dl.example.com/collections/uuid_or_pdh/path/file.txt
+//
 // Authorization mechanisms
 //
 // A token can be provided in an Authorization header:
@@ -158,7 +175,7 @@
 //
 package main
 
-// TODO(TC): Implement
+// TODO(TC): Implement?
 //
 // Trusted content
 //
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 657c72d..7a2124a 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -50,6 +50,20 @@ func parseCollectionIdFromDNSName(s string) string {
 	return ""
 }
 
+var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
+
+// return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
+// with "+" replaced by " " or "-"); otherwise return "".
+func parseCollectionIdFromURL(s string) string {
+	if arvadosclient.UUIDMatch(s) {
+		return s
+	}
+	if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
+		return pdh
+	}
+	return ""
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var statusCode = 0
 	var statusText string
@@ -89,79 +103,104 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
+	var credentialsOK bool
 
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
-		// "http://{id}.domain.example.com/{path}" form
-		if t := r.FormValue("api_token"); t != "" {
-			// ...with explicit token in query string or
-			// form in POST body. We must encrypt the
-			// token such that it can only be used for
-			// this collection; put it in an HttpOnly
-			// cookie; and redirect to the same URL with
-			// the query param redacted, and method =
-			// GET.
-			//
-			// The HttpOnly flag is necessary to prevent
-			// JavaScript code (included in, or loaded by,
-			// a page in the collection being served) from
-			// employing the user's token beyond reading
-			// other files in the same domain, i.e., same
-			// the collection.
-			//
-			// The 303 redirect is necessary in the case
-			// of a GET request to avoid exposing the
-			// token in the Location bar, and in the case
-			// of a POST request to avoid raising warnings
-			// when the user refreshes the resulting page.
-			http.SetCookie(w, &http.Cookie{
-				Name:    "api_token",
-				Value:   auth.EncodeTokenCookie([]byte(t)),
-				Path:    "/",
-				Expires: time.Now().AddDate(10,0,0),
-			})
-			redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
-
-			w.Header().Add("Location", redir)
-			statusCode, statusText = http.StatusSeeOther, redir
-			w.WriteHeader(statusCode)
-			io.WriteString(w, `<A href="`)
-			io.WriteString(w, html.EscapeString(redir))
-			io.WriteString(w, `">Continue</A>`)
-			return
-		} else if strings.HasPrefix(pathParts[0], "t=") {
-			// ...with explicit token in path,
-			// "{...}.com/t={token}/{path}".  This form
-			// must only be used to pass scoped tokens
-			// that give permission for a single
-			// collection. See FormValue case above.
-			tokens = []string{pathParts[0][2:]}
-			targetPath = pathParts[1:]
+		// http://ID.dl.example/PATH...
+		credentialsOK = true
+		targetPath = pathParts
+	} else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
+		// /c=ID/PATH...
+		targetId = parseCollectionIdFromURL(pathParts[0][2:])
+		targetPath = pathParts[1:]
+	} else if len(pathParts) >= 3 && pathParts[0] == "collections" {
+		if len(pathParts) >= 5 && pathParts[1] == "download" {
+			// /collections/download/ID/TOKEN/PATH...
+			targetId = pathParts[2]
+			tokens = []string{pathParts[3]}
+			targetPath = pathParts[4:]
 			pathToken = true
 		} else {
-			// ...with cookie, Authorization header, or
-			// no token at all
-			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
-			tokens = append(reqTokens, anonymousTokens...)
-			targetPath = pathParts
+			// /collections/ID/PATH...
+			targetId = pathParts[1]
+			tokens = anonymousTokens
+			targetPath = pathParts[2:]
 		}
-	} else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+	} else {
 		statusCode = http.StatusNotFound
 		return
-	} else if len(pathParts) >= 5 && pathParts[1] == "download" {
-		// "/collections/download/{id}/{token}/path..." form:
-		// Don't use our configured anonymous tokens,
-		// Authorization headers, etc.  Just use the token in
-		// the path.
-		targetId = pathParts[2]
-		tokens = []string{pathParts[3]}
-		targetPath = pathParts[4:]
+	}
+	if t := r.FormValue("api_token"); t != "" {
+		// The client provided an explicit token in the query
+		// string, or a form in POST body. We must put the
+		// token in an HttpOnly cookie, and redirect to the
+		// same URL with the query param redacted and method =
+		// GET.
+
+		if !credentialsOK {
+			// It is not safe to copy the provided token
+			// into a cookie unless the current vhost
+			// (origin) serves only a single collection.
+			statusCode = http.StatusBadRequest
+			return
+		}
+
+		// The HttpOnly flag is necessary to prevent
+		// JavaScript code (included in, or loaded by, a page
+		// in the collection being served) from employing the
+		// user's token beyond reading other files in the same
+		// domain, i.e., same collection.
+		//
+		// The 303 redirect is necessary in the case of a GET
+		// request to avoid exposing the token in the Location
+		// bar, and in the case of a POST request to avoid
+		// raising warnings when the user refreshes the
+		// resulting page.
+
+		http.SetCookie(w, &http.Cookie{
+			Name:     "api_token",
+			Value:    auth.EncodeTokenCookie([]byte(t)),
+			Path:     "/",
+			Expires:  time.Now().AddDate(10,0,0),
+			HttpOnly: true,
+		})
+		redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+		w.Header().Add("Location", redir)
+		statusCode, statusText = http.StatusSeeOther, redir
+		w.WriteHeader(statusCode)
+		io.WriteString(w, `<A href="`)
+		io.WriteString(w, html.EscapeString(redir))
+		io.WriteString(w, `">Continue</A>`)
+		return
+	}
+
+	if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+		// http://ID.example/t=TOKEN/PATH...
+		// /c=ID/t=TOKEN/PATH...
+		//
+		// This form must only be used to pass scoped tokens
+		// that give permission for a single collection. See
+		// FormValue case above.
+		tokens = []string{targetPath[0][2:]}
 		pathToken = true
-	} else {
-		// "/collections/{id}/path..." form
-		targetId = pathParts[1]
-		reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		targetPath = targetPath[1:]
+	}
+
+	if tokens == nil {
+		if credentialsOK {
+			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		}
 		tokens = append(reqTokens, anonymousTokens...)
-		targetPath = pathParts[2:]
+	}
+
+	if len(targetPath) > 0 && targetPath[0] == "_" {
+		// If a collection has a directory called "t=foo" or
+		// "_", it can be served at //dl.example/_/t=foo/ or
+		// //dl.example/_/_/ respectively: //dl.example/t=foo/
+		// won't work because t=foo will be interpreted as a
+		// token "foo".
+		targetPath = targetPath[1:]
 	}
 
 	tokenResult := make(map[string]int)
@@ -188,11 +227,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		return
 	}
 	if !found {
-		if pathToken {
-			// The URL is a "secret sharing link", but it
-			// didn't work out. Asking the client for
-			// additional credentials would just be
-			// confusing.
+		if pathToken || !credentialsOK {
+			// Either the URL is a "secret sharing link"
+			// that didn't work out (and asking the client
+			// for additional credentials would just be
+			// confusing), or we don't even accept
+			// credentials at this path.
 			statusCode = http.StatusNotFound
 			return
 		}
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index b788a38..50fd717 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -99,8 +99,10 @@ func authzViaPOST(r *http.Request, tok string) int {
 func doVhostRequests(c *check.C, authz authorizer) {
 	for _, hostPath := range []string{
 		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection + "--dl.example.com/foo",
+		arvadostest.FooCollection + "--dl.example.com/_/foo",
 		arvadostest.FooPdh + ".example.com/foo",
-		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + "--dl.example.com/foo",
 	} {
 		c.Log("doRequests: ", hostPath)
 		doVhostRequestsWithHostPath(c, authz, hostPath)
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index a2a5754..fdbb50e 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -29,7 +29,7 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"bogustoken",
 	} {
 		hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
-		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
@@ -119,6 +119,14 @@ func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
 	c.Check(size, check.Equals, int64(blocksize)*100)
 }
 
+type curlCase struct {
+	id      string
+	auth    string
+	host    string
+	path    string
+	dataMD5 string
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
 	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
@@ -128,28 +136,101 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 	c.Assert(err, check.Equals, nil)
 	kc.PutB([]byte("Hello world\n"))
 	kc.PutB([]byte("foo"))
-	for _, spec := range [][]string{
+	for _, spec := range []curlCase{
 		// My collection
-		{arvadostest.ActiveToken, "/collections/" + arvadostest.FooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		// Anonymously accessible user agreement.
-		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{
+			auth: arvadostest.ActiveToken,
+			host: arvadostest.FooCollection + "--dl.example.com",
+			path: "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			host: strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
+			path: "/t=" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: "tokensobogus",
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: arvadostest.ActiveToken,
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: arvadostest.AnonymousToken,
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+
+		// Anonymously accessible user agreement
+		{
+			path: "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			host: arvadostest.HelloWorldCollection + ".dl.example.com",
+			path: "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			host: arvadostest.HelloWorldCollection + ".dl.example.com",
+			path: "/_/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.ActiveToken,
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			host: arvadostest.HelloWorldCollection + "--dl.example.com",
+			path: "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			path: "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
 	} {
-		hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
+		host := spec.host
+		if host == "" {
+			host = "dl.example.com"
+		}
+		hdr, body, _ := s.runCurl(c, spec.auth, host, spec.path)
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
-		if strings.HasSuffix(spec[1], ".txt") {
+		if strings.HasSuffix(spec.path, ".txt") {
 			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
 			// TODO: Check some types that aren't
 			// automatically detected by Go's http server
 			// by sniffing the content.
 		}
-		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
+		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.dataMD5)
 	}
 }
 

commit f9bd0dc5aabd8ec7d811688cfb06c275a296acd5
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:34:21 2015 -0400

    5824: Comment to explain "authorizer" test helpers.

diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 0494376..b788a38 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -42,6 +42,10 @@ func (s *IntegrationSuite) TestVhost404(c *check.C) {
 	}
 }
 
+// An authorizer modifies an HTTP request to make use of the given
+// token -- by adding it to a header, cookie, query param, or whatever
+// -- and returns the HTTP status code we should expect from keep-web if
+// the token is invalid.
 type authorizer func(*http.Request, string) int
 
 func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {

commit d4f30dc6f8ef842e716cf37b850ab9b4fa45869a
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:33:44 2015 -0400

    5824: Fix up support for PDH in vhostname.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 40ff00a..8908560 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -74,7 +74,8 @@
 // upstream proxy.
 //
 // In all of the above forms, the "dl.example.com" part can be
-// anything at all.
+// anything at all: keep-web ignores everything after the first "." or
+// "--".
 //
 // In all of the above forms, the "uuid_or_pdh" part can be either a
 // collection UUID or a portable data hash with the "+" character
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 30b4b64..657c72d 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -28,7 +28,8 @@ func init() {
 	anonymousTokens = []string{}
 }
 
-// return s if s is a UUID or a PDH, otherwise ""
+// return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
+// otherwise return "".
 func parseCollectionIdFromDNSName(s string) string {
 	// Strip domain.
 	if i := strings.IndexRune(s, '.'); i >= 0 {
@@ -40,10 +41,13 @@ func parseCollectionIdFromDNSName(s string) string {
 	if i := strings.Index(s, "--"); i >= 0 {
 		s = s[:i]
 	}
-	if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
-		return ""
+	if arvadosclient.UUIDMatch(s) {
+		return s
 	}
-	return s
+	if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
+		return pdh
+	}
+	return ""
 }
 
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index a1f5e1a..0494376 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -93,7 +93,17 @@ func authzViaPOST(r *http.Request, tok string) int {
 // Try some combinations of {url, token} using the given authorization
 // mechanism, and verify the result is correct.
 func doVhostRequests(c *check.C, authz authorizer) {
-	hostPath := arvadostest.FooCollection + ".example.com/foo"
+	for _, hostPath := range []string{
+		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooPdh + ".example.com/foo",
+		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+	} {
+		c.Log("doRequests: ", hostPath)
+		doVhostRequestsWithHostPath(c, authz, hostPath)
+	}
+}
+
+func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
 	for _, tok := range []string{
 		arvadostest.ActiveToken,
 		arvadostest.ActiveToken[:15],

commit 6535e3ebbb702bc73a031b3b4a4cc4987729dedb
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:31:19 2015 -0400

    5824: Modernize install page, cf. other services.

diff --git a/doc/_config.yml b/doc/_config.yml
index 1bdd2ab..d67668a 100644
--- a/doc/_config.yml
+++ b/doc/_config.yml
@@ -154,6 +154,7 @@ navbar:
       - install/create-standard-objects.html.textile.liquid
       - install/install-keepstore.html.textile.liquid
       - install/install-keepproxy.html.textile.liquid
+      - install/install-keep-web.html.textile.liquid
       - install/install-crunch-dispatch.html.textile.liquid
       - install/install-compute-node.html.textile.liquid
     - Helpful hints:
diff --git a/doc/install/install-keep-web.html.textile.liquid b/doc/install/install-keep-web.html.textile.liquid
index 4777668..51b8d71 100644
--- a/doc/install/install-keep-web.html.textile.liquid
+++ b/doc/install/install-keep-web.html.textile.liquid
@@ -1,31 +1,33 @@
 ---
 layout: default
 navsection: installguide
-title: Install download server
+title: Install the download server
 ...
 
-This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
-
-The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to anonymous and unauthenticated clients, and accepts authentication via Arvados tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support.
+The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for more detail.
 
 By convention, we use the following hostname for the download service:
 
-<div class="offset1">
-table(table table-bordered table-condensed).
-|dl.@uuid_prefix@.your.domain|
-</div>
+<notextile>
+<pre><code>dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
 
 This hostname should resolve from anywhere on the internet.
 
 h2. Install keep-web
 
-First add the Arvados apt repository, and then install the keep-web package.
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install keep-web</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
-~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get install keep-web</span>
+<pre><code>~$ <span class="userinput">sudo yum install keep-web</span>
 </code></pre>
 </notextile>
 
@@ -34,31 +36,81 @@ Verify that @keep-web@ is functional:
 <notextile>
 <pre><code>~$ <span class="userinput">keep-web -h</span>
 Usage of keep-web:
-  -address="0.0.0.0:80": Address to listen on, "host:port".
+  -address string
+        Address to listen on: "host:port", or ":port" to listen on all interfaces. (default ":80")
+  -anonymous-token value
+        API token to try when none of the tokens provided in an HTTP request succeed in reading the desired collection. If this flag is used more than once, each token will be attempted in turn until one works. (default [])
 </code></pre>
 </notextile>
 
-We recommend running @arv-git-httpd@ under "runit":https://packages.debian.org/search?keywords=runit or something similar.
+If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another:
+
+<notextile>
+<pre><code>/var/www/arvados-api/current/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
+hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
+</code></pre></notextile>
 
-Your @run@ script should look something like this:
+We recommend running @keep-web@ under "runit":https://packages.debian.org/search?keywords=runit or a similar supervisor. The basic command to start @keep-web@ is:
 
 <notextile>
 <pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
-exec sudo -u nobody keep-web -address=:9002 2>&1
+exec sudo -u nobody keep-web -address=<span class="userinput">:9002</span> -anonymous-token=<span class="userinput">hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r</span> 2>&1
 </code></pre>
 </notextile>
 
+Omit the @-anonymous-token@ arguments if you do not want to serve public data.
+
+Set @ARVADOS_API_HOST_INSECURE=1@ if your API server's SSL certificate is not signed by a recognized CA.
+
 h3. Set up a reverse proxy with SSL support
 
 The keep-web service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
 
 This is best achieved by putting a reverse proxy with SSL support in front of keep-web, running on port 443 and passing requests to keep-web on port 9002 (or whatever port you chose in your run script).
 
+Note: A wildcard SSL certificate is required in order to proxy keep-web effectively.
+
+For example, using Nginx:
+
+<notextile><pre>
+upstream keep-web {
+  server                127.0.0.1:<span class="userinput">9002</span>;
+}
+
+server {
+  listen                <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name           dl.<span class="userinput">uuid_prefix</span>.your.domain *.dl.<span class="userinput">uuid_prefix</span>.your.domain ~.*--dl.<span class="userinput">uuid_prefix</span>.your.domain;
+
+  proxy_connect_timeout 90s;
+  proxy_read_timeout    300s;
+
+  ssl                   on;
+  ssl_certificate       <span class="userinput">YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key   <span class="userinput">YOUR/PATH/TO/cert.key</span>;
+
+  location / {
+    proxy_pass          http://keep-web;
+    proxy_set_header    X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
+</pre></notextile>
+
 h3. Tell the API server about the keep-web service
 
-In your API server's config/application.yml file, add the following entry:
+If your wildcard certificate is valid for <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file:
 
 <notextile>
-<pre><code>keep-web: dl.<span class="userinput">uuid_prefix</span>.your.domain
+<pre><code>keep-web: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>
+
+If your wildcard certificate is valid for <code>*.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file. This approach requires a wildcard DNS entry covering <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, which might be difficult to do without affecting other services, depending on your DNS server software.
+
+<notextile>
+<pre><code>keep-web: https://%{uuid_or_pdh}--dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
+
+h3. Configure DNS
+
+Configure your DNS servers so <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code> or <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code> (according to the way you configured Workbench), as well as <code>dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, resolve to your Nginx proxy's public IP address.
diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 5914cd5..40ff00a 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -4,6 +4,8 @@
 // can be installed anywhere with access to Keep services, typically
 // behind a web proxy that supports TLS.
 //
+// See http://doc.arvados.org/install/install-keep-web.html.
+//
 // Starting the server
 //
 // Serve HTTP requests at port 1234 on all interfaces:

commit 4f245ddd7f4cb4e10dc0d844a78e809c0cd97af7
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:17:54 2015 -0400

    5824: Clarify difference between keepproxy and keepstore (bandwidth and convenience -- not security).

diff --git a/doc/install/install-keepproxy.html.textile.liquid b/doc/install/install-keepproxy.html.textile.liquid
index 14b252f..3b658f8 100644
--- a/doc/install/install-keepproxy.html.textile.liquid
+++ b/doc/install/install-keepproxy.html.textile.liquid
@@ -4,9 +4,9 @@ navsection: installguide
 title: Install Keepproxy server
 ...
 
-The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is designed to provide secure access into Keep from anywhere on the internet.
+The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is suitable for lower-bandwidth clients located elsewhere on the internet: a client sends a single copy of a data block, and Keepproxy sends copies to the appropriate Keepstore servers. Keepproxy also accepts requests from clients that do not compute data hashes before uploading data: notably, the browser-based upload feature in Workbench requires Keepproxy.
 
-By convention, we use the following hostname for the Keepproxy:
+By convention, we use the following hostname for the Keepproxy server:
 
 <div class="offset1">
 table(table table-bordered table-condensed).

commit c47e155f5f14a6563151b3a41374a4f547d10f0b
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:16:26 2015 -0400

    5824: Update keepproxy usage.

diff --git a/doc/install/install-keepproxy.html.textile.liquid b/doc/install/install-keepproxy.html.textile.liquid
index 6a531a3..14b252f 100644
--- a/doc/install/install-keepproxy.html.textile.liquid
+++ b/doc/install/install-keepproxy.html.textile.liquid
@@ -36,12 +36,13 @@ Verify that Keepproxy is functional:
 
 <notextile>
 <pre><code>~$ <span class="userinput">keepproxy -h</span>
-Usage of default:
+Usage of keepproxy:
   -default-replicas=2: Default number of replicas to write if not specified by the client.
   -listen=":25107": Interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.
   -no-get=false: If set, disable GET operations
   -no-put=false: If set, disable PUT operations
   -pid="": Path to write pid file
+  -timeout=15: Timeout on requests to internal Keep services (default 15 seconds)
 </code></pre>
 </notextile>
 
diff --git a/services/keepproxy/keepproxy.go b/services/keepproxy/keepproxy.go
index 7900096..8e734f7 100644
--- a/services/keepproxy/keepproxy.go
+++ b/services/keepproxy/keepproxy.go
@@ -37,7 +37,7 @@ func main() {
 		pidfile          string
 	)
 
-	flagset := flag.NewFlagSet("default", flag.ExitOnError)
+	flagset := flag.NewFlagSet("keepproxy", flag.ExitOnError)
 
 	flagset.StringVar(
 		&listen,

commit 4f5a6df52559b90d2c9412624f3c4c7fbe467579
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 16 00:16:27 2015 -0400

    5824: Fix up error checking and early-close behavior in CollectionFileReader.

diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
index 5db944c..0d05b8a 100644
--- a/sdk/go/keepclient/collectionreader.go
+++ b/sdk/go/keepclient/collectionreader.go
@@ -8,6 +8,17 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/manifest"
 )
 
+const (
+	// After reading a data block from Keep, cfReader slices it up
+	// and sends the slices to a buffered channel to be consumed
+	// by the caller via Read().
+	//
+	// dataSliceSize is the maximum size of the slices, and
+	// therefore the maximum number of bytes that will be returned
+	// by a single call to Read().
+	dataSliceSize = 1 << 20
+)
+
 // ErrNoManifest indicates the given collection has no manifest
 // information (e.g., manifest_text was excluded by a "select"
 // parameter when retrieving the collection record).
@@ -40,8 +51,10 @@ func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, fi
 			}
 			q = append(q, seg)
 			r.totalSize += uint64(seg.Len)
-			// Send toGet whatever it's ready to receive.
-			Q: for len(q) > 0 {
+			// Send toGet as many segments as we can until
+			// it blocks.
+		Q:
+			for len(q) > 0 {
 				select {
 				case r.toGet <- q[0]:
 					q = q[1:]
@@ -75,84 +88,127 @@ type cfReader struct {
 	// doGet() reads FileSegments from toGet, gets the data from
 	// Keep, and sends byte slices to toRead to be consumed by
 	// Read().
-	toGet        chan *manifest.FileSegment
-	toRead       chan []byte
+	toGet chan *manifest.FileSegment
+	// toRead is a buffered channel, sized to fit one full Keep
+	// block. This lets us verify checksums without having a
+	// store-and-forward delay between blocks: by the time the
+	// caller starts receiving data from block N, cfReader is
+	// starting to fetch block N+1. A larger buffer would be
+	// useful for a caller whose read speed varies a lot.
+	toRead chan []byte
 	// bytes ready to send next time someone calls Read()
-	buf          []byte
+	buf []byte
 	// Total size of the file being read. Not safe to read this
 	// until countDone is closed.
-	totalSize    uint64
-	countDone    chan struct{}
+	totalSize uint64
+	countDone chan struct{}
 	// First error encountered.
-	err          error
+	err error
+	// errNotNil is closed IFF err contains a non-nil error.
+	// Receiving from it will block until an error occurs.
+	errNotNil chan struct{}
+	// rdrClosed is closed IFF the reader's Close() method has
+	// been called. Any goroutines associated with the reader will
+	// stop and free up resources when they notice this channel is
+	// closed.
+	rdrClosed chan struct{}
 }
 
-func (r *cfReader) Read(outbuf []byte) (n int, err error) {
-	if r.err != nil {
-		return 0, r.err
+func (r *cfReader) Read(outbuf []byte) (int, error) {
+	if r.Error() != nil {
+		return 0, r.Error()
 	}
 	for r.buf == nil || len(r.buf) == 0 {
 		var ok bool
 		r.buf, ok = <-r.toRead
-		if r.err != nil {
-			return 0, r.err
+		if r.Error() != nil {
+			return 0, r.Error()
 		} else if !ok {
 			return 0, io.EOF
 		}
 	}
+	n := len(r.buf)
 	if len(r.buf) > len(outbuf) {
 		n = len(outbuf)
-	} else {
-		n = len(r.buf)
 	}
 	copy(outbuf[:n], r.buf[:n])
 	r.buf = r.buf[n:]
-	return
+	return n, nil
 }
 
 func (r *cfReader) Close() error {
-	_, _ = <-r.countDone
-	for _ = range r.toGet {
-	}
-	for _ = range r.toRead {
+	close(r.rdrClosed)
+	return r.Error()
+}
+
+func (r *cfReader) Error() error {
+	select {
+	case <-r.errNotNil:
+		return r.err
+	default:
+		return nil
 	}
-	return r.err
 }
 
 func (r *cfReader) Len() uint64 {
 	// Wait for all segments to be counted
-	_, _ = <-r.countDone
+	<-r.countDone
 	return r.totalSize
 }
 
 func (r *cfReader) doGet() {
 	defer close(r.toRead)
+GET:
 	for fs := range r.toGet {
 		rdr, _, _, err := r.keepClient.Get(fs.Locator)
 		if err != nil {
 			r.err = err
+			close(r.errNotNil)
 			return
 		}
 		var buf = make([]byte, fs.Offset+fs.Len)
 		_, err = io.ReadFull(rdr, buf)
 		if err != nil {
 			r.err = err
+			close(r.errNotNil)
 			return
 		}
-		for bOff, bLen := fs.Offset, 1<<20; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+		for bOff, bLen := fs.Offset, dataSliceSize; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
 			if bOff+bLen > fs.Offset+fs.Len {
 				bLen = fs.Offset + fs.Len - bOff
 			}
-			r.toRead <- buf[bOff : bOff+bLen]
+			select {
+			case r.toRead <- buf[bOff : bOff+bLen]:
+			case <-r.rdrClosed:
+				// Reader is closed: no point sending
+				// anything more to toRead.
+				break GET
+			}
+		}
+		// It is possible that r.rdrClosed is closed but we
+		// never noticed because r.toRead was also ready in
+		// every select{} above. Here we check before wasting
+		// a keepclient.Get() call.
+		select {
+		case <-r.rdrClosed:
+			break GET
+		default:
 		}
 	}
+	// In case we exited the above loop early: before returning,
+	// drain the toGet channel so its sender doesn't sit around
+	// blocking forever.
+	for _ = range r.toGet {
+	}
 }
 
 func newCFReader(kc *KeepClient) (r *cfReader) {
 	r = new(cfReader)
 	r.keepClient = kc
+	r.rdrClosed = make(chan struct{})
+	r.errNotNil = make(chan struct{})
 	r.toGet = make(chan *manifest.FileSegment, 2)
-	r.toRead = make(chan []byte)
+	r.toRead = make(chan []byte, (BLOCKSIZE+dataSliceSize-1)/dataSliceSize)
 	r.countDone = make(chan struct{})
 	go r.doGet()
 	return
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index f271208..51710b7 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -20,7 +20,8 @@ type IntegrationSuite struct{}
 
 type SuccessHandler struct {
 	disk map[string][]byte
-	lock chan struct{}
+	lock chan struct{}	// channel with buffer==1: full when an operation is in progress.
+	ops  *int		// number of operations completed
 }
 
 func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
@@ -34,12 +35,18 @@ func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
 		pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
 		h.lock <- struct{}{}
 		h.disk[pdh] = buf
+		if h.ops != nil {
+			(*h.ops)++
+		}
 		<- h.lock
 		resp.Write([]byte(pdh))
 	case "GET":
 		pdh := req.URL.Path[1:]
 		h.lock <- struct{}{}
 		buf, ok := h.disk[pdh]
+		if h.ops != nil {
+			(*h.ops)++
+		}
 		<- h.lock
 		if !ok {
 			resp.WriteHeader(http.StatusNotFound)
@@ -57,6 +64,14 @@ type rdrTest struct {
 	want interface{} // error or string to expect
 }
 
+func StubWithFakeServers(kc *KeepClient, h http.Handler) {
+	localRoots := make(map[string]string)
+	for i, k := range RunSomeFakeKeepServers(h, 4) {
+		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+	}
+	kc.SetServiceRoots(localRoots, localRoots, nil)
+}
+
 func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	arv, err := arvadosclient.MakeArvadosClient()
 	c.Assert(err, check.IsNil)
@@ -66,12 +81,11 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	c.Assert(err, check.IsNil)
 
 	{
-		localRoots := make(map[string]string)
-		h := SuccessHandler{disk: make(map[string][]byte), lock: make(chan struct{}, 1)}
-		for i, k := range RunSomeFakeKeepServers(h, 4) {
-			localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+		h := SuccessHandler{
+			disk: make(map[string][]byte),
+			lock: make(chan struct{}, 1),
 		}
-		kc.SetServiceRoots(localRoots, localRoots, nil)
+		StubWithFakeServers(kc, h)
 		kc.PutB([]byte("foo"))
 		kc.PutB([]byte("bar"))
 		kc.PutB([]byte("Hello world\n"))
@@ -121,3 +135,49 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 		}
 	}
 }
+
+func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	arv.ApiToken = arvadostest.ActiveToken
+
+	kc, err := MakeKeepClient(&arv)
+	c.Assert(err, check.IsNil)
+
+	h := SuccessHandler{
+		disk: make(map[string][]byte),
+		lock: make(chan struct{}, 1),
+		ops: new(int),
+	}
+	StubWithFakeServers(kc, h)
+	kc.PutB([]byte("foo"))
+
+	mt := ". "
+	for i := 0; i < 1000; i++ {
+		mt += "acbd18db4cc2f85cedef654fccc4a4d8+3 "
+	}
+	mt += "0:3000:foo1000.txt\n"
+
+	// Grab the stub server's lock, ensuring our cfReader doesn't
+	// get anything back from its first call to kc.Get() before we
+	// have a chance to call Close().
+	h.lock <- struct{}{}
+	opsBeforeRead := *h.ops
+
+	rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+	c.Assert(err, check.IsNil)
+	err = rdr.Close()
+	c.Assert(err, check.IsNil)
+	c.Assert(rdr.Error(), check.IsNil)
+
+	// Release the stub server's lock. The first GET operation will proceed.
+	<-h.lock
+
+	// doGet() should close toRead before sending any more bufs to it.
+	if what, ok := <-rdr.toRead;  ok {
+		c.Errorf("Got %+v, expected toRead to be closed", what)
+	}
+
+	// Stub should have handled exactly one GET request.
+	c.Assert(*h.ops, check.Equals, opsBeforeRead+1)
+}

commit 477ff5db1a0cae68c75b045f0597c12ee06f67c9
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 03:33:19 2015 -0400

    5824: Add Content-Length header.

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 03b3e26..30b4b64 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -239,6 +239,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 			w.Header().Set("Content-Type", t)
 		}
 	}
+	w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
 
 	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)

commit 11890d971215ec44b7abb1e24ad748aefa1061a7
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 01:56:55 2015 -0400

    5824: Use vhosts in curl integration tests. Add large file test.

diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index b4d6d17..a2a5754 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -3,6 +3,9 @@ package main
 import (
 	"crypto/md5"
 	"fmt"
+	"io"
+	"io/ioutil"
+	"net"
 	"os/exec"
 	"strings"
 	"testing"
@@ -25,17 +28,17 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"",
 		"bogustoken",
 	} {
-		hdr, body := s.runCurl(c, token, "/collections/"+arvadostest.FooCollection+"/foo")
+		hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
-			hdr, body = s.runCurl(c, token, "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
+			hdr, body, _ = s.runCurl(c, token, "dl.example.com", "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
 			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 			c.Check(body, check.Equals, "")
 		}
 
-		hdr, body = s.runCurl(c, token, "/bad-route")
+		hdr, body, _ = s.runCurl(c, token, "dl.example.com", "/bad-route")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 		c.Check(body, check.Equals, "")
 	}
@@ -64,12 +67,58 @@ func (s *IntegrationSuite) Test404(c *check.C) {
 		"/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
 		"/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 	} {
-		hdr, body := s.runCurl(c, arvadostest.ActiveToken, uri)
+		hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "dl.example.com", uri)
 		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
 		c.Check(body, check.Equals, "")
 	}
 }
 
+func (s *IntegrationSuite) Test1GBFile(c *check.C) {
+	if testing.Short() {
+		c.Skip("skipping 1GB integration test in short mode")
+	}
+	s.test100BlockFile(c, 10000000)
+}
+
+func (s *IntegrationSuite) Test300MBFile(c *check.C) {
+	s.test100BlockFile(c, 3000000)
+}
+
+func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
+	testdata := make([]byte, blocksize)
+	for i := 0; i < blocksize; i++ {
+		testdata[i] = byte(' ')
+	}
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+	arv.ApiToken = arvadostest.ActiveToken
+	kc, err := keepclient.MakeKeepClient(&arv)
+	c.Assert(err, check.Equals, nil)
+	loc, _, err := kc.PutB(testdata[:])
+	c.Assert(err, check.Equals, nil)
+	mtext := "."
+	for i := 0; i < 100; i++ {
+		mtext = mtext + " " + loc
+	}
+	mtext = mtext + fmt.Sprintf(" 0:%d00:testdata.bin\n", blocksize)
+	coll := map[string]interface{}{}
+	err = arv.Create("collections",
+		map[string]interface{}{
+			"collection": map[string]interface{}{
+				"name": fmt.Sprintf("testdata blocksize=%d", blocksize),
+				"manifest_text": mtext,
+			},
+		}, &coll)
+	c.Assert(err, check.Equals, nil)
+	uuid := coll["uuid"].(string)
+
+	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid + ".dl.example.com", "/testdata.bin")
+	c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+	c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
+	c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
+	c.Check(size, check.Equals, int64(blocksize)*100)
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
 	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
@@ -86,19 +135,13 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		// Anonymously accessible user agreement. These should
-		// start working when CollectionFileReader provides
-		// real data instead of fake/stub data.
+		// Anonymously accessible user agreement.
 		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 	} {
-		hdr, body := s.runCurl(c, spec[0], spec[1])
-		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {
-			c.Log("Not implemented!")
-			continue
-		}
+		hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
 		if strings.HasSuffix(spec[1], ".txt") {
 			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
@@ -111,15 +154,34 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 }
 
 // Return header block and body.
-func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string) (hdr, body string) {
+func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
 	curlArgs := []string{"--silent", "--show-error", "--include"}
+	testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
+	curlArgs = append(curlArgs, "--resolve", host + ":" + testPort + ":" + testHost)
 	if token != "" {
 		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
 	}
 	curlArgs = append(curlArgs, args...)
-	curlArgs = append(curlArgs, "http://"+s.testServer.Addr+uri)
+	curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)
 	c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
-	output, err := exec.Command("curl", curlArgs...).CombinedOutput()
+	cmd := exec.Command("curl", curlArgs...)
+	stdout, err := cmd.StdoutPipe()
+	c.Assert(err, check.Equals, nil)
+	cmd.Stderr = cmd.Stdout
+	go cmd.Start()
+	buf := make([]byte, 2<<27)
+	n, err := io.ReadFull(stdout, buf)
+	// Discard (but measure size of) anything past 256 MiB.
+	var discarded int64
+	if err == io.ErrUnexpectedEOF {
+		err = nil
+		buf = buf[:n]
+	} else {
+		c.Assert(err, check.Equals, nil)
+		discarded, err = io.Copy(ioutil.Discard, stdout)
+		c.Assert(err, check.Equals, nil)
+	}
+	err = cmd.Wait()
 	// Without "-f", curl exits 0 as long as it gets a valid HTTP
 	// response from the server, even if the response status
 	// indicates that the request failed. In our test suite, we
@@ -127,10 +189,11 @@ func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string
 	// headers ourselves. If curl exits non-zero, our testing
 	// environment is broken.
 	c.Assert(err, check.Equals, nil)
-	hdrsAndBody := strings.SplitN(string(output), "\r\n\r\n", 2)
+	hdrsAndBody := strings.SplitN(string(buf), "\r\n\r\n", 2)
 	c.Assert(len(hdrsAndBody), check.Equals, 2)
 	hdr = hdrsAndBody[0]
-	body = hdrsAndBody[1]
+	bodyPart = hdrsAndBody[1]
+	bodySize = int64(len(bodyPart)) + discarded
 	return
 }
 

commit 2d45dab88989fcaa157b44a014c889d664919672
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 01:20:28 2015 -0400

    5824: Support vhost-based collection lookups.

diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
index 87b28f8..3040e0a 100644
--- a/sdk/go/arvadostest/fixtures.go
+++ b/sdk/go/arvadostest/fixtures.go
@@ -7,6 +7,8 @@ const (
 	FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
 	NonexistentCollection = "zzzzz-4zz18-totallynotexist"
 	HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+	FooPdh                = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
+	HelloWorldPdh         = "55713e6a34081eb03609e7ad5fcad129+62"
 	PathologicalManifest  = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K at xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero at 0 0:1:f 1:0:zero at 1 1:4:ooba 4:0:zero at 4 5:1:r 5:4:rbaz 9:0:zero at 9\n" +
 		"./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
 		"./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
diff --git a/sdk/go/auth/auth.go b/sdk/go/auth/auth.go
index 4a719e9..41cfb99 100644
--- a/sdk/go/auth/auth.go
+++ b/sdk/go/auth/auth.go
@@ -1,6 +1,7 @@
 package auth
 
 import (
+	"encoding/base64"
 	"net/http"
 	"net/url"
 	"strings"
@@ -20,6 +21,15 @@ func NewCredentialsFromHTTPRequest(r *http.Request) *Credentials {
 	return c
 }
 
+// EncodeTokenCookie accepts a token and returns a string suitable
+// for use as a cookie value, such that it will be decoded correctly
+// by LoadTokensFromHTTPRequest.
+var EncodeTokenCookie func([]byte) string = base64.URLEncoding.EncodeToString
+
+// DecodeTokenCookie accepts a cookie value and returns the decoded
+// token.
+var DecodeTokenCookie func(string) ([]byte, error) = base64.URLEncoding.DecodeString
+
 // LoadTokensFromHttpRequest loads all tokens it can find in the
 // headers and query string of an http query.
 func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
@@ -51,6 +61,8 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
 		a.Tokens = append(a.Tokens, val...)
 	}
 
+	a.loadTokenFromCookie(r)
+
 	// TODO: Load token from Rails session cookie (if Rails site
 	// secret is known)
 }
@@ -59,3 +71,15 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
 // LoadTokensFromHttpRequest() that [or how] we should read and parse
 // the request body. This has to be requested explicitly by the
 // application.
+
+func (a *Credentials) loadTokenFromCookie(r *http.Request) {
+	cookie, err := r.Cookie("api_token")
+	if err != nil || len(cookie.Value) == 0 {
+		return
+	}
+	token, err := DecodeTokenCookie(cookie.Value)
+	if err != nil {
+		return
+	}
+	a.Tokens = append(a.Tokens, string(token))
+}
diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index dbf4f5b..5914cd5 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -1,28 +1,158 @@
 // Keep-web provides read-only HTTP access to files stored in Keep. It
 // serves public data to anonymous and unauthenticated clients, and
-// accepts authentication via Arvados tokens. It can be installed
-// anywhere with access to Keep services, typically behind a web proxy
-// that provides SSL support.
+// serves private data to clients that supply Arvados API tokens. It
+// can be installed anywhere with access to Keep services, typically
+// behind a web proxy that supports TLS.
 //
-// Given that this amounts to a web hosting service for arbitrary
-// content, it is vital to ensure that at least one of the following is
-// true:
+// Starting the server
 //
-// Usage
-//
-// Listening:
+// Serve HTTP requests at port 1234 on all interfaces:
 //
 //   keep-web -address=:1234
 //
-// Start an HTTP server on port 1234.
+// Serve HTTP requests at port 1234 on the interface with IP address 1.2.3.4:
 //
 //   keep-web -address=1.2.3.4:1234
 //
-// Start an HTTP server on port 1234, on the interface with IP address 1.2.3.4.
+// Proxy configuration
 //
 // Keep-web does not support SSL natively. Typically, it is installed
 // behind a proxy like nginx.
 //
+// Here is an example nginx configuration.
+//
+//	http {
+//	  upstream keep-web {
+//	    server localhost:1234;
+//	  }
+//	  server {
+//	    listen *:443 ssl;
+//	    server_name dl.example.com *.dl.example.com ~.*--dl.example.com;
+//	    ssl_certificate /root/wildcard.example.com.crt;
+//	    ssl_certificate_key /root/wildcard.example.com.key;
+//	    location  / {
+//	      proxy_pass http://keep-web;
+//	      proxy_set_header Host $host;
+//	      proxy_set_header X-Forwarded-For $remote_addr;
+//	    }
+//	  }
+//	}
+//
+// It is not necessary to run keep-web on the same host as the nginx
+// proxy. However, TLS is not used between nginx and keep-web, so
+// intervening networks must be secured by other means.
+//
+// Download URLs
+//
+// The following "same origin" URL patterns are supported for public
+// collections (i.e., collections which can be served by keep-web
+// without making use of any credentials supplied by the client). See
+// "Same-origin mode" below.
+//
+//   http://dl.example.com/c=uuid_or_pdh/path/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//
+// The following "multiple origin" URL patterns are supported for all
+// collections:
+//
+//   http://uuid_or_pdh--dl.example.com/path/file.txt
+//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
+//   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
+//
+// In the "multiple origin" form, the string "--" can be replaced with
+// "." with identical results (assuming the upstream proxy is
+// configured accordingly). These two are equivalent:
+//
+//   http://uuid_or_pdh--dl.example.com/path/file.txt
+//   http://uuid_or_pdh.dl.example.com/path/file.txt
+//
+// The first form minimizes the cost and effort of deploying a
+// wildcard TLS certificate for *.dl.example.com. The second form is
+// likely to be easier to configure, and more efficient to run, on an
+// upstream proxy.
+//
+// In all of the above forms, the "dl.example.com" part can be
+// anything at all.
+//
+// In all of the above forms, the "uuid_or_pdh" part can be either a
+// collection UUID or a portable data hash with the "+" character
+// replaced by "-".
+//
+// Assuming there is a collection with UUID
+// zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
+// 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
+// interchangeable:
+//
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo
+//
+// Authorization mechanisms
+//
+// A token can be provided in an Authorization header:
+//
+//   Authorization: OAuth2 o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A base64-encoded token can be provided in a cookie named "api_token":
+//
+//   Cookie: api_token=bzA3ajRweDdSbEpLNEN1TVlwN0MwTERUNEN6UjFKMXFCRTVBdm83ZUNjVWpPVGlreEs=
+//
+// A token can be provided in a URL-encoded query string:
+//
+//   GET /foo.txt?api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A suitably encoded token can be provided in a POST body if the
+// request has a content type of application/x-www-form-urlencoded or
+// multipart/form-data:
+//
+//   POST /foo.txt
+//   Content-Type: application/x-www-form-urlencoded
+//   [...]
+//   api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// If a token is provided in a query string or in a POST request, the
+// response is an HTTP 303 redirect to an equivalent GET request, with
+// the token stripped from the query string and added to a cookie
+// instead.
+//
+// Compatibility
+//
+// Client-provided authorization tokens are ignored if the client does
+// not provide a Host header.
+//
+// In order to use the query string or POST form authorization
+// mechanisms, the client must follow 303 redirects; the client must
+// accept cookies with a 303 response and send those cookies when
+// performing the redirect; and either the client or an intervening
+// proxy must resolve a relative URL ("//host/path") if given in a
+// response Location header.
+//
+// Intranet mode
+//
+// Normally, Keep-web accepts requests for multiple collections using
+// the same host name, provided the client's credentials are not being
+// used. This provides insufficient XSS protection in an installation
+// where the "anonymously accessible" data is not truly public, but
+// merely protected by network topology.
+//
+// In such cases -- for example, a site which is not reachable from
+// the internet, where some data is world-readable from Arvados's
+// perspective but is intended to be available only to users within
+// the local network -- the upstream proxy should be configured to return
+// 401 for all paths beginning with "/c=".
+//
+// Same-origin mode
+//
+// Without the same-origin protection outlined above, a web page
+// stored in collection X could execute JavaScript code that uses the
+// current viewer's credentials to download additional data from
+// collection Y -- data which is accessible to the current viewer, but
+// not to the author of collection X -- from the same origin
+// (``https://dl.example.com/'') and upload it to some other site
+// chosen by the author of collection X.
+//
 package main
 
 // TODO(TC): Implement
@@ -31,7 +161,7 @@ package main
 //
 // Normally, Keep-web is installed using a wildcard DNS entry and a
 // wildcard HTTPS certificate, serving data from collection X at
-// ``https://X.dl.example.com/path/file.ext''.
+// ``https://X--dl.example.com/path/file.ext''.
 //
 // It will also serve publicly accessible data at
 // ``https://dl.example.com/collections/X/path/file.txt'', but it does not
@@ -48,10 +178,4 @@ package main
 //
 //   keep-web -trust-all-content [...]
 //
-// In the general case, this should not be enabled: A web page stored
-// in collection X can execute JavaScript code that uses the current
-// viewer's credentials to download additional data -- data which is
-// accessible to the current viewer, but not to the author of
-// collection X -- from the same origin (``https://dl.example.com/'')
-// and upload it to some other site chosen by the author of collection
-// X.
+// In the general case, this should not be enabled: see "Same-origin mode" above.
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 04af920..03b3e26 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -2,11 +2,14 @@ package main
 
 import (
 	"fmt"
+	"html"
 	"io"
 	"mime"
 	"net/http"
+	"net/url"
 	"os"
 	"strings"
+	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
@@ -25,26 +28,49 @@ func init() {
 	anonymousTokens = []string{}
 }
 
+// Return the part of s before the first "." or "--" if that part is a UUID or a PDH, otherwise "".
+func parseCollectionIdFromDNSName(s string) string {
+	// Strip domain.
+	if i := strings.IndexRune(s, '.'); i >= 0 {
+		s = s[:i]
+	}
+	// Names like {uuid}--dl.example.com serve the same purpose as
+	// {uuid}.dl.example.com but can reduce cost/effort of using
+	// [additional] wildcard certificates.
+	if i := strings.Index(s, "--"); i >= 0 {
+		s = s[:i]
+	}
+	if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
+		return ""
+	}
+	return s
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
-	var statusCode int
+	var statusCode = 0
 	var statusText string
 
 	w := httpserver.WrapResponseWriter(wOrig)
 	defer func() {
-		if statusCode > 0 {
-			if w.WroteStatus() == 0 {
-				w.WriteHeader(statusCode)
-			} else {
-				httpserver.Log(r.RemoteAddr, "WARNING",
-					fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
-			}
+		if statusCode == 0 {
+			statusCode = w.WroteStatus()
+		} else if w.WroteStatus() == 0 {
+			w.WriteHeader(statusCode)
+		} else if w.WroteStatus() != statusCode {
+			httpserver.Log(r.RemoteAddr, "WARNING",
+				fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
 		}
 		if statusText == "" {
 			statusText = http.StatusText(statusCode)
 		}
-		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
 	}()
 
+	if r.Method != "GET" && r.Method != "POST" {
+		statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+		return
+	}
+
 	arv := clientPool.Get()
 	if arv == nil {
 		statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
@@ -54,17 +80,70 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
 	pathParts := strings.Split(r.URL.Path[1:], "/")
 
-	if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
-		statusCode = http.StatusNotFound
-		return
-	}
-
 	var targetId string
 	var targetPath []string
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
-	if len(pathParts) >= 5 && pathParts[1] == "download" {
+
+	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
+		// "http://{id}.domain.example.com/{path}" form
+		if t := r.FormValue("api_token"); t != "" {
+			// ...with explicit token in query string or
+			// form in POST body. We must encrypt the
+			// token such that it can only be used for
+			// this collection; put it in an HttpOnly
+			// cookie; and redirect to the same URL with
+			// the query param redacted, and method =
+			// GET.
+			//
+			// The HttpOnly flag is necessary to prevent
+			// JavaScript code (included in, or loaded by,
+			// a page in the collection being served) from
+			// employing the user's token beyond reading
+			// other files in the same domain, i.e., the
+			// same collection.
+			//
+			// The 303 redirect is necessary in the case
+			// of a GET request to avoid exposing the
+			// token in the Location bar, and in the case
+			// of a POST request to avoid raising warnings
+			// when the user refreshes the resulting page.
+			http.SetCookie(w, &http.Cookie{
+				Name:    "api_token",
+				Value:   auth.EncodeTokenCookie([]byte(t)),
+				Path:    "/",
+				Expires: time.Now().AddDate(10,0,0),
+			})
+			redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+			w.Header().Add("Location", redir)
+			statusCode, statusText = http.StatusSeeOther, redir
+			w.WriteHeader(statusCode)
+			io.WriteString(w, `<A href="`)
+			io.WriteString(w, html.EscapeString(redir))
+			io.WriteString(w, `">Continue</A>`)
+			return
+		} else if strings.HasPrefix(pathParts[0], "t=") {
+			// ...with explicit token in path,
+			// "{...}.com/t={token}/{path}".  This form
+			// must only be used to pass scoped tokens
+			// that give permission for a single
+			// collection. See FormValue case above.
+			tokens = []string{pathParts[0][2:]}
+			targetPath = pathParts[1:]
+			pathToken = true
+		} else {
+			// ...with cookie, Authorization header, or
+			// no token at all
+			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+			tokens = append(reqTokens, anonymousTokens...)
+			targetPath = pathParts
+		}
+	} else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+		statusCode = http.StatusNotFound
+		return
+	} else if len(pathParts) >= 5 && pathParts[1] == "download" {
 		// "/collections/download/{id}/{token}/path..." form:
 		// Don't use our configured anonymous tokens,
 		// Authorization headers, etc.  Just use the token in
@@ -86,7 +165,6 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	found := false
 	for _, arv.ApiToken = range tokens {
 		err := arv.Get("collections", targetId, nil, &collection)
-		httpserver.Log(err)
 		if err == nil {
 			// Success
 			found = true
@@ -131,8 +209,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		// someone trying (anonymously) to download public
 		// data that has been deleted.  Allow a referrer to
 		// provide this context somehow?
-		statusCode = http.StatusUnauthorized
 		w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+		statusCode = http.StatusUnauthorized
 		return
 	}
 
@@ -162,6 +240,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		}
 	}
 
+	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)
 	if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
new file mode 100644
index 0000000..a1f5e1a
--- /dev/null
+++ b/services/keep-web/handler_test.go
@@ -0,0 +1,218 @@
+package main
+
+import (
+	"html"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"regexp"
+	"strings"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&UnitSuite{})
+
+type UnitSuite struct {}
+
+func mustParseURL(s string) *url.URL {
+	r, err := url.Parse(s)
+	if err != nil {
+		panic("parse URL: " + s)
+	}
+	return r
+}
+
+func (s *IntegrationSuite) TestVhost404(c *check.C) {
+	for _, testURL := range []string{
+		arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
+		arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+	} {
+		resp := httptest.NewRecorder()
+		req := &http.Request{
+			Method: "GET",
+			URL: mustParseURL(testURL),
+		}
+		(&handler{}).ServeHTTP(resp, req)
+		c.Check(resp.Code, check.Equals, http.StatusNotFound)
+		c.Check(resp.Body.String(), check.Equals, "")
+	}
+}
+
+type authorizer func(*http.Request, string) int
+
+func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
+	doVhostRequests(c, authzViaAuthzHeader)
+}
+func authzViaAuthzHeader(r *http.Request, tok string) int {
+	r.Header.Add("Authorization", "OAuth2 " + tok)
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
+	doVhostRequests(c, authzViaCookieValue)
+}
+func authzViaCookieValue(r *http.Request, tok string) int {
+	r.AddCookie(&http.Cookie{
+		Name: "api_token",
+		Value: auth.EncodeTokenCookie([]byte(tok)),
+	})
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
+	doVhostRequests(c, authzViaPath)
+}
+func authzViaPath(r *http.Request, tok string) int {
+	r.URL.Path = "/t=" + tok + r.URL.Path
+	return http.StatusNotFound
+}
+
+func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
+	doVhostRequests(c, authzViaQueryString)
+}
+func authzViaQueryString(r *http.Request, tok string) int {
+	r.URL.RawQuery = "api_token=" + tok
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
+	doVhostRequests(c, authzViaPOST)
+}
+func authzViaPOST(r *http.Request, tok string) int {
+	r.Method = "POST"
+	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+	r.Body = ioutil.NopCloser(strings.NewReader(
+		url.Values{"api_token": {tok}}.Encode()))
+	return http.StatusUnauthorized
+}
+
+// Try some combinations of {url, token} using the given authorization
+// mechanism, and verify the result is correct.
+func doVhostRequests(c *check.C, authz authorizer) {
+	hostPath := arvadostest.FooCollection + ".example.com/foo"
+	for _, tok := range []string{
+		arvadostest.ActiveToken,
+		arvadostest.ActiveToken[:15],
+		arvadostest.SpectatorToken,
+		"bogus",
+		"",
+	} {
+		u := mustParseURL("http://" + hostPath)
+		req := &http.Request{
+			Method: "GET",
+			Host: u.Host,
+			URL: u,
+			Header: http.Header{},
+		}
+		failCode := authz(req, tok)
+		resp := doReq(req)
+		code, body := resp.Code, resp.Body.String()
+		if tok == arvadostest.ActiveToken {
+			c.Check(code, check.Equals, http.StatusOK)
+			c.Check(body, check.Equals, "foo")
+		} else {
+			c.Check(code >= 400, check.Equals, true)
+			c.Check(code < 500, check.Equals, true)
+			if tok == arvadostest.SpectatorToken {
+				// Valid token never offers to retry
+				// with different credentials.
+				c.Check(code, check.Equals, http.StatusNotFound)
+			} else {
+				// Invalid token can ask to retry
+				// depending on the authz method.
+				c.Check(code, check.Equals, failCode)
+			}
+			c.Check(body, check.Equals, "")
+		}
+	}
+}
+
+func doReq(req *http.Request) *httptest.ResponseRecorder {
+	resp := httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	if resp.Code != http.StatusSeeOther {
+		return resp
+	}
+	cookies := (&http.Response{Header: resp.Header()}).Cookies()
+	u, _ := req.URL.Parse(resp.Header().Get("Location"))
+	req = &http.Request{
+		Method: "GET",
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{},
+	}
+	for _, c := range cookies {
+		req.AddCookie(c)
+	}
+	return doReq(req)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		arvadostest.FooCollection + ".example.com/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "POST",
+		arvadostest.FooCollection + ".example.com/foo",
+		"",
+		"application/x-www-form-urlencoded",
+		url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
+		http.StatusOK,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "POST",
+		arvadostest.FooCollection + ".example.com/foo",
+		"",
+		"application/x-www-form-urlencoded",
+		url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
+		http.StatusNotFound,
+	)
+}
+
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) {
+	u, _ := url.Parse(`http://` + hostPath + queryString)
+	req := &http.Request{
+		Method: method,
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{"Content-Type": {contentType}},
+		Body: ioutil.NopCloser(strings.NewReader(body)),
+	}
+
+	resp := httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	c.Assert(resp.Code, check.Equals, http.StatusSeeOther)
+	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
+	cookies := (&http.Response{Header: resp.Header()}).Cookies()
+
+	u, _ = u.Parse(resp.Header().Get("Location"))
+	req = &http.Request{
+		Method: "GET",
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{},
+	}
+	for _, c := range cookies {
+		req.AddCookie(c)
+	}
+
+	resp = httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	c.Check(resp.Header().Get("Location"), check.Equals, "")
+	c.Check(resp.Code, check.Equals, expectStatus)
+	if expectStatus == http.StatusOK {
+		c.Check(resp.Body.String(), check.Equals, "foo")
+	}
+}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list