[ARVADOS] updated: 3d8314faaf74da71b75cbb4b530b5c3ed4a2532a

git at public.curoverse.com git at public.curoverse.com
Fri Oct 16 15:50:33 EDT 2015


Summary of changes:
 apps/workbench/test/integration/download_test.rb   |   7 +
 ...nstall-manual-prerequisites.html.textile.liquid |   3 +-
 doc/sdk/cli/subcommands.html.textile.liquid        |  14 +
 sdk/cli/bin/arv                                    | 104 ++++--
 sdk/cli/test/test_arv-get.rb                       | 328 +++++++---------
 .../test/{test_arv-get.rb => test_arv-keep-get.rb} |   2 +-
 .../test/{test_arv-put.rb => test_arv-keep-put.rb} |   2 +-
 sdk/go/arvadostest/run_servers.go                  |  25 +-
 sdk/go/keepclient/keepclient.go                    | 137 ++++---
 sdk/go/keepclient/keepclient_test.go               | 150 ++++++--
 sdk/go/keepclient/support.go                       |  63 ++--
 sdk/go/streamer/streamer_test.go                   |   1 +
 sdk/go/streamer/transfer.go                        |   4 +-
 sdk/python/tests/run_test_server.py                |  20 +-
 services/api/Gemfile                               |   2 +-
 services/api/Gemfile.lock                          |   8 +-
 services/keepproxy/keepproxy.go                    |   1 -
 services/keepstore/azure_blob_volume.go            |  35 +-
 services/keepstore/azure_blob_volume_test.go       |   8 +-
 services/keepstore/volume_generic_test.go          |   7 +
 services/keepstore/volume_unix.go                  |   4 +
 tools/keep-exercise/.gitignore                     |   1 +
 tools/keep-exercise/keep-exercise.go               | 157 ++++++++
 tools/keep-rsync/.gitignore                        |   1 +
 tools/keep-rsync/keep-rsync.go                     | 288 ++++++++++++++
 tools/keep-rsync/keep-rsync_test.go                | 414 +++++++++++++++++++++
 26 files changed, 1440 insertions(+), 346 deletions(-)
 copy sdk/cli/test/{test_arv-get.rb => test_arv-keep-get.rb} (99%)
 rename sdk/cli/test/{test_arv-put.rb => test_arv-keep-put.rb} (99%)
 create mode 100644 tools/keep-exercise/.gitignore
 create mode 100644 tools/keep-exercise/keep-exercise.go
 create mode 100644 tools/keep-rsync/.gitignore
 create mode 100644 tools/keep-rsync/keep-rsync.go
 create mode 100644 tools/keep-rsync/keep-rsync_test.go

  discards  cddcee0f27a7f27d4a7c1bc58a6d1b384b83ec53 (commit)
  discards  34a97b7577fec489eb9e866ccc9c2f5df709b52e (commit)
  discards  0381cd5f601ac996e47a0e9a6887926b3222f85f (commit)
  discards  acd0ee16f6ebf6341dc1df5ba37c9896f720dcb8 (commit)
  discards  670fa72d01d23fc2d10c7ad61dab961a49e1772d (commit)
  discards  e6616df2a2d933fd5fd5ab63124d6ad254c0ef56 (commit)
  discards  f1a1faf919edce78261d0ac252758d1bf01d69e3 (commit)
  discards  8ad914c4d6976d4d862300514d65d555126481ca (commit)
  discards  391fbe89b56b718f674822534c34ff80aa107576 (commit)
  discards  796a00b5887121e462f8a82391bac125120c2841 (commit)
  discards  a5741dcc18ae0862b220ed08d4db136cfa979ec5 (commit)
  discards  eeda48bc31d5cd03d3a72becaaac4d643c9de46d (commit)
  discards  5ba24fbe0b0529bbbb5bda2790b61c35ef256469 (commit)
  discards  7796b112345b302ff6108ff761ff0d3c871bf888 (commit)
  discards  8bb5dcb75f10e8128e2b4b5b741a96e781174997 (commit)
  discards  e0aa44d1f555056ba1dc8e866cf6ae50494cb3e2 (commit)
  discards  f04287b86a8d4e8b74cf8d894ef7ce5420364f7a (commit)
  discards  91062e85e93d0d0ae557a478f1f83b133d4d2080 (commit)
  discards  b9d5acacf4ac027867e388040221101b73dbf118 (commit)
  discards  8146d4cc8c3d52f548f6af66de26f30881ebec39 (commit)
  discards  2a54340956104d689ab52f60d82af5555a103919 (commit)
  discards  967632c79df826ab16a0449ff63b0b9f6d35c599 (commit)
  discards  d1289b1021929fbbba35027f70a33e05cb594bb3 (commit)
  discards  d37ebe600d3984c821386c347f33c59ef3355e03 (commit)
  discards  ff7759fbe3aab8f814a41437cd6e97b80fe56d8d (commit)
  discards  fc7c9e1ad6345888595205c0978c58a6710d7446 (commit)
  discards  e3778ef533f50b0492eef80bb2525a7a09628c32 (commit)
       via  3d8314faaf74da71b75cbb4b530b5c3ed4a2532a (commit)
       via  452179c1862b985fd4512fe472bdea5d7060eef6 (commit)
       via  cdd6a89b654e3eb530793cc8a552f452fc359a92 (commit)
       via  fbeaa254898528f11ba3e54edfd47d99bc534ba4 (commit)
       via  d93cabc080f1bbba53b7fde50b96d14f6b900143 (commit)
       via  7d0dfbb31312c2f22d698572d38216015f885ea8 (commit)
       via  1367c2b1f4771dd3d877df5a943c9a859636da7d (commit)
       via  1dba563361d3d9dce369d336d3c0d8ce48b9a819 (commit)
       via  d36d775dd76467129a0c9dbd93878d5b6b583040 (commit)
       via  1a6dbfaaea9520accfa8ca401887be61ed884420 (commit)
       via  c5566fdadf1bb466b6eb2d5133445887520aae63 (commit)
       via  432fff73232a661701552550e0cb4eec10ad791c (commit)
       via  bb1d507519384faec1344b96d250d6e1cf3e0f36 (commit)
       via  11633603c279de5904b07d0d05a47a6bd2897f78 (commit)
       via  a9b1a3fb8bcfa92735a1f70c1e982d75ae325fa4 (commit)
       via  9b712695237b01dcf19aba4a403f1beedd2485b8 (commit)
       via  1fd3a57b1a4a91e70478282b882f6da55ccb43f0 (commit)
       via  f60d6b260c77c03db86c3aaaeab13b863e18d9cf (commit)
       via  5cde00c12a33398eda11e069aa5ba8b89419d72f (commit)
       via  12c77e30a5d36fc24f6897944914248819b2b3f4 (commit)
       via  3d5693f7261a52cfa6eca54f054a43e7f5d049f9 (commit)
       via  b65d8b9008c4d0e6b5816d21bf6f1ae81167ee56 (commit)
       via  df9cb8ad02aaee8045cb31e207fd9c6a13c01684 (commit)
       via  96ce48a816ce1857e1ca3d035b3ab9002b7bc4c4 (commit)
       via  8841e6b3fb247d3b3342379c86c0231d246731ff (commit)
       via  0e7e5ba38ac7e86e0e398df063f612d84927692a (commit)
       via  807e4cd4abee760736fa32704785d673b0e908cd (commit)
       via  494de455828983fe827c96cf425fac6492618308 (commit)
       via  ce30948199736d45112ee9103642c22f59f84997 (commit)
       via  86087cc5ea76bd498d9f615fc314c14c08e721b9 (commit)
       via  e88a0a3ff00fd5448f893909d08ee11dc301776e (commit)
       via  db868a3c02cb84f068de77b438b4f2cb498826ae (commit)
       via  22928daa55bc0de7d539793ff27503affac8a39a (commit)
       via  fc17d3c31cc2074a63f6db8698cfcc00a85ec449 (commit)
       via  c262d3c07f5e8025d51646877c86315996022bae (commit)
       via  3e0f54e6f4d0be807d3e3a0f2b6a49cfef0c6a5e (commit)
       via  c7e90eb35082a139d4f3f254efc538516fefaffa (commit)
       via  6445c6a979f54a9dfea782077cc39b62a6901416 (commit)
       via  e02d4d685419b759d904bb265b3f24e2061012b2 (commit)
       via  3bb46b45d572286721530aea8dc1ec068cf8ffea (commit)
       via  2d02430766b915675ce09899e554cd48e6dba036 (commit)
       via  f19e50e99dc4939199a9b9a4381a775d40453267 (commit)
       via  1f37045c831a45c6899ef0d44f3cccd15476831a (commit)
       via  a50278e3d0e26bb5d513d0af5da2fb559b112388 (commit)
       via  3ce19b04f941270fc46bcfe739a215c3feab161d (commit)
       via  845f7b9048d4b674a05e04fe919f2eb37479b1a0 (commit)
       via  b212abf2074b52057f9c1dbf8ad9cef77b2ca8ce (commit)
       via  ef42f0396f5b51fb8f87b2f7e605d50f32f256b4 (commit)
       via  7072110da82050af324cfb7a511f65a543a062d1 (commit)
       via  94788532e822ef26b8c9eac7818f03e3a94fb124 (commit)
       via  07b382c82d7d834e801cf9dc85e2ed5ffcd7cd91 (commit)
       via  bc816b50fc16182fef2f5d17ffd61578432e83c3 (commit)
       via  6141682d816511814fb59fae4cdb5cf6090e735f (commit)
       via  f1ac845d8fdaa73ef8298ffdc1a6f82a515fb2bf (commit)
       via  f553dd496de6b30bf7263424c17e4be4452b593e (commit)
       via  20135b59855216b0ccd8154795e359c4bd11d456 (commit)
       via  84982dcc65c1da979080453e3f5460f0c2c2d67a (commit)
       via  d1e55b0176e20451df3ad11f7a0c24c0fe666738 (commit)
       via  70c7c9f19692ea7275541006e720c3ea56ba32e1 (commit)
       via  9fed03f52ee563ef537222b0e21958a33436406c (commit)
       via  37d19a49d22a7d0ae7b4848135fb5df5128d4847 (commit)
       via  38354fdf55e0895378f87e6d6bff62b7eb377a25 (commit)
       via  fba6fa53d377c6704849a2bdb731be9a0eec3aa2 (commit)
       via  e1492d82f4326cc58531b92844118b987575846e (commit)
       via  5a7951a1684382e5bdb04d4e8ef945a6ced19cee (commit)
       via  1841871773da24d4a743bf9083e1fa461a510125 (commit)
       via  cef1d949ecc75417a3575be79f683b2b0048953f (commit)
       via  1996b03c10e45d4c1959b40333c57261a040bffb (commit)
       via  c4f5281d2453faf3184c367f54b29bb77ecf533b (commit)
       via  3596b89edfbea42ce52d6c7112e4e18797895423 (commit)
       via  8d4e8098591e5545a66670e60911599d5e0903ac (commit)
       via  d3c7b9502d9297d71225ebdbb81616f7613348b5 (commit)
       via  8653e6d34f5d895ee56fe3657c7fc3ca3cf615f4 (commit)
       via  db728a63e741c61f93a200129997c74fdc3065b7 (commit)
       via  2f3634990e8c2d9b374c99d7432a188f43175a35 (commit)
       via  5790a95b32d3a2268feea07d5ae580e28235bd4c (commit)
       via  84de88800275db7046e3c784ff73d4af1a6f03b1 (commit)
       via  e531e2bb20a6ad0acaef4c7a19fb7625f94ccc0c (commit)
       via  dfc31c02280518a4040a7168326b5fbfe742ed28 (commit)
       via  ac2426ef6ba7c2c722da0f7b0add2c5040529bd3 (commit)
       via  fb9e39f948b75cbae8de76359f589a9f33ebd1ac (commit)
       via  9cb175a358a7ac410901827d5f7de6a5aabe52bf (commit)
       via  0fa9a01d6fb567fa6e2f72cae3596da2c12c35fd (commit)
       via  0bae20041e6dd78b086ba1354b8de9aee4e6baed (commit)
       via  0384dc6b233e42537dc4e16e7de7283a2ef94897 (commit)
       via  96dab84d9f1cd7d590f2c119ed6e484c8ed4888c (commit)
       via  d7712854004c2136b86f69617552559c93caf600 (commit)
       via  8ff8be6aa9a21f9fe0d9f7398e2efb62da70df64 (commit)
       via  a717f156747bb997bd9e9cefe1919c8e53c28c4b (commit)
       via  e1dbdc4b39eb8c75c088f971cee0e7bad92b2848 (commit)
       via  841bd266812983c8edd98a480b116c50000f47b9 (commit)
       via  11e3caaba692bd76d2d12b0c0e1d8e531d1a0910 (commit)
       via  99ca2640c855e88c7b08c3509b21be9e160ccac8 (commit)
       via  956c3a765e3794a291376cbb5a2ee9ec6ef4560b (commit)
       via  bd1580a11a377270a5a7eed5abc8a6dfe6f9547d (commit)
       via  290247250e0c5702a341f98dfdc380e29f1b45c7 (commit)
       via  f3b5ffc2a4409d4c1b676e40ba4e582ca2beeb61 (commit)
       via  6823f2d65a48bc989b819b85b6428c230b223c82 (commit)
       via  cb48eb95d516b1d4a1ffe18be34703005531117a (commit)
       via  86df40e33e586ccb4dc506e00f773392d454804c (commit)
       via  16082e400046005f0b785ff4c63eda5801258415 (commit)
       via  dd4d5ce9b5bbcf0ec18698f604a4d666f37fb9b9 (commit)
       via  e738f6d9d628d01ed2e6ff9800979bb27cfc44f5 (commit)
       via  76bb593e5b16879a5efc85515cb47e179b31601f (commit)
       via  1b5e6506170075e5c01dda7489d814dd4c3f7774 (commit)
       via  59ee5d1b20c9d4b06c194b33b781d353d8e0ba3c (commit)
       via  7e6304bf646aaeb38196d24ce98c8bafc212e144 (commit)
       via  9e845fe4a5ff18f66c3850b25aae5f77680ac7cd (commit)
       via  1a4b37d333d312a97c7979b1203c6246a28e601b (commit)
       via  fd0bb03ede4bf503fc599f9bac468adb620b2142 (commit)
       via  044804a54376e69baccb23a241f7e7bffd7b529b (commit)
       via  ba0d6a92c642ca86dae5ab807b3ad7fe04e335d2 (commit)
       via  be9b62b5186bd9aa961f08f66bc104200acf760a (commit)
       via  489caef97d5185d91e7441980125480f98428c79 (commit)
       via  80572f8164b46079617f0248b1372cf860ca9957 (commit)
       via  89e2ddbacad459856fc15f56bfa2b5c036b090a1 (commit)
       via  5ba12c5e842c926a38fad477d890ccbf9e96278d (commit)
       via  45f10d80d1b584808a6e375214b5be6bc7d2a730 (commit)
       via  8e2ad737e429bc263620859468da3835708258e2 (commit)
       via  ae0f6b92c3c302757ff7e81239bd7b41a4430e70 (commit)
       via  df9e166a5ffc4aa79658bec1a5d552a3b413f0d8 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (cddcee0f27a7f27d4a7c1bc58a6d1b384b83ec53)
            \
             N -- N -- N (3d8314faaf74da71b75cbb4b530b5c3ed4a2532a)

When this happens we assume that you've already received alert emails for
all of the O revisions, so here we report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 3d8314faaf74da71b75cbb4b530b5c3ed4a2532a
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Oct 14 04:07:37 2015 -0400

    5824: Update bundle

diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock
index 20b8d61..8b2118c 100644
--- a/apps/workbench/Gemfile.lock
+++ b/apps/workbench/Gemfile.lock
@@ -74,7 +74,7 @@ GEM
       rack (>= 1.0.0)
       rack-test (>= 0.5.4)
       xpath (~> 2.0)
-    childprocess (0.5.5)
+    childprocess (0.5.6)
       ffi (~> 1.0, >= 1.0.11)
     cliver (0.3.2)
     coffee-rails (4.1.0)
@@ -98,7 +98,7 @@ GEM
     fast_stack (0.1.0)
       rake
       rake-compiler
-    ffi (1.9.6)
+    ffi (1.9.10)
     flamegraph (0.1.0)
       fast_stack
     google-api-client (0.6.4)
@@ -139,7 +139,7 @@ GEM
       metaclass (~> 0.0.1)
     morrisjs-rails (0.5.1)
       railties (> 3.1, < 5)
-    multi_json (1.11.1)
+    multi_json (1.11.2)
     multipart-post (1.2.0)
     net-scp (1.2.1)
       net-ssh (>= 2.6.5)
@@ -192,7 +192,7 @@ GEM
     ref (1.0.5)
     ruby-debug-passenger (0.2.0)
     ruby-prof (0.15.2)
-    rubyzip (1.1.6)
+    rubyzip (1.1.7)
     rvm-capistrano (1.5.5)
       capistrano (~> 2.15.4)
     sass (3.4.9)
@@ -202,7 +202,7 @@ GEM
       sprockets (>= 2.8, < 4.0)
       sprockets-rails (>= 2.0, < 4.0)
       tilt (~> 1.1)
-    selenium-webdriver (2.44.0)
+    selenium-webdriver (2.48.1)
       childprocess (~> 0.5)
       multi_json (~> 1.0)
       rubyzip (~> 1.0)
@@ -239,7 +239,7 @@ GEM
       execjs (>= 0.3.0)
       json (>= 1.8.0)
     uuidtools (2.1.5)
-    websocket (1.2.1)
+    websocket (1.2.2)
     websocket-driver (0.5.1)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.1)
@@ -294,3 +294,6 @@ DEPENDENCIES
   therubyracer
   uglifier (>= 1.0.3)
   wiselinks
+
+BUNDLED WITH
+   1.10.6

commit 452179c1862b985fd4512fe472bdea5d7060eef6
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Oct 13 10:52:06 2015 -0400

    5824: Use keep-web in Workbench integration tests

diff --git a/apps/workbench/test/helpers/download_helper.rb b/apps/workbench/test/helpers/download_helper.rb
new file mode 100644
index 0000000..21fb4cd
--- /dev/null
+++ b/apps/workbench/test/helpers/download_helper.rb
@@ -0,0 +1,21 @@
+module DownloadHelper
+  module_function
+
+  def path
+    Rails.root.join 'tmp', 'downloads'
+  end
+
+  def clear
+    FileUtils.rm_f path
+    begin
+      Dir.mkdir path
+    rescue Errno::EEXIST
+    end
+  end
+
+  def done
+    Dir[path.join '*'].reject do |f|
+      /\.part$/ =~ f
+    end
+  end
+end
diff --git a/apps/workbench/test/integration/collection_upload_test.rb b/apps/workbench/test/integration/collection_upload_test.rb
index 62efee4..5e407ce 100644
--- a/apps/workbench/test/integration/collection_upload_test.rb
+++ b/apps/workbench/test/integration/collection_upload_test.rb
@@ -7,9 +7,19 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
         io.write content
       end
     end
+    # Database reset doesn't restore KeepServices; we have to
+    # save/restore manually.
+    use_token :admin do
+      @keep_services = KeepService.all.to_a
+    end
   end
 
   teardown do
+    use_token :admin do
+      @keep_services.each do |ks|
+        KeepService.find(ks.uuid).update_attributes(ks.attributes)
+      end
+    end
     testfiles.each do |filename, _|
       File.unlink(testfile_path filename)
     end
@@ -64,10 +74,9 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
   test "Report mixed-content error" do
     skip 'Test suite does not use TLS'
     need_selenium "to make file uploads work"
-    begin
-      use_token :admin
-      proxy = KeepService.find(api_fixture('keep_services')['proxy']['uuid'])
-      proxy.update_attributes service_ssl_flag: false
+    use_token :admin do
+      KeepService.where(service_type: 'proxy').first.
+        update_attributes(service_ssl_flag: false)
     end
     visit page_with_token 'active', sandbox_path
     find('.nav-tabs a', text: 'Upload').click
@@ -82,11 +91,12 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
 
   test "Report network error" do
     need_selenium "to make file uploads work"
-    begin
-      use_token :admin
-      proxy = KeepService.find(api_fixture('keep_services')['proxy']['uuid'])
-      # Even if you somehow do port>2^16, surely nx.example.net won't respond
-      proxy.update_attributes service_host: 'nx.example.net', service_port: 99999
+    use_token :admin do
+      # Even if you somehow do port>2^16, surely nx.example.net won't
+      # respond
+      KeepService.where(service_type: 'proxy').first.
+        update_attributes(service_host: 'nx.example.net',
+                          service_port: 99999)
     end
     visit page_with_token 'active', sandbox_path
     find('.nav-tabs a', text: 'Upload').click
diff --git a/apps/workbench/test/integration/download_test.rb b/apps/workbench/test/integration/download_test.rb
new file mode 100644
index 0000000..9e4fd56
--- /dev/null
+++ b/apps/workbench/test/integration/download_test.rb
@@ -0,0 +1,45 @@
+require 'integration_helper'
+require 'helpers/download_helper'
+
+class DownloadTest < ActionDispatch::IntegrationTest
+  setup do
+    portfile = File.expand_path '../../../../../tmp/keep-web-ssl.port', __FILE__
+    @kwport = File.read portfile
+    Rails.configuration.keep_web_url = "https://localhost:#{@kwport}/c=%{uuid_or_pdh}"
+    CollectionsController.any_instance.expects(:file_enumerator).never
+
+    # Make sure Capybara can download files.
+    need_selenium 'for downloading', :selenium_with_download
+    DownloadHelper.clear
+
+    # Keep data isn't populated by fixtures, so we have to write any
+    # data we expect to read.
+    unless /^acbd/ =~ `echo -n foo | arv-put --no-progress --raw -` && $?.success?
+      raise $?.to_s
+    end
+  end
+
+  test "download from keep-web with a reader token" do
+    uuid = api_fixture('collections')['foo_file']['uuid']
+    token = api_fixture('api_client_authorizations')['active_all_collections']['api_token']
+    visit "/collections/download/#{uuid}/#{token}/"
+    within "#collection_files" do
+      click_link "foo"
+    end
+    data = nil
+    tries = 0
+    while tries < 20
+      sleep 0.1
+      tries += 1
+      data = File.read(DownloadHelper.path.join 'foo') rescue nil
+    end
+    assert_equal 'foo', data
+  end
+
+  # TODO(TC): test "view pages hosted by keep-web, using session
+  # token". We might persuade selenium to send
+  # "collection-uuid.dl.example" requests to localhost by configuring
+  # our test nginx server to work as its forward proxy. Until then,
+  # we're relying on the "Redirect to keep_web_url via #{id_type}"
+  # test in CollectionsControllerTest (and keep-web's tests).
+end
diff --git a/apps/workbench/test/integration_helper.rb b/apps/workbench/test/integration_helper.rb
index 39fdf4b..5750a1b 100644
--- a/apps/workbench/test/integration_helper.rb
+++ b/apps/workbench/test/integration_helper.rb
@@ -19,6 +19,17 @@ Capybara.register_driver :poltergeist_without_file_api do |app|
   Capybara::Poltergeist::Driver.new app, POLTERGEIST_OPTS.merge(extensions: [js])
 end
 
+Capybara.register_driver :selenium_with_download do |app|
+  profile = Selenium::WebDriver::Firefox::Profile.new
+  profile['browser.download.dir'] = DownloadHelper.path.to_s
+  profile['browser.download.downloadDir'] = DownloadHelper.path.to_s
+  profile['browser.download.defaultFolder'] = DownloadHelper.path.to_s
+  profile['browser.download.folderList'] = 2 # "save to user-defined location"
+  profile['browser.download.manager.showWhenStarting'] = false
+  profile['browser.helperApps.alwaysAsk.force'] = false
+  Capybara::Selenium::Driver.new app, profile: profile
+end
+
 module WaitForAjax
   Capybara.default_wait_time = 5
   def wait_for_ajax
@@ -73,8 +84,8 @@ module HeadlessHelper
     end
   end
 
-  def need_selenium reason=nil
-    Capybara.current_driver = :selenium
+  def need_selenium reason=nil, driver=:selenium
+    Capybara.current_driver = driver
     unless ENV['ARVADOS_TEST_HEADFUL'] or @headless
       @headless = HeadlessSingleton.get
       @headless.start
diff --git a/apps/workbench/test/test_helper.rb b/apps/workbench/test/test_helper.rb
index 89d15c6..41592af 100644
--- a/apps/workbench/test/test_helper.rb
+++ b/apps/workbench/test/test_helper.rb
@@ -176,7 +176,10 @@ class ApiServerForTests
       # though it doesn't need to start up a new server).
       env_script = check_output %w(python ./run_test_server.py start --auth admin)
       check_output %w(python ./run_test_server.py start_arv-git-httpd)
+      check_output %w(python ./run_test_server.py start_keep-web)
       check_output %w(python ./run_test_server.py start_nginx)
+      # This one isn't a no-op, even under run-tests.sh.
+      check_output %w(python ./run_test_server.py start_keep)
     end
     test_env = {}
     env_script.each_line do |line|
@@ -192,9 +195,11 @@ class ApiServerForTests
 
   def stop_test_server
     Dir.chdir PYTHON_TESTS_DIR do
+      check_output %w(python ./run_test_server.py stop_keep)
       # These are no-ops if we're running within run-tests.sh
       check_output %w(python ./run_test_server.py stop_nginx)
       check_output %w(python ./run_test_server.py stop_arv-git-httpd)
+      check_output %w(python ./run_test_server.py stop_keep-web)
       check_output %w(python ./run_test_server.py stop)
     end
     @@server_is_running = false
diff --git a/sdk/python/tests/nginx.conf b/sdk/python/tests/nginx.conf
index 6196605..885f84e 100644
--- a/sdk/python/tests/nginx.conf
+++ b/sdk/python/tests/nginx.conf
@@ -28,4 +28,18 @@ http {
       proxy_pass http://keepproxy;
     }
   }
+  upstream keep-web {
+    server localhost:{{KEEPWEBPORT}};
+  }
+  server {
+    listen *:{{KEEPWEBSSLPORT}} ssl default_server;
+    server_name ~^(?<request_host>.*)$;
+    ssl_certificate {{SSLCERT}};
+    ssl_certificate_key {{SSLKEY}};
+    location  / {
+      proxy_pass http://keep-web;
+      proxy_set_header Host $request_host:{{KEEPWEBPORT}};
+      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    }
+  }
 }
diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py
index d90d2ad..809cf40 100644
--- a/sdk/python/tests/run_test_server.py
+++ b/sdk/python/tests/run_test_server.py
@@ -345,7 +345,7 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
         token=os.environ['ARVADOS_API_TOKEN'],
         insecure=True)
 
-    for d in api.keep_services().list().execute()['items']:
+    for d in api.keep_services().list(filters=[['service_type','=','disk']]).execute()['items']:
         api.keep_services().delete(uuid=d['uuid']).execute()
     for d in api.keep_disks().list().execute()['items']:
         api.keep_disks().delete(uuid=d['uuid']).execute()
@@ -438,10 +438,35 @@ def stop_arv_git_httpd():
         return
     kill_server_pid(_pidfile('arv-git-httpd'), wait=0)
 
+def run_keep_web():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    stop_keep_web()
+
+    keepwebport = find_available_port()
+    env = os.environ.copy()
+    env.pop('ARVADOS_API_TOKEN', None)
+    keepweb = subprocess.Popen(
+        ['keep-web',
+         '-attachment-only-host=localhost:'+str(keepwebport),
+         '-address=:'+str(keepwebport)],
+        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    with open(_pidfile('keep-web'), 'w') as f:
+        f.write(str(keepweb.pid))
+    _setport('keep-web', keepwebport)
+    _wait_until_port_listens(keepwebport)
+
+def stop_keep_web():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('keep-web'), wait=0)
+
 def run_nginx():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
     nginxconf = {}
+    nginxconf['KEEPWEBPORT'] = _getport('keep-web')
+    nginxconf['KEEPWEBSSLPORT'] = find_available_port()
     nginxconf['KEEPPROXYPORT'] = _getport('keepproxy')
     nginxconf['KEEPPROXYSSLPORT'] = find_available_port()
     nginxconf['GITPORT'] = _getport('arv-git-httpd')
@@ -465,6 +490,7 @@ def run_nginx():
          '-g', 'pid '+_pidfile('nginx')+';',
          '-c', conffile],
         env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    _setport('keep-web-ssl', nginxconf['KEEPWEBSSLPORT'])
     _setport('keepproxy-ssl', nginxconf['KEEPPROXYSSLPORT'])
     _setport('arv-git-httpd-ssl', nginxconf['GITSSLPORT'])
 
@@ -564,7 +590,8 @@ class TestCaseWithServers(unittest.TestCase):
         for server_kwargs, start_func, stop_func in (
                 (cls.MAIN_SERVER, run, reset),
                 (cls.KEEP_SERVER, run_keep, stop_keep),
-                (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy)):
+                (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy),
+                (cls.KEEP_WEB_SERVER, run_keep_web, stop_keep_web)):
             if server_kwargs is not None:
                 start_func(**server_kwargs)
                 cls._cleanup_funcs.append(stop_func)
@@ -590,6 +617,7 @@ if __name__ == "__main__":
         'start', 'stop',
         'start_keep', 'stop_keep',
         'start_keep_proxy', 'stop_keep_proxy',
+        'start_keep-web', 'stop_keep-web',
         'start_arv-git-httpd', 'stop_arv-git-httpd',
         'start_nginx', 'stop_nginx',
     ]
@@ -629,6 +657,10 @@ if __name__ == "__main__":
         run_arv_git_httpd()
     elif args.action == 'stop_arv-git-httpd':
         stop_arv_git_httpd()
+    elif args.action == 'start_keep-web':
+        run_keep_web()
+    elif args.action == 'stop_keep-web':
+        stop_keep_web()
     elif args.action == 'start_nginx':
         run_nginx()
     elif args.action == 'stop_nginx':
diff --git a/services/api/app/controllers/database_controller.rb b/services/api/app/controllers/database_controller.rb
index 64818da..21c8e47 100644
--- a/services/api/app/controllers/database_controller.rb
+++ b/services/api/app/controllers/database_controller.rb
@@ -29,6 +29,10 @@ class DatabaseController < ApplicationController
     fixturesets = Dir.glob(Rails.root.join('test', 'fixtures', '*.yml')).
       collect { |yml| yml.match(/([^\/]*)\.yml$/)[1] }
 
+    # Don't reset keep_services: clients need to discover our
+    # integration-testing keepstores, not test fixtures.
+    fixturesets -= %w[keep_services]
+
     table_names = '"' + ActiveRecord::Base.connection.tables.join('","') + '"'
 
     attempts_left = 20
diff --git a/services/api/test/fixtures/api_client_authorizations.yml b/services/api/test/fixtures/api_client_authorizations.yml
index 9199d17..ecb9adb 100644
--- a/services/api/test/fixtures/api_client_authorizations.yml
+++ b/services/api/test/fixtures/api_client_authorizations.yml
@@ -87,7 +87,7 @@ active_all_collections:
   user: active
   api_token: activecollectionsabcdefghijklmnopqrstuvwxyz1234567
   expires_at: 2038-01-01 00:00:00
-  scopes: ["GET /arvados/v1/collections/", "GET /arvados/v1/keep_disks"]
+  scopes: ["GET /arvados/v1/collections/", "GET /arvados/v1/keep_services", "GET /arvados/v1/keep_services/"]
 
 active_userlist:
   api_client: untrusted

commit cdd6a89b654e3eb530793cc8a552f452fc359a92
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Oct 12 19:15:06 2015 -0400

    5824: Add option to redirect Workbench downloads to a keep-web service

diff --git a/apps/workbench/app/controllers/collections_controller.rb b/apps/workbench/app/controllers/collections_controller.rb
index e01151c..38b58a1 100644
--- a/apps/workbench/app/controllers/collections_controller.rb
+++ b/apps/workbench/app/controllers/collections_controller.rb
@@ -1,4 +1,6 @@
 require "arvados/keep"
+require "uri"
+require "cgi"
 
 class CollectionsController < ApplicationController
   include ActionController::Live
@@ -130,11 +132,27 @@ class CollectionsController < ApplicationController
     usable_token = find_usable_token(tokens) do
       coll = Collection.find(params[:uuid])
     end
+    if usable_token.nil?
+      # Response already rendered.
+      return
+    end
+
+    if Rails.configuration.keep_web_url
+      opts = {}
+      if usable_token == params[:reader_token]
+        opts[:path_token] = usable_token
+      elsif usable_token == Rails.configuration.anonymous_user_token
+        # Don't pass a token at all
+      else
+        # We pass the current user's real token only if it's necessary
+        # to read the collection.
+        opts[:query_token] = usable_token
+      end
+      return redirect_to keep_web_url(params[:uuid], params[:file], opts)
+    end
 
     file_name = params[:file].andand.sub(/^(\.\/|\/|)/, './')
-    if usable_token.nil?
-      return  # Response already rendered.
-    elsif file_name.nil? or not coll.manifest.has_file?(file_name)
+    if file_name.nil? or not coll.manifest.has_file?(file_name)
       return render_not_found
     end
 
@@ -305,6 +323,21 @@ class CollectionsController < ApplicationController
     return nil
   end
 
+  def keep_web_url(uuid_or_pdh, file, opts)
+    fmt = {uuid_or_pdh: uuid_or_pdh.sub('+', '-')}
+    uri = URI.parse(Rails.configuration.keep_web_url % fmt)
+    uri.path += '/' unless uri.path.end_with? '/'
+    if opts[:path_token]
+      uri.path += 't=' + opts[:path_token] + '/'
+    end
+    uri.path += '_/'
+    uri.path += CGI::escape(file)
+    if opts[:query_token]
+      uri.query = 'api_token=' + CGI::escape(opts[:query_token])
+    end
+    uri.to_s
+  end
+
   # Note: several controller and integration tests rely on stubbing
   # file_enumerator to return fake file content.
   def file_enumerator opts
diff --git a/apps/workbench/config/application.default.yml b/apps/workbench/config/application.default.yml
index 00959bb..5504fd2 100644
--- a/apps/workbench/config/application.default.yml
+++ b/apps/workbench/config/application.default.yml
@@ -225,3 +225,11 @@ common:
   # E.g., using a name-based proxy server to forward connections to shell hosts:
   # https://%{hostname}.webshell.uuid_prefix.arvadosapi.com/
   shell_in_a_box_url: false
+
+  # Format of download/preview links. If false, use Workbench's
+  # download facility.
+  #
+  # Examples:
+  # keep_web_url: https://%{uuid_or_pdh}.dl.zzzzz.your.domain
+  # keep_web_url: https://%{uuid_or_pdh}--dl.zzzzz.your.domain
+  keep_web_url: false
diff --git a/apps/workbench/test/controllers/collections_controller_test.rb b/apps/workbench/test/controllers/collections_controller_test.rb
index 13644e0..b4e7dd3 100644
--- a/apps/workbench/test/controllers/collections_controller_test.rb
+++ b/apps/workbench/test/controllers/collections_controller_test.rb
@@ -514,4 +514,55 @@ class CollectionsControllerTest < ActionController::TestCase
     get :show, {id: api_fixture('collections')['user_agreement']['uuid']}, session_for(:active)
     assert_not_includes @response.body, '<a href="#Upload"'
   end
+
+  def setup_for_keep_web cfg='https://%{uuid_or_pdh}.dl.zzzzz.example'
+    Rails.configuration.keep_web_url = cfg
+    @controller.expects(:file_enumerator).never
+  end
+
+  %w(uuid portable_data_hash).each do |id_type|
+    test "Redirect to keep_web_url via #{id_type}" do
+      setup_for_keep_web
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.dl.zzzzz.example/_/w+a+z?api_token=#{tok}", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with reader token" do
+      setup_for_keep_web
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z", reader_token: tok}, session_for(:expired)
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.dl.zzzzz.example/t=#{tok}/_/w+a+z", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with no token" do
+      setup_for_keep_web
+      Rails.configuration.anonymous_user_token =
+        api_fixture('api_client_authorizations')['anonymous']['api_token']
+      id = api_fixture('collections')['public_text_file'][id_type]
+      get :show_file, {uuid: id, file: "Hello World.txt"}
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.dl.zzzzz.example/_/Hello+World.txt", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} using -attachment-only-host mode" do
+      setup_for_keep_web 'https://dl.zzzzz.example/c=%{uuid_or_pdh}'
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+      assert_response :redirect
+      assert_equal "https://dl.zzzzz.example/c=#{id.sub '+', '-'}/_/w+a+z?api_token=#{tok}", @response.redirect_url
+    end
+  end
+
+  test "No redirect to keep_web_url if collection not found" do
+    setup_for_keep_web
+    id = api_fixture('collections')['w_a_z_file']['uuid']
+    get :show_file, {uuid: id, file: "w a z"}, session_for(:spectator)
+    assert_response 404
+  end
 end
diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 8ae9490..cc47ebe 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -94,8 +94,8 @@
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/_/foo
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/_/foo
-//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--foo.example.com/foo
-//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--.invalid/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo
 //
 // An additional form is supported specifically to make it more
 // convenient to maintain support for existing Workbench download

commit fbeaa254898528f11ba3e54edfd47d99bc534ba4
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Oct 1 22:16:51 2015 -0400

    5824: gofmt

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index b39a941..9751cd1 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -21,9 +21,9 @@ import (
 type handler struct{}
 
 var (
-	clientPool      = arvadosclient.MakeClientPool()
-	trustAllContent = false
-	anonymousTokens []string
+	clientPool         = arvadosclient.MakeClientPool()
+	trustAllContent    = false
+	anonymousTokens    []string
 	attachmentOnlyHost = ""
 )
 
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index a64aeb5..9b5ab2a 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -16,7 +16,7 @@ import (
 
 var _ = check.Suite(&UnitSuite{})
 
-type UnitSuite struct {}
+type UnitSuite struct{}
 
 func mustParseURL(s string) *url.URL {
 	r, err := url.Parse(s)
@@ -34,7 +34,7 @@ func (s *IntegrationSuite) TestVhost404(c *check.C) {
 		resp := httptest.NewRecorder()
 		req := &http.Request{
 			Method: "GET",
-			URL: mustParseURL(testURL),
+			URL:    mustParseURL(testURL),
 		}
 		(&handler{}).ServeHTTP(resp, req)
 		c.Check(resp.Code, check.Equals, http.StatusNotFound)
@@ -52,7 +52,7 @@ func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
 	doVhostRequests(c, authzViaAuthzHeader)
 }
 func authzViaAuthzHeader(r *http.Request, tok string) int {
-	r.Header.Add("Authorization", "OAuth2 " + tok)
+	r.Header.Add("Authorization", "OAuth2 "+tok)
 	return http.StatusUnauthorized
 }
 
@@ -61,7 +61,7 @@ func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
 }
 func authzViaCookieValue(r *http.Request, tok string) int {
 	r.AddCookie(&http.Cookie{
-		Name: "api_token",
+		Name:  "api_token",
 		Value: auth.EncodeTokenCookie([]byte(tok)),
 	})
 	return http.StatusUnauthorized
@@ -120,8 +120,8 @@ func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string)
 		u := mustParseURL("http://" + hostPath)
 		req := &http.Request{
 			Method: "GET",
-			Host: u.Host,
-			URL: u,
+			Host:   u.Host,
+			URL:    u,
 			Header: http.Header{},
 		}
 		failCode := authz(req, tok)
@@ -157,8 +157,8 @@ func doReq(req *http.Request) *httptest.ResponseRecorder {
 	u, _ := req.URL.Parse(resp.Header().Get("Location"))
 	req = &http.Request{
 		Method: "GET",
-		Host: u.Host,
-		URL: u,
+		Host:   u.Host,
+		URL:    u,
 		Header: http.Header{},
 	}
 	for _, c := range cookies {
@@ -169,8 +169,8 @@ func doReq(req *http.Request) *httptest.ResponseRecorder {
 
 func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		arvadostest.FooCollection + ".example.com/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		arvadostest.FooCollection+".example.com/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusOK,
@@ -179,8 +179,8 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
 
 func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusBadRequest,
@@ -193,8 +193,8 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C
 	}(trustAllContent)
 	trustAllContent = true
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusOK,
@@ -208,16 +208,16 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *chec
 	attachmentOnlyHost = "example.com:1234"
 
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusBadRequest,
 	)
 
 	resp := s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com:1234/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com:1234/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusOK,
@@ -227,7 +227,7 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *chec
 
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
-		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection+".example.com/foo",
 		"",
 		"application/x-www-form-urlencoded",
 		url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
@@ -237,7 +237,7 @@ func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
-		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection+".example.com/foo",
 		"",
 		"application/x-www-form-urlencoded",
 		url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
@@ -249,10 +249,10 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 	u, _ := url.Parse(`http://` + hostPath + queryString)
 	req := &http.Request{
 		Method: method,
-		Host: u.Host,
-		URL: u,
+		Host:   u.Host,
+		URL:    u,
 		Header: http.Header{"Content-Type": {contentType}},
-		Body: ioutil.NopCloser(strings.NewReader(body)),
+		Body:   ioutil.NopCloser(strings.NewReader(body)),
 	}
 
 	resp := httptest.NewRecorder()
@@ -261,14 +261,14 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 		c.Assert(resp.Code, check.Equals, expectStatus)
 		return resp
 	}
-	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
+	c.Check(resp.Body.String(), check.Matches, `.*href="//`+regexp.QuoteMeta(html.EscapeString(hostPath))+`".*`)
 	cookies := (&http.Response{Header: resp.Header()}).Cookies()
 
 	u, _ = u.Parse(resp.Header().Get("Location"))
 	req = &http.Request{
 		Method: "GET",
-		Host: u.Host,
-		URL: u,
+		Host:   u.Host,
+		URL:    u,
 		Header: http.Header{},
 	}
 	for _, c := range cookies {
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index fdbb50e..740d243 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -105,14 +105,14 @@ func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
 	err = arv.Create("collections",
 		map[string]interface{}{
 			"collection": map[string]interface{}{
-				"name": fmt.Sprintf("testdata blocksize=%d", blocksize),
+				"name":          fmt.Sprintf("testdata blocksize=%d", blocksize),
 				"manifest_text": mtext,
 			},
 		}, &coll)
 	c.Assert(err, check.Equals, nil)
 	uuid := coll["uuid"].(string)
 
-	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid + ".dl.example.com", "/testdata.bin")
+	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid+".dl.example.com", "/testdata.bin")
 	c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
 	c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
 	c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
@@ -139,82 +139,82 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 	for _, spec := range []curlCase{
 		// My collection
 		{
-			auth: arvadostest.ActiveToken,
-			host: arvadostest.FooCollection + "--dl.example.com",
-			path: "/foo",
+			auth:    arvadostest.ActiveToken,
+			host:    arvadostest.FooCollection + "--dl.example.com",
+			path:    "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			host: strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
-			path: "/t=" + arvadostest.ActiveToken + "/foo",
+			host:    strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
+			path:    "/t=" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			path: "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+			path:    "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			path: "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+			path:    "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			auth: "tokensobogus",
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			auth:    "tokensobogus",
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			auth: arvadostest.ActiveToken,
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			auth:    arvadostest.ActiveToken,
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			auth: arvadostest.AnonymousToken,
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			auth:    arvadostest.AnonymousToken,
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 
 		// Anonymously accessible user agreement
 		{
-			path: "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			path:    "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			host: arvadostest.HelloWorldCollection + ".dl.example.com",
-			path: "/Hello%20world.txt",
+			host:    arvadostest.HelloWorldCollection + ".dl.example.com",
+			path:    "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			host: arvadostest.HelloWorldCollection + ".dl.example.com",
-			path: "/_/Hello%20world.txt",
+			host:    arvadostest.HelloWorldCollection + ".dl.example.com",
+			path:    "/_/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.ActiveToken,
-			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			auth:    arvadostest.ActiveToken,
+			path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.SpectatorToken,
-			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			auth:    arvadostest.SpectatorToken,
+			path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.SpectatorToken,
-			host: arvadostest.HelloWorldCollection + "--dl.example.com",
-			path: "/Hello%20world.txt",
+			auth:    arvadostest.SpectatorToken,
+			host:    arvadostest.HelloWorldCollection + "--dl.example.com",
+			path:    "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.SpectatorToken,
-			path: "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+			auth:    arvadostest.SpectatorToken,
+			path:    "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 	} {
@@ -238,7 +238,7 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
 	curlArgs := []string{"--silent", "--show-error", "--include"}
 	testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
-	curlArgs = append(curlArgs, "--resolve", host + ":" + testPort + ":" + testHost)
+	curlArgs = append(curlArgs, "--resolve", host+":"+testPort+":"+testHost)
 	if token != "" {
 		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
 	}

commit d93cabc080f1bbba53b7fde50b96d14f6b900143
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Sep 7 03:43:59 2015 -0400

    5824: Add -attachment-only-host feature.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 993b9db..8ae9490 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -173,6 +173,19 @@
 // (``https://dl.example.com/'') and upload it to some other site
 // chosen by the author of collection X.
 //
+// Attachment-Only host
+//
+// It is possible to serve untrusted content and accept user
+// credentials at the same origin as long as the content is only
+// downloaded, never executed by browsers. A single origin (hostname
+// and port) can be designated as an "attachment-only" origin: cookies
+// will be accepted and all responses will have a
+// "Content-Disposition: attachment" header. This behavior is invoked
+// only when the designated origin matches exactly the Host header
+// provided by the client or upstream proxy.
+//
+//   keep-web -attachment-only-host domain.example:9999
+//
 // Trust All Content mode
 //
 // In "trust all content" mode, Keep-web will accept credentials (API
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index c5d439a..b39a941 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -24,11 +24,14 @@ var (
 	clientPool      = arvadosclient.MakeClientPool()
 	trustAllContent = false
 	anonymousTokens []string
+	attachmentOnlyHost = ""
 )
 
 func init() {
 	flag.BoolVar(&trustAllContent, "trust-all-content", false,
 		"Serve non-public content from a single origin. Dangerous: read docs before using!")
+	flag.StringVar(&attachmentOnlyHost, "attachment-only-host", "",
+		"Accept credentials, and add \"Content-Disposition: attachment\" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.")
 }
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
@@ -111,8 +114,16 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
+	var attachment bool
 	credentialsOK := trustAllContent
 
+	if r.Host != "" && r.Host == attachmentOnlyHost {
+		credentialsOK = true
+		attachment = true
+	} else if r.FormValue("disposition") == "attachment" {
+		attachment = true
+	}
+
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
 		// http://ID.dl.example/PATH...
 		credentialsOK = true
@@ -293,6 +304,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		}
 	}
 	w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
+	if attachment {
+		w.Header().Set("Content-Disposition", "attachment")
+	}
 
 	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index e2f8edd..a64aeb5 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -201,6 +201,30 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C
 	)
 }
 
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
+	defer func(orig string) {
+		attachmentOnlyHost = orig
+	}(attachmentOnlyHost)
+	attachmentOnlyHost = "example.com:1234"
+
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusBadRequest,
+	)
+
+	resp := s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com:1234/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+	c.Check(resp.Header().Get("Content-Disposition"), check.Equals, "attachment")
+}
+
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
 		arvadostest.FooCollection + ".example.com/foo",
@@ -221,7 +245,7 @@ func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C)
 	)
 }
 
-func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) {
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) *httptest.ResponseRecorder {
 	u, _ := url.Parse(`http://` + hostPath + queryString)
 	req := &http.Request{
 		Method: method,
@@ -235,7 +259,7 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 	(&handler{}).ServeHTTP(resp, req)
 	if resp.Code != http.StatusSeeOther {
 		c.Assert(resp.Code, check.Equals, expectStatus)
-		return
+		return resp
 	}
 	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
 	cookies := (&http.Response{Header: resp.Header()}).Cookies()
@@ -258,4 +282,5 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 	if expectStatus == http.StatusOK {
 		c.Check(resp.Body.String(), check.Equals, "foo")
 	}
+	return resp
 }

commit 7d0dfbb31312c2f22d698572d38216015f885ea8
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Sep 7 02:39:10 2015 -0400

    5824: Implement "trust all content" mode.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 2f45781..993b9db 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -49,7 +49,7 @@
 // The following "same origin" URL patterns are supported for public
 // collections (i.e., collections which can be served by keep-web
 // without making use of any credentials supplied by the client). See
-// "Same-origin mode" below.
+// "Same-origin URLs" below.
 //
 //   http://dl.example.com/c=uuid_or_pdh/path/file.txt
 //   http://dl.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
@@ -163,7 +163,7 @@
 // the local network -- the upstream proxy should configured to return
 // 401 for all paths beginning with "/c=".
 //
-// Same-origin mode
+// Same-origin URLs
 //
 // Without the same-origin protection outlined above, a web page
 // stored in collection X could execute JavaScript code that uses the
@@ -173,19 +173,7 @@
 // (``https://dl.example.com/'') and upload it to some other site
 // chosen by the author of collection X.
 //
-package main
-
-// TODO(TC): Implement?
-//
-// Trusted content
-//
-// Normally, Keep-web is installed using a wildcard DNS entry and a
-// wildcard HTTPS certificate, serving data from collection X at
-// ``https://X--dl.example.com/path/file.ext''.
-//
-// It will also serve publicly accessible data at
-// ``https://dl.example.com/collections/X/path/file.txt'', but it does not
-// accept any kind of credentials at paths like these.
+// Trust All Content mode
 //
 // In "trust all content" mode, Keep-web will accept credentials (API
 // tokens) and serve any collection X at
@@ -198,4 +186,4 @@ package main
 //
 //   keep-web -trust-all-content [...]
 //
-// In the general case, this should not be enabled: 
+package main
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 600e685..c5d439a 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"flag"
 	"fmt"
 	"html"
 	"io"
@@ -19,8 +20,16 @@ import (
 
 type handler struct{}
 
-var clientPool = arvadosclient.MakeClientPool()
-var anonymousTokens []string
+var (
+	clientPool      = arvadosclient.MakeClientPool()
+	trustAllContent = false
+	anonymousTokens []string
+)
+
+func init() {
+	flag.BoolVar(&trustAllContent, "trust-all-content", false,
+		"Serve non-public content from a single origin. Dangerous: read docs before using!")
+}
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
 // otherwise return "".
@@ -102,7 +111,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
-	var credentialsOK bool
+	credentialsOK := trustAllContent
 
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
 		// http://ID.dl.example/PATH...
@@ -139,7 +148,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		if !credentialsOK {
 			// It is not safe to copy the provided token
 			// into a cookie unless the current vhost
-			// (origin) serves only a single collection.
+			// (origin) serves only a single collection or
+			// we are in trustAllContent mode.
 			statusCode = http.StatusBadRequest
 			return
 		}
@@ -160,7 +170,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 			Name:     "api_token",
 			Value:    auth.EncodeTokenCookie([]byte(t)),
 			Path:     "/",
-			Expires:  time.Now().AddDate(10,0,0),
+			Expires:  time.Now().AddDate(10, 0, 0),
 			HttpOnly: true,
 		})
 		redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 50fd717..e2f8edd 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -177,6 +177,30 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
 	)
 }
 
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusBadRequest,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
+	defer func(orig bool) {
+		trustAllContent = orig
+	}(trustAllContent)
+	trustAllContent = true
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+}
+
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
 		arvadostest.FooCollection + ".example.com/foo",
@@ -209,7 +233,10 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 
 	resp := httptest.NewRecorder()
 	(&handler{}).ServeHTTP(resp, req)
-	c.Assert(resp.Code, check.Equals, http.StatusSeeOther)
+	if resp.Code != http.StatusSeeOther {
+		c.Assert(resp.Code, check.Equals, expectStatus)
+		return
+	}
 	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
 	cookies := (&http.Response{Header: resp.Header()}).Cookies()
 

commit 1367c2b1f4771dd3d877df5a943c9a859636da7d
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 30 02:45:58 2015 -0400

    5824: Add read-error and lots-of-blocks tests.

diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index 51710b7..94e41e2 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -2,21 +2,48 @@ package keepclient
 
 import (
 	"crypto/md5"
+	"crypto/rand"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"net/http"
 	"os"
+	"strconv"
+	"strings"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
 	check "gopkg.in/check.v1"
 )
 
-var _ = check.Suite(&IntegrationSuite{})
+var _ = check.Suite(&CollectionReaderUnit{})
 
-// IntegrationSuite tests need an API server
-type IntegrationSuite struct{}
+type CollectionReaderUnit struct {
+	arv     arvadosclient.ArvadosClient
+	kc      *KeepClient
+	handler SuccessHandler
+}
+
+func (s *CollectionReaderUnit) SetUpTest(c *check.C) {
+	var err error
+	s.arv, err = arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	s.arv.ApiToken = arvadostest.ActiveToken
+
+	s.kc, err = MakeKeepClient(&s.arv)
+	c.Assert(err, check.IsNil)
+
+	s.handler = SuccessHandler{
+		disk: make(map[string][]byte),
+		lock: make(chan struct{}, 1),
+		ops: new(int),
+	}
+	localRoots := make(map[string]string)
+	for i, k := range RunSomeFakeKeepServers(s.handler, 4) {
+		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+	}
+	s.kc.SetServiceRoots(localRoots, localRoots, nil)
+}
 
 type SuccessHandler struct {
 	disk map[string][]byte
@@ -64,33 +91,11 @@ type rdrTest struct {
 	want interface{} // error or string to expect
 }
 
-func StubWithFakeServers(kc *KeepClient, h http.Handler) {
-	localRoots := make(map[string]string)
-	for i, k := range RunSomeFakeKeepServers(h, 4) {
-		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
-	}
-	kc.SetServiceRoots(localRoots, localRoots, nil)
-}
-
-func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
-	arv, err := arvadosclient.MakeArvadosClient()
-	c.Assert(err, check.IsNil)
-	arv.ApiToken = arvadostest.ActiveToken
-
-	kc, err := MakeKeepClient(&arv)
-	c.Assert(err, check.IsNil)
-
-	{
-		h := SuccessHandler{
-			disk: make(map[string][]byte),
-			lock: make(chan struct{}, 1),
-		}
-		StubWithFakeServers(kc, h)
-		kc.PutB([]byte("foo"))
-		kc.PutB([]byte("bar"))
-		kc.PutB([]byte("Hello world\n"))
-		kc.PutB([]byte(""))
-	}
+func (s *CollectionReaderUnit) TestCollectionReaderContent(c *check.C) {
+	s.kc.PutB([]byte("foo"))
+	s.kc.PutB([]byte("bar"))
+	s.kc.PutB([]byte("Hello world\n"))
+	s.kc.PutB([]byte(""))
 
 	mt := arvadostest.PathologicalManifest
 
@@ -116,7 +121,7 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 		{mt: mt, f: "segmented/frob", want: "frob"},
 		{mt: mt, f: "segmented/oof", want: "oof"},
 	} {
-		rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+		rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
 		switch want := testCase.want.(type) {
 		case error:
 			c.Check(rdr, check.IsNil)
@@ -136,21 +141,34 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	}
 }
 
-func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
-	arv, err := arvadosclient.MakeArvadosClient()
-	c.Assert(err, check.IsNil)
-	arv.ApiToken = arvadostest.ActiveToken
-
-	kc, err := MakeKeepClient(&arv)
-	c.Assert(err, check.IsNil)
-
-	h := SuccessHandler{
-		disk: make(map[string][]byte),
-		lock: make(chan struct{}, 1),
-		ops: new(int),
+func (s *CollectionReaderUnit) TestCollectionReaderManyBlocks(c *check.C) {
+	h := md5.New()
+	buf := make([]byte, 4096)
+	locs := make([]string, len(buf))
+	filesize := 0
+	for i := 0; i < len(locs); i++ {
+		_, err := io.ReadFull(rand.Reader, buf[:i])
+		c.Assert(err, check.IsNil)
+		h.Write(buf[:i])
+		locs[i], _, err = s.kc.PutB(buf[:i])
+		c.Assert(err, check.IsNil)
+		filesize += i
 	}
-	StubWithFakeServers(kc, h)
-	kc.PutB([]byte("foo"))
+	manifest := "./random " + strings.Join(locs, " ") + " 0:" + strconv.Itoa(filesize) + ":bytes.bin\n"
+	dataMD5 := h.Sum(nil)
+
+	checkMD5 := md5.New()
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "random/bytes.bin")
+	c.Check(err, check.IsNil)
+	_, err = io.Copy(checkMD5, rdr)
+	c.Check(err, check.IsNil)
+	_, err = rdr.Read(make([]byte, 1))
+	c.Check(err, check.Equals, io.EOF)
+	c.Check(checkMD5.Sum(nil), check.DeepEquals, dataMD5)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderCloseEarly(c *check.C) {
+	s.kc.PutB([]byte("foo"))
 
 	mt := ". "
 	for i := 0; i < 1000; i++ {
@@ -161,23 +179,45 @@ func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
 	// Grab the stub server's lock, ensuring our cfReader doesn't
 	// get anything back from its first call to kc.Get() before we
 	// have a chance to call Close().
-	h.lock <- struct{}{}
-	opsBeforeRead := *h.ops
+	s.handler.lock <- struct{}{}
+	opsBeforeRead := *s.handler.ops
 
-	rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
 	c.Assert(err, check.IsNil)
+
+	firstReadDone := make(chan struct{})
+	go func() {
+		rdr.Read(make([]byte, 6))
+		firstReadDone <- struct{}{}
+	}()
 	err = rdr.Close()
 	c.Assert(err, check.IsNil)
 	c.Assert(rdr.Error(), check.IsNil)
 
 	// Release the stub server's lock. The first GET operation will proceed.
-	<-h.lock
+	<-s.handler.lock
+
+	// Make sure our first read operation consumes the data
+	// received from the first GET.
+	<-firstReadDone
 
 	// doGet() should close toRead before sending any more bufs to it.
-	if what, ok := <-rdr.toRead;  ok {
-		c.Errorf("Got %+v, expected toRead to be closed", what)
+	if what, ok := <-rdr.toRead; ok {
+		c.Errorf("Got %q, expected toRead to be closed", string(what))
 	}
 
 	// Stub should have handled exactly one GET request.
-	c.Assert(*h.ops, check.Equals, opsBeforeRead+1)
+	c.Assert(*s.handler.ops, check.Equals, opsBeforeRead+1)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderDataError(c *check.C) {
+	manifest := ". ffffffffffffffffffffffffffffffff+1 0:1:notfound.txt\n"
+	buf := make([]byte, 1)
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "notfound.txt")
+	c.Check(err, check.IsNil)
+	for i := 0; i < 2; i++ {
+		_, err = io.ReadFull(rdr, buf)
+		c.Check(err, check.Not(check.IsNil))
+		c.Check(err, check.Not(check.Equals), io.EOF)
+	}
 }

commit 1dba563361d3d9dce369d336d3c0d8ce48b9a819
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 11:08:16 2015 -0400

    5824: Fix up DNS docs.

diff --git a/doc/install/install-keep-web.html.textile.liquid b/doc/install/install-keep-web.html.textile.liquid
index 51b8d71..9282a8e 100644
--- a/doc/install/install-keep-web.html.textile.liquid
+++ b/doc/install/install-keep-web.html.textile.liquid
@@ -95,22 +95,20 @@ server {
 }
 </pre></notextile>
 
-h3. Tell the API server about the keep-web service
+h3. Configure DNS
 
-If your wildcard certificate is valid for <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file:
+Configure your DNS servers so the following names resolve to your Nginx proxy's public IP address.
+* @*--dl.uuid_prefix.your.domain@, if your DNS server allows this without interfering with other DNS names; or
+* @*.dl.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names; or
+* @dl.uuid_prefix.your.domain@, if neither of the above options is feasible. In this case, only unauthenticated requests will be served, i.e., public data and collection sharing links.
 
-<notextile>
-<pre><code>keep-web: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
-</code></pre>
-</notextile>
+h3. Tell the API server about the keep-web service
 
-If your wildcard certificate is valid for <code>*.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file. This approach requires a wildcard DNS entry covering <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, which might be difficult to do without affecting other services, depending on your DNS server software.
+Add *one* of the following entries to your API server's @config/application.yml@ file, depending on your DNS setup:
 
 <notextile>
 <pre><code>keep-web: https://%{uuid_or_pdh}--dl.<span class="userinput">uuid_prefix</span>.your.domain
+keep-web: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
+keep-web: https://dl.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>
-
-h3. Configure DNS
-
-Configure your DNS servers so <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code> or <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code> (according to the way you configured Workbench), as well as <code>dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, resolve to your Nginx proxy's public IP address.

commit d36d775dd76467129a0c9dbd93878d5b6b583040
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 02:30:45 2015 -0400

    5824: Log X-Forwarded-For header value if provided.

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 98dfdb3..600e685 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -62,6 +62,11 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var statusCode = 0
 	var statusText string
 
+	remoteAddr := r.RemoteAddr
+	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+		remoteAddr = xff + "," + remoteAddr
+	}
+
 	w := httpserver.WrapResponseWriter(wOrig)
 	defer func() {
 		if statusCode == 0 {
@@ -75,7 +80,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		if statusText == "" {
 			statusText = http.StatusText(statusCode)
 		}
-		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
+		httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
 	}()
 
 	if r.Method != "GET" && r.Method != "POST" {

commit 1a6dbfaaea9520accfa8ca401887be61ed884420
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:45:35 2015 -0400

    5824: Fail at startup if ARVADOS_API_HOST is not set.

diff --git a/services/keep-web/main.go b/services/keep-web/main.go
index d780cc3..751543e 100644
--- a/services/keep-web/main.go
+++ b/services/keep-web/main.go
@@ -17,6 +17,9 @@ func init() {
 
 func main() {
 	flag.Parse()
+	if os.Getenv("ARVADOS_API_HOST") == "" {
+		log.Fatal("ARVADOS_API_HOST environment variable must be set.")
+	}
 	srv := &server{}
 	if err := srv.Start(); err != nil {
 		log.Fatal(err)

commit c5566fdadf1bb466b6eb2d5133445887520aae63
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:28:43 2015 -0400

    5824: Accept anonymous tokens on command line.

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 7a2124a..98dfdb3 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -17,16 +17,10 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
-var clientPool = arvadosclient.MakeClientPool()
-
-var anonymousTokens []string
-
 type handler struct{}
 
-func init() {
-	// TODO(TC): Get anonymousTokens from flags
-	anonymousTokens = []string{}
-}
+var clientPool = arvadosclient.MakeClientPool()
+var anonymousTokens []string
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
 // otherwise return "".
diff --git a/services/keep-web/server.go b/services/keep-web/server.go
index 44da00f..2359f23 100644
--- a/services/keep-web/server.go
+++ b/services/keep-web/server.go
@@ -10,8 +10,8 @@ import (
 var address string
 
 func init() {
-	flag.StringVar(&address, "address", "0.0.0.0:80",
-		"Address to listen on, \"host:port\".")
+	flag.StringVar(&address, "address", ":80",
+		"Address to listen on: \"host:port\", or \":port\" to listen on all interfaces.")
 }
 
 type server struct {

commit 432fff73232a661701552550e0cb4eec10ad791c
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:09:46 2015 -0400

    5824: Handle various combinations of c= and t= more consistently. Use vhosts in integration tests.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 44bcaec..2f45781 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -52,13 +52,12 @@
 // "Same-origin mode" below.
 //
 //   http://dl.example.com/c=uuid_or_pdh/path/file.txt
-//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
 //
 // The following "multiple origin" URL patterns are supported for all
 // collections:
 //
 //   http://uuid_or_pdh--dl.example.com/path/file.txt
-//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
 //   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
 //
 // In the "multiple origin" form, the string "--" can be replaced with
@@ -81,17 +80,35 @@
 // collection UUID or a portable data hash with the "+" character
 // replaced by "-".
 //
+// In all of the above forms, a top level directory called "_" is
+// skipped. In cases where the "path/file.txt" part might start with
+// "t=" or "c=" or "_/", links should be constructed with a leading
+// "_/" to ensure the top level directory is not interpreted as a
+// token or collection ID.
+//
 // Assuming there is a collection with UUID
 // zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
 // 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
 // interchangeable:
 //
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/_/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/_/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--foo.example.com/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--.invalid/foo
 //
+// An additional form is supported specifically to make it more
+// convenient to maintain support for existing Workbench download
+// links:
+//
+//   http://dl.example.com/collections/download/uuid_or_pdh/TOKEN/path/file.txt
+//
+// A regular Workbench "download" link is also accepted, but
+// credentials passed via cookie, header, etc. are ignored. Only
+// public data can be served this way:
+//
+//   http://dl.example.com/collections/uuid_or_pdh/path/file.txt
+//
 // Authorization mechanisms
 //
 // A token can be provided in an Authorization header:
@@ -158,7 +175,7 @@
 //
 package main
 
-// TODO(TC): Implement
+// TODO(TC): Implement?
 //
 // Trusted content
 //
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 657c72d..7a2124a 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -50,6 +50,20 @@ func parseCollectionIdFromDNSName(s string) string {
 	return ""
 }
 
+var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
+
+// return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
+// with "+" replaced by " " or "-"); otherwise return "".
+func parseCollectionIdFromURL(s string) string {
+	if arvadosclient.UUIDMatch(s) {
+		return s
+	}
+	if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
+		return pdh
+	}
+	return ""
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var statusCode = 0
 	var statusText string
@@ -89,79 +103,104 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
+	var credentialsOK bool
 
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
-		// "http://{id}.domain.example.com/{path}" form
-		if t := r.FormValue("api_token"); t != "" {
-			// ...with explicit token in query string or
-			// form in POST body. We must encrypt the
-			// token such that it can only be used for
-			// this collection; put it in an HttpOnly
-			// cookie; and redirect to the same URL with
-			// the query param redacted, and method =
-			// GET.
-			//
-			// The HttpOnly flag is necessary to prevent
-			// JavaScript code (included in, or loaded by,
-			// a page in the collection being served) from
-			// employing the user's token beyond reading
-			// other files in the same domain, i.e., same
-			// the collection.
-			//
-			// The 303 redirect is necessary in the case
-			// of a GET request to avoid exposing the
-			// token in the Location bar, and in the case
-			// of a POST request to avoid raising warnings
-			// when the user refreshes the resulting page.
-			http.SetCookie(w, &http.Cookie{
-				Name:    "api_token",
-				Value:   auth.EncodeTokenCookie([]byte(t)),
-				Path:    "/",
-				Expires: time.Now().AddDate(10,0,0),
-			})
-			redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
-
-			w.Header().Add("Location", redir)
-			statusCode, statusText = http.StatusSeeOther, redir
-			w.WriteHeader(statusCode)
-			io.WriteString(w, `<A href="`)
-			io.WriteString(w, html.EscapeString(redir))
-			io.WriteString(w, `">Continue</A>`)
-			return
-		} else if strings.HasPrefix(pathParts[0], "t=") {
-			// ...with explicit token in path,
-			// "{...}.com/t={token}/{path}".  This form
-			// must only be used to pass scoped tokens
-			// that give permission for a single
-			// collection. See FormValue case above.
-			tokens = []string{pathParts[0][2:]}
-			targetPath = pathParts[1:]
+		// http://ID.dl.example/PATH...
+		credentialsOK = true
+		targetPath = pathParts
+	} else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
+		// /c=ID/PATH...
+		targetId = parseCollectionIdFromURL(pathParts[0][2:])
+		targetPath = pathParts[1:]
+	} else if len(pathParts) >= 3 && pathParts[0] == "collections" {
+		if len(pathParts) >= 5 && pathParts[1] == "download" {
+			// /collections/download/ID/TOKEN/PATH...
+			targetId = pathParts[2]
+			tokens = []string{pathParts[3]}
+			targetPath = pathParts[4:]
 			pathToken = true
 		} else {
-			// ...with cookie, Authorization header, or
-			// no token at all
-			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
-			tokens = append(reqTokens, anonymousTokens...)
-			targetPath = pathParts
+			// /collections/ID/PATH...
+			targetId = pathParts[1]
+			tokens = anonymousTokens
+			targetPath = pathParts[2:]
 		}
-	} else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+	} else {
 		statusCode = http.StatusNotFound
 		return
-	} else if len(pathParts) >= 5 && pathParts[1] == "download" {
-		// "/collections/download/{id}/{token}/path..." form:
-		// Don't use our configured anonymous tokens,
-		// Authorization headers, etc.  Just use the token in
-		// the path.
-		targetId = pathParts[2]
-		tokens = []string{pathParts[3]}
-		targetPath = pathParts[4:]
+	}
+	if t := r.FormValue("api_token"); t != "" {
+		// The client provided an explicit token in the query
+		// string, or a form in POST body. We must put the
+		// token in an HttpOnly cookie, and redirect to the
+		// same URL with the query param redacted and method =
+		// GET.
+
+		if !credentialsOK {
+			// It is not safe to copy the provided token
+			// into a cookie unless the current vhost
+			// (origin) serves only a single collection.
+			statusCode = http.StatusBadRequest
+			return
+		}
+
+		// The HttpOnly flag is necessary to prevent
+		// JavaScript code (included in, or loaded by, a page
+		// in the collection being served) from employing the
+		// user's token beyond reading other files in the same
+		// domain, i.e., same collection.
+		//
+		// The 303 redirect is necessary in the case of a GET
+		// request to avoid exposing the token in the Location
+		// bar, and in the case of a POST request to avoid
+		// raising warnings when the user refreshes the
+		// resulting page.
+
+		http.SetCookie(w, &http.Cookie{
+			Name:     "api_token",
+			Value:    auth.EncodeTokenCookie([]byte(t)),
+			Path:     "/",
+			Expires:  time.Now().AddDate(10,0,0),
+			HttpOnly: true,
+		})
+		redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+		w.Header().Add("Location", redir)
+		statusCode, statusText = http.StatusSeeOther, redir
+		w.WriteHeader(statusCode)
+		io.WriteString(w, `<A href="`)
+		io.WriteString(w, html.EscapeString(redir))
+		io.WriteString(w, `">Continue</A>`)
+		return
+	}
+
+	if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+		// http://ID.example/t=TOKEN/PATH...
+		// /c=ID/t=TOKEN/PATH...
+		//
+		// This form must only be used to pass scoped tokens
+		// that give permission for a single collection. See
+		// FormValue case above.
+		tokens = []string{targetPath[0][2:]}
 		pathToken = true
-	} else {
-		// "/collections/{id}/path..." form
-		targetId = pathParts[1]
-		reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		targetPath = targetPath[1:]
+	}
+
+	if tokens == nil {
+		if credentialsOK {
+			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		}
 		tokens = append(reqTokens, anonymousTokens...)
-		targetPath = pathParts[2:]
+	}
+
+	if len(targetPath) > 0 && targetPath[0] == "_" {
+		// If a collection has a directory called "t=foo" or
+		// "_", it can be served at //dl.example/_/t=foo/ or
+		// //dl.example/_/_/ respectively: //dl.example/t=foo/
+		// won't work because t=foo will be interpreted as a
+		// token "foo".
+		targetPath = targetPath[1:]
 	}
 
 	tokenResult := make(map[string]int)
@@ -188,11 +227,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		return
 	}
 	if !found {
-		if pathToken {
-			// The URL is a "secret sharing link", but it
-			// didn't work out. Asking the client for
-			// additional credentials would just be
-			// confusing.
+		if pathToken || !credentialsOK {
+			// Either the URL is a "secret sharing link"
+			// that didn't work out (and asking the client
+			// for additional credentials would just be
+			// confusing), or we don't even accept
+			// credentials at this path.
 			statusCode = http.StatusNotFound
 			return
 		}
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index b788a38..50fd717 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -99,8 +99,10 @@ func authzViaPOST(r *http.Request, tok string) int {
 func doVhostRequests(c *check.C, authz authorizer) {
 	for _, hostPath := range []string{
 		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection + "--dl.example.com/foo",
+		arvadostest.FooCollection + "--dl.example.com/_/foo",
 		arvadostest.FooPdh + ".example.com/foo",
-		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + "--dl.example.com/foo",
 	} {
 		c.Log("doRequests: ", hostPath)
 		doVhostRequestsWithHostPath(c, authz, hostPath)
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index a2a5754..fdbb50e 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -29,7 +29,7 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"bogustoken",
 	} {
 		hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
-		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
@@ -119,6 +119,14 @@ func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
 	c.Check(size, check.Equals, int64(blocksize)*100)
 }
 
+type curlCase struct {
+	id      string
+	auth    string
+	host    string
+	path    string
+	dataMD5 string
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
 	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
@@ -128,28 +136,101 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 	c.Assert(err, check.Equals, nil)
 	kc.PutB([]byte("Hello world\n"))
 	kc.PutB([]byte("foo"))
-	for _, spec := range [][]string{
+	for _, spec := range []curlCase{
 		// My collection
-		{arvadostest.ActiveToken, "/collections/" + arvadostest.FooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		// Anonymously accessible user agreement.
-		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{
+			auth: arvadostest.ActiveToken,
+			host: arvadostest.FooCollection + "--dl.example.com",
+			path: "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			host: strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
+			path: "/t=" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: "tokensobogus",
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: arvadostest.ActiveToken,
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: arvadostest.AnonymousToken,
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+
+		// Anonymously accessible user agreement
+		{
+			path: "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			host: arvadostest.HelloWorldCollection + ".dl.example.com",
+			path: "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			host: arvadostest.HelloWorldCollection + ".dl.example.com",
+			path: "/_/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.ActiveToken,
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			host: arvadostest.HelloWorldCollection + "--dl.example.com",
+			path: "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			path: "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
 	} {
-		hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
+		host := spec.host
+		if host == "" {
+			host = "dl.example.com"
+		}
+		hdr, body, _ := s.runCurl(c, spec.auth, host, spec.path)
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
-		if strings.HasSuffix(spec[1], ".txt") {
+		if strings.HasSuffix(spec.path, ".txt") {
 			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
 			// TODO: Check some types that aren't
 			// automatically detected by Go's http server
 			// by sniffing the content.
 		}
-		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
+		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.dataMD5)
 	}
 }
 

commit bb1d507519384faec1344b96d250d6e1cf3e0f36
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:34:21 2015 -0400

    5824: Comment to explain "authorizer" test helpers.

diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 0494376..b788a38 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -42,6 +42,10 @@ func (s *IntegrationSuite) TestVhost404(c *check.C) {
 	}
 }
 
+// An authorizer modifies an HTTP request to make use of the given
+// token -- by adding it to a header, cookie, query param, or whatever
+// -- and returns the HTTP status code we should expect from keep-web if
+// the token is invalid.
 type authorizer func(*http.Request, string) int
 
 func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {

commit 11633603c279de5904b07d0d05a47a6bd2897f78
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:33:44 2015 -0400

    5824: Fix up support for PDH in vhostname.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index f4f9ab5..44bcaec 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -74,7 +74,8 @@
 // upstream proxy.
 //
 // In all of the above forms, the "dl.example.com" part can be
-// anything at all.
+// anything at all: keep-web ignores everything after the first "." or
+// "--".
 //
 // In all of the above forms, the "uuid_or_pdh" part can be either a
 // collection UUID or a portable data hash with the "+" character
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 30b4b64..657c72d 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -28,7 +28,8 @@ func init() {
 	anonymousTokens = []string{}
 }
 
-// return s if s is a UUID or a PDH, otherwise ""
+// return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
+// otherwise return "".
 func parseCollectionIdFromDNSName(s string) string {
 	// Strip domain.
 	if i := strings.IndexRune(s, '.'); i >= 0 {
@@ -40,10 +41,13 @@ func parseCollectionIdFromDNSName(s string) string {
 	if i := strings.Index(s, "--"); i >= 0 {
 		s = s[:i]
 	}
-	if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
-		return ""
+	if arvadosclient.UUIDMatch(s) {
+		return s
 	}
-	return s
+	if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
+		return pdh
+	}
+	return ""
 }
 
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index a1f5e1a..0494376 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -93,7 +93,17 @@ func authzViaPOST(r *http.Request, tok string) int {
 // Try some combinations of {url, token} using the given authorization
 // mechanism, and verify the result is correct.
 func doVhostRequests(c *check.C, authz authorizer) {
-	hostPath := arvadostest.FooCollection + ".example.com/foo"
+	for _, hostPath := range []string{
+		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooPdh + ".example.com/foo",
+		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+	} {
+		c.Log("doRequests: ", hostPath)
+		doVhostRequestsWithHostPath(c, authz, hostPath)
+	}
+}
+
+func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
 	for _, tok := range []string{
 		arvadostest.ActiveToken,
 		arvadostest.ActiveToken[:15],

commit a9b1a3fb8bcfa92735a1f70c1e982d75ae325fa4
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:31:19 2015 -0400

    5824: Modernize install page, cf. other services.

diff --git a/doc/_config.yml b/doc/_config.yml
index 1bdd2ab..d67668a 100644
--- a/doc/_config.yml
+++ b/doc/_config.yml
@@ -154,6 +154,7 @@ navbar:
       - install/create-standard-objects.html.textile.liquid
       - install/install-keepstore.html.textile.liquid
       - install/install-keepproxy.html.textile.liquid
+      - install/install-keep-web.html.textile.liquid
       - install/install-crunch-dispatch.html.textile.liquid
       - install/install-compute-node.html.textile.liquid
     - Helpful hints:
diff --git a/doc/install/install-keep-web.html.textile.liquid b/doc/install/install-keep-web.html.textile.liquid
index 4777668..51b8d71 100644
--- a/doc/install/install-keep-web.html.textile.liquid
+++ b/doc/install/install-keep-web.html.textile.liquid
@@ -1,31 +1,33 @@
 ---
 layout: default
 navsection: installguide
-title: Install download server
+title: Install the download server
 ...
 
-This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
-
-The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to anonymous and unauthenticated clients, and accepts authentication via Arvados tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support.
+The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for more detail.
 
 By convention, we use the following hostname for the download service:
 
-<div class="offset1">
-table(table table-bordered table-condensed).
-|dl.@uuid_prefix@.your.domain|
-</div>
+<notextile>
+<pre><code>dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
 
 This hostname should resolve from anywhere on the internet.
 
 h2. Install keep-web
 
-First add the Arvados apt repository, and then install the keep-web package.
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install keep-web</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
-~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get install keep-web</span>
+<pre><code>~$ <span class="userinput">sudo yum install keep-web</span>
 </code></pre>
 </notextile>
 
@@ -34,31 +36,81 @@ Verify that @keep-web@ is functional:
 <notextile>
 <pre><code>~$ <span class="userinput">keep-web -h</span>
 Usage of keep-web:
-  -address="0.0.0.0:80": Address to listen on, "host:port".
+  -address string
+        Address to listen on: "host:port", or ":port" to listen on all interfaces. (default ":80")
+  -anonymous-token value
+        API token to try when none of the tokens provided in an HTTP request succeed in reading the desired collection. If this flag is used more than once, each token will be attempted in turn until one works. (default [])
 </code></pre>
 </notextile>
 
-We recommend running @arv-git-httpd@ under "runit":https://packages.debian.org/search?keywords=runit or something similar.
+If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another:
+
+<notextile>
+<pre><code>/var/www/arvados-api/current/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
+hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
+</code></pre></notextile>
 
-Your @run@ script should look something like this:
+We recommend running @keep-web@ under "runit":https://packages.debian.org/search?keywords=runit or a similar supervisor. The basic command to start @keep-web@ is:
 
 <notextile>
 <pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
-exec sudo -u nobody keep-web -address=:9002 2>&1
+exec sudo -u nobody keep-web -address=<span class="userinput">:9002</span> -anonymous-token=<span class="userinput">hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r</span> 2>&1
 </code></pre>
 </notextile>
 
+Omit the @-anonymous-token@ arguments if you do not want to serve public data.
+
+Set @ARVADOS_API_HOST_INSECURE=1@ if your API server's SSL certificate is not signed by a recognized CA.
+
 h3. Set up a reverse proxy with SSL support
 
 The keep-web service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
 
 This is best achieved by putting a reverse proxy with SSL support in front of keep-web, running on port 443 and passing requests to keep-web on port 9002 (or whatever port you chose in your run script).
 
+Note: A wildcard SSL certificate is required in order to proxy keep-web effectively.
+
+For example, using Nginx:
+
+<notextile><pre>
+upstream keep-web {
+  server                127.0.0.1:<span class="userinput">9002</span>;
+}
+
+server {
+  listen                <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name           dl.<span class="userinput">uuid_prefix</span>.your.domain *.dl.<span class="userinput">uuid_prefix</span>.your.domain ~.*--dl.<span class="userinput">uuid_prefix</span>.your.domain;
+
+  proxy_connect_timeout 90s;
+  proxy_read_timeout    300s;
+
+  ssl                   on;
+  ssl_certificate       <span class="userinput">YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key   <span class="userinput">YOUR/PATH/TO/cert.key</span>;
+
+  location / {
+    proxy_pass          http://keep-web;
+    proxy_set_header    X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
+</pre></notextile>
+
 h3. Tell the API server about the keep-web service
 
-In your API server's config/application.yml file, add the following entry:
+If your wildcard certificate is valid for <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file:
 
 <notextile>
-<pre><code>keep-web: dl.<span class="userinput">uuid_prefix</span>.your.domain
+<pre><code>keep-web: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>
+
+If your wildcard certificate is valid for <code>*.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file. This approach requires a wildcard DNS entry covering <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, which might be difficult to do without affecting other services, depending on your DNS server software.
+
+<notextile>
+<pre><code>keep-web: https://%{uuid_or_pdh}--dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
+
+h3. Configure DNS
+
+Configure your DNS servers so <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code> or <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code> (according to the way you configured Workbench), as well as <code>dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, resolve to your Nginx proxy's public IP address.
diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 325668f..f4f9ab5 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -4,6 +4,8 @@
 // can be installed anywhere with access to Keep services, typically
 // behind a web proxy that supports TLS.
 //
+// See http://doc.arvados.org/install/install-keep-web.html.
+//
 // Starting the server
 //
 // Serve HTTP requests at port 1234 on all interfaces:

commit 9b712695237b01dcf19aba4a403f1beedd2485b8
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:17:54 2015 -0400

    5824: Clarify difference between keepproxy and keepstore (bandwidth and convenience -- not security).

diff --git a/doc/install/install-keepproxy.html.textile.liquid b/doc/install/install-keepproxy.html.textile.liquid
index 14b252f..3b658f8 100644
--- a/doc/install/install-keepproxy.html.textile.liquid
+++ b/doc/install/install-keepproxy.html.textile.liquid
@@ -4,9 +4,9 @@ navsection: installguide
 title: Install Keepproxy server
 ...
 
-The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is designed to provide secure access into Keep from anywhere on the internet.
+The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is suitable for lower-bandwidth clients located elsewhere on the internet: a client sends a single copy of a data block, and Keepproxy sends copies to the appropriate Keepstore servers. Keepproxy also accepts requests from clients that do not compute data hashes before uploading data: notably, the browser-based upload feature in Workbench requires Keepproxy.
 
-By convention, we use the following hostname for the Keepproxy:
+By convention, we use the following hostname for the Keepproxy server:
 
 <div class="offset1">
 table(table table-bordered table-condensed).

commit 1fd3a57b1a4a91e70478282b882f6da55ccb43f0
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:16:26 2015 -0400

    5824: Update keepproxy usage.

diff --git a/doc/install/install-keepproxy.html.textile.liquid b/doc/install/install-keepproxy.html.textile.liquid
index 6a531a3..14b252f 100644
--- a/doc/install/install-keepproxy.html.textile.liquid
+++ b/doc/install/install-keepproxy.html.textile.liquid
@@ -36,12 +36,13 @@ Verify that Keepproxy is functional:
 
 <notextile>
 <pre><code>~$ <span class="userinput">keepproxy -h</span>
-Usage of default:
+Usage of keepproxy:
   -default-replicas=2: Default number of replicas to write if not specified by the client.
   -listen=":25107": Interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.
   -no-get=false: If set, disable GET operations
   -no-put=false: If set, disable PUT operations
   -pid="": Path to write pid file
+  -timeout=15: Timeout on requests to internal Keep services (default 15 seconds)
 </code></pre>
 </notextile>
 
diff --git a/services/keepproxy/keepproxy.go b/services/keepproxy/keepproxy.go
index 7900096..8e734f7 100644
--- a/services/keepproxy/keepproxy.go
+++ b/services/keepproxy/keepproxy.go
@@ -37,7 +37,7 @@ func main() {
 		pidfile          string
 	)
 
-	flagset := flag.NewFlagSet("default", flag.ExitOnError)
+	flagset := flag.NewFlagSet("keepproxy", flag.ExitOnError)
 
 	flagset.StringVar(
 		&listen,

commit f60d6b260c77c03db86c3aaaeab13b863e18d9cf
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 16 00:16:27 2015 -0400

    5824: Fix up error checking and early-close behavior in CollectionFileReader.

diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
index 5db944c..0d05b8a 100644
--- a/sdk/go/keepclient/collectionreader.go
+++ b/sdk/go/keepclient/collectionreader.go
@@ -8,6 +8,17 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/manifest"
 )
 
+const (
+	// After reading a data block from Keep, cfReader slices it up
+	// and sends the slices to a buffered channel to be consumed
+	// by the caller via Read().
+	//
+	// dataSliceSize is the maximum size of the slices, and
+	// therefore the maximum number of bytes that will be returned
+	// by a single call to Read().
+	dataSliceSize = 1 << 20
+)
+
 // ErrNoManifest indicates the given collection has no manifest
 // information (e.g., manifest_text was excluded by a "select"
 // parameter when retrieving the collection record).
@@ -40,8 +51,10 @@ func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, fi
 			}
 			q = append(q, seg)
 			r.totalSize += uint64(seg.Len)
-			// Send toGet whatever it's ready to receive.
-			Q: for len(q) > 0 {
+			// Send toGet as many segments as we can until
+			// it blocks.
+		Q:
+			for len(q) > 0 {
 				select {
 				case r.toGet <- q[0]:
 					q = q[1:]
@@ -75,84 +88,127 @@ type cfReader struct {
 	// doGet() reads FileSegments from toGet, gets the data from
 	// Keep, and sends byte slices to toRead to be consumed by
 	// Read().
-	toGet        chan *manifest.FileSegment
-	toRead       chan []byte
+	toGet chan *manifest.FileSegment
+	// toRead is a buffered channel, sized to fit one full Keep
+	// block. This lets us verify checksums without having a
+	// store-and-forward delay between blocks: by the time the
+	// caller starts receiving data from block N, cfReader is
+	// starting to fetch block N+1. A larger buffer would be
+	// useful for a caller whose read speed varies a lot.
+	toRead chan []byte
 	// bytes ready to send next time someone calls Read()
-	buf          []byte
+	buf []byte
 	// Total size of the file being read. Not safe to read this
 	// until countDone is closed.
-	totalSize    uint64
-	countDone    chan struct{}
+	totalSize uint64
+	countDone chan struct{}
 	// First error encountered.
-	err          error
+	err error
+	// errNotNil is closed IFF err contains a non-nil error.
+	// Receiving from it will block until an error occurs.
+	errNotNil chan struct{}
+	// rdrClosed is closed IFF the reader's Close() method has
+	// been called. Any goroutines associated with the reader will
+	// stop and free up resources when they notice this channel is
+	// closed.
+	rdrClosed chan struct{}
 }
 
-func (r *cfReader) Read(outbuf []byte) (n int, err error) {
-	if r.err != nil {
-		return 0, r.err
+func (r *cfReader) Read(outbuf []byte) (int, error) {
+	if r.Error() != nil {
+		return 0, r.Error()
 	}
 	for r.buf == nil || len(r.buf) == 0 {
 		var ok bool
 		r.buf, ok = <-r.toRead
-		if r.err != nil {
-			return 0, r.err
+		if r.Error() != nil {
+			return 0, r.Error()
 		} else if !ok {
 			return 0, io.EOF
 		}
 	}
+	n := len(r.buf)
 	if len(r.buf) > len(outbuf) {
 		n = len(outbuf)
-	} else {
-		n = len(r.buf)
 	}
 	copy(outbuf[:n], r.buf[:n])
 	r.buf = r.buf[n:]
-	return
+	return n, nil
 }
 
 func (r *cfReader) Close() error {
-	_, _ = <-r.countDone
-	for _ = range r.toGet {
-	}
-	for _ = range r.toRead {
+	close(r.rdrClosed)
+	return r.Error()
+}
+
+func (r *cfReader) Error() error {
+	select {
+	case <-r.errNotNil:
+		return r.err
+	default:
+		return nil
 	}
-	return r.err
 }
 
 func (r *cfReader) Len() uint64 {
 	// Wait for all segments to be counted
-	_, _ = <-r.countDone
+	<-r.countDone
 	return r.totalSize
 }
 
 func (r *cfReader) doGet() {
 	defer close(r.toRead)
+GET:
 	for fs := range r.toGet {
 		rdr, _, _, err := r.keepClient.Get(fs.Locator)
 		if err != nil {
 			r.err = err
+			close(r.errNotNil)
 			return
 		}
 		var buf = make([]byte, fs.Offset+fs.Len)
 		_, err = io.ReadFull(rdr, buf)
 		if err != nil {
 			r.err = err
+			close(r.errNotNil)
 			return
 		}
-		for bOff, bLen := fs.Offset, 1<<20; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+		for bOff, bLen := fs.Offset, dataSliceSize; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
 			if bOff+bLen > fs.Offset+fs.Len {
 				bLen = fs.Offset + fs.Len - bOff
 			}
-			r.toRead <- buf[bOff : bOff+bLen]
+			select {
+			case r.toRead <- buf[bOff : bOff+bLen]:
+			case <-r.rdrClosed:
+				// Reader is closed: no point sending
+				// anything more to toRead.
+				break GET
+			}
+		}
+		// It is possible that r.rdrClosed is closed but we
+		// never noticed because r.toRead was also ready in
+		// every select{} above. Here we check before wasting
+		// a keepclient.Get() call.
+		select {
+		case <-r.rdrClosed:
+			break GET
+		default:
 		}
 	}
+	// In case we exited the above loop early: before returning,
+	// drain the toGet channel so its sender doesn't sit around
+	// blocking forever.
+	for _ = range r.toGet {
+	}
 }
 
 func newCFReader(kc *KeepClient) (r *cfReader) {
 	r = new(cfReader)
 	r.keepClient = kc
+	r.rdrClosed = make(chan struct{})
+	r.errNotNil = make(chan struct{})
 	r.toGet = make(chan *manifest.FileSegment, 2)
-	r.toRead = make(chan []byte)
+	r.toRead = make(chan []byte, (BLOCKSIZE+dataSliceSize-1)/dataSliceSize)
 	r.countDone = make(chan struct{})
 	go r.doGet()
 	return
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index f271208..51710b7 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -20,7 +20,8 @@ type IntegrationSuite struct{}
 
 type SuccessHandler struct {
 	disk map[string][]byte
-	lock chan struct{}
+	lock chan struct{}	// channel with buffer==1: full when an operation is in progress.
+	ops  *int		// number of operations completed
 }
 
 func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
@@ -34,12 +35,18 @@ func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
 		pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
 		h.lock <- struct{}{}
 		h.disk[pdh] = buf
+		if h.ops != nil {
+			(*h.ops)++
+		}
 		<- h.lock
 		resp.Write([]byte(pdh))
 	case "GET":
 		pdh := req.URL.Path[1:]
 		h.lock <- struct{}{}
 		buf, ok := h.disk[pdh]
+		if h.ops != nil {
+			(*h.ops)++
+		}
 		<- h.lock
 		if !ok {
 			resp.WriteHeader(http.StatusNotFound)
@@ -57,6 +64,14 @@ type rdrTest struct {
 	want interface{} // error or string to expect
 }
 
+func StubWithFakeServers(kc *KeepClient, h http.Handler) {
+	localRoots := make(map[string]string)
+	for i, k := range RunSomeFakeKeepServers(h, 4) {
+		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+	}
+	kc.SetServiceRoots(localRoots, localRoots, nil)
+}
+
 func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	arv, err := arvadosclient.MakeArvadosClient()
 	c.Assert(err, check.IsNil)
@@ -66,12 +81,11 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	c.Assert(err, check.IsNil)
 
 	{
-		localRoots := make(map[string]string)
-		h := SuccessHandler{disk: make(map[string][]byte), lock: make(chan struct{}, 1)}
-		for i, k := range RunSomeFakeKeepServers(h, 4) {
-			localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+		h := SuccessHandler{
+			disk: make(map[string][]byte),
+			lock: make(chan struct{}, 1),
 		}
-		kc.SetServiceRoots(localRoots, localRoots, nil)
+		StubWithFakeServers(kc, h)
 		kc.PutB([]byte("foo"))
 		kc.PutB([]byte("bar"))
 		kc.PutB([]byte("Hello world\n"))
@@ -121,3 +135,49 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 		}
 	}
 }
+
+func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	arv.ApiToken = arvadostest.ActiveToken
+
+	kc, err := MakeKeepClient(&arv)
+	c.Assert(err, check.IsNil)
+
+	h := SuccessHandler{
+		disk: make(map[string][]byte),
+		lock: make(chan struct{}, 1),
+		ops: new(int),
+	}
+	StubWithFakeServers(kc, h)
+	kc.PutB([]byte("foo"))
+
+	mt := ". "
+	for i := 0; i < 1000; i++ {
+		mt += "acbd18db4cc2f85cedef654fccc4a4d8+3 "
+	}
+	mt += "0:3000:foo1000.txt\n"
+
+	// Grab the stub server's lock, ensuring our cfReader doesn't
+	// get anything back from its first call to kc.Get() before we
+	// have a chance to call Close().
+	h.lock <- struct{}{}
+	opsBeforeRead := *h.ops
+
+	rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+	c.Assert(err, check.IsNil)
+	err = rdr.Close()
+	c.Assert(err, check.IsNil)
+	c.Assert(rdr.Error(), check.IsNil)
+
+	// Release the stub server's lock. The first GET operation will proceed.
+	<-h.lock
+
+	// doGet() should close toRead before sending any more bufs to it.
+	if what, ok := <-rdr.toRead;  ok {
+		c.Errorf("Got %+v, expected toRead to be closed", what)
+	}
+
+	// Stub should have handled exactly one GET request.
+	c.Assert(*h.ops, check.Equals, opsBeforeRead+1)
+}

commit 5cde00c12a33398eda11e069aa5ba8b89419d72f
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 03:33:19 2015 -0400

    5824: Add Content-Length header.

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 03b3e26..30b4b64 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -239,6 +239,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 			w.Header().Set("Content-Type", t)
 		}
 	}
+	w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
 
 	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)

commit 12c77e30a5d36fc24f6897944914248819b2b3f4
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 01:56:55 2015 -0400

    5824: Use vhosts in curl integration tests. Add large file test.

diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index b4d6d17..a2a5754 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -3,6 +3,9 @@ package main
 import (
 	"crypto/md5"
 	"fmt"
+	"io"
+	"io/ioutil"
+	"net"
 	"os/exec"
 	"strings"
 	"testing"
@@ -25,17 +28,17 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"",
 		"bogustoken",
 	} {
-		hdr, body := s.runCurl(c, token, "/collections/"+arvadostest.FooCollection+"/foo")
+		hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
-			hdr, body = s.runCurl(c, token, "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
+			hdr, body, _ = s.runCurl(c, token, "dl.example.com", "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
 			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 			c.Check(body, check.Equals, "")
 		}
 
-		hdr, body = s.runCurl(c, token, "/bad-route")
+		hdr, body, _ = s.runCurl(c, token, "dl.example.com", "/bad-route")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 		c.Check(body, check.Equals, "")
 	}
@@ -64,12 +67,58 @@ func (s *IntegrationSuite) Test404(c *check.C) {
 		"/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
 		"/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 	} {
-		hdr, body := s.runCurl(c, arvadostest.ActiveToken, uri)
+		hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "dl.example.com", uri)
 		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
 		c.Check(body, check.Equals, "")
 	}
 }
 
+func (s *IntegrationSuite) Test1GBFile(c *check.C) {
+	if testing.Short() {
+		c.Skip("skipping 1GB integration test in short mode")
+	}
+	s.test100BlockFile(c, 10000000)
+}
+
+func (s *IntegrationSuite) Test300MBFile(c *check.C) {
+	s.test100BlockFile(c, 3000000)
+}
+
+func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
+	testdata := make([]byte, blocksize)
+	for i := 0; i < blocksize; i++ {
+		testdata[i] = byte(' ')
+	}
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+	arv.ApiToken = arvadostest.ActiveToken
+	kc, err := keepclient.MakeKeepClient(&arv)
+	c.Assert(err, check.Equals, nil)
+	loc, _, err := kc.PutB(testdata[:])
+	c.Assert(err, check.Equals, nil)
+	mtext := "."
+	for i := 0; i < 100; i++ {
+		mtext = mtext + " " + loc
+	}
+	mtext = mtext + fmt.Sprintf(" 0:%d00:testdata.bin\n", blocksize)
+	coll := map[string]interface{}{}
+	err = arv.Create("collections",
+		map[string]interface{}{
+			"collection": map[string]interface{}{
+				"name": fmt.Sprintf("testdata blocksize=%d", blocksize),
+				"manifest_text": mtext,
+			},
+		}, &coll)
+	c.Assert(err, check.Equals, nil)
+	uuid := coll["uuid"].(string)
+
+	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid + ".dl.example.com", "/testdata.bin")
+	c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+	c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
+	c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
+	c.Check(size, check.Equals, int64(blocksize)*100)
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
 	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
@@ -86,19 +135,13 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		// Anonymously accessible user agreement. These should
-		// start working when CollectionFileReader provides
-		// real data instead of fake/stub data.
+		// Anonymously accessible user agreement.
 		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 	} {
-		hdr, body := s.runCurl(c, spec[0], spec[1])
-		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {
-			c.Log("Not implemented!")
-			continue
-		}
+		hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
 		if strings.HasSuffix(spec[1], ".txt") {
 			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
@@ -111,15 +154,34 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 }
 
 // Return header block and body.
-func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string) (hdr, body string) {
+func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
 	curlArgs := []string{"--silent", "--show-error", "--include"}
+	testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
+	curlArgs = append(curlArgs, "--resolve", host + ":" + testPort + ":" + testHost)
 	if token != "" {
 		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
 	}
 	curlArgs = append(curlArgs, args...)
-	curlArgs = append(curlArgs, "http://"+s.testServer.Addr+uri)
+	curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)
 	c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
-	output, err := exec.Command("curl", curlArgs...).CombinedOutput()
+	cmd := exec.Command("curl", curlArgs...)
+	stdout, err := cmd.StdoutPipe()
+	c.Assert(err, check.Equals, nil)
+	cmd.Stderr = cmd.Stdout
+	go cmd.Start()
+	buf := make([]byte, 2<<27)
+	n, err := io.ReadFull(stdout, buf)
+	// Discard (but measure size of) anything past 256 MiB (2<<27 bytes).
+	var discarded int64
+	if err == io.ErrUnexpectedEOF {
+		err = nil
+		buf = buf[:n]
+	} else {
+		c.Assert(err, check.Equals, nil)
+		discarded, err = io.Copy(ioutil.Discard, stdout)
+		c.Assert(err, check.Equals, nil)
+	}
+	err = cmd.Wait()
 	// Without "-f", curl exits 0 as long as it gets a valid HTTP
 	// response from the server, even if the response status
 	// indicates that the request failed. In our test suite, we
@@ -127,10 +189,11 @@ func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string
 	// headers ourselves. If curl exits non-zero, our testing
 	// environment is broken.
 	c.Assert(err, check.Equals, nil)
-	hdrsAndBody := strings.SplitN(string(output), "\r\n\r\n", 2)
+	hdrsAndBody := strings.SplitN(string(buf), "\r\n\r\n", 2)
 	c.Assert(len(hdrsAndBody), check.Equals, 2)
 	hdr = hdrsAndBody[0]
-	body = hdrsAndBody[1]
+	bodyPart = hdrsAndBody[1]
+	bodySize = int64(len(bodyPart)) + discarded
 	return
 }
 

commit 3d5693f7261a52cfa6eca54f054a43e7f5d049f9
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 01:20:28 2015 -0400

    5824: Support vhost-based collection lookups.

diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
index 87b28f8..3040e0a 100644
--- a/sdk/go/arvadostest/fixtures.go
+++ b/sdk/go/arvadostest/fixtures.go
@@ -7,6 +7,8 @@ const (
 	FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
 	NonexistentCollection = "zzzzz-4zz18-totallynotexist"
 	HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+	FooPdh                = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
+	HelloWorldPdh         = "55713e6a34081eb03609e7ad5fcad129+62"
 	PathologicalManifest  = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K@xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero@0 0:1:f 1:0:zero@1 1:4:ooba 4:0:zero@4 5:1:r 5:4:rbaz 9:0:zero@9\n" +
 		"./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
 		"./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
diff --git a/sdk/go/auth/auth.go b/sdk/go/auth/auth.go
index 4a719e9..41cfb99 100644
--- a/sdk/go/auth/auth.go
+++ b/sdk/go/auth/auth.go
@@ -1,6 +1,7 @@
 package auth
 
 import (
+	"encoding/base64"
 	"net/http"
 	"net/url"
 	"strings"
@@ -20,6 +21,15 @@ func NewCredentialsFromHTTPRequest(r *http.Request) *Credentials {
 	return c
 }
 
+// EncodeTokenCookie accepts a token and returns a string suitable
+// for use as a cookie value, such that it will be decoded correctly
+// by LoadTokensFromHTTPRequest.
+var EncodeTokenCookie func([]byte) string = base64.URLEncoding.EncodeToString
+
+// DecodeTokenCookie accepts a cookie value and returns the decoded
+// token.
+var DecodeTokenCookie func(string) ([]byte, error) = base64.URLEncoding.DecodeString
+
 // LoadTokensFromHttpRequest loads all tokens it can find in the
 // headers and query string of an http query.
 func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
@@ -51,6 +61,8 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
 		a.Tokens = append(a.Tokens, val...)
 	}
 
+	a.loadTokenFromCookie(r)
+
 	// TODO: Load token from Rails session cookie (if Rails site
 	// secret is known)
 }
@@ -59,3 +71,15 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
 // LoadTokensFromHttpRequest() that [or how] we should read and parse
 // the request body. This has to be requested explicitly by the
 // application.
+
+func (a *Credentials) loadTokenFromCookie(r *http.Request) {
+	cookie, err := r.Cookie("api_token")
+	if err != nil || len(cookie.Value) == 0 {
+		return
+	}
+	token, err := DecodeTokenCookie(cookie.Value)
+	if err != nil {
+		return
+	}
+	a.Tokens = append(a.Tokens, string(token))
+}
diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index dbf4f5b..325668f 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -1,28 +1,158 @@
 // Keep-web provides read-only HTTP access to files stored in Keep. It
 // serves public data to anonymous and unauthenticated clients, and
-// accepts authentication via Arvados tokens. It can be installed
-// anywhere with access to Keep services, typically behind a web proxy
-// that provides SSL support.
+// serves private data to clients that supply Arvados API tokens. It
+// can be installed anywhere with access to Keep services, typically
+// behind a web proxy that supports TLS.
 //
-// Given that this amounts to a web hosting service for arbitrary
-// content, it is vital to ensure that at least one of the following is
-// true:
+// Starting the server
 //
-// Usage
-//
-// Listening:
+// Serve HTTP requests at port 1234 on all interfaces:
 //
 //   keep-web -address=:1234
 //
-// Start an HTTP server on port 1234.
+// Serve HTTP requests at port 1234 on the interface with IP address 1.2.3.4:
 //
 //   keep-web -address=1.2.3.4:1234
 //
-// Start an HTTP server on port 1234, on the interface with IP address 1.2.3.4.
+// Proxy configuration
 //
 // Keep-web does not support SSL natively. Typically, it is installed
 // behind a proxy like nginx.
 //
+// Here is an example nginx configuration.
+//
+//	http {
+//	  upstream keep-web {
+//	    server localhost:1234;
+//	  }
+//	  server {
+//	    listen *:443 ssl;
+//	    server_name dl.example.com *.dl.example.com ~.*--dl.example.com;
+//	    ssl_certificate /root/wildcard.example.com.crt;
+//	    ssl_certificate_key /root/wildcard.example.com.key;
+//	    location  / {
+//	      proxy_pass http://keep-web;
+//	      proxy_set_header Host $host;
+//	      proxy_set_header X-Forwarded-For $remote_addr;
+//	    }
+//	  }
+//	}
+//
+// It is not necessary to run keep-web on the same host as the nginx
+// proxy. However, TLS is not used between nginx and keep-web, so
+// intervening networks must be secured by other means.
+//
+// Download URLs
+//
+// The following "same origin" URL patterns are supported for public
+// collections (i.e., collections which can be served by keep-web
+// without making use of any credentials supplied by the client). See
+// "Same-origin mode" below.
+//
+//   http://dl.example.com/c=uuid_or_pdh/path/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//
+// The following "multiple origin" URL patterns are supported for all
+// collections:
+//
+//   http://uuid_or_pdh--dl.example.com/path/file.txt
+//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
+//   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
+//
+// In the "multiple origin" form, the string "--" can be replaced with
+// "." with identical results (assuming the upstream proxy is
+// configured accordingly). These two are equivalent:
+//
+//   http://uuid_or_pdh--dl.example.com/path/file.txt
+//   http://uuid_or_pdh.dl.example.com/path/file.txt
+//
+// The first form minimizes the cost and effort of deploying a
+// wildcard TLS certificate for *.dl.example.com. The second form is
+// likely to be easier to configure, and more efficient to run, on an
+// upstream proxy.
+//
+// In all of the above forms, the "dl.example.com" part can be
+// anything at all.
+//
+// In all of the above forms, the "uuid_or_pdh" part can be either a
+// collection UUID or a portable data hash with the "+" character
+// replaced by "-".
+//
+// Assuming there is a collection with UUID
+// zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
+// 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
+// interchangeable:
+//
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo
+//
+// Authorization mechanisms
+//
+// A token can be provided in an Authorization header:
+//
+//   Authorization: OAuth2 o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A base64-encoded token can be provided in a cookie named "api_token":
+//
+//   Cookie: api_token=bzA3ajRweDdSbEpLNEN1TVlwN0MwTERUNEN6UjFKMXFCRTVBdm83ZUNjVWpPVGlreEs=
+//
+// A token can be provided in a URL-encoded query string:
+//
+//   GET /foo.txt?api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A suitably encoded token can be provided in a POST body if the
+// request has a content type of application/x-www-form-urlencoded or
+// multipart/form-data:
+//
+//   POST /foo.txt
+//   Content-Type: application/x-www-form-urlencoded
+//   [...]
+//   api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// If a token is provided in a query string or in a POST request, the
+// response is an HTTP 303 redirect to an equivalent GET request, with
+// the token stripped from the query string and added to a cookie
+// instead.
+//
+// Compatibility
+//
+// Client-provided authorization tokens are ignored if the client does
+// not provide a Host header.
+//
+// In order to use the query string or POST form authorization
+// mechanisms, the client must follow 303 redirects; the client must
+// accept cookies with a 303 response and send those cookies when
+// performing the redirect; and either the client or an intervening
+// proxy must resolve a relative URL ("//host/path") if given in a
+// response Location header.
+//
+// Intranet mode
+//
+// Normally, Keep-web accepts requests for multiple collections using
+// the same host name, provided the client's credentials are not being
+// used. This provides insufficient XSS protection in an installation
+// where the "anonymously accessible" data is not truly public, but
+// merely protected by network topology.
+//
+// In such cases -- for example, a site which is not reachable from
+// the internet, where some data is world-readable from Arvados's
+// perspective but is intended to be available only to users within
+// the local network -- the upstream proxy should be configured to return
+// 401 for all paths beginning with "/c=".
+//
+// Same-origin mode
+//
+// Without the same-origin protection outlined above, a web page
+// stored in collection X could execute JavaScript code that uses the
+// current viewer's credentials to download additional data from
+// collection Y -- data which is accessible to the current viewer, but
+// not to the author of collection X -- from the same origin
+// (``https://dl.example.com/'') and upload it to some other site
+// chosen by the author of collection X.
+//
 package main
 
 // TODO(TC): Implement
@@ -31,7 +161,7 @@ package main
 //
 // Normally, Keep-web is installed using a wildcard DNS entry and a
 // wildcard HTTPS certificate, serving data from collection X at
-// ``https://X.dl.example.com/path/file.ext''.
+// ``https://X--dl.example.com/path/file.ext''.
 //
 // It will also serve publicly accessible data at
 // ``https://dl.example.com/collections/X/path/file.txt'', but it does not
@@ -48,10 +178,4 @@ package main
 //
 //   keep-web -trust-all-content [...]
 //
-// In the general case, this should not be enabled: A web page stored
-// in collection X can execute JavaScript code that uses the current
-// viewer's credentials to download additional data -- data which is
-// accessible to the current viewer, but not to the author of
-// collection X -- from the same origin (``https://dl.example.com/'')
-// and upload it to some other site chosen by the author of collection
-// X.
+// In the general case, this should not be enabled; see "Same-origin mode" above.
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 04af920..03b3e26 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -2,11 +2,14 @@ package main
 
 import (
 	"fmt"
+	"html"
 	"io"
 	"mime"
 	"net/http"
+	"net/url"
 	"os"
 	"strings"
+	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
@@ -25,26 +28,49 @@ func init() {
 	anonymousTokens = []string{}
 }
 
+// return the first label of DNS name s (minus any "--" suffix) if it is a UUID or a PDH, otherwise ""
+func parseCollectionIdFromDNSName(s string) string {
+	// Strip domain.
+	if i := strings.IndexRune(s, '.'); i >= 0 {
+		s = s[:i]
+	}
+	// Names like {uuid}--dl.example.com serve the same purpose as
+	// {uuid}.dl.example.com but can reduce cost/effort of using
+	// [additional] wildcard certificates.
+	if i := strings.Index(s, "--"); i >= 0 {
+		s = s[:i]
+	}
+	if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
+		return ""
+	}
+	return s
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
-	var statusCode int
+	var statusCode = 0
 	var statusText string
 
 	w := httpserver.WrapResponseWriter(wOrig)
 	defer func() {
-		if statusCode > 0 {
-			if w.WroteStatus() == 0 {
-				w.WriteHeader(statusCode)
-			} else {
-				httpserver.Log(r.RemoteAddr, "WARNING",
-					fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
-			}
+		if statusCode == 0 {
+			statusCode = w.WroteStatus()
+		} else if w.WroteStatus() == 0 {
+			w.WriteHeader(statusCode)
+		} else if w.WroteStatus() != statusCode {
+			httpserver.Log(r.RemoteAddr, "WARNING",
+				fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
 		}
 		if statusText == "" {
 			statusText = http.StatusText(statusCode)
 		}
-		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
 	}()
 
+	if r.Method != "GET" && r.Method != "POST" {
+		statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+		return
+	}
+
 	arv := clientPool.Get()
 	if arv == nil {
 		statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
@@ -54,17 +80,70 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
 	pathParts := strings.Split(r.URL.Path[1:], "/")
 
-	if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
-		statusCode = http.StatusNotFound
-		return
-	}
-
 	var targetId string
 	var targetPath []string
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
-	if len(pathParts) >= 5 && pathParts[1] == "download" {
+
+	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
+		// "http://{id}.domain.example.com/{path}" form
+		if t := r.FormValue("api_token"); t != "" {
+			// ...with explicit token in query string or
+			// form in POST body. We must encrypt the
+			// token such that it can only be used for
+			// this collection; put it in an HttpOnly
+			// cookie; and redirect to the same URL with
+			// the query param redacted, and method =
+			// GET.
+			//
+			// The HttpOnly flag is necessary to prevent
+			// JavaScript code (included in, or loaded by,
+			// a page in the collection being served) from
+			// employing the user's token beyond reading
+			// other files in the same domain, i.e., the
+			// same collection.
+			//
+			// The 303 redirect is necessary in the case
+			// of a GET request to avoid exposing the
+			// token in the Location bar, and in the case
+			// of a POST request to avoid raising warnings
+			// when the user refreshes the resulting page.
+			http.SetCookie(w, &http.Cookie{
+				Name:    "api_token",
+				Value:   auth.EncodeTokenCookie([]byte(t)),
+				Path:    "/",
+				Expires: time.Now().AddDate(10,0,0),
+			})
+			redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+			w.Header().Add("Location", redir)
+			statusCode, statusText = http.StatusSeeOther, redir
+			w.WriteHeader(statusCode)
+			io.WriteString(w, `<A href="`)
+			io.WriteString(w, html.EscapeString(redir))
+			io.WriteString(w, `">Continue</A>`)
+			return
+		} else if strings.HasPrefix(pathParts[0], "t=") {
+			// ...with explicit token in path,
+			// "{...}.com/t={token}/{path}".  This form
+			// must only be used to pass scoped tokens
+			// that give permission for a single
+			// collection. See FormValue case above.
+			tokens = []string{pathParts[0][2:]}
+			targetPath = pathParts[1:]
+			pathToken = true
+		} else {
+			// ...with cookie, Authorization header, or
+			// no token at all
+			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+			tokens = append(reqTokens, anonymousTokens...)
+			targetPath = pathParts
+		}
+	} else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+		statusCode = http.StatusNotFound
+		return
+	} else if len(pathParts) >= 5 && pathParts[1] == "download" {
 		// "/collections/download/{id}/{token}/path..." form:
 		// Don't use our configured anonymous tokens,
 		// Authorization headers, etc.  Just use the token in
@@ -86,7 +165,6 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	found := false
 	for _, arv.ApiToken = range tokens {
 		err := arv.Get("collections", targetId, nil, &collection)
-		httpserver.Log(err)
 		if err == nil {
 			// Success
 			found = true
@@ -131,8 +209,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		// someone trying (anonymously) to download public
 		// data that has been deleted.  Allow a referrer to
 		// provide this context somehow?
-		statusCode = http.StatusUnauthorized
 		w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+		statusCode = http.StatusUnauthorized
 		return
 	}
 
@@ -162,6 +240,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		}
 	}
 
+	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)
 	if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
new file mode 100644
index 0000000..a1f5e1a
--- /dev/null
+++ b/services/keep-web/handler_test.go
@@ -0,0 +1,218 @@
+package main
+
+import (
+	"html"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"regexp"
+	"strings"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&UnitSuite{})
+
+type UnitSuite struct {}
+
+func mustParseURL(s string) *url.URL {
+	r, err := url.Parse(s)
+	if err != nil {
+		panic("parse URL: " + s)
+	}
+	return r
+}
+
+func (s *IntegrationSuite) TestVhost404(c *check.C) {
+	for _, testURL := range []string{
+		arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
+		arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+	} {
+		resp := httptest.NewRecorder()
+		req := &http.Request{
+			Method: "GET",
+			URL: mustParseURL(testURL),
+		}
+		(&handler{}).ServeHTTP(resp, req)
+		c.Check(resp.Code, check.Equals, http.StatusNotFound)
+		c.Check(resp.Body.String(), check.Equals, "")
+	}
+}
+
+type authorizer func(*http.Request, string) int
+
+func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
+	doVhostRequests(c, authzViaAuthzHeader)
+}
+func authzViaAuthzHeader(r *http.Request, tok string) int {
+	r.Header.Add("Authorization", "OAuth2 " + tok)
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
+	doVhostRequests(c, authzViaCookieValue)
+}
+func authzViaCookieValue(r *http.Request, tok string) int {
+	r.AddCookie(&http.Cookie{
+		Name: "api_token",
+		Value: auth.EncodeTokenCookie([]byte(tok)),
+	})
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
+	doVhostRequests(c, authzViaPath)
+}
+func authzViaPath(r *http.Request, tok string) int {
+	r.URL.Path = "/t=" + tok + r.URL.Path
+	return http.StatusNotFound
+}
+
+func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
+	doVhostRequests(c, authzViaQueryString)
+}
+func authzViaQueryString(r *http.Request, tok string) int {
+	r.URL.RawQuery = "api_token=" + tok
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
+	doVhostRequests(c, authzViaPOST)
+}
+func authzViaPOST(r *http.Request, tok string) int {
+	r.Method = "POST"
+	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+	r.Body = ioutil.NopCloser(strings.NewReader(
+		url.Values{"api_token": {tok}}.Encode()))
+	return http.StatusUnauthorized
+}
+
+// Try some combinations of {url, token} using the given authorization
+// mechanism, and verify the result is correct.
+func doVhostRequests(c *check.C, authz authorizer) {
+	hostPath := arvadostest.FooCollection + ".example.com/foo"
+	for _, tok := range []string{
+		arvadostest.ActiveToken,
+		arvadostest.ActiveToken[:15],
+		arvadostest.SpectatorToken,
+		"bogus",
+		"",
+	} {
+		u := mustParseURL("http://" + hostPath)
+		req := &http.Request{
+			Method: "GET",
+			Host: u.Host,
+			URL: u,
+			Header: http.Header{},
+		}
+		failCode := authz(req, tok)
+		resp := doReq(req)
+		code, body := resp.Code, resp.Body.String()
+		if tok == arvadostest.ActiveToken {
+			c.Check(code, check.Equals, http.StatusOK)
+			c.Check(body, check.Equals, "foo")
+		} else {
+			c.Check(code >= 400, check.Equals, true)
+			c.Check(code < 500, check.Equals, true)
+			if tok == arvadostest.SpectatorToken {
+				// Valid token never offers to retry
+				// with different credentials.
+				c.Check(code, check.Equals, http.StatusNotFound)
+			} else {
+				// Invalid token can ask to retry
+				// depending on the authz method.
+				c.Check(code, check.Equals, failCode)
+			}
+			c.Check(body, check.Equals, "")
+		}
+	}
+}
+
+func doReq(req *http.Request) *httptest.ResponseRecorder {
+	resp := httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	if resp.Code != http.StatusSeeOther {
+		return resp
+	}
+	cookies := (&http.Response{Header: resp.Header()}).Cookies()
+	u, _ := req.URL.Parse(resp.Header().Get("Location"))
+	req = &http.Request{
+		Method: "GET",
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{},
+	}
+	for _, c := range cookies {
+		req.AddCookie(c)
+	}
+	return doReq(req)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		arvadostest.FooCollection + ".example.com/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "POST",
+		arvadostest.FooCollection + ".example.com/foo",
+		"",
+		"application/x-www-form-urlencoded",
+		url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
+		http.StatusOK,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "POST",
+		arvadostest.FooCollection + ".example.com/foo",
+		"",
+		"application/x-www-form-urlencoded",
+		url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
+		http.StatusNotFound,
+	)
+}
+
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) {
+	u, _ := url.Parse(`http://` + hostPath + queryString)
+	req := &http.Request{
+		Method: method,
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{"Content-Type": {contentType}},
+		Body: ioutil.NopCloser(strings.NewReader(body)),
+	}
+
+	resp := httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	c.Assert(resp.Code, check.Equals, http.StatusSeeOther)
+	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
+	cookies := (&http.Response{Header: resp.Header()}).Cookies()
+
+	u, _ = u.Parse(resp.Header().Get("Location"))
+	req = &http.Request{
+		Method: "GET",
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{},
+	}
+	for _, c := range cookies {
+		req.AddCookie(c)
+	}
+
+	resp = httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	c.Check(resp.Header().Get("Location"), check.Equals, "")
+	c.Check(resp.Code, check.Equals, expectStatus)
+	if expectStatus == http.StatusOK {
+		c.Check(resp.Body.String(), check.Equals, "foo")
+	}
+}

commit b65d8b9008c4d0e6b5816d21bf6f1ae81167ee56
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 24 23:33:08 2015 -0400

    5824: add (*KeepClient)CollectionFileReader()

diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
new file mode 100644
index 0000000..87b28f8
--- /dev/null
+++ b/sdk/go/arvadostest/fixtures.go
@@ -0,0 +1,17 @@
+package arvadostest
+
+const (
+	SpectatorToken        = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+	ActiveToken           = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+	AnonymousToken        = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+	FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
+	NonexistentCollection = "zzzzz-4zz18-totallynotexist"
+	HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+	PathologicalManifest  = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K at xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero at 0 0:1:f 1:0:zero at 1 1:4:ooba 4:0:zero at 4 5:1:r 5:4:rbaz 9:0:zero at 9\n" +
+		"./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
+		"./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
+		`./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:baz` + "\n" +
+		`./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:b\141z\040w\141z` + "\n" +
+		"./foo acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero 0:3:foo\n" +
+		". acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:foo/zero 0:3:foo/foo\n"
+)
diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
new file mode 100644
index 0000000..5db944c
--- /dev/null
+++ b/sdk/go/keepclient/collectionreader.go
@@ -0,0 +1,159 @@
+package keepclient
+
+import (
+	"errors"
+	"io"
+	"os"
+
+	"git.curoverse.com/arvados.git/sdk/go/manifest"
+)
+
+// ErrNoManifest indicates the given collection has no manifest
+// information (e.g., manifest_text was excluded by a "select"
+// parameter when retrieving the collection record).
+var ErrNoManifest = errors.New("Collection has no manifest")
+
+// CollectionFileReader returns an io.Reader that reads file content
+// from a collection. The filename must be given relative to the root
+// of the collection, without a leading "./".
+func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (*cfReader, error) {
+	mText, ok := collection["manifest_text"].(string)
+	if !ok {
+		return nil, ErrNoManifest
+	}
+	m := manifest.Manifest{Text: mText}
+	rdrChan := make(chan *cfReader)
+	go func() {
+		// q is a queue of FileSegments that we have received but
+		// haven't yet been able to send to toGet.
+		var q []*manifest.FileSegment
+		var r *cfReader
+		for seg := range m.FileSegmentIterByName(filename) {
+			if r == nil {
+				// We've just discovered that the
+				// requested filename does appear in
+				// the manifest, so we can return a
+				// real reader (not nil) from
+				// CollectionFileReader().
+				r = newCFReader(kc)
+				rdrChan <- r
+			}
+			q = append(q, seg)
+			r.totalSize += uint64(seg.Len)
+			// Send toGet whatever it's ready to receive.
+			Q: for len(q) > 0 {
+				select {
+				case r.toGet <- q[0]:
+					q = q[1:]
+				default:
+					break Q
+				}
+			}
+		}
+		if r == nil {
+			// File not found
+			rdrChan <- nil
+			return
+		}
+		close(r.countDone)
+		for _, seg := range q {
+			r.toGet <- seg
+		}
+		close(r.toGet)
+	}()
+	// Before returning a reader, wait until we know whether the
+	// file exists here:
+	r := <-rdrChan
+	if r == nil {
+		return nil, os.ErrNotExist
+	}
+	return r, nil
+}
+
+type cfReader struct {
+	keepClient *KeepClient
+	// doGet() reads FileSegments from toGet, gets the data from
+	// Keep, and sends byte slices to toRead to be consumed by
+	// Read().
+	toGet        chan *manifest.FileSegment
+	toRead       chan []byte
+	// bytes ready to send next time someone calls Read()
+	buf          []byte
+	// Total size of the file being read. Not safe to read this
+	// until countDone is closed.
+	totalSize    uint64
+	countDone    chan struct{}
+	// First error encountered.
+	err          error
+}
+
+func (r *cfReader) Read(outbuf []byte) (n int, err error) {
+	if r.err != nil {
+		return 0, r.err
+	}
+	for r.buf == nil || len(r.buf) == 0 {
+		var ok bool
+		r.buf, ok = <-r.toRead
+		if r.err != nil {
+			return 0, r.err
+		} else if !ok {
+			return 0, io.EOF
+		}
+	}
+	if len(r.buf) > len(outbuf) {
+		n = len(outbuf)
+	} else {
+		n = len(r.buf)
+	}
+	copy(outbuf[:n], r.buf[:n])
+	r.buf = r.buf[n:]
+	return
+}
+
+func (r *cfReader) Close() error {
+	_, _ = <-r.countDone
+	for _ = range r.toGet {
+	}
+	for _ = range r.toRead {
+	}
+	return r.err
+}
+
+func (r *cfReader) Len() uint64 {
+	// Wait for all segments to be counted
+	_, _ = <-r.countDone
+	return r.totalSize
+}
+
+func (r *cfReader) doGet() {
+	defer close(r.toRead)
+	for fs := range r.toGet {
+		rdr, _, _, err := r.keepClient.Get(fs.Locator)
+		if err != nil {
+			r.err = err
+			return
+		}
+		var buf = make([]byte, fs.Offset+fs.Len)
+		_, err = io.ReadFull(rdr, buf)
+		if err != nil {
+			r.err = err
+			return
+		}
+		for bOff, bLen := fs.Offset, 1<<20; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+			if bOff+bLen > fs.Offset+fs.Len {
+				bLen = fs.Offset + fs.Len - bOff
+			}
+			r.toRead <- buf[bOff : bOff+bLen]
+		}
+	}
+}
+
+func newCFReader(kc *KeepClient) (r *cfReader) {
+	r = new(cfReader)
+	r.keepClient = kc
+	r.toGet = make(chan *manifest.FileSegment, 2)
+	r.toRead = make(chan []byte)
+	r.countDone = make(chan struct{})
+	go r.doGet()
+	return
+}
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
new file mode 100644
index 0000000..f271208
--- /dev/null
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -0,0 +1,123 @@
+package keepclient
+
+import (
+	"crypto/md5"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"os"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+// IntegrationSuite tests need an API server
+type IntegrationSuite struct{}
+
+type SuccessHandler struct {
+	disk map[string][]byte
+	lock chan struct{}
+}
+
+func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+	switch req.Method {
+	case "PUT":
+		buf, err := ioutil.ReadAll(req.Body)
+		if err != nil {
+			resp.WriteHeader(500)
+			return
+		}
+		pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
+		h.lock <- struct{}{}
+		h.disk[pdh] = buf
+		<- h.lock
+		resp.Write([]byte(pdh))
+	case "GET":
+		pdh := req.URL.Path[1:]
+		h.lock <- struct{}{}
+		buf, ok := h.disk[pdh]
+		<- h.lock
+		if !ok {
+			resp.WriteHeader(http.StatusNotFound)
+		} else {
+			resp.Write(buf)
+		}
+	default:
+		resp.WriteHeader(http.StatusMethodNotAllowed)
+	}
+}
+
+type rdrTest struct {
+	mt   string      // manifest text
+	f    string      // filename
+	want interface{} // error or string to expect
+}
+
+func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	arv.ApiToken = arvadostest.ActiveToken
+
+	kc, err := MakeKeepClient(&arv)
+	c.Assert(err, check.IsNil)
+
+	{
+		localRoots := make(map[string]string)
+		h := SuccessHandler{disk: make(map[string][]byte), lock: make(chan struct{}, 1)}
+		for i, k := range RunSomeFakeKeepServers(h, 4) {
+			localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+		}
+		kc.SetServiceRoots(localRoots, localRoots, nil)
+		kc.PutB([]byte("foo"))
+		kc.PutB([]byte("bar"))
+		kc.PutB([]byte("Hello world\n"))
+		kc.PutB([]byte(""))
+	}
+
+	mt := arvadostest.PathologicalManifest
+
+	for _, testCase := range []rdrTest{
+		{mt: mt, f: "zzzz", want: os.ErrNotExist},
+		{mt: mt, f: "frob", want: os.ErrNotExist},
+		{mt: mt, f: "/segmented/frob", want: os.ErrNotExist},
+		{mt: mt, f: "./segmented/frob", want: os.ErrNotExist},
+		{mt: mt, f: "/f", want: os.ErrNotExist},
+		{mt: mt, f: "./f", want: os.ErrNotExist},
+		{mt: mt, f: "foo bar//baz", want: os.ErrNotExist},
+		{mt: mt, f: "foo/zero", want: ""},
+		{mt: mt, f: "zero at 0", want: ""},
+		{mt: mt, f: "zero at 1", want: ""},
+		{mt: mt, f: "zero at 4", want: ""},
+		{mt: mt, f: "zero at 9", want: ""},
+		{mt: mt, f: "f", want: "f"},
+		{mt: mt, f: "ooba", want: "ooba"},
+		{mt: mt, f: "overlapReverse/o", want: "o"},
+		{mt: mt, f: "overlapReverse/oo", want: "oo"},
+		{mt: mt, f: "overlapReverse/ofoo", want: "ofoo"},
+		{mt: mt, f: "foo bar/baz", want: "foo"},
+		{mt: mt, f: "segmented/frob", want: "frob"},
+		{mt: mt, f: "segmented/oof", want: "oof"},
+	} {
+		rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+		switch want := testCase.want.(type) {
+		case error:
+			c.Check(rdr, check.IsNil)
+			c.Check(err, check.Equals, want)
+		case string:
+			buf := make([]byte, len(want))
+			n, err := io.ReadFull(rdr, buf)
+			c.Check(err, check.IsNil)
+			for i := 0; i < 4; i++ {
+				c.Check(string(buf), check.Equals, want)
+				n, err = rdr.Read(buf)
+				c.Check(n, check.Equals, 0)
+				c.Check(err, check.Equals, io.EOF)
+			}
+			c.Check(rdr.Close(), check.Equals, nil)
+		}
+	}
+}
diff --git a/sdk/go/manifest/manifest.go b/sdk/go/manifest/manifest.go
index 4e816cd..f104d9a 100644
--- a/sdk/go/manifest/manifest.go
+++ b/sdk/go/manifest/manifest.go
@@ -5,25 +5,185 @@
 package manifest
 
 import (
+	"errors"
+	"fmt"
 	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"log"
+	"regexp"
+	"strconv"
 	"strings"
 )
 
+var ErrInvalidToken = errors.New("Invalid token")
+
+var LocatorPattern = regexp.MustCompile(
+	"^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9 at _-]+)*$")
+
 type Manifest struct {
 	Text string
 }
 
+type BlockLocator struct {
+	Digest blockdigest.BlockDigest
+	Size   int
+	Hints  []string
+}
+
+type DataSegment struct {
+	BlockLocator
+	Locator      string
+	StreamOffset uint64
+}
+
+// FileSegment is a portion of a file that is contained within a
+// single block.
+type FileSegment struct {
+	Locator string
+	// Offset (within this block) of this data segment
+	Offset int
+	Len    int
+}
+
 // Represents a single line from a manifest.
 type ManifestStream struct {
 	StreamName string
 	Blocks     []string
-	Files      []string
+	FileTokens []string
+}
+
+var escapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
+
+func unescapeSeq(seq string) string {
+	if seq == `\\` {
+		return `\`
+	}
+	i, err := strconv.ParseUint(seq[1:], 8, 8)
+	if err != nil {
+		// Invalid escape sequence: can't unescape.
+		return seq
+	}
+	return string([]byte{byte(i)})
+}
+
+func UnescapeName(s string) string {
+	return escapeSeq.ReplaceAllStringFunc(s, unescapeSeq)
+}
+
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+	if !LocatorPattern.MatchString(s) {
+		err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+			"\"%s\".",
+			s,
+			LocatorPattern.String())
+	} else {
+		tokens := strings.Split(s, "+")
+		var blockSize int64
+		var blockDigest blockdigest.BlockDigest
+		// We expect both of the following to succeed since LocatorPattern
+		// restricts the strings appropriately.
+		blockDigest, err = blockdigest.FromString(tokens[0])
+		if err != nil {
+			return
+		}
+		blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+		if err != nil {
+			return
+		}
+		b.Digest = blockDigest
+		b.Size = int(blockSize)
+		b.Hints = tokens[2:]
+	}
+	return
+}
+
+func parseFileToken(tok string) (segPos, segLen uint64, name string, err error) {
+	parts := strings.SplitN(tok, ":", 3)
+	if len(parts) != 3 {
+		err = ErrInvalidToken
+		return
+	}
+	segPos, err = strconv.ParseUint(parts[0], 10, 64)
+	if err != nil {
+		return
+	}
+	segLen, err = strconv.ParseUint(parts[1], 10, 64)
+	if err != nil {
+		return
+	}
+	name = UnescapeName(parts[2])
+	return
+}
+
+func (s *ManifestStream) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+	ch := make(chan *FileSegment)
+	go func() {
+		s.sendFileSegmentIterByName(filepath, ch)
+		close(ch)
+	}()
+	return ch
+}
+
+func (s *ManifestStream) sendFileSegmentIterByName(filepath string, ch chan<- *FileSegment) {
+	blockLens := make([]int, 0, len(s.Blocks))
+	// This is what streamName+"/"+fileName will look like:
+	target := "./" + filepath
+	for _, fTok := range s.FileTokens {
+		wantPos, wantLen, name, err := parseFileToken(fTok)
+		if err != nil {
+			// Skip (!) invalid file tokens.
+			continue
+		}
+		if s.StreamName+"/"+name != target {
+			continue
+		}
+		if wantLen == 0 {
+			ch <- &FileSegment{Locator: "d41d8cd98f00b204e9800998ecf8427e+0", Offset: 0, Len: 0}
+			continue
+		}
+		// Linear search for blocks containing data for this
+		// file
+		var blockPos uint64 = 0 // position of block in stream
+		for i, loc := range s.Blocks {
+			if blockPos >= wantPos+wantLen {
+				break
+			}
+			if len(blockLens) <= i {
+				blockLens = blockLens[:i+1]
+				b, err := ParseBlockLocator(loc)
+				if err != nil {
+					// Unparseable locator -> unusable
+					// stream.
+					ch <- nil
+					return
+				}
+				blockLens[i] = b.Size
+			}
+			blockLen := uint64(blockLens[i])
+			if blockPos+blockLen <= wantPos {
+				blockPos += blockLen
+				continue
+			}
+			fseg := FileSegment{
+				Locator: loc,
+				Offset:  0,
+				Len:     blockLens[i],
+			}
+			if blockPos < wantPos {
+				fseg.Offset = int(wantPos - blockPos)
+				fseg.Len -= fseg.Offset
+			}
+			if blockPos+blockLen > wantPos+wantLen {
+				fseg.Len = int(wantPos+wantLen-blockPos) - fseg.Offset
+			}
+			ch <- &fseg
+			blockPos += blockLen
+		}
+	}
 }
 
 func parseManifestStream(s string) (m ManifestStream) {
 	tokens := strings.Split(s, " ")
-	m.StreamName = tokens[0]
+	m.StreamName = UnescapeName(tokens[0])
 	tokens = tokens[1:]
 	var i int
 	for i = range tokens {
@@ -32,7 +192,7 @@ func parseManifestStream(s string) (m ManifestStream) {
 		}
 	}
 	m.Blocks = tokens[:i]
-	m.Files = tokens[i:]
+	m.FileTokens = tokens[i:]
 	return
 }
 
@@ -58,6 +218,20 @@ func (m *Manifest) StreamIter() <-chan ManifestStream {
 	return ch
 }
 
+func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+	ch := make(chan *FileSegment)
+	go func() {
+		for stream := range m.StreamIter() {
+			if !strings.HasPrefix("./"+filepath, stream.StreamName+"/") {
+				continue
+			}
+			stream.sendFileSegmentIterByName(filepath, ch)
+		}
+		close(ch)
+	}()
+	return ch
+}
+
 // Blocks may appear mulitple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
diff --git a/sdk/go/manifest/manifest_test.go b/sdk/go/manifest/manifest_test.go
index 8cfe3d9..364648d 100644
--- a/sdk/go/manifest/manifest_test.go
+++ b/sdk/go/manifest/manifest_test.go
@@ -1,10 +1,13 @@
 package manifest
 
 import (
-	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"io/ioutil"
+	"reflect"
 	"runtime"
 	"testing"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 )
 
 func getStackTrace() string {
@@ -60,7 +63,7 @@ func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
 func expectManifestStream(t *testing.T, actual ManifestStream, expected ManifestStream) {
 	expectEqual(t, actual.StreamName, expected.StreamName)
 	expectStringSlicesEqual(t, actual.Blocks, expected.Blocks)
-	expectStringSlicesEqual(t, actual.Files, expected.Files)
+	expectStringSlicesEqual(t, actual.FileTokens, expected.FileTokens)
 }
 
 func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
@@ -72,8 +75,19 @@ func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected
 func TestParseManifestStreamSimple(t *testing.T) {
 	m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
 	expectManifestStream(t, m, ManifestStream{StreamName: ".",
-		Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
-		Files:  []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+		Blocks:     []string{"365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
+		FileTokens: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+}
+
+func TestParseBlockLocatorSimple(t *testing.T) {
+	b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+	if err != nil {
+		t.Fatalf("Unexpected error parsing block locator: %v", err)
+	}
+	expectBlockLocator(t, b, BlockLocator{Digest: blockdigest.AssertFromString("365f83f5f808896ec834c8b595288735"),
+		Size: 2310,
+		Hints: []string{"K at qr1hi",
+			"Af0c9a66381f3b028677411926f0be1c6282fe67c at 542b5ddf"}})
 }
 
 func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
@@ -88,8 +102,8 @@ func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
 	expectManifestStream(t,
 		firstStream,
 		ManifestStream{StreamName: ".",
-			Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475 at 5441920c"},
-			Files:  []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+			Blocks:     []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475 at 5441920c"},
+			FileTokens: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
 
 	received, ok := <-streamIter
 	if ok {
@@ -126,3 +140,58 @@ func TestBlockIterLongManifest(t *testing.T) {
 			Size:  31367794,
 			Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db at 5441920c"}})
 }
+
+func TestUnescape(t *testing.T) {
+	for _, testCase := range [][]string{
+		{`\040`, ` `},
+		{`\009`, `\009`},
+		{`\\\040\\`, `\ \`},
+		{`\\040\`, `\040\`},
+	} {
+		in := testCase[0]
+		expect := testCase[1]
+		got := UnescapeName(in)
+		if expect != got {
+			t.Errorf("For '%s' got '%s' instead of '%s'", in, got, expect)
+		}
+	}
+}
+
+type fsegtest struct {
+	mt   string        // manifest text
+	f    string        // filename
+	want []FileSegment // segments should be received on channel
+}
+
+func TestFileSegmentIterByName(t *testing.T) {
+	mt := arvadostest.PathologicalManifest
+	for _, testCase := range []fsegtest{
+		{mt: mt, f: "zzzz", want: nil},
+		// This case is too sensitive: it would be acceptable
+		// (even preferable) to return only one empty segment.
+		{mt: mt, f: "foo/zero", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero at 0", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero at 1", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero at 4", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero at 9", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "f", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+		{mt: mt, f: "ooba", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 2}}},
+		{mt: mt, f: "overlapReverse/o", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}}},
+		{mt: mt, f: "overlapReverse/oo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}}},
+		{mt: mt, f: "overlapReverse/ofoo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+		{mt: mt, f: "foo bar/baz", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+		// This case is too sensitive: it would be better to
+		// omit the empty segment.
+		{mt: mt, f: "segmented/frob", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}, {"37b51d194a7513e45b56f6524f2d51f2+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 1}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 1}}},
+		{mt: mt, f: "segmented/oof", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+	} {
+		m := Manifest{Text: testCase.mt}
+		var got []FileSegment
+		for fs := range m.FileSegmentIterByName(testCase.f) {
+			got = append(got, *fs)
+		}
+		if !reflect.DeepEqual(got, testCase.want) {
+			t.Errorf("For %#v:\n got  %#v\n want %#v", testCase.f, got, testCase.want)
+		}
+	}
+}
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 48e3640..04af920 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -11,6 +11,7 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
 var clientPool = arvadosclient.MakeClientPool()
@@ -136,17 +137,20 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	}
 
 	filename := strings.Join(targetPath, "/")
-	rdr, err := arvadosclient.CollectionFileReader(collection, filename)
+	kc, err := keepclient.MakeKeepClient(arv)
+	if err != nil {
+		statusCode, statusText = http.StatusInternalServerError, err.Error()
+		return
+	}
+	rdr, err := kc.CollectionFileReader(collection, filename)
 	if os.IsNotExist(err) {
 		statusCode = http.StatusNotFound
 		return
-	} else if err == arvadosclient.ErrNotImplemented {
-		statusCode = http.StatusNotImplemented
-		return
 	} else if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
 		return
 	}
+	defer rdr.Close()
 
 	// One or both of these can be -1 if not found:
 	basenamePos := strings.LastIndex(filename, "/")
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index 66c6812..b4d6d17 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -15,16 +15,7 @@ import (
 
 var _ = check.Suite(&IntegrationSuite{})
 
-const (
-	spectatorToken  = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
-	activeToken     = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
-	anonymousToken  = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
-	fooCollection   = "zzzzz-4zz18-fy296fx3hot09f7"
-	bogusCollection = "zzzzz-4zz18-totallynotexist"
-	hwCollection    = "zzzzz-4zz18-4en62shvi99lxd4"
-)
-
-// IntegrationSuite tests need an API server and an arv-git-httpd server
+// IntegrationSuite tests need an API server and a keep-web server
 type IntegrationSuite struct {
 	testServer *server
 }
@@ -34,12 +25,12 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"",
 		"bogustoken",
 	} {
-		hdr, body := s.runCurl(c, token, "/collections/"+fooCollection+"/foo")
+		hdr, body := s.runCurl(c, token, "/collections/"+arvadostest.FooCollection+"/foo")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
-			hdr, body = s.runCurl(c, token, "/collections/download/"+fooCollection+"/"+token+"/foo")
+			hdr, body = s.runCurl(c, token, "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
 			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 			c.Check(body, check.Equals, "")
 		}
@@ -62,46 +53,46 @@ func (s *IntegrationSuite) Test404(c *check.C) {
 		"/download",
 		"/collections",
 		"/collections/",
-		"/collections/" + fooCollection,
-		"/collections/" + fooCollection + "/",
+		"/collections/" + arvadostest.FooCollection,
+		"/collections/" + arvadostest.FooCollection + "/",
 		// Non-existent file in collection
-		"/collections/" + fooCollection + "/theperthcountyconspiracy",
-		"/collections/download/" + fooCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		"/collections/" + arvadostest.FooCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 		// Non-existent collection
-		"/collections/" + bogusCollection,
-		"/collections/" + bogusCollection + "/",
-		"/collections/" + bogusCollection + "/theperthcountyconspiracy",
-		"/collections/download/" + bogusCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		"/collections/" + arvadostest.NonexistentCollection,
+		"/collections/" + arvadostest.NonexistentCollection + "/",
+		"/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 	} {
-		hdr, body := s.runCurl(c, activeToken, uri)
+		hdr, body := s.runCurl(c, arvadostest.ActiveToken, uri)
 		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
 		c.Check(body, check.Equals, "")
 	}
 }
 
 func (s *IntegrationSuite) Test200(c *check.C) {
-	anonymousTokens = []string{anonymousToken}
+	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
 	c.Assert(err, check.Equals, nil)
-	arv.ApiToken = activeToken
+	arv.ApiToken = arvadostest.ActiveToken
 	kc, err := keepclient.MakeKeepClient(&arv)
 	c.Assert(err, check.Equals, nil)
 	kc.PutB([]byte("Hello world\n"))
 	kc.PutB([]byte("foo"))
 	for _, spec := range [][]string{
 		// My collection
-		{activeToken, "/collections/" + fooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"tokensobogus", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{activeToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{anonymousToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.ActiveToken, "/collections/" + arvadostest.FooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		// Anonymously accessible user agreement. These should
 		// start working when CollectionFileReader provides
 		// real data instead of fake/stub data.
-		{"", "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{activeToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{spectatorToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{spectatorToken, "/collections/download/"+hwCollection+"/"+spectatorToken+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 	} {
 		hdr, body := s.runCurl(c, spec[0], spec[1])
 		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {

commit df9cb8ad02aaee8045cb31e207fd9c6a13c01684
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 01:09:02 2015 -0400

    5824: Add doc.go

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
new file mode 100644
index 0000000..dbf4f5b
--- /dev/null
+++ b/services/keep-web/doc.go
@@ -0,0 +1,57 @@
+// Keep-web provides read-only HTTP access to files stored in Keep. It
+// serves public data to anonymous and unauthenticated clients, and
+// accepts authentication via Arvados tokens. It can be installed
+// anywhere with access to Keep services, typically behind a web proxy
+// that provides SSL support.
+//
+// Given that this amounts to a web hosting service for arbitrary
+// content, it is vital to ensure that at least one of the following is
+// true:
+//
+// Usage
+//
+// Listening:
+//
+//   keep-web -address=:1234
+//
+// Start an HTTP server on port 1234.
+//
+//   keep-web -address=1.2.3.4:1234
+//
+// Start an HTTP server on port 1234, on the interface with IP address 1.2.3.4.
+//
+// Keep-web does not support SSL natively. Typically, it is installed
+// behind a proxy like nginx.
+//
+package main
+
+// TODO(TC): Implement
+//
+// Trusted content
+//
+// Normally, Keep-web is installed using a wildcard DNS entry and a
+// wildcard HTTPS certificate, serving data from collection X at
+// ``https://X.dl.example.com/path/file.ext''.
+//
+// It will also serve publicly accessible data at
+// ``https://dl.example.com/collections/X/path/file.txt'', but it does not
+// accept any kind of credentials at paths like these.
+//
+// In "trust all content" mode, Keep-web will accept credentials (API
+// tokens) and serve any collection X at
+// "https://dl.example.com/collections/X/path/file.ext".  This is
+// UNSAFE except in the special case where everyone who is able write
+// ANY data to Keep, and every JavaScript and HTML file written to
+// Keep, is also trusted to read ALL of the data in Keep.
+//
+// In such cases you can enable trust-all-content mode.
+//
+//   keep-web -trust-all-content [...]
+//
+// In the general case, this should not be enabled: A web page stored
+// in collection X can execute JavaScript code that uses the current
+// viewer's credentials to download additional data -- data which is
+// accessible to the current viewer, but not to the author of
+// collection X -- from the same origin (``https://dl.example.com/'')
+// and upload it to some other site chosen by the author of collection
+// X.

commit 96ce48a816ce1857e1ca3d035b3ab9002b7bc4c4
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Jun 23 19:12:58 2015 -0400

    5824: Add install doc

diff --git a/doc/install/install-keep-web.html.textile.liquid b/doc/install/install-keep-web.html.textile.liquid
new file mode 100644
index 0000000..4777668
--- /dev/null
+++ b/doc/install/install-keep-web.html.textile.liquid
@@ -0,0 +1,64 @@
+---
+layout: default
+navsection: installguide
+title: Install download server
+...
+
+This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
+
+The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to anonymous and unauthenticated clients, and accepts authentication via Arvados tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support.
+
+By convention, we use the following hostname for the download service:
+
+<div class="offset1">
+table(table table-bordered table-condensed).
+|dl.@uuid_prefix@.your.domain|
+</div>
+
+This hostname should resolve from anywhere on the internet.
+
+h2. Install keep-web
+
+First add the Arvados apt repository, and then install the keep-web package.
+
+<notextile>
+<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
+~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
+~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
+~$ <span class="userinput">sudo /usr/bin/apt-get install keep-web</span>
+</code></pre>
+</notextile>
+
+Verify that @keep-web@ is functional:
+
+<notextile>
+<pre><code>~$ <span class="userinput">keep-web -h</span>
+Usage of keep-web:
+  -address="0.0.0.0:80": Address to listen on, "host:port".
+</code></pre>
+</notextile>
+
+We recommend running @keep-web@ under "runit":https://packages.debian.org/search?keywords=runit or something similar.
+
+Your @run@ script should look something like this:
+
+<notextile>
+<pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
+exec sudo -u nobody keep-web -address=:9002 2>&1
+</code></pre>
+</notextile>
+
+h3. Set up a reverse proxy with SSL support
+
+The keep-web service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
+
+This is best achieved by putting a reverse proxy with SSL support in front of keep-web, running on port 443 and passing requests to keep-web on port 9002 (or whatever port you chose in your run script).
+
+h3. Tell the API server about the keep-web service
+
+In your API server's config/application.yml file, add the following entry:
+
+<notextile>
+<pre><code>keep-web: dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>

commit 8841e6b3fb247d3b3342379c86c0231d246731ff
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 17 02:47:49 2015 -0400

    5824: Assign MIME type by file extension. closes #6327

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index bbcd53c..48e3640 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -3,6 +3,7 @@ package main
 import (
 	"fmt"
 	"io"
+	"mime"
 	"net/http"
 	"os"
 	"strings"
@@ -146,6 +147,17 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
 		return
 	}
+
+	// One or both of these can be -1 if not found:
+	basenamePos := strings.LastIndex(filename, "/")
+	extPos := strings.LastIndex(filename, ".")
+	if extPos > basenamePos {
+		// Now extPos is safely >= 0.
+		if t := mime.TypeByExtension(filename[extPos:]); t != "" {
+			w.Header().Set("Content-Type", t)
+		}
+	}
+
 	_, err = io.Copy(w, rdr)
 	if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index 1c36f98..66c6812 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -109,6 +109,12 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 			continue
 		}
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+		if strings.HasSuffix(spec[1], ".txt") {
+			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
+			// TODO: Check some types that aren't
+			// automatically detected by Go's http server
+			// by sniffing the content.
+		}
 		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
 	}
 }

commit 0e7e5ba38ac7e86e0e398df063f612d84927692a
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 00:02:11 2015 -0400

    5824: Add keepdl.

diff --git a/services/keep-web/.gitignore b/services/keep-web/.gitignore
new file mode 100644
index 0000000..173e306
--- /dev/null
+++ b/services/keep-web/.gitignore
@@ -0,0 +1 @@
+keepdl
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
new file mode 100644
index 0000000..bbcd53c
--- /dev/null
+++ b/services/keep-web/handler.go
@@ -0,0 +1,153 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var clientPool = arvadosclient.MakeClientPool()
+
+var anonymousTokens []string
+
+type handler struct{}
+
+func init() {
+	// TODO(TC): Get anonymousTokens from flags
+	anonymousTokens = []string{}
+}
+
+func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+	var statusCode int
+	var statusText string
+
+	w := httpserver.WrapResponseWriter(wOrig)
+	defer func() {
+		if statusCode > 0 {
+			if w.WroteStatus() == 0 {
+				w.WriteHeader(statusCode)
+			} else {
+				httpserver.Log(r.RemoteAddr, "WARNING",
+					fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
+			}
+		}
+		if statusText == "" {
+			statusText = http.StatusText(statusCode)
+		}
+		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+	}()
+
+	arv := clientPool.Get()
+	if arv == nil {
+		statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
+		return
+	}
+	defer clientPool.Put(arv)
+
+	pathParts := strings.Split(r.URL.Path[1:], "/")
+
+	if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+		statusCode = http.StatusNotFound
+		return
+	}
+
+	var targetId string
+	var targetPath []string
+	var tokens []string
+	var reqTokens []string
+	var pathToken bool
+	if len(pathParts) >= 5 && pathParts[1] == "download" {
+		// "/collections/download/{id}/{token}/path..." form:
+		// Don't use our configured anonymous tokens,
+		// Authorization headers, etc.  Just use the token in
+		// the path.
+		targetId = pathParts[2]
+		tokens = []string{pathParts[3]}
+		targetPath = pathParts[4:]
+		pathToken = true
+	} else {
+		// "/collections/{id}/path..." form
+		targetId = pathParts[1]
+		reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		tokens = append(reqTokens, anonymousTokens...)
+		targetPath = pathParts[2:]
+	}
+
+	tokenResult := make(map[string]int)
+	collection := make(map[string]interface{})
+	found := false
+	for _, arv.ApiToken = range tokens {
+		err := arv.Get("collections", targetId, nil, &collection)
+		httpserver.Log(err)
+		if err == nil {
+			// Success
+			found = true
+			break
+		}
+		if srvErr, ok := err.(arvadosclient.APIServerError); ok {
+			switch srvErr.HttpStatusCode {
+			case 404, 401:
+				// Token broken or insufficient to
+				// retrieve collection
+				tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
+				continue
+			}
+		}
+		// Something more serious is wrong
+		statusCode, statusText = http.StatusInternalServerError, err.Error()
+		return
+	}
+	if !found {
+		if pathToken {
+			// The URL is a "secret sharing link", but it
+			// didn't work out. Asking the client for
+			// additional credentials would just be
+			// confusing.
+			statusCode = http.StatusNotFound
+			return
+		}
+		for _, t := range reqTokens {
+			if tokenResult[t] == 404 {
+				// The client provided valid token(s), but the
+				// collection was not found.
+				statusCode = http.StatusNotFound
+				return
+			}
+		}
+		// The client's token was invalid (e.g., expired), or
+		// the client didn't even provide one.  Propagate the
+		// 401 to encourage the client to use a [different]
+		// token.
+		//
+		// TODO(TC): This response would be confusing to
+		// someone trying (anonymously) to download public
+		// data that has been deleted.  Allow a referrer to
+		// provide this context somehow?
+		statusCode = http.StatusUnauthorized
+		w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+		return
+	}
+
+	filename := strings.Join(targetPath, "/")
+	rdr, err := arvadosclient.CollectionFileReader(collection, filename)
+	if os.IsNotExist(err) {
+		statusCode = http.StatusNotFound
+		return
+	} else if err == arvadosclient.ErrNotImplemented {
+		statusCode = http.StatusNotImplemented
+		return
+	} else if err != nil {
+		statusCode, statusText = http.StatusBadGateway, err.Error()
+		return
+	}
+	_, err = io.Copy(w, rdr)
+	if err != nil {
+		statusCode, statusText = http.StatusBadGateway, err.Error()
+	}
+}
diff --git a/services/keep-web/main.go b/services/keep-web/main.go
new file mode 100644
index 0000000..d780cc3
--- /dev/null
+++ b/services/keep-web/main.go
@@ -0,0 +1,28 @@
+package main
+
+import (
+	"flag"
+	"log"
+	"os"
+)
+
+func init() {
+	// MakeArvadosClient returns an error if this env var isn't
+	// available as a default token (even if we explicitly set a
+	// different token before doing anything with the client). We
+	// set this dummy value during init so it doesn't clobber the
+	// one used by "run test servers".
+	os.Setenv("ARVADOS_API_TOKEN", "xxx")
+}
+
+func main() {
+	flag.Parse()
+	srv := &server{}
+	if err := srv.Start(); err != nil {
+		log.Fatal(err)
+	}
+	log.Println("Listening at", srv.Addr)
+	if err := srv.Wait(); err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/services/keep-web/server.go b/services/keep-web/server.go
new file mode 100644
index 0000000..44da00f
--- /dev/null
+++ b/services/keep-web/server.go
@@ -0,0 +1,27 @@
+package main
+
+import (
+	"flag"
+	"net/http"
+
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var address string
+
+func init() {
+	flag.StringVar(&address, "address", "0.0.0.0:80",
+		"Address to listen on, \"host:port\".")
+}
+
+type server struct {
+	httpserver.Server
+}
+
+func (srv *server) Start() error {
+	mux := http.NewServeMux()
+	mux.Handle("/", &handler{})
+	srv.Handler = mux
+	srv.Addr = address
+	return srv.Server.Start()
+}
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
new file mode 100644
index 0000000..1c36f98
--- /dev/null
+++ b/services/keep-web/server_test.go
@@ -0,0 +1,170 @@
+package main
+
+import (
+	"crypto/md5"
+	"fmt"
+	"os/exec"
+	"strings"
+	"testing"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/keepclient"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+const (
+	spectatorToken  = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+	activeToken     = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+	anonymousToken  = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+	fooCollection   = "zzzzz-4zz18-fy296fx3hot09f7"
+	bogusCollection = "zzzzz-4zz18-totallynotexist"
+	hwCollection    = "zzzzz-4zz18-4en62shvi99lxd4"
+)
+
+// IntegrationSuite tests need an API server and a running Keep service
+type IntegrationSuite struct {
+	testServer *server
+}
+
+func (s *IntegrationSuite) TestNoToken(c *check.C) {
+	for _, token := range []string{
+		"",
+		"bogustoken",
+	} {
+		hdr, body := s.runCurl(c, token, "/collections/"+fooCollection+"/foo")
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
+		c.Check(body, check.Equals, "")
+
+		if token != "" {
+			hdr, body = s.runCurl(c, token, "/collections/download/"+fooCollection+"/"+token+"/foo")
+			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+			c.Check(body, check.Equals, "")
+		}
+
+		hdr, body = s.runCurl(c, token, "/bad-route")
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+		c.Check(body, check.Equals, "")
+	}
+}
+
+// TODO: Move most cases to functional tests -- at least use Go's own
+// http client instead of forking curl. Just leave enough of an
+// integration test to assure that the documented way of invoking curl
+// really works against the server.
+func (s *IntegrationSuite) Test404(c *check.C) {
+	for _, uri := range []string{
+		// Routing errors
+		"/",
+		"/foo",
+		"/download",
+		"/collections",
+		"/collections/",
+		"/collections/" + fooCollection,
+		"/collections/" + fooCollection + "/",
+		// Non-existent file in collection
+		"/collections/" + fooCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + fooCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		// Non-existent collection
+		"/collections/" + bogusCollection,
+		"/collections/" + bogusCollection + "/",
+		"/collections/" + bogusCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + bogusCollection + "/" + activeToken + "/theperthcountyconspiracy",
+	} {
+		hdr, body := s.runCurl(c, activeToken, uri)
+		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
+		c.Check(body, check.Equals, "")
+	}
+}
+
+func (s *IntegrationSuite) Test200(c *check.C) {
+	anonymousTokens = []string{anonymousToken}
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+	arv.ApiToken = activeToken
+	kc, err := keepclient.MakeKeepClient(&arv)
+	c.Assert(err, check.Equals, nil)
+	kc.PutB([]byte("Hello world\n"))
+	kc.PutB([]byte("foo"))
+	for _, spec := range [][]string{
+		// My collection
+		{activeToken, "/collections/" + fooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"tokensobogus", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{activeToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{anonymousToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		// Anonymously accessible user agreement. These should
+		// start working when CollectionFileReader provides
+		// real data instead of fake/stub data.
+		{"", "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{activeToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{spectatorToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{spectatorToken, "/collections/download/"+hwCollection+"/"+spectatorToken+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+	} {
+		hdr, body := s.runCurl(c, spec[0], spec[1])
+		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {
+			c.Log("Not implemented!")
+			continue
+		}
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
+	}
+}
+
+// Return header block and body.
+func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string) (hdr, body string) {
+	curlArgs := []string{"--silent", "--show-error", "--include"}
+	if token != "" {
+		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
+	}
+	curlArgs = append(curlArgs, args...)
+	curlArgs = append(curlArgs, "http://"+s.testServer.Addr+uri)
+	c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
+	output, err := exec.Command("curl", curlArgs...).CombinedOutput()
+	// Without "-f", curl exits 0 as long as it gets a valid HTTP
+	// response from the server, even if the response status
+	// indicates that the request failed. In our test suite, we
+	// always expect a valid HTTP response, and we parse the
+	// headers ourselves. If curl exits non-zero, our testing
+	// environment is broken.
+	c.Assert(err, check.Equals, nil)
+	hdrsAndBody := strings.SplitN(string(output), "\r\n\r\n", 2)
+	c.Assert(len(hdrsAndBody), check.Equals, 2)
+	hdr = hdrsAndBody[0]
+	body = hdrsAndBody[1]
+	return
+}
+
+func (s *IntegrationSuite) SetUpSuite(c *check.C) {
+	arvadostest.StartAPI()
+	arvadostest.StartKeep()
+}
+
+func (s *IntegrationSuite) TearDownSuite(c *check.C) {
+	arvadostest.StopKeep()
+	arvadostest.StopAPI()
+}
+
+func (s *IntegrationSuite) SetUpTest(c *check.C) {
+	arvadostest.ResetEnv()
+	s.testServer = &server{}
+	var err error
+	address = "127.0.0.1:0"
+	err = s.testServer.Start()
+	c.Assert(err, check.Equals, nil)
+}
+
+func (s *IntegrationSuite) TearDownTest(c *check.C) {
+	var err error
+	if s.testServer != nil {
+		err = s.testServer.Close()
+	}
+	c.Check(err, check.Equals, nil)
+}
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+	check.TestingT(t)
+}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list