[ARVADOS] updated: d96f0405962b9ea907e700b662b4aee444455c1b

git at public.curoverse.com git at public.curoverse.com
Sun Aug 30 02:48:44 EDT 2015


Summary of changes:
 .gitignore                                         |   3 +
 .../app/assets/stylesheets/application.css.scss    |   4 +
 .../app/controllers/projects_controller.rb         |   6 +-
 apps/workbench/app/models/authorized_key.rb        |   2 +-
 apps/workbench/app/models/repository.rb            |   2 +-
 .../views/application/_projects_tree_menu.html.erb |  15 +-
 apps/workbench/app/views/layouts/body.html.erb     |  27 +++-
 .../app/views/projects/_show_dashboard.html.erb    |  17 +--
 .../test/controllers/projects_controller_test.rb   |  11 ++
 .../test/integration/anonymous_access_test.rb      |   9 +-
 .../test/integration/application_layout_test.rb    |  25 ++--
 apps/workbench/test/integration/errors_test.rb     |   6 +-
 apps/workbench/test/integration/projects_test.rb   |  26 +++-
 doc/_config.yml                                    |   5 +-
 doc/_includes/_install_git.liquid                  |   3 +
 .../_tutorial_git_repo_expectations.liquid         |   3 +
 doc/install/index.html.textile.liquid              |  11 +-
 doc/install/install-api-server.html.textile.liquid |  10 +-
 .../install-arv-git-httpd.html.textile.liquid      |  18 ++-
 .../install-compute-node.html.textile.liquid       |   9 +-
 doc/install/install-keepdl.html.textile.liquid     |  48 ++++--
 doc/install/install-keepproxy.html.textile.liquid  |  38 +++--
 .../install-shell-server.html.textile.liquid       |  95 ++++++++++++
 .../install-workbench-app.html.textile.liquid      |  46 +++---
 doc/user/topics/arv-docker.html.textile.liquid     |   2 +-
 .../add-new-repository.html.textile.liquid         |   2 +-
 .../git-arvados-guide.html.textile.liquid          |  89 +++++++++++
 .../tutorial-submit-job.html.textile.liquid        |  49 +-----
 docker/workbench/application.yml.in                |   3 +-
 sdk/cli/bin/arv-tag                                |   9 +-
 sdk/go/keepclient/collectionreader_test.go         | 144 +++++++++++-------
 sdk/go/keepclient/keepclient_test.go               |  17 +--
 sdk/go/keepclient/support.go                       |  24 +--
 sdk/python/arvados/commands/arv_copy.py            |   5 +-
 .../arvados/v1/virtual_machines_controller.rb      |  18 ++-
 services/api/lib/salvage_collection.rb             |  94 ++++++++++++
 services/api/script/salvage_collection.rb          |  26 ++++
 .../arvados/v1/virtual_machines_controller_test.rb |   7 +
 services/api/test/unit/salvage_collection_test.rb  | 165 +++++++++++++++++++++
 services/arv-git-httpd/auth_handler.go             |   7 +-
 services/arv-git-httpd/doc.go                      |  26 ++--
 services/arv-git-httpd/git_handler.go              |  59 ++++++++
 services/arv-git-httpd/git_handler_test.go         |  56 +++++++
 services/arv-git-httpd/main.go                     |   2 +-
 services/arv-git-httpd/server.go                   |  13 +-
 services/arv-git-httpd/server_test.go              |   2 +-
 services/keepdl/doc.go                             |   2 +
 services/keepdl/handler.go                         |   7 +-
 services/keepstore/volume_unix.go                  |  72 ++++++---
 services/keepstore/volume_unix_test.go             |  41 ++++-
 {sdk/ruby => services/login-sync}/.gitignore       |   2 +-
 services/login-sync/Gemfile                        |   7 +
 {sdk/ruby => services/login-sync}/Rakefile         |   0
 .../login-sync/arvados-login-sync.gemspec          |  18 +--
 services/login-sync/bin/arvados-login-sync         | 111 ++++++++++++++
 services/login-sync/test/binstub_new_user/useradd  |   9 ++
 services/login-sync/test/stubs.rb                  |  52 +++++++
 services/login-sync/test/test_add_user.rb          |  37 +++++
 58 files changed, 1299 insertions(+), 317 deletions(-)
 create mode 100644 doc/_includes/_install_git.liquid
 create mode 100644 doc/_includes/_tutorial_git_repo_expectations.liquid
 create mode 100644 doc/user/tutorials/git-arvados-guide.html.textile.liquid
 create mode 100755 services/api/lib/salvage_collection.rb
 create mode 100755 services/api/script/salvage_collection.rb
 create mode 100644 services/api/test/unit/salvage_collection_test.rb
 create mode 100644 services/arv-git-httpd/git_handler.go
 create mode 100644 services/arv-git-httpd/git_handler_test.go
 copy {sdk/ruby => services/login-sync}/.gitignore (52%)
 create mode 100644 services/login-sync/Gemfile
 copy {sdk/ruby => services/login-sync}/Rakefile (100%)
 copy sdk/ruby/arvados.gemspec => services/login-sync/arvados-login-sync.gemspec (51%)
 create mode 100755 services/login-sync/bin/arvados-login-sync
 create mode 100755 services/login-sync/test/binstub_new_user/useradd
 create mode 100644 services/login-sync/test/stubs.rb
 create mode 100644 services/login-sync/test/test_add_user.rb

  discards  936b0f1a267b4d274062a3cf8bef4ae8454a5e8f (commit)
  discards  c792414b525a64a3407c876f6b8e69570adcb456 (commit)
  discards  f8381e235f2d63714f3dcc521298a2f5c3b5c439 (commit)
  discards  3cbaa1bece831d2c2f4204cedeaf1751b47a597a (commit)
  discards  15cfa6bc3793a54d07fab0f3147cababec4dc5f6 (commit)
  discards  99bc88b31986033fbc497c194907a12a97fdce61 (commit)
  discards  fa9174d4f575883826465cd29cc2f374d0ed6ed3 (commit)
  discards  777fcbd316c112be6e0f4265ca65a9a9f226f27d (commit)
  discards  ba094b970272ece90f9ff368e18549660faa05de (commit)
  discards  1ee305e5d4b80e478eee71b4b11e8221456eff15 (commit)
  discards  cdecbc7f736831dfd6017d975753bec66433e5bf (commit)
  discards  dbebb56bc61fb52928edfcfca64cfec9a697362c (commit)
  discards  310ccee9f772373256fd836e421f918349d6416f (commit)
  discards  5be8366eb525129c9b6d2b710365cf4dbc9392c0 (commit)
  discards  e7f7945dfc2d3ec30e0cef4f781cd47c31287efa (commit)
  discards  26ffcd38fd6b861a9005959ac016e826d30b8904 (commit)
  discards  374ce34bc1f1e8bca18c12f70d7de8dc8c5b60b6 (commit)
  discards  59976e5583eda9ff1bf35cb299165829829c5b5d (commit)
       via  d96f0405962b9ea907e700b662b4aee444455c1b (commit)
       via  ed6af5a868544100e2dffe8f6ccc8148f28edb93 (commit)
       via  2a6f138489902caba2b5c6ecc392dd70f85ba6d3 (commit)
       via  b420fea834591130dea8b5fd3a6165f514d0610c (commit)
       via  de603aa2e7ec727677772c52808c1821b79dd1fc (commit)
       via  67025bef2c15a40c2c7585633066c2a924d03ae7 (commit)
       via  db06fc8096c5a40c4c7046e5d13d8eef7c941f7e (commit)
       via  9038caea5d623ecab36d28b7a2d6a6f73b21e549 (commit)
       via  69e4f576d63166ca65061e7f304c28f9da752d1c (commit)
       via  30d316a3a597cf03d3f2540f74cc4c0faa8a7231 (commit)
       via  c04029c2147c845d8a0f349026dba062c6261d6f (commit)
       via  a14a2266b6b59e996685cfd694e86e95b9631de3 (commit)
       via  688a00fbf445629eb845ee88a87338c14424afac (commit)
       via  7b78afd6c670f1159ddff1f4e2ddfa6e7fa5b23c (commit)
       via  345a6ea565c56d488d5c471931fe2e452a5901c9 (commit)
       via  908432379061b7b4c1abef255c11bcf7c2f68538 (commit)
       via  73d1f74fcdf96a1ac298c4f7b8181e702c797ca5 (commit)
       via  1d59d3aa015ca5263e1ecaaad437f18db654beb4 (commit)
       via  3345d7c809056cb5e2352b404080355dfc0cc474 (commit)
       via  e78b4337cc4da187fac20a556b46e5ef31b79fd8 (commit)
       via  1dd92db0cd22631e965d777d2e51f6ded4765ca8 (commit)
       via  caddb03ec1d6e9d5d39f2ade9ddc70a4b7dddabb (commit)
       via  59b414ff7a625ad3d6e92659b20bdabc6d89e7d4 (commit)
       via  7bedb6e68dc97a4e081217c9a8901c31b7f9e13f (commit)
       via  5912867d965664504148f12cff801836a56f0162 (commit)
       via  9c5d1369b762e55838cc5aa24cdd61ebc079772d (commit)
       via  30012ed996dd5336cbfa7394234d1cbbf08a2b78 (commit)
       via  7d972883c71002a4ef11a7f85b47311b48847fa3 (commit)
       via  42c4ef158ac51f01f071a285e4a131acbf753ab9 (commit)
       via  deaf93e06ddfb5346f0492f99f441aa4734e6bf5 (commit)
       via  9259c169b6254ea581fcdcb18e1cdbe9b9fbea1e (commit)
       via  8e17ce539b4a90711f4a8059a0eff11b9c64207b (commit)
       via  4b8da3c9e3a48820ae3b1538103eda0282118392 (commit)
       via  bb7be2d3e49876a74fddb76260f3eac07bf0a431 (commit)
       via  7fc67e3f9bfd12058e6f3d86d995704ed8962a8b (commit)
       via  a1379fdd1f825c22a3ada0a2c085a1b0121ce89a (commit)
       via  f2afe64c25d1b3e79b38972e69104431a2587935 (commit)
       via  cf2d5c1b966cae7d5023ff482891c4ea7e767de4 (commit)
       via  a9359cd6ff10866acc943889ee77ab3f971dfa4e (commit)
       via  51a185e0f13ec3b2c376d3229c56d012dc91a04a (commit)
       via  51e6fbab8d08a5a92fc08c552f841a84bccbe0d6 (commit)
       via  8c1005ef92cffff5a4a9321f3b62da715b141c3d (commit)
       via  f71873ca6ec9969a90242a1c4d391770cabce1c2 (commit)
       via  ef8f43b830273dd54ae75f6a3823a946a455c2b3 (commit)
       via  1ca973446ddf0094cd563a7af63f4e3078f5b772 (commit)
       via  a8b431b5cfccd36995514560f965b4943ac93c6b (commit)
       via  36b1667a9b2ab888a80e84b6ecd75403c8f6782c (commit)
       via  2a6a3720271a12bc8a47928ade5b2c641514ca44 (commit)
       via  d78a90009ca25da4f8cfa75672df0b5bd77cabfe (commit)
       via  54819dd75fb929acfab581890dd1c8ee17cccf3e (commit)
       via  2656de63a9531f23aea840f6cb92e0cdc803599f (commit)
       via  2c9b40bdc8acd7fdd906c80e2c7f44abfb720de3 (commit)
       via  e513251f7b0f6acdc0c0d6df5792c18358030221 (commit)
       via  6feefc59c459d778046a83cb29178afc04acdf1c (commit)
       via  b5a7183d43ca4607fdc259267087e7f795d14de2 (commit)
       via  9f277b55034436e3a4ed251e4e86ea088adef20e (commit)
       via  a99c7bb0683175f2e8d365b7439e64a21f6cdb31 (commit)
       via  9494d7df964f24d7d8d53a09233d696d4ffcc234 (commit)
       via  a53d5ecd720cebfe1d81c077996cd2107ed80359 (commit)
       via  9f88440df10de6d3d2c2d8b50a802663fec9d0b7 (commit)
       via  1cecdfb6fabf47c921e7f422063368621619dcfb (commit)
       via  f92e43f87f137e00d3e23a2defa5c891da0e8462 (commit)
       via  ba0bc2f7111c9a783889c29878bf804083248bd2 (commit)
       via  77c7b991cfd9eefa42832c14383aa47f4d3ccd8c (commit)
       via  c62a4ff13d31bf40e8262f4b8b029167fa5e17b0 (commit)
       via  6a79f929394e3bf1be12ea892593d13861b65573 (commit)
       via  64b5df6b8c744a6a5d7dae5ef57966c9d0cc57f3 (commit)
       via  6e5158294c7e19af9031fc43fefc21275e758911 (commit)
       via  80d3540193e78a2dfee8373490a0f9e67f9d804a (commit)
       via  53ee080ab32335c5b77906fea6c50f7bb9ba837f (commit)
       via  748680bc8d517ce69a1fcdcc348b5a72e19e2c7a (commit)
       via  590154457cf00df8dc20fcaf2c513b213f59468d (commit)
       via  03a84f9276f02e97fe02113c9388652477e3ebbb (commit)
       via  21f2d53a70dbf6ae04db07e163f4485e3af47478 (commit)
       via  74ea7122c61a81b434e8f258ee104550c2f0b29e (commit)
       via  1de4f2fbc12c840726bc340792a73814818f3faa (commit)
       via  b20aa99b4356a2afa092bb355a0db78b0b8f711f (commit)
       via  6791c56b529545a191a3e9f714c5493744948ba4 (commit)
       via  9a4c7cb68d761329152bf49637c6fd0f8322bbd5 (commit)
       via  647c3bac741b56ce16c7e22ab2d462725a34198d (commit)
       via  15696397596dfebd42ebb415772237394ceca978 (commit)
       via  96b2b2fef69eaad2da73c1c5a0ee01f939089e15 (commit)
       via  36b7f52b07d4cc89110971a95774b927c5a22cc2 (commit)
       via  58f1ae16bd00d030edecbba3045c9cc00222c9d8 (commit)
       via  8dccf76ea830057d433b07c40bc2c1294891ea39 (commit)
       via  c1ceb0f74d5547c56cf6cdde56044adba171efaf (commit)
       via  3bfb9c5cbf5dd56b84fd17f9e1dcdd6a219fe5fe (commit)
       via  7ba5211a4228bcf01f679157d23dd99a9f0bbcd8 (commit)
       via  bdeee3f460ce7d8ef35b865eaf8aa1827194a42b (commit)
       via  406b3de5426bf0d63564410cf6caf2834ba2b7bb (commit)
       via  39c9193f03471aa7826769b34d6b55890a2c98a3 (commit)
       via  dac77d5d53bb56054b91ba2ed223a49da08848f8 (commit)
       via  381951637e6688e1868fae9b0642411f5cd1c223 (commit)
       via  65f76d01b775c944d2ef6c1673633adb6229d9d8 (commit)
       via  45fcd8aeec29e7e3b86415811f2cf2a5beb82a54 (commit)
       via  d929be450d1886ac04d7c2c98cd16eb44f468413 (commit)
       via  21f6342de550d985db7e2d6964af2ad0f8405406 (commit)
       via  3c068341659783d0d7132f92b14c05c07830d5b2 (commit)
       via  a25971cae157b6bc40037e391db226da36dc9b30 (commit)
       via  446868283108deda5510f4419b4065d254a4c5a8 (commit)
       via  3987bcc3f5f6acf9207e94049561c632b454f38a (commit)
       via  84d14d659b8c31c266a3b08e688171f90fe46cad (commit)
       via  42e27f90df96881d22e365af7069f36c4538cf07 (commit)
       via  749b308236ab70cd15088fcb81d093c9c5e8d30b (commit)
       via  636d833ed4206ffe44e5e9b9b9100ac9d82bc603 (commit)
       via  08a10afd0681a04e5bae019870d636d49c3dc223 (commit)
       via  a0daf49f4231d5b4ec6a8a00bfa0aac1ce69cde8 (commit)
       via  ca35c58c19b28dcdbbff71fe3da0ee16ca760673 (commit)
       via  07f2ff368aa089e4f128578c49287d40d84dfcdf (commit)
       via  687a5246e161ef1d191264d3b579271bc834223c (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you force-push a change (git push --force) and
generate a repository containing something like this:

 * -- * -- B -- O -- O -- O (936b0f1a267b4d274062a3cf8bef4ae8454a5e8f)
            \
             N -- N -- N (d96f0405962b9ea907e700b662b4aee444455c1b)

When this happens we assume that you've already had alert emails for all
of the O revisions, and so we report here only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full, below.


commit d96f0405962b9ea907e700b662b4aee444455c1b
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 30 02:45:58 2015 -0400

    5824: Add read-error and lots-of-blocks tests.

diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index 51710b7..94e41e2 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -2,21 +2,48 @@ package keepclient
 
 import (
 	"crypto/md5"
+	"crypto/rand"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"net/http"
 	"os"
+	"strconv"
+	"strings"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
 	check "gopkg.in/check.v1"
 )
 
-var _ = check.Suite(&IntegrationSuite{})
+var _ = check.Suite(&CollectionReaderUnit{})
 
-// IntegrationSuite tests need an API server
-type IntegrationSuite struct{}
+type CollectionReaderUnit struct {
+	arv     arvadosclient.ArvadosClient
+	kc      *KeepClient
+	handler SuccessHandler
+}
+
+func (s *CollectionReaderUnit) SetUpTest(c *check.C) {
+	var err error
+	s.arv, err = arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	s.arv.ApiToken = arvadostest.ActiveToken
+
+	s.kc, err = MakeKeepClient(&s.arv)
+	c.Assert(err, check.IsNil)
+
+	s.handler = SuccessHandler{
+		disk: make(map[string][]byte),
+		lock: make(chan struct{}, 1),
+		ops: new(int),
+	}
+	localRoots := make(map[string]string)
+	for i, k := range RunSomeFakeKeepServers(s.handler, 4) {
+		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+	}
+	s.kc.SetServiceRoots(localRoots, localRoots, nil)
+}
 
 type SuccessHandler struct {
 	disk map[string][]byte
@@ -64,33 +91,11 @@ type rdrTest struct {
 	want interface{} // error or string to expect
 }
 
-func StubWithFakeServers(kc *KeepClient, h http.Handler) {
-	localRoots := make(map[string]string)
-	for i, k := range RunSomeFakeKeepServers(h, 4) {
-		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
-	}
-	kc.SetServiceRoots(localRoots, localRoots, nil)
-}
-
-func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
-	arv, err := arvadosclient.MakeArvadosClient()
-	c.Assert(err, check.IsNil)
-	arv.ApiToken = arvadostest.ActiveToken
-
-	kc, err := MakeKeepClient(&arv)
-	c.Assert(err, check.IsNil)
-
-	{
-		h := SuccessHandler{
-			disk: make(map[string][]byte),
-			lock: make(chan struct{}, 1),
-		}
-		StubWithFakeServers(kc, h)
-		kc.PutB([]byte("foo"))
-		kc.PutB([]byte("bar"))
-		kc.PutB([]byte("Hello world\n"))
-		kc.PutB([]byte(""))
-	}
+func (s *CollectionReaderUnit) TestCollectionReaderContent(c *check.C) {
+	s.kc.PutB([]byte("foo"))
+	s.kc.PutB([]byte("bar"))
+	s.kc.PutB([]byte("Hello world\n"))
+	s.kc.PutB([]byte(""))
 
 	mt := arvadostest.PathologicalManifest
 
@@ -116,7 +121,7 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 		{mt: mt, f: "segmented/frob", want: "frob"},
 		{mt: mt, f: "segmented/oof", want: "oof"},
 	} {
-		rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+		rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
 		switch want := testCase.want.(type) {
 		case error:
 			c.Check(rdr, check.IsNil)
@@ -136,21 +141,34 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	}
 }
 
-func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
-	arv, err := arvadosclient.MakeArvadosClient()
-	c.Assert(err, check.IsNil)
-	arv.ApiToken = arvadostest.ActiveToken
-
-	kc, err := MakeKeepClient(&arv)
-	c.Assert(err, check.IsNil)
-
-	h := SuccessHandler{
-		disk: make(map[string][]byte),
-		lock: make(chan struct{}, 1),
-		ops: new(int),
+func (s *CollectionReaderUnit) TestCollectionReaderManyBlocks(c *check.C) {
+	h := md5.New()
+	buf := make([]byte, 4096)
+	locs := make([]string, len(buf))
+	filesize := 0
+	for i := 0; i < len(locs); i++ {
+		_, err := io.ReadFull(rand.Reader, buf[:i])
+		c.Assert(err, check.IsNil)
+		h.Write(buf[:i])
+		locs[i], _, err = s.kc.PutB(buf[:i])
+		c.Assert(err, check.IsNil)
+		filesize += i
 	}
-	StubWithFakeServers(kc, h)
-	kc.PutB([]byte("foo"))
+	manifest := "./random " + strings.Join(locs, " ") + " 0:" + strconv.Itoa(filesize) + ":bytes.bin\n"
+	dataMD5 := h.Sum(nil)
+
+	checkMD5 := md5.New()
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "random/bytes.bin")
+	c.Check(err, check.IsNil)
+	_, err = io.Copy(checkMD5, rdr)
+	c.Check(err, check.IsNil)
+	_, err = rdr.Read(make([]byte, 1))
+	c.Check(err, check.Equals, io.EOF)
+	c.Check(checkMD5.Sum(nil), check.DeepEquals, dataMD5)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderCloseEarly(c *check.C) {
+	s.kc.PutB([]byte("foo"))
 
 	mt := ". "
 	for i := 0; i < 1000; i++ {
@@ -161,23 +179,45 @@ func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
 	// Grab the stub server's lock, ensuring our cfReader doesn't
 	// get anything back from its first call to kc.Get() before we
 	// have a chance to call Close().
-	h.lock <- struct{}{}
-	opsBeforeRead := *h.ops
+	s.handler.lock <- struct{}{}
+	opsBeforeRead := *s.handler.ops
 
-	rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
 	c.Assert(err, check.IsNil)
+
+	firstReadDone := make(chan struct{})
+	go func() {
+		rdr.Read(make([]byte, 6))
+		firstReadDone <- struct{}{}
+	}()
 	err = rdr.Close()
 	c.Assert(err, check.IsNil)
 	c.Assert(rdr.Error(), check.IsNil)
 
 	// Release the stub server's lock. The first GET operation will proceed.
-	<-h.lock
+	<-s.handler.lock
+
+	// Make sure our first read operation consumes the data
+	// received from the first GET.
+	<-firstReadDone
 
 	// doGet() should close toRead before sending any more bufs to it.
-	if what, ok := <-rdr.toRead;  ok {
-		c.Errorf("Got %+v, expected toRead to be closed", what)
+	if what, ok := <-rdr.toRead; ok {
+		c.Errorf("Got %q, expected toRead to be closed", string(what))
 	}
 
 	// Stub should have handled exactly one GET request.
-	c.Assert(*h.ops, check.Equals, opsBeforeRead+1)
+	c.Assert(*s.handler.ops, check.Equals, opsBeforeRead+1)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderDataError(c *check.C) {
+	manifest := ". ffffffffffffffffffffffffffffffff+1 0:1:notfound.txt\n"
+	buf := make([]byte, 1)
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "notfound.txt")
+	c.Check(err, check.IsNil)
+	for i := 0; i < 2; i++ {
+		_, err = io.ReadFull(rdr, buf)
+		c.Check(err, check.Not(check.IsNil))
+		c.Check(err, check.Not(check.Equals), io.EOF)
+	}
 }

commit ed6af5a868544100e2dffe8f6ccc8148f28edb93
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 30 02:44:18 2015 -0400

    5824: Turn off debug printfs unless enabled by calling program.

diff --git a/sdk/go/keepclient/keepclient_test.go b/sdk/go/keepclient/keepclient_test.go
index e4e459e..e54d34b 100644
--- a/sdk/go/keepclient/keepclient_test.go
+++ b/sdk/go/keepclient/keepclient_test.go
@@ -126,10 +126,9 @@ func (s *StandaloneSuite) TestUploadToStubKeepServer(c *C) {
 		make(chan string)}
 
 	UploadToStubHelper(c, st,
-		func(kc *KeepClient, url string, reader io.ReadCloser,
-			writer io.WriteCloser, upload_status chan uploadStatus) {
-
-			go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), "TestUploadToStubKeepServer")
+		func(kc *KeepClient, url string, reader io.ReadCloser, writer io.WriteCloser, upload_status chan uploadStatus) {
+			
+			go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), []byte{0})
 
 			writer.Write([]byte("foo"))
 			writer.Close()
@@ -153,15 +152,14 @@ func (s *StandaloneSuite) TestUploadToStubKeepServerBufferReader(c *C) {
 		make(chan string)}
 
 	UploadToStubHelper(c, st,
-		func(kc *KeepClient, url string, reader io.ReadCloser,
-			writer io.WriteCloser, upload_status chan uploadStatus) {
+		func(kc *KeepClient, url string, reader io.ReadCloser, writer io.WriteCloser, upload_status chan uploadStatus) {
 
 			tr := streamer.AsyncStreamFromReader(512, reader)
 			defer tr.Close()
 
 			br1 := tr.MakeStreamReader()
 
-			go kc.uploadToKeepServer(url, st.expectPath, br1, upload_status, 3, "TestUploadToStubKeepServerBufferReader")
+			go kc.uploadToKeepServer(url, st.expectPath, br1, upload_status, 3, []byte{0})
 
 			writer.Write([]byte("foo"))
 			writer.Close()
@@ -193,10 +191,9 @@ func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
 	hash := "acbd18db4cc2f85cedef654fccc4a4d8"
 
 	UploadToStubHelper(c, st,
-		func(kc *KeepClient, url string, reader io.ReadCloser,
-			writer io.WriteCloser, upload_status chan uploadStatus) {
+		func(kc *KeepClient, url string, reader io.ReadCloser, writer io.WriteCloser, upload_status chan uploadStatus) {
 
-			go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, "TestFailedUploadToStubKeepServer")
+			go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, []byte{0})
 
 			writer.Write([]byte("foo"))
 			writer.Close()
diff --git a/sdk/go/keepclient/support.go b/sdk/go/keepclient/support.go
index b467d06..808da0b 100644
--- a/sdk/go/keepclient/support.go
+++ b/sdk/go/keepclient/support.go
@@ -2,18 +2,20 @@ package keepclient
 
 import (
 	"crypto/md5"
+	"crypto/rand"
 	"errors"
 	"fmt"
 	"git.curoverse.com/arvados.git/sdk/go/streamer"
 	"io"
 	"io/ioutil"
-	"log"
 	"net"
 	"net/http"
 	"strings"
 	"time"
 )
 
+var debugPrintf = func(string, ...interface{}){}
+
 type keepDisk struct {
 	Uuid     string `json:"uuid"`
 	Hostname string `json:"service_host"`
@@ -147,13 +149,13 @@ type uploadStatus struct {
 }
 
 func (this KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
-	upload_status chan<- uploadStatus, expectedLength int64, requestId string) {
+	upload_status chan<- uploadStatus, expectedLength int64, requestId []byte) {
 
 	var req *http.Request
 	var err error
 	var url = fmt.Sprintf("%s/%s", host, hash)
 	if req, err = http.NewRequest("PUT", url, nil); err != nil {
-		log.Printf("[%v] Error creating request PUT %v error: %v", requestId, url, err.Error())
+		debugPrintf("[%x] Error creating request PUT %v error: %v", requestId, url, err.Error())
 		upload_status <- uploadStatus{err, url, 0, 0, ""}
 		body.Close()
 		return
@@ -181,7 +183,7 @@ func (this KeepClient) uploadToKeepServer(host string, hash string, body io.Read
 
 	var resp *http.Response
 	if resp, err = this.Client.Do(req); err != nil {
-		log.Printf("[%v] Upload failed %v error: %v", requestId, url, err.Error())
+		debugPrintf("[%x] Upload failed %v error: %v", requestId, url, err.Error())
 		upload_status <- uploadStatus{err, url, 0, 0, ""}
 		return
 	}
@@ -197,13 +199,13 @@ func (this KeepClient) uploadToKeepServer(host string, hash string, body io.Read
 	respbody, err2 := ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
 	response := strings.TrimSpace(string(respbody))
 	if err2 != nil && err2 != io.EOF {
-		log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, err2.Error(), response)
+		debugPrintf("[%x] Upload %v error: %v response: %v", requestId, url, err2.Error(), response)
 		upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
 	} else if resp.StatusCode == http.StatusOK {
-		log.Printf("[%v] Upload %v success", requestId, url)
+		debugPrintf("[%x] Upload %v success", requestId, url)
 		upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
 	} else {
-		log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, resp.StatusCode, response)
+		debugPrintf("[%x] Upload %v error: %v response: %v", requestId, url, resp.StatusCode, response)
 		upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
 	}
 }
@@ -215,7 +217,8 @@ func (this KeepClient) putReplicas(
 
 	// Take the hash of locator and timestamp in order to identify this
 	// specific transaction in log statements.
-	requestId := fmt.Sprintf("%x", md5.Sum([]byte(locator+time.Now().String())))[0:8]
+	requestId := make([]byte, 4)
+	io.ReadFull(rand.Reader, requestId)
 
 	// Calculate the ordering for uploading to servers
 	sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
@@ -237,7 +240,7 @@ func (this KeepClient) putReplicas(
 		for active < remaining_replicas {
 			// Start some upload requests
 			if next_server < len(sv) {
-				log.Printf("[%v] Begin upload %s to %s", requestId, hash, sv[next_server])
+				debugPrintf("[%x] Begin upload %s to %s", requestId, hash, sv[next_server])
 				go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestId)
 				next_server += 1
 				active += 1
@@ -249,8 +252,7 @@ func (this KeepClient) putReplicas(
 				}
 			}
 		}
-		log.Printf("[%v] Replicas remaining to write: %v active uploads: %v",
-			requestId, remaining_replicas, active)
+		debugPrintf("[%x] Replicas remaining to write: %v active uploads: %v", requestId, remaining_replicas, active)
 
 		// Now wait for something to happen.
 		status := <-upload_status

commit 2a6f138489902caba2b5c6ecc392dd70f85ba6d3
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 11:08:16 2015 -0400

    5824: Fix up DNS docs.

diff --git a/doc/install/install-keepdl.html.textile.liquid b/doc/install/install-keepdl.html.textile.liquid
index 80d50c2..448d11f 100644
--- a/doc/install/install-keepdl.html.textile.liquid
+++ b/doc/install/install-keepdl.html.textile.liquid
@@ -95,22 +95,20 @@ server {
 }
 </pre></notextile>
 
-h3. Tell the API server about the keepdl service
+h3. Configure DNS
 
-If your wildcard certificate is valid for <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file:
+Configure your DNS servers so the following names resolve to your Nginx proxy's public IP address.
+* @*--dl.uuid_prefix.your.domain@, if your DNS server allows this without interfering with other DNS names; or
+* @*.dl.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names; or
+* @dl.uuid_prefix.your.domain@, if neither of the above options is feasible. In this case, only unauthenticated requests will be served, i.e., public data and collection sharing links.
 
-<notextile>
-<pre><code>keepdl: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
-</code></pre>
-</notextile>
+h3. Tell the API server about the keepdl service
 
-If your wildcard certificate is valid for <code>*.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file. This approach requires a wildcard DNS entry covering <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, which might be difficult to do without affecting other services, depending on your DNS server software.
+Add *one* of the following entries to your API server's @config/application.yml@ file, depending on your DNS setup:
 
 <notextile>
 <pre><code>keepdl: https://%{uuid_or_pdh}--dl.<span class="userinput">uuid_prefix</span>.your.domain
+keepdl: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
+keepdl: https://dl.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>
-
-h3. Configure DNS
-
-Configure your DNS servers so <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code> or <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code> (according to the way you configured Workbench), as well as <code>dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, resolve to your Nginx proxy's public IP address.

commit b420fea834591130dea8b5fd3a6165f514d0610c
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 02:30:45 2015 -0400

    5824: Log X-Forwarded-For header value if provided.

diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index bc68625..3c38728 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -61,6 +61,11 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var statusCode = 0
 	var statusText string
 
+	remoteAddr := r.RemoteAddr
+	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+		remoteAddr = xff + "," + remoteAddr
+	}
+
 	w := httpserver.WrapResponseWriter(wOrig)
 	defer func() {
 		if statusCode == 0 {
@@ -74,7 +79,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		if statusText == "" {
 			statusText = http.StatusText(statusCode)
 		}
-		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
+		httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
 	}()
 
 	if r.Method != "GET" && r.Method != "POST" {

commit de603aa2e7ec727677772c52808c1821b79dd1fc
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:45:35 2015 -0400

    5824: Fail at startup if ARVADOS_API_HOST is not set.

diff --git a/services/keepdl/main.go b/services/keepdl/main.go
index d780cc3..751543e 100644
--- a/services/keepdl/main.go
+++ b/services/keepdl/main.go
@@ -17,6 +17,9 @@ func init() {
 
 func main() {
 	flag.Parse()
+	if os.Getenv("ARVADOS_API_HOST") == "" {
+		log.Fatal("ARVADOS_API_HOST environment variable must be set.")
+	}
 	srv := &server{}
 	if err := srv.Start(); err != nil {
 		log.Fatal(err)

commit 67025bef2c15a40c2c7585633066c2a924d03ae7
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:28:43 2015 -0400

    5824: Accept anonymous tokens on command line.

diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 7a2124a..bc68625 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -17,16 +17,9 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
-var clientPool = arvadosclient.MakeClientPool()
-
-var anonymousTokens []string
-
 type handler struct{}
 
-func init() {
-	// TODO(TC): Get anonymousTokens from flags
-	anonymousTokens = []string{}
-}
+var clientPool = arvadosclient.MakeClientPool()
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
 // otherwise return "".
diff --git a/services/keepdl/server.go b/services/keepdl/server.go
index 44da00f..2359f23 100644
--- a/services/keepdl/server.go
+++ b/services/keepdl/server.go
@@ -10,8 +10,8 @@ import (
 var address string
 
 func init() {
-	flag.StringVar(&address, "address", "0.0.0.0:80",
-		"Address to listen on, \"host:port\".")
+	flag.StringVar(&address, "address", ":80",
+		"Address to listen on: \"host:port\", or \":port\" to listen on all interfaces.")
 }
 
 type server struct {

commit db06fc8096c5a40c4c7046e5d13d8eef7c941f7e
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:09:46 2015 -0400

    5824: Handle various combinations of c= and t= more consistently. Use vhosts in integration tests.

diff --git a/services/keepdl/doc.go b/services/keepdl/doc.go
index 440f116..6b7242f 100644
--- a/services/keepdl/doc.go
+++ b/services/keepdl/doc.go
@@ -52,13 +52,12 @@
 // "Same-origin mode" below.
 //
 //   http://dl.example.com/c=uuid_or_pdh/path/file.txt
-//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
 //
 // The following "multiple origin" URL patterns are supported for all
 // collections:
 //
 //   http://uuid_or_pdh--dl.example.com/path/file.txt
-//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
 //   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
 //
 // In the "multiple origin" form, the string "--" can be replaced with
@@ -81,17 +80,35 @@
 // collection UUID or a portable data hash with the "+" character
 // replaced by "-".
 //
+// In all of the above forms, a top level directory called "_" is
+// skipped. In cases where the "path/file.txt" part might start with
+// "t=" or "c=" or "_/", links should be constructed with a leading
+// "_/" to ensure the top level directory is not interpreted as a
+// token or collection ID.
+//
 // Assuming there is a collection with UUID
 // zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
 // 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
 // interchangeable:
 //
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/_/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/_/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--foo.example.com/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--.invalid/foo
 //
+// An additional form is supported specifically to make it more
+// convenient to maintain support for existing Workbench download
+// links:
+//
+//   http://dl.example.com/collections/download/uuid_or_pdh/TOKEN/path/file.txt
+//
+// A regular Workbench "download" link is also accepted, but
+// credentials passed via cookie, header, etc. are ignored. Only
+// public data can be served this way:
+//
+//   http://dl.example.com/collections/uuid_or_pdh/path/file.txt
+//
 // Authorization mechanisms
 //
 // A token can be provided in an Authorization header:
@@ -158,7 +175,7 @@
 //
 package main
 
-// TODO(TC): Implement
+// TODO(TC): Implement?
 //
 // Trusted content
 //
diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 657c72d..7a2124a 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -50,6 +50,20 @@ func parseCollectionIdFromDNSName(s string) string {
 	return ""
 }
 
+var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
+
+// return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
+// with "+" replaced by " " or "-"); otherwise return "".
+func parseCollectionIdFromURL(s string) string {
+	if arvadosclient.UUIDMatch(s) {
+		return s
+	}
+	if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
+		return pdh
+	}
+	return ""
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var statusCode = 0
 	var statusText string
@@ -89,79 +103,104 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
+	var credentialsOK bool
 
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
-		// "http://{id}.domain.example.com/{path}" form
-		if t := r.FormValue("api_token"); t != "" {
-			// ...with explicit token in query string or
-			// form in POST body. We must encrypt the
-			// token such that it can only be used for
-			// this collection; put it in an HttpOnly
-			// cookie; and redirect to the same URL with
-			// the query param redacted, and method =
-			// GET.
-			//
-			// The HttpOnly flag is necessary to prevent
-			// JavaScript code (included in, or loaded by,
-			// a page in the collection being served) from
-			// employing the user's token beyond reading
-			// other files in the same domain, i.e., same
-			// the collection.
-			//
-			// The 303 redirect is necessary in the case
-			// of a GET request to avoid exposing the
-			// token in the Location bar, and in the case
-			// of a POST request to avoid raising warnings
-			// when the user refreshes the resulting page.
-			http.SetCookie(w, &http.Cookie{
-				Name:    "api_token",
-				Value:   auth.EncodeTokenCookie([]byte(t)),
-				Path:    "/",
-				Expires: time.Now().AddDate(10,0,0),
-			})
-			redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
-
-			w.Header().Add("Location", redir)
-			statusCode, statusText = http.StatusSeeOther, redir
-			w.WriteHeader(statusCode)
-			io.WriteString(w, `<A href="`)
-			io.WriteString(w, html.EscapeString(redir))
-			io.WriteString(w, `">Continue</A>`)
-			return
-		} else if strings.HasPrefix(pathParts[0], "t=") {
-			// ...with explicit token in path,
-			// "{...}.com/t={token}/{path}".  This form
-			// must only be used to pass scoped tokens
-			// that give permission for a single
-			// collection. See FormValue case above.
-			tokens = []string{pathParts[0][2:]}
-			targetPath = pathParts[1:]
+		// http://ID.dl.example/PATH...
+		credentialsOK = true
+		targetPath = pathParts
+	} else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
+		// /c=ID/PATH...
+		targetId = parseCollectionIdFromURL(pathParts[0][2:])
+		targetPath = pathParts[1:]
+	} else if len(pathParts) >= 3 && pathParts[0] == "collections" {
+		if len(pathParts) >= 5 && pathParts[1] == "download" {
+			// /collections/download/ID/TOKEN/PATH...
+			targetId = pathParts[2]
+			tokens = []string{pathParts[3]}
+			targetPath = pathParts[4:]
 			pathToken = true
 		} else {
-			// ...with cookie, Authorization header, or
-			// no token at all
-			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
-			tokens = append(reqTokens, anonymousTokens...)
-			targetPath = pathParts
+			// /collections/ID/PATH...
+			targetId = pathParts[1]
+			tokens = anonymousTokens
+			targetPath = pathParts[2:]
 		}
-	} else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+	} else {
 		statusCode = http.StatusNotFound
 		return
-	} else if len(pathParts) >= 5 && pathParts[1] == "download" {
-		// "/collections/download/{id}/{token}/path..." form:
-		// Don't use our configured anonymous tokens,
-		// Authorization headers, etc.  Just use the token in
-		// the path.
-		targetId = pathParts[2]
-		tokens = []string{pathParts[3]}
-		targetPath = pathParts[4:]
+	}
+	if t := r.FormValue("api_token"); t != "" {
+		// The client provided an explicit token in the query
+		// string, or a form in POST body. We must put the
+		// token in an HttpOnly cookie, and redirect to the
+		// same URL with the query param redacted and method =
+		// GET.
+
+		if !credentialsOK {
+			// It is not safe to copy the provided token
+			// into a cookie unless the current vhost
+			// (origin) serves only a single collection.
+			statusCode = http.StatusBadRequest
+			return
+		}
+
+		// The HttpOnly flag is necessary to prevent
+		// JavaScript code (included in, or loaded by, a page
+		// in the collection being served) from employing the
+		// user's token beyond reading other files in the same
+		// domain, i.e., same collection.
+		//
+		// The 303 redirect is necessary in the case of a GET
+		// request to avoid exposing the token in the Location
+		// bar, and in the case of a POST request to avoid
+		// raising warnings when the user refreshes the
+		// resulting page.
+
+		http.SetCookie(w, &http.Cookie{
+			Name:     "api_token",
+			Value:    auth.EncodeTokenCookie([]byte(t)),
+			Path:     "/",
+			Expires:  time.Now().AddDate(10,0,0),
+			HttpOnly: true,
+		})
+		redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+		w.Header().Add("Location", redir)
+		statusCode, statusText = http.StatusSeeOther, redir
+		w.WriteHeader(statusCode)
+		io.WriteString(w, `<A href="`)
+		io.WriteString(w, html.EscapeString(redir))
+		io.WriteString(w, `">Continue</A>`)
+		return
+	}
+
+	if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+		// http://ID.example/t=TOKEN/PATH...
+		// /c=ID/t=TOKEN/PATH...
+		//
+		// This form must only be used to pass scoped tokens
+		// that give permission for a single collection. See
+		// FormValue case above.
+		tokens = []string{targetPath[0][2:]}
 		pathToken = true
-	} else {
-		// "/collections/{id}/path..." form
-		targetId = pathParts[1]
-		reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		targetPath = targetPath[1:]
+	}
+
+	if tokens == nil {
+		if credentialsOK {
+			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		}
 		tokens = append(reqTokens, anonymousTokens...)
-		targetPath = pathParts[2:]
+	}
+
+	if len(targetPath) > 0 && targetPath[0] == "_" {
+		// If a collection has a directory called "t=foo" or
+		// "_", it can be served at //dl.example/_/t=foo/ or
+		// //dl.example/_/_/ respectively: //dl.example/t=foo/
+		// won't work because t=foo will be interpreted as a
+		// token "foo".
+		targetPath = targetPath[1:]
 	}
 
 	tokenResult := make(map[string]int)
@@ -188,11 +227,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		return
 	}
 	if !found {
-		if pathToken {
-			// The URL is a "secret sharing link", but it
-			// didn't work out. Asking the client for
-			// additional credentials would just be
-			// confusing.
+		if pathToken || !credentialsOK {
+			// Either the URL is a "secret sharing link"
+			// that didn't work out (and asking the client
+			// for additional credentials would just be
+			// confusing), or we don't even accept
+			// credentials at this path.
 			statusCode = http.StatusNotFound
 			return
 		}
diff --git a/services/keepdl/handler_test.go b/services/keepdl/handler_test.go
index 8977ebc..69dff4f 100644
--- a/services/keepdl/handler_test.go
+++ b/services/keepdl/handler_test.go
@@ -99,8 +99,10 @@ func authzViaPOST(r *http.Request, tok string) int {
 func doVhostRequests(c *check.C, authz authorizer) {
 	for _, hostPath := range []string{
 		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection + "--dl.example.com/foo",
+		arvadostest.FooCollection + "--dl.example.com/_/foo",
 		arvadostest.FooPdh + ".example.com/foo",
-		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + "--dl.example.com/foo",
 	} {
 		c.Log("doRequests: ", hostPath)
 		doVhostRequestsWithHostPath(c, authz, hostPath)
diff --git a/services/keepdl/server_test.go b/services/keepdl/server_test.go
index 6fef9b8..964fa3a 100644
--- a/services/keepdl/server_test.go
+++ b/services/keepdl/server_test.go
@@ -29,7 +29,7 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"bogustoken",
 	} {
 		hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
-		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
@@ -119,6 +119,14 @@ func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
 	c.Check(size, check.Equals, int64(blocksize)*100)
 }
 
+type curlCase struct {
+	id      string
+	auth    string
+	host    string
+	path    string
+	dataMD5 string
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
 	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
@@ -128,28 +136,101 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 	c.Assert(err, check.Equals, nil)
 	kc.PutB([]byte("Hello world\n"))
 	kc.PutB([]byte("foo"))
-	for _, spec := range [][]string{
+	for _, spec := range []curlCase{
 		// My collection
-		{arvadostest.ActiveToken, "/collections/" + arvadostest.FooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		// Anonymously accessible user agreement.
-		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{
+			auth: arvadostest.ActiveToken,
+			host: arvadostest.FooCollection + "--dl.example.com",
+			path: "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			host: strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
+			path: "/t=" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: "tokensobogus",
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: arvadostest.ActiveToken,
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: arvadostest.AnonymousToken,
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+
+		// Anonymously accessible user agreement
+		{
+			path: "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			host: arvadostest.HelloWorldCollection + ".dl.example.com",
+			path: "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			host: arvadostest.HelloWorldCollection + ".dl.example.com",
+			path: "/_/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.ActiveToken,
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			host: arvadostest.HelloWorldCollection + "--dl.example.com",
+			path: "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			path: "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
 	} {
-		hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
+		host := spec.host
+		if host == "" {
+			host = "dl.example.com"
+		}
+		hdr, body, _ := s.runCurl(c, spec.auth, host, spec.path)
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
-		if strings.HasSuffix(spec[1], ".txt") {
+		if strings.HasSuffix(spec.path, ".txt") {
 			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
 			// TODO: Check some types that aren't
 			// automatically detected by Go's http server
 			// by sniffing the content.
 		}
-		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
+		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.dataMD5)
 	}
 }
 

commit 9038caea5d623ecab36d28b7a2d6a6f73b21e549
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:34:21 2015 -0400

    5824: Comment to explain "authorizer" test helpers.

diff --git a/services/keepdl/handler_test.go b/services/keepdl/handler_test.go
index 0494376..8977ebc 100644
--- a/services/keepdl/handler_test.go
+++ b/services/keepdl/handler_test.go
@@ -42,6 +42,10 @@ func (s *IntegrationSuite) TestVhost404(c *check.C) {
 	}
 }
 
+// An authorizer modifies an HTTP request to make use of the given
+// token -- by adding it to a header, cookie, query param, or whatever
+// -- and returns the HTTP status code we should expect from keepdl if
+// the token is invalid.
 type authorizer func(*http.Request, string) int
 
 func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {

commit 69e4f576d63166ca65061e7f304c28f9da752d1c
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:33:44 2015 -0400

    5824: Fix up support for PDH in vhostname.

diff --git a/services/keepdl/doc.go b/services/keepdl/doc.go
index 38b6fe5..440f116 100644
--- a/services/keepdl/doc.go
+++ b/services/keepdl/doc.go
@@ -74,7 +74,8 @@
 // upstream proxy.
 //
 // In all of the above forms, the "dl.example.com" part can be
-// anything at all.
+// anything at all: keepdl ignores everything after the first "." or
+// "--".
 //
 // In all of the above forms, the "uuid_or_pdh" part can be either a
 // collection UUID or a portable data hash with the "+" character
diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 30b4b64..657c72d 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -28,7 +28,8 @@ func init() {
 	anonymousTokens = []string{}
 }
 
-// return s if s is a UUID or a PDH, otherwise ""
+// return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
+// otherwise return "".
 func parseCollectionIdFromDNSName(s string) string {
 	// Strip domain.
 	if i := strings.IndexRune(s, '.'); i >= 0 {
@@ -40,10 +41,13 @@ func parseCollectionIdFromDNSName(s string) string {
 	if i := strings.Index(s, "--"); i >= 0 {
 		s = s[:i]
 	}
-	if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
-		return ""
+	if arvadosclient.UUIDMatch(s) {
+		return s
 	}
-	return s
+	if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
+		return pdh
+	}
+	return ""
 }
 
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
diff --git a/services/keepdl/handler_test.go b/services/keepdl/handler_test.go
index a1f5e1a..0494376 100644
--- a/services/keepdl/handler_test.go
+++ b/services/keepdl/handler_test.go
@@ -93,7 +93,17 @@ func authzViaPOST(r *http.Request, tok string) int {
 // Try some combinations of {url, token} using the given authorization
 // mechanism, and verify the result is correct.
 func doVhostRequests(c *check.C, authz authorizer) {
-	hostPath := arvadostest.FooCollection + ".example.com/foo"
+	for _, hostPath := range []string{
+		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooPdh + ".example.com/foo",
+		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+	} {
+		c.Log("doRequests: ", hostPath)
+		doVhostRequestsWithHostPath(c, authz, hostPath)
+	}
+}
+
+func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
 	for _, tok := range []string{
 		arvadostest.ActiveToken,
 		arvadostest.ActiveToken[:15],

commit 30d316a3a597cf03d3f2540f74cc4c0faa8a7231
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:31:19 2015 -0400

    5824: Modernize install page, cf. other services.

diff --git a/doc/_config.yml b/doc/_config.yml
index 1bdd2ab..29281f9 100644
--- a/doc/_config.yml
+++ b/doc/_config.yml
@@ -154,6 +154,7 @@ navbar:
       - install/create-standard-objects.html.textile.liquid
       - install/install-keepstore.html.textile.liquid
       - install/install-keepproxy.html.textile.liquid
+      - install/install-keepdl.html.textile.liquid
       - install/install-crunch-dispatch.html.textile.liquid
       - install/install-compute-node.html.textile.liquid
     - Helpful hints:
diff --git a/doc/install/install-keepdl.html.textile.liquid b/doc/install/install-keepdl.html.textile.liquid
index 6730dff..80d50c2 100644
--- a/doc/install/install-keepdl.html.textile.liquid
+++ b/doc/install/install-keepdl.html.textile.liquid
@@ -1,31 +1,33 @@
 ---
 layout: default
 navsection: installguide
-title: Install download server
+title: Install the download server
 ...
 
-This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
-
-The keepdl server provides read-only HTTP access to files stored in Keep. It serves public data to anonymous and unauthenticated clients, and accepts authentication via Arvados tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support.
+The keepdl server provides read-only HTTP access to files stored in Keep. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keepdl for more detail.
 
 By convention, we use the following hostname for the download service:
 
-<div class="offset1">
-table(table table-bordered table-condensed).
-|dl. at uuid_prefix@.your.domain|
-</div>
+<notextile>
+<pre><code>dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
 
 This hostname should resolve from anywhere on the internet.
 
 h2. Install keepdl
 
-First add the Arvados apt repository, and then install the keepdl package.
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install keepdl</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
-~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get install keepdl</span>
+<pre><code>~$ <span class="userinput">sudo yum install keepdl</span>
 </code></pre>
 </notextile>
 
@@ -34,31 +36,81 @@ Verify that @keepdl@ is functional:
 <notextile>
 <pre><code>~$ <span class="userinput">keepdl -h</span>
 Usage of keepdl:
-  -address="0.0.0.0:80": Address to listen on, "host:port".
+  -address string
+        Address to listen on: "host:port", or ":port" to listen on all interfaces. (default ":80")
+  -anonymous-token value
+        API token to try when none of the tokens provided in an HTTP request succeed in reading the desired collection. If this flag is used more than once, each token will be attempted in turn until one works. (default [])
 </code></pre>
 </notextile>
 
-We recommend running @arv-git-httpd@ under "runit":https://packages.debian.org/search?keywords=runit or something similar.
+If you intend to use Keepdl to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another:
+
+<notextile>
+<pre><code>/var/www/arvados-api/current/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
+hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
+</code></pre></notextile>
 
-Your @run@ script should look something like this:
+We recommend running @keepdl@ under "runit":https://packages.debian.org/search?keywords=runit or a similar supervisor. The basic command to start @keepdl@ is:
 
 <notextile>
 <pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
-exec sudo -u nobody keepdl -address=:9002 2>&1
+exec sudo -u nobody keepdl -address=<span class="userinput">:9002</span> -anonymous-token=<span class="userinput">hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r</span> 2>&1
 </code></pre>
 </notextile>
 
+Omit the @-anonymous-token@ arguments if you do not want to serve public data.
+
+Set @ARVADOS_API_HOST_INSECURE=1@ if your API server's SSL certificate is not signed by a recognized CA.
+
 h3. Set up a reverse proxy with SSL support
 
 The keepdl service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
 
 This is best achieved by putting a reverse proxy with SSL support in front of keepdl, running on port 443 and passing requests to keepdl on port 9002 (or whatever port you chose in your run script).
 
+Note: A wildcard SSL certificate is required in order to proxy keepdl effectively.
+
+For example, using Nginx:
+
+<notextile><pre>
+upstream keepdl {
+  server                127.0.0.1:<span class="userinput">9002</span>;
+}
+
+server {
+  listen                <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name           dl.<span class="userinput">uuid_prefix</span>.your.domain *.dl.<span class="userinput">uuid_prefix</span>.your.domain ~.*--dl.<span class="userinput">uuid_prefix</span>.your.domain;
+
+  proxy_connect_timeout 90s;
+  proxy_read_timeout    300s;
+
+  ssl                   on;
+  ssl_certificate       <span class="userinput">YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key   <span class="userinput">YOUR/PATH/TO/cert.key</span>;
+
+  location / {
+    proxy_pass          http://keepdl;
+    proxy_set_header    X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
+</pre></notextile>
+
 h3. Tell the API server about the keepdl service
 
-In your API server's config/application.yml file, add the following entry:
+If your wildcard certificate is valid for <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file:
 
 <notextile>
-<pre><code>keepdl: dl.<span class="userinput">uuid_prefix</span>.your.domain
+<pre><code>keepdl: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>
+
+If your wildcard certificate is valid for <code>*.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file. This approach requires a wildcard DNS entry covering <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, which might be difficult to do without affecting other services, depending on your DNS server software.
+
+<notextile>
+<pre><code>keepdl: https://%{uuid_or_pdh}--dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
+
+h3. Configure DNS
+
+Configure your DNS servers so <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code> or <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code> (according to the way you configured Workbench), as well as <code>dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, resolve to your Nginx proxy's public IP address.
diff --git a/services/keepdl/doc.go b/services/keepdl/doc.go
index d877356..38b6fe5 100644
--- a/services/keepdl/doc.go
+++ b/services/keepdl/doc.go
@@ -4,6 +4,8 @@
 // can be installed anywhere with access to Keep services, typically
 // behind a web proxy that supports TLS.
 //
+// See http://doc.arvados.org/install/install-keepdl.html.
+//
 // Starting the server
 //
 // Serve HTTP requests at port 1234 on all interfaces:

commit c04029c2147c845d8a0f349026dba062c6261d6f
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:17:54 2015 -0400

    5824: Clarify difference between keepproxy and keepstore (bandwidth and convenience -- not security).

diff --git a/doc/install/install-keepproxy.html.textile.liquid b/doc/install/install-keepproxy.html.textile.liquid
index 14b252f..3b658f8 100644
--- a/doc/install/install-keepproxy.html.textile.liquid
+++ b/doc/install/install-keepproxy.html.textile.liquid
@@ -4,9 +4,9 @@ navsection: installguide
 title: Install Keepproxy server
 ...
 
-The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is designed to provide secure access into Keep from anywhere on the internet.
+The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is suitable for lower-bandwidth clients located elsewhere on the internet: a client sends a single copy of a data block, and Keepproxy sends copies to the appropriate Keepstore servers. Keepproxy also accepts requests from clients that do not compute data hashes before uploading data: notably, the browser-based upload feature in Workbench requires Keepproxy.
 
-By convention, we use the following hostname for the Keepproxy:
+By convention, we use the following hostname for the Keepproxy server:
 
 <div class="offset1">
 table(table table-bordered table-condensed).

commit a14a2266b6b59e996685cfd694e86e95b9631de3
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:16:26 2015 -0400

    5824: Update keepproxy usage.

diff --git a/doc/install/install-keepproxy.html.textile.liquid b/doc/install/install-keepproxy.html.textile.liquid
index 6a531a3..14b252f 100644
--- a/doc/install/install-keepproxy.html.textile.liquid
+++ b/doc/install/install-keepproxy.html.textile.liquid
@@ -36,12 +36,13 @@ Verify that Keepproxy is functional:
 
 <notextile>
 <pre><code>~$ <span class="userinput">keepproxy -h</span>
-Usage of default:
+Usage of keepproxy:
   -default-replicas=2: Default number of replicas to write if not specified by the client.
   -listen=":25107": Interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.
   -no-get=false: If set, disable GET operations
   -no-put=false: If set, disable PUT operations
   -pid="": Path to write pid file
+  -timeout=15: Timeout on requests to internal Keep services (default 15 seconds)
 </code></pre>
 </notextile>
 
diff --git a/services/keepproxy/keepproxy.go b/services/keepproxy/keepproxy.go
index d0af4a5..313a285 100644
--- a/services/keepproxy/keepproxy.go
+++ b/services/keepproxy/keepproxy.go
@@ -37,7 +37,7 @@ func main() {
 		pidfile          string
 	)
 
-	flagset := flag.NewFlagSet("default", flag.ExitOnError)
+	flagset := flag.NewFlagSet("keepproxy", flag.ExitOnError)
 
 	flagset.StringVar(
 		&listen,

commit 688a00fbf445629eb845ee88a87338c14424afac
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 16 00:16:27 2015 -0400

    5824: Fix up error checking and early-close behavior in CollectionFileReader.

diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
index 929f693..eac23d0 100644
--- a/sdk/go/keepclient/collectionreader.go
+++ b/sdk/go/keepclient/collectionreader.go
@@ -8,6 +8,17 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/manifest"
 )
 
+const (
+	// After reading a data block from Keep, cfReader slices it up
+	// and sends the slices to a buffered channel to be consumed
+	// by the caller via Read().
+	//
+	// dataSliceSize is the maximum size of the slices, and
+	// therefore the maximum number of bytes that will be returned
+	// by a single call to Read().
+	dataSliceSize = 1 << 20
+)
+
 var (
 	ErrNoManifest     = errors.New("Collection has no manifest")
 	ErrNotImplemented = errors.New("Not implemented")
@@ -40,8 +51,10 @@ func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, fi
 			}
 			q = append(q, seg)
 			r.totalSize += uint64(seg.Len)
-			// Send toGet whatever it's ready to receive.
-			Q: for len(q) > 0 {
+			// Send toGet as many segments as we can until
+			// it blocks.
+		Q:
+			for len(q) > 0 {
 				select {
 				case r.toGet <- q[0]:
 					q = q[1:]
@@ -75,84 +88,127 @@ type cfReader struct {
 	// doGet() reads FileSegments from toGet, gets the data from
 	// Keep, and sends byte slices to toRead to be consumed by
 	// Read().
-	toGet        chan *manifest.FileSegment
-	toRead       chan []byte
+	toGet chan *manifest.FileSegment
+	// toRead is a buffered channel, sized to fit one full Keep
+	// block. This lets us verify checksums without having a
+	// store-and-forward delay between blocks: by the time the
+	// caller starts receiving data from block N, cfReader is
+	// starting to fetch block N+1. A larger buffer would be
+	// useful for a caller whose read speed varies a lot.
+	toRead chan []byte
 	// bytes ready to send next time someone calls Read()
-	buf          []byte
+	buf []byte
 	// Total size of the file being read. Not safe to read this
 	// until countDone is closed.
-	totalSize    uint64
-	countDone    chan struct{}
+	totalSize uint64
+	countDone chan struct{}
 	// First error encountered.
-	err          error
+	err error
+	// errNotNil is closed IFF err contains a non-nil error.
+	// Receiving from it will block until an error occurs.
+	errNotNil chan struct{}
+	// rdrClosed is closed IFF the reader's Close() method has
+	// been called. Any goroutines associated with the reader will
+	// stop and free up resources when they notice this channel is
+	// closed.
+	rdrClosed chan struct{}
 }
 
-func (r *cfReader) Read(outbuf []byte) (n int, err error) {
-	if r.err != nil {
-		return 0, r.err
+func (r *cfReader) Read(outbuf []byte) (int, error) {
+	if r.Error() != nil {
+		return 0, r.Error()
 	}
 	for r.buf == nil || len(r.buf) == 0 {
 		var ok bool
 		r.buf, ok = <-r.toRead
-		if r.err != nil {
-			return 0, r.err
+		if r.Error() != nil {
+			return 0, r.Error()
 		} else if !ok {
 			return 0, io.EOF
 		}
 	}
+	n := len(r.buf)
 	if len(r.buf) > len(outbuf) {
 		n = len(outbuf)
-	} else {
-		n = len(r.buf)
 	}
 	copy(outbuf[:n], r.buf[:n])
 	r.buf = r.buf[n:]
-	return
+	return n, nil
 }
 
 func (r *cfReader) Close() error {
-	_, _ = <-r.countDone
-	for _ = range r.toGet {
-	}
-	for _ = range r.toRead {
+	close(r.rdrClosed)
+	return r.Error()
+}
+
+func (r *cfReader) Error() error {
+	select {
+	case <-r.errNotNil:
+		return r.err
+	default:
+		return nil
 	}
-	return r.err
 }
 
 func (r *cfReader) Len() uint64 {
 	// Wait for all segments to be counted
-	_, _ = <-r.countDone
+	<-r.countDone
 	return r.totalSize
 }
 
 func (r *cfReader) doGet() {
 	defer close(r.toRead)
+GET:
 	for fs := range r.toGet {
 		rdr, _, _, err := r.keepClient.Get(fs.Locator)
 		if err != nil {
 			r.err = err
+			close(r.errNotNil)
 			return
 		}
 		var buf = make([]byte, fs.Offset+fs.Len)
 		_, err = io.ReadFull(rdr, buf)
 		if err != nil {
 			r.err = err
+			close(r.errNotNil)
 			return
 		}
-		for bOff, bLen := fs.Offset, 1<<20; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+		for bOff, bLen := fs.Offset, dataSliceSize; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
 			if bOff+bLen > fs.Offset+fs.Len {
 				bLen = fs.Offset + fs.Len - bOff
 			}
-			r.toRead <- buf[bOff : bOff+bLen]
+			select {
+			case r.toRead <- buf[bOff : bOff+bLen]:
+			case <-r.rdrClosed:
+				// Reader is closed: no point sending
+				// anything more to toRead.
+				break GET
+			}
+		}
+		// It is possible that r.rdrClosed is closed but we
+		// never noticed because r.toRead was also ready in
+		// every select{} above. Here we check before wasting
+		// a keepclient.Get() call.
+		select {
+		case <-r.rdrClosed:
+			break GET
+		default:
 		}
 	}
+	// In case we exited the above loop early: before returning,
+	// drain the toGet channel so its sender doesn't sit around
+	// blocking forever.
+	for _ = range r.toGet {
+	}
 }
 
 func newCFReader(kc *KeepClient) (r *cfReader) {
 	r = new(cfReader)
 	r.keepClient = kc
+	r.rdrClosed = make(chan struct{})
+	r.errNotNil = make(chan struct{})
 	r.toGet = make(chan *manifest.FileSegment, 2)
-	r.toRead = make(chan []byte)
+	r.toRead = make(chan []byte, (BLOCKSIZE+dataSliceSize-1)/dataSliceSize)
 	r.countDone = make(chan struct{})
 	go r.doGet()
 	return
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index f271208..51710b7 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -20,7 +20,8 @@ type IntegrationSuite struct{}
 
 type SuccessHandler struct {
 	disk map[string][]byte
-	lock chan struct{}
+	lock chan struct{}	// channel with buffer==1: full when an operation is in progress.
+	ops  *int		// number of operations completed
 }
 
 func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
@@ -34,12 +35,18 @@ func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
 		pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
 		h.lock <- struct{}{}
 		h.disk[pdh] = buf
+		if h.ops != nil {
+			(*h.ops)++
+		}
 		<- h.lock
 		resp.Write([]byte(pdh))
 	case "GET":
 		pdh := req.URL.Path[1:]
 		h.lock <- struct{}{}
 		buf, ok := h.disk[pdh]
+		if h.ops != nil {
+			(*h.ops)++
+		}
 		<- h.lock
 		if !ok {
 			resp.WriteHeader(http.StatusNotFound)
@@ -57,6 +64,14 @@ type rdrTest struct {
 	want interface{} // error or string to expect
 }
 
+func StubWithFakeServers(kc *KeepClient, h http.Handler) {
+	localRoots := make(map[string]string)
+	for i, k := range RunSomeFakeKeepServers(h, 4) {
+		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+	}
+	kc.SetServiceRoots(localRoots, localRoots, nil)
+}
+
 func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	arv, err := arvadosclient.MakeArvadosClient()
 	c.Assert(err, check.IsNil)
@@ -66,12 +81,11 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	c.Assert(err, check.IsNil)
 
 	{
-		localRoots := make(map[string]string)
-		h := SuccessHandler{disk: make(map[string][]byte), lock: make(chan struct{}, 1)}
-		for i, k := range RunSomeFakeKeepServers(h, 4) {
-			localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+		h := SuccessHandler{
+			disk: make(map[string][]byte),
+			lock: make(chan struct{}, 1),
 		}
-		kc.SetServiceRoots(localRoots, localRoots, nil)
+		StubWithFakeServers(kc, h)
 		kc.PutB([]byte("foo"))
 		kc.PutB([]byte("bar"))
 		kc.PutB([]byte("Hello world\n"))
@@ -121,3 +135,49 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 		}
 	}
 }
+
+func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	arv.ApiToken = arvadostest.ActiveToken
+
+	kc, err := MakeKeepClient(&arv)
+	c.Assert(err, check.IsNil)
+
+	h := SuccessHandler{
+		disk: make(map[string][]byte),
+		lock: make(chan struct{}, 1),
+		ops: new(int),
+	}
+	StubWithFakeServers(kc, h)
+	kc.PutB([]byte("foo"))
+
+	mt := ". "
+	for i := 0; i < 1000; i++ {
+		mt += "acbd18db4cc2f85cedef654fccc4a4d8+3 "
+	}
+	mt += "0:3000:foo1000.txt\n"
+
+	// Grab the stub server's lock, ensuring our cfReader doesn't
+	// get anything back from its first call to kc.Get() before we
+	// have a chance to call Close().
+	h.lock <- struct{}{}
+	opsBeforeRead := *h.ops
+
+	rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+	c.Assert(err, check.IsNil)
+	err = rdr.Close()
+	c.Assert(err, check.IsNil)
+	c.Assert(rdr.Error(), check.IsNil)
+
+	// Release the stub server's lock. The first GET operation will proceed.
+	<-h.lock
+
+	// doGet() should close toRead before sending any more bufs to it.
+	if what, ok := <-rdr.toRead;  ok {
+		c.Errorf("Got %+v, expected toRead to be closed", what)
+	}
+
+	// Stub should have handled exactly one GET request.
+	c.Assert(*h.ops, check.Equals, opsBeforeRead+1)
+}

commit 7b78afd6c670f1159ddff1f4e2ddfa6e7fa5b23c
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 04:41:26 2015 -0400

    5824: Enable blob signing in integration tests, and send keepstore logs to files.
    
    For some reason, sending keepstore logs to stdout seems to make
    keepstore crash silently when invoked from a Go integration test. Work
    around this by logging to disk, like we do with API server.

diff --git a/sdk/go/keepclient/keepclient_test.go b/sdk/go/keepclient/keepclient_test.go
index c1f6a3e..e4e459e 100644
--- a/sdk/go/keepclient/keepclient_test.go
+++ b/sdk/go/keepclient/keepclient_test.go
@@ -743,7 +743,7 @@ func (s *ServerRequiredSuite) TestPutGetHead(c *C) {
 	}
 	{
 		hash2, replicas, err := kc.PutB(content)
-		c.Check(hash2, Equals, fmt.Sprintf("%s+%d", hash, len(content)))
+		c.Check(hash2, Matches, fmt.Sprintf(`%s\+%d\b.*`, hash, len(content)))
 		c.Check(replicas, Equals, 2)
 		c.Check(err, Equals, nil)
 	}
diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py
index 1c5162b..2b9d5f7 100644
--- a/sdk/python/tests/run_test_server.py
+++ b/sdk/python/tests/run_test_server.py
@@ -310,8 +310,9 @@ def _start_keep(n, keep_args):
     for arg, val in keep_args.iteritems():
         keep_cmd.append("{}={}".format(arg, val))
 
+    logf = open(os.path.join(TEST_TMPDIR, 'keep{}.log'.format(n)), 'a+')
     kp0 = subprocess.Popen(
-        keep_cmd, stdin=open('/dev/null'), stdout=sys.stderr)
+        keep_cmd, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
     with open(_pidfile('keep{}'.format(n)), 'w') as f:
         f.write(str(kp0.pid))
 
@@ -326,10 +327,11 @@ def run_keep(blob_signing_key=None, enforce_permissions=False):
     stop_keep()
 
     keep_args = {}
-    if blob_signing_key:
-        with open(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"), "w") as f:
-            keep_args['--permission-key-file'] = f.name
-            f.write(blob_signing_key)
+    if not blob_signing_key:
+        blob_signing_key = 'zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc'
+    with open(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"), "w") as f:
+        keep_args['--blob-signing-key-file'] = f.name
+        f.write(blob_signing_key)
     if enforce_permissions:
         keep_args['--enforce-permissions'] = 'true'
 
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 13fc88d..ac7dd1b 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -1144,7 +1144,7 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
         c2.save()
 
         c1.update()
-        self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3 7ac66c0f148de9519b8bd264312c4d64\+7\+A[a-f0-9]{40}@[a-f0-9]{8} 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
+        self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
 
 
 if __name__ == '__main__':

commit 345a6ea565c56d488d5c471931fe2e452a5901c9
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 03:33:19 2015 -0400

    5824: Add Content-Length header.

diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 03b3e26..30b4b64 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -239,6 +239,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 			w.Header().Set("Content-Type", t)
 		}
 	}
+	w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
 
 	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)

commit 908432379061b7b4c1abef255c11bcf7c2f68538
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 01:56:55 2015 -0400

    5824: Use vhosts in curl integration tests. Add large file test.

diff --git a/services/keepdl/server_test.go b/services/keepdl/server_test.go
index 5864315..6fef9b8 100644
--- a/services/keepdl/server_test.go
+++ b/services/keepdl/server_test.go
@@ -3,6 +3,9 @@ package main
 import (
 	"crypto/md5"
 	"fmt"
+	"io"
+	"io/ioutil"
+	"net"
 	"os/exec"
 	"strings"
 	"testing"
@@ -25,17 +28,17 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"",
 		"bogustoken",
 	} {
-		hdr, body := s.runCurl(c, token, "/collections/"+arvadostest.FooCollection+"/foo")
+		hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
-			hdr, body = s.runCurl(c, token, "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
+			hdr, body, _ = s.runCurl(c, token, "dl.example.com", "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
 			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 			c.Check(body, check.Equals, "")
 		}
 
-		hdr, body = s.runCurl(c, token, "/bad-route")
+		hdr, body, _ = s.runCurl(c, token, "dl.example.com", "/bad-route")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 		c.Check(body, check.Equals, "")
 	}
@@ -64,12 +67,58 @@ func (s *IntegrationSuite) Test404(c *check.C) {
 		"/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
 		"/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 	} {
-		hdr, body := s.runCurl(c, arvadostest.ActiveToken, uri)
+		hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "dl.example.com", uri)
 		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
 		c.Check(body, check.Equals, "")
 	}
 }
 
+func (s *IntegrationSuite) Test1GBFile(c *check.C) {
+	if testing.Short() {
+		c.Skip("skipping 1GB integration test in short mode")
+	}
+	s.test100BlockFile(c, 10000000)
+}
+
+func (s *IntegrationSuite) Test300MBFile(c *check.C) {
+	s.test100BlockFile(c, 3000000)
+}
+
+func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
+	testdata := make([]byte, blocksize)
+	for i := 0; i < blocksize; i++ {
+		testdata[i] = byte(' ')
+	}
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+	arv.ApiToken = arvadostest.ActiveToken
+	kc, err := keepclient.MakeKeepClient(&arv)
+	c.Assert(err, check.Equals, nil)
+	loc, _, err := kc.PutB(testdata[:])
+	c.Assert(err, check.Equals, nil)
+	mtext := "."
+	for i := 0; i < 100; i++ {
+		mtext = mtext + " " + loc
+	}
+	mtext = mtext + fmt.Sprintf(" 0:%d00:testdata.bin\n", blocksize)
+	coll := map[string]interface{}{}
+	err = arv.Create("collections",
+		map[string]interface{}{
+			"collection": map[string]interface{}{
+				"name": fmt.Sprintf("testdata blocksize=%d", blocksize),
+				"manifest_text": mtext,
+			},
+		}, &coll)
+	c.Assert(err, check.Equals, nil)
+	uuid := coll["uuid"].(string)
+	
+	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid + ".dl.example.com", "/testdata.bin")
+	c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+	c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
+	c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
+	c.Check(size, check.Equals, int64(blocksize)*100)
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
 	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
@@ -86,19 +135,13 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		// Anonymously accessible user agreement. These should
-		// start working when CollectionFileReader provides
-		// real data instead of fake/stub data.
+		// Anonymously accessible user agreement.
 		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 	} {
-		hdr, body := s.runCurl(c, spec[0], spec[1])
-		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {
-			c.Log("Not implemented!")
-			continue
-		}
+		hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
 		if strings.HasSuffix(spec[1], ".txt") {
 			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
@@ -111,15 +154,34 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 }
 
 // Return header block and body.
-func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string) (hdr, body string) {
+func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
 	curlArgs := []string{"--silent", "--show-error", "--include"}
+	testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
+	curlArgs = append(curlArgs, "--resolve", host + ":" + testPort + ":" + testHost)
 	if token != "" {
 		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
 	}
 	curlArgs = append(curlArgs, args...)
-	curlArgs = append(curlArgs, "http://"+s.testServer.Addr+uri)
+	curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)
 	c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
-	output, err := exec.Command("curl", curlArgs...).CombinedOutput()
+	cmd := exec.Command("curl", curlArgs...)
+	stdout, err := cmd.StdoutPipe()
+	c.Assert(err, check.Equals, nil)
+	cmd.Stderr = cmd.Stdout
+	go cmd.Start()
+	buf := make([]byte, 2<<27)
+	n, err := io.ReadFull(stdout, buf)
+	// Discard (but measure size of) anything past 256 MiB (the size of buf).
+	var discarded int64
+	if err == io.ErrUnexpectedEOF {
+		err = nil
+		buf = buf[:n]
+	} else {
+		c.Assert(err, check.Equals, nil)
+		discarded, err = io.Copy(ioutil.Discard, stdout)
+		c.Assert(err, check.Equals, nil)
+	}
+	err = cmd.Wait()
 	// Without "-f", curl exits 0 as long as it gets a valid HTTP
 	// response from the server, even if the response status
 	// indicates that the request failed. In our test suite, we
@@ -127,10 +189,11 @@ func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string
 	// headers ourselves. If curl exits non-zero, our testing
 	// environment is broken.
 	c.Assert(err, check.Equals, nil)
-	hdrsAndBody := strings.SplitN(string(output), "\r\n\r\n", 2)
+	hdrsAndBody := strings.SplitN(string(buf), "\r\n\r\n", 2)
 	c.Assert(len(hdrsAndBody), check.Equals, 2)
 	hdr = hdrsAndBody[0]
-	body = hdrsAndBody[1]
+	bodyPart = hdrsAndBody[1]
+	bodySize = int64(len(bodyPart)) + discarded
 	return
 }
 

commit 73d1f74fcdf96a1ac298c4f7b8181e702c797ca5
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 01:20:28 2015 -0400

    5824: Support vhost-based collection lookups.

diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
index 87b28f8..3040e0a 100644
--- a/sdk/go/arvadostest/fixtures.go
+++ b/sdk/go/arvadostest/fixtures.go
@@ -7,6 +7,8 @@ const (
 	FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
 	NonexistentCollection = "zzzzz-4zz18-totallynotexist"
 	HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+	FooPdh                = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
+	HelloWorldPdh         = "55713e6a34081eb03609e7ad5fcad129+62"
 	PathologicalManifest  = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K at xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero at 0 0:1:f 1:0:zero at 1 1:4:ooba 4:0:zero at 4 5:1:r 5:4:rbaz 9:0:zero at 9\n" +
 		"./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
 		"./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
diff --git a/sdk/go/auth/auth.go b/sdk/go/auth/auth.go
index 4a719e9..3c7888a 100644
--- a/sdk/go/auth/auth.go
+++ b/sdk/go/auth/auth.go
@@ -1,13 +1,14 @@
 package auth
 
 import (
+	"encoding/base64"
 	"net/http"
 	"net/url"
 	"strings"
 )
 
 type Credentials struct {
-	Tokens []string
+	Tokens     []string
 }
 
 func NewCredentials() *Credentials {
@@ -20,6 +21,15 @@ func NewCredentialsFromHTTPRequest(r *http.Request) *Credentials {
 	return c
 }
 
+// EncodeTokenCookie accepts a token and returns a string suitable
+// for use as a cookie value, such that it will be decoded correctly
+// by LoadTokensFromHTTPRequest.
+var EncodeTokenCookie func([]byte) string = base64.URLEncoding.EncodeToString
+
+// DecodeTokenCookie accepts a cookie value and returns the decoded
+// token.
+var DecodeTokenCookie func(string) ([]byte, error) = base64.URLEncoding.DecodeString
+
 // LoadTokensFromHttpRequest loads all tokens it can find in the
 // headers and query string of an http query.
 func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
@@ -51,6 +61,8 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
 		a.Tokens = append(a.Tokens, val...)
 	}
 
+	a.loadTokenFromCookie(r)
+
 	// TODO: Load token from Rails session cookie (if Rails site
 	// secret is known)
 }
@@ -59,3 +71,15 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
 // LoadTokensFromHttpRequest() that [or how] we should read and parse
 // the request body. This has to be requested explicitly by the
 // application.
+
+func (a *Credentials) loadTokenFromCookie(r *http.Request) {
+	cookie, err := r.Cookie("api_token")
+	if err != nil || len(cookie.Value) == 0 {
+		return
+	}
+	token, err := DecodeTokenCookie(cookie.Value)
+	if err != nil {
+		return
+	}
+	a.Tokens = append(a.Tokens, string(token))
+}
diff --git a/services/keepdl/doc.go b/services/keepdl/doc.go
index 65c7f19..d877356 100644
--- a/services/keepdl/doc.go
+++ b/services/keepdl/doc.go
@@ -1,28 +1,158 @@
 // Keepdl provides read-only HTTP access to files stored in Keep. It
 // serves public data to anonymous and unauthenticated clients, and
-// accepts authentication via Arvados tokens. It can be installed
-// anywhere with access to Keep services, typically behind a web proxy
-// that provides SSL support.
+// serves private data to clients that supply Arvados API tokens. It
+// can be installed anywhere with access to Keep services, typically
+// behind a web proxy that supports TLS.
 //
-// Given that this amounts to a web hosting service for arbitrary
-// content, it is vital to ensure that at least one of the following is
-// true:
+// Starting the server
 //
-// Usage
-//
-// Listening:
+// Serve HTTP requests at port 1234 on all interfaces:
 //
 //   keepdl -address=:1234
 //
-// Start an HTTP server on port 1234.
+// Serve HTTP requests at port 1234 on the interface with IP address 1.2.3.4:
 //
 //   keepdl -address=1.2.3.4:1234
 //
-// Start an HTTP server on port 1234, on the interface with IP address 1.2.3.4.
+// Proxy configuration
 //
 // Keepdl does not support SSL natively. Typically, it is installed
 // behind a proxy like nginx.
 //
+// Here is an example nginx configuration.
+//
+//	http {
+//	  upstream keepdl {
+//	    server localhost:1234;
+//	  }
+//	  server {
+//	    listen *:443 ssl;
+//	    server_name dl.example.com *.dl.example.com ~.*--dl.example.com;
+//	    ssl_certificate /root/wildcard.example.com.crt;
+//	    ssl_certificate_key /root/wildcard.example.com.key;
+//	    location  / {
+//	      proxy_pass http://keepdl;
+//	      proxy_set_header Host $host;
+//	      proxy_set_header X-Forwarded-For $remote_addr;
+//	    }
+//	  }
+//	}
+//
+// It is not necessary to run keepdl on the same host as the nginx
+// proxy. However, TLS is not used between nginx and keepdl, so
+// intervening networks must be secured by other means.
+//
+// Download URLs
+//
+// The following "same origin" URL patterns are supported for public
+// collections (i.e., collections which can be served by keepdl
+// without making use of any credentials supplied by the client). See
+// "Same-origin mode" below.
+//
+//   http://dl.example.com/c=uuid_or_pdh/path/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//
+// The following "multiple origin" URL patterns are supported for all
+// collections:
+//
+//   http://uuid_or_pdh--dl.example.com/path/file.txt
+//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
+//   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
+//
+// In the "multiple origin" form, the string "--" can be replaced with
+// "." with identical results (assuming the upstream proxy is
+// configured accordingly). These two are equivalent:
+//
+//   http://uuid_or_pdh--dl.example.com/path/file.txt
+//   http://uuid_or_pdh.dl.example.com/path/file.txt
+//
+// The first form minimizes the cost and effort of deploying a
+// wildcard TLS certificate for *.dl.example.com. The second form is
+// likely to be easier to configure, and more efficient to run, on an
+// upstream proxy.
+//
+// In all of the above forms, the "dl.example.com" part can be
+// anything at all.
+//
+// In all of the above forms, the "uuid_or_pdh" part can be either a
+// collection UUID or a portable data hash with the "+" character
+// replaced by "-".
+//
+// Assuming there is a collection with UUID
+// zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
+// 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
+// interchangeable:
+//
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--foo.example.com/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--.invalid/foo
+//
+// Authorization mechanisms
+//
+// A token can be provided in an Authorization header:
+//
+//   Authorization: OAuth2 o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A base64-encoded token can be provided in a cookie named "api_token":
+//
+//   Cookie: api_token=bzA3ajRweDdSbEpLNEN1TVlwN0MwTERUNEN6UjFKMXFCRTVBdm83ZUNjVWpPVGlreEs=
+//
+// A token can be provided in a URL-encoded query string:
+//
+//   GET /foo.txt?api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A suitably encoded token can be provided in a POST body if the
+// request has a content type of application/x-www-form-urlencoded or
+// multipart/form-data:
+//
+//   POST /foo.txt
+//   Content-Type: application/x-www-form-urlencoded
+//   [...]
+//   api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// If a token is provided in a query string or in a POST request, the
+// response is an HTTP 303 redirect to an equivalent GET request, with
+// the token stripped from the query string and added to a cookie
+// instead.
+//
+// Compatibility
+//
+// Client-provided authorization tokens are ignored if the client does
+// not provide a Host header.
+//
+// In order to use the query string or POST form authorization
+// mechanisms, the client must follow 303 redirects; the client must
+// accept cookies with a 303 response and send those cookies when
+// performing the redirect; and either the client or an intervening
+// proxy must resolve a relative URL ("//host/path") if given in a
+// response Location header.
+//
+// Intranet mode
+//
+// Normally, Keepdl accepts requests for multiple collections using
+// the same host name, provided the client's credentials are not being
+// used. This provides insufficient XSS protection in an installation
+// where the "anonymously accessible" data is not truly public, but
+// merely protected by network topology.
+//
+// In such cases -- for example, a site which is not reachable from
+// the internet, where some data is world-readable from Arvados's
+// perspective but is intended to be available only to users within
+// the local network -- the upstream proxy should be configured to return
+// 401 for all paths beginning with "/c=".
+//
+// Same-origin mode
+//
+// Without the same-origin protection outlined above, a web page
+// stored in collection X could execute JavaScript code that uses the
+// current viewer's credentials to download additional data from
+// collection Y -- data which is accessible to the current viewer, but
+// not to the author of collection X -- from the same origin
+// (``https://dl.example.com/'') and upload it to some other site
+// chosen by the author of collection X.
+//
 package main
 
 // TODO(TC): Implement
@@ -31,7 +161,7 @@ package main
 //
 // Normally, Keepdl is installed using a wildcard DNS entry and a
 // wildcard HTTPS certificate, serving data from collection X at
-// ``https://X.dl.example.com/path/file.ext''.
+// ``https://X--dl.example.com/path/file.ext''.
 //
 // It will also serve publicly accessible data at
 // ``https://dl.example.com/collections/X/path/file.txt'', but it does not
@@ -48,10 +178,4 @@ package main
 //
 //   keepdl -trust-all-content [...]
 //
-// In the general case, this should not be enabled: A web page stored
-// in collection X can execute JavaScript code that uses the current
-// viewer's credentials to download additional data -- data which is
-// accessible to the current viewer, but not to the author of
-// collection X -- from the same origin (``https://dl.example.com/'')
-// and upload it to some other site chosen by the author of collection
-// X.
+// In the general case, this should not be enabled.
diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 04af920..03b3e26 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -2,11 +2,14 @@ package main
 
 import (
 	"fmt"
+	"html"
 	"io"
 	"mime"
 	"net/http"
+	"net/url"
 	"os"
 	"strings"
+	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
@@ -25,26 +28,49 @@ func init() {
 	anonymousTokens = []string{}
 }
 
+// return the leading DNS label of s (up to the first "." or "--") if it is a UUID or a PDH, otherwise ""
+func parseCollectionIdFromDNSName(s string) string {
+	// Strip domain.
+	if i := strings.IndexRune(s, '.'); i >= 0 {
+		s = s[:i]
+	}
+	// Names like {uuid}--dl.example.com serve the same purpose as
+	// {uuid}.dl.example.com but can reduce cost/effort of using
+	// [additional] wildcard certificates.
+	if i := strings.Index(s, "--"); i >= 0 {
+		s = s[:i]
+	}
+	if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
+		return ""
+	}
+	return s
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
-	var statusCode int
+	var statusCode = 0
 	var statusText string
 
 	w := httpserver.WrapResponseWriter(wOrig)
 	defer func() {
-		if statusCode > 0 {
-			if w.WroteStatus() == 0 {
-				w.WriteHeader(statusCode)
-			} else {
-				httpserver.Log(r.RemoteAddr, "WARNING",
-					fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
-			}
+		if statusCode == 0 {
+			statusCode = w.WroteStatus()
+		} else if w.WroteStatus() == 0 {
+			w.WriteHeader(statusCode)
+		} else if w.WroteStatus() != statusCode {
+			httpserver.Log(r.RemoteAddr, "WARNING",
+				fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
 		}
 		if statusText == "" {
 			statusText = http.StatusText(statusCode)
 		}
-		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
 	}()
 
+	if r.Method != "GET" && r.Method != "POST" {
+		statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+		return
+	}
+
 	arv := clientPool.Get()
 	if arv == nil {
 		statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
@@ -54,17 +80,70 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
 	pathParts := strings.Split(r.URL.Path[1:], "/")
 
-	if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
-		statusCode = http.StatusNotFound
-		return
-	}
-
 	var targetId string
 	var targetPath []string
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
-	if len(pathParts) >= 5 && pathParts[1] == "download" {
+
+	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
+		// "http://{id}.domain.example.com/{path}" form
+		if t := r.FormValue("api_token"); t != "" {
+			// ...with explicit token in query string or
+			// form in POST body. We must encrypt the
+			// token such that it can only be used for
+			// this collection; put it in an HttpOnly
+			// cookie; and redirect to the same URL with
+			// the query param redacted, and method =
+			// GET.
+			//
+			// The HttpOnly flag is necessary to prevent
+			// JavaScript code (included in, or loaded by,
+			// a page in the collection being served) from
+			// employing the user's token beyond reading
+			// other files in the same domain, i.e., the
+			// same collection.
+			//
+			// The 303 redirect is necessary in the case
+			// of a GET request to avoid exposing the
+			// token in the Location bar, and in the case
+			// of a POST request to avoid raising warnings
+			// when the user refreshes the resulting page.
+			http.SetCookie(w, &http.Cookie{
+				Name:    "api_token",
+				Value:   auth.EncodeTokenCookie([]byte(t)),
+				Path:    "/",
+				Expires: time.Now().AddDate(10,0,0),
+			})
+			redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+			w.Header().Add("Location", redir)
+			statusCode, statusText = http.StatusSeeOther, redir
+			w.WriteHeader(statusCode)
+			io.WriteString(w, `<A href="`)
+			io.WriteString(w, html.EscapeString(redir))
+			io.WriteString(w, `">Continue</A>`)
+			return
+		} else if strings.HasPrefix(pathParts[0], "t=") {
+			// ...with explicit token in path,
+			// "{...}.com/t={token}/{path}".  This form
+			// must only be used to pass scoped tokens
+			// that give permission for a single
+			// collection. See FormValue case above.
+			tokens = []string{pathParts[0][2:]}
+			targetPath = pathParts[1:]
+			pathToken = true
+		} else {
+			// ...with cookie, Authorization header, or
+			// no token at all
+			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+			tokens = append(reqTokens, anonymousTokens...)
+			targetPath = pathParts
+		}
+	} else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+		statusCode = http.StatusNotFound
+		return
+	} else if len(pathParts) >= 5 && pathParts[1] == "download" {
 		// "/collections/download/{id}/{token}/path..." form:
 		// Don't use our configured anonymous tokens,
 		// Authorization headers, etc.  Just use the token in
@@ -86,7 +165,6 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	found := false
 	for _, arv.ApiToken = range tokens {
 		err := arv.Get("collections", targetId, nil, &collection)
-		httpserver.Log(err)
 		if err == nil {
 			// Success
 			found = true
@@ -131,8 +209,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		// someone trying (anonymously) to download public
 		// data that has been deleted.  Allow a referrer to
 		// provide this context somehow?
-		statusCode = http.StatusUnauthorized
 		w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+		statusCode = http.StatusUnauthorized
 		return
 	}
 
@@ -162,6 +240,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		}
 	}
 
+	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)
 	if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
diff --git a/services/keepdl/handler_test.go b/services/keepdl/handler_test.go
new file mode 100644
index 0000000..a1f5e1a
--- /dev/null
+++ b/services/keepdl/handler_test.go
@@ -0,0 +1,218 @@
+package main
+
+import (
+	"html"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"regexp"
+	"strings"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&UnitSuite{})
+
+type UnitSuite struct {}
+
+func mustParseURL(s string) *url.URL {
+	r, err := url.Parse(s)
+	if err != nil {
+		panic("parse URL: " + s)
+	}
+	return r
+}
+
+func (s *IntegrationSuite) TestVhost404(c *check.C) {
+	for _, testURL := range []string{
+		arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
+		arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+	} {
+		resp := httptest.NewRecorder()
+		req := &http.Request{
+			Method: "GET",
+			URL: mustParseURL(testURL),
+		}
+		(&handler{}).ServeHTTP(resp, req)
+		c.Check(resp.Code, check.Equals, http.StatusNotFound)
+		c.Check(resp.Body.String(), check.Equals, "")
+	}
+}
+
+type authorizer func(*http.Request, string) int
+
+func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
+	doVhostRequests(c, authzViaAuthzHeader)
+}
+func authzViaAuthzHeader(r *http.Request, tok string) int {
+	r.Header.Add("Authorization", "OAuth2 " + tok)
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
+	doVhostRequests(c, authzViaCookieValue)
+}
+func authzViaCookieValue(r *http.Request, tok string) int {
+	r.AddCookie(&http.Cookie{
+		Name: "api_token",
+		Value: auth.EncodeTokenCookie([]byte(tok)),
+	})
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
+	doVhostRequests(c, authzViaPath)
+}
+func authzViaPath(r *http.Request, tok string) int {
+	r.URL.Path = "/t=" + tok + r.URL.Path
+	return http.StatusNotFound
+}
+
+func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
+	doVhostRequests(c, authzViaQueryString)
+}
+func authzViaQueryString(r *http.Request, tok string) int {
+	r.URL.RawQuery = "api_token=" + tok
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
+	doVhostRequests(c, authzViaPOST)
+}
+func authzViaPOST(r *http.Request, tok string) int {
+	r.Method = "POST"
+	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+	r.Body = ioutil.NopCloser(strings.NewReader(
+		url.Values{"api_token": {tok}}.Encode()))
+	return http.StatusUnauthorized
+}
+
+// Try some combinations of {url, token} using the given authorization
+// mechanism, and verify the result is correct.
+func doVhostRequests(c *check.C, authz authorizer) {
+	hostPath := arvadostest.FooCollection + ".example.com/foo"
+	for _, tok := range []string{
+		arvadostest.ActiveToken,
+		arvadostest.ActiveToken[:15],
+		arvadostest.SpectatorToken,
+		"bogus",
+		"",
+	} {
+		u := mustParseURL("http://" + hostPath)
+		req := &http.Request{
+			Method: "GET",
+			Host: u.Host,
+			URL: u,
+			Header: http.Header{},
+		}
+		failCode := authz(req, tok)
+		resp := doReq(req)
+		code, body := resp.Code, resp.Body.String()
+		if tok == arvadostest.ActiveToken {
+			c.Check(code, check.Equals, http.StatusOK)
+			c.Check(body, check.Equals, "foo")
+		} else {
+			c.Check(code >= 400, check.Equals, true)
+			c.Check(code < 500, check.Equals, true)
+			if tok == arvadostest.SpectatorToken {
+				// Valid token never offers to retry
+				// with different credentials.
+				c.Check(code, check.Equals, http.StatusNotFound)
+			} else {
+				// Invalid token can ask to retry
+				// depending on the authz method.
+				c.Check(code, check.Equals, failCode)
+			}
+			c.Check(body, check.Equals, "")
+		}
+	}
+}
+
+func doReq(req *http.Request) *httptest.ResponseRecorder {
+	resp := httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	if resp.Code != http.StatusSeeOther {
+		return resp
+	}
+	cookies := (&http.Response{Header: resp.Header()}).Cookies()
+	u, _ := req.URL.Parse(resp.Header().Get("Location"))
+	req = &http.Request{
+		Method: "GET",
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{},
+	}
+	for _, c := range cookies {
+		req.AddCookie(c)
+	}
+	return doReq(req)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		arvadostest.FooCollection + ".example.com/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "POST",
+		arvadostest.FooCollection + ".example.com/foo",
+		"",
+		"application/x-www-form-urlencoded",
+		url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
+		http.StatusOK,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "POST",
+		arvadostest.FooCollection + ".example.com/foo",
+		"",
+		"application/x-www-form-urlencoded",
+		url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
+		http.StatusNotFound,
+	)
+}
+
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) {
+	u, _ := url.Parse(`http://` + hostPath + queryString)
+	req := &http.Request{
+		Method: method,
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{"Content-Type": {contentType}},
+		Body: ioutil.NopCloser(strings.NewReader(body)),
+	}
+
+	resp := httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	c.Assert(resp.Code, check.Equals, http.StatusSeeOther)
+	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
+	cookies := (&http.Response{Header: resp.Header()}).Cookies()
+
+	u, _ = u.Parse(resp.Header().Get("Location"))
+	req = &http.Request{
+		Method: "GET",
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{},
+	}
+	for _, c := range cookies {
+		req.AddCookie(c)
+	}
+
+	resp = httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	c.Check(resp.Header().Get("Location"), check.Equals, "")
+	c.Check(resp.Code, check.Equals, expectStatus)
+	if expectStatus == http.StatusOK {
+		c.Check(resp.Body.String(), check.Equals, "foo")
+	}
+}

commit 1d59d3aa015ca5263e1ecaaad437f18db654beb4
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 24 23:33:08 2015 -0400

    5824: add (*KeepClient)CollectionFileReader()

diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
new file mode 100644
index 0000000..87b28f8
--- /dev/null
+++ b/sdk/go/arvadostest/fixtures.go
@@ -0,0 +1,17 @@
+package arvadostest
+
+const (
+	SpectatorToken        = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+	ActiveToken           = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+	AnonymousToken        = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+	FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
+	NonexistentCollection = "zzzzz-4zz18-totallynotexist"
+	HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+	PathologicalManifest  = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K@xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero@0 0:1:f 1:0:zero@1 1:4:ooba 4:0:zero@4 5:1:r 5:4:rbaz 9:0:zero@9\n" +
+		"./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
+		"./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
+		`./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:baz` + "\n" +
+		`./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:b\141z\040w\141z` + "\n" +
+		"./foo acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero 0:3:foo\n" +
+		". acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:foo/zero 0:3:foo/foo\n"
+)
diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
new file mode 100644
index 0000000..929f693
--- /dev/null
+++ b/sdk/go/keepclient/collectionreader.go
@@ -0,0 +1,159 @@
+package keepclient
+
+import (
+	"errors"
+	"io"
+	"os"
+
+	"git.curoverse.com/arvados.git/sdk/go/manifest"
+)
+
+var (
+	ErrNoManifest     = errors.New("Collection has no manifest")
+	ErrNotImplemented = errors.New("Not implemented")
+)
+
+// CollectionFileReader returns an io.Reader that reads file content
+// from a collection. The filename must be given relative to the root
+// of the collection, without a leading "./".
+func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (*cfReader, error) {
+	mText, ok := collection["manifest_text"].(string)
+	if !ok {
+		return nil, ErrNoManifest
+	}
+	m := manifest.Manifest{Text: mText}
+	rdrChan := make(chan *cfReader)
+	go func() {
+		// q is a queue of FileSegments that we have received but
+		// haven't yet been able to send to toGet.
+		var q []*manifest.FileSegment
+		var r *cfReader
+		for seg := range m.FileSegmentIterByName(filename) {
+			if r == nil {
+				// We've just discovered that the
+				// requested filename does appear in
+				// the manifest, so we can return a
+				// real reader (not nil) from
+				// CollectionFileReader().
+				r = newCFReader(kc)
+				rdrChan <- r
+			}
+			q = append(q, seg)
+			r.totalSize += uint64(seg.Len)
+			// Send toGet whatever it's ready to receive.
+			Q: for len(q) > 0 {
+				select {
+				case r.toGet <- q[0]:
+					q = q[1:]
+				default:
+					break Q
+				}
+			}
+		}
+		if r == nil {
+			// File not found
+			rdrChan <- nil
+			return
+		}
+		close(r.countDone)
+		for _, seg := range q {
+			r.toGet <- seg
+		}
+		close(r.toGet)
+	}()
+	// Before returning a reader, wait until we know whether the
+	// file exists here:
+	r := <-rdrChan
+	if r == nil {
+		return nil, os.ErrNotExist
+	}
+	return r, nil
+}
+
+type cfReader struct {
+	keepClient *KeepClient
+	// doGet() reads FileSegments from toGet, gets the data from
+	// Keep, and sends byte slices to toRead to be consumed by
+	// Read().
+	toGet        chan *manifest.FileSegment
+	toRead       chan []byte
+	// bytes ready to send next time someone calls Read()
+	buf          []byte
+	// Total size of the file being read. Not safe to read this
+	// until countDone is closed.
+	totalSize    uint64
+	countDone    chan struct{}
+	// First error encountered.
+	err          error
+}
+
+func (r *cfReader) Read(outbuf []byte) (n int, err error) {
+	if r.err != nil {
+		return 0, r.err
+	}
+	for r.buf == nil || len(r.buf) == 0 {
+		var ok bool
+		r.buf, ok = <-r.toRead
+		if r.err != nil {
+			return 0, r.err
+		} else if !ok {
+			return 0, io.EOF
+		}
+	}
+	if len(r.buf) > len(outbuf) {
+		n = len(outbuf)
+	} else {
+		n = len(r.buf)
+	}
+	copy(outbuf[:n], r.buf[:n])
+	r.buf = r.buf[n:]
+	return
+}
+
+func (r *cfReader) Close() error {
+	_, _ = <-r.countDone
+	for _ = range r.toGet {
+	}
+	for _ = range r.toRead {
+	}
+	return r.err
+}
+
+func (r *cfReader) Len() uint64 {
+	// Wait for all segments to be counted
+	_, _ = <-r.countDone
+	return r.totalSize
+}
+
+func (r *cfReader) doGet() {
+	defer close(r.toRead)
+	for fs := range r.toGet {
+		rdr, _, _, err := r.keepClient.Get(fs.Locator)
+		if err != nil {
+			r.err = err
+			return
+		}
+		var buf = make([]byte, fs.Offset+fs.Len)
+		_, err = io.ReadFull(rdr, buf)
+		if err != nil {
+			r.err = err
+			return
+		}
+		for bOff, bLen := fs.Offset, 1<<20; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+			if bOff+bLen > fs.Offset+fs.Len {
+				bLen = fs.Offset + fs.Len - bOff
+			}
+			r.toRead <- buf[bOff : bOff+bLen]
+		}
+	}
+}
+
+func newCFReader(kc *KeepClient) (r *cfReader) {
+	r = new(cfReader)
+	r.keepClient = kc
+	r.toGet = make(chan *manifest.FileSegment, 2)
+	r.toRead = make(chan []byte)
+	r.countDone = make(chan struct{})
+	go r.doGet()
+	return
+}
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
new file mode 100644
index 0000000..f271208
--- /dev/null
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -0,0 +1,123 @@
+package keepclient
+
+import (
+	"crypto/md5"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"os"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+// IntegrationSuite tests need an API server
+type IntegrationSuite struct{}
+
+type SuccessHandler struct {
+	disk map[string][]byte
+	lock chan struct{}
+}
+
+func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+	switch req.Method {
+	case "PUT":
+		buf, err := ioutil.ReadAll(req.Body)
+		if err != nil {
+			resp.WriteHeader(500)
+			return
+		}
+		pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
+		h.lock <- struct{}{}
+		h.disk[pdh] = buf
+		<- h.lock
+		resp.Write([]byte(pdh))
+	case "GET":
+		pdh := req.URL.Path[1:]
+		h.lock <- struct{}{}
+		buf, ok := h.disk[pdh]
+		<- h.lock
+		if !ok {
+			resp.WriteHeader(http.StatusNotFound)
+		} else {
+			resp.Write(buf)
+		}
+	default:
+		resp.WriteHeader(http.StatusMethodNotAllowed)
+	}
+}
+
+type rdrTest struct {
+	mt   string      // manifest text
+	f    string      // filename
+	want interface{} // error or string to expect
+}
+
+func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	arv.ApiToken = arvadostest.ActiveToken
+
+	kc, err := MakeKeepClient(&arv)
+	c.Assert(err, check.IsNil)
+
+	{
+		localRoots := make(map[string]string)
+		h := SuccessHandler{disk: make(map[string][]byte), lock: make(chan struct{}, 1)}
+		for i, k := range RunSomeFakeKeepServers(h, 4) {
+			localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+		}
+		kc.SetServiceRoots(localRoots, localRoots, nil)
+		kc.PutB([]byte("foo"))
+		kc.PutB([]byte("bar"))
+		kc.PutB([]byte("Hello world\n"))
+		kc.PutB([]byte(""))
+	}
+
+	mt := arvadostest.PathologicalManifest
+
+	for _, testCase := range []rdrTest{
+		{mt: mt, f: "zzzz", want: os.ErrNotExist},
+		{mt: mt, f: "frob", want: os.ErrNotExist},
+		{mt: mt, f: "/segmented/frob", want: os.ErrNotExist},
+		{mt: mt, f: "./segmented/frob", want: os.ErrNotExist},
+		{mt: mt, f: "/f", want: os.ErrNotExist},
+		{mt: mt, f: "./f", want: os.ErrNotExist},
+		{mt: mt, f: "foo bar//baz", want: os.ErrNotExist},
+		{mt: mt, f: "foo/zero", want: ""},
+		{mt: mt, f: "zero@0", want: ""},
+		{mt: mt, f: "zero@1", want: ""},
+		{mt: mt, f: "zero@4", want: ""},
+		{mt: mt, f: "zero@9", want: ""},
+		{mt: mt, f: "f", want: "f"},
+		{mt: mt, f: "ooba", want: "ooba"},
+		{mt: mt, f: "overlapReverse/o", want: "o"},
+		{mt: mt, f: "overlapReverse/oo", want: "oo"},
+		{mt: mt, f: "overlapReverse/ofoo", want: "ofoo"},
+		{mt: mt, f: "foo bar/baz", want: "foo"},
+		{mt: mt, f: "segmented/frob", want: "frob"},
+		{mt: mt, f: "segmented/oof", want: "oof"},
+	} {
+		rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+		switch want := testCase.want.(type) {
+		case error:
+			c.Check(rdr, check.IsNil)
+			c.Check(err, check.Equals, want)
+		case string:
+			buf := make([]byte, len(want))
+			n, err := io.ReadFull(rdr, buf)
+			c.Check(err, check.IsNil)
+			for i := 0; i < 4; i++ {
+				c.Check(string(buf), check.Equals, want)
+				n, err = rdr.Read(buf)
+				c.Check(n, check.Equals, 0)
+				c.Check(err, check.Equals, io.EOF)
+			}
+			c.Check(rdr.Close(), check.Equals, nil)
+		}
+	}
+}
diff --git a/sdk/go/manifest/manifest.go b/sdk/go/manifest/manifest.go
index 4e816cd..f104d9a 100644
--- a/sdk/go/manifest/manifest.go
+++ b/sdk/go/manifest/manifest.go
@@ -5,25 +5,185 @@
 package manifest
 
 import (
+	"errors"
+	"fmt"
 	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"log"
+	"regexp"
+	"strconv"
 	"strings"
 )
 
+var ErrInvalidToken = errors.New("Invalid token")
+
+var LocatorPattern = regexp.MustCompile(
+	"^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9@_-]+)*$")
+
 type Manifest struct {
 	Text string
 }
 
+type BlockLocator struct {
+	Digest blockdigest.BlockDigest
+	Size   int
+	Hints  []string
+}
+
+type DataSegment struct {
+	BlockLocator
+	Locator      string
+	StreamOffset uint64
+}
+
+// FileSegment is a portion of a file that is contained within a
+// single block.
+type FileSegment struct {
+	Locator string
+	// Offset (within this block) of this data segment
+	Offset int
+	Len    int
+}
+
 // Represents a single line from a manifest.
 type ManifestStream struct {
 	StreamName string
 	Blocks     []string
-	Files      []string
+	FileTokens []string
+}
+
+var escapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
+
+func unescapeSeq(seq string) string {
+	if seq == `\\` {
+		return `\`
+	}
+	i, err := strconv.ParseUint(seq[1:], 8, 8)
+	if err != nil {
+		// Invalid escape sequence: can't unescape.
+		return seq
+	}
+	return string([]byte{byte(i)})
+}
+
+func UnescapeName(s string) string {
+	return escapeSeq.ReplaceAllStringFunc(s, unescapeSeq)
+}
+
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+	if !LocatorPattern.MatchString(s) {
+		err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+			"\"%s\".",
+			s,
+			LocatorPattern.String())
+	} else {
+		tokens := strings.Split(s, "+")
+		var blockSize int64
+		var blockDigest blockdigest.BlockDigest
+		// We expect both of the following to succeed since LocatorPattern
+		// restricts the strings appropriately.
+		blockDigest, err = blockdigest.FromString(tokens[0])
+		if err != nil {
+			return
+		}
+		blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+		if err != nil {
+			return
+		}
+		b.Digest = blockDigest
+		b.Size = int(blockSize)
+		b.Hints = tokens[2:]
+	}
+	return
+}
+
+func parseFileToken(tok string) (segPos, segLen uint64, name string, err error) {
+	parts := strings.SplitN(tok, ":", 3)
+	if len(parts) != 3 {
+		err = ErrInvalidToken
+		return
+	}
+	segPos, err = strconv.ParseUint(parts[0], 10, 64)
+	if err != nil {
+		return
+	}
+	segLen, err = strconv.ParseUint(parts[1], 10, 64)
+	if err != nil {
+		return
+	}
+	name = UnescapeName(parts[2])
+	return
+}
+
+func (s *ManifestStream) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+	ch := make(chan *FileSegment)
+	go func() {
+		s.sendFileSegmentIterByName(filepath, ch)
+		close(ch)
+	}()
+	return ch
+}
+
+func (s *ManifestStream) sendFileSegmentIterByName(filepath string, ch chan<- *FileSegment) {
+	blockLens := make([]int, 0, len(s.Blocks))
+	// This is what streamName+"/"+fileName will look like:
+	target := "./" + filepath
+	for _, fTok := range s.FileTokens {
+		wantPos, wantLen, name, err := parseFileToken(fTok)
+		if err != nil {
+			// Skip (!) invalid file tokens.
+			continue
+		}
+		if s.StreamName+"/"+name != target {
+			continue
+		}
+		if wantLen == 0 {
+			ch <- &FileSegment{Locator: "d41d8cd98f00b204e9800998ecf8427e+0", Offset: 0, Len: 0}
+			continue
+		}
+		// Linear search for blocks containing data for this
+		// file
+		var blockPos uint64 = 0 // position of block in stream
+		for i, loc := range s.Blocks {
+			if blockPos >= wantPos+wantLen {
+				break
+			}
+			if len(blockLens) <= i {
+				blockLens = blockLens[:i+1]
+				b, err := ParseBlockLocator(loc)
+				if err != nil {
+					// Unparseable locator -> unusable
+					// stream.
+					ch <- nil
+					return
+				}
+				blockLens[i] = b.Size
+			}
+			blockLen := uint64(blockLens[i])
+			if blockPos+blockLen <= wantPos {
+				blockPos += blockLen
+				continue
+			}
+			fseg := FileSegment{
+				Locator: loc,
+				Offset:  0,
+				Len:     blockLens[i],
+			}
+			if blockPos < wantPos {
+				fseg.Offset = int(wantPos - blockPos)
+				fseg.Len -= fseg.Offset
+			}
+			if blockPos+blockLen > wantPos+wantLen {
+				fseg.Len = int(wantPos+wantLen-blockPos) - fseg.Offset
+			}
+			ch <- &fseg
+			blockPos += blockLen
+		}
+	}
 }
 
 func parseManifestStream(s string) (m ManifestStream) {
 	tokens := strings.Split(s, " ")
-	m.StreamName = tokens[0]
+	m.StreamName = UnescapeName(tokens[0])
 	tokens = tokens[1:]
 	var i int
 	for i = range tokens {
@@ -32,7 +192,7 @@ func parseManifestStream(s string) (m ManifestStream) {
 		}
 	}
 	m.Blocks = tokens[:i]
-	m.Files = tokens[i:]
+	m.FileTokens = tokens[i:]
 	return
 }
 
@@ -58,6 +218,20 @@ func (m *Manifest) StreamIter() <-chan ManifestStream {
 	return ch
 }
 
+func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+	ch := make(chan *FileSegment)
+	go func() {
+		for stream := range m.StreamIter() {
+			if !strings.HasPrefix("./"+filepath, stream.StreamName+"/") {
+				continue
+			}
+			stream.sendFileSegmentIterByName(filepath, ch)
+		}
+		close(ch)
+	}()
+	return ch
+}
+
 // Blocks may appear mulitple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
diff --git a/sdk/go/manifest/manifest_test.go b/sdk/go/manifest/manifest_test.go
index 8cfe3d9..364648d 100644
--- a/sdk/go/manifest/manifest_test.go
+++ b/sdk/go/manifest/manifest_test.go
@@ -1,10 +1,13 @@
 package manifest
 
 import (
-	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"io/ioutil"
+	"reflect"
 	"runtime"
 	"testing"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 )
 
 func getStackTrace() string {
@@ -60,7 +63,7 @@ func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
 func expectManifestStream(t *testing.T, actual ManifestStream, expected ManifestStream) {
 	expectEqual(t, actual.StreamName, expected.StreamName)
 	expectStringSlicesEqual(t, actual.Blocks, expected.Blocks)
-	expectStringSlicesEqual(t, actual.Files, expected.Files)
+	expectStringSlicesEqual(t, actual.FileTokens, expected.FileTokens)
 }
 
 func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
@@ -72,8 +75,19 @@ func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected
 func TestParseManifestStreamSimple(t *testing.T) {
 	m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
 	expectManifestStream(t, m, ManifestStream{StreamName: ".",
-		Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
-		Files:  []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+		Blocks:     []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
+		FileTokens: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+}
+
+func TestParseBlockLocatorSimple(t *testing.T) {
+	b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+	if err != nil {
+		t.Fatalf("Unexpected error parsing block locator: %v", err)
+	}
+	expectBlockLocator(t, b, BlockLocator{Digest: blockdigest.AssertFromString("365f83f5f808896ec834c8b595288735"),
+		Size: 2310,
+		Hints: []string{"K@qr1hi",
+			"Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
 }
 
 func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
@@ -88,8 +102,8 @@ func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
 	expectManifestStream(t,
 		firstStream,
 		ManifestStream{StreamName: ".",
-			Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
-			Files:  []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+			Blocks:     []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
+			FileTokens: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
 
 	received, ok := <-streamIter
 	if ok {
@@ -126,3 +140,58 @@ func TestBlockIterLongManifest(t *testing.T) {
 			Size:  31367794,
 			Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db@5441920c"}})
 }
+
+func TestUnescape(t *testing.T) {
+	for _, testCase := range [][]string{
+		{`\040`, ` `},
+		{`\009`, `\009`},
+		{`\\\040\\`, `\ \`},
+		{`\\040\`, `\040\`},
+	} {
+		in := testCase[0]
+		expect := testCase[1]
+		got := UnescapeName(in)
+		if expect != got {
+			t.Errorf("For '%s' got '%s' instead of '%s'", in, got, expect)
+		}
+	}
+}
+
+type fsegtest struct {
+	mt   string        // manifest text
+	f    string        // filename
+	want []FileSegment // segments should be received on channel
+}
+
+func TestFileSegmentIterByName(t *testing.T) {
+	mt := arvadostest.PathologicalManifest
+	for _, testCase := range []fsegtest{
+		{mt: mt, f: "zzzz", want: nil},
+		// This case is too sensitive: it would be acceptable
+		// (even preferable) to return only one empty segment.
+		{mt: mt, f: "foo/zero", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero@0", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero@1", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero@4", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero@9", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "f", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+		{mt: mt, f: "ooba", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 2}}},
+		{mt: mt, f: "overlapReverse/o", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}}},
+		{mt: mt, f: "overlapReverse/oo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}}},
+		{mt: mt, f: "overlapReverse/ofoo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+		{mt: mt, f: "foo bar/baz", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+		// This case is too sensitive: it would be better to
+		// omit the empty segment.
+		{mt: mt, f: "segmented/frob", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}, {"37b51d194a7513e45b56f6524f2d51f2+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 1}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 1}}},
+		{mt: mt, f: "segmented/oof", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+	} {
+		m := Manifest{Text: testCase.mt}
+		var got []FileSegment
+		for fs := range m.FileSegmentIterByName(testCase.f) {
+			got = append(got, *fs)
+		}
+		if !reflect.DeepEqual(got, testCase.want) {
+			t.Errorf("For %#v:\n got  %#v\n want %#v", testCase.f, got, testCase.want)
+		}
+	}
+}
diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 48e3640..04af920 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -11,6 +11,7 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
 var clientPool = arvadosclient.MakeClientPool()
@@ -136,17 +137,20 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	}
 
 	filename := strings.Join(targetPath, "/")
-	rdr, err := arvadosclient.CollectionFileReader(collection, filename)
+	kc, err := keepclient.MakeKeepClient(arv)
+	if err != nil {
+		statusCode, statusText = http.StatusInternalServerError, err.Error()
+		return
+	}
+	rdr, err := kc.CollectionFileReader(collection, filename)
 	if os.IsNotExist(err) {
 		statusCode = http.StatusNotFound
 		return
-	} else if err == arvadosclient.ErrNotImplemented {
-		statusCode = http.StatusNotImplemented
-		return
 	} else if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
 		return
 	}
+	defer rdr.Close()
 
 	// One or both of these can be -1 if not found:
 	basenamePos := strings.LastIndex(filename, "/")
diff --git a/services/keepdl/server_test.go b/services/keepdl/server_test.go
index 66c6812..5864315 100644
--- a/services/keepdl/server_test.go
+++ b/services/keepdl/server_test.go
@@ -15,16 +15,7 @@ import (
 
 var _ = check.Suite(&IntegrationSuite{})
 
-const (
-	spectatorToken  = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
-	activeToken     = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
-	anonymousToken  = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
-	fooCollection   = "zzzzz-4zz18-fy296fx3hot09f7"
-	bogusCollection = "zzzzz-4zz18-totallynotexist"
-	hwCollection    = "zzzzz-4zz18-4en62shvi99lxd4"
-)
-
-// IntegrationSuite tests need an API server and an arv-git-httpd server
+// IntegrationSuite tests need an API server and a keepdl server
 type IntegrationSuite struct {
 	testServer *server
 }
@@ -34,12 +25,12 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"",
 		"bogustoken",
 	} {
-		hdr, body := s.runCurl(c, token, "/collections/"+fooCollection+"/foo")
+		hdr, body := s.runCurl(c, token, "/collections/"+arvadostest.FooCollection+"/foo")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
-			hdr, body = s.runCurl(c, token, "/collections/download/"+fooCollection+"/"+token+"/foo")
+			hdr, body = s.runCurl(c, token, "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
 			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 			c.Check(body, check.Equals, "")
 		}
@@ -62,46 +53,46 @@ func (s *IntegrationSuite) Test404(c *check.C) {
 		"/download",
 		"/collections",
 		"/collections/",
-		"/collections/" + fooCollection,
-		"/collections/" + fooCollection + "/",
+		"/collections/" + arvadostest.FooCollection,
+		"/collections/" + arvadostest.FooCollection + "/",
 		// Non-existent file in collection
-		"/collections/" + fooCollection + "/theperthcountyconspiracy",
-		"/collections/download/" + fooCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		"/collections/" + arvadostest.FooCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 		// Non-existent collection
-		"/collections/" + bogusCollection,
-		"/collections/" + bogusCollection + "/",
-		"/collections/" + bogusCollection + "/theperthcountyconspiracy",
-		"/collections/download/" + bogusCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		"/collections/" + arvadostest.NonexistentCollection,
+		"/collections/" + arvadostest.NonexistentCollection + "/",
+		"/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 	} {
-		hdr, body := s.runCurl(c, activeToken, uri)
+		hdr, body := s.runCurl(c, arvadostest.ActiveToken, uri)
 		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
 		c.Check(body, check.Equals, "")
 	}
 }
 
 func (s *IntegrationSuite) Test200(c *check.C) {
-	anonymousTokens = []string{anonymousToken}
+	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
 	c.Assert(err, check.Equals, nil)
-	arv.ApiToken = activeToken
+	arv.ApiToken = arvadostest.ActiveToken
 	kc, err := keepclient.MakeKeepClient(&arv)
 	c.Assert(err, check.Equals, nil)
 	kc.PutB([]byte("Hello world\n"))
 	kc.PutB([]byte("foo"))
 	for _, spec := range [][]string{
 		// My collection
-		{activeToken, "/collections/" + fooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"tokensobogus", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{activeToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{anonymousToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.ActiveToken, "/collections/" + arvadostest.FooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		// Anonymously accessible user agreement. These should
 		// start working when CollectionFileReader provides
 		// real data instead of fake/stub data.
-		{"", "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{activeToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{spectatorToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{spectatorToken, "/collections/download/"+hwCollection+"/"+spectatorToken+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 	} {
 		hdr, body := s.runCurl(c, spec[0], spec[1])
 		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {

commit 3345d7c809056cb5e2352b404080355dfc0cc474
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 01:09:02 2015 -0400

    5824: Add doc.go

diff --git a/services/keepdl/doc.go b/services/keepdl/doc.go
new file mode 100644
index 0000000..65c7f19
--- /dev/null
+++ b/services/keepdl/doc.go
@@ -0,0 +1,57 @@
+// Keepdl provides read-only HTTP access to files stored in Keep. It
+// serves public data to anonymous and unauthenticated clients, and
+// accepts authentication via Arvados tokens. It can be installed
+// anywhere with access to Keep services, typically behind a web proxy
+// that provides SSL support.
+//
+// Given that this amounts to a web hosting service for arbitrary
+// content, it is vital to consider who is trusted to read and write
+// that content before deploying it: see "Trusted content" below.
+//
+// Usage
+//
+// Listening:
+//
+//   keepdl -address=:1234
+//
+// Start an HTTP server on port 1234.
+//
+//   keepdl -address=1.2.3.4:1234
+//
+// Start an HTTP server on port 1234, on the interface with IP address 1.2.3.4.
+//
+// Keepdl does not support SSL natively. Typically, it is installed
+// behind a proxy like nginx.
+//
+package main
+
+// TODO(TC): Implement
+//
+// Trusted content
+//
+// Normally, Keepdl is installed using a wildcard DNS entry and a
+// wildcard HTTPS certificate, serving data from collection X at
+// ``https://X.dl.example.com/path/file.ext''.
+//
+// It will also serve publicly accessible data at
+// ``https://dl.example.com/collections/X/path/file.txt'', but it does not
+// accept any kind of credentials at paths like these.
+//
+// In "trust all content" mode, Keepdl will accept credentials (API
+// tokens) and serve any collection X at
+// "https://dl.example.com/collections/X/path/file.ext".  This is
+// UNSAFE except in the special case where everyone who is able to write
+// ANY data to Keep, and every JavaScript and HTML file written to
+// Keep, is also trusted to read ALL of the data in Keep.
+//
+// In such cases you can enable trust-all-content mode.
+//
+//   keepdl -trust-all-content [...]
+//
+// In the general case, this should not be enabled: A web page stored
+// in collection X can execute JavaScript code that uses the current
+// viewer's credentials to download additional data -- data which is
+// accessible to the current viewer, but not to the author of
+// collection X -- from the same origin (``https://dl.example.com/'')
+// and upload it to some other site chosen by the author of collection
+// X.

commit e78b4337cc4da187fac20a556b46e5ef31b79fd8
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Jun 23 19:12:58 2015 -0400

    5824: Add install doc

diff --git a/doc/install/install-keepdl.html.textile.liquid b/doc/install/install-keepdl.html.textile.liquid
new file mode 100644
index 0000000..6730dff
--- /dev/null
+++ b/doc/install/install-keepdl.html.textile.liquid
@@ -0,0 +1,64 @@
+---
+layout: default
+navsection: installguide
+title: Install download server
+...
+
+This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
+
+The keepdl server provides read-only HTTP access to files stored in Keep. It serves public data to anonymous and unauthenticated clients, and accepts authentication via Arvados tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support.
+
+By convention, we use the following hostname for the download service:
+
+<div class="offset1">
+table(table table-bordered table-condensed).
+|dl.@uuid_prefix@.your.domain|
+</div>
+
+This hostname should resolve from anywhere on the internet.
+
+h2. Install keepdl
+
+First add the Arvados apt repository, and then install the keepdl package.
+
+<notextile>
+<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
+~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
+~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
+~$ <span class="userinput">sudo /usr/bin/apt-get install keepdl</span>
+</code></pre>
+</notextile>
+
+Verify that @keepdl@ is functional:
+
+<notextile>
+<pre><code>~$ <span class="userinput">keepdl -h</span>
+Usage of keepdl:
+  -address="0.0.0.0:80": Address to listen on, "host:port".
+</code></pre>
+</notextile>
+
+We recommend running @keepdl@ under "runit":https://packages.debian.org/search?keywords=runit or something similar.
+
+Your @run@ script should look something like this:
+
+<notextile>
+<pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
+exec sudo -u nobody keepdl -address=:9002 2>&1
+</code></pre>
+</notextile>
+
+h3. Set up a reverse proxy with SSL support
+
+The keepdl service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
+
+This is best achieved by putting a reverse proxy with SSL support in front of keepdl, running on port 443 and passing requests to keepdl on port 9002 (or whatever port you chose in your run script).
+
+h3. Tell the API server about the keepdl service
+
+In your API server's config/application.yml file, add the following entry:
+
+<notextile>
+<pre><code>keepdl: dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>

commit 1dd92db0cd22631e965d777d2e51f6ded4765ca8
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 17 02:47:49 2015 -0400

    5824: Assign MIME type by file extension. closes #6327

diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index bbcd53c..48e3640 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -3,6 +3,7 @@ package main
 import (
 	"fmt"
 	"io"
+	"mime"
 	"net/http"
 	"os"
 	"strings"
@@ -146,6 +147,17 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
 		return
 	}
+
+	// One or both of these can be -1 if not found:
+	basenamePos := strings.LastIndex(filename, "/")
+	extPos := strings.LastIndex(filename, ".")
+	if extPos > basenamePos {
+		// Now extPos is safely >= 0.
+		if t := mime.TypeByExtension(filename[extPos:]); t != "" {
+			w.Header().Set("Content-Type", t)
+		}
+	}
+
 	_, err = io.Copy(w, rdr)
 	if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
diff --git a/services/keepdl/server_test.go b/services/keepdl/server_test.go
index 1c36f98..66c6812 100644
--- a/services/keepdl/server_test.go
+++ b/services/keepdl/server_test.go
@@ -109,6 +109,12 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 			continue
 		}
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+		if strings.HasSuffix(spec[1], ".txt") {
+			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
+			// TODO: Check some types that aren't
+			// automatically detected by Go's http server
+			// by sniffing the content.
+		}
 		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
 	}
 }

commit caddb03ec1d6e9d5d39f2ade9ddc70a4b7dddabb
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 00:02:11 2015 -0400

    5824: Add keepdl.

diff --git a/services/keepdl/.gitignore b/services/keepdl/.gitignore
new file mode 100644
index 0000000..173e306
--- /dev/null
+++ b/services/keepdl/.gitignore
@@ -0,0 +1 @@
+keepdl
diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
new file mode 100644
index 0000000..bbcd53c
--- /dev/null
+++ b/services/keepdl/handler.go
@@ -0,0 +1,153 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var clientPool = arvadosclient.MakeClientPool()
+
+var anonymousTokens []string
+
+type handler struct{}
+
+func init() {
+	// TODO(TC): Get anonymousTokens from flags
+	anonymousTokens = []string{}
+}
+
+func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+	var statusCode int
+	var statusText string
+
+	w := httpserver.WrapResponseWriter(wOrig)
+	defer func() {
+		if statusCode > 0 {
+			if w.WroteStatus() == 0 {
+				w.WriteHeader(statusCode)
+			} else {
+				httpserver.Log(r.RemoteAddr, "WARNING",
+					fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
+			}
+		}
+		if statusText == "" {
+			statusText = http.StatusText(statusCode)
+		}
+		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+	}()
+
+	arv := clientPool.Get()
+	if arv == nil {
+		statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
+		return
+	}
+	defer clientPool.Put(arv)
+
+	pathParts := strings.Split(r.URL.Path[1:], "/")
+
+	if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+		statusCode = http.StatusNotFound
+		return
+	}
+
+	var targetId string
+	var targetPath []string
+	var tokens []string
+	var reqTokens []string
+	var pathToken bool
+	if len(pathParts) >= 5 && pathParts[1] == "download" {
+		// "/collections/download/{id}/{token}/path..." form:
+		// Don't use our configured anonymous tokens,
+		// Authorization headers, etc.  Just use the token in
+		// the path.
+		targetId = pathParts[2]
+		tokens = []string{pathParts[3]}
+		targetPath = pathParts[4:]
+		pathToken = true
+	} else {
+		// "/collections/{id}/path..." form
+		targetId = pathParts[1]
+		reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		tokens = append(reqTokens, anonymousTokens...)
+		targetPath = pathParts[2:]
+	}
+
+	tokenResult := make(map[string]int)
+	collection := make(map[string]interface{})
+	found := false
+	for _, arv.ApiToken = range tokens {
+		err := arv.Get("collections", targetId, nil, &collection)
+		httpserver.Log(err)
+		if err == nil {
+			// Success
+			found = true
+			break
+		}
+		if srvErr, ok := err.(arvadosclient.APIServerError); ok {
+			switch srvErr.HttpStatusCode {
+			case 404, 401:
+				// Token broken or insufficient to
+				// retrieve collection
+				tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
+				continue
+			}
+		}
+		// Something more serious is wrong
+		statusCode, statusText = http.StatusInternalServerError, err.Error()
+		return
+	}
+	if !found {
+		if pathToken {
+			// The URL is a "secret sharing link", but it
+			// didn't work out. Asking the client for
+			// additional credentials would just be
+			// confusing.
+			statusCode = http.StatusNotFound
+			return
+		}
+		for _, t := range reqTokens {
+			if tokenResult[t] == 404 {
+				// The client provided valid token(s), but the
+				// collection was not found.
+				statusCode = http.StatusNotFound
+				return
+			}
+		}
+		// The client's token was invalid (e.g., expired), or
+		// the client didn't even provide one.  Propagate the
+		// 401 to encourage the client to use a [different]
+		// token.
+		//
+		// TODO(TC): This response would be confusing to
+		// someone trying (anonymously) to download public
+		// data that has been deleted.  Allow a referrer to
+		// provide this context somehow?
+		statusCode = http.StatusUnauthorized
+		w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+		return
+	}
+
+	filename := strings.Join(targetPath, "/")
+	rdr, err := arvadosclient.CollectionFileReader(collection, filename)
+	if os.IsNotExist(err) {
+		statusCode = http.StatusNotFound
+		return
+	} else if err == arvadosclient.ErrNotImplemented {
+		statusCode = http.StatusNotImplemented
+		return
+	} else if err != nil {
+		statusCode, statusText = http.StatusBadGateway, err.Error()
+		return
+	}
+	_, err = io.Copy(w, rdr)
+	if err != nil {
+		statusCode, statusText = http.StatusBadGateway, err.Error()
+	}
+}
diff --git a/services/keepdl/main.go b/services/keepdl/main.go
new file mode 100644
index 0000000..d780cc3
--- /dev/null
+++ b/services/keepdl/main.go
@@ -0,0 +1,28 @@
+package main
+
+import (
+	"flag"
+	"log"
+	"os"
+)
+
+func init() {
+	// MakeArvadosClient returns an error if this env var isn't
+	// available as a default token (even if we explicitly set a
+	// different token before doing anything with the client). We
+	// set this dummy value during init so it doesn't clobber the
+	// one used by "run test servers".
+	os.Setenv("ARVADOS_API_TOKEN", "xxx")
+}
+
+func main() {
+	flag.Parse()
+	srv := &server{}
+	if err := srv.Start(); err != nil {
+		log.Fatal(err)
+	}
+	log.Println("Listening at", srv.Addr)
+	if err := srv.Wait(); err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/services/keepdl/server.go b/services/keepdl/server.go
new file mode 100644
index 0000000..44da00f
--- /dev/null
+++ b/services/keepdl/server.go
@@ -0,0 +1,27 @@
+package main
+
+import (
+	"flag"
+	"net/http"
+
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var address string
+
+func init() {
+	flag.StringVar(&address, "address", "0.0.0.0:80",
+		"Address to listen on, \"host:port\".")
+}
+
+type server struct {
+	httpserver.Server
+}
+
+func (srv *server) Start() error {
+	mux := http.NewServeMux()
+	mux.Handle("/", &handler{})
+	srv.Handler = mux
+	srv.Addr = address
+	return srv.Server.Start()
+}
diff --git a/services/keepdl/server_test.go b/services/keepdl/server_test.go
new file mode 100644
index 0000000..1c36f98
--- /dev/null
+++ b/services/keepdl/server_test.go
@@ -0,0 +1,170 @@
+package main
+
+import (
+	"crypto/md5"
+	"fmt"
+	"os/exec"
+	"strings"
+	"testing"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/keepclient"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+const (
+	spectatorToken  = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+	activeToken     = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+	anonymousToken  = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+	fooCollection   = "zzzzz-4zz18-fy296fx3hot09f7"
+	bogusCollection = "zzzzz-4zz18-totallynotexist"
+	hwCollection    = "zzzzz-4zz18-4en62shvi99lxd4"
+)
+
+// IntegrationSuite tests need an API server and an arv-git-httpd server
+type IntegrationSuite struct {
+	testServer *server
+}
+
+func (s *IntegrationSuite) TestNoToken(c *check.C) {
+	for _, token := range []string{
+		"",
+		"bogustoken",
+	} {
+		hdr, body := s.runCurl(c, token, "/collections/"+fooCollection+"/foo")
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
+		c.Check(body, check.Equals, "")
+
+		if token != "" {
+			hdr, body = s.runCurl(c, token, "/collections/download/"+fooCollection+"/"+token+"/foo")
+			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+			c.Check(body, check.Equals, "")
+		}
+
+		hdr, body = s.runCurl(c, token, "/bad-route")
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+		c.Check(body, check.Equals, "")
+	}
+}
+
+// TODO: Move most cases to functional tests -- at least use Go's own
+// http client instead of forking curl. Just leave enough of an
+// integration test to assure that the documented way of invoking curl
+// really works against the server.
+func (s *IntegrationSuite) Test404(c *check.C) {
+	for _, uri := range []string{
+		// Routing errors
+		"/",
+		"/foo",
+		"/download",
+		"/collections",
+		"/collections/",
+		"/collections/" + fooCollection,
+		"/collections/" + fooCollection + "/",
+		// Non-existent file in collection
+		"/collections/" + fooCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + fooCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		// Non-existent collection
+		"/collections/" + bogusCollection,
+		"/collections/" + bogusCollection + "/",
+		"/collections/" + bogusCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + bogusCollection + "/" + activeToken + "/theperthcountyconspiracy",
+	} {
+		hdr, body := s.runCurl(c, activeToken, uri)
+		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
+		c.Check(body, check.Equals, "")
+	}
+}
+
+func (s *IntegrationSuite) Test200(c *check.C) {
+	anonymousTokens = []string{anonymousToken}
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+	arv.ApiToken = activeToken
+	kc, err := keepclient.MakeKeepClient(&arv)
+	c.Assert(err, check.Equals, nil)
+	kc.PutB([]byte("Hello world\n"))
+	kc.PutB([]byte("foo"))
+	for _, spec := range [][]string{
+		// My collection
+		{activeToken, "/collections/" + fooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"tokensobogus", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{activeToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{anonymousToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		// Anonymously accessible user agreement. These should
+		// start working when CollectionFileReader provides
+		// real data instead of fake/stub data.
+		{"", "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{activeToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{spectatorToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{spectatorToken, "/collections/download/"+hwCollection+"/"+spectatorToken+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+	} {
+		hdr, body := s.runCurl(c, spec[0], spec[1])
+		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {
+			c.Log("Not implemented!")
+			continue
+		}
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
+	}
+}
+
+// Return header block and body.
+func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string) (hdr, body string) {
+	curlArgs := []string{"--silent", "--show-error", "--include"}
+	if token != "" {
+		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
+	}
+	curlArgs = append(curlArgs, args...)
+	curlArgs = append(curlArgs, "http://"+s.testServer.Addr+uri)
+	c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
+	output, err := exec.Command("curl", curlArgs...).CombinedOutput()
+	// Without "-f", curl exits 0 as long as it gets a valid HTTP
+	// response from the server, even if the response status
+	// indicates that the request failed. In our test suite, we
+	// always expect a valid HTTP response, and we parse the
+	// headers ourselves. If curl exits non-zero, our testing
+	// environment is broken.
+	c.Assert(err, check.Equals, nil)
+	hdrsAndBody := strings.SplitN(string(output), "\r\n\r\n", 2)
+	c.Assert(len(hdrsAndBody), check.Equals, 2)
+	hdr = hdrsAndBody[0]
+	body = hdrsAndBody[1]
+	return
+}
+
+func (s *IntegrationSuite) SetUpSuite(c *check.C) {
+	arvadostest.StartAPI()
+	arvadostest.StartKeep()
+}
+
+func (s *IntegrationSuite) TearDownSuite(c *check.C) {
+	arvadostest.StopKeep()
+	arvadostest.StopAPI()
+}
+
+func (s *IntegrationSuite) SetUpTest(c *check.C) {
+	arvadostest.ResetEnv()
+	s.testServer = &server{}
+	var err error
+	address = "127.0.0.1:0"
+	err = s.testServer.Start()
+	c.Assert(err, check.Equals, nil)
+}
+
+func (s *IntegrationSuite) TearDownTest(c *check.C) {
+	var err error
+	if s.testServer != nil {
+		err = s.testServer.Close()
+	}
+	c.Check(err, check.Equals, nil)
+}
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+	check.TestingT(t)
+}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list