[ARVADOS] updated: 1.2.0-36-ga695e615b

Git user git at public.curoverse.com
Thu Aug 23 15:16:06 EDT 2018


Summary of changes:
 lib/dispatchcloud/azure.go      | 100 ++++++++++++++++++++++++++++++++--------
 lib/dispatchcloud/azure_test.go |  48 +++++++++++++++++++
 lib/dispatchcloud/provider.go   |  22 +++++++++
 3 files changed, 152 insertions(+), 18 deletions(-)

       via  a695e615b207f1cfe0cd1989e7b5f1b68c391329 (commit)
      from  71efe208264f60c03cbb1994b8e9e44dadddff0e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit a695e615b207f1cfe0cd1989e7b5f1b68c391329
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Aug 23 15:15:42 2018 -0400

    13964: Detect and report rate limit and quota exceeded errors
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/dispatchcloud/azure.go b/lib/dispatchcloud/azure.go
index 195bd8182..7f7e34fb1 100644
--- a/lib/dispatchcloud/azure.go
+++ b/lib/dispatchcloud/azure.go
@@ -9,6 +9,8 @@ import (
 	"fmt"
 	"log"
 	"net/http"
+	"regexp"
+	"strconv"
 	"sync"
 	"time"
 
@@ -17,6 +19,7 @@ import (
 	"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
 	storageacct "github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2018-02-01/storage"
 	"github.com/Azure/azure-sdk-for-go/storage"
+	"github.com/Azure/go-autorest/autorest"
 	"github.com/Azure/go-autorest/autorest/azure"
 	"github.com/Azure/go-autorest/autorest/azure/auth"
 	"github.com/Azure/go-autorest/autorest/to"
@@ -60,23 +63,25 @@ func (cl *VirtualMachinesClientImpl) CreateOrUpdate(ctx context.Context,
 
 	future, err := cl.inner.CreateOrUpdate(ctx, resourceGroupName, VMName, parameters)
 	if err != nil {
-		return compute.VirtualMachine{}, err
+		return compute.VirtualMachine{}, WrapAzureError(err)
 	}
 	future.WaitForCompletionRef(ctx, cl.inner.Client)
-	return future.Result(cl.inner)
+	r, err := future.Result(cl.inner)
+	return r, WrapAzureError(err)
 }
 
 func (cl *VirtualMachinesClientImpl) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
 	future, err := cl.inner.Delete(ctx, resourceGroupName, VMName)
 	if err != nil {
-		return nil, err
+		return nil, WrapAzureError(err)
 	}
 	err = future.WaitForCompletionRef(ctx, cl.inner.Client)
-	return future.Response(), err
+	return future.Response(), WrapAzureError(err)
 }
 
 func (cl *VirtualMachinesClientImpl) ListComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error) {
-	return cl.inner.ListComplete(ctx, resourceGroupName)
+	r, err := cl.inner.ListComplete(ctx, resourceGroupName)
+	return r, WrapAzureError(err)
 }
 
 type InterfacesClientWrapper interface {
@@ -95,10 +100,10 @@ type InterfacesClientImpl struct {
 func (cl *InterfacesClientImpl) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
 	future, err := cl.inner.Delete(ctx, resourceGroupName, VMName)
 	if err != nil {
-		return nil, err
+		return nil, WrapAzureError(err)
 	}
 	err = future.WaitForCompletionRef(ctx, cl.inner.Client)
-	return future.Response(), err
+	return future.Response(), WrapAzureError(err)
 }
 
 func (cl *InterfacesClientImpl) CreateOrUpdate(ctx context.Context,
@@ -108,14 +113,74 @@ func (cl *InterfacesClientImpl) CreateOrUpdate(ctx context.Context,
 
 	future, err := cl.inner.CreateOrUpdate(ctx, resourceGroupName, networkInterfaceName, parameters)
 	if err != nil {
-		return network.Interface{}, err
+		return network.Interface{}, WrapAzureError(err)
 	}
 	future.WaitForCompletionRef(ctx, cl.inner.Client)
-	return future.Result(cl.inner)
+	r, err := future.Result(cl.inner)
+	return r, WrapAzureError(err)
 }
 
 func (cl *InterfacesClientImpl) ListComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error) {
-	return cl.inner.ListComplete(ctx, resourceGroupName)
+	r, err := cl.inner.ListComplete(ctx, resourceGroupName)
+	return r, WrapAzureError(err)
+}
+
+var quotaRe = regexp.MustCompile(`(?i:exceed|quota|limit)`)
+
+type AzureRateLimitError struct {
+	azure.RequestError
+	earliestRetry time.Time
+}
+
+func (ar *AzureRateLimitError) EarliestRetry() time.Time {
+	return ar.earliestRetry
+}
+
+type AzureQuotaError struct {
+	azure.RequestError
+}
+
+func (ar *AzureQuotaError) IsQuotaError() bool {
+	return true
+}
+
+func WrapAzureError(err error) error {
+	de, ok := err.(autorest.DetailedError)
+	if !ok {
+		return err
+	}
+	rq, ok := de.Original.(*azure.RequestError)
+	if !ok {
+		return err
+	}
+	if rq.Response == nil {
+		return err
+	}
+	if rq.Response.StatusCode == 429 || len(rq.Response.Header["Retry-After"]) >= 1 {
+		// API throttling
+		ra := rq.Response.Header["Retry-After"][0]
+		earliestRetry, parseErr := http.ParseTime(ra)
+		if parseErr != nil {
+			// Could not parse as a timestamp, must be number of seconds
+			dur, parseErr := strconv.ParseInt(ra, 10, 64)
+			if parseErr != nil {
+				earliestRetry = time.Now().Add(time.Duration(dur) * time.Second)
+			}
+		}
+		if parseErr != nil {
+			// Couldn't make sense of retry-after,
+			// so set retry to 20 seconds
+			earliestRetry = time.Now().Add(20 * time.Second)
+		}
+		return &AzureRateLimitError{*rq, earliestRetry}
+	}
+	if rq.ServiceError == nil {
+		return err
+	}
+	if quotaRe.FindString(rq.ServiceError.Code) != "" || quotaRe.FindString(rq.ServiceError.Message) != "" {
+		return &AzureQuotaError{*rq}
+	}
+	return err
 }
 
 type AzureProvider struct {
@@ -214,7 +279,7 @@ func (az *AzureProvider) Create(ctx context.Context,
 	}
 	nic, err := az.netClient.CreateOrUpdate(ctx, az.azconfig.ResourceGroup, name+"-nic", nicParameters)
 	if err != nil {
-		return nil, err
+		return nil, WrapAzureError(err)
 	}
 
 	log.Printf("Created NIC %v", *nic.ID)
@@ -283,7 +348,7 @@ func (az *AzureProvider) Create(ctx context.Context,
 
 	vm, err := az.vmClient.CreateOrUpdate(ctx, az.azconfig.ResourceGroup, name, vmParameters)
 	if err != nil {
-		return nil, err
+		return nil, WrapAzureError(err)
 	}
 
 	return &AzureInstance{
@@ -302,14 +367,14 @@ func (az *AzureProvider) Instances(ctx context.Context) ([]Instance, error) {
 
 	result, err := az.vmClient.ListComplete(ctx, az.azconfig.ResourceGroup)
 	if err != nil {
-		return nil, err
+		return nil, WrapAzureError(err)
 	}
 
 	instances := make([]Instance, 0)
 
 	for ; result.NotDone(); err = result.Next() {
 		if err != nil {
-			return nil, err
+			return nil, WrapAzureError(err)
 		}
 		if result.Value().Tags["arvados-class"] != nil &&
 			result.Value().Tags["arvados-instance-type"] != nil &&
@@ -327,7 +392,7 @@ func (az *AzureProvider) Instances(ctx context.Context) ([]Instance, error) {
 func (az *AzureProvider) ManageNics(ctx context.Context) (map[string]network.Interface, error) {
 	result, err := az.netClient.ListComplete(ctx, az.azconfig.ResourceGroup)
 	if err != nil {
-		return nil, err
+		return nil, WrapAzureError(err)
 	}
 
 	interfaces := make(map[string]network.Interface)
@@ -360,7 +425,7 @@ func (az *AzureProvider) ManageNics(ctx context.Context) (map[string]network.Int
 	for ; result.NotDone(); err = result.Next() {
 		if err != nil {
 			log.Printf("Error listing nics: %v", err)
-			return interfaces, nil
+			return interfaces, WrapAzureError(nil)
 		}
 		if result.Value().Tags["arvados-class"] != nil &&
 			(*result.Value().Tags["arvados-class"]) == "crunch-dynamic-compute" {
@@ -481,8 +546,7 @@ func (ai *AzureInstance) GetTags() ([]InstanceTag, error) {
 
 func (ai *AzureInstance) Destroy(ctx context.Context) error {
 	_, err := ai.provider.vmClient.Delete(ctx, ai.provider.azconfig.ResourceGroup, *ai.vm.Name)
-	// check response code?
-	return err
+	return WrapAzureError(err)
 }
 
 func (ai *AzureInstance) Address() string {
diff --git a/lib/dispatchcloud/azure_test.go b/lib/dispatchcloud/azure_test.go
index bcba51bfd..0c8c8d534 100644
--- a/lib/dispatchcloud/azure_test.go
+++ b/lib/dispatchcloud/azure_test.go
@@ -14,6 +14,8 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/config"
 	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
 	"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
+	"github.com/Azure/go-autorest/autorest"
+	"github.com/Azure/go-autorest/autorest/azure"
 	"github.com/Azure/go-autorest/autorest/to"
 	check "gopkg.in/check.v1"
 )
@@ -156,3 +158,49 @@ func (*AzureProviderSuite) TestDestroyInstances(c *check.C) {
 		c.Check(i.Destroy(context.Background()), check.IsNil)
 	}
 }
+
+func (*AzureProviderSuite) TestDeleteFake(c *check.C) {
+	ap, _, _, err := GetProvider()
+	if err != nil {
+		c.Fatal("Error making provider", err)
+	}
+
+	_, err = ap.(*AzureProvider).netClient.Delete(context.Background(), "fakefakefake", "fakefakefake")
+
+	rq := err.(autorest.DetailedError).Original.(*azure.RequestError)
+
+	log.Printf("%v %q %q", rq.Response.StatusCode, rq.ServiceError.Code, rq.ServiceError.Message)
+}
+
+func (*AzureProviderSuite) TestWrapError(c *check.C) {
+	retryError := autorest.DetailedError{
+		Original: &azure.RequestError{
+			DetailedError: autorest.DetailedError{
+				Response: &http.Response{
+					StatusCode: 429,
+					Header:     map[string][]string{"Retry-After": []string{"123"}},
+				},
+			},
+			ServiceError: &azure.ServiceError{},
+		},
+	}
+	wrapped := WrapAzureError(retryError)
+	_, ok := wrapped.(RateLimitError)
+	c.Check(ok, check.Equals, true)
+
+	quotaError := autorest.DetailedError{
+		Original: &azure.RequestError{
+			DetailedError: autorest.DetailedError{
+				Response: &http.Response{
+					StatusCode: 503,
+				},
+			},
+			ServiceError: &azure.ServiceError{
+				Message: "No more quota",
+			},
+		},
+	}
+	wrapped = WrapAzureError(quotaError)
+	_, ok = wrapped.(QuotaError)
+	c.Check(ok, check.Equals, true)
+}
diff --git a/lib/dispatchcloud/provider.go b/lib/dispatchcloud/provider.go
index c5411128a..d8b1ad6c0 100644
--- a/lib/dispatchcloud/provider.go
+++ b/lib/dispatchcloud/provider.go
@@ -6,10 +6,32 @@ package dispatchcloud
 
 import (
 	"context"
+	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 )
 
+// A RateLimitError should be returned by a Provider when the cloud
+// service indicates it is rejecting all API calls for some time
+// interval.
+type RateLimitError interface {
+	// Time before which the caller should expect requests to
+	// fail.
+	EarliestRetry() time.Time
+	error
+}
+
+// A QuotaError should be returned by a Provider when the cloud
+// service indicates the account cannot create more VMs than already
+// exist.
+type QuotaError interface {
+	// If true, don't create more instances until some existing
+	// instances are destroyed. If false, don't handle the error
+	// as a quota error.
+	IsQuotaError() bool
+	error
+}
+
 type InstanceTag string
 type InstanceID string
 type ImageID string

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list