diff --git a/e2e/vmss.go b/e2e/vmss.go index 3b1b9202d45..9ec402850b8 100644 --- a/e2e/vmss.go +++ b/e2e/vmss.go @@ -510,19 +510,31 @@ func skipTestIfSKUNotAvailableErr(t testing.TB, err error) { return } var respErr *azcore.ResponseError - if !errors.As(err, &respErr) || respErr.StatusCode != 409 { + if !errors.As(err, &respErr) { return } - // sometimes the SKU is not available and we can't do anything. Skip the test in this case. + // SKU not available in region (409) if respErr.ErrorCode == "SkuNotAvailable" { t.Skip("skipping scenario SKU not available", t.Name(), err) } - // sometimes the SKU quota is exceeded and we can't do anything. Skip the test in this case. + // Quota exceeded (409) if respErr.ErrorCode == "OperationNotAllowed" && strings.Contains(respErr.Error(), "exceeding approved") && strings.Contains(respErr.Error(), "quota") { t.Skip("skipping scenario SKU quota exceeded", t.Name(), err) } + // Allocation failures for GPU/exotic SKUs with limited availability. + // AllocationFailed returns HTTP 200 (not 409), so it won't be caught by status code checks. + // These can persist even after CreateVMSSWithRetry exhausts its retries. + if respErr.ErrorCode == "AllocationFailed" { + t.Skip("skipping scenario allocation failed", t.Name(), err) + } + // Zonal allocation constraints — no capacity in the requested zone (409) + if respErr.ErrorCode == "OverconstrainedZonalAllocationRequest" || + respErr.ErrorCode == "OverconstrainedAllocationRequest" || + respErr.ErrorCode == "ZonalAllocationFailed" { + t.Skip("skipping scenario zonal allocation constrained", t.Name(), err) + } } func cleanupVMSS(ctx context.Context, s *Scenario, vm *ScenarioVM) {