From 06da40ec2ead45cadc1441c4f59ad4ad350528a6 Mon Sep 17 00:00:00 2001 From: Benjamin Ingberg Date: Fri, 29 May 2026 09:49:44 +0200 Subject: [PATCH 1/2] Add support for SplitBlob and SpliceBlob methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds support for the SplitBlob and SpliceBlob methods from the Remote Execution v2 (REv2) API. SplitBlob and SpliceBlob can be used to facilitate uploads and downloads of large files, but a naïve implementation like this has some major drawbacks as well. The blobs must exist in both their chunked and non-chunked form, which may significantly increase storage requirements for large blobs. The protocol gives no guarantee that a large blob stored in the CAS exists in its chunked form, which forces you to perform a fairly heavy Split call that loads the entire large blob in order to decompose it into its chunks. This implementation mostly exists as a stepping stone for a different implementation where Buildbarn internally manages all blobs as chunked blobs. 
--- MODULE.bazel | 1 + cmd/bb_storage/main.go | 31 +- go.mod | 1 + go.sum | 2 + pkg/blobstore/BUILD.bazel | 1 + pkg/blobstore/chunklistvalidating/BUILD.bazel | 43 ++ .../chunk_concatenating_reader.go | 68 +++ .../chunk_list_validating_blob_access.go | 371 +++++++++++++++ .../chunk_list_validating_blob_access_test.go | 410 ++++++++++++++++ pkg/blobstore/chunklistvalidating/chunker.go | 16 + .../fake_blob_access_test.go | 88 ++++ .../integration/BUILD.bazel | 19 + .../chunk_list_validating_integration_test.go | 445 ++++++++++++++++++ .../chunklistvalidating/reader_chunker.go | 53 +++ .../reader_chunker_test.go | 107 +++++ pkg/blobstore/cls_read_buffer_factory.go | 27 ++ pkg/blobstore/configuration/BUILD.bazel | 2 + .../configuration/blob_access_creator.go | 2 +- .../configuration/cas_blob_access_creator.go | 4 +- .../configuration/cls_blob_access_creator.go | 86 ++++ .../configuration/new_blob_access.go | 19 +- .../proto_blob_access_creator.go | 3 +- pkg/blobstore/grpcclients/BUILD.bazel | 1 + pkg/blobstore/grpcclients/cls_blob_access.go | 101 ++++ .../content_addressable_storage_server.go | 91 +++- ...content_addressable_storage_server_test.go | 134 +++++- .../configuration/bb_storage/bb_storage.pb.go | 54 ++- .../configuration/bb_storage/bb_storage.proto | 4 + pkg/proto/configuration/blobstore/BUILD.bazel | 2 + .../configuration/blobstore/blobstore.pb.go | 273 +++++++---- .../configuration/blobstore/blobstore.proto | 41 ++ 31 files changed, 2365 insertions(+), 135 deletions(-) create mode 100644 pkg/blobstore/chunklistvalidating/BUILD.bazel create mode 100644 pkg/blobstore/chunklistvalidating/chunk_concatenating_reader.go create mode 100644 pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access.go create mode 100644 pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access_test.go create mode 100644 pkg/blobstore/chunklistvalidating/chunker.go create mode 100644 pkg/blobstore/chunklistvalidating/fake_blob_access_test.go create mode 100644 
pkg/blobstore/chunklistvalidating/integration/BUILD.bazel create mode 100644 pkg/blobstore/chunklistvalidating/integration/chunk_list_validating_integration_test.go create mode 100644 pkg/blobstore/chunklistvalidating/reader_chunker.go create mode 100644 pkg/blobstore/chunklistvalidating/reader_chunker_test.go create mode 100644 pkg/blobstore/cls_read_buffer_factory.go create mode 100644 pkg/blobstore/configuration/cls_blob_access_creator.go create mode 100644 pkg/blobstore/grpcclients/cls_blob_access.go diff --git a/MODULE.bazel b/MODULE.bazel index 16b0c02a2..13a03ec76 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -47,6 +47,7 @@ use_repo( "com_github_aws_aws_sdk_go_v2_service_sts", "com_github_bazelbuild_buildtools", "com_github_bazelbuild_remote_apis", + "com_github_buildbarn_go_cdc", "com_github_buildbarn_go_sha256tree", "com_github_fxtlabs_primes", "com_github_go_jose_go_jose_v3", diff --git a/cmd/bb_storage/main.go b/cmd/bb_storage/main.go index 192fcb5e4..91bdeb343 100644 --- a/cmd/bb_storage/main.go +++ b/cmd/bb_storage/main.go @@ -84,6 +84,27 @@ func main() { contentAddressableStorage = authorizedBackend } + // Chunk List Storage (CLS). + var chunkListStorage blobstore.BlobAccess + if configuration.ChunkListStorage != nil { + info, authorizedBackend, allAuthorizers, err := newScannableBlobAccess( + dependenciesGroup, + configuration.ChunkListStorage, + blobstore_configuration.NewCLSBlobAccessCreator( + contentAddressableStorageInfo, + grpcClientFactory, + int(configuration.MaximumMessageSizeBytes), + ), + grpcClientFactory, + ) + if err != nil { + return util.StatusWrap(err, "Failed to create Chunk Map") + } + cacheCapabilitiesProviders = append(cacheCapabilitiesProviders, info.BlobAccess) + cacheCapabilitiesAuthorizers = append(cacheCapabilitiesAuthorizers, allAuthorizers...) + chunkListStorage = authorizedBackend + } + // Action Cache (AC). 
var actionCache blobstore.BlobAccess if configuration.ActionCache != nil { @@ -193,12 +214,14 @@ func main() { configuration.GrpcServers, func(s grpc.ServiceRegistrar) { if contentAddressableStorage != nil { + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer( + contentAddressableStorage, + chunkListStorage, + configuration.MaximumMessageSizeBytes, + ) remoteexecution.RegisterContentAddressableStorageServer( s, - grpcservers.NewContentAddressableStorageServer( - contentAddressableStorage, - configuration.MaximumMessageSizeBytes, - ), + contentAddressableStorageServer, ) bytestream.RegisterByteStreamServer( s, diff --git a/go.mod b/go.mod index be6f479e5..ffb113d4e 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,7 @@ require ( github.com/bazelbuild/buildtools v0.0.0-20260527135131-3b47c424ecf5 github.com/bazelbuild/remote-apis v0.0.0-20260331222004-becdd8f9ff81 github.com/bazelbuild/rules_go v0.60.0 + github.com/buildbarn/go-cdc v0.0.9 github.com/buildbarn/go-sha256tree v0.0.0-20250310211320-0f70f20e855b github.com/fxtlabs/primes v0.0.0-20150821004651-dad82d10a449 github.com/go-jose/go-jose/v3 v3.0.5 diff --git a/go.sum b/go.sum index 1e8199db4..8cd5645b3 100644 --- a/go.sum +++ b/go.sum @@ -77,6 +77,8 @@ github.com/bazelbuild/rules_go v0.60.0/go.mod h1:CYcohJVxs4n7eftbC39GCqaEJm3E1EM github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/buildbarn/go-cdc v0.0.9 h1:bWfgn92ed8Oo2zZKJdMAfB0APGz7Q8zvnqUn3hPuihM= +github.com/buildbarn/go-cdc v0.0.9/go.mod h1:KUMqSMvoRlby3uak9aKIvgz3KgNqwm2CMUoVX1EDr8k= github.com/buildbarn/go-sha256tree v0.0.0-20250310211320-0f70f20e855b h1:IKUxixGBm9UxobU7c248z0BF0ojG19uoSLz8MFZM/KA= github.com/buildbarn/go-sha256tree v0.0.0-20250310211320-0f70f20e855b/go.mod 
h1:e7g3/yWApcg+PpDqd4eQEEV8pexQmfCgK3frP+1Wuvk= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= diff --git a/pkg/blobstore/BUILD.bazel b/pkg/blobstore/BUILD.bazel index 45d054538..80a4a60d5 100644 --- a/pkg/blobstore/BUILD.bazel +++ b/pkg/blobstore/BUILD.bazel @@ -9,6 +9,7 @@ go_library( "authorizing_blob_access.go", "blob_access.go", "cas_read_buffer_factory.go", + "cls_read_buffer_factory.go", "deadline_enforcing_blob_access.go", "demultiplexing_blob_access.go", "empty_blob_injecting_blob_access.go", diff --git a/pkg/blobstore/chunklistvalidating/BUILD.bazel b/pkg/blobstore/chunklistvalidating/BUILD.bazel new file mode 100644 index 000000000..1bb49c7f0 --- /dev/null +++ b/pkg/blobstore/chunklistvalidating/BUILD.bazel @@ -0,0 +1,43 @@ +load("@rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "chunklistvalidating", + srcs = [ + "chunk_concatenating_reader.go", + "chunk_list_validating_blob_access.go", + "chunker.go", + "reader_chunker.go", + ], + importpath = "github.com/buildbarn/bb-storage/pkg/blobstore/chunklistvalidating", + visibility = ["//visibility:public"], + deps = [ + "//pkg/blobstore", + "//pkg/blobstore/buffer", + "//pkg/digest", + "//pkg/util", + "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", + "@com_github_buildbarn_go_cdc//:go-cdc", + "@org_golang_google_grpc//codes", + "@org_golang_google_grpc//status", + "@org_golang_x_sync//errgroup", + ], +) + +go_test( + name = "chunklistvalidating_test", + srcs = [ + "chunk_list_validating_blob_access_test.go", + "fake_blob_access_test.go", + "reader_chunker_test.go", + ], + deps = [ + ":chunklistvalidating", + "//pkg/blobstore", + "//pkg/blobstore/buffer", + "//pkg/digest", + "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", + "@com_github_stretchr_testify//require", + "@org_golang_google_grpc//codes", + "@org_golang_google_grpc//status", + ], +) diff --git 
a/pkg/blobstore/chunklistvalidating/chunk_concatenating_reader.go b/pkg/blobstore/chunklistvalidating/chunk_concatenating_reader.go new file mode 100644 index 000000000..643af130e --- /dev/null +++ b/pkg/blobstore/chunklistvalidating/chunk_concatenating_reader.go @@ -0,0 +1,68 @@ +package chunklistvalidating + +import ( + "context" + "io" + + "github.com/buildbarn/bb-storage/pkg/blobstore" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/buildbarn/bb-storage/pkg/util" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// chunkConcatenatingReader is a helper utility that implements the +// io.ReadCloser api over a series of digest.Digest objectes fetched +// sequentially from the CAS. +type chunkConcatenatingReader struct { + ctx context.Context + contentAddressableStorage blobstore.BlobAccess + chunkDigests []digest.Digest + currentIndex int + currentReader io.ReadCloser + closed bool +} + +func (r *chunkConcatenatingReader) Read(p []byte) (int, error) { + if r.closed { + return 0, status.Error(codes.Internal, "Reader is already closed") + } + for { + if r.currentReader == nil { + if r.currentIndex >= len(r.chunkDigests) { + return 0, io.EOF + } + chunkDigest := r.chunkDigests[r.currentIndex] + b := r.contentAddressableStorage.Get(r.ctx, chunkDigest) + r.currentReader = b.ToReader() + r.currentIndex++ + } + + n, err := r.currentReader.Read(p) + if n > 0 { + return n, nil + } + if err == io.EOF { + err = r.currentReader.Close() + r.currentReader = nil + if err != nil { + return 0, err + } + continue + } + if err != nil { + _ = r.currentReader.Close() + r.currentReader = nil + return 0, util.StatusWrap(err, "Failed to read chunk") + } + } +} + +func (r *chunkConcatenatingReader) Close() (err error) { + r.closed = true + if r.currentReader != nil { + err = r.currentReader.Close() + r.currentReader = nil + } + return err +} diff --git a/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access.go 
b/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access.go new file mode 100644 index 000000000..807227f9f --- /dev/null +++ b/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access.go @@ -0,0 +1,371 @@ +package chunklistvalidating + +import ( + "context" + "io" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/pkg/blobstore" + "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/buildbarn/bb-storage/pkg/util" + "golang.org/x/sync/errgroup" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +type chunkListValidatingBlobAccess struct { + blobstore.BlobAccess + contentAddressableStorage blobstore.BlobAccess + maximumMessageSizeBytes int +} + +// NewChunkListValidatingBlobAccess creates a wrapper around a Chunk +// List Storage (CLS) that ensures only valid chunk lists are stored in +// the CLS. A valid chunk list is a chunk list which follows the +// chunking parameters, has all the chunks present in the Content +// Addressable Storage (CAS) and where the chunks concatenate into the +// appropriate digest. +// +// This validation is fairly expensive and validation should only be +// done at a single layer as close as possible to the CAS where the full +// view of the CAS is available. +func NewChunkListValidatingBlobAccess(chunkListStorage, contentAddressableStorage blobstore.BlobAccess, maximumMessageSizeBytes int) blobstore.BlobAccess { + return &chunkListValidatingBlobAccess{ + BlobAccess: chunkListStorage, + contentAddressableStorage: contentAddressableStorage, + maximumMessageSizeBytes: maximumMessageSizeBytes, + } +} + +// Fetch the chunking parameters from the GetCapabilities +// implementation. 
+func (ba *chunkListValidatingBlobAccess) getValidChunkingParameters(ctx context.Context, instanceName digest.InstanceName) (*remoteexecution.RepMaxCdcParams, error) { + capabilities, err := ba.BlobAccess.GetCapabilities(ctx, instanceName) + if err != nil { + return nil, util.StatusWrap(err, "Unable to GetCapabilities to determine chunking parameters") + } + + params := capabilities.GetCacheCapabilities().GetRepMaxCdcParams() + if params == nil { + return nil, status.Error(codes.Unimplemented, "This backend only supports upstream servers with rep max cdc support.") + } + if params.MinChunkSizeBytes < 64 { + return nil, status.Errorf(codes.Internal, "MinChunkSizeBytes was %d but a minimum of 64 is required.", params.MinChunkSizeBytes) + } + maxMinChunkSize := (ba.maximumMessageSizeBytes + 1) / 2 + if params.MinChunkSizeBytes > uint64(maxMinChunkSize) { + return nil, status.Errorf(codes.Internal, "MinChunkSizeBytes was %d but a maximum of %d is supported with the configured maximum message size.", params.MinChunkSizeBytes, maxMinChunkSize) + } + + return params, nil +} + +// Check the downstream blob access if this particular blob has already +// been split. If that's the case and all the chunks are still there we +// can return early. In case of errors we will return nil and continue +// with the regular code path. 
+func (ba *chunkListValidatingBlobAccess) checkSplitResult(ctx context.Context, d digest.Digest) buffer.Buffer { + b1, b2 := ba.BlobAccess.Get(ctx, d).CloneCopy(ba.maximumMessageSizeBytes) + responseMsg, err := b1.ToProto(&remoteexecution.SplitBlobResponse{}, ba.maximumMessageSizeBytes) + if err != nil { + b2.Discard() + return nil + } + + splitBlobResponse := responseMsg.(*remoteexecution.SplitBlobResponse) + digestFunction := d.GetDigestFunction() + digestSetBuilder := digest.NewSetBuilder(len(splitBlobResponse.ChunkDigests)) + digestSetBuilder.Add(d) + + for _, chunkDigestProto := range splitBlobResponse.ChunkDigests { + chunkDigest, err := digestFunction.NewDigestFromProto(chunkDigestProto) + if err != nil { + b2.Discard() + return nil + } + digestSetBuilder.Add(chunkDigest) + } + + missing, err := ba.contentAddressableStorage.FindMissing(ctx, digestSetBuilder.Build()) + if err == nil && missing.Empty() { + return b2 + } + b2.Discard() + return nil +} + +// Get returns a valid SplitResult for the given digest chunking the +// blob and storing the chunk list if needed. +func (ba *chunkListValidatingBlobAccess) Get(ctx context.Context, d digest.Digest) buffer.Buffer { + params, err := ba.getValidChunkingParameters(ctx, d.GetInstanceName()) + if err != nil { + return buffer.NewBufferFromError(err) + } + + // Check for the trivial case, the blob is small enough that it will + // always decompose to a single chunk of the same size as the + // original blob. We verify the existence of the blob in CAS and + // break out early. 
+ blobSize := d.GetSizeBytes() + if uint64(blobSize) < 2*params.MinChunkSizeBytes { + missing, err := ba.contentAddressableStorage.FindMissing(ctx, d.ToSingletonSet()) + if err != nil { + return buffer.NewBufferFromError(util.StatusWrap(err, "Failed to verify blob existence")) + } + if !missing.Empty() { + return buffer.NewBufferFromError(status.Error(codes.NotFound, "Blob not found in CAS")) + } + + response := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{d.GetProto()}, + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + } + + return buffer.NewProtoBufferFromProto(response, buffer.UserProvided) + } + + // Check if we have already computed the result for this blob. + if result := ba.checkSplitResult(ctx, d); result != nil { + return result + } + + // Fallthrough case, compute the chunk list, upload the chunks and + // store the chunk list. + blobReader := ba.contentAddressableStorage.Get(ctx, d).ToReader() + defer blobReader.Close() + chunker := NewReaderChunker(d.GetDigestFunction(), blobReader, int64(params.MinChunkSizeBytes), int64(params.HorizonSizeBytes)) + + chunkDigests := make([]*remoteexecution.Digest, 0, uint64(blobSize)/params.MinChunkSizeBytes+1) + + for { + chunk, err := chunker.NextChunk() + if err == io.EOF { + break + } + if err != nil { + return buffer.NewBufferFromError(err) + } + + missing, err := ba.contentAddressableStorage.FindMissing(ctx, chunk.Digest.ToSingletonSet()) + if err != nil { + return buffer.NewBufferFromError(err) + } + if !missing.Empty() { + if err := ba.contentAddressableStorage.Put(ctx, chunk.Digest, buffer.NewValidatedBufferFromByteSlice(chunk.Data)); err != nil { + return buffer.NewBufferFromError(err) + } + } + + chunkDigests = append(chunkDigests, chunk.Digest.GetProto()) + } + + response := &remoteexecution.SplitBlobResponse{ + ChunkDigests: chunkDigests, + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + } + + b1, b2 := 
buffer.NewProtoBufferFromProto(response, buffer.UserProvided).CloneCopy(ba.maximumMessageSizeBytes) + + if err := ba.BlobAccess.Put(ctx, d, b1); err != nil { + b2.Discard() + return buffer.NewBufferFromError(util.StatusWrap(err, "Failed to store the split blob response")) + } + + return b2 +} + +func (ba *chunkListValidatingBlobAccess) matchesStoredChunkList(ctx context.Context, d digest.Digest, userResponse *remoteexecution.SplitBlobResponse) bool { + existingMsg, err := ba.BlobAccess.Get(ctx, d).ToProto(&remoteexecution.SplitBlobResponse{}, ba.maximumMessageSizeBytes) + if err != nil { + return false + } + + cachedResponse := existingMsg.(*remoteexecution.SplitBlobResponse) + if len(cachedResponse.ChunkDigests) == len(userResponse.ChunkDigests) { + for i, c := range cachedResponse.ChunkDigests { + u := userResponse.ChunkDigests[i] + if u.Hash != c.Hash || u.SizeBytes != c.SizeBytes { + return false + } + } + } + + return true +} + +func (ba *chunkListValidatingBlobAccess) Put(ctx context.Context, d digest.Digest, b buffer.Buffer) error { + // Parse the buffer as a SplitBlobResponse + msg, err := b.ToProto(&remoteexecution.SplitBlobResponse{}, ba.maximumMessageSizeBytes) + if err != nil { + return util.StatusWrap(err, "Failed to parse input as SplitBlobResponse") + } + userResponse := msg.(*remoteexecution.SplitBlobResponse) + + digestFunction := d.GetDigestFunction() + var userChunks []digest.Digest + digestSetBuilder := digest.NewSetBuilder(len(userResponse.ChunkDigests)) + for _, chunkDigestProto := range userResponse.ChunkDigests { + chunkDigest, err := digestFunction.NewDigestFromProto(chunkDigestProto) + if err != nil { + return status.Errorf(codes.InvalidArgument, "Invalid chunk digest: %v", err) + } + digestSetBuilder.Add(chunkDigest) + userChunks = append(userChunks, chunkDigest) + } + + // Check that all referenced chunks are present in storage. 
+ missing, err := ba.contentAddressableStorage.FindMissing(ctx, digestSetBuilder.Build()) + if err != nil { + return util.StatusWrap(err, "Failed to check existence of chunks") + } + if !missing.Empty() { + return status.Error(codes.NotFound, "At least one chunk in the chunk list was not found") + } + + // Check the trivial cases without hitting the downstream blob + // stores. + + // No chunks given, blob must be the empty blob. + if len(userChunks) == 0 { + if d.GetSizeBytes() != 0 { + return status.Error(codes.InvalidArgument, "Chunk list does not compose to blob") + } + if d.GetDigestFunction().NewGenerator(0).Sum() != d { + return status.Error(codes.InvalidArgument, "Chunk list does not compose to blob") + } + return nil + } + // Single chunk given, the blob must be equal to the chunk. At this + // point we have already verified the presence of the chunk so we do + // not have to verify the presence of the blob. + if len(userChunks) == 1 { + if d != userChunks[0] { + return status.Error(codes.InvalidArgument, "Chunk list does not compose to blob") + } + return nil + } + + chunksMatchesStoredLists := ba.matchesStoredChunkList(ctx, d, userResponse) + missing, err = ba.contentAddressableStorage.FindMissing(ctx, d.ToSingletonSet()) + if err != nil { + return util.StatusWrap(err, "Failed to check existence of blob") + } + blobExistsInCAS := missing.Empty() + + // The request is identical to an already existing chunk list with + // content we have verified exists in CAS. + if blobExistsInCAS && chunksMatchesStoredLists { + return nil + } + + // No more shortcuts available go through the heavy path of + // concatenating/verifying and chunking the blobs. 
+ params, err := ba.getValidChunkingParameters(ctx, d.GetInstanceName()) + if err != nil { + return err + } + + reader := &chunkConcatenatingReader{ + ctx: ctx, + contentAddressableStorage: ba.contentAddressableStorage, + chunkDigests: userChunks, + } + + blobBuffer := buffer.NewCASBufferFromReader(d, reader, buffer.UserProvided) + b1, b2 := blobBuffer.CloneStream() + + // Stream 1: Uploads the blob to CAS. + group, gCtx := errgroup.WithContext(ctx) + group.Go(func() error { + if blobExistsInCAS { + // Upload unnecessary, blob already exists in CAS. + b1.Discard() + return nil + } + return ba.contentAddressableStorage.Put(gCtx, d, b1) + }) + + // Stream 2: Chunk the stream to compute the digest and cache the + // canonical chunks. + var canonicalChunkDigests []*remoteexecution.Digest + group.Go(func() error { + b2Reader := b2.ToReader() + defer b2Reader.Close() + chunker := NewReaderChunker(d.GetDigestFunction(), b2Reader, int64(params.MinChunkSizeBytes), int64(params.HorizonSizeBytes)) + for { + chunk, err := chunker.NextChunk() + if err == io.EOF { + return nil + } + if err != nil { + return err + } + + missing, err := ba.contentAddressableStorage.FindMissing(gCtx, chunk.Digest.ToSingletonSet()) + if err != nil { + return err + } + if !missing.Empty() { + if err := ba.contentAddressableStorage.Put(gCtx, chunk.Digest, buffer.NewValidatedBufferFromByteSlice(chunk.Data)); err != nil { + return util.StatusWrap(err, "Failed to save chunk") + } + } + canonicalChunkDigests = append(canonicalChunkDigests, chunk.Digest.GetProto()) + } + }) + + // Wait for the full blob validation and upload to complete. + if err := group.Wait(); err != nil { + return util.StatusWrap(err, "Failed to splice the blob") + } + + // Store the canonical response. 
+ canonicalResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: canonicalChunkDigests, + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + } + canonicalBuffer := buffer.NewProtoBufferFromProto(canonicalResponse, buffer.UserProvided) + if err := ba.BlobAccess.Put(ctx, d, canonicalBuffer); err != nil { + return util.StatusWrap(err, "Failed to save canonical chunk list") + } + return nil +} + +func (ba *chunkListValidatingBlobAccess) findMissingChunks(ctx context.Context, d digest.Digest) (digest.Set, error) { + splitBlobResponseProto, err := ba.BlobAccess.Get(ctx, d).ToProto(&remoteexecution.SplitBlobResponse{}, ba.maximumMessageSizeBytes) + if err != nil { + return digest.EmptySet, err + } + splitBlobResponse := splitBlobResponseProto.(*remoteexecution.SplitBlobResponse) + digestFunction := d.GetDigestFunction() + builder := digest.NewSetBuilder(len(splitBlobResponse.ChunkDigests)) + for _, chunkDigestProto := range splitBlobResponse.ChunkDigests { + chunkDigest, err := digestFunction.NewDigestFromProto(chunkDigestProto) + if err != nil { + return digest.EmptySet, util.StatusWrapf(err, "Invalid chunk digest %#v", chunkDigestProto) + } + builder.Add(chunkDigest) + } + return ba.contentAddressableStorage.FindMissing(ctx, builder.Build()) +} + +func (ba *chunkListValidatingBlobAccess) FindMissing(ctx context.Context, digests digest.Set) (digest.Set, error) { + missingBlobs, err := ba.BlobAccess.FindMissing(ctx, digests) + if err != nil { + return digest.EmptySet, err + } + nonMissingBlobs, _, _ := digest.GetDifferenceAndIntersection(digests, missingBlobs) + missings := make([]digest.Set, 1, 1+nonMissingBlobs.Length()) + missings[0] = missingBlobs + for _, d := range nonMissingBlobs.Items() { + missingChunks, err := ba.findMissingChunks(ctx, d) + if err != nil { + return digest.EmptySet, err + } + if !missingChunks.Empty() { + missings = append(missings, d.ToSingletonSet()) + } + } + return digest.GetUnion(missings), nil +} diff --git 
a/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access_test.go b/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access_test.go new file mode 100644 index 000000000..3830e720b --- /dev/null +++ b/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access_test.go @@ -0,0 +1,410 @@ +package chunklistvalidating_test + +import ( + "bytes" + "context" + "testing" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/blobstore/chunklistvalidating" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/stretchr/testify/require" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// mustComputeDigest is a test helper to easily generate digests from +// byte slices. +func mustComputeDigest(t *testing.T, digestFunction digest.Function, data []byte) digest.Digest { + t.Helper() + generator := digestFunction.NewGenerator(int64(len(data))) + _, err := generator.Write(data) + require.NoError(t, err) + return generator.Sum() +} + +var testCDCParams = &remoteexecution.RepMaxCdcParams{ + MinChunkSizeBytes: 1024, + HorizonSizeBytes: 8 * 1024, +} +var maximumMessageSizeBytes = 1024 * 1024 + +func TestChunkListValidatingBlobAccessGetTrivialSmallBlob(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + blobData := []byte("Small trivial blob") + blobDigest := mustComputeDigest(t, digestFunction, blobData) + + require.NoError(t, fakeCAS.Put(ctx, blobDigest, buffer.NewValidatedBufferFromByteSlice(blobData))) + + fakeCAS.ResetTouches() + msg, err := validatingCLS.Get(ctx, 
blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) + require.NoError(t, err) + + splitResponse := msg.(*remoteexecution.SplitBlobResponse) + require.Len(t, splitResponse.ChunkDigests, 1) + require.Equal(t, blobDigest.GetProto().Hash, splitResponse.ChunkDigests[0].Hash) + require.Greater(t, fakeCAS.GetTouches(blobDigest), 0, "Blob did not have its lifetime renewed.") +} + +func TestChunkListValidatingBlobAccessGetLargeBlob(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + blobData := bytes.Repeat([]byte("test_data_pattern_"), 6000) + blobDigest := mustComputeDigest(t, digestFunction, blobData) + + require.NoError(t, fakeCAS.Put(ctx, blobDigest, buffer.NewValidatedBufferFromByteSlice(blobData))) + + msg, err := validatingCLS.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) + require.NoError(t, err) + + splitResponse := msg.(*remoteexecution.SplitBlobResponse) + require.Greater(t, len(splitResponse.ChunkDigests), 1, "Blob should have been divided into multiple chunks.") + + for _, chunkProto := range splitResponse.ChunkDigests { + chunkDigest, err := digestFunction.NewDigestFromProto(chunkProto) + require.NoError(t, err) + + require.Greater(t, fakeCAS.GetTouches(chunkDigest), 0, "Chunk generated by CDC did not have its lifetime renewed.") + missing, err := fakeCAS.FindMissing(ctx, chunkDigest.ToSingletonSet()) + require.NoError(t, err) + require.True(t, missing.Empty(), "Chunk generated by CDC was not saved to the CAS.") + } + + cachedMsg, err := fakeCLS.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) + require.NoError(t, err) + require.Equal(t, 
len(splitResponse.ChunkDigests), len(cachedMsg.(*remoteexecution.SplitBlobResponse).ChunkDigests)) +} + +func TestChunkListValidatingBlobAccessGetExtendsLifetimes(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + blobData := bytes.Repeat([]byte("test_data_pattern_"), 6000) // ~108KB + blobDigest := mustComputeDigest(t, digestFunction, blobData) + require.NoError(t, fakeCAS.Put(ctx, blobDigest, buffer.NewValidatedBufferFromByteSlice(blobData))) + + // Split the blob to populate the CAS and CLS for this blob. + msg, err := validatingCLS.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) + require.NoError(t, err) + splitResponse := msg.(*remoteexecution.SplitBlobResponse) + + // Reset touches. + fakeCAS.ResetTouches() + + // Perform a cached split. 
+ msgCached, err := validatingCLS.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) + require.NoError(t, err) + cachedResponse := msgCached.(*remoteexecution.SplitBlobResponse) + require.Equal(t, len(splitResponse.ChunkDigests), len(cachedResponse.ChunkDigests)) + + // The original blob MUST have had its lifetime extended + require.Greater(t, fakeCAS.GetTouches(blobDigest), 0, "Original blob's lifetime was not extended during call to SplitBlob") + require.Greater(t, fakeCLS.GetTouches(blobDigest), 0, "Original blob's chunk list lifetime was not extended during call to SplitBlob") + + // Every chunk MUST have been touched in the CAS + for _, chunkProto := range cachedResponse.ChunkDigests { + chunkDigest, err := digestFunction.NewDigestFromProto(chunkProto) + require.NoError(t, err) + require.Greater(t, fakeCAS.GetTouches(chunkDigest), 0, "Chunk's lifetime was not extended during call to SplitBlob") + } +} + +func TestChunkListValidatingBlobAccessPutManualSplice(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + + chunk1Data := []byte("Hello, ") + chunk1Digest := mustComputeDigest(t, digestFunction, chunk1Data) + require.NoError(t, fakeCAS.Put(ctx, chunk1Digest, buffer.NewValidatedBufferFromByteSlice(chunk1Data))) + + chunk2Data := []byte("World!") + chunk2Digest := mustComputeDigest(t, digestFunction, chunk2Data) + require.NoError(t, fakeCAS.Put(ctx, chunk2Digest, buffer.NewValidatedBufferFromByteSlice(chunk2Data))) + + expectedFullData := []byte("Hello, World!") + fullBlobDigest := mustComputeDigest(t, digestFunction, expectedFullData) + + splitResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{ + 
chunk1Digest.GetProto(), + chunk2Digest.GetProto(), + }, + } + reqBuffer := buffer.NewProtoBufferFromProto(splitResponse, buffer.UserProvided) + + err := validatingCLS.Put(ctx, fullBlobDigest, reqBuffer) + require.NoError(t, err) + + composedData, err := fakeCAS.Get(ctx, fullBlobDigest).ToByteSlice(len(expectedFullData)) + require.NoError(t, err) + require.Equal(t, expectedFullData, composedData) +} + +func TestChunkListValidatingBlobAccessPutCanonicalization(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + + blobData := bytes.Repeat([]byte("canonicalization_test_data"), 4000) // ~104KB + chunk1Data := blobData[:10] + chunk2Data := blobData[10:] + + chunk1Digest := mustComputeDigest(t, digestFunction, chunk1Data) + require.NoError(t, fakeCAS.Put(ctx, chunk1Digest, buffer.NewValidatedBufferFromByteSlice(chunk1Data))) + + chunk2Digest := mustComputeDigest(t, digestFunction, chunk2Data) + require.NoError(t, fakeCAS.Put(ctx, chunk2Digest, buffer.NewValidatedBufferFromByteSlice(chunk2Data))) + + fullBlobDigest := mustComputeDigest(t, digestFunction, blobData) + + splitResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{ + chunk1Digest.GetProto(), + chunk2Digest.GetProto(), + }, + } + reqBuffer := buffer.NewProtoBufferFromProto(splitResponse, buffer.UserProvided) + + err := validatingCLS.Put(ctx, fullBlobDigest, reqBuffer) + require.NoError(t, err) + + composedData, err := fakeCAS.Get(ctx, fullBlobDigest).ToByteSlice(200000) + require.NoError(t, err) + require.Equal(t, blobData, composedData) + + canonicalBuffer := fakeCLS.Get(ctx, fullBlobDigest) + canonicalProto, err := canonicalBuffer.ToProto(&remoteexecution.SplitBlobResponse{}, 
maximumMessageSizeBytes) + require.NoError(t, err) + + canonicalResp := canonicalProto.(*remoteexecution.SplitBlobResponse) + require.Greater(t, len(canonicalResp.ChunkDigests), 0) + require.NotEqual(t, chunk1Digest.GetProto().Hash, canonicalResp.ChunkDigests[0].Hash, "Server should not have echoed back the non-standard chunks") +} + +func TestChunkListValidatingBlobAccessPutMissingChunk(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + chunkDigest := mustComputeDigest(t, digestFunction, []byte("ghost")) + + splitResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{chunkDigest.GetProto()}, + } + reqBuffer := buffer.NewProtoBufferFromProto(splitResponse, buffer.UserProvided) + + err := validatingCLS.Put(ctx, chunkDigest, reqBuffer) + require.Error(t, err) + require.Equal(t, codes.NotFound, status.Code(err)) +} + +func TestChunkListValidatingBlobAccessPutDigestMismatch(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + + chunkData := []byte("Valid chunk data") + chunkDigest := mustComputeDigest(t, digestFunction, chunkData) + require.NoError(t, fakeCAS.Put(ctx, chunkDigest, buffer.NewValidatedBufferFromByteSlice(chunkData))) + + wrongBlobDigest := mustComputeDigest(t, digestFunction, []byte("Different data")) + + splitResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{chunkDigest.GetProto()}, + } + reqBuffer := 
buffer.NewProtoBufferFromProto(splitResponse, buffer.UserProvided) + + err := validatingCLS.Put(ctx, wrongBlobDigest, reqBuffer) + require.Error(t, err) + require.Contains(t, err.Error(), "does not compose to blob") +} + +func TestChunkListValidatingBlobAccessPutEmptyBlob(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + emptyDigest := mustComputeDigest(t, digestFunction, nil) + + splitResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{}, + } + reqBuffer := buffer.NewProtoBufferFromProto(splitResponse, buffer.UserProvided) + + err := validatingCLS.Put(ctx, emptyDigest, reqBuffer) + require.NoError(t, err) +} + +func TestChunkListValidatingBlobAccessPutRepeatedChunks(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + + chunkA := []byte("A") + digestA := mustComputeDigest(t, digestFunction, chunkA) + require.NoError(t, fakeCAS.Put(ctx, digestA, buffer.NewValidatedBufferFromByteSlice(chunkA))) + + chunkB := []byte("B") + digestB := mustComputeDigest(t, digestFunction, chunkB) + require.NoError(t, fakeCAS.Put(ctx, digestB, buffer.NewValidatedBufferFromByteSlice(chunkB))) + + expectedData := []byte("AABA") + expectedDigest := mustComputeDigest(t, digestFunction, expectedData) + + splitResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{ + digestA.GetProto(), + digestA.GetProto(), + digestB.GetProto(), + 
digestA.GetProto(), + }, + } + reqBuffer := buffer.NewProtoBufferFromProto(splitResponse, buffer.UserProvided) + + err := validatingCLS.Put(ctx, expectedDigest, reqBuffer) + require.NoError(t, err) + + composedData, err := fakeCAS.Get(ctx, expectedDigest).ToByteSlice(100) + require.NoError(t, err) + require.Equal(t, expectedData, composedData) +} + +func TestChunkListValidatingBlobAccessPutInlineEmptyChunk(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + + chunkData := []byte("Valid") + chunkDigest := mustComputeDigest(t, digestFunction, chunkData) + require.NoError(t, fakeCAS.Put(ctx, chunkDigest, buffer.NewValidatedBufferFromByteSlice(chunkData))) + + emptyDigest := mustComputeDigest(t, digestFunction, nil) + require.NoError(t, fakeCAS.Put(ctx, emptyDigest, buffer.NewValidatedBufferFromByteSlice(nil))) + + expectedDigest := mustComputeDigest(t, digestFunction, chunkData) + + splitResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{ + chunkDigest.GetProto(), + emptyDigest.GetProto(), + }, + } + reqBuffer := buffer.NewProtoBufferFromProto(splitResponse, buffer.UserProvided) + + err := validatingCLS.Put(ctx, expectedDigest, reqBuffer) + require.NoError(t, err) +} + +func TestChunkListValidatingBlobAccessPutExtendsLifetimes(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + + chunk1Data := []byte("Hello, ") + chunk1Digest := mustComputeDigest(t, 
digestFunction, chunk1Data) + require.NoError(t, fakeCAS.Put(ctx, chunk1Digest, buffer.NewValidatedBufferFromByteSlice(chunk1Data))) + + chunk2Data := []byte("World!") + chunk2Digest := mustComputeDigest(t, digestFunction, chunk2Data) + require.NoError(t, fakeCAS.Put(ctx, chunk2Digest, buffer.NewValidatedBufferFromByteSlice(chunk2Data))) + + expectedFullData := []byte("Hello, World!") + fullBlobDigest := mustComputeDigest(t, digestFunction, expectedFullData) + + fakeCAS.ResetTouches() + + splitResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{ + chunk1Digest.GetProto(), + chunk2Digest.GetProto(), + }, + } + reqBuffer := buffer.NewProtoBufferFromProto(splitResponse, buffer.UserProvided) + + err := validatingCLS.Put(ctx, fullBlobDigest, reqBuffer) + + // From the REAPI, the server may either process the splice and + // return OK, OR it may return ALREADY_EXISTS if the blob is already + // composed and the server chooses not to extend the lifetime of the + // user's specific chunks. + if status.Code(err) == codes.AlreadyExists { + // The server is free not to touch the user's chunks. However, + // it MUST still have verified/touched the original blob. + require.Greater(t, fakeCAS.GetTouches(fullBlobDigest), 0, "Composed blob lifetime was not extended during SpliceBlob") + require.Greater(t, fakeCLS.GetTouches(fullBlobDigest), 0, "Composed blob chunk list lifetime was not extended during SpliceBlob") + } else { + // Because the server accepted the Splice request, it is + // strictly obligated to extend the lifetimes of BOTH the + // provided chunks and the composed blob. 
+ require.NoError(t, err) + + require.Greater(t, fakeCAS.GetTouches(chunk1Digest), 0, "Chunk 1 lifetime was not extended during SpliceBlob") + require.Greater(t, fakeCAS.GetTouches(chunk2Digest), 0, "Chunk 2 lifetime was not extended during SpliceBlob") + require.Greater(t, fakeCAS.GetTouches(fullBlobDigest), 0, "Composed blob lifetime was not extended during SpliceBlob") + require.Greater(t, fakeCLS.GetTouches(fullBlobDigest), 0, "Composed blob chunk list lifetime was not extended during SpliceBlob") + } +} + +func TestChunkListValidatingBlobAccessGetMissingBlob(t *testing.T) { + ctx := context.Background() + + fakeCAS := newFakeBlobAccess(nil) + fakeCLS := newFakeBlobAccess(testCDCParams) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, 1024*1024) + + digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) + ghostDigest := mustComputeDigest(t, digestFunction, []byte("ghost")) + + _, err := validatingCLS.Get(ctx, ghostDigest).ToProto(&remoteexecution.SplitBlobResponse{}, 1024*1024) + + require.Error(t, err) + require.Equal(t, codes.NotFound, status.Code(err)) +} diff --git a/pkg/blobstore/chunklistvalidating/chunker.go b/pkg/blobstore/chunklistvalidating/chunker.go new file mode 100644 index 000000000..8badbc7e9 --- /dev/null +++ b/pkg/blobstore/chunklistvalidating/chunker.go @@ -0,0 +1,16 @@ +package chunklistvalidating + +import ( + "github.com/buildbarn/bb-storage/pkg/digest" +) + +// Chunk is a struct of raw binary data with its digest. +type Chunk struct { + Digest digest.Digest + Data []byte +} + +// Chunker is an interface that provides a sequence of chunks. 
+type Chunker interface { + NextChunk() (Chunk, error) +} diff --git a/pkg/blobstore/chunklistvalidating/fake_blob_access_test.go b/pkg/blobstore/chunklistvalidating/fake_blob_access_test.go new file mode 100644 index 000000000..ffd7922c1 --- /dev/null +++ b/pkg/blobstore/chunklistvalidating/fake_blob_access_test.go @@ -0,0 +1,88 @@ +package chunklistvalidating_test + +import ( + "context" + "sync" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/pkg/blobstore" + "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/digest" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// fakeBlobAccess provides a thread-safe, in-memory BlobAccess for +// testing. +type fakeBlobAccess struct { + blobstore.BlobAccess + lock sync.Mutex + blobs map[digest.Digest][]byte + touches map[digest.Digest]int // Tracks lifetime extensions + chunkingParameters *remoteexecution.RepMaxCdcParams +} + +func newFakeBlobAccess(chunkingParameters *remoteexecution.RepMaxCdcParams) *fakeBlobAccess { + return &fakeBlobAccess{ + blobs: make(map[digest.Digest][]byte), + touches: make(map[digest.Digest]int), + chunkingParameters: chunkingParameters, + } +} + +func (f *fakeBlobAccess) Get(ctx context.Context, d digest.Digest) buffer.Buffer { + f.lock.Lock() + defer f.lock.Unlock() + data, ok := f.blobs[d] + if !ok { + return buffer.NewBufferFromError(status.Error(codes.NotFound, "Blob not found")) + } + return buffer.NewValidatedBufferFromByteSlice(data) +} + +func (f *fakeBlobAccess) Put(ctx context.Context, d digest.Digest, b buffer.Buffer) error { + data, err := b.ToByteSlice(100 * 1024 * 1024) + if err != nil { + return err + } + f.lock.Lock() + defer f.lock.Unlock() + f.touches[d]++ + f.blobs[d] = data + return nil +} + +func (f *fakeBlobAccess) FindMissing(ctx context.Context, digests digest.Set) (digest.Set, error) { + f.lock.Lock() + defer f.lock.Unlock() + 
missing := digest.NewSetBuilder(digests.Length()) + for _, d := range digests.Items() { + if _, ok := f.blobs[d]; !ok { + missing.Add(d) + } else { + f.touches[d]++ + } + } + return missing.Build(), nil +} + +func (f *fakeBlobAccess) GetCapabilities(ctx context.Context, instanceName digest.InstanceName) (*remoteexecution.ServerCapabilities, error) { + return &remoteexecution.ServerCapabilities{ + CacheCapabilities: &remoteexecution.CacheCapabilities{ + RepMaxCdcParams: f.chunkingParameters, + }, + }, nil +} + +func (f *fakeBlobAccess) GetTouches(d digest.Digest) int { + f.lock.Lock() + defer f.lock.Unlock() + return f.touches[d] +} + +func (f *fakeBlobAccess) ResetTouches() { + f.lock.Lock() + defer f.lock.Unlock() + f.touches = make(map[digest.Digest]int) +} diff --git a/pkg/blobstore/chunklistvalidating/integration/BUILD.bazel b/pkg/blobstore/chunklistvalidating/integration/BUILD.bazel new file mode 100644 index 000000000..ca423091e --- /dev/null +++ b/pkg/blobstore/chunklistvalidating/integration/BUILD.bazel @@ -0,0 +1,19 @@ +load("@rules_go//go:def.bzl", "go_test") + +go_test( + name = "integration_test", + srcs = ["chunk_list_validating_integration_test.go"], + data = ["//cmd/bb_storage"], + env = { + "BB_STORAGE_RUNFILE_PATH": "$(rlocationpath //cmd/bb_storage:bb_storage)", + }, + deps = [ + "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", + "@com_github_stretchr_testify//require", + "@org_golang_google_grpc//:grpc", + "@org_golang_google_grpc//codes", + "@org_golang_google_grpc//credentials/insecure", + "@org_golang_google_grpc//status", + "@rules_go//go/runfiles", + ], +) diff --git a/pkg/blobstore/chunklistvalidating/integration/chunk_list_validating_integration_test.go b/pkg/blobstore/chunklistvalidating/integration/chunk_list_validating_integration_test.go new file mode 100644 index 000000000..8e5dd7741 --- /dev/null +++ b/pkg/blobstore/chunklistvalidating/integration/chunk_list_validating_integration_test.go @@ -0,0 
+1,445 @@ +package integration + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "math/rand" + "os" + "os/exec" + "strings" + "testing" + "time" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/bazelbuild/rules_go/go/runfiles" + "github.com/stretchr/testify/require" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/status" +) + +type serverParams struct { + disableCLS bool + socketPath string + upstreamSocketPath string +} + +func escapeJSON(s string) string { + s = strings.ReplaceAll(s, "\\", "\\\\") + s = strings.ReplaceAll(s, "\"", "\\\"") + s = strings.ReplaceAll(s, "'", "\\'") + s = strings.ReplaceAll(s, "\n", "\\n") + return s +} + +func storageConfig(params serverParams) string { + return fmt.Sprintf(` +local cls = %t; +local listenPath = '%s'; +{ + grpcServers: [{ + listenPaths: [listenPath], + authenticationPolicy: { allow: {} }, + }], + maximumMessageSizeBytes: 4 * 1024 * 1024, + contentAddressableStorage: { + backend: { + 'local': { + keyLocationMapInMemory: { entries: 1024 }, + keyLocationMapMaximumGetAttempts: 32, + keyLocationMapMaximumPutAttempts: 64, + oldBlocks: 1, + currentBlocks: 1, + newBlocks: 1, + blocksInMemory: { blockSizeBytes: 1024 * 1024 }, + }, + }, + getAuthorizer: { allow: {} }, + putAuthorizer: { allow: {} }, + findMissingAuthorizer: { allow: {} }, + }, + chunkListStorage: if !cls then null else { + backend: { + 'local': { + keyLocationMapInMemory: { entries: 1024 }, + keyLocationMapMaximumGetAttempts: 32, + keyLocationMapMaximumPutAttempts: 64, + oldBlocks: 1, + currentBlocks: 1, + newBlocks: 1, + blocksInMemory: { blockSizeBytes: 1024 * 1024 }, + chunkingParameters: { + minChunkSizeBytes: 256, + horizonSizeBytes: 8*256, + } + }, + }, + getAuthorizer: { allow: {} }, + putAuthorizer: { allow: {} }, + findMissingAuthorizer: { allow: {} }, + }, +} +`, !params.disableCLS, 
escapeJSON(params.socketPath)) +} + +func frontendConfig(params serverParams) string { + return fmt.Sprintf(` +local cls = %t; +local listenPath = '%s'; +// unix:// doesn't work under Windows. +// https://github.com/grpc/grpc-go/issues/8675 +local upstreamAddress = 'unix:%s'; +{ + grpcServers: [{ + listenPaths: [listenPath], + authenticationPolicy: { allow: {} }, + }], + maximumMessageSizeBytes: 4 * 1024 * 1024, + contentAddressableStorage: { + backend: { grpc: { client: { address: upstreamAddress } } }, + getAuthorizer: { allow: {} }, + putAuthorizer: { allow: {} }, + findMissingAuthorizer: { allow: {} }, + }, + chunkListStorage: if !cls then null else { + backend: { chunkListValidating: { backend: { grpc: { client: { address: upstreamAddress } } } } }, + getAuthorizer: { allow: {} }, + putAuthorizer: { allow: {} }, + findMissingAuthorizer: { allow: {} }, + }, +} +`, !params.disableCLS, escapeJSON(params.socketPath), escapeJSON(params.upstreamSocketPath)) +} + +func writeConfigFile(name, content string) (file *os.File, err error) { + if file, err = os.CreateTemp("", name); err != nil { + return nil, err + } + if _, err = file.WriteString(content); err != nil { + return nil, err + } + if err = file.Close(); err != nil { + return nil, err + } + return file, nil +} + +func setupServer(t *testing.T, name, config string) func() { + rf, err := runfiles.New() + if err != nil { + t.Fatalf("Failed to initialize runfiles: %v", err) + } + runfilePath := os.Getenv("BB_STORAGE_RUNFILE_PATH") + require.NotEmpty(t, runfilePath, "BB_STORAGE_RUNFILE_PATH environment variable is not set") + + bbStoragePath, err := rf.Rlocation(runfilePath) + require.NoError(t, err) + + configFile, err := writeConfigFile(name, config) + require.NoError(t, err) + + cmd := exec.Command(bbStoragePath, configFile.Name()) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err = cmd.Start() + require.NoError(t, err) + + return func() { + cmd.Process.Kill() + cmd.Wait() + os.Remove(configFile.Name()) + } 
+} + +func createSocketPath(t *testing.T) string { + t.Helper() + socketFile, err := os.CreateTemp("", "bb_*.sock") + require.NoError(t, err) + socketPath := socketFile.Name() + socketFile.Close() + err = os.Remove(socketPath) + require.NoError(t, err) + return socketPath +} + +func setupServers(t *testing.T, storageParams, frontendParams serverParams) (func(), remoteexecution.CapabilitiesClient, remoteexecution.ContentAddressableStorageClient) { + storageParams.socketPath = createSocketPath(t) + closeStorage := setupServer(t, "storage", storageConfig(storageParams)) + require.Eventually(t, func() bool { + _, err := os.Stat(storageParams.socketPath) + return err == nil + }, 1*time.Second, 10*time.Millisecond, "Storage server did not start.") + + frontendParams.socketPath = createSocketPath(t) + frontendParams.upstreamSocketPath = storageParams.socketPath + closeFrontend := setupServer(t, "frontend", frontendConfig(frontendParams)) + require.Eventually(t, func() bool { + _, err := os.Stat(frontendParams.socketPath) + return err == nil + }, 1*time.Second, 10*time.Millisecond, "Frontend server did not start.") + + conn, err := grpc.NewClient(fmt.Sprintf("unix:%s", frontendParams.socketPath), grpc.WithTransportCredentials(insecure.NewCredentials())) + require.NoError(t, err) + + return func() { + closeStorage() + closeFrontend() + conn.Close() + os.Remove(storageParams.socketPath) + os.Remove(frontendParams.socketPath) + }, remoteexecution.NewCapabilitiesClient(conn), remoteexecution.NewContentAddressableStorageClient(conn) +} + +func TestChunkListValidatingCapabilities(t *testing.T) { + tests := []struct { + name string + storageParams serverParams + frontendParams serverParams + expectSupport bool + }{ + {"Enabled In Both", serverParams{}, serverParams{}, true}, + {"Disabled in Storage", serverParams{disableCLS: true}, serverParams{}, false}, + {"Disabled in Frontend", serverParams{}, serverParams{disableCLS: true}, false}, + {"Disabled in Both", 
serverParams{disableCLS: true}, serverParams{disableCLS: true}, false}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + closer, capabilitiesClient, _ := setupServers(t, tc.storageParams, tc.frontendParams) + defer closer() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + capabilities, err := capabilitiesClient.GetCapabilities(ctx, &remoteexecution.GetCapabilitiesRequest{ + InstanceName: "", + }) + require.NoError(t, err) + + cacheCaps := capabilities.GetCacheCapabilities() + require.NotNil(t, cacheCaps) + + if tc.expectSupport { + require.True(t, cacheCaps.SpliceBlobSupport) + require.True(t, cacheCaps.SplitBlobSupport) + + chunkingParameters := cacheCaps.GetRepMaxCdcParams() + require.NotNil(t, chunkingParameters) + require.Equal(t, uint64(256), chunkingParameters.GetMinChunkSizeBytes()) + require.Equal(t, uint64(2048), chunkingParameters.GetHorizonSizeBytes()) + } else { + require.False(t, cacheCaps.SpliceBlobSupport) + require.False(t, cacheCaps.SplitBlobSupport) + require.Nil(t, cacheCaps.GetRepMaxCdcParams()) + } + }) + } +} + +func computeDigest(data []byte) *remoteexecution.Digest { + hash := sha256.Sum256(data) + return &remoteexecution.Digest{ + Hash: hex.EncodeToString(hash[:]), + SizeBytes: int64(len(data)), + } +} + +func makeRandomData(t *testing.T, size int, seed int64) []byte { + t.Helper() + data := make([]byte, size) + r := rand.New(rand.NewSource(seed)) + _, err := r.Read(data) + require.NoError(t, err) + return data +} + +func uploadBlob(ctx context.Context, t *testing.T, cas remoteexecution.ContentAddressableStorageClient, data []byte) *remoteexecution.Digest { + t.Helper() + digest := computeDigest(data) + req := &remoteexecution.BatchUpdateBlobsRequest{ + Requests: []*remoteexecution.BatchUpdateBlobsRequest_Request{ + {Digest: digest, Data: data}, + }, + DigestFunction: remoteexecution.DigestFunction_SHA256, + } + res, err := cas.BatchUpdateBlobs(ctx, req) + 
require.NoError(t, err) + require.NotEmpty(t, res.Responses, "server returned empty responses array") + status := res.Responses[0].GetStatus() + require.Equal(t, int32(0), status.GetCode(), status.GetMessage()) + return digest +} + +func findMissingBlobs(ctx context.Context, t *testing.T, cas remoteexecution.ContentAddressableStorageClient, digests []*remoteexecution.Digest) []*remoteexecution.Digest { + t.Helper() + req := &remoteexecution.FindMissingBlobsRequest{ + BlobDigests: digests, + DigestFunction: remoteexecution.DigestFunction_SHA256, + } + res, err := cas.FindMissingBlobs(ctx, req) + require.NoError(t, err) + return res.MissingBlobDigests +} + +func TestRepMaxCDCSplitAndSpliceBehaviors(t *testing.T) { + minChunkSize := int64(256) + + t.Run("RoundTripSplitThenSplice", func(t *testing.T) { + closer, _, casClient := setupServers(t, serverParams{}, serverParams{}) + defer closer() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + dataSize := (minChunkSize * 4) + 128 + data := makeRandomData(t, int(dataSize), 0) + blobDigest := uploadBlob(ctx, t, casClient, data) + + splitReq := &remoteexecution.SplitBlobRequest{ + BlobDigest: blobDigest, + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + DigestFunction: remoteexecution.DigestFunction_SHA256, + } + splitRes, err := casClient.SplitBlob(ctx, splitReq) + require.NoError(t, err) + + spliceReq := &remoteexecution.SpliceBlobRequest{ + BlobDigest: blobDigest, + ChunkDigests: splitRes.ChunkDigests, + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + DigestFunction: remoteexecution.DigestFunction_SHA256, + } + spliceRes, err := casClient.SpliceBlob(ctx, spliceReq) + require.NoError(t, err) + require.Equal(t, blobDigest.Hash, spliceRes.BlobDigest.Hash) + }) + + t.Run("SpliceNonStandardChunkingThenSplit", func(t *testing.T) { + closer, _, casClient := setupServers(t, serverParams{}, serverParams{}) + defer closer() + + ctx, cancel := 
context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + blobData := makeRandomData(t, int(minChunkSize*2), 0) + + chunk1 := blobData[:1] + chunk2 := blobData[1:] + + digest1 := uploadBlob(ctx, t, casClient, chunk1) + digest2 := uploadBlob(ctx, t, casClient, chunk2) + expectedDigest := computeDigest(blobData) + + spliceReq := &remoteexecution.SpliceBlobRequest{ + BlobDigest: expectedDigest, + ChunkDigests: []*remoteexecution.Digest{digest1, digest2}, + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + DigestFunction: remoteexecution.DigestFunction_SHA256, + } + spliceRes, err := casClient.SpliceBlob(ctx, spliceReq) + require.NoError(t, err) + require.Equal(t, expectedDigest.Hash, spliceRes.BlobDigest.Hash) + + splitReq := &remoteexecution.SplitBlobRequest{ + BlobDigest: expectedDigest, + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + DigestFunction: remoteexecution.DigestFunction_SHA256, + } + splitRes, err := casClient.SplitBlob(ctx, splitReq) + require.NoError(t, err) + + require.NotEmpty(t, splitRes.ChunkDigests) + + // Check that it didn't just echo our chunks back + isEcho := len(splitRes.ChunkDigests) == 2 && + splitRes.ChunkDigests[0].Hash == digest1.Hash && + splitRes.ChunkDigests[1].Hash == digest2.Hash + require.False(t, isEcho, "Server echoed non-standard chunks") + + var totalSize int64 + for _, c := range splitRes.ChunkDigests { + totalSize += c.SizeBytes + } + require.Equal(t, expectedDigest.SizeBytes, totalSize) + }) + + t.Run("SpliceAlreadyExistsOrNoop", func(t *testing.T) { + closer, _, casClient := setupServers(t, serverParams{}, serverParams{}) + defer closer() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + blobData := append([]byte("This blob will be fully uploaded before we try to splice it."), makeRandomData(t, 16, 0)...) 
+ expectedDigest := uploadBlob(ctx, t, casClient, blobData) + + chunk1 := blobData[:10] + chunk2 := blobData[10:] + digest1 := uploadBlob(ctx, t, casClient, chunk1) + digest2 := uploadBlob(ctx, t, casClient, chunk2) + + spliceReq := &remoteexecution.SpliceBlobRequest{ + BlobDigest: expectedDigest, + ChunkDigests: []*remoteexecution.Digest{digest1, digest2}, + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + DigestFunction: remoteexecution.DigestFunction_SHA256, + } + + spliceRes, err := casClient.SpliceBlob(ctx, spliceReq) + + if err != nil { + require.Equal(t, codes.AlreadyExists, status.Code(err), "Expected OK or ALREADY_EXISTS") + } else { + require.Equal(t, expectedDigest.Hash, spliceRes.BlobDigest.Hash) + } + }) + + t.Run("ValidationSpliceBlobRejections", func(t *testing.T) { + closer, _, casClient := setupServers(t, serverParams{}, serverParams{}) + defer closer() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + validData := makeRandomData(t, 512, 0) + validDigest := uploadBlob(ctx, t, casClient, validData) + ghostDigest := computeDigest([]byte("I do not exist")) + + tests := []struct { + name string + req *remoteexecution.SpliceBlobRequest + expectError codes.Code + }{ + { + name: "Missing Chunk", + req: &remoteexecution.SpliceBlobRequest{ + BlobDigest: ghostDigest, + ChunkDigests: []*remoteexecution.Digest{ghostDigest}, + }, + expectError: codes.NotFound, + }, + { + name: "Digest Mismatch", + req: &remoteexecution.SpliceBlobRequest{ + BlobDigest: computeDigest([]byte("Fake target")), + ChunkDigests: []*remoteexecution.Digest{validDigest}, + }, + expectError: codes.InvalidArgument, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + tc.req.ChunkingFunction = remoteexecution.ChunkingFunction_REP_MAX_CDC + tc.req.DigestFunction = remoteexecution.DigestFunction_SHA256 + + _, err := casClient.SpliceBlob(ctx, tc.req) + require.Error(t, err) + require.Equal(t, 
tc.expectError, status.Code(err)) + }) + } + }) +} diff --git a/pkg/blobstore/chunklistvalidating/reader_chunker.go b/pkg/blobstore/chunklistvalidating/reader_chunker.go new file mode 100644 index 000000000..0d250dabd --- /dev/null +++ b/pkg/blobstore/chunklistvalidating/reader_chunker.go @@ -0,0 +1,53 @@ +package chunklistvalidating + +import ( + "bufio" + "io" + + "github.com/buildbarn/bb-storage/pkg/digest" + cdc "github.com/buildbarn/go-cdc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +type readerChunker struct { + cdcChunker cdc.ContentDefinedChunker + reader io.Reader + digestFunction digest.Function +} + +func (c *readerChunker) NextChunk() (Chunk, error) { + chunk, err := c.cdcChunker.ReadNextChunk() + if err != nil { + return Chunk{}, err + } + + digestGenerator := c.digestFunction.NewGenerator(int64(len(chunk))) + if _, err := digestGenerator.Write(chunk); err != nil { + return Chunk{}, status.Error(codes.Internal, "Could not compute digest of chunk") + } + chunkDigest := digestGenerator.Sum() + + return Chunk{ + Data: chunk, + Digest: chunkDigest, + }, nil +} + +// NewReaderChunker creates a chunker that reads from an io.Reader +func NewReaderChunker(digestFunction digest.Function, reader io.Reader, minChunkSizeBytes, horizonSizeBytes int64) Chunker { + // The internal RepMaxContentDefinedChunker may peek up to this many + // bytes. We therefore make sure that the underlying buffer is big + // enough to prevent bufio.ErrBufferFull errors. 
+ bufferSizeBytes := 2*minChunkSizeBytes + horizonSizeBytes + return &readerChunker{ + cdc.NewRepMaxContentDefinedChunker( + bufio.NewReaderSize(reader, int(bufferSizeBytes)), + &cdc.FastContentDefinedChunkerGearTable, + int(minChunkSizeBytes), + int(horizonSizeBytes), + ), + reader, + digestFunction, + } +} diff --git a/pkg/blobstore/chunklistvalidating/reader_chunker_test.go b/pkg/blobstore/chunklistvalidating/reader_chunker_test.go new file mode 100644 index 000000000..c5630d427 --- /dev/null +++ b/pkg/blobstore/chunklistvalidating/reader_chunker_test.go @@ -0,0 +1,107 @@ +package chunklistvalidating_test + +import ( + "io" + "math/rand" + "testing" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/blobstore/chunklistvalidating" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/stretchr/testify/require" +) + +const ( + minChunkSize = 256 << 10 // 256 KiB + maxChunkSize = 2*minChunkSize - 1 + horizonLookaheadBytes = 8 * minChunkSize +) + +func FuzzReaderChunker(f *testing.F) { + for i := range 20 { + // Fuzz test i+1 MB of data with seed i. 
+ f.Add((i+1)<<20, int64(i)) + } + f.Fuzz(func(t *testing.T, dataSizeBytes int, seed int64) { + require := require.New(t) + rng := rand.New(rand.NewSource(seed)) + originalData := make([]byte, dataSizeBytes) + rng.Read(originalData) + digestFunc := digest.MustNewFunction("", remoteexecution.DigestFunction_SHA256) + + reader := buffer.NewValidatedBufferFromByteSlice(originalData).ToReader() + defer reader.Close() + chunker := chunklistvalidating.NewReaderChunker(digestFunc, reader, minChunkSize, horizonLookaheadBytes) + + composedData := make([]byte, 0, dataSizeBytes) + var numberOfChunks int + for numberOfChunks = 0; ; numberOfChunks++ { + chunk, err := chunker.NextChunk() + if err == io.EOF { + break + } + require.NoError(err, "Failed to generate chunk %d.", numberOfChunks) + + chunkSize := int64(len(chunk.Data)) + chunkHasher := chunk.Digest.NewHasher(chunkSize) + chunkHasher.Write(chunk.Data) + + require.Equal(chunk.Digest.GetHashBytes(), chunkHasher.Sum(nil), "Digest mismatch for %d.", numberOfChunks) + composedData = append(composedData, chunk.Data...) 
+ } + + require.Equal(originalData, composedData) + + originalDigestGen := digestFunc.NewGenerator(int64(dataSizeBytes)) + originalDigestGen.Write(originalData) + + composedDigestGen := digestFunc.NewGenerator(int64(dataSizeBytes)) + composedDigestGen.Write(composedData) + + require.Equal(originalDigestGen.Sum(), composedDigestGen.Sum(), "The digest of the composed data does not match the digest of the original data.") + + minNumberOfChunks := dataSizeBytes / maxChunkSize + require.GreaterOrEqual(numberOfChunks, minNumberOfChunks, "Produced fewer chunks than should be possible.") + + maxNumberOfChunks := dataSizeBytes / minChunkSize + require.LessOrEqual(numberOfChunks, maxNumberOfChunks, "Produced more chunks than should be possible.") + }) +} + +func TestReaderChunkerSmallBlob(t *testing.T) { + // Test with a small blob that should produce a single chunk + originalData := []byte("Hello, World!") + reader := buffer.NewValidatedBufferFromByteSlice(originalData).ToReader() + defer reader.Close() + + digestFunc := digest.MustNewFunction("", remoteexecution.DigestFunction_SHA256) + chunker := chunklistvalidating.NewReaderChunker(digestFunc, reader, minChunkSize, horizonLookaheadBytes) + + chunks := make([][]byte, 0, 1) + for { + chunk, err := chunker.NextChunk() + if err == io.EOF { + break + } + require.NoError(t, err) + + chunks = append(chunks, chunk.Data) + } + require.Len(t, chunks, 1) + require.Equal(t, originalData, chunks[0]) +} + +func TestReaderChunkerEmptyBlob(t *testing.T) { + // Test with empty blob + originalData := []byte{} + reader := buffer.NewValidatedBufferFromByteSlice(originalData).ToReader() + defer reader.Close() + + digestFunc := digest.MustNewFunction("", remoteexecution.DigestFunction_SHA256) + chunker := chunklistvalidating.NewReaderChunker(digestFunc, reader, minChunkSize, horizonLookaheadBytes) + + chunk, err := chunker.NextChunk() + require.ErrorIs(t, io.EOF, err) + require.Empty(t, chunk.Data) +} diff --git 
a/pkg/blobstore/cls_read_buffer_factory.go b/pkg/blobstore/cls_read_buffer_factory.go new file mode 100644 index 000000000..7735f064b --- /dev/null +++ b/pkg/blobstore/cls_read_buffer_factory.go @@ -0,0 +1,27 @@ +package blobstore + +import ( + "io" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/digest" +) + +type clsReadBufferFactory struct{} + +func (clsReadBufferFactory) NewBufferFromByteSlice(digest digest.Digest, data []byte, dataIntegrityCallback buffer.DataIntegrityCallback) buffer.Buffer { + return buffer.NewProtoBufferFromByteSlice(&remoteexecution.SplitBlobResponse{}, data, buffer.BackendProvided(dataIntegrityCallback)) +} + +func (clsReadBufferFactory) NewBufferFromReader(digest digest.Digest, r io.ReadCloser, dataIntegrityCallback buffer.DataIntegrityCallback) buffer.Buffer { + return buffer.NewProtoBufferFromReader(&remoteexecution.SplitBlobResponse{}, r, buffer.BackendProvided(dataIntegrityCallback)) +} + +func (f clsReadBufferFactory) NewBufferFromReaderAt(digest digest.Digest, r buffer.ReadAtCloser, sizeBytes int64, dataIntegrityCallback buffer.DataIntegrityCallback) buffer.Buffer { + return f.NewBufferFromReader(digest, newReaderFromReaderAt(r), dataIntegrityCallback) +} + +// CLSReadBufferFactory is capable of creating identifiers and buffers +// for objects stored in the Chunk List Storage (CLS). 
+var CLSReadBufferFactory ReadBufferFactory = clsReadBufferFactory{} diff --git a/pkg/blobstore/configuration/BUILD.bazel b/pkg/blobstore/configuration/BUILD.bazel index 33ebe095b..8882b2b2e 100644 --- a/pkg/blobstore/configuration/BUILD.bazel +++ b/pkg/blobstore/configuration/BUILD.bazel @@ -8,6 +8,7 @@ go_library( "blob_replicator_creator.go", "cas_blob_access_creator.go", "cas_blob_replicator_creator.go", + "cls_blob_access_creator.go", "fsac_blob_access_creator.go", "icas_blob_access_creator.go", "icas_blob_replicator_creator.go", @@ -21,6 +22,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/blobstore", + "//pkg/blobstore/chunklistvalidating", "//pkg/blobstore/completenesschecking", "//pkg/blobstore/grpcclients", "//pkg/blobstore/local", diff --git a/pkg/blobstore/configuration/blob_access_creator.go b/pkg/blobstore/configuration/blob_access_creator.go index 6b44c1171..50d6032ce 100644 --- a/pkg/blobstore/configuration/blob_access_creator.go +++ b/pkg/blobstore/configuration/blob_access_creator.go @@ -48,7 +48,7 @@ type BlobAccessCreator interface { // NewHierarchicalInstanceNamesLocalBlobAccess() creates a // BlobAccess suitable for storing data on the local system that // uses hierarchical instance names. - NewHierarchicalInstanceNamesLocalBlobAccess(keyLocationMap local.KeyLocationMap, locationBlobMap local.LocationBlobMap, globalLock *sync.RWMutex) (blobstore.BlobAccess, error) + NewHierarchicalInstanceNamesLocalBlobAccess(keyLocationMap local.KeyLocationMap, locationBlobMap local.LocationBlobMap, globalLock *sync.RWMutex, capabilitiesProvider capabilities.Provider) (blobstore.BlobAccess, error) // NewCustomBlobAccess() can be used as a fallback to create // BlobAccess instances that only apply to this storage type. 
// For example, CompletenessCheckingBlobAccess is only diff --git a/pkg/blobstore/configuration/cas_blob_access_creator.go b/pkg/blobstore/configuration/cas_blob_access_creator.go index 2677982a9..ac215dd68 100644 --- a/pkg/blobstore/configuration/cas_blob_access_creator.go +++ b/pkg/blobstore/configuration/cas_blob_access_creator.go @@ -72,8 +72,8 @@ func (casBlobAccessCreator) NewBlockListGrowthPolicy(currentBlocks, newBlocks in return local.NewImmutableBlockListGrowthPolicy(currentBlocks, newBlocks), nil } -func (casBlobAccessCreator) NewHierarchicalInstanceNamesLocalBlobAccess(keyLocationMap local.KeyLocationMap, locationBlobMap local.LocationBlobMap, globalLock *sync.RWMutex) (blobstore.BlobAccess, error) { - return local.NewHierarchicalCASBlobAccess(keyLocationMap, locationBlobMap, globalLock, casCapabilitiesProvider), nil +func (casBlobAccessCreator) NewHierarchicalInstanceNamesLocalBlobAccess(keyLocationMap local.KeyLocationMap, locationBlobMap local.LocationBlobMap, globalLock *sync.RWMutex, capabilitiesProvider capabilities.Provider) (blobstore.BlobAccess, error) { + return local.NewHierarchicalCASBlobAccess(keyLocationMap, locationBlobMap, globalLock, capabilitiesProvider), nil } func (bac *casBlobAccessCreator) NewCustomBlobAccess(terminationGroup program.Group, configuration *pb.BlobAccessConfiguration, nestedCreator NestedBlobAccessCreator) (BlobAccessInfo, string, error) { diff --git a/pkg/blobstore/configuration/cls_blob_access_creator.go b/pkg/blobstore/configuration/cls_blob_access_creator.go new file mode 100644 index 000000000..76ff79f9c --- /dev/null +++ b/pkg/blobstore/configuration/cls_blob_access_creator.go @@ -0,0 +1,86 @@ +package configuration + +import ( + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/pkg/blobstore" + "github.com/buildbarn/bb-storage/pkg/blobstore/chunklistvalidating" + "github.com/buildbarn/bb-storage/pkg/blobstore/grpcclients" + 
"github.com/buildbarn/bb-storage/pkg/capabilities" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/buildbarn/bb-storage/pkg/grpc" + "github.com/buildbarn/bb-storage/pkg/program" + pb "github.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +type clsBlobAccessCreator struct { + protoBlobAccessCreator + protoBlobReplicatorCreator + + contentAddressableStorage *BlobAccessInfo + grpcClientFactory grpc.ClientFactory + maximumMessageSizeBytes int +} + +// NewCLSBlobAccessCreator creates a BlobAccessCreator that can be +// provided to NewBlobAccessFromConfiguration() to construct a +// BlobAccess that is suitable for accessing the Chunk List Storage (CLS). +func NewCLSBlobAccessCreator(contentAddressableStorage *BlobAccessInfo, grpcClientFactory grpc.ClientFactory, maximumMessageSizeBytes int) BlobAccessCreator { + return &clsBlobAccessCreator{ + contentAddressableStorage: contentAddressableStorage, + grpcClientFactory: grpcClientFactory, + maximumMessageSizeBytes: maximumMessageSizeBytes, + } +} + +func (clsBlobAccessCreator) GetReadBufferFactory() blobstore.ReadBufferFactory { + return blobstore.CLSReadBufferFactory +} + +func (clsBlobAccessCreator) GetStorageTypeName() string { + return "cls" +} + +func (clsBlobAccessCreator) GetDefaultCapabilitiesProvider() capabilities.Provider { + return capabilities.NewStaticProvider(&remoteexecution.ServerCapabilities{}) +} + +func (bac *clsBlobAccessCreator) NewCustomBlobAccess(terminationGroup program.Group, configuration *pb.BlobAccessConfiguration, nestedCreator NestedBlobAccessCreator) (BlobAccessInfo, string, error) { + switch backend := configuration.Backend.(type) { + case *pb.BlobAccessConfiguration_ChunkListValidating: + if bac.contentAddressableStorage == nil { + return BlobAccessInfo{}, "", status.Error(codes.InvalidArgument, "Chunk list validation can only be enabled if a Content Addressable Storage is configured") + } + + 
base, err := nestedCreator.NewNestedBlobAccess(backend.ChunkListValidating.Backend, bac) + if err != nil { + return BlobAccessInfo{}, "", err + } + return BlobAccessInfo{ + BlobAccess: chunklistvalidating.NewChunkListValidatingBlobAccess( + base.BlobAccess, + bac.contentAddressableStorage.BlobAccess, + bac.maximumMessageSizeBytes, + ), + DigestKeyFormat: base.DigestKeyFormat.Combine(bac.contentAddressableStorage.DigestKeyFormat), + }, "chunk_list_validating", nil + + case *pb.BlobAccessConfiguration_Grpc: + client, err := bac.grpcClientFactory.NewClientFromConfiguration(backend.Grpc.Client, terminationGroup) + if err != nil { + return BlobAccessInfo{}, "", err + } + return BlobAccessInfo{ + BlobAccess: grpcclients.NewCLSBlobAccess(client, bac.maximumMessageSizeBytes), + DigestKeyFormat: digest.KeyWithInstance, + }, "grpc", nil + + default: + return newProtoCustomBlobAccess(configuration, nestedCreator, bac) + } +} + +func (clsBlobAccessCreator) WrapTopLevelBlobAccess(blobAccess blobstore.BlobAccess) blobstore.BlobAccess { + return blobAccess +} diff --git a/pkg/blobstore/configuration/new_blob_access.go b/pkg/blobstore/configuration/new_blob_access.go index 941494d66..07e52041c 100644 --- a/pkg/blobstore/configuration/new_blob_access.go +++ b/pkg/blobstore/configuration/new_blob_access.go @@ -7,6 +7,7 @@ import ( "sync" "time" + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" "github.com/buildbarn/bb-storage/pkg/blobstore" "github.com/buildbarn/bb-storage/pkg/blobstore/local" "github.com/buildbarn/bb-storage/pkg/blobstore/mirrored" @@ -14,6 +15,7 @@ import ( "github.com/buildbarn/bb-storage/pkg/blobstore/readfallback" "github.com/buildbarn/bb-storage/pkg/blobstore/sharding" "github.com/buildbarn/bb-storage/pkg/blockdevice" + "github.com/buildbarn/bb-storage/pkg/capabilities" "github.com/buildbarn/bb-storage/pkg/clock" "github.com/buildbarn/bb-storage/pkg/digest" "github.com/buildbarn/bb-storage/pkg/eviction" @@ -352,11 +354,26 @@ 
func (nc *simpleNestedBlobAccessCreator) newNestedBlobAccessBare(configuration * ) var localBlobAccess blobstore.BlobAccess + capabilitiesProvider := creator.GetDefaultCapabilitiesProvider() + chunkingParameters := backend.Local.GetChunkingParameters() + if chunkingParameters != nil { + capabilitiesProvider = capabilities.NewMergingProvider([]capabilities.Provider{ + capabilitiesProvider, + capabilities.NewStaticProvider(&remoteexecution.ServerCapabilities{ + CacheCapabilities: &remoteexecution.CacheCapabilities{ + SplitBlobSupport: true, + SpliceBlobSupport: true, + RepMaxCdcParams: chunkingParameters, + }, + }), + }) + } if backend.Local.HierarchicalInstanceNames { localBlobAccess, err = creator.NewHierarchicalInstanceNamesLocalBlobAccess( keyLocationMap, locationBlobMap, &globalLock, + capabilitiesProvider, ) if err != nil { return BlobAccessInfo{}, "", err @@ -368,7 +385,7 @@ func (nc *simpleNestedBlobAccessCreator) newNestedBlobAccessBare(configuration * digestKeyFormat, &globalLock, storageTypeName, - creator.GetDefaultCapabilitiesProvider(), + capabilitiesProvider, ) } return BlobAccessInfo{ diff --git a/pkg/blobstore/configuration/proto_blob_access_creator.go b/pkg/blobstore/configuration/proto_blob_access_creator.go index aa7daf857..d7ff593ba 100644 --- a/pkg/blobstore/configuration/proto_blob_access_creator.go +++ b/pkg/blobstore/configuration/proto_blob_access_creator.go @@ -5,6 +5,7 @@ import ( "github.com/buildbarn/bb-storage/pkg/blobstore" "github.com/buildbarn/bb-storage/pkg/blobstore/local" + "github.com/buildbarn/bb-storage/pkg/capabilities" "github.com/buildbarn/bb-storage/pkg/digest" pb "github.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore" @@ -25,7 +26,7 @@ func (protoBlobAccessCreator) NewBlockListGrowthPolicy(currentBlocks, newBlocks return local.NewMutableBlockListGrowthPolicy(currentBlocks), nil } -func (protoBlobAccessCreator) NewHierarchicalInstanceNamesLocalBlobAccess(keyLocationMap local.KeyLocationMap, locationBlobMap 
local.LocationBlobMap, globalLock *sync.RWMutex) (blobstore.BlobAccess, error) { +func (protoBlobAccessCreator) NewHierarchicalInstanceNamesLocalBlobAccess(keyLocationMap local.KeyLocationMap, locationBlobMap local.LocationBlobMap, globalLock *sync.RWMutex, capabilitiesProvider capabilities.Provider) (blobstore.BlobAccess, error) { return nil, status.Error(codes.InvalidArgument, "The hierarchical instance names option can only be used for the Content Addressable Storage") } diff --git a/pkg/blobstore/grpcclients/BUILD.bazel b/pkg/blobstore/grpcclients/BUILD.bazel index 257d8fd04..bb518ecf6 100644 --- a/pkg/blobstore/grpcclients/BUILD.bazel +++ b/pkg/blobstore/grpcclients/BUILD.bazel @@ -5,6 +5,7 @@ go_library( srcs = [ "ac_blob_access.go", "cas_blob_access.go", + "cls_blob_access.go", "fsac_blob_access.go", "icas_blob_access.go", "iscc_blob_access.go", diff --git a/pkg/blobstore/grpcclients/cls_blob_access.go b/pkg/blobstore/grpcclients/cls_blob_access.go new file mode 100644 index 000000000..a27e5ea84 --- /dev/null +++ b/pkg/blobstore/grpcclients/cls_blob_access.go @@ -0,0 +1,101 @@ +package grpcclients + +import ( + "context" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/pkg/blobstore" + "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/blobstore/slicing" + "github.com/buildbarn/bb-storage/pkg/digest" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +type clsBlobAccess struct { + contentAddressableStorageClient remoteexecution.ContentAddressableStorageClient + capabilitiesClient remoteexecution.CapabilitiesClient + maximumMessageSizeBytes int +} + +// NewCLSBlobAccess creates a BlobAccess that relays any requests to a +// gRPC server that implements the split and splice api calls of a +// remoteexecution.ContentAddressableStorage service. 
+func NewCLSBlobAccess(client grpc.ClientConnInterface, maximumMessageSizeBytes int) blobstore.BlobAccess { + return &clsBlobAccess{ + contentAddressableStorageClient: remoteexecution.NewContentAddressableStorageClient(client), + capabilitiesClient: remoteexecution.NewCapabilitiesClient(client), + maximumMessageSizeBytes: maximumMessageSizeBytes, + } +} + +func (ba *clsBlobAccess) Get(ctx context.Context, digest digest.Digest) buffer.Buffer { + digestFunction := digest.GetDigestFunction() + splitBlobsResponse, err := ba.contentAddressableStorageClient.SplitBlob(ctx, &remoteexecution.SplitBlobRequest{ + InstanceName: digestFunction.GetInstanceName().String(), + BlobDigest: digest.GetProto(), + DigestFunction: digestFunction.GetEnumValue(), + }) + if err != nil { + return buffer.NewBufferFromError(err) + } + return buffer.NewProtoBufferFromProto(splitBlobsResponse, buffer.BackendProvided(buffer.Irreparable(digest))) +} + +func (ba *clsBlobAccess) GetFromComposite(ctx context.Context, parentDigest, childDigest digest.Digest, slicer slicing.BlobSlicer) buffer.Buffer { + b, _ := slicer.Slice(ba.Get(ctx, parentDigest), childDigest) + return b +} + +func (ba *clsBlobAccess) Put(ctx context.Context, digest digest.Digest, b buffer.Buffer) error { + splitBlobResponseProto, err := b.ToProto(&remoteexecution.SplitBlobResponse{}, ba.maximumMessageSizeBytes) + if err != nil { + return err + } + splitBlobResponse := splitBlobResponseProto.(*remoteexecution.SplitBlobResponse) + digestFunction := digest.GetDigestFunction() + _, err = ba.contentAddressableStorageClient.SpliceBlob(ctx, &remoteexecution.SpliceBlobRequest{ + InstanceName: digestFunction.GetInstanceName().String(), + DigestFunction: digestFunction.GetEnumValue(), + ChunkDigests: splitBlobResponse.GetChunkDigests(), + ChunkingFunction: splitBlobResponse.GetChunkingFunction(), + BlobDigest: digest.GetProto(), + }) + return err +} + +func (ba *clsBlobAccess) FindMissing(ctx context.Context, digests digest.Set) (digest.Set, 
error) { + missing := digest.NewSetBuilder(digests.Length()) + for _, d := range digests.Items() { + _, err := ba.contentAddressableStorageClient.SplitBlob(ctx, &remoteexecution.SplitBlobRequest{ + InstanceName: d.GetInstanceName().String(), + BlobDigest: d.GetProto(), + DigestFunction: d.GetDigestFunction().GetEnumValue(), + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + }) + if status.Code(err) == codes.NotFound { + missing.Add(d) + } else if err != nil { + return digest.EmptySet, err + } + } + return missing.Build(), nil +} + +func (ba *clsBlobAccess) GetCapabilities(ctx context.Context, instanceName digest.InstanceName) (*remoteexecution.ServerCapabilities, error) { + serverCapabilities, err := getServerCapabilitiesWithCacheCapabilities(ctx, ba.capabilitiesClient, instanceName) + if err != nil { + return nil, err + } + cacheCapabilities := serverCapabilities.CacheCapabilities + // Only return fields that pertain to Chunk List Storage. + return &remoteexecution.ServerCapabilities{ + CacheCapabilities: &remoteexecution.CacheCapabilities{ + SplitBlobSupport: cacheCapabilities.SplitBlobSupport, + SpliceBlobSupport: cacheCapabilities.SpliceBlobSupport, + RepMaxCdcParams: cacheCapabilities.RepMaxCdcParams, + }, + }, nil +} diff --git a/pkg/blobstore/grpcservers/content_addressable_storage_server.go b/pkg/blobstore/grpcservers/content_addressable_storage_server.go index 18802b959..79397391c 100644 --- a/pkg/blobstore/grpcservers/content_addressable_storage_server.go +++ b/pkg/blobstore/grpcservers/content_addressable_storage_server.go @@ -15,14 +15,16 @@ import ( type contentAddressableStorageServer struct { contentAddressableStorage blobstore.BlobAccess + chunkListStorage blobstore.BlobAccess maximumMessageSizeBytes int64 } // NewContentAddressableStorageServer creates a GRPC service for serving // the contents of a Bazel Content Addressable Storage (CAS) to Bazel. 
-func NewContentAddressableStorageServer(contentAddressableStorage blobstore.BlobAccess, maximumMessageSizeBytes int64) remoteexecution.ContentAddressableStorageServer { +func NewContentAddressableStorageServer(contentAddressableStorage, chunkListStorage blobstore.BlobAccess, maximumMessageSizeBytes int64) remoteexecution.ContentAddressableStorageServer { return &contentAddressableStorageServer{ contentAddressableStorage: contentAddressableStorage, + chunkListStorage: chunkListStorage, maximumMessageSizeBytes: maximumMessageSizeBytes, } } @@ -56,6 +58,36 @@ func (s *contentAddressableStorageServer) FindMissingBlobs(ctx context.Context, for _, outDigest := range outDigests.Items() { partialDigests = append(partialDigests, outDigest.GetProto()) } + + // Server is configured with Chunk List Storage (CLS) so we must + // verify the CLS as well. Note that in this version of bb-storage a + // missing chunk list for a blob does not imply that the blob is + // missing. It is merely required to manage the life time of chunk + // lists. In a future version of bb-storage FMB calls will go to + // either the chunk storage or the chunk list storage. 
+ if s.chunkListStorage != nil { + capabilities, err := s.chunkListStorage.GetCapabilities(ctx, instanceName) + if err != nil { + return nil, err + } + if capabilities.GetCacheCapabilities().GetRepMaxCdcParams() == nil { + return nil, status.Error(codes.Internal, "This server implementation is only compatible with RepMaxCDC") + } + minChunkSize := capabilities.GetCacheCapabilities().GetRepMaxCdcParams().GetMinChunkSizeBytes() + maxChunkSize := 2*minChunkSize - 1 + bigBlobDigests := digest.NewSetBuilder(0) + for _, partialDigest := range in.BlobDigests { + if partialDigest.GetSizeBytes() > int64(maxChunkSize) { + digest, err := digestFunction.NewDigestFromProto(partialDigest) + if err != nil { + return nil, err + } + bigBlobDigests.Add(digest) + } + } + _, _ = s.chunkListStorage.FindMissing(ctx, bigBlobDigests.Build()) + } + return &remoteexecution.FindMissingBlobsResponse{ MissingBlobDigests: partialDigests, }, nil @@ -150,10 +182,59 @@ func (contentAddressableStorageServer) GetTree(in *remoteexecution.GetTreeReques return status.Error(codes.Unimplemented, "This service does not support downloading directory trees") } -func (contentAddressableStorageServer) SpliceBlob(ctx context.Context, in *remoteexecution.SpliceBlobRequest) (*remoteexecution.SpliceBlobResponse, error) { - return nil, status.Error(codes.Unimplemented, "This service does not support splicing blobs") +func (s *contentAddressableStorageServer) SpliceBlob(ctx context.Context, in *remoteexecution.SpliceBlobRequest) (*remoteexecution.SpliceBlobResponse, error) { + if s.chunkListStorage == nil { + return nil, status.Error(codes.Unimplemented, "This service does not support SpliceBlob") + } + + instanceName, err := digest.NewInstanceName(in.InstanceName) + if err != nil { + return nil, util.StatusWrapf(err, "Invalid instance name %#v", in.InstanceName) + } + digestFunction, err := instanceName.GetDigestFunction(in.DigestFunction, len(in.BlobDigest.GetHash())) + if err != nil { + return nil, err + } + 
blobDigest, err := digestFunction.NewDigestFromProto(in.BlobDigest) + if err != nil { + return nil, err + } + + splitBlobResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: in.ChunkDigests, + ChunkingFunction: in.ChunkingFunction, + } + b := buffer.NewProtoBufferFromProto(splitBlobResponse, buffer.UserProvided) + + if err := s.chunkListStorage.Put(ctx, blobDigest, b); err != nil { + return nil, err + } + + return &remoteexecution.SpliceBlobResponse{ + BlobDigest: in.BlobDigest, + }, nil } -func (contentAddressableStorageServer) SplitBlob(ctx context.Context, in *remoteexecution.SplitBlobRequest) (*remoteexecution.SplitBlobResponse, error) { - return nil, status.Error(codes.Unimplemented, "This service does not support splitting blobs") +func (s *contentAddressableStorageServer) SplitBlob(ctx context.Context, in *remoteexecution.SplitBlobRequest) (*remoteexecution.SplitBlobResponse, error) { + if s.chunkListStorage == nil { + return nil, status.Error(codes.Unimplemented, "This service does not support SplitBlob") + } + + instanceName, err := digest.NewInstanceName(in.InstanceName) + if err != nil { + return nil, util.StatusWrapf(err, "Invalid instance name %#v", in.InstanceName) + } + digestFunction, err := instanceName.GetDigestFunction(in.DigestFunction, len(in.BlobDigest.GetHash())) + if err != nil { + return nil, err + } + blobDigest, err := digestFunction.NewDigestFromProto(in.BlobDigest) + if err != nil { + return nil, err + } + splitBlobResponse, err := s.chunkListStorage.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, int(s.maximumMessageSizeBytes)) + if err != nil { + return nil, err + } + return splitBlobResponse.(*remoteexecution.SplitBlobResponse), nil } diff --git a/pkg/blobstore/grpcservers/content_addressable_storage_server_test.go b/pkg/blobstore/grpcservers/content_addressable_storage_server_test.go index 89a278297..7c5ed83b4 100644 --- a/pkg/blobstore/grpcservers/content_addressable_storage_server_test.go +++ 
b/pkg/blobstore/grpcservers/content_addressable_storage_server_test.go @@ -55,7 +55,7 @@ func TestContentAddressableStorageServerBatchReadBlobsSuccess(t *testing.T) { buf3 := buffer.NewBufferFromError(status.Error(codes.NotFound, "The object you requested could not be found")) contentAddressableStorage.EXPECT().Get(ctx, digest3).Return(buf3) - contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, 1<<16) + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, nil, 4<<20) response, err := contentAddressableStorageServer.BatchReadBlobs(ctx, request) require.NoError(t, err) @@ -108,8 +108,138 @@ func TestContentAddressableStorageServerBatchReadBlobsFailure(t *testing.T) { contentAddressableStorage := mock.NewMockBlobAccess(ctrl) - contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, 200) + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, nil, 200) _, err := contentAddressableStorageServer.BatchReadBlobs(ctx, request) testutil.RequireEqualStatus(t, status.Error(codes.InvalidArgument, "Attempted to read a total of at least 357 bytes, while a maximum of 200 bytes is permitted"), err) } + +func TestContentAddressableStorageServerFindMissingBlobs(t *testing.T) { + ctrl, ctx := gomock.WithContext(context.Background(), t) + + digest1 := digest.MustNewDigest("my_instance_name", remoteexecution.DigestFunction_SHA256, "409a7f83ac6b31dc8c77e3ec18038f209bd2f545e0f4177c2e2381aa4e067b49", 16) + digest2 := digest.MustNewDigest("my_instance_name", remoteexecution.DigestFunction_SHA256, "0479688f99e8cbc70291ce272876ff8e0db71a0889daf2752884b0996056b4a0", 256) + + request := &remoteexecution.FindMissingBlobsRequest{ + InstanceName: "my_instance_name", + BlobDigests: []*remoteexecution.Digest{ + {Hash: digest1.GetHashString(), SizeBytes: digest1.GetSizeBytes()}, + 
{Hash: digest2.GetHashString(), SizeBytes: digest2.GetSizeBytes()}, + }, + } + + contentAddressableStorage := mock.NewMockBlobAccess(ctrl) + chunkListStorage := mock.NewMockBlobAccess(ctrl) + setBuilder := digest.NewSetBuilder(2) + digestSet := setBuilder.Add(digest1).Add(digest2).Build() + + // Missing chunk lists is not an error, nor does it imply a missing + // blob at this stage. + contentAddressableStorage.EXPECT().FindMissing(ctx, digestSet).Return(digest.EmptySet, nil) + chunkListStorage.EXPECT().GetCapabilities(ctx, digest1.GetInstanceName()).Return( + &remoteexecution.ServerCapabilities{ + CacheCapabilities: &remoteexecution.CacheCapabilities{ + SplitBlobSupport: true, + SpliceBlobSupport: true, + RepMaxCdcParams: &remoteexecution.RepMaxCdcParams{ + MinChunkSizeBytes: 64, + HorizonSizeBytes: 128, + }, + }, + }, + nil, + ) + chunkListStorage.EXPECT().FindMissing(ctx, digest2.ToSingletonSet()).Return(digest2.ToSingletonSet(), nil) + + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, chunkListStorage, 200) + + response, err := contentAddressableStorageServer.FindMissingBlobs(ctx, request) + require.NoError(t, err) + require.Empty(t, response.GetMissingBlobDigests()) +} + +func TestContentAddressableStorageServerSplitBlob(t *testing.T) { + ctrl, ctx := gomock.WithContext(context.Background(), t) + + request := &remoteexecution.SplitBlobRequest{ + BlobDigest: &remoteexecution.Digest{ + Hash: "409a7f83ac6b31dc8c77e3ec18038f209bd2f545e0f4177c2e2381aa4e067b49", + SizeBytes: 16, + }, + InstanceName: "my_instance_name", + DigestFunction: remoteexecution.DigestFunction_SHA256, + } + + contentAddressableStorage := mock.NewMockBlobAccess(ctrl) + chunkListStorage := mock.NewMockBlobAccess(ctrl) + + instanceName, err := digest.NewInstanceName(request.InstanceName) + require.NoError(t, err) + digestFunction, err := instanceName.GetDigestFunction(request.DigestFunction, len(request.BlobDigest.Hash)) + 
require.NoError(t, err) + blobDigest, err := digestFunction.NewDigestFromProto(request.BlobDigest) + require.NoError(t, err) + + chunkListStorage.EXPECT().Get(ctx, blobDigest).Return( + buffer.NewProtoBufferFromProto( + &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{ + { + Hash: "409a7f83ac6b31dc8c77e3ec18038f209bd2f545e0f4177c2e2381aa4e067b49", + SizeBytes: 8, + }, + { + Hash: "409a7f83ac6b31dc8c77e3ec18038f209bd2f545e0f4177c2e2381aa4e067b49", + SizeBytes: 8, + }, + }, + }, + buffer.UserProvided, + ), + ) + + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, chunkListStorage, 200) + _, err = contentAddressableStorageServer.SplitBlob(ctx, request) + require.NoError(t, err) +} + +func TestContentAddressableStorageServerSpliceBlob(t *testing.T) { + ctrl, ctx := gomock.WithContext(context.Background(), t) + + request := &remoteexecution.SpliceBlobRequest{ + BlobDigest: &remoteexecution.Digest{ + Hash: "409a7f83ac6b31dc8c77e3ec18038f209bd2f545e0f4177c2e2381aa4e067b49", + SizeBytes: 16, + }, + ChunkDigests: []*remoteexecution.Digest{ + { + Hash: "409a7f83ac6b31dc8c77e3ec18038f209bd2f545e0f4177c2e2381aa4e067b49", + SizeBytes: 8, + }, + { + Hash: "409a7f83ac6b31dc8c77e3ec18038f209bd2f545e0f4177c2e2381aa4e067b49", + SizeBytes: 8, + }, + }, + InstanceName: "my_instance_name", + } + + contentAddressableStorage := mock.NewMockBlobAccess(ctrl) + chunkListStorage := mock.NewMockBlobAccess(ctrl) + + instanceName, err := digest.NewInstanceName(request.InstanceName) + require.NoError(t, err) + digestFunction, err := instanceName.GetDigestFunction(request.DigestFunction, len(request.BlobDigest.Hash)) + require.NoError(t, err) + blobDigest, err := digestFunction.NewDigestFromProto(request.BlobDigest) + require.NoError(t, err) + + chunkListStorage.EXPECT().Put(ctx, blobDigest, buffer.NewProtoBufferFromProto(&remoteexecution.SplitBlobResponse{ + ChunkDigests: request.ChunkDigests, + }, 
buffer.UserProvided)).Return(nil) + + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, chunkListStorage, 200) + response, err := contentAddressableStorageServer.SpliceBlob(ctx, request) + require.NoError(t, err) + require.Equal(t, request.BlobDigest, response.BlobDigest) +} diff --git a/pkg/proto/configuration/bb_storage/bb_storage.pb.go b/pkg/proto/configuration/bb_storage/bb_storage.pb.go index fa4fb021e..fb4668e07 100644 --- a/pkg/proto/configuration/bb_storage/bb_storage.pb.go +++ b/pkg/proto/configuration/bb_storage/bb_storage.pb.go @@ -35,6 +35,7 @@ type ApplicationConfiguration struct { MaximumMessageSizeBytes int64 `protobuf:"varint,8,opt,name=maximum_message_size_bytes,json=maximumMessageSizeBytes,proto3" json:"maximum_message_size_bytes,omitempty"` Global *global.Configuration `protobuf:"bytes,9,opt,name=global,proto3" json:"global,omitempty"` ContentAddressableStorage *ScannableBlobAccessConfiguration `protobuf:"bytes,17,opt,name=content_addressable_storage,json=contentAddressableStorage,proto3" json:"content_addressable_storage,omitempty"` + ChunkListStorage *ScannableBlobAccessConfiguration `protobuf:"bytes,22,opt,name=chunk_list_storage,json=chunkListStorage,proto3" json:"chunk_list_storage,omitempty"` ActionCache *NonScannableBlobAccessConfiguration `protobuf:"bytes,18,opt,name=action_cache,json=actionCache,proto3" json:"action_cache,omitempty"` IndirectContentAddressableStorage *ScannableBlobAccessConfiguration `protobuf:"bytes,10,opt,name=indirect_content_addressable_storage,json=indirectContentAddressableStorage,proto3" json:"indirect_content_addressable_storage,omitempty"` InitialSizeClassCache *NonScannableBlobAccessConfiguration `protobuf:"bytes,11,opt,name=initial_size_class_cache,json=initialSizeClassCache,proto3" json:"initial_size_class_cache,omitempty"` @@ -111,6 +112,13 @@ func (x *ApplicationConfiguration) GetContentAddressableStorage() *ScannableBlob return nil } +func (x 
*ApplicationConfiguration) GetChunkListStorage() *ScannableBlobAccessConfiguration { + if x != nil { + return x.ChunkListStorage + } + return nil +} + func (x *ApplicationConfiguration) GetActionCache() *NonScannableBlobAccessConfiguration { if x != nil { return x.ActionCache @@ -292,7 +300,7 @@ var File_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_s const file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_rawDesc = "" + "\n" + - "Sgithub.com/buildbarn/bb-storage/pkg/proto/configuration/bb_storage/bb_storage.proto\x12\"buildbarn.configuration.bb_storage\x1a6build/bazel/remote/execution/v2/remote_execution.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/auth/auth.proto\x1aQgithub.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore/blobstore.proto\x1aMgithub.com/buildbarn/bb-storage/pkg/proto/configuration/builder/builder.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/global/global.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/grpc/grpc.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/zstd/zstd.proto\"\xbd\v\n" + + "Sgithub.com/buildbarn/bb-storage/pkg/proto/configuration/bb_storage/bb_storage.proto\x12\"buildbarn.configuration.bb_storage\x1a6build/bazel/remote/execution/v2/remote_execution.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/auth/auth.proto\x1aQgithub.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore/blobstore.proto\x1aMgithub.com/buildbarn/bb-storage/pkg/proto/configuration/builder/builder.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/global/global.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/grpc/grpc.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/zstd/zstd.proto\"\xb1\f\n" + "\x18ApplicationConfiguration\x12T\n" + "\fgrpc_servers\x18\x04 \x03(\v21.buildbarn.configuration.grpc.ServerConfigurationR\vgrpcServers\x12l\n" + "\n" 
+ @@ -300,7 +308,8 @@ const file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb "schedulers\x12;\n" + "\x1amaximum_message_size_bytes\x18\b \x01(\x03R\x17maximumMessageSizeBytes\x12E\n" + "\x06global\x18\t \x01(\v2-.buildbarn.configuration.global.ConfigurationR\x06global\x12\x84\x01\n" + - "\x1bcontent_addressable_storage\x18\x11 \x01(\v2D.buildbarn.configuration.bb_storage.ScannableBlobAccessConfigurationR\x19contentAddressableStorage\x12j\n" + + "\x1bcontent_addressable_storage\x18\x11 \x01(\v2D.buildbarn.configuration.bb_storage.ScannableBlobAccessConfigurationR\x19contentAddressableStorage\x12r\n" + + "\x12chunk_list_storage\x18\x16 \x01(\v2D.buildbarn.configuration.bb_storage.ScannableBlobAccessConfigurationR\x10chunkListStorage\x12j\n" + "\faction_cache\x18\x12 \x01(\v2G.buildbarn.configuration.bb_storage.NonScannableBlobAccessConfigurationR\vactionCache\x12\x95\x01\n" + "$indirect_content_addressable_storage\x18\n" + " \x01(\v2D.buildbarn.configuration.bb_storage.ScannableBlobAccessConfigurationR!indirectContentAddressableStorage\x12\x80\x01\n" + @@ -353,26 +362,27 @@ var file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_s 3, // 1: buildbarn.configuration.bb_storage.ApplicationConfiguration.schedulers:type_name -> buildbarn.configuration.bb_storage.ApplicationConfiguration.SchedulersEntry 5, // 2: buildbarn.configuration.bb_storage.ApplicationConfiguration.global:type_name -> buildbarn.configuration.global.Configuration 2, // 3: buildbarn.configuration.bb_storage.ApplicationConfiguration.content_addressable_storage:type_name -> buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration - 1, // 4: buildbarn.configuration.bb_storage.ApplicationConfiguration.action_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration - 2, // 5: buildbarn.configuration.bb_storage.ApplicationConfiguration.indirect_content_addressable_storage:type_name -> 
buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration - 1, // 6: buildbarn.configuration.bb_storage.ApplicationConfiguration.initial_size_class_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration - 1, // 7: buildbarn.configuration.bb_storage.ApplicationConfiguration.file_system_access_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration - 6, // 8: buildbarn.configuration.bb_storage.ApplicationConfiguration.execute_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 7, // 9: buildbarn.configuration.bb_storage.ApplicationConfiguration.supported_compressors:type_name -> build.bazel.remote.execution.v2.Compressor.Value - 8, // 10: buildbarn.configuration.bb_storage.ApplicationConfiguration.zstd_pool:type_name -> buildbarn.configuration.zstd.PoolConfiguration - 9, // 11: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 6, // 12: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.get_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 6, // 13: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.put_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 9, // 14: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 6, // 15: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.get_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 6, // 16: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.put_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 6, // 17: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.find_missing_authorizer:type_name -> 
buildbarn.configuration.auth.AuthorizerConfiguration - 10, // 18: buildbarn.configuration.bb_storage.ApplicationConfiguration.SchedulersEntry.value:type_name -> buildbarn.configuration.builder.SchedulerConfiguration - 19, // [19:19] is the sub-list for method output_type - 19, // [19:19] is the sub-list for method input_type - 19, // [19:19] is the sub-list for extension type_name - 19, // [19:19] is the sub-list for extension extendee - 0, // [0:19] is the sub-list for field type_name + 2, // 4: buildbarn.configuration.bb_storage.ApplicationConfiguration.chunk_list_storage:type_name -> buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration + 1, // 5: buildbarn.configuration.bb_storage.ApplicationConfiguration.action_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration + 2, // 6: buildbarn.configuration.bb_storage.ApplicationConfiguration.indirect_content_addressable_storage:type_name -> buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration + 1, // 7: buildbarn.configuration.bb_storage.ApplicationConfiguration.initial_size_class_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration + 1, // 8: buildbarn.configuration.bb_storage.ApplicationConfiguration.file_system_access_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration + 6, // 9: buildbarn.configuration.bb_storage.ApplicationConfiguration.execute_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 7, // 10: buildbarn.configuration.bb_storage.ApplicationConfiguration.supported_compressors:type_name -> build.bazel.remote.execution.v2.Compressor.Value + 8, // 11: buildbarn.configuration.bb_storage.ApplicationConfiguration.zstd_pool:type_name -> buildbarn.configuration.zstd.PoolConfiguration + 9, // 12: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 6, 
// 13: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.get_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 6, // 14: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.put_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 9, // 15: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 6, // 16: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.get_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 6, // 17: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.put_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 6, // 18: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.find_missing_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 10, // 19: buildbarn.configuration.bb_storage.ApplicationConfiguration.SchedulersEntry.value:type_name -> buildbarn.configuration.builder.SchedulerConfiguration + 20, // [20:20] is the sub-list for method output_type + 20, // [20:20] is the sub-list for method input_type + 20, // [20:20] is the sub-list for extension type_name + 20, // [20:20] is the sub-list for extension extendee + 0, // [0:20] is the sub-list for field type_name } func init() { diff --git a/pkg/proto/configuration/bb_storage/bb_storage.proto b/pkg/proto/configuration/bb_storage/bb_storage.proto index 365c75527..55f431b9b 100644 --- a/pkg/proto/configuration/bb_storage/bb_storage.proto +++ b/pkg/proto/configuration/bb_storage/bb_storage.proto @@ -61,6 +61,10 @@ message ApplicationConfiguration { // Storage (CAS). ScannableBlobAccessConfiguration content_addressable_storage = 17; + // Optional: Blobstore configuration for the Chunk List Storage + // (CLS). 
+ ScannableBlobAccessConfiguration chunk_list_storage = 22; + // Optional: Blobstore configuration for the Action Cache (AC). NonScannableBlobAccessConfiguration action_cache = 18; diff --git a/pkg/proto/configuration/blobstore/BUILD.bazel b/pkg/proto/configuration/blobstore/BUILD.bazel index a64c36145..83d0b07cc 100644 --- a/pkg/proto/configuration/blobstore/BUILD.bazel +++ b/pkg/proto/configuration/blobstore/BUILD.bazel @@ -14,6 +14,7 @@ proto_library( "//pkg/proto/configuration/digest:digest_proto", "//pkg/proto/configuration/grpc:grpc_proto", "//pkg/proto/configuration/http/client:client_proto", + "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_proto", "@googleapis//google/rpc:status_proto", "@protobuf//:duration_proto", "@protobuf//:empty_proto", @@ -33,6 +34,7 @@ go_proto_library( "//pkg/proto/configuration/digest", "//pkg/proto/configuration/grpc", "//pkg/proto/configuration/http/client", + "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", "@org_golang_google_genproto_googleapis_rpc//status", ], ) diff --git a/pkg/proto/configuration/blobstore/blobstore.pb.go b/pkg/proto/configuration/blobstore/blobstore.pb.go index dc3f8a4e0..237805e13 100644 --- a/pkg/proto/configuration/blobstore/blobstore.pb.go +++ b/pkg/proto/configuration/blobstore/blobstore.pb.go @@ -7,6 +7,7 @@ package blobstore import ( + v2 "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" blockdevice "github.com/buildbarn/bb-storage/pkg/proto/configuration/blockdevice" aws "github.com/buildbarn/bb-storage/pkg/proto/configuration/cloud/aws" gcp "github.com/buildbarn/bb-storage/pkg/proto/configuration/cloud/gcp" @@ -106,6 +107,7 @@ type BlobAccessConfiguration struct { // *BlobAccessConfiguration_WithLabels // *BlobAccessConfiguration_Label // *BlobAccessConfiguration_DeadlineEnforcing + // *BlobAccessConfiguration_ChunkListValidating Backend isBlobAccessConfiguration_Backend `protobuf_oneof:"backend"` unknownFields 
protoimpl.UnknownFields sizeCache protoimpl.SizeCache @@ -319,6 +321,15 @@ func (x *BlobAccessConfiguration) GetDeadlineEnforcing() *DeadlineEnforcingBlobA return nil } +func (x *BlobAccessConfiguration) GetChunkListValidating() *ChunkListValidatingBlobAccessConfiguration { + if x != nil { + if x, ok := x.Backend.(*BlobAccessConfiguration_ChunkListValidating); ok { + return x.ChunkListValidating + } + } + return nil +} + type isBlobAccessConfiguration_Backend interface { isBlobAccessConfiguration_Backend() } @@ -399,6 +410,10 @@ type BlobAccessConfiguration_DeadlineEnforcing struct { DeadlineEnforcing *DeadlineEnforcingBlobAccess `protobuf:"bytes,28,opt,name=deadline_enforcing,json=deadlineEnforcing,proto3,oneof"` } +type BlobAccessConfiguration_ChunkListValidating struct { + ChunkListValidating *ChunkListValidatingBlobAccessConfiguration `protobuf:"bytes,29,opt,name=chunk_list_validating,json=chunkListValidating,proto3,oneof"` +} + func (*BlobAccessConfiguration_ReadCaching) isBlobAccessConfiguration_Backend() {} func (*BlobAccessConfiguration_Grpc) isBlobAccessConfiguration_Backend() {} @@ -437,6 +452,8 @@ func (*BlobAccessConfiguration_Label) isBlobAccessConfiguration_Backend() {} func (*BlobAccessConfiguration_DeadlineEnforcing) isBlobAccessConfiguration_Backend() {} +func (*BlobAccessConfiguration_ChunkListValidating) isBlobAccessConfiguration_Backend() {} + type ReadCachingBlobAccessConfiguration struct { state protoimpl.MessageState `protogen:"open.v1"` Slow *BlobAccessConfiguration `protobuf:"bytes,1,opt,name=slow,proto3" json:"slow,omitempty"` @@ -628,6 +645,7 @@ type LocalBlobAccessConfiguration struct { BlocksBackend isLocalBlobAccessConfiguration_BlocksBackend `protobuf_oneof:"blocks_backend"` Persistent *LocalBlobAccessConfiguration_Persistent `protobuf:"bytes,13,opt,name=persistent,proto3" json:"persistent,omitempty"` HierarchicalInstanceNames bool `protobuf:"varint,14,opt,name=hierarchical_instance_names,json=hierarchicalInstanceNames,proto3" 
json:"hierarchical_instance_names,omitempty"` + ChunkingParameters *v2.RepMaxCdcParams `protobuf:"bytes,15,opt,name=chunking_parameters,json=chunkingParameters,proto3" json:"chunking_parameters,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -761,6 +779,13 @@ func (x *LocalBlobAccessConfiguration) GetHierarchicalInstanceNames() bool { return false } +func (x *LocalBlobAccessConfiguration) GetChunkingParameters() *v2.RepMaxCdcParams { + if x != nil { + return x.ChunkingParameters + } + return nil +} + type isLocalBlobAccessConfiguration_KeyLocationMapBackend interface { isLocalBlobAccessConfiguration_KeyLocationMapBackend() } @@ -1726,6 +1751,50 @@ func (x *GrpcBlobAccessConfiguration) GetEnableCompression() bool { return false } +type ChunkListValidatingBlobAccessConfiguration struct { + state protoimpl.MessageState `protogen:"open.v1"` + Backend *BlobAccessConfiguration `protobuf:"bytes,1,opt,name=backend,proto3" json:"backend,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ChunkListValidatingBlobAccessConfiguration) Reset() { + *x = ChunkListValidatingBlobAccessConfiguration{} + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[21] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ChunkListValidatingBlobAccessConfiguration) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ChunkListValidatingBlobAccessConfiguration) ProtoMessage() {} + +func (x *ChunkListValidatingBlobAccessConfiguration) ProtoReflect() protoreflect.Message { + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[21] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use 
ChunkListValidatingBlobAccessConfiguration.ProtoReflect.Descriptor instead. +func (*ChunkListValidatingBlobAccessConfiguration) Descriptor() ([]byte, []int) { + return file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_rawDescGZIP(), []int{21} +} + +func (x *ChunkListValidatingBlobAccessConfiguration) GetBackend() *BlobAccessConfiguration { + if x != nil { + return x.Backend + } + return nil +} + type ShardingBlobAccessConfiguration_Shard struct { state protoimpl.MessageState `protogen:"open.v1"` Backend *BlobAccessConfiguration `protobuf:"bytes,1,opt,name=backend,proto3" json:"backend,omitempty"` @@ -1736,7 +1805,7 @@ type ShardingBlobAccessConfiguration_Shard struct { func (x *ShardingBlobAccessConfiguration_Shard) Reset() { *x = ShardingBlobAccessConfiguration_Shard{} - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[21] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[22] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1748,7 +1817,7 @@ func (x *ShardingBlobAccessConfiguration_Shard) String() string { func (*ShardingBlobAccessConfiguration_Shard) ProtoMessage() {} func (x *ShardingBlobAccessConfiguration_Shard) ProtoReflect() protoreflect.Message { - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[21] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[22] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1787,7 +1856,7 @@ type LocalBlobAccessConfiguration_KeyLocationMapInMemory struct { func (x *LocalBlobAccessConfiguration_KeyLocationMapInMemory) Reset() { *x = LocalBlobAccessConfiguration_KeyLocationMapInMemory{} - mi := 
&file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[23] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[24] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1799,7 +1868,7 @@ func (x *LocalBlobAccessConfiguration_KeyLocationMapInMemory) String() string { func (*LocalBlobAccessConfiguration_KeyLocationMapInMemory) ProtoMessage() {} func (x *LocalBlobAccessConfiguration_KeyLocationMapInMemory) ProtoReflect() protoreflect.Message { - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[23] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[24] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1831,7 +1900,7 @@ type LocalBlobAccessConfiguration_BlocksInMemory struct { func (x *LocalBlobAccessConfiguration_BlocksInMemory) Reset() { *x = LocalBlobAccessConfiguration_BlocksInMemory{} - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[24] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[25] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1843,7 +1912,7 @@ func (x *LocalBlobAccessConfiguration_BlocksInMemory) String() string { func (*LocalBlobAccessConfiguration_BlocksInMemory) ProtoMessage() {} func (x *LocalBlobAccessConfiguration_BlocksInMemory) ProtoReflect() protoreflect.Message { - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[24] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[25] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1877,7 +1946,7 @@ type 
LocalBlobAccessConfiguration_BlocksOnBlockDevice struct { func (x *LocalBlobAccessConfiguration_BlocksOnBlockDevice) Reset() { *x = LocalBlobAccessConfiguration_BlocksOnBlockDevice{} - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[25] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[26] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1889,7 +1958,7 @@ func (x *LocalBlobAccessConfiguration_BlocksOnBlockDevice) String() string { func (*LocalBlobAccessConfiguration_BlocksOnBlockDevice) ProtoMessage() {} func (x *LocalBlobAccessConfiguration_BlocksOnBlockDevice) ProtoReflect() protoreflect.Message { - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[25] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[26] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1936,7 +2005,7 @@ type LocalBlobAccessConfiguration_Persistent struct { func (x *LocalBlobAccessConfiguration_Persistent) Reset() { *x = LocalBlobAccessConfiguration_Persistent{} - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[26] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[27] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1948,7 +2017,7 @@ func (x *LocalBlobAccessConfiguration_Persistent) String() string { func (*LocalBlobAccessConfiguration_Persistent) ProtoMessage() {} func (x *LocalBlobAccessConfiguration_Persistent) ProtoReflect() protoreflect.Message { - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[26] + mi := 
&file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[27] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1982,10 +2051,10 @@ var File_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobs const file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_rawDesc = "" + "\n" + - "Qgithub.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore/blobstore.proto\x12!buildbarn.configuration.blobstore\x1aUgithub.com/buildbarn/bb-storage/pkg/proto/configuration/blockdevice/blockdevice.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/cloud/aws/aws.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/cloud/gcp/gcp.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/digest/digest.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/grpc/grpc.proto\x1aPgithub.com/buildbarn/bb-storage/pkg/proto/configuration/http/client/client.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x17google/rpc/status.proto\"\xf3\x01\n" + + 
"Qgithub.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore/blobstore.proto\x12!buildbarn.configuration.blobstore\x1a6build/bazel/remote/execution/v2/remote_execution.proto\x1aUgithub.com/buildbarn/bb-storage/pkg/proto/configuration/blockdevice/blockdevice.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/cloud/aws/aws.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/cloud/gcp/gcp.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/digest/digest.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/grpc/grpc.proto\x1aPgithub.com/buildbarn/bb-storage/pkg/proto/configuration/http/client/client.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x17google/rpc/status.proto\"\xf3\x01\n" + "\x16BlobstoreConfiguration\x12z\n" + "\x1bcontent_addressable_storage\x18\x01 \x01(\v2:.buildbarn.configuration.blobstore.BlobAccessConfigurationR\x19contentAddressableStorage\x12]\n" + - "\faction_cache\x18\x02 \x01(\v2:.buildbarn.configuration.blobstore.BlobAccessConfigurationR\vactionCache\"\xe3\x0f\n" + + "\faction_cache\x18\x02 \x01(\v2:.buildbarn.configuration.blobstore.BlobAccessConfigurationR\vactionCache\"\xe9\x10\n" + "\x17BlobAccessConfiguration\x12j\n" + "\fread_caching\x18\x04 \x01(\v2E.buildbarn.configuration.blobstore.ReadCachingBlobAccessConfigurationH\x00R\vreadCaching\x12T\n" + "\x04grpc\x18\a \x01(\v2>.buildbarn.configuration.blobstore.GrpcBlobAccessConfigurationH\x00R\x04grpc\x12*\n" + @@ -2008,7 +2077,8 @@ const file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blo "\vwith_labels\x18\x1a \x01(\v2D.buildbarn.configuration.blobstore.WithLabelsBlobAccessConfigurationH\x00R\n" + "withLabels\x12\x16\n" + "\x05label\x18\x1b \x01(\tH\x00R\x05label\x12o\n" + - "\x12deadline_enforcing\x18\x1c \x01(\v2>.buildbarn.configuration.blobstore.DeadlineEnforcingBlobAccessH\x00R\x11deadlineEnforcingB\t\n" + + 
"\x12deadline_enforcing\x18\x1c \x01(\v2>.buildbarn.configuration.blobstore.DeadlineEnforcingBlobAccessH\x00R\x11deadlineEnforcing\x12\x83\x01\n" + + "\x15chunk_list_validating\x18\x1d \x01(\v2M.buildbarn.configuration.blobstore.ChunkListValidatingBlobAccessConfigurationH\x00R\x13chunkListValidatingB\t\n" + "\abackendJ\x04\b\x02\x10\x03J\x04\b\x03\x10\x04J\x04\b\x05\x10\x06J\x04\b\x06\x10\aJ\x04\b\n" + "\x10\v\"\xa4\x02\n" + "\"ReadCachingBlobAccessConfiguration\x12N\n" + @@ -2029,7 +2099,7 @@ const file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blo "\tbackend_a\x18\x01 \x01(\v2:.buildbarn.configuration.blobstore.BlobAccessConfigurationR\bbackendA\x12W\n" + "\tbackend_b\x18\x02 \x01(\v2:.buildbarn.configuration.blobstore.BlobAccessConfigurationR\bbackendB\x12i\n" + "\x11replicator_a_to_b\x18\x03 \x01(\v2>.buildbarn.configuration.blobstore.BlobReplicatorConfigurationR\x0ereplicatorAToB\x12i\n" + - "\x11replicator_b_to_a\x18\x04 \x01(\v2>.buildbarn.configuration.blobstore.BlobReplicatorConfigurationR\x0ereplicatorBToA\"\xb6\f\n" + + "\x11replicator_b_to_a\x18\x04 \x01(\v2>.buildbarn.configuration.blobstore.BlobReplicatorConfigurationR\x0ereplicatorBToA\"\x99\r\n" + "\x1cLocalBlobAccessConfiguration\x12\x94\x01\n" + "\x1akey_location_map_in_memory\x18\v \x01(\v2V.buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.KeyLocationMapInMemoryH\x00R\x16keyLocationMapInMemory\x12{\n" + " key_location_map_on_block_device\x18\f \x01(\v22.buildbarn.configuration.blockdevice.ConfigurationH\x00R\x1bkeyLocationMapOnBlockDevice\x12O\n" + @@ -2046,7 +2116,8 @@ const file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blo "\n" + "persistent\x18\r \x01(\v2J.buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.PersistentR\n" + "persistent\x12>\n" + - "\x1bhierarchical_instance_names\x18\x0e \x01(\bR\x19hierarchicalInstanceNames\x1a2\n" + + "\x1bhierarchical_instance_names\x18\x0e 
\x01(\bR\x19hierarchicalInstanceNames\x12a\n" + + "\x13chunking_parameters\x18\x0f \x01(\v20.build.bazel.remote.execution.v2.RepMaxCdcParamsR\x12chunkingParameters\x1a2\n" + "\x16KeyLocationMapInMemory\x12\x18\n" + "\aentries\x18\x01 \x01(\x03R\aentries\x1a:\n" + "\x0eBlocksInMemory\x12(\n" + @@ -2127,7 +2198,9 @@ const file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blo "\abackend\x18\x02 \x01(\v2:.buildbarn.configuration.blobstore.BlobAccessConfigurationR\abackend\"\x97\x01\n" + "\x1bGrpcBlobAccessConfiguration\x12I\n" + "\x06client\x18\x01 \x01(\v21.buildbarn.configuration.grpc.ClientConfigurationR\x06client\x12-\n" + - "\x12enable_compression\x18\x02 \x01(\bR\x11enableCompressionBCZAgithub.com/buildbarn/bb-storage/pkg/proto/configuration/blobstoreb\x06proto3" + "\x12enable_compression\x18\x02 \x01(\bR\x11enableCompression\"\x82\x01\n" + + "*ChunkListValidatingBlobAccessConfiguration\x12T\n" + + "\abackend\x18\x01 \x01(\v2:.buildbarn.configuration.blobstore.BlobAccessConfigurationR\abackendBCZAgithub.com/buildbarn/bb-storage/pkg/proto/configuration/blobstoreb\x06proto3" var ( file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_rawDescOnce sync.Once @@ -2141,7 +2214,7 @@ func file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blob return file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_rawDescData } -var file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes = make([]protoimpl.MessageInfo, 29) +var file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes = make([]protoimpl.MessageInfo, 30) var file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_goTypes = []any{ (*BlobstoreConfiguration)(nil), // 0: buildbarn.configuration.blobstore.BlobstoreConfiguration (*BlobAccessConfiguration)(nil), // 1: 
buildbarn.configuration.blobstore.BlobAccessConfiguration @@ -2164,31 +2237,33 @@ var file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobs (*WithLabelsBlobAccessConfiguration)(nil), // 18: buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration (*DeadlineEnforcingBlobAccess)(nil), // 19: buildbarn.configuration.blobstore.DeadlineEnforcingBlobAccess (*GrpcBlobAccessConfiguration)(nil), // 20: buildbarn.configuration.blobstore.GrpcBlobAccessConfiguration - (*ShardingBlobAccessConfiguration_Shard)(nil), // 21: buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.Shard - nil, // 22: buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.ShardsEntry - (*LocalBlobAccessConfiguration_KeyLocationMapInMemory)(nil), // 23: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.KeyLocationMapInMemory - (*LocalBlobAccessConfiguration_BlocksInMemory)(nil), // 24: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksInMemory - (*LocalBlobAccessConfiguration_BlocksOnBlockDevice)(nil), // 25: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksOnBlockDevice - (*LocalBlobAccessConfiguration_Persistent)(nil), // 26: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.Persistent - nil, // 27: buildbarn.configuration.blobstore.DemultiplexingBlobAccessConfiguration.InstanceNamePrefixesEntry - nil, // 28: buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration.LabelsEntry - (*status.Status)(nil), // 29: google.rpc.Status - (*blockdevice.Configuration)(nil), // 30: buildbarn.configuration.blockdevice.Configuration - (*digest.ExistenceCacheConfiguration)(nil), // 31: buildbarn.configuration.digest.ExistenceCacheConfiguration - (*aws.SessionConfiguration)(nil), // 32: buildbarn.configuration.cloud.aws.SessionConfiguration - (*client.Configuration)(nil), // 33: buildbarn.configuration.http.client.Configuration - (*gcp.ClientOptionsConfiguration)(nil), // 34: 
buildbarn.configuration.cloud.gcp.ClientOptionsConfiguration - (*emptypb.Empty)(nil), // 35: google.protobuf.Empty - (*grpc.ClientConfiguration)(nil), // 36: buildbarn.configuration.grpc.ClientConfiguration - (*durationpb.Duration)(nil), // 37: google.protobuf.Duration - (*timestamppb.Timestamp)(nil), // 38: google.protobuf.Timestamp + (*ChunkListValidatingBlobAccessConfiguration)(nil), // 21: buildbarn.configuration.blobstore.ChunkListValidatingBlobAccessConfiguration + (*ShardingBlobAccessConfiguration_Shard)(nil), // 22: buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.Shard + nil, // 23: buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.ShardsEntry + (*LocalBlobAccessConfiguration_KeyLocationMapInMemory)(nil), // 24: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.KeyLocationMapInMemory + (*LocalBlobAccessConfiguration_BlocksInMemory)(nil), // 25: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksInMemory + (*LocalBlobAccessConfiguration_BlocksOnBlockDevice)(nil), // 26: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksOnBlockDevice + (*LocalBlobAccessConfiguration_Persistent)(nil), // 27: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.Persistent + nil, // 28: buildbarn.configuration.blobstore.DemultiplexingBlobAccessConfiguration.InstanceNamePrefixesEntry + nil, // 29: buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration.LabelsEntry + (*status.Status)(nil), // 30: google.rpc.Status + (*blockdevice.Configuration)(nil), // 31: buildbarn.configuration.blockdevice.Configuration + (*v2.RepMaxCdcParams)(nil), // 32: build.bazel.remote.execution.v2.RepMaxCdcParams + (*digest.ExistenceCacheConfiguration)(nil), // 33: buildbarn.configuration.digest.ExistenceCacheConfiguration + (*aws.SessionConfiguration)(nil), // 34: buildbarn.configuration.cloud.aws.SessionConfiguration + (*client.Configuration)(nil), // 35: 
buildbarn.configuration.http.client.Configuration + (*gcp.ClientOptionsConfiguration)(nil), // 36: buildbarn.configuration.cloud.gcp.ClientOptionsConfiguration + (*emptypb.Empty)(nil), // 37: google.protobuf.Empty + (*grpc.ClientConfiguration)(nil), // 38: buildbarn.configuration.grpc.ClientConfiguration + (*durationpb.Duration)(nil), // 39: google.protobuf.Duration + (*timestamppb.Timestamp)(nil), // 40: google.protobuf.Timestamp } var file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_depIdxs = []int32{ 1, // 0: buildbarn.configuration.blobstore.BlobstoreConfiguration.content_addressable_storage:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration 1, // 1: buildbarn.configuration.blobstore.BlobstoreConfiguration.action_cache:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration 2, // 2: buildbarn.configuration.blobstore.BlobAccessConfiguration.read_caching:type_name -> buildbarn.configuration.blobstore.ReadCachingBlobAccessConfiguration 20, // 3: buildbarn.configuration.blobstore.BlobAccessConfiguration.grpc:type_name -> buildbarn.configuration.blobstore.GrpcBlobAccessConfiguration - 29, // 4: buildbarn.configuration.blobstore.BlobAccessConfiguration.error:type_name -> google.rpc.Status + 30, // 4: buildbarn.configuration.blobstore.BlobAccessConfiguration.error:type_name -> google.rpc.Status 3, // 5: buildbarn.configuration.blobstore.BlobAccessConfiguration.sharding:type_name -> buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration 4, // 6: buildbarn.configuration.blobstore.BlobAccessConfiguration.mirrored:type_name -> buildbarn.configuration.blobstore.MirroredBlobAccessConfiguration 5, // 7: buildbarn.configuration.blobstore.BlobAccessConfiguration.local:type_name -> buildbarn.configuration.blobstore.LocalBlobAccessConfiguration @@ -2204,66 +2279,69 @@ var file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobs 17, // 17: 
buildbarn.configuration.blobstore.BlobAccessConfiguration.zip_writing:type_name -> buildbarn.configuration.blobstore.ZIPBlobAccessConfiguration 18, // 18: buildbarn.configuration.blobstore.BlobAccessConfiguration.with_labels:type_name -> buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration 19, // 19: buildbarn.configuration.blobstore.BlobAccessConfiguration.deadline_enforcing:type_name -> buildbarn.configuration.blobstore.DeadlineEnforcingBlobAccess - 1, // 20: buildbarn.configuration.blobstore.ReadCachingBlobAccessConfiguration.slow:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 1, // 21: buildbarn.configuration.blobstore.ReadCachingBlobAccessConfiguration.fast:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 10, // 22: buildbarn.configuration.blobstore.ReadCachingBlobAccessConfiguration.replicator:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration - 22, // 23: buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.shards:type_name -> buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.ShardsEntry - 1, // 24: buildbarn.configuration.blobstore.MirroredBlobAccessConfiguration.backend_a:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 1, // 25: buildbarn.configuration.blobstore.MirroredBlobAccessConfiguration.backend_b:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 10, // 26: buildbarn.configuration.blobstore.MirroredBlobAccessConfiguration.replicator_a_to_b:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration - 10, // 27: buildbarn.configuration.blobstore.MirroredBlobAccessConfiguration.replicator_b_to_a:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration - 23, // 28: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.key_location_map_in_memory:type_name -> buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.KeyLocationMapInMemory - 30, 
// 29: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.key_location_map_on_block_device:type_name -> buildbarn.configuration.blockdevice.Configuration - 24, // 30: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.blocks_in_memory:type_name -> buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksInMemory - 25, // 31: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.blocks_on_block_device:type_name -> buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksOnBlockDevice - 26, // 32: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.persistent:type_name -> buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.Persistent - 1, // 33: buildbarn.configuration.blobstore.ExistenceCachingBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 31, // 34: buildbarn.configuration.blobstore.ExistenceCachingBlobAccessConfiguration.existence_cache:type_name -> buildbarn.configuration.digest.ExistenceCacheConfiguration - 1, // 35: buildbarn.configuration.blobstore.CompletenessCheckingBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 1, // 36: buildbarn.configuration.blobstore.ReadFallbackBlobAccessConfiguration.primary:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 1, // 37: buildbarn.configuration.blobstore.ReadFallbackBlobAccessConfiguration.secondary:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 10, // 38: buildbarn.configuration.blobstore.ReadFallbackBlobAccessConfiguration.replicator:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration - 1, // 39: buildbarn.configuration.blobstore.ReferenceExpandingBlobAccessConfiguration.indirect_content_addressable_storage:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 32, // 40: 
buildbarn.configuration.blobstore.ReferenceExpandingBlobAccessConfiguration.aws_session:type_name -> buildbarn.configuration.cloud.aws.SessionConfiguration - 33, // 41: buildbarn.configuration.blobstore.ReferenceExpandingBlobAccessConfiguration.http_client:type_name -> buildbarn.configuration.http.client.Configuration - 34, // 42: buildbarn.configuration.blobstore.ReferenceExpandingBlobAccessConfiguration.gcp_client_options:type_name -> buildbarn.configuration.cloud.gcp.ClientOptionsConfiguration - 1, // 43: buildbarn.configuration.blobstore.ReferenceExpandingBlobAccessConfiguration.content_addressable_storage:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 35, // 44: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.local:type_name -> google.protobuf.Empty - 36, // 45: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.remote:type_name -> buildbarn.configuration.grpc.ClientConfiguration - 11, // 46: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.queued:type_name -> buildbarn.configuration.blobstore.QueuedBlobReplicatorConfiguration - 35, // 47: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.noop:type_name -> google.protobuf.Empty - 10, // 48: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.deduplicating:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration - 12, // 49: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.concurrency_limiting:type_name -> buildbarn.configuration.blobstore.ConcurrencyLimitingBlobReplicatorConfiguration - 10, // 50: buildbarn.configuration.blobstore.QueuedBlobReplicatorConfiguration.base:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration - 31, // 51: buildbarn.configuration.blobstore.QueuedBlobReplicatorConfiguration.existence_cache:type_name -> buildbarn.configuration.digest.ExistenceCacheConfiguration - 10, // 52: 
buildbarn.configuration.blobstore.ConcurrencyLimitingBlobReplicatorConfiguration.base:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration - 27, // 53: buildbarn.configuration.blobstore.DemultiplexingBlobAccessConfiguration.instance_name_prefixes:type_name -> buildbarn.configuration.blobstore.DemultiplexingBlobAccessConfiguration.InstanceNamePrefixesEntry - 1, // 54: buildbarn.configuration.blobstore.DemultiplexedBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 1, // 55: buildbarn.configuration.blobstore.ActionResultExpiringBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 37, // 56: buildbarn.configuration.blobstore.ActionResultExpiringBlobAccessConfiguration.minimum_validity:type_name -> google.protobuf.Duration - 37, // 57: buildbarn.configuration.blobstore.ActionResultExpiringBlobAccessConfiguration.maximum_validity_jitter:type_name -> google.protobuf.Duration - 38, // 58: buildbarn.configuration.blobstore.ActionResultExpiringBlobAccessConfiguration.minimum_timestamp:type_name -> google.protobuf.Timestamp - 1, // 59: buildbarn.configuration.blobstore.ReadCanaryingBlobAccessConfiguration.source:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 1, // 60: buildbarn.configuration.blobstore.ReadCanaryingBlobAccessConfiguration.replica:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 37, // 61: buildbarn.configuration.blobstore.ReadCanaryingBlobAccessConfiguration.maximum_cache_duration:type_name -> google.protobuf.Duration - 31, // 62: buildbarn.configuration.blobstore.ZIPBlobAccessConfiguration.data_integrity_validation_cache:type_name -> buildbarn.configuration.digest.ExistenceCacheConfiguration - 1, // 63: buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 28, // 64: 
buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration.labels:type_name -> buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration.LabelsEntry - 37, // 65: buildbarn.configuration.blobstore.DeadlineEnforcingBlobAccess.timeout:type_name -> google.protobuf.Duration - 1, // 66: buildbarn.configuration.blobstore.DeadlineEnforcingBlobAccess.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 36, // 67: buildbarn.configuration.blobstore.GrpcBlobAccessConfiguration.client:type_name -> buildbarn.configuration.grpc.ClientConfiguration - 1, // 68: buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.Shard.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 21, // 69: buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.ShardsEntry.value:type_name -> buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.Shard - 30, // 70: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksOnBlockDevice.source:type_name -> buildbarn.configuration.blockdevice.Configuration - 31, // 71: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksOnBlockDevice.data_integrity_validation_cache:type_name -> buildbarn.configuration.digest.ExistenceCacheConfiguration - 37, // 72: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.Persistent.minimum_epoch_interval:type_name -> google.protobuf.Duration - 14, // 73: buildbarn.configuration.blobstore.DemultiplexingBlobAccessConfiguration.InstanceNamePrefixesEntry.value:type_name -> buildbarn.configuration.blobstore.DemultiplexedBlobAccessConfiguration - 1, // 74: buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration.LabelsEntry.value:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 75, // [75:75] is the sub-list for method output_type - 75, // [75:75] is the sub-list for method input_type - 75, // [75:75] is the sub-list for extension type_name - 75, // [75:75] 
is the sub-list for extension extendee - 0, // [0:75] is the sub-list for field type_name + 21, // 20: buildbarn.configuration.blobstore.BlobAccessConfiguration.chunk_list_validating:type_name -> buildbarn.configuration.blobstore.ChunkListValidatingBlobAccessConfiguration + 1, // 21: buildbarn.configuration.blobstore.ReadCachingBlobAccessConfiguration.slow:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 1, // 22: buildbarn.configuration.blobstore.ReadCachingBlobAccessConfiguration.fast:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 10, // 23: buildbarn.configuration.blobstore.ReadCachingBlobAccessConfiguration.replicator:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration + 23, // 24: buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.shards:type_name -> buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.ShardsEntry + 1, // 25: buildbarn.configuration.blobstore.MirroredBlobAccessConfiguration.backend_a:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 1, // 26: buildbarn.configuration.blobstore.MirroredBlobAccessConfiguration.backend_b:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 10, // 27: buildbarn.configuration.blobstore.MirroredBlobAccessConfiguration.replicator_a_to_b:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration + 10, // 28: buildbarn.configuration.blobstore.MirroredBlobAccessConfiguration.replicator_b_to_a:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration + 24, // 29: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.key_location_map_in_memory:type_name -> buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.KeyLocationMapInMemory + 31, // 30: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.key_location_map_on_block_device:type_name -> buildbarn.configuration.blockdevice.Configuration + 25, // 31: 
buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.blocks_in_memory:type_name -> buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksInMemory + 26, // 32: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.blocks_on_block_device:type_name -> buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksOnBlockDevice + 27, // 33: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.persistent:type_name -> buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.Persistent + 32, // 34: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.chunking_parameters:type_name -> build.bazel.remote.execution.v2.RepMaxCdcParams + 1, // 35: buildbarn.configuration.blobstore.ExistenceCachingBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 33, // 36: buildbarn.configuration.blobstore.ExistenceCachingBlobAccessConfiguration.existence_cache:type_name -> buildbarn.configuration.digest.ExistenceCacheConfiguration + 1, // 37: buildbarn.configuration.blobstore.CompletenessCheckingBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 1, // 38: buildbarn.configuration.blobstore.ReadFallbackBlobAccessConfiguration.primary:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 1, // 39: buildbarn.configuration.blobstore.ReadFallbackBlobAccessConfiguration.secondary:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 10, // 40: buildbarn.configuration.blobstore.ReadFallbackBlobAccessConfiguration.replicator:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration + 1, // 41: buildbarn.configuration.blobstore.ReferenceExpandingBlobAccessConfiguration.indirect_content_addressable_storage:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 34, // 42: 
buildbarn.configuration.blobstore.ReferenceExpandingBlobAccessConfiguration.aws_session:type_name -> buildbarn.configuration.cloud.aws.SessionConfiguration + 35, // 43: buildbarn.configuration.blobstore.ReferenceExpandingBlobAccessConfiguration.http_client:type_name -> buildbarn.configuration.http.client.Configuration + 36, // 44: buildbarn.configuration.blobstore.ReferenceExpandingBlobAccessConfiguration.gcp_client_options:type_name -> buildbarn.configuration.cloud.gcp.ClientOptionsConfiguration + 1, // 45: buildbarn.configuration.blobstore.ReferenceExpandingBlobAccessConfiguration.content_addressable_storage:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 37, // 46: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.local:type_name -> google.protobuf.Empty + 38, // 47: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.remote:type_name -> buildbarn.configuration.grpc.ClientConfiguration + 11, // 48: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.queued:type_name -> buildbarn.configuration.blobstore.QueuedBlobReplicatorConfiguration + 37, // 49: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.noop:type_name -> google.protobuf.Empty + 10, // 50: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.deduplicating:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration + 12, // 51: buildbarn.configuration.blobstore.BlobReplicatorConfiguration.concurrency_limiting:type_name -> buildbarn.configuration.blobstore.ConcurrencyLimitingBlobReplicatorConfiguration + 10, // 52: buildbarn.configuration.blobstore.QueuedBlobReplicatorConfiguration.base:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration + 33, // 53: buildbarn.configuration.blobstore.QueuedBlobReplicatorConfiguration.existence_cache:type_name -> buildbarn.configuration.digest.ExistenceCacheConfiguration + 10, // 54: 
buildbarn.configuration.blobstore.ConcurrencyLimitingBlobReplicatorConfiguration.base:type_name -> buildbarn.configuration.blobstore.BlobReplicatorConfiguration + 28, // 55: buildbarn.configuration.blobstore.DemultiplexingBlobAccessConfiguration.instance_name_prefixes:type_name -> buildbarn.configuration.blobstore.DemultiplexingBlobAccessConfiguration.InstanceNamePrefixesEntry + 1, // 56: buildbarn.configuration.blobstore.DemultiplexedBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 1, // 57: buildbarn.configuration.blobstore.ActionResultExpiringBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 39, // 58: buildbarn.configuration.blobstore.ActionResultExpiringBlobAccessConfiguration.minimum_validity:type_name -> google.protobuf.Duration + 39, // 59: buildbarn.configuration.blobstore.ActionResultExpiringBlobAccessConfiguration.maximum_validity_jitter:type_name -> google.protobuf.Duration + 40, // 60: buildbarn.configuration.blobstore.ActionResultExpiringBlobAccessConfiguration.minimum_timestamp:type_name -> google.protobuf.Timestamp + 1, // 61: buildbarn.configuration.blobstore.ReadCanaryingBlobAccessConfiguration.source:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 1, // 62: buildbarn.configuration.blobstore.ReadCanaryingBlobAccessConfiguration.replica:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 39, // 63: buildbarn.configuration.blobstore.ReadCanaryingBlobAccessConfiguration.maximum_cache_duration:type_name -> google.protobuf.Duration + 33, // 64: buildbarn.configuration.blobstore.ZIPBlobAccessConfiguration.data_integrity_validation_cache:type_name -> buildbarn.configuration.digest.ExistenceCacheConfiguration + 1, // 65: buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 29, // 66: 
buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration.labels:type_name -> buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration.LabelsEntry + 39, // 67: buildbarn.configuration.blobstore.DeadlineEnforcingBlobAccess.timeout:type_name -> google.protobuf.Duration + 1, // 68: buildbarn.configuration.blobstore.DeadlineEnforcingBlobAccess.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 38, // 69: buildbarn.configuration.blobstore.GrpcBlobAccessConfiguration.client:type_name -> buildbarn.configuration.grpc.ClientConfiguration + 1, // 70: buildbarn.configuration.blobstore.ChunkListValidatingBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 1, // 71: buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.Shard.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 22, // 72: buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.ShardsEntry.value:type_name -> buildbarn.configuration.blobstore.ShardingBlobAccessConfiguration.Shard + 31, // 73: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksOnBlockDevice.source:type_name -> buildbarn.configuration.blockdevice.Configuration + 33, // 74: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.BlocksOnBlockDevice.data_integrity_validation_cache:type_name -> buildbarn.configuration.digest.ExistenceCacheConfiguration + 39, // 75: buildbarn.configuration.blobstore.LocalBlobAccessConfiguration.Persistent.minimum_epoch_interval:type_name -> google.protobuf.Duration + 14, // 76: buildbarn.configuration.blobstore.DemultiplexingBlobAccessConfiguration.InstanceNamePrefixesEntry.value:type_name -> buildbarn.configuration.blobstore.DemultiplexedBlobAccessConfiguration + 1, // 77: buildbarn.configuration.blobstore.WithLabelsBlobAccessConfiguration.LabelsEntry.value:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 78, // [78:78] 
is the sub-list for method output_type + 78, // [78:78] is the sub-list for method input_type + 78, // [78:78] is the sub-list for extension type_name + 78, // [78:78] is the sub-list for extension extendee + 0, // [0:78] is the sub-list for field type_name } func init() { @@ -2293,6 +2371,7 @@ func file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blob (*BlobAccessConfiguration_WithLabels)(nil), (*BlobAccessConfiguration_Label)(nil), (*BlobAccessConfiguration_DeadlineEnforcing)(nil), + (*BlobAccessConfiguration_ChunkListValidating)(nil), } file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_msgTypes[5].OneofWrappers = []any{ (*LocalBlobAccessConfiguration_KeyLocationMapInMemory_)(nil), @@ -2314,7 +2393,7 @@ func file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blob GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_rawDesc), len(file_github_com_buildbarn_bb_storage_pkg_proto_configuration_blobstore_blobstore_proto_rawDesc)), NumEnums: 0, - NumMessages: 29, + NumMessages: 30, NumExtensions: 0, NumServices: 0, }, diff --git a/pkg/proto/configuration/blobstore/blobstore.proto b/pkg/proto/configuration/blobstore/blobstore.proto index 68b226711..917df5514 100644 --- a/pkg/proto/configuration/blobstore/blobstore.proto +++ b/pkg/proto/configuration/blobstore/blobstore.proto @@ -2,6 +2,7 @@ syntax = "proto3"; package buildbarn.configuration.blobstore; +import "build/bazel/remote/execution/v2/remote_execution.proto"; import "github.com/buildbarn/bb-storage/pkg/proto/configuration/blockdevice/blockdevice.proto"; import "github.com/buildbarn/bb-storage/pkg/proto/configuration/cloud/aws/aws.proto"; import "github.com/buildbarn/bb-storage/pkg/proto/configuration/cloud/gcp/gcp.proto"; @@ -199,6 +200,29 @@ message BlobAccessConfiguration { // value. 
When gRPC calls are timed out a `DEADLINE_EXCEEDED` error // code will be returned. DeadlineEnforcingBlobAccess deadline_enforcing = 28; + + // Validate that chunk list requests fulfill the api specification. + // Chunk List Storage (CLS) is used to implement the SplitBlob and + // SpliceBlob methods from the REv2 api. The protocol has several + // demands on the state of the Content Addressable Storage (CAS) + // after those methods have been called. + // + // SplitBlob requires that the blob as well as all chunks of + // the blob are available in the CAS, they have their lifetime + // renewed and that the resulting chunk list composes into the blob. + // This is validated by the Get path by splitting the Blob on demand + // if required. + // + // SpliceBlob requires that the blob as well as all chunks of the + // blob are present in storage, they have their lifetime renewed and + // that the supplied chunk list composes into the blob. Notably it + // does not require the chunks to follow any particular chunking + // algorithm but our implementation ensures that after any call a + // proper rep max cdc chunk list is verified even if the caller + // supplied a different chunk list. + // + // This decorator must be placed on the CLS. + ChunkListValidatingBlobAccessConfiguration chunk_list_validating = 29; } // Was 'redis'. Instead of using Redis, one may run a separate @@ -642,6 +666,18 @@ message LocalBlobAccessConfiguration { // level, e.g., on top of CompletenessCheckingBlobAccess. This can be // achieved by using HierarchicalInstanceNamesBlobAccess. bool hierarchical_instance_names = 14; + + // The chunking parameters advertised via GetCapabilities, setting + // this announces support for the SplitBlob and SpliceBlob api calls. + // + // Adding the chunking parameters does not make the underlying + // blobstore enforce the chunking parameters or implement the split + // and splice blob.
There must be a configured Chunk List Storage with + // an outer ChunkListValidatingBlobAccess that has a view of the entire + // Content Addressable Storage (CAS). + // + // This option is only supported for the CLS. + build.bazel.remote.execution.v2.RepMaxCdcParams chunking_parameters = 15; } message ExistenceCachingBlobAccessConfiguration { @@ -937,3 +973,8 @@ message GrpcBlobAccessConfiguration { // types (AC, ICAS, etc.). bool enable_compression = 2; } + +message ChunkListValidatingBlobAccessConfiguration { + // The backend to which validated operations are delegated. + BlobAccessConfiguration backend = 1; +} From c2f0bf761e460e1a7b1ee2e4071a63e8e1392805 Mon Sep 17 00:00:00 2001 From: Benjamin Ingberg Date: Tue, 9 Jun 2026 08:44:01 +0200 Subject: [PATCH 2/2] Replace CAS with CS and CLS This commit builds on top of our split and splice blob support to make it a mandatory first class feature in Buildbarn. With this commit the Content Addressable Storage (CAS) is created from two Storage configurations that work in tandem. A Chunk Storage (CS) which is content addressed and contains chunks of blobs, and a Chunk List Storage (CLS) which is addressed by a blob digest and contains a manifest describing the chunks that make up the blob. All api calls are automatically translated to use Chunk Lists created with RepMaxCDC. Effectively this means that large blobs no longer exist in the storage layer, individual chunks of the large blobs are in turn deduplicated in such a manner that the chunks are stored only once. The automatic translation makes certain that clients that are not cdc aware can still continue to use the storage backend without performing any changes. Clients which support RepMaxCDC also get a significant reduction in the amount of blobs to transfer as they only need to transfer modified chunks rather than the entire blob.
--- cmd/bb_copy/main.go | 4 +- cmd/bb_replicator/main.go | 4 +- cmd/bb_storage/BUILD.bazel | 4 + cmd/bb_storage/main.go | 77 ++- pkg/blobstore/buffer/BUILD.bazel | 3 + pkg/blobstore/buffer/buffer_benchmark_test.go | 184 ++++++ .../buffer/cas_chunk_concatenating_buffer.go | 249 +++++++ pkg/blobstore/cdc/BUILD.bazel | 43 ++ pkg/blobstore/cdc/bypass.go | 22 + .../cdc/caching_parameter_provider.go | 30 + pkg/blobstore/cdc/cas_chunking_blob_access.go | 185 ++++++ .../{chunklistvalidating => cdc}/chunker.go | 2 +- pkg/blobstore/cdc/parameter_provider.go | 63 ++ .../reader_chunker.go | 2 +- .../reader_chunker_test.go | 10 +- pkg/blobstore/cdc/ttl_cache.go | 115 ++++ pkg/blobstore/chunklistvalidating/BUILD.bazel | 12 +- .../chunk_concatenating_reader.go | 68 -- .../chunk_list_validating_blob_access.go | 290 ++++---- .../chunk_list_validating_blob_access_test.go | 141 ++-- .../integration/BUILD.bazel | 19 - .../chunk_list_validating_integration_test.go | 445 ------------- .../completeness_checking_blob_access_test.go | 1 + pkg/blobstore/configuration/BUILD.bazel | 5 +- .../configuration/ac_blob_access_creator.go | 20 +- .../configuration/cls_blob_access_creator.go | 31 +- ...s_creator.go => cs_blob_access_creator.go} | 35 +- ...eator.go => cs_blob_replicator_creator.go} | 18 +- .../configuration/new_blob_access.go | 32 - pkg/blobstore/grpcclients/BUILD.bazel | 4 +- pkg/blobstore/grpcclients/cls_blob_access.go | 29 +- .../{cas_blob_access.go => cs_blob_access.go} | 4 +- ..._access_test.go => cs_blob_access_test.go} | 30 +- pkg/blobstore/grpcservers/BUILD.bazel | 1 + .../grpcservers/byte_stream_server.go | 107 ++- .../grpcservers/byte_stream_server_test.go | 54 +- .../content_addressable_storage_server.go | 56 +- ...content_addressable_storage_server_test.go | 109 ++- .../grpcservers/integration/BUILD.bazel | 34 + .../integration/byte_stream_server_test.go | 51 ++ ...content_addressable_storage_server_test.go | 415 ++++++++++++ .../grpcservers/integration/utils_test.go | 625 
++++++++++++++++++ .../bb_replicator/bb_replicator.proto | 4 +- .../configuration/bb_storage/BUILD.bazel | 2 + .../configuration/bb_storage/bb_storage.pb.go | 178 +++-- .../configuration/bb_storage/bb_storage.proto | 34 +- 46 files changed, 2709 insertions(+), 1142 deletions(-) create mode 100644 pkg/blobstore/buffer/buffer_benchmark_test.go create mode 100644 pkg/blobstore/buffer/cas_chunk_concatenating_buffer.go create mode 100644 pkg/blobstore/cdc/BUILD.bazel create mode 100644 pkg/blobstore/cdc/bypass.go create mode 100644 pkg/blobstore/cdc/caching_parameter_provider.go create mode 100644 pkg/blobstore/cdc/cas_chunking_blob_access.go rename pkg/blobstore/{chunklistvalidating => cdc}/chunker.go (91%) create mode 100644 pkg/blobstore/cdc/parameter_provider.go rename pkg/blobstore/{chunklistvalidating => cdc}/reader_chunker.go (98%) rename pkg/blobstore/{chunklistvalidating => cdc}/reader_chunker_test.go (88%) create mode 100644 pkg/blobstore/cdc/ttl_cache.go delete mode 100644 pkg/blobstore/chunklistvalidating/chunk_concatenating_reader.go delete mode 100644 pkg/blobstore/chunklistvalidating/integration/BUILD.bazel delete mode 100644 pkg/blobstore/chunklistvalidating/integration/chunk_list_validating_integration_test.go rename pkg/blobstore/configuration/{cas_blob_access_creator.go => cs_blob_access_creator.go} (79%) rename pkg/blobstore/configuration/{cas_blob_replicator_creator.go => cs_blob_replicator_creator.go} (66%) rename pkg/blobstore/grpcclients/{cas_blob_access.go => cs_blob_access.go} (98%) rename pkg/blobstore/grpcclients/{cas_blob_access_test.go => cs_blob_access_test.go} (95%) create mode 100644 pkg/blobstore/grpcservers/integration/BUILD.bazel create mode 100644 pkg/blobstore/grpcservers/integration/byte_stream_server_test.go create mode 100644 pkg/blobstore/grpcservers/integration/content_addressable_storage_server_test.go create mode 100644 pkg/blobstore/grpcservers/integration/utils_test.go diff --git a/cmd/bb_copy/main.go b/cmd/bb_copy/main.go 
index 54e83489e..6d891a639 100644 --- a/cmd/bb_copy/main.go +++ b/cmd/bb_copy/main.go @@ -42,7 +42,7 @@ func main() { grpcClientFactory := grpc.NewBaseClientFactory(grpc.BaseClientDialer, nil, nil, nil) - blobAccessCreator := blobstore_configuration.NewCASBlobAccessCreator( + blobAccessCreator := blobstore_configuration.NewCSBlobAccessCreator( grpcClientFactory, int(configuration.MaximumMessageSizeBytes), bb_zstd.NewPoolFromConfiguration(nil), @@ -68,7 +68,7 @@ func main() { configuration.Replicator, source.BlobAccess, sink, - blobstore_configuration.NewCASBlobReplicatorCreator(grpcClientFactory), + blobstore_configuration.NewCSBlobReplicatorCreator(grpcClientFactory), ) if err != nil { return util.StatusWrap(err, "Failed to create replicator") diff --git a/cmd/bb_replicator/main.go b/cmd/bb_replicator/main.go index 8f21a8020..8e5b9f994 100644 --- a/cmd/bb_replicator/main.go +++ b/cmd/bb_replicator/main.go @@ -33,7 +33,7 @@ func main() { return util.StatusWrap(err, "Failed to apply global configuration options") } - blobAccessCreator := blobstore_configuration.NewCASBlobAccessCreator( + blobAccessCreator := blobstore_configuration.NewCSBlobAccessCreator( grpcClientFactory, int(configuration.MaximumMessageSizeBytes), bb_zstd.NewPoolFromConfiguration(nil), @@ -59,7 +59,7 @@ func main() { configuration.Replicator, source.BlobAccess, sink, - blobstore_configuration.NewCASBlobReplicatorCreator(grpcClientFactory), + blobstore_configuration.NewCSBlobReplicatorCreator(grpcClientFactory), ) if err != nil { return util.StatusWrap(err, "Failed to create replicator") diff --git a/cmd/bb_storage/BUILD.bazel b/cmd/bb_storage/BUILD.bazel index b9d6bc741..62be0bf6e 100644 --- a/cmd/bb_storage/BUILD.bazel +++ b/cmd/bb_storage/BUILD.bazel @@ -10,10 +10,14 @@ go_library( "//pkg/auth", "//pkg/auth/configuration", "//pkg/blobstore", + "//pkg/blobstore/cdc", "//pkg/blobstore/configuration", "//pkg/blobstore/grpcservers", "//pkg/builder", "//pkg/capabilities", + "//pkg/clock", + 
"//pkg/digest", + "//pkg/eviction", "//pkg/global", "//pkg/grpc", "//pkg/program", diff --git a/cmd/bb_storage/main.go b/cmd/bb_storage/main.go index 91bdeb343..bab07baf3 100644 --- a/cmd/bb_storage/main.go +++ b/cmd/bb_storage/main.go @@ -9,10 +9,14 @@ import ( "github.com/buildbarn/bb-storage/pkg/auth" auth_configuration "github.com/buildbarn/bb-storage/pkg/auth/configuration" "github.com/buildbarn/bb-storage/pkg/blobstore" + "github.com/buildbarn/bb-storage/pkg/blobstore/cdc" blobstore_configuration "github.com/buildbarn/bb-storage/pkg/blobstore/configuration" "github.com/buildbarn/bb-storage/pkg/blobstore/grpcservers" "github.com/buildbarn/bb-storage/pkg/builder" "github.com/buildbarn/bb-storage/pkg/capabilities" + "github.com/buildbarn/bb-storage/pkg/clock" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/buildbarn/bb-storage/pkg/eviction" "github.com/buildbarn/bb-storage/pkg/global" bb_grpc "github.com/buildbarn/bb-storage/pkg/grpc" "github.com/buildbarn/bb-storage/pkg/program" @@ -54,13 +58,38 @@ func main() { var cacheCapabilitiesAuthorizers []auth.Authorizer // Content Addressable Storage (CAS). 
- var contentAddressableStorageInfo *blobstore_configuration.BlobAccessInfo var contentAddressableStorage blobstore.BlobAccess + var contentAddressableStorageKeyFormat digest.KeyFormat + var chunkListStorage blobstore.BlobAccess if configuration.ContentAddressableStorage != nil { - info, authorizedBackend, allAuthorizers, err := newScannableBlobAccess( + casConfiguration := configuration.ContentAddressableStorage + if casConfiguration.ChunkStorage == nil { + return status.Error(codes.InvalidArgument, "The Chunk Storage is a mandatory part of the Content Addressable Storage.") + } + if casConfiguration.ChunkListStorage == nil { + return status.Error(codes.InvalidArgument, "The Chunk List Storage is a mandatory part of the Content Addressable Storage.") + } + + var parameterCache *cdc.TTLCache[cdc.Parameters] + if casConfiguration.ContentDefinedChunkingParameterCache != nil { + parameterCacheConfiguraiton := casConfiguration.ContentDefinedChunkingParameterCache + evictionSet, err := eviction.NewSetFromConfiguration[string](parameterCacheConfiguraiton.CacheReplacementPolicy) + if err != nil { + return err + } + parameterCache = cdc.NewTTLCache[cdc.Parameters]( + clock.SystemClock, + evictionSet, + int(parameterCacheConfiguraiton.GetCacheSize()), + parameterCacheConfiguraiton.CacheDuration.AsDuration(), + ) + } + + // Create the Chunk Storage (CS). 
+ chunkStorageInfo, chunkStorage, allAuthorizers, err := newScannableBlobAccess( dependenciesGroup, - configuration.ContentAddressableStorage, - blobstore_configuration.NewCASBlobAccessCreator( + casConfiguration.ChunkStorage, + blobstore_configuration.NewCSBlobAccessCreator( grpcClientFactory, int(configuration.MaximumMessageSizeBytes), zstdPool, @@ -68,11 +97,11 @@ func main() { grpcClientFactory, ) if err != nil { - return util.StatusWrap(err, "Failed to create Content Addressable Storage") + return util.StatusWrap(err, "Failed to create Content Addressable Storage: Failed to create Chunk Storage") } cacheCapabilitiesProviders = append( cacheCapabilitiesProviders, - info.BlobAccess, + chunkStorageInfo.BlobAccess, capabilities.NewStaticProvider(&remoteexecution.ServerCapabilities{ CacheCapabilities: &remoteexecution.CacheCapabilities{ SupportedCompressors: configuration.SupportedCompressors, @@ -80,29 +109,37 @@ func main() { }), ) cacheCapabilitiesAuthorizers = append(cacheCapabilitiesAuthorizers, allAuthorizers...) - contentAddressableStorageInfo = &info - contentAddressableStorage = authorizedBackend - } - // Chunk List Storage (CLS). - var chunkListStorage blobstore.BlobAccess - if configuration.ChunkListStorage != nil { - info, authorizedBackend, allAuthorizers, err := newScannableBlobAccess( + // Create the Chunk List Storage (CLS). 
+ chunkListStorageInfo, authorizedChunkListStorage, allAuthorizers, err := newScannableBlobAccess( dependenciesGroup, - configuration.ChunkListStorage, + casConfiguration.ChunkListStorage, blobstore_configuration.NewCLSBlobAccessCreator( - contentAddressableStorageInfo, + &chunkStorageInfo, grpcClientFactory, int(configuration.MaximumMessageSizeBytes), + parameterCache, ), grpcClientFactory, ) if err != nil { - return util.StatusWrap(err, "Failed to create Chunk Map") + return util.StatusWrap(err, "Failed to create Content Addressable Storage: Failed to create Chunk List Storage") } - cacheCapabilitiesProviders = append(cacheCapabilitiesProviders, info.BlobAccess) + chunkListStorage = authorizedChunkListStorage + cacheCapabilitiesProviders = append(cacheCapabilitiesProviders, chunkListStorageInfo.BlobAccess) cacheCapabilitiesAuthorizers = append(cacheCapabilitiesAuthorizers, allAuthorizers...) - chunkListStorage = authorizedBackend + + cdcParameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider( + authorizedChunkListStorage, + int(configuration.MaximumMessageSizeBytes), + ) + + if parameterCache != nil { + cdcParameterProvider = cdc.NewCachingParameterProvider(cdcParameterProvider, parameterCache) + } + + contentAddressableStorage = cdc.NewCasChunkingBlobAccess(chunkStorage, authorizedChunkListStorage, cdcParameterProvider, int(configuration.MaximumMessageSizeBytes)) + contentAddressableStorageKeyFormat = chunkStorageInfo.DigestKeyFormat.Combine(chunkListStorageInfo.DigestKeyFormat) } // Action Cache (AC). 
@@ -112,7 +149,8 @@ func main() { dependenciesGroup, configuration.ActionCache, blobstore_configuration.NewACBlobAccessCreator( - contentAddressableStorageInfo, + contentAddressableStorage, + contentAddressableStorageKeyFormat, grpcClientFactory, int(configuration.MaximumMessageSizeBytes), ), @@ -228,6 +266,7 @@ func main() { grpcservers.NewByteStreamServer( contentAddressableStorage, 1<<16, + int(configuration.MaximumMessageSizeBytes), zstdPool, ), ) diff --git a/pkg/blobstore/buffer/BUILD.bazel b/pkg/blobstore/buffer/BUILD.bazel index 9f2995862..f681d3f94 100644 --- a/pkg/blobstore/buffer/BUILD.bazel +++ b/pkg/blobstore/buffer/BUILD.bazel @@ -5,6 +5,7 @@ go_library( srcs = [ "buffer.go", "cas_buffer_with_background_task.go", + "cas_chunk_concatenating_buffer.go", "cas_chunk_reader_buffer.go", "cas_cloned_buffer.go", "cas_error_handling_buffer.go", @@ -44,6 +45,7 @@ go_library( go_test( name = "buffer_test", srcs = [ + "buffer_benchmark_test.go", "cas_buffer_with_background_task_test.go", "error_handler_test.go", "example_test.go", @@ -60,6 +62,7 @@ go_test( deps = [ ":buffer", "//internal/mock", + "//pkg/blobstore", "//pkg/digest", "//pkg/testutil", "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", diff --git a/pkg/blobstore/buffer/buffer_benchmark_test.go b/pkg/blobstore/buffer/buffer_benchmark_test.go new file mode 100644 index 000000000..54af14974 --- /dev/null +++ b/pkg/blobstore/buffer/buffer_benchmark_test.go @@ -0,0 +1,184 @@ +package buffer_test + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "io" + "math/rand" + "testing" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/pkg/blobstore" + "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/stretchr/testify/require" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// benchmarkChunkStorage is a mocked 
blobstore.BlobAccess that reads +// buffer.Buffer objects which have been predeclared. These +// buffer.Buffer objects use the NewCASBufferFromByteSlice +// implementation and can be reused within the context of this test. +// This implementation is used to reduce the overhead of getting items +// from the chunk storage so that our benchmarks can focus on the +// behavior of our buffer implementations. +type benchmarkChunkStorage struct { + blobstore.BlobAccess + chunks map[string]buffer.Buffer +} + +const ( + chunkSize = 256 << 10 + numChunks = 1000 +) + +func (m *benchmarkChunkStorage) Get(ctx context.Context, d digest.Digest) buffer.Buffer { + data, ok := m.chunks[d.GetHashString()] + if !ok { + return buffer.NewBufferFromError(status.Errorf(codes.NotFound, "chunk not found")) + } + return data +} + +func (m *benchmarkChunkStorage) Put(ctx context.Context, d digest.Digest, buf buffer.Buffer) error { + data, err := buf.ToByteSlice(int(d.GetSizeBytes())) + if err != nil { + return err + } + m.chunks[d.GetHashString()] = buffer.NewCASBufferFromByteSlice(d, data, buffer.UserProvided) + return nil +} + +func makeRandomData(tb testing.TB, size int, seed int64) []byte { + tb.Helper() + data := make([]byte, size) + r := rand.New(rand.NewSource(seed)) + _, err := r.Read(data) + require.NoError(tb, err) + return data +} + +func setupBenchmarkData(b *testing.B) (digest.Digest, []byte, []digest.Digest, blobstore.BlobAccess) { + b.Helper() + + totalSize := chunkSize * numChunks + + data := makeRandomData(b, totalSize, 0) + + var chunkStorage benchmarkChunkStorage + chunkStorage.chunks = make(map[string]buffer.Buffer, numChunks) + + ctx := context.Background() + + digestFunction := digest.MustNewFunction("benchmark", remoteexecution.DigestFunction_SHA256) + + hash := sha256.Sum256(data) + blobDigest, _ := digestFunction.NewDigest(hex.EncodeToString(hash[:]), int64(len(data))) + + var chunkDigests []digest.Digest + for i := 0; i < numChunks; i++ { + chunkData := 
data[i*chunkSize : (i+1)*chunkSize] + chunkHash := sha256.Sum256(chunkData) + d, _ := digestFunction.NewDigest(hex.EncodeToString(chunkHash[:]), int64(len(chunkData))) + + chunkDigests = append(chunkDigests, d) + err := chunkStorage.Put(ctx, d, buffer.NewValidatedBufferFromByteSlice(chunkData)) + if err != nil { + b.Fatalf("Failed to put chunk: %v", err) + } + } + + return blobDigest, data, chunkDigests, &chunkStorage +} + +type bufferFactory func() buffer.Buffer + +func runBufferBenchmarks(b *testing.B, dataSize int64, factory bufferFactory) { + // Read through the entire buffer via the io.ReadCloser interface. + b.Run("StreamRead", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + buf := factory() + + reader := buf.ToReader() + _, err := io.Copy(io.Discard, reader) + if err != nil { + b.Fatalf("ReadAll failed: %v", err) + } + + reader.Close() + } + }) + + // Read through the entire buffer in chunks up to 1MiB at a time. + b.Run("ChunkRead_", func(b *testing.B) { + // 1MiB typical bytestream write chunk. + const maxChunkSize = 1 << 20 + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + buf := factory() + + reader := buf.ToChunkReader(0, maxChunkSize) + for { + _, err := reader.Read() + if err == io.EOF { + break + } + if err != nil { + b.Fatalf("ChunkReader failed: %v", err) + } + } + reader.Close() + } + }) + + // Read a random 4096 byte slice of the buffer. + b.Run("ReadRand__", func(b *testing.B) { + p := make([]byte, 4096) + // Max offset guarantees we stay strictly within bounds. 
+ maxOffset := dataSize - int64(len(p)) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + buf := factory() + + // Multiply by a large prime to pseudo-randomly scatter the + // reads across the address space without invoking + // rand.Int() + offset := int64(i*999983) % maxOffset + + _, err := buf.ReadAt(p, int64(offset)) + if err != nil && err != io.EOF { + b.Fatalf("ReadAt failed at offset %d: %v", offset, err) + } + } + }) +} + +func BenchmarkBuffers(b *testing.B) { + blobDigest, rawData, chunkDigests, chunkStorage := setupBenchmarkData(b) + ctx := context.Background() + + // A buffer backed by an in memory byte slice, represents an ideal + // case. + b.Run("ByteSlice__", func(b *testing.B) { + runBufferBenchmarks(b, int64(len(rawData)), func() buffer.Buffer { + return buffer.NewValidatedBufferFromByteSlice(rawData) + }) + }) + + // ChunkConcatenating buffer where individual chunks gets validated + // but not the concatenated chunk. + chunkGetter := chunkStorage.Get + b.Run("ChunkConcat", func(b *testing.B) { + runBufferBenchmarks(b, int64(len(rawData)), func() buffer.Buffer { + return buffer.NewValidatedCASChunkConcatenatingBuffer(ctx, blobDigest, chunkDigests, chunkGetter, buffer.UserProvided) + }) + }) +} diff --git a/pkg/blobstore/buffer/cas_chunk_concatenating_buffer.go b/pkg/blobstore/buffer/cas_chunk_concatenating_buffer.go new file mode 100644 index 000000000..36e6f6bfc --- /dev/null +++ b/pkg/blobstore/buffer/cas_chunk_concatenating_buffer.go @@ -0,0 +1,249 @@ +package buffer + +import ( + "context" + "io" + + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/buildbarn/bb-storage/pkg/util" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" +) + +// ChunkGetter is a callback used by casChunkedBuffer to lazily fetch +// the buffers of individual chunks. This abstraction prevents circular +// dependencies between the buffer package and the blobstore package. 
+type ChunkGetter func(ctx context.Context, digest digest.Digest) Buffer
+
+type casChunkConcatenatingBuffer struct {
+	ctx          context.Context // Passed to chunkGetter on every chunk fetch.
+	chunkGetter  ChunkGetter     // Fetches the Buffer of a single chunk.
+	blobDigest   digest.Digest   // Digest of the concatenated blob; provides its size.
+	chunkDigests []digest.Digest // Chunks of the blob, in concatenation order.
+	source       Source          // Forwarded to the clone, task and error handling wrappers.
+}
+
+// NewValidatedCASChunkConcatenatingBuffer creates a buffer for a
+// Content Addressable Storage object that is composed of an already
+// validated list of chunks. Because the chunk list is treated as
+// correct, validation is limited to the underlying chunks that are
+// read through chunkGetter, which permits random access reads.
+func NewValidatedCASChunkConcatenatingBuffer(ctx context.Context, blobDigest digest.Digest, chunkDigests []digest.Digest, chunkGetter ChunkGetter, source Source) Buffer {
+	return &casChunkConcatenatingBuffer{
+		ctx:          ctx,
+		chunkGetter:  chunkGetter,
+		blobDigest:   blobDigest,
+		chunkDigests: chunkDigests,
+		source:       source,
+	}
+}
+
+// NewUnvalidatedCASChunkConcatenatingBuffer creates a buffer for a
+// chunk list provided by an untrusted source. It falls back to the
+// standard validating chunk reader stream to guarantee that the
+// checksum of the blob as a whole is strictly validated.
+func NewUnvalidatedCASChunkConcatenatingBuffer(ctx context.Context, blobDigest digest.Digest, chunkDigests []digest.Digest, chunkGetter ChunkGetter, source Source, maximumMessageSizeBytes int) Buffer {
+	reader := &chunkConcatenatingChunkReader{
+		ctx:          ctx,
+		chunkGetter:  chunkGetter,
+		chunkDigests: chunkDigests,
+		chunkOffset:  0,
+		maxChunkSize: maximumMessageSizeBytes,
+	}
+	return NewCASBufferFromChunkReader(blobDigest, reader, source)
+}
+
+func (b *casChunkConcatenatingBuffer) GetSizeBytes() (int64, error) {
+	return b.blobDigest.GetSizeBytes(), nil
+}
+
+func (casChunkConcatenatingBuffer) Discard() {} // The struct holds no open readers, so nothing is released.
+
+func (b *casChunkConcatenatingBuffer) IntoWriter(w io.Writer) error {
+	for _, d := range b.chunkDigests {
+		chunkBuf := b.chunkGetter(b.ctx, d)
+		if err := chunkBuf.IntoWriter(w); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (b *casChunkConcatenatingBuffer) findChunkOffset(off int64) (index int, chunkOffset int64) {
+	var accumulatedSize int64
+	for i, d := range b.chunkDigests {
+		chunkSize := d.GetSizeBytes()
+		if accumulatedSize+chunkSize > off {
+			return i, off - accumulatedSize
+		}
+		accumulatedSize += chunkSize
+	}
+	return len(b.chunkDigests), 0 // off lies at or beyond the end of the last chunk.
+}
+
+func (b *casChunkConcatenatingBuffer) ReadAt(p []byte, off int64) (int, error) {
+	if err := validateReaderOffset(b.blobDigest.GetSizeBytes(), off); err != nil {
+		return 0, err
+	}
+
+	index, chunkOffset := b.findChunkOffset(off)
+	bytesRead := 0
+
+	for index < len(b.chunkDigests) {
+		d := b.chunkDigests[index]
+
+		n, err := b.chunkGetter(b.ctx, d).ReadAt(p[bytesRead:], chunkOffset)
+		bytesRead += n
+		// io.EOF is only accepted once the chunk was read up to its end.
+		if err != nil {
+			if err != io.EOF {
+				return bytesRead, util.StatusWrapf(err, "Error when reading chunk at index %d", index)
+			}
+			if int64(n) < d.GetSizeBytes()-chunkOffset {
+				return bytesRead, status.Errorf(codes.Internal, "Expected buffer to be %d bytes but it was only %d.", d.GetSizeBytes(), n+int(chunkOffset))
+			}
+		}
+
+		if bytesRead == len(p) {
+			return bytesRead, nil
+		}
+		chunkOffset = 0
+		index++
+	}
+
+	// All chunks were consumed; return io.EOF if p could not be filled.
+	if bytesRead < len(p) {
+		return bytesRead, io.EOF
+	}
+	return bytesRead, nil
+}
+
+func (b *casChunkConcatenatingBuffer) ToProto(m proto.Message, maximumSizeBytes int) (proto.Message, error) {
+	return toProtoViaByteSlice(b, m, maximumSizeBytes)
+}
+
+func (b *casChunkConcatenatingBuffer) ToByteSlice(maximumSizeBytes int) ([]byte, error) {
+	expectedSizeBytes := b.blobDigest.GetSizeBytes()
+	if expectedSizeBytes > int64(maximumSizeBytes) {
+		return nil, status.Errorf(codes.InvalidArgument, "Buffer is %d bytes in size, while a maximum of %d bytes is permitted.", expectedSizeBytes, maximumSizeBytes)
+	}
+	data := make([]byte, expectedSizeBytes)
+	if expectedSizeBytes > 0 {
+		// ReadAt is safe to call here. Even though both ReadAt and
+		// ToByteSlice assume ownership of the buffer, this
+		// buffer.Buffer implementation is stateless.
+		n, err := b.ReadAt(data, 0)
+		if err != nil && err != io.EOF {
+			return nil, err
+		}
+		if int64(n) != expectedSizeBytes {
+			return nil, status.Errorf(codes.Internal, "Buffer is %d bytes in size, while %d bytes were expected.", n, expectedSizeBytes)
+		}
+	}
+	return data, nil
+}
+
+func (b *casChunkConcatenatingBuffer) ToChunkReader(off int64, maximumChunkSizeBytes int) ChunkReader {
+	if err := validateReaderOffset(b.blobDigest.GetSizeBytes(), off); err != nil {
+		return newErrorChunkReader(err)
+	}
+
+	index, chunkOffset := b.findChunkOffset(off)
+
+	return &chunkConcatenatingChunkReader{
+		ctx:          b.ctx,
+		chunkGetter:  b.chunkGetter,
+		chunkDigests: b.chunkDigests,
+		chunkOffset:  chunkOffset,
+		currentIndex: index,
+		maxChunkSize: maximumChunkSizeBytes,
+	}
+}
+
+func (b *casChunkConcatenatingBuffer) ToReader() io.ReadCloser {
+	return b.toUnvalidatedReader(0)
+}
+
+func (b *casChunkConcatenatingBuffer) toUnvalidatedReader(off int64) io.ReadCloser {
+	return newChunkReaderBackedReader(b.toUnvalidatedChunkReader(off, 0)) // 0: no explicit maximum chunk size.
+}
+
+func (b *casChunkConcatenatingBuffer) CloneCopy(maximumSizeBytes int) (Buffer, Buffer) {
+	return b, b // The buffer is stateless, so both clones can share it.
+}
+
+func (b *casChunkConcatenatingBuffer) CloneStream() (Buffer, Buffer) {
+	return newCASClonedBuffer(b, b.blobDigest, b.source).CloneStream()
+}
+
+func (b *casChunkConcatenatingBuffer) WithTask(task func() error) Buffer {
+	return newCASBufferWithBackgroundTask(b, b.blobDigest, b.source, task)
+}
+
+func (b *casChunkConcatenatingBuffer) applyErrorHandler(errorHandler ErrorHandler) (Buffer, bool) {
+	return newCASErrorHandlingBuffer(b, errorHandler, b.blobDigest, b.source), false
+}
+
+func (b *casChunkConcatenatingBuffer) toUnvalidatedChunkReader(off int64, maximumChunkSizeBytes int) ChunkReader {
+	return b.ToChunkReader(off, maximumChunkSizeBytes)
+}
+
+// chunkConcatenatingChunkReader fulfills the buffer.ChunkReader
+// interface.
+type chunkConcatenatingChunkReader struct {
+	ctx          context.Context // Passed to chunkGetter for every chunk.
+	chunkGetter  ChunkGetter     // Fetches the Buffer of a single chunk.
+	chunkDigests []digest.Digest // Chunks to read, in order.
+	chunkOffset  int64           // Offset into the first chunk opened; 0 afterwards.
+	maxChunkSize int             // Values <= 0 select the size of the chunk being read.
+
+	currentIndex  int         // Index of the next chunk to open.
+	currentReader ChunkReader // Reader of the chunk in progress, or nil.
+	closed        bool        // Set by Close(); Read() fails from then on.
+}
+
+func (r *chunkConcatenatingChunkReader) Read() ([]byte, error) {
+	if r.closed {
+		return nil, status.Error(codes.Internal, "Reader is already closed")
+	}
+	for {
+		if r.currentReader == nil {
+			if r.currentIndex >= len(r.chunkDigests) {
+				return nil, io.EOF
+			}
+			currentDigest := r.chunkDigests[r.currentIndex]
+			chunkBuf := r.chunkGetter(r.ctx, currentDigest)
+			maxChunkSize := r.maxChunkSize
+			if maxChunkSize <= 0 {
+				maxChunkSize = int(currentDigest.GetSizeBytes())
+			}
+			r.currentReader = chunkBuf.ToChunkReader(r.chunkOffset, maxChunkSize)
+			r.chunkOffset = 0
+			r.currentIndex++
+		}
+
+		data, err := r.currentReader.Read()
+		if len(data) > 0 {
+			return data, nil
+		}
+		if err == io.EOF {
+			r.currentReader.Close()
+			r.currentReader = nil
+			continue
+		}
+		if err != nil {
+			// Poison the reader: a retry must not skip to the next chunk.
+			r.Close()
+			return nil, err
+		}
+	}
+}
+
+func (r *chunkConcatenatingChunkReader) Close() {
+	r.closed = true
+	if r.currentReader != nil {
+		r.currentReader.Close()
+		r.currentReader = nil
+	}
+}
diff --git a/pkg/blobstore/cdc/BUILD.bazel b/pkg/blobstore/cdc/BUILD.bazel
new file mode 100644
index 000000000..ecd45cd92
--- /dev/null
+++ b/pkg/blobstore/cdc/BUILD.bazel
@@ -0,0 +1,43 @@
+load("@rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "cdc",
+    srcs = [
+        "bypass.go",
+        "caching_parameter_provider.go",
+        "cas_chunking_blob_access.go",
+        "chunker.go",
+        "parameter_provider.go",
+        "reader_chunker.go",
+        "ttl_cache.go",
+    ],
+    importpath = "github.com/buildbarn/bb-storage/pkg/blobstore/cdc",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//pkg/blobstore",
+        "//pkg/blobstore/buffer",
+        "//pkg/blobstore/slicing",
+        "//pkg/capabilities",
+        "//pkg/clock",
+        "//pkg/digest",
+        "//pkg/eviction",
+        "//pkg/util",
+        "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto",
+        "@com_github_buildbarn_go_cdc//:go-cdc",
+        "@org_golang_google_grpc//codes",
+        "@org_golang_google_grpc//status",
+        "@org_golang_x_sync//singleflight",
+    ],
+)
+
+go_test(
+    name = "cdc_test",
+    srcs = ["reader_chunker_test.go"],
+    deps = [
+        ":cdc",
+        "//pkg/blobstore/buffer",
+        "//pkg/digest",
+        "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto",
+        "@com_github_stretchr_testify//require",
+    ],
+)
diff --git a/pkg/blobstore/cdc/bypass.go b/pkg/blobstore/cdc/bypass.go
new file mode 100644
index 000000000..1564d4e85
--- /dev/null
+++ b/pkg/blobstore/cdc/bypass.go
@@ -0,0 +1,22 @@
+package cdc
+
+import "context"
+
+type chunkListValidationBypassKey struct{}
+
+// NewContextWithChunkListValidationBypass creates a derived context
+// that signals downstream storage layers that the chunk list has
+// already been validated (or was freshly generated) and does not need
+// expensive re-validation.
+func NewContextWithChunkListValidationBypass(ctx context.Context) context.Context {
+	return context.WithValue(ctx, chunkListValidationBypassKey{}, chunkListValidationBypassKey{})
+}
+
+// ChunkListValidationBypassed checks if the provided context contains
+// the bypass signal.
+func ChunkListValidationBypassed(ctx context.Context) bool {
+	if value := ctx.Value(chunkListValidationBypassKey{}); value != nil {
+		return true
+	}
+	return false
+}
diff --git a/pkg/blobstore/cdc/caching_parameter_provider.go b/pkg/blobstore/cdc/caching_parameter_provider.go
new file mode 100644
index 000000000..1bf7606d2
--- /dev/null
+++ b/pkg/blobstore/cdc/caching_parameter_provider.go
@@ -0,0 +1,30 @@
+package cdc
+
+import (
+	"context"
+
+	"github.com/buildbarn/bb-storage/pkg/digest"
+)
+
+type cachingParameterProvider struct {
+	base  ParameterProvider
+	cache *TTLCache[Parameters]
+}
+
+// NewCachingParameterProvider creates a decorator that caches CDC
+// parameters in memory. It is heavily modeled after existence caching,
+// utilizing an eviction set to bound the cache size and a TTL to
+// guarantee freshness.
+func NewCachingParameterProvider(base ParameterProvider, cache *TTLCache[Parameters]) ParameterProvider {
+	return &cachingParameterProvider{
+		base:  base,
+		cache: cache,
+	}
+}
+
+func (p *cachingParameterProvider) Get(ctx context.Context, instanceName digest.InstanceName) (Parameters, error) {
+	key := instanceName.String()
+	return p.cache.GetOrSet(key, func() (Parameters, error) {
+		return p.base.Get(ctx, instanceName)
+	})
+}
diff --git a/pkg/blobstore/cdc/cas_chunking_blob_access.go b/pkg/blobstore/cdc/cas_chunking_blob_access.go
new file mode 100644
index 000000000..4634685df
--- /dev/null
+++ b/pkg/blobstore/cdc/cas_chunking_blob_access.go
@@ -0,0 +1,185 @@
+package cdc
+
+import (
+	"context"
+	"io"
+
+	remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2"
+	"github.com/buildbarn/bb-storage/pkg/blobstore"
+	"github.com/buildbarn/bb-storage/pkg/blobstore/buffer"
+	"github.com/buildbarn/bb-storage/pkg/blobstore/slicing"
+	"github.com/buildbarn/bb-storage/pkg/digest"
+	"github.com/buildbarn/bb-storage/pkg/util"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+)
+
+type casChunking struct {
+	chunkStorage            blobstore.BlobAccess // Holds small blobs and the individual chunks.
+	chunkGetter             buffer.ChunkGetter   // chunkStorage.Get, as handed to the buffer package.
+	chunkListStorage        blobstore.BlobAccess // Holds the chunk lists of large blobs.
+	parameterProvider       ParameterProvider    // Yields the chunking parameters per instance name.
+	maximumMessageSizeBytes int                  // Upper bound when unmarshaling a chunk list.
+}
+
+// NewCasChunkingBlobAccess creates a CAS BlobAccess that constructs
+// large CAS objects from the chunks described in the chunk list of the
+// object.
+func NewCasChunkingBlobAccess(chunkStorage, chunkListStorage blobstore.BlobAccess, parameterProvider ParameterProvider, maximumMessageSizeBytes int) blobstore.BlobAccess {
+	return &casChunking{
+		chunkStorage:            chunkStorage,
+		chunkListStorage:        chunkListStorage,
+		parameterProvider:       parameterProvider,
+		maximumMessageSizeBytes: maximumMessageSizeBytes,
+		chunkGetter:             chunkStorage.Get,
+	}
+}
+
+func (bc *casChunking) FindMissing(ctx context.Context, digests digest.Set) (digest.Set, error) {
+	digestSets := digests.PartitionByInstanceName()
+	missings := make([]digest.Set, 0, len(digestSets))
+	for _, digestSet := range digestSets {
+		// PartitionByInstanceName guarantees non-empty sets.
+		missing, err := bc.findMissingFromInstance(ctx, digestSet.Items()[0].GetInstanceName(), digestSet)
+		if err != nil {
+			return digest.EmptySet, err
+		}
+		missings = append(missings, missing)
+	}
+	return digest.GetUnion(missings), nil
+}
+
+// Get returns blobs below twice the minimum chunk size straight from
+// chunk storage; larger blobs are reassembled from their chunk list.
+func (bc *casChunking) Get(ctx context.Context, d digest.Digest) buffer.Buffer {
+	params, err := bc.parameterProvider.Get(ctx, d.GetInstanceName())
+	if err != nil {
+		return buffer.NewBufferFromError(err)
+	}
+
+	if d.GetSizeBytes() < int64(2*params.MinChunkSizeBytes) {
+		return bc.chunkStorage.Get(ctx, d)
+	}
+
+	chunkDigests, err := bc.chunksOfBlob(ctx, d, params)
+	if err != nil {
+		return buffer.NewBufferFromError(err)
+	}
+	return buffer.NewValidatedCASChunkConcatenatingBuffer(ctx, d, chunkDigests, bc.chunkGetter, buffer.UserProvided)
+}
+
+func (casChunking) GetCapabilities(ctx context.Context, instanceName digest.InstanceName) (*remoteexecution.ServerCapabilities, error) {
+	return nil, status.Error(codes.Unimplemented, "CasChunkingBlobAccess does not implement GetCapabilities")
+}
+
+func (casChunking) GetFromComposite(ctx context.Context, parentDigest, childDigest digest.Digest, slicer slicing.BlobSlicer) buffer.Buffer {
+	return buffer.NewBufferFromError(status.Error(codes.Unimplemented, "CasChunkingBlobAccess does not implement GetFromComposite"))
+}
+
+// Put stores blobs below twice the minimum chunk size directly in chunk
+// storage. Larger blobs are split into chunks, the missing chunks are
+// uploaded, and the resulting chunk list is written last.
+func (bc *casChunking) Put(ctx context.Context, digest digest.Digest, in buffer.Buffer) error {
+	params, err := bc.parameterProvider.Get(ctx, digest.GetInstanceName())
+	if err != nil {
+		// Put() takes ownership of the buffer, so it has to be
+		// released on every path that does not hand it over to
+		// another consumer.
+		in.Discard()
+		return err
+	}
+
+	// Check for trivial case where we can simply put the value directly
+	// to the underlying storage and not involve chunk lists at all.
+	if digest.GetSizeBytes() < 2*params.MinChunkSizeBytes {
+		return bc.chunkStorage.Put(ctx, digest, in)
+	}
+
+	// The blob is big so we chunk it, store all missing chunks and save
+	// the corresponding chunk list.
+	reader := in.ToReader()
+	defer reader.Close()
+	chunker := NewReaderChunker(digest.GetDigestFunction(), reader, params.MinChunkSizeBytes, params.HorizonSizeBytes)
+	chunkDigests := make([]*remoteexecution.Digest, 0, digest.GetSizeBytes()/int64(params.MinChunkSizeBytes))
+	for {
+		chunk, err := chunker.NextChunk()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return util.StatusWrap(err, "Failed to chunk write stream")
+		}
+
+		missing, err := bc.chunkStorage.FindMissing(ctx, chunk.Digest.ToSingletonSet())
+		if err != nil {
+			return err
+		}
+		if !missing.Empty() {
+			if err := bc.chunkStorage.Put(ctx, chunk.Digest, buffer.NewValidatedBufferFromByteSlice(chunk.Data)); err != nil {
+				return util.StatusWrap(err, "Failed to save chunk")
+			}
+		}
+
+		chunkDigests = append(chunkDigests, chunk.Digest.GetProto())
+	}
+
+	// All data chunks have been uploaded but before we can return a
+	// successful response we must save the result to our chunk list
+	// storage. As we have validated the chunk list here ourselves we
+	// can bypass the validation of the blob.
+	ctx = NewContextWithChunkListValidationBypass(ctx)
+	chunkListProto := &remoteexecution.SplitBlobResponse{
+		ChunkDigests:     chunkDigests,
+		ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC,
+	}
+	b := buffer.NewProtoBufferFromProto(chunkListProto, buffer.UserProvided)
+	if err := bc.chunkListStorage.Put(ctx, digest, b); err != nil {
+		return util.StatusWrap(err, "Could not save chunk list for blob")
+	}
+
+	return nil
+}
+
+func (bc *casChunking) chunksOfBlob(ctx context.Context, d digest.Digest, params Parameters) ([]digest.Digest, error) {
+	b := bc.chunkListStorage.Get(ctx, d)
+	responseProtoBuf, err := b.ToProto(&remoteexecution.SplitBlobResponse{}, bc.maximumMessageSizeBytes)
+	if err != nil {
+		return nil, err
+	}
+	response := responseProtoBuf.(*remoteexecution.SplitBlobResponse)
+	digestFunction := d.GetDigestFunction()
+	chunkDigestsProto := response.ChunkDigests
+	chunkDigests := make([]digest.Digest, len(chunkDigestsProto))
+	for i, cdp := range chunkDigestsProto {
+		chunkDigests[i], err = digestFunction.NewDigestFromProto(cdp)
+		if err != nil {
+			return nil, util.StatusWrap(err, "Failed to parse digest from proto")
+		}
+	}
+	return chunkDigests, nil
+}
+
+func (bc *casChunking) findMissingFromInstance(ctx context.Context, instanceName digest.InstanceName, digests digest.Set) (digest.Set, error) {
+	params, err := bc.parameterProvider.Get(ctx, instanceName)
+	if err != nil {
+		return digest.EmptySet, err
+	}
+	smallDigests := digest.NewSetBuilder(digests.Length())
+	largeDigests := digest.NewSetBuilder(digests.Length())
+	for _, d := range digests.Items() {
+		if d.GetSizeBytes() < 2*params.MinChunkSizeBytes {
+			smallDigests.Add(d)
+		} else {
+			largeDigests.Add(d)
+		}
+	}
+	smallMissing, err := bc.chunkStorage.FindMissing(ctx, smallDigests.Build())
+	if err != nil {
+		return digest.EmptySet, err
+	}
+	largeMissing, err := bc.chunkListStorage.FindMissing(ctx, largeDigests.Build())
+	if err != nil {
+		return digest.EmptySet, err
+	}
+	return digest.GetUnion([]digest.Set{smallMissing, largeMissing}), nil
+}
diff --git a/pkg/blobstore/chunklistvalidating/chunker.go b/pkg/blobstore/cdc/chunker.go
similarity index 91%
rename from pkg/blobstore/chunklistvalidating/chunker.go
rename to pkg/blobstore/cdc/chunker.go
index 8badbc7e9..ac02682db 100644
--- a/pkg/blobstore/chunklistvalidating/chunker.go
+++ b/pkg/blobstore/cdc/chunker.go
@@ -1,4 +1,4 @@
-package chunklistvalidating
+package cdc
 
 import (
 	"github.com/buildbarn/bb-storage/pkg/digest"
diff --git a/pkg/blobstore/cdc/parameter_provider.go b/pkg/blobstore/cdc/parameter_provider.go
new file mode 100644
index 000000000..0cc0751ee
--- /dev/null
+++ b/pkg/blobstore/cdc/parameter_provider.go
@@ -0,0 +1,63 @@
+package cdc
+
+import (
+	"context"
+
+	"github.com/buildbarn/bb-storage/pkg/capabilities"
+	"github.com/buildbarn/bb-storage/pkg/digest"
+	"github.com/buildbarn/bb-storage/pkg/util"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+)
+
+// Parameters represents the safely-typed, validated chunking
+// boundaries.
+type Parameters struct {
+	MinChunkSizeBytes int64
+	HorizonSizeBytes  int64
+}
+
+// ParameterProvider is a service that determines the Content-Defined
+// Chunking parameters for a given instance name.
+type ParameterProvider interface {
+	Get(ctx context.Context, instanceName digest.InstanceName) (Parameters, error)
+}
+
+type capabilityToParameterProvider struct {
+	capabilitiesProvider    capabilities.Provider
+	maximumMessageSizeBytes int
+}
+
+// NewParameterProviderFromCapabilitiesProvider creates a
+// ParameterProvider that fetches parameters directly from a
+// capabilities.Provider.
+func NewParameterProviderFromCapabilitiesProvider(capabilitiesProvider capabilities.Provider, maximumMessageSizeBytes int) ParameterProvider {
+	return &capabilityToParameterProvider{
+		capabilitiesProvider:    capabilitiesProvider,
+		maximumMessageSizeBytes: maximumMessageSizeBytes,
+	}
+}
+
+func (p *capabilityToParameterProvider) Get(ctx context.Context, instanceName digest.InstanceName) (Parameters, error) {
+	serverCapabilities, err := p.capabilitiesProvider.GetCapabilities(ctx, instanceName) // Named so that it does not shadow the capabilities package.
+	if err != nil {
+		return Parameters{}, util.StatusWrap(err, "Unable to GetCapabilities to determine chunking parameters")
+	}
+
+	params := serverCapabilities.GetCacheCapabilities().GetRepMaxCdcParams()
+	if params == nil {
+		return Parameters{}, status.Error(codes.Unimplemented, "This backend only supports upstream servers with rep max cdc support.")
+	}
+	if params.MinChunkSizeBytes < 64 {
+		return Parameters{}, status.Errorf(codes.Internal, "MinChunkSizeBytes was %d but a minimum of 64 is required.", params.MinChunkSizeBytes)
+	}
+	maxMinChunkSize := (p.maximumMessageSizeBytes + 1) / 2 // Half the maximum message size, rounded up.
+	if params.MinChunkSizeBytes > uint64(maxMinChunkSize) {
+		return Parameters{}, status.Errorf(codes.Internal, "MinChunkSizeBytes was %d but a maximum of %d is supported with the configured maximum message size.", params.MinChunkSizeBytes, maxMinChunkSize)
+	}
+
+	return Parameters{
+		MinChunkSizeBytes: int64(params.MinChunkSizeBytes),
+		HorizonSizeBytes:  int64(params.HorizonSizeBytes),
+	}, nil
+}
diff --git a/pkg/blobstore/chunklistvalidating/reader_chunker.go b/pkg/blobstore/cdc/reader_chunker.go
similarity index 98%
rename from pkg/blobstore/chunklistvalidating/reader_chunker.go
rename to pkg/blobstore/cdc/reader_chunker.go
index 0d250dabd..3e2dba135 100644
--- a/pkg/blobstore/chunklistvalidating/reader_chunker.go
+++ b/pkg/blobstore/cdc/reader_chunker.go
@@ -1,4 +1,4 @@
-package chunklistvalidating
+package cdc
 
 import (
 	"bufio"
diff --git 
a/pkg/blobstore/chunklistvalidating/reader_chunker_test.go b/pkg/blobstore/cdc/reader_chunker_test.go similarity index 88% rename from pkg/blobstore/chunklistvalidating/reader_chunker_test.go rename to pkg/blobstore/cdc/reader_chunker_test.go index c5630d427..d92778bbd 100644 --- a/pkg/blobstore/chunklistvalidating/reader_chunker_test.go +++ b/pkg/blobstore/cdc/reader_chunker_test.go @@ -1,4 +1,4 @@ -package chunklistvalidating_test +package cdc_test import ( "io" @@ -7,7 +7,7 @@ import ( remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" - "github.com/buildbarn/bb-storage/pkg/blobstore/chunklistvalidating" + "github.com/buildbarn/bb-storage/pkg/blobstore/cdc" "github.com/buildbarn/bb-storage/pkg/digest" "github.com/stretchr/testify/require" ) @@ -32,7 +32,7 @@ func FuzzReaderChunker(f *testing.F) { reader := buffer.NewValidatedBufferFromByteSlice(originalData).ToReader() defer reader.Close() - chunker := chunklistvalidating.NewReaderChunker(digestFunc, reader, minChunkSize, horizonLookaheadBytes) + chunker := cdc.NewReaderChunker(digestFunc, reader, minChunkSize, horizonLookaheadBytes) composedData := make([]byte, 0, dataSizeBytes) var numberOfChunks int @@ -76,7 +76,7 @@ func TestReaderChunkerSmallBlob(t *testing.T) { defer reader.Close() digestFunc := digest.MustNewFunction("", remoteexecution.DigestFunction_SHA256) - chunker := chunklistvalidating.NewReaderChunker(digestFunc, reader, minChunkSize, horizonLookaheadBytes) + chunker := cdc.NewReaderChunker(digestFunc, reader, minChunkSize, horizonLookaheadBytes) chunks := make([][]byte, 0, 1) for { @@ -99,7 +99,7 @@ func TestReaderChunkerEmptyBlob(t *testing.T) { defer reader.Close() digestFunc := digest.MustNewFunction("", remoteexecution.DigestFunction_SHA256) - chunker := chunklistvalidating.NewReaderChunker(digestFunc, reader, minChunkSize, horizonLookaheadBytes) + chunker := cdc.NewReaderChunker(digestFunc, reader, 
minChunkSize, horizonLookaheadBytes) chunk, err := chunker.NextChunk() require.ErrorIs(t, io.EOF, err) diff --git a/pkg/blobstore/cdc/ttl_cache.go b/pkg/blobstore/cdc/ttl_cache.go new file mode 100644 index 000000000..bbbede985 --- /dev/null +++ b/pkg/blobstore/cdc/ttl_cache.go @@ -0,0 +1,115 @@ +package cdc + +import ( + "sync" + "time" + + "github.com/buildbarn/bb-storage/pkg/clock" + "github.com/buildbarn/bb-storage/pkg/eviction" + + "golang.org/x/sync/singleflight" +) + +type cachedItem[V any] struct { + value V + expiration time.Time +} + +// TTLCache provides a generic, concurrency-safe cache with TTL and +// eviction for string keys. Keys are limited to strings as this is what +// singleflight is limited to. +type TTLCache[V any] struct { + clock clock.Clock + evictionSet eviction.Set[string] + maxItems int + cacheDuration time.Duration + + lock sync.Mutex + items map[string]cachedItem[V] + + flightGroup singleflight.Group +} + +// NewTTLCache instantiates a reusable TTLCache for any key-value pair. +func NewTTLCache[V any](clock clock.Clock, evictionSet eviction.Set[string], maxItems int, cacheDuration time.Duration) *TTLCache[V] { + return &TTLCache[V]{ + clock: clock, + evictionSet: evictionSet, + maxItems: maxItems, + cacheDuration: cacheDuration, + items: make(map[string]cachedItem[V]), + } +} + +// GetOrSet retrieves the value from the cache, or executes the fetcher +// exactly once for concurrent callers of the same key. +func (c *TTLCache[V]) GetOrSet(key string, fetch func() (V, error)) (V, error) { + // Fast path, get directly from the cache. + if val, ok := c.Get(key); ok { + return val, nil + } + + // Key was missing, deduplicate all calls for the same key. + result, err, _ := c.flightGroup.Do(key, func() (interface{}, error) { + // Check the cache inside the singleflight scope. Protects us + // from performing an extra fetch in case there was a put in + // between our previous check and our flight taking of. 
+ if val, ok := c.Get(key); ok { + return val, nil + } + + // Execute the fetch + val, err := fetch() + if err != nil { + return nil, err + } + c.Put(key, val) + return val, nil + }) + + if err != nil { + var zero V + return zero, err + } + + return result.(V), nil +} + +// Get retrieves an item if it exists and hasn't expired. +func (c *TTLCache[V]) Get(key string) (V, bool) { + c.lock.Lock() + defer c.lock.Unlock() + + if cached, ok := c.items[key]; ok { + if !c.clock.Now().After(cached.expiration) { + c.evictionSet.Touch(key) + return cached.value, true + } + } + + var zero V + return zero, false +} + +// Put inserts or updates an item in the cache, handling eviction if at +// capacity. +func (c *TTLCache[V]) Put(key string, value V) { + c.lock.Lock() + defer c.lock.Unlock() + + expiration := c.clock.Now().Add(c.cacheDuration) + + if _, ok := c.items[key]; ok { + c.items[key] = cachedItem[V]{value: value, expiration: expiration} + c.evictionSet.Touch(key) + return + } + + if len(c.items) >= c.maxItems { + delete(c.items, c.evictionSet.Peek()) + c.evictionSet.Remove() + } + + c.items[key] = cachedItem[V]{value: value, expiration: expiration} + c.evictionSet.Insert(key) +} diff --git a/pkg/blobstore/chunklistvalidating/BUILD.bazel b/pkg/blobstore/chunklistvalidating/BUILD.bazel index 1bb49c7f0..8c930cb14 100644 --- a/pkg/blobstore/chunklistvalidating/BUILD.bazel +++ b/pkg/blobstore/chunklistvalidating/BUILD.bazel @@ -2,24 +2,18 @@ load("@rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "chunklistvalidating", - srcs = [ - "chunk_concatenating_reader.go", - "chunk_list_validating_blob_access.go", - "chunker.go", - "reader_chunker.go", - ], + srcs = ["chunk_list_validating_blob_access.go"], importpath = "github.com/buildbarn/bb-storage/pkg/blobstore/chunklistvalidating", visibility = ["//visibility:public"], deps = [ "//pkg/blobstore", "//pkg/blobstore/buffer", + "//pkg/blobstore/cdc", "//pkg/digest", "//pkg/util", 
"@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", - "@com_github_buildbarn_go_cdc//:go-cdc", "@org_golang_google_grpc//codes", "@org_golang_google_grpc//status", - "@org_golang_x_sync//errgroup", ], ) @@ -28,12 +22,12 @@ go_test( srcs = [ "chunk_list_validating_blob_access_test.go", "fake_blob_access_test.go", - "reader_chunker_test.go", ], deps = [ ":chunklistvalidating", "//pkg/blobstore", "//pkg/blobstore/buffer", + "//pkg/blobstore/cdc", "//pkg/digest", "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", "@com_github_stretchr_testify//require", diff --git a/pkg/blobstore/chunklistvalidating/chunk_concatenating_reader.go b/pkg/blobstore/chunklistvalidating/chunk_concatenating_reader.go deleted file mode 100644 index 643af130e..000000000 --- a/pkg/blobstore/chunklistvalidating/chunk_concatenating_reader.go +++ /dev/null @@ -1,68 +0,0 @@ -package chunklistvalidating - -import ( - "context" - "io" - - "github.com/buildbarn/bb-storage/pkg/blobstore" - "github.com/buildbarn/bb-storage/pkg/digest" - "github.com/buildbarn/bb-storage/pkg/util" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" -) - -// chunkConcatenatingReader is a helper utility that implements the -// io.ReadCloser api over a series of digest.Digest objectes fetched -// sequentially from the CAS. 
-type chunkConcatenatingReader struct { - ctx context.Context - contentAddressableStorage blobstore.BlobAccess - chunkDigests []digest.Digest - currentIndex int - currentReader io.ReadCloser - closed bool -} - -func (r *chunkConcatenatingReader) Read(p []byte) (int, error) { - if r.closed { - return 0, status.Error(codes.Internal, "Reader is already closed") - } - for { - if r.currentReader == nil { - if r.currentIndex >= len(r.chunkDigests) { - return 0, io.EOF - } - chunkDigest := r.chunkDigests[r.currentIndex] - b := r.contentAddressableStorage.Get(r.ctx, chunkDigest) - r.currentReader = b.ToReader() - r.currentIndex++ - } - - n, err := r.currentReader.Read(p) - if n > 0 { - return n, nil - } - if err == io.EOF { - err = r.currentReader.Close() - r.currentReader = nil - if err != nil { - return 0, err - } - continue - } - if err != nil { - _ = r.currentReader.Close() - r.currentReader = nil - return 0, util.StatusWrap(err, "Failed to read chunk") - } - } -} - -func (r *chunkConcatenatingReader) Close() (err error) { - r.closed = true - if r.currentReader != nil { - err = r.currentReader.Close() - r.currentReader = nil - } - return err -} diff --git a/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access.go b/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access.go index 807227f9f..2ee0e774e 100644 --- a/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access.go +++ b/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access.go @@ -7,17 +7,18 @@ import ( remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" "github.com/buildbarn/bb-storage/pkg/blobstore" "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/blobstore/cdc" "github.com/buildbarn/bb-storage/pkg/digest" "github.com/buildbarn/bb-storage/pkg/util" - "golang.org/x/sync/errgroup" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) type chunkListValidatingBlobAccess struct { 
blobstore.BlobAccess - contentAddressableStorage blobstore.BlobAccess - maximumMessageSizeBytes int + chunkStorage blobstore.BlobAccess + maximumMessageSizeBytes int + parameterProvider cdc.ParameterProvider } // NewChunkListValidatingBlobAccess creates a wrapper around a Chunk @@ -30,75 +31,54 @@ type chunkListValidatingBlobAccess struct { // This validation is fairly expensive and validation should only be // done at a single layer as close as possible to the CAS where the full // view of the CAS is available. -func NewChunkListValidatingBlobAccess(chunkListStorage, contentAddressableStorage blobstore.BlobAccess, maximumMessageSizeBytes int) blobstore.BlobAccess { +func NewChunkListValidatingBlobAccess(chunkListStorage, chunkStorage blobstore.BlobAccess, cdcParameterProvider cdc.ParameterProvider, maximumMessageSizeBytes int) blobstore.BlobAccess { return &chunkListValidatingBlobAccess{ - BlobAccess: chunkListStorage, - contentAddressableStorage: contentAddressableStorage, - maximumMessageSizeBytes: maximumMessageSizeBytes, + BlobAccess: chunkListStorage, + chunkStorage: chunkStorage, + maximumMessageSizeBytes: maximumMessageSizeBytes, + parameterProvider: cdcParameterProvider, } } -// Fetch the chunking parameters from the GetCapabilities -// implementation. 
-func (ba *chunkListValidatingBlobAccess) getValidChunkingParameters(ctx context.Context, instanceName digest.InstanceName) (*remoteexecution.RepMaxCdcParams, error) { - capabilities, err := ba.BlobAccess.GetCapabilities(ctx, instanceName) - if err != nil { - return nil, util.StatusWrap(err, "Unable to GetCapabilities to determine chunking parameters") - } - - params := capabilities.GetCacheCapabilities().GetRepMaxCdcParams() - if params == nil { - return nil, status.Error(codes.Unimplemented, "This backend only supports upstream servers with rep max cdc support.") - } - if params.MinChunkSizeBytes < 64 { - return nil, status.Errorf(codes.Internal, "MinChunkSizeBytes was %d but a minimum of 64 is required.", params.MinChunkSizeBytes) +// Get the split result from the downstream blob access, should one +// exist return it only if all its constituent chunks exist. +func (ba *chunkListValidatingBlobAccess) getComplete(ctx context.Context, d digest.Digest) buffer.Buffer { + missing, err := ba.BlobAccess.FindMissing(ctx, d.ToSingletonSet()) + if err != nil || !missing.Empty() { + return buffer.NewBufferFromError(status.Error(codes.NotFound, "Blob could not be found.")) } - maxMinChunkSize := (ba.maximumMessageSizeBytes + 1) / 2 - if params.MinChunkSizeBytes > uint64(maxMinChunkSize) { - return nil, status.Errorf(codes.Internal, "MinChunkSizeBytes was %d but a maximum of %d is supported with the configured maximum message size.", params.MinChunkSizeBytes, maxMinChunkSize) - } - - return params, nil -} - -// Check the downstream blob access if this particular blob has already -// been split. If that's the case and all the chunks are still there we -// can return early. In case of errors we will return nil and continue -// with the regular code path. 
-func (ba *chunkListValidatingBlobAccess) checkSplitResult(ctx context.Context, d digest.Digest) buffer.Buffer { b1, b2 := ba.BlobAccess.Get(ctx, d).CloneCopy(ba.maximumMessageSizeBytes) responseMsg, err := b1.ToProto(&remoteexecution.SplitBlobResponse{}, ba.maximumMessageSizeBytes) if err != nil { b2.Discard() - return nil + return buffer.NewBufferFromError(status.Error(codes.NotFound, "Failed to parse chunk list.")) } splitBlobResponse := responseMsg.(*remoteexecution.SplitBlobResponse) digestFunction := d.GetDigestFunction() digestSetBuilder := digest.NewSetBuilder(len(splitBlobResponse.ChunkDigests)) - digestSetBuilder.Add(d) for _, chunkDigestProto := range splitBlobResponse.ChunkDigests { chunkDigest, err := digestFunction.NewDigestFromProto(chunkDigestProto) if err != nil { b2.Discard() - return nil + return buffer.NewBufferFromError(util.StatusWrap(err, "Failed to parse digest of chunk.")) } digestSetBuilder.Add(chunkDigest) } - missing, err := ba.contentAddressableStorage.FindMissing(ctx, digestSetBuilder.Build()) + missing, err = ba.chunkStorage.FindMissing(ctx, digestSetBuilder.Build()) if err == nil && missing.Empty() { return b2 } b2.Discard() - return nil + return buffer.NewBufferFromError(status.Error(codes.NotFound, "Blob could not be found.")) } // Get returns a valid SplitResult for the given digest chunking the // blob and storing the chunk list if needed. func (ba *chunkListValidatingBlobAccess) Get(ctx context.Context, d digest.Digest) buffer.Buffer { - params, err := ba.getValidChunkingParameters(ctx, d.GetInstanceName()) + params, err := ba.parameterProvider.Get(ctx, d.GetInstanceName()) if err != nil { return buffer.NewBufferFromError(err) } @@ -108,8 +88,8 @@ func (ba *chunkListValidatingBlobAccess) Get(ctx context.Context, d digest.Diges // original blob. We verify the existence of the blob in CAS and // break out early. 
blobSize := d.GetSizeBytes() - if uint64(blobSize) < 2*params.MinChunkSizeBytes { - missing, err := ba.contentAddressableStorage.FindMissing(ctx, d.ToSingletonSet()) + if blobSize < 2*params.MinChunkSizeBytes { + missing, err := ba.chunkStorage.FindMissing(ctx, d.ToSingletonSet()) if err != nil { return buffer.NewBufferFromError(util.StatusWrap(err, "Failed to verify blob existence")) } @@ -125,54 +105,8 @@ func (ba *chunkListValidatingBlobAccess) Get(ctx context.Context, d digest.Diges return buffer.NewProtoBufferFromProto(response, buffer.UserProvided) } - // Check if we have already computed the result for this blob. - if result := ba.checkSplitResult(ctx, d); result != nil { - return result - } - - // Fallthrough case, compute the chunk list, upload the chunks and - // store the chunk list. - blobReader := ba.contentAddressableStorage.Get(ctx, d).ToReader() - defer blobReader.Close() - chunker := NewReaderChunker(d.GetDigestFunction(), blobReader, int64(params.MinChunkSizeBytes), int64(params.HorizonSizeBytes)) - - chunkDigests := make([]*remoteexecution.Digest, 0, uint64(blobSize)/params.MinChunkSizeBytes+1) - - for { - chunk, err := chunker.NextChunk() - if err == io.EOF { - break - } - if err != nil { - return buffer.NewBufferFromError(err) - } - - missing, err := ba.contentAddressableStorage.FindMissing(ctx, chunk.Digest.ToSingletonSet()) - if err != nil { - return buffer.NewBufferFromError(err) - } - if !missing.Empty() { - if err := ba.contentAddressableStorage.Put(ctx, chunk.Digest, buffer.NewValidatedBufferFromByteSlice(chunk.Data)); err != nil { - return buffer.NewBufferFromError(err) - } - } - - chunkDigests = append(chunkDigests, chunk.Digest.GetProto()) - } - - response := &remoteexecution.SplitBlobResponse{ - ChunkDigests: chunkDigests, - ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, - } - - b1, b2 := buffer.NewProtoBufferFromProto(response, buffer.UserProvided).CloneCopy(ba.maximumMessageSizeBytes) - - if err := 
ba.BlobAccess.Put(ctx, d, b1); err != nil { - b2.Discard() - return buffer.NewBufferFromError(util.StatusWrap(err, "Failed to store the split blob response")) - } - - return b2 + // Return upstream split result if complete. + return ba.getComplete(ctx, d) } func (ba *chunkListValidatingBlobAccess) matchesStoredChunkList(ctx context.Context, d digest.Digest, userResponse *remoteexecution.SplitBlobResponse) bool { @@ -200,32 +134,23 @@ func (ba *chunkListValidatingBlobAccess) Put(ctx context.Context, d digest.Diges if err != nil { return util.StatusWrap(err, "Failed to parse input as SplitBlobResponse") } - userResponse := msg.(*remoteexecution.SplitBlobResponse) + inResponse := msg.(*remoteexecution.SplitBlobResponse) + + params, err := ba.parameterProvider.Get(ctx, d.GetInstanceName()) + if err != nil { + return err + } digestFunction := d.GetDigestFunction() var userChunks []digest.Digest - digestSetBuilder := digest.NewSetBuilder(len(userResponse.ChunkDigests)) - for _, chunkDigestProto := range userResponse.ChunkDigests { + for _, chunkDigestProto := range inResponse.ChunkDigests { chunkDigest, err := digestFunction.NewDigestFromProto(chunkDigestProto) if err != nil { return status.Errorf(codes.InvalidArgument, "Invalid chunk digest: %v", err) } - digestSetBuilder.Add(chunkDigest) userChunks = append(userChunks, chunkDigest) } - // Check that all referenced chunks are present in storage. - missing, err := ba.contentAddressableStorage.FindMissing(ctx, digestSetBuilder.Build()) - if err != nil { - return util.StatusWrap(err, "Failed to check existence of chunks") - } - if !missing.Empty() { - return status.Error(codes.NotFound, "At least one chunk in the chunk list was not found") - } - - // Check the trivial cases without hitting the downstream blob - // stores. - // No chunks given, blob must be the empty blob. 
if len(userChunks) == 0 { if d.GetSizeBytes() != 0 { @@ -236,88 +161,52 @@ func (ba *chunkListValidatingBlobAccess) Put(ctx context.Context, d digest.Diges } return nil } - // Single chunk given, the blob must be equal to the chunk. At this - // point we have already verified the presence of the chunk so we do - // not have to verify the presence of the blob. - if len(userChunks) == 1 { - if d != userChunks[0] { - return status.Error(codes.InvalidArgument, "Chunk list does not compose to blob") - } - return nil - } - chunksMatchesStoredLists := ba.matchesStoredChunkList(ctx, d, userResponse) - missing, err = ba.contentAddressableStorage.FindMissing(ctx, d.ToSingletonSet()) + // Check that all referenced chunks are present in storage. + userChunks, err = ba.flattenChunks(ctx, params, userChunks) if err != nil { - return util.StatusWrap(err, "Failed to check existence of blob") + return status.Error(codes.NotFound, "At least one chunk is missing from storage.") } - blobExistsInCAS := missing.Empty() - // The request is identical to an already existing chunk list with - // content we have verified exists in CAS. - if blobExistsInCAS && chunksMatchesStoredLists { - return nil + // Chunk list is marked for validation bypass, push it directly to + // downstream blob store. + if cdc.ChunkListValidationBypassed(ctx) { + return ba.BlobAccess.Put(ctx, d, b) } - // No more shortcuts available go through the heavy path of - // concatenating/verifying and chunking the blobs. - params, err := ba.getValidChunkingParameters(ctx, d.GetInstanceName()) - if err != nil { - return err - } + // Check the trivial case without hitting the downstream blob + // stores.
- reader := &chunkConcatenatingReader{ - ctx: ctx, - contentAddressableStorage: ba.contentAddressableStorage, - chunkDigests: userChunks, + if ba.matchesStoredChunkList(ctx, d, inResponse) { + return nil } - blobBuffer := buffer.NewCASBufferFromReader(d, reader, buffer.UserProvided) - b1, b2 := blobBuffer.CloneStream() - - // Stream 1: Uploads the blob to CAS. - group, gCtx := errgroup.WithContext(ctx) - group.Go(func() error { - if blobExistsInCAS { - // Upload unnecessary, blob already exists in CAS. - b1.Discard() - return nil - } - return ba.contentAddressableStorage.Put(gCtx, d, b1) - }) - - // Stream 2: Chunk the stream to compute the digest and cache the - // canonical chunks. + // No more shortcuts available go through the heavy path of + // concatenating/verifying and chunking the blobs. + blobBuffer := buffer.NewUnvalidatedCASChunkConcatenatingBuffer(ctx, d, userChunks, ba.chunkStorage.Get, buffer.UserProvided, ba.maximumMessageSizeBytes) var canonicalChunkDigests []*remoteexecution.Digest - group.Go(func() error { - b2Reader := b2.ToReader() - defer b2Reader.Close() - chunker := NewReaderChunker(d.GetDigestFunction(), b2Reader, int64(params.MinChunkSizeBytes), int64(params.HorizonSizeBytes)) - for { - chunk, err := chunker.NextChunk() - if err == io.EOF { - return nil - } - if err != nil { - return err - } + reader := blobBuffer.ToReader() + defer reader.Close() + chunker := cdc.NewReaderChunker(d.GetDigestFunction(), reader, int64(params.MinChunkSizeBytes), int64(params.HorizonSizeBytes)) + for { + chunk, err := chunker.NextChunk() + if err == io.EOF { + break + } + if err != nil { + return err + } - missing, err := ba.contentAddressableStorage.FindMissing(gCtx, chunk.Digest.ToSingletonSet()) - if err != nil { - return err - } - if !missing.Empty() { - if err := ba.contentAddressableStorage.Put(gCtx, chunk.Digest, buffer.NewValidatedBufferFromByteSlice(chunk.Data)); err != nil { - return util.StatusWrap(err, "Failed to save chunk") - } + missing, err := 
ba.chunkStorage.FindMissing(ctx, chunk.Digest.ToSingletonSet()) + if err != nil { + return err + } + if !missing.Empty() { + if err := ba.chunkStorage.Put(ctx, chunk.Digest, buffer.NewValidatedBufferFromByteSlice(chunk.Data)); err != nil { + return util.StatusWrap(err, "Failed to save chunk") } - canonicalChunkDigests = append(canonicalChunkDigests, chunk.Digest.GetProto()) } - }) - - // Wait for the full blob validation and upload to complete. - if err := group.Wait(); err != nil { - return util.StatusWrap(err, "Failed to splice the blob") + canonicalChunkDigests = append(canonicalChunkDigests, chunk.Digest.GetProto()) } // Store the canonical response. @@ -332,6 +221,55 @@ func (ba *chunkListValidatingBlobAccess) Put(ctx context.Context, d digest.Diges return nil } +func (ba *chunkListValidatingBlobAccess) flattenChunks(ctx context.Context, params cdc.Parameters, userChunks []digest.Digest) ([]digest.Digest, error) { + maxChunkSize := int64(2*params.MinChunkSizeBytes - 1) + bigDigests := digest.NewSetBuilder(len(userChunks)) + for _, chunkDigest := range userChunks { + if chunkDigest.GetSizeBytes() > maxChunkSize { + bigDigests.Add(chunkDigest) + } + } + missing, err := ba.BlobAccess.FindMissing(ctx, bigDigests.Build()) + if err != nil { + return nil, util.StatusWrap(err, "Error checking for chunk lists of big chunks") + } + if !missing.Empty() { + return nil, status.Error(codes.NotFound, "Chunk lists not found for big chunks.") + } + flattenedChunks := make([]digest.Digest, 0, len(userChunks)) + flattenedChunksBuilder := digest.NewSetBuilder(len(userChunks)) + for _, chunkDigest := range userChunks { + digestFunction := chunkDigest.GetDigestFunction() + if chunkDigest.GetSizeBytes() <= maxChunkSize { + flattenedChunks = append(flattenedChunks, chunkDigest) + flattenedChunksBuilder.Add(chunkDigest) + } else { + innerChunksResponseBuffer := ba.BlobAccess.Get(ctx, chunkDigest) + innerChunksResponseProtoBuf, err := 
innerChunksResponseBuffer.ToProto(&remoteexecution.SplitBlobResponse{}, ba.maximumMessageSizeBytes) + if err != nil { + return nil, util.StatusWrap(err, "Error reading chunk list for big chunk") + } + innerChunksResponseProto := innerChunksResponseProtoBuf.(*remoteexecution.SplitBlobResponse) + for _, innerChunkDigestProto := range innerChunksResponseProto.ChunkDigests { + innerDigest, err := digestFunction.NewDigestFromProto(innerChunkDigestProto) + if err != nil { + return nil, util.StatusWrap(err, "Error parsing digest of chunk list of big chunk") + } + flattenedChunks = append(flattenedChunks, innerDigest) + flattenedChunksBuilder.Add(innerDigest) + } + } + } + missing, err = ba.chunkStorage.FindMissing(ctx, flattenedChunksBuilder.Build()) + if err != nil { + return nil, util.StatusWrap(err, "Error checking for existence of flattened chunks.") + } + if !missing.Empty() { + return nil, status.Error(codes.NotFound, "At least one chunk among flattened chunks are missing.") + } + return flattenedChunks, nil +} + func (ba *chunkListValidatingBlobAccess) findMissingChunks(ctx context.Context, d digest.Digest) (digest.Set, error) { splitBlobResponseProto, err := ba.BlobAccess.Get(ctx, d).ToProto(&remoteexecution.SplitBlobResponse{}, ba.maximumMessageSizeBytes) if err != nil { @@ -347,7 +285,7 @@ func (ba *chunkListValidatingBlobAccess) findMissingChunks(ctx context.Context, } builder.Add(chunkDigest) } - return ba.contentAddressableStorage.FindMissing(ctx, builder.Build()) + return ba.chunkStorage.FindMissing(ctx, builder.Build()) } func (ba *chunkListValidatingBlobAccess) FindMissing(ctx context.Context, digests digest.Set) (digest.Set, error) { diff --git a/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access_test.go b/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access_test.go index 3830e720b..83129baf8 100644 --- a/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access_test.go +++ 
b/pkg/blobstore/chunklistvalidating/chunk_list_validating_blob_access_test.go @@ -7,6 +7,7 @@ import ( remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/blobstore/cdc" "github.com/buildbarn/bb-storage/pkg/blobstore/chunklistvalidating" "github.com/buildbarn/bb-storage/pkg/digest" "github.com/stretchr/testify/require" @@ -36,7 +37,8 @@ func TestChunkListValidatingBlobAccessGetTrivialSmallBlob(t *testing.T) { fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) blobData := []byte("Small trivial blob") @@ -54,76 +56,86 @@ func TestChunkListValidatingBlobAccessGetTrivialSmallBlob(t *testing.T) { require.Greater(t, fakeCAS.GetTouches(blobDigest), 0, "Blob did not have its lifetime renewed.") } -func TestChunkListValidatingBlobAccessGetLargeBlob(t *testing.T) { +func TestChunkListValidatingBlobAccessGetExtendsLifetimes(t *testing.T) { ctx := context.Background() fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) + + blobData := bytes.Repeat([]byte("testdatafortests"), 250) // <4KiB + chunk1Data := 
blobData[:len(blobData)/2] + chunk2Data := blobData[len(blobData)/2:] digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) - blobData := bytes.Repeat([]byte("test_data_pattern_"), 6000) blobDigest := mustComputeDigest(t, digestFunction, blobData) + chunk1Digest := mustComputeDigest(t, digestFunction, chunk1Data) + chunk2Digest := mustComputeDigest(t, digestFunction, chunk2Data) + require.NoError(t, fakeCAS.Put(ctx, chunk1Digest, buffer.NewValidatedBufferFromByteSlice(chunk1Data))) + require.NoError(t, fakeCAS.Put(ctx, chunk2Digest, buffer.NewValidatedBufferFromByteSlice(chunk2Data))) - require.NoError(t, fakeCAS.Put(ctx, blobDigest, buffer.NewValidatedBufferFromByteSlice(blobData))) + splitResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{ + chunk1Digest.GetProto(), + chunk2Digest.GetProto(), + }, + } + chunkListBuffer := buffer.NewProtoBufferFromProto(splitResponse, buffer.UserProvided) + require.NoError(t, fakeCLS.Put(ctx, blobDigest, chunkListBuffer)) - msg, err := validatingCLS.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) + // Reset touches. + fakeCLS.ResetTouches() + fakeCAS.ResetTouches() + + // Perform a cached split. 
+ msgCached, err := validatingCLS.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) require.NoError(t, err) + cachedResponse := msgCached.(*remoteexecution.SplitBlobResponse) + require.Equal(t, len(splitResponse.ChunkDigests), len(cachedResponse.ChunkDigests)) - splitResponse := msg.(*remoteexecution.SplitBlobResponse) - require.Greater(t, len(splitResponse.ChunkDigests), 1, "Blob should have been divided into multiple chunks.") + // The original blob MUST have had its lifetime extended + require.Greater(t, fakeCLS.GetTouches(blobDigest), 0, "Original blob's chunk list lifetime was not extended during call to Get") - for _, chunkProto := range splitResponse.ChunkDigests { + // Every chunk MUST have had its lifetime extended + for _, chunkProto := range cachedResponse.ChunkDigests { chunkDigest, err := digestFunction.NewDigestFromProto(chunkProto) require.NoError(t, err) - - require.Greater(t, fakeCAS.GetTouches(chunkDigest), 0, "Chunk generated by CDC did not have its lifetime renewed.") - missing, err := fakeCAS.FindMissing(ctx, chunkDigest.ToSingletonSet()) - require.NoError(t, err) - require.True(t, missing.Empty(), "Chunk generated by CDC was not saved to the CAS.") + require.Greater(t, fakeCAS.GetTouches(chunkDigest), 0, "Chunk's lifetime was not extended during call to Get") } - - cachedMsg, err := fakeCLS.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) - require.NoError(t, err) - require.Equal(t, len(splitResponse.ChunkDigests), len(cachedMsg.(*remoteexecution.SplitBlobResponse).ChunkDigests)) } -func TestChunkListValidatingBlobAccessGetExtendsLifetimes(t *testing.T) { +func TestChunkListValidatingBlobAccessGetLargeBlobMissingUnderlyingChunk(t *testing.T) { ctx := context.Background() - fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, 
maximumMessageSizeBytes) + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) - blobData := bytes.Repeat([]byte("test_data_pattern_"), 6000) // ~108KB - blobDigest := mustComputeDigest(t, digestFunction, blobData) - require.NoError(t, fakeCAS.Put(ctx, blobDigest, buffer.NewValidatedBufferFromByteSlice(blobData))) - - // Split the blob to populate the CAS and CLS for this blob. - msg, err := validatingCLS.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) - require.NoError(t, err) - splitResponse := msg.(*remoteexecution.SplitBlobResponse) - // Reset touches. - fakeCAS.ResetTouches() - - // Perform a cached split. - msgCached, err := validatingCLS.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) - require.NoError(t, err) - cachedResponse := msgCached.(*remoteexecution.SplitBlobResponse) - require.Equal(t, len(splitResponse.ChunkDigests), len(cachedResponse.ChunkDigests)) + chunk1Data := bytes.Repeat([]byte("A"), 1500) + chunk1Digest := mustComputeDigest(t, digestFunction, chunk1Data) + require.NoError(t, fakeCAS.Put(ctx, chunk1Digest, buffer.NewValidatedBufferFromByteSlice(chunk1Data))) + chunk2Data := bytes.Repeat([]byte("B"), 1500) + chunk2Digest := mustComputeDigest(t, digestFunction, chunk2Data) + // Chunk 2 is not uploaded to the chunk storage - // The original blob MUST have had its lifetime extended - require.Greater(t, fakeCAS.GetTouches(blobDigest), 0, "Original blob's lifetime was not extended during call to SplitBlob") - require.Greater(t, fakeCLS.GetTouches(blobDigest), 0, "Original blob's chunk list lifetime was not extended during call to SplitBlob") + expectedFullData := append(chunk1Data, 
chunk2Data...) + blobDigest := mustComputeDigest(t, digestFunction, expectedFullData) - // Every chunk MUST have been touched in the CAS - for _, chunkProto := range cachedResponse.ChunkDigests { - chunkDigest, err := digestFunction.NewDigestFromProto(chunkProto) - require.NoError(t, err) - require.Greater(t, fakeCAS.GetTouches(chunkDigest), 0, "Chunk's lifetime was not extended during call to SplitBlob") + splitResponse := &remoteexecution.SplitBlobResponse{ + ChunkDigests: []*remoteexecution.Digest{ + chunk1Digest.GetProto(), + chunk2Digest.GetProto(), + }, } + manifestBuffer := buffer.NewProtoBufferFromProto(splitResponse, buffer.UserProvided) + require.NoError(t, fakeCLS.Put(ctx, blobDigest, manifestBuffer)) + + _, err := validatingCLS.Get(ctx, blobDigest).ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) + require.Error(t, err) + require.Equal(t, codes.NotFound, status.Code(err), "Incorrect error message from Get request: %s", err.Error()) } func TestChunkListValidatingBlobAccessPutManualSplice(t *testing.T) { @@ -131,7 +143,8 @@ func TestChunkListValidatingBlobAccessPutManualSplice(t *testing.T) { fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) @@ -167,13 +180,14 @@ func TestChunkListValidatingBlobAccessPutCanonicalization(t *testing.T) { fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + parameterProvider := 
cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) - blobData := bytes.Repeat([]byte("canonicalization_test_data"), 4000) // ~104KB - chunk1Data := blobData[:10] - chunk2Data := blobData[10:] + blobData := bytes.Repeat([]byte("testdatafortests"), 250) // <4KiB + chunk1Data := blobData[:len(blobData)/2] + chunk2Data := blobData[len(blobData)/2:] chunk1Digest := mustComputeDigest(t, digestFunction, chunk1Data) require.NoError(t, fakeCAS.Put(ctx, chunk1Digest, buffer.NewValidatedBufferFromByteSlice(chunk1Data))) @@ -194,10 +208,6 @@ func TestChunkListValidatingBlobAccessPutCanonicalization(t *testing.T) { err := validatingCLS.Put(ctx, fullBlobDigest, reqBuffer) require.NoError(t, err) - composedData, err := fakeCAS.Get(ctx, fullBlobDigest).ToByteSlice(200000) - require.NoError(t, err) - require.Equal(t, blobData, composedData) - canonicalBuffer := fakeCLS.Get(ctx, fullBlobDigest) canonicalProto, err := canonicalBuffer.ToProto(&remoteexecution.SplitBlobResponse{}, maximumMessageSizeBytes) require.NoError(t, err) @@ -212,7 +222,8 @@ func TestChunkListValidatingBlobAccessPutMissingChunk(t *testing.T) { fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) chunkDigest := mustComputeDigest(t, digestFunction, []byte("ghost")) @@ -232,7 +243,8 @@ func 
TestChunkListValidatingBlobAccessPutDigestMismatch(t *testing.T) { fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) @@ -249,7 +261,7 @@ func TestChunkListValidatingBlobAccessPutDigestMismatch(t *testing.T) { err := validatingCLS.Put(ctx, wrongBlobDigest, reqBuffer) require.Error(t, err) - require.Contains(t, err.Error(), "does not compose to blob") + require.Equal(t, codes.InvalidArgument, status.Code(err), "Incorrect error from Put request: %s", err.Error()) } func TestChunkListValidatingBlobAccessPutEmptyBlob(t *testing.T) { @@ -257,7 +269,8 @@ func TestChunkListValidatingBlobAccessPutEmptyBlob(t *testing.T) { fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) emptyDigest := mustComputeDigest(t, digestFunction, nil) @@ -276,7 +289,8 @@ func TestChunkListValidatingBlobAccessPutRepeatedChunks(t *testing.T) { fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + parameterProvider := 
cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) @@ -314,7 +328,8 @@ func TestChunkListValidatingBlobAccessPutInlineEmptyChunk(t *testing.T) { fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) @@ -344,7 +359,8 @@ func TestChunkListValidatingBlobAccessPutExtendsLifetimes(t *testing.T) { fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, maximumMessageSizeBytes) + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) @@ -398,7 +414,8 @@ func TestChunkListValidatingBlobAccessGetMissingBlob(t *testing.T) { fakeCAS := newFakeBlobAccess(nil) fakeCLS := newFakeBlobAccess(testCDCParams) - validatingCLS := chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, 1024*1024) + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(fakeCLS, maximumMessageSizeBytes) + validatingCLS := 
chunklistvalidating.NewChunkListValidatingBlobAccess(fakeCLS, fakeCAS, parameterProvider, maximumMessageSizeBytes) digestFunction := digest.MustNewFunction("instance", remoteexecution.DigestFunction_SHA256) ghostDigest := mustComputeDigest(t, digestFunction, []byte("ghost")) diff --git a/pkg/blobstore/chunklistvalidating/integration/BUILD.bazel b/pkg/blobstore/chunklistvalidating/integration/BUILD.bazel deleted file mode 100644 index ca423091e..000000000 --- a/pkg/blobstore/chunklistvalidating/integration/BUILD.bazel +++ /dev/null @@ -1,19 +0,0 @@ -load("@rules_go//go:def.bzl", "go_test") - -go_test( - name = "integration_test", - srcs = ["chunk_list_validating_integration_test.go"], - data = ["//cmd/bb_storage"], - env = { - "BB_STORAGE_RUNFILE_PATH": "$(rlocationpath //cmd/bb_storage:bb_storage)", - }, - deps = [ - "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", - "@com_github_stretchr_testify//require", - "@org_golang_google_grpc//:grpc", - "@org_golang_google_grpc//codes", - "@org_golang_google_grpc//credentials/insecure", - "@org_golang_google_grpc//status", - "@rules_go//go/runfiles", - ], -) diff --git a/pkg/blobstore/chunklistvalidating/integration/chunk_list_validating_integration_test.go b/pkg/blobstore/chunklistvalidating/integration/chunk_list_validating_integration_test.go deleted file mode 100644 index 8e5dd7741..000000000 --- a/pkg/blobstore/chunklistvalidating/integration/chunk_list_validating_integration_test.go +++ /dev/null @@ -1,445 +0,0 @@ -package integration - -import ( - "context" - "crypto/sha256" - "encoding/hex" - "fmt" - "math/rand" - "os" - "os/exec" - "strings" - "testing" - "time" - - remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" - "github.com/bazelbuild/rules_go/go/runfiles" - "github.com/stretchr/testify/require" - - "google.golang.org/grpc" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/credentials/insecure" - "google.golang.org/grpc/status" -) - 
-type serverParams struct { - disableCLS bool - socketPath string - upstreamSocketPath string -} - -func escapeJSON(s string) string { - s = strings.ReplaceAll(s, "\\", "\\\\") - s = strings.ReplaceAll(s, "\"", "\\\"") - s = strings.ReplaceAll(s, "'", "\\'") - s = strings.ReplaceAll(s, "\n", "\\n") - return s -} - -func storageConfig(params serverParams) string { - return fmt.Sprintf(` -local cls = %t; -local listenPath = '%s'; -{ - grpcServers: [{ - listenPaths: [listenPath], - authenticationPolicy: { allow: {} }, - }], - maximumMessageSizeBytes: 4 * 1024 * 1024, - contentAddressableStorage: { - backend: { - 'local': { - keyLocationMapInMemory: { entries: 1024 }, - keyLocationMapMaximumGetAttempts: 32, - keyLocationMapMaximumPutAttempts: 64, - oldBlocks: 1, - currentBlocks: 1, - newBlocks: 1, - blocksInMemory: { blockSizeBytes: 1024 * 1024 }, - }, - }, - getAuthorizer: { allow: {} }, - putAuthorizer: { allow: {} }, - findMissingAuthorizer: { allow: {} }, - }, - chunkListStorage: if !cls then null else { - backend: { - 'local': { - keyLocationMapInMemory: { entries: 1024 }, - keyLocationMapMaximumGetAttempts: 32, - keyLocationMapMaximumPutAttempts: 64, - oldBlocks: 1, - currentBlocks: 1, - newBlocks: 1, - blocksInMemory: { blockSizeBytes: 1024 * 1024 }, - chunkingParameters: { - minChunkSizeBytes: 256, - horizonSizeBytes: 8*256, - } - }, - }, - getAuthorizer: { allow: {} }, - putAuthorizer: { allow: {} }, - findMissingAuthorizer: { allow: {} }, - }, -} -`, !params.disableCLS, escapeJSON(params.socketPath)) -} - -func frontendConfig(params serverParams) string { - return fmt.Sprintf(` -local cls = %t; -local listenPath = '%s'; -// unix:// doesn't work under Windows. 
-// https://github.com/grpc/grpc-go/issues/8675 -local upstreamAddress = 'unix:%s'; -{ - grpcServers: [{ - listenPaths: [listenPath], - authenticationPolicy: { allow: {} }, - }], - maximumMessageSizeBytes: 4 * 1024 * 1024, - contentAddressableStorage: { - backend: { grpc: { client: { address: upstreamAddress } } }, - getAuthorizer: { allow: {} }, - putAuthorizer: { allow: {} }, - findMissingAuthorizer: { allow: {} }, - }, - chunkListStorage: if !cls then null else { - backend: { chunkListValidating: { backend: { grpc: { client: { address: upstreamAddress } } } } }, - getAuthorizer: { allow: {} }, - putAuthorizer: { allow: {} }, - findMissingAuthorizer: { allow: {} }, - }, -} -`, !params.disableCLS, escapeJSON(params.socketPath), escapeJSON(params.upstreamSocketPath)) -} - -func writeConfigFile(name, content string) (file *os.File, err error) { - if file, err = os.CreateTemp("", name); err != nil { - return nil, err - } - if _, err = file.WriteString(content); err != nil { - return nil, err - } - if err = file.Close(); err != nil { - return nil, err - } - return file, nil -} - -func setupServer(t *testing.T, name, config string) func() { - rf, err := runfiles.New() - if err != nil { - t.Fatalf("Failed to initialize runfiles: %v", err) - } - runfilePath := os.Getenv("BB_STORAGE_RUNFILE_PATH") - require.NotEmpty(t, runfilePath, "BB_STORAGE_RUNFILE_PATH environment variable is not set") - - bbStoragePath, err := rf.Rlocation(runfilePath) - require.NoError(t, err) - - configFile, err := writeConfigFile(name, config) - require.NoError(t, err) - - cmd := exec.Command(bbStoragePath, configFile.Name()) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - err = cmd.Start() - require.NoError(t, err) - - return func() { - cmd.Process.Kill() - cmd.Wait() - os.Remove(configFile.Name()) - } -} - -func createSocketPath(t *testing.T) string { - t.Helper() - socketFile, err := os.CreateTemp("", "bb_*.sock") - require.NoError(t, err) - socketPath := socketFile.Name() - 
socketFile.Close() - err = os.Remove(socketPath) - require.NoError(t, err) - return socketPath -} - -func setupServers(t *testing.T, storageParams, frontendParams serverParams) (func(), remoteexecution.CapabilitiesClient, remoteexecution.ContentAddressableStorageClient) { - storageParams.socketPath = createSocketPath(t) - closeStorage := setupServer(t, "storage", storageConfig(storageParams)) - require.Eventually(t, func() bool { - _, err := os.Stat(storageParams.socketPath) - return err == nil - }, 1*time.Second, 10*time.Millisecond, "Storage server did not start.") - - frontendParams.socketPath = createSocketPath(t) - frontendParams.upstreamSocketPath = storageParams.socketPath - closeFrontend := setupServer(t, "frontend", frontendConfig(frontendParams)) - require.Eventually(t, func() bool { - _, err := os.Stat(frontendParams.socketPath) - return err == nil - }, 1*time.Second, 10*time.Millisecond, "Frontend server did not start.") - - conn, err := grpc.NewClient(fmt.Sprintf("unix:%s", frontendParams.socketPath), grpc.WithTransportCredentials(insecure.NewCredentials())) - require.NoError(t, err) - - return func() { - closeStorage() - closeFrontend() - conn.Close() - os.Remove(storageParams.socketPath) - os.Remove(frontendParams.socketPath) - }, remoteexecution.NewCapabilitiesClient(conn), remoteexecution.NewContentAddressableStorageClient(conn) -} - -func TestChunkListValidatingCapabilities(t *testing.T) { - tests := []struct { - name string - storageParams serverParams - frontendParams serverParams - expectSupport bool - }{ - {"Enabled In Both", serverParams{}, serverParams{}, true}, - {"Disabled in Storage", serverParams{disableCLS: true}, serverParams{}, false}, - {"Disabled in Frontend", serverParams{}, serverParams{disableCLS: true}, false}, - {"Disabled in Both", serverParams{disableCLS: true}, serverParams{disableCLS: true}, false}, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - closer, capabilitiesClient, _ := setupServers(t, 
tc.storageParams, tc.frontendParams) - defer closer() - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - capabilities, err := capabilitiesClient.GetCapabilities(ctx, &remoteexecution.GetCapabilitiesRequest{ - InstanceName: "", - }) - require.NoError(t, err) - - cacheCaps := capabilities.GetCacheCapabilities() - require.NotNil(t, cacheCaps) - - if tc.expectSupport { - require.True(t, cacheCaps.SpliceBlobSupport) - require.True(t, cacheCaps.SplitBlobSupport) - - chunkingParameters := cacheCaps.GetRepMaxCdcParams() - require.NotNil(t, chunkingParameters) - require.Equal(t, uint64(256), chunkingParameters.GetMinChunkSizeBytes()) - require.Equal(t, uint64(2048), chunkingParameters.GetHorizonSizeBytes()) - } else { - require.False(t, cacheCaps.SpliceBlobSupport) - require.False(t, cacheCaps.SplitBlobSupport) - require.Nil(t, cacheCaps.GetRepMaxCdcParams()) - } - }) - } -} - -func computeDigest(data []byte) *remoteexecution.Digest { - hash := sha256.Sum256(data) - return &remoteexecution.Digest{ - Hash: hex.EncodeToString(hash[:]), - SizeBytes: int64(len(data)), - } -} - -func makeRandomData(t *testing.T, size int, seed int64) []byte { - t.Helper() - data := make([]byte, size) - r := rand.New(rand.NewSource(seed)) - _, err := r.Read(data) - require.NoError(t, err) - return data -} - -func uploadBlob(ctx context.Context, t *testing.T, cas remoteexecution.ContentAddressableStorageClient, data []byte) *remoteexecution.Digest { - t.Helper() - digest := computeDigest(data) - req := &remoteexecution.BatchUpdateBlobsRequest{ - Requests: []*remoteexecution.BatchUpdateBlobsRequest_Request{ - {Digest: digest, Data: data}, - }, - DigestFunction: remoteexecution.DigestFunction_SHA256, - } - res, err := cas.BatchUpdateBlobs(ctx, req) - require.NoError(t, err) - require.NotEmpty(t, res.Responses, "server returned empty responses array") - status := res.Responses[0].GetStatus() - require.Equal(t, int32(0), status.GetCode(), 
status.GetMessage()) - return digest -} - -func findMissingBlobs(ctx context.Context, t *testing.T, cas remoteexecution.ContentAddressableStorageClient, digests []*remoteexecution.Digest) []*remoteexecution.Digest { - t.Helper() - req := &remoteexecution.FindMissingBlobsRequest{ - BlobDigests: digests, - DigestFunction: remoteexecution.DigestFunction_SHA256, - } - res, err := cas.FindMissingBlobs(ctx, req) - require.NoError(t, err) - return res.MissingBlobDigests -} - -func TestRepMaxCDCSplitAndSpliceBehaviors(t *testing.T) { - minChunkSize := int64(256) - - t.Run("RoundTripSplitThenSplice", func(t *testing.T) { - closer, _, casClient := setupServers(t, serverParams{}, serverParams{}) - defer closer() - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - dataSize := (minChunkSize * 4) + 128 - data := makeRandomData(t, int(dataSize), 0) - blobDigest := uploadBlob(ctx, t, casClient, data) - - splitReq := &remoteexecution.SplitBlobRequest{ - BlobDigest: blobDigest, - ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, - DigestFunction: remoteexecution.DigestFunction_SHA256, - } - splitRes, err := casClient.SplitBlob(ctx, splitReq) - require.NoError(t, err) - - spliceReq := &remoteexecution.SpliceBlobRequest{ - BlobDigest: blobDigest, - ChunkDigests: splitRes.ChunkDigests, - ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, - DigestFunction: remoteexecution.DigestFunction_SHA256, - } - spliceRes, err := casClient.SpliceBlob(ctx, spliceReq) - require.NoError(t, err) - require.Equal(t, blobDigest.Hash, spliceRes.BlobDigest.Hash) - }) - - t.Run("SpliceNonStandardChunkingThenSplit", func(t *testing.T) { - closer, _, casClient := setupServers(t, serverParams{}, serverParams{}) - defer closer() - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - blobData := makeRandomData(t, int(minChunkSize*2), 0) - - chunk1 := blobData[:1] - chunk2 := blobData[1:] - - digest1 
:= uploadBlob(ctx, t, casClient, chunk1) - digest2 := uploadBlob(ctx, t, casClient, chunk2) - expectedDigest := computeDigest(blobData) - - spliceReq := &remoteexecution.SpliceBlobRequest{ - BlobDigest: expectedDigest, - ChunkDigests: []*remoteexecution.Digest{digest1, digest2}, - ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, - DigestFunction: remoteexecution.DigestFunction_SHA256, - } - spliceRes, err := casClient.SpliceBlob(ctx, spliceReq) - require.NoError(t, err) - require.Equal(t, expectedDigest.Hash, spliceRes.BlobDigest.Hash) - - splitReq := &remoteexecution.SplitBlobRequest{ - BlobDigest: expectedDigest, - ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, - DigestFunction: remoteexecution.DigestFunction_SHA256, - } - splitRes, err := casClient.SplitBlob(ctx, splitReq) - require.NoError(t, err) - - require.NotEmpty(t, splitRes.ChunkDigests) - - // Check that it didn't just echo our chunks back - isEcho := len(splitRes.ChunkDigests) == 2 && - splitRes.ChunkDigests[0].Hash == digest1.Hash && - splitRes.ChunkDigests[1].Hash == digest2.Hash - require.False(t, isEcho, "Server echoed non-standard chunks") - - var totalSize int64 - for _, c := range splitRes.ChunkDigests { - totalSize += c.SizeBytes - } - require.Equal(t, expectedDigest.SizeBytes, totalSize) - }) - - t.Run("SpliceAlreadyExistsOrNoop", func(t *testing.T) { - closer, _, casClient := setupServers(t, serverParams{}, serverParams{}) - defer closer() - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - blobData := append([]byte("This blob will be fully uploaded before we try to splice it."), makeRandomData(t, 16, 0)...) 
- expectedDigest := uploadBlob(ctx, t, casClient, blobData) - - chunk1 := blobData[:10] - chunk2 := blobData[10:] - digest1 := uploadBlob(ctx, t, casClient, chunk1) - digest2 := uploadBlob(ctx, t, casClient, chunk2) - - spliceReq := &remoteexecution.SpliceBlobRequest{ - BlobDigest: expectedDigest, - ChunkDigests: []*remoteexecution.Digest{digest1, digest2}, - ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, - DigestFunction: remoteexecution.DigestFunction_SHA256, - } - - spliceRes, err := casClient.SpliceBlob(ctx, spliceReq) - - if err != nil { - require.Equal(t, codes.AlreadyExists, status.Code(err), "Expected OK or ALREADY_EXISTS") - } else { - require.Equal(t, expectedDigest.Hash, spliceRes.BlobDigest.Hash) - } - }) - - t.Run("ValidationSpliceBlobRejections", func(t *testing.T) { - closer, _, casClient := setupServers(t, serverParams{}, serverParams{}) - defer closer() - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - validData := makeRandomData(t, 512, 0) - validDigest := uploadBlob(ctx, t, casClient, validData) - ghostDigest := computeDigest([]byte("I do not exist")) - - tests := []struct { - name string - req *remoteexecution.SpliceBlobRequest - expectError codes.Code - }{ - { - name: "Missing Chunk", - req: &remoteexecution.SpliceBlobRequest{ - BlobDigest: ghostDigest, - ChunkDigests: []*remoteexecution.Digest{ghostDigest}, - }, - expectError: codes.NotFound, - }, - { - name: "Digest Mismatch", - req: &remoteexecution.SpliceBlobRequest{ - BlobDigest: computeDigest([]byte("Fake target")), - ChunkDigests: []*remoteexecution.Digest{validDigest}, - }, - expectError: codes.InvalidArgument, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - tc.req.ChunkingFunction = remoteexecution.ChunkingFunction_REP_MAX_CDC - tc.req.DigestFunction = remoteexecution.DigestFunction_SHA256 - - _, err := casClient.SpliceBlob(ctx, tc.req) - require.Error(t, err) - require.Equal(t, 
tc.expectError, status.Code(err)) - }) - } - }) -} diff --git a/pkg/blobstore/completenesschecking/completeness_checking_blob_access_test.go b/pkg/blobstore/completenesschecking/completeness_checking_blob_access_test.go index 0fe3d6820..ee2b244c8 100644 --- a/pkg/blobstore/completenesschecking/completeness_checking_blob_access_test.go +++ b/pkg/blobstore/completenesschecking/completeness_checking_blob_access_test.go @@ -25,6 +25,7 @@ func TestCompletenessCheckingBlobAccess(t *testing.T) { actionCache := mock.NewMockBlobAccess(ctrl) contentAddressableStorage := mock.NewMockBlobAccess(ctrl) + completenessCheckingBlobAccess := completenesschecking.NewCompletenessCheckingBlobAccess( actionCache, contentAddressableStorage, diff --git a/pkg/blobstore/configuration/BUILD.bazel b/pkg/blobstore/configuration/BUILD.bazel index 8882b2b2e..9b72fac5f 100644 --- a/pkg/blobstore/configuration/BUILD.bazel +++ b/pkg/blobstore/configuration/BUILD.bazel @@ -6,9 +6,9 @@ go_library( "ac_blob_access_creator.go", "blob_access_creator.go", "blob_replicator_creator.go", - "cas_blob_access_creator.go", - "cas_blob_replicator_creator.go", "cls_blob_access_creator.go", + "cs_blob_access_creator.go", + "cs_blob_replicator_creator.go", "fsac_blob_access_creator.go", "icas_blob_access_creator.go", "icas_blob_replicator_creator.go", @@ -22,6 +22,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/blobstore", + "//pkg/blobstore/cdc", "//pkg/blobstore/chunklistvalidating", "//pkg/blobstore/completenesschecking", "//pkg/blobstore/grpcclients", diff --git a/pkg/blobstore/configuration/ac_blob_access_creator.go b/pkg/blobstore/configuration/ac_blob_access_creator.go index 14999a233..7183a10b3 100644 --- a/pkg/blobstore/configuration/ac_blob_access_creator.go +++ b/pkg/blobstore/configuration/ac_blob_access_creator.go @@ -31,19 +31,21 @@ type acBlobAccessCreator struct { protoBlobAccessCreator protoBlobReplicatorCreator - contentAddressableStorage *BlobAccessInfo - grpcClientFactory 
grpc.ClientFactory - maximumMessageSizeBytes int + contentAddressableStorage blobstore.BlobAccess + contentAddressableStorageKeyFormat digest.KeyFormat + grpcClientFactory grpc.ClientFactory + maximumMessageSizeBytes int } // NewACBlobAccessCreator creates a BlobAccessCreator that can be // provided to NewBlobAccessFromConfiguration() to construct a // BlobAccess that is suitable for accessing the Action Cache. -func NewACBlobAccessCreator(contentAddressableStorage *BlobAccessInfo, grpcClientFactory grpc.ClientFactory, maximumMessageSizeBytes int) BlobAccessCreator { +func NewACBlobAccessCreator(contentAddressableStorage blobstore.BlobAccess, contentAddressableStorageKeyFormat digest.KeyFormat, grpcClientFactory grpc.ClientFactory, maximumMessageSizeBytes int) BlobAccessCreator { return &acBlobAccessCreator{ - contentAddressableStorage: contentAddressableStorage, - grpcClientFactory: grpcClientFactory, - maximumMessageSizeBytes: maximumMessageSizeBytes, + contentAddressableStorage: contentAddressableStorage, + contentAddressableStorageKeyFormat: contentAddressableStorageKeyFormat, + grpcClientFactory: grpcClientFactory, + maximumMessageSizeBytes: maximumMessageSizeBytes, } } @@ -100,12 +102,12 @@ func (bac *acBlobAccessCreator) NewCustomBlobAccess(terminationGroup program.Gro return BlobAccessInfo{ BlobAccess: completenesschecking.NewCompletenessCheckingBlobAccess( base.BlobAccess, - bac.contentAddressableStorage.BlobAccess, + bac.contentAddressableStorage, blobstore.RecommendedFindMissingDigestsCount, bac.maximumMessageSizeBytes, backend.CompletenessChecking.MaximumTotalTreeSizeBytes, ), - DigestKeyFormat: base.DigestKeyFormat.Combine(bac.contentAddressableStorage.DigestKeyFormat), + DigestKeyFormat: base.DigestKeyFormat.Combine(bac.contentAddressableStorageKeyFormat), }, "completeness_checking", nil case *pb.BlobAccessConfiguration_Grpc: client, err := bac.grpcClientFactory.NewClientFromConfiguration(backend.Grpc.Client, terminationGroup) diff --git 
a/pkg/blobstore/configuration/cls_blob_access_creator.go b/pkg/blobstore/configuration/cls_blob_access_creator.go index 76ff79f9c..014338807 100644 --- a/pkg/blobstore/configuration/cls_blob_access_creator.go +++ b/pkg/blobstore/configuration/cls_blob_access_creator.go @@ -3,6 +3,7 @@ package configuration import ( remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" "github.com/buildbarn/bb-storage/pkg/blobstore" + "github.com/buildbarn/bb-storage/pkg/blobstore/cdc" "github.com/buildbarn/bb-storage/pkg/blobstore/chunklistvalidating" "github.com/buildbarn/bb-storage/pkg/blobstore/grpcclients" "github.com/buildbarn/bb-storage/pkg/capabilities" @@ -18,19 +19,21 @@ type clsBlobAccessCreator struct { protoBlobAccessCreator protoBlobReplicatorCreator - contentAddressableStorage *BlobAccessInfo - grpcClientFactory grpc.ClientFactory - maximumMessageSizeBytes int + chunkStorage *BlobAccessInfo + grpcClientFactory grpc.ClientFactory + maximumMessageSizeBytes int + parameterCache *cdc.TTLCache[cdc.Parameters] } // NewCLSBlobAccessCreator creates a BlobAccessCreator that can be // provided to NewBlobAccessFromConfiguration() to construct a // BlobAccess that is suitable for querying for chunk list. 
-func NewCLSBlobAccessCreator(contentAddressableStorage *BlobAccessInfo, grpcClientFactory grpc.ClientFactory, maximumMessageSizeBytes int) BlobAccessCreator { +func NewCLSBlobAccessCreator(chunkStorage *BlobAccessInfo, grpcClientFactory grpc.ClientFactory, maximumMessageSizeBytes int, parameterCache *cdc.TTLCache[cdc.Parameters]) BlobAccessCreator { return &clsBlobAccessCreator{ - contentAddressableStorage: contentAddressableStorage, - grpcClientFactory: grpcClientFactory, - maximumMessageSizeBytes: maximumMessageSizeBytes, + chunkStorage: chunkStorage, + grpcClientFactory: grpcClientFactory, + maximumMessageSizeBytes: maximumMessageSizeBytes, + parameterCache: parameterCache, } } @@ -49,7 +52,7 @@ func (clsBlobAccessCreator) GetDefaultCapabilitiesProvider() capabilities.Provid func (bac *clsBlobAccessCreator) NewCustomBlobAccess(terminationGroup program.Group, configuration *pb.BlobAccessConfiguration, nestedCreator NestedBlobAccessCreator) (BlobAccessInfo, string, error) { switch backend := configuration.Backend.(type) { case *pb.BlobAccessConfiguration_ChunkListValidating: - if bac.contentAddressableStorage == nil { + if bac.chunkStorage == nil { return BlobAccessInfo{}, "", status.Error(codes.InvalidArgument, "Action Cache completeness checking can only be enabled if a Content Addressable Storage is configured") } @@ -57,13 +60,21 @@ func (bac *clsBlobAccessCreator) NewCustomBlobAccess(terminationGroup program.Gr if err != nil { return BlobAccessInfo{}, "", err } + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider( + base.BlobAccess, + bac.maximumMessageSizeBytes, + ) + if bac.parameterCache != nil { + parameterProvider = cdc.NewCachingParameterProvider(parameterProvider, bac.parameterCache) + } return BlobAccessInfo{ BlobAccess: chunklistvalidating.NewChunkListValidatingBlobAccess( base.BlobAccess, - bac.contentAddressableStorage.BlobAccess, + bac.chunkStorage.BlobAccess, + parameterProvider, bac.maximumMessageSizeBytes, ), - 
DigestKeyFormat: base.DigestKeyFormat.Combine(bac.contentAddressableStorage.DigestKeyFormat), + DigestKeyFormat: base.DigestKeyFormat.Combine(bac.chunkStorage.DigestKeyFormat), }, "chunk_list_validating", nil case *pb.BlobAccessConfiguration_Grpc: diff --git a/pkg/blobstore/configuration/cas_blob_access_creator.go b/pkg/blobstore/configuration/cs_blob_access_creator.go similarity index 79% rename from pkg/blobstore/configuration/cas_blob_access_creator.go rename to pkg/blobstore/configuration/cs_blob_access_creator.go index ac215dd68..22aeb18e3 100644 --- a/pkg/blobstore/configuration/cas_blob_access_creator.go +++ b/pkg/blobstore/configuration/cs_blob_access_creator.go @@ -28,27 +28,26 @@ import ( "cloud.google.com/go/storage" ) -var casCapabilitiesProvider = capabilities.NewStaticProvider(&remoteexecution.ServerCapabilities{ +var csCapabilitiesProvider = capabilities.NewStaticProvider(&remoteexecution.ServerCapabilities{ CacheCapabilities: &remoteexecution.CacheCapabilities{ DigestFunctions: digest.SupportedDigestFunctions, // MaxBatchTotalSize: Not used by Bazel yet. }, }) -type casBlobAccessCreator struct { - casBlobReplicatorCreator +type csBlobAccessCreator struct { + csBlobReplicatorCreator maximumMessageSizeBytes int zstdPool bb_zstd.Pool } -// NewCASBlobAccessCreator creates a BlobAccessCreator that can be +// NewCSBlobAccessCreator creates a BlobAccessCreator that can be // provided to NewBlobAccessFromConfiguration() to construct a -// BlobAccess that is suitable for accessing the Content Addressable -// Storage. -func NewCASBlobAccessCreator(grpcClientFactory grpc.ClientFactory, maximumMessageSizeBytes int, zstdPool bb_zstd.Pool) BlobAccessCreator { - return &casBlobAccessCreator{ - casBlobReplicatorCreator: casBlobReplicatorCreator{ +// BlobAccess that is suitable for accessing the Chunk Storage. 
+func NewCSBlobAccessCreator(grpcClientFactory grpc.ClientFactory, maximumMessageSizeBytes int, zstdPool bb_zstd.Pool) BlobAccessCreator { + return &csBlobAccessCreator{ + csBlobReplicatorCreator: csBlobReplicatorCreator{ grpcClientFactory: grpcClientFactory, }, maximumMessageSizeBytes: maximumMessageSizeBytes, @@ -56,27 +55,27 @@ func NewCASBlobAccessCreator(grpcClientFactory grpc.ClientFactory, maximumMessag } } -func (casBlobAccessCreator) GetBaseDigestKeyFormat() digest.KeyFormat { +func (csBlobAccessCreator) GetBaseDigestKeyFormat() digest.KeyFormat { return digest.KeyWithoutInstance } -func (casBlobAccessCreator) GetReadBufferFactory() blobstore.ReadBufferFactory { +func (csBlobAccessCreator) GetReadBufferFactory() blobstore.ReadBufferFactory { return blobstore.CASReadBufferFactory } -func (casBlobAccessCreator) GetDefaultCapabilitiesProvider() capabilities.Provider { - return casCapabilitiesProvider +func (csBlobAccessCreator) GetDefaultCapabilitiesProvider() capabilities.Provider { + return csCapabilitiesProvider } -func (casBlobAccessCreator) NewBlockListGrowthPolicy(currentBlocks, newBlocks int) (local.BlockListGrowthPolicy, error) { +func (csBlobAccessCreator) NewBlockListGrowthPolicy(currentBlocks, newBlocks int) (local.BlockListGrowthPolicy, error) { return local.NewImmutableBlockListGrowthPolicy(currentBlocks, newBlocks), nil } -func (casBlobAccessCreator) NewHierarchicalInstanceNamesLocalBlobAccess(keyLocationMap local.KeyLocationMap, locationBlobMap local.LocationBlobMap, globalLock *sync.RWMutex, capabilitiesProvider capabilities.Provider) (blobstore.BlobAccess, error) { +func (csBlobAccessCreator) NewHierarchicalInstanceNamesLocalBlobAccess(keyLocationMap local.KeyLocationMap, locationBlobMap local.LocationBlobMap, globalLock *sync.RWMutex, capabilitiesProvider capabilities.Provider) (blobstore.BlobAccess, error) { return local.NewHierarchicalCASBlobAccess(keyLocationMap, locationBlobMap, globalLock, capabilitiesProvider), nil } -func (bac 
*casBlobAccessCreator) NewCustomBlobAccess(terminationGroup program.Group, configuration *pb.BlobAccessConfiguration, nestedCreator NestedBlobAccessCreator) (BlobAccessInfo, string, error) { +func (bac *csBlobAccessCreator) NewCustomBlobAccess(terminationGroup program.Group, configuration *pb.BlobAccessConfiguration, nestedCreator NestedBlobAccessCreator) (BlobAccessInfo, string, error) { switch backend := configuration.Backend.(type) { case *pb.BlobAccessConfiguration_ExistenceCaching: base, err := nestedCreator.NewNestedBlobAccess(backend.ExistenceCaching.Backend, bac) @@ -103,7 +102,7 @@ func (bac *casBlobAccessCreator) NewCustomBlobAccess(terminationGroup program.Gr // TODO: Should we provide a configuration option, so // that digest.KeyWithoutInstance can be used? return BlobAccessInfo{ - BlobAccess: grpcclients.NewCASBlobAccess(client, uuid.NewRandom, 64<<10, zstdPool), + BlobAccess: grpcclients.NewCSBlobAccess(client, uuid.NewRandom, 64<<10, zstdPool), DigestKeyFormat: digest.KeyWithInstance, }, "grpc", nil case *pb.BlobAccessConfiguration_ReferenceExpanding: @@ -177,7 +176,7 @@ func (bac *casBlobAccessCreator) NewCustomBlobAccess(terminationGroup program.Gr } } -func (casBlobAccessCreator) WrapTopLevelBlobAccess(blobAccess blobstore.BlobAccess) blobstore.BlobAccess { +func (csBlobAccessCreator) WrapTopLevelBlobAccess(blobAccess blobstore.BlobAccess) blobstore.BlobAccess { // For the Content Addressable Storage it is required that the empty // blob is always present. This decorator ensures that requests // for the empty blob never contact the storage backend. 
diff --git a/pkg/blobstore/configuration/cas_blob_replicator_creator.go b/pkg/blobstore/configuration/cs_blob_replicator_creator.go similarity index 66% rename from pkg/blobstore/configuration/cas_blob_replicator_creator.go rename to pkg/blobstore/configuration/cs_blob_replicator_creator.go index ed3172fef..215ebaf03 100644 --- a/pkg/blobstore/configuration/cas_blob_replicator_creator.go +++ b/pkg/blobstore/configuration/cs_blob_replicator_creator.go @@ -11,25 +11,25 @@ import ( "google.golang.org/grpc/status" ) -type casBlobReplicatorCreator struct { +type csBlobReplicatorCreator struct { grpcClientFactory grpc.ClientFactory } -// NewCASBlobReplicatorCreator creates a BlobReplicatorCreator that can +// NewCSBlobReplicatorCreator creates a BlobReplicatorCreator that can // be provided to NewBlobReplicatorFromConfiguration() to construct a -// BlobReplicator that is suitable for replicating Content Addressable -// Storage objects. -func NewCASBlobReplicatorCreator(grpcClientFactory grpc.ClientFactory) BlobReplicatorCreator { - return &casBlobReplicatorCreator{ +// BlobReplicator that is suitable for replicating Chunk Storage +// objects. 
+func NewCSBlobReplicatorCreator(grpcClientFactory grpc.ClientFactory) BlobReplicatorCreator { + return &csBlobReplicatorCreator{ grpcClientFactory: grpcClientFactory, } } -func (casBlobReplicatorCreator) GetStorageTypeName() string { - return "cas" +func (csBlobReplicatorCreator) GetStorageTypeName() string { + return "cs" } -func (brc *casBlobReplicatorCreator) NewCustomBlobReplicator(terminationGroup program.Group, configuration *pb.BlobReplicatorConfiguration, source blobstore.BlobAccess, sink BlobAccessInfo) (replication.BlobReplicator, error) { +func (brc *csBlobReplicatorCreator) NewCustomBlobReplicator(terminationGroup program.Group, configuration *pb.BlobReplicatorConfiguration, source blobstore.BlobAccess, sink BlobAccessInfo) (replication.BlobReplicator, error) { switch mode := configuration.Mode.(type) { case *pb.BlobReplicatorConfiguration_Deduplicating: base, err := NewBlobReplicatorFromConfiguration(terminationGroup, mode.Deduplicating, source, sink, brc) diff --git a/pkg/blobstore/configuration/new_blob_access.go b/pkg/blobstore/configuration/new_blob_access.go index 07e52041c..a1d59ef0a 100644 --- a/pkg/blobstore/configuration/new_blob_access.go +++ b/pkg/blobstore/configuration/new_blob_access.go @@ -21,13 +21,11 @@ import ( "github.com/buildbarn/bb-storage/pkg/eviction" "github.com/buildbarn/bb-storage/pkg/filesystem" "github.com/buildbarn/bb-storage/pkg/filesystem/path" - "github.com/buildbarn/bb-storage/pkg/grpc" "github.com/buildbarn/bb-storage/pkg/program" pb "github.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore" digest_pb "github.com/buildbarn/bb-storage/pkg/proto/configuration/digest" "github.com/buildbarn/bb-storage/pkg/random" "github.com/buildbarn/bb-storage/pkg/util" - bb_zstd "github.com/buildbarn/bb-storage/pkg/zstd" "github.com/fxtlabs/primes" "google.golang.org/grpc/codes" @@ -632,33 +630,3 @@ func NewBlobAccessFromConfiguration(terminationGroup program.Group, configuratio DigestKeyFormat: backend.DigestKeyFormat, }, 
nil } - -// NewCASAndACBlobAccessFromConfiguration is a convenience function to -// create BlobAccess objects for both the Content Addressable Storage -// and Action Cache. Most Buildbarn components tend to require access to -// both these data stores. -func NewCASAndACBlobAccessFromConfiguration(terminationGroup program.Group, configuration *pb.BlobstoreConfiguration, grpcClientFactory grpc.ClientFactory, maximumMessageSizeBytes int, zstdPool bb_zstd.Pool) (blobstore.BlobAccess, blobstore.BlobAccess, error) { - contentAddressableStorage, err := NewBlobAccessFromConfiguration( - terminationGroup, - configuration.GetContentAddressableStorage(), - NewCASBlobAccessCreator(grpcClientFactory, maximumMessageSizeBytes, zstdPool), - ) - if err != nil { - return nil, nil, util.StatusWrap(err, "Failed to create Content Addressable Storage") - } - - actionCache, err := NewBlobAccessFromConfiguration( - terminationGroup, - configuration.GetActionCache(), - NewACBlobAccessCreator( - &contentAddressableStorage, - grpcClientFactory, - maximumMessageSizeBytes, - ), - ) - if err != nil { - return nil, nil, util.StatusWrap(err, "Failed to create Action Cache") - } - - return contentAddressableStorage.BlobAccess, actionCache.BlobAccess, nil -} diff --git a/pkg/blobstore/grpcclients/BUILD.bazel b/pkg/blobstore/grpcclients/BUILD.bazel index bb518ecf6..361ff8e7f 100644 --- a/pkg/blobstore/grpcclients/BUILD.bazel +++ b/pkg/blobstore/grpcclients/BUILD.bazel @@ -4,8 +4,8 @@ go_library( name = "grpcclients", srcs = [ "ac_blob_access.go", - "cas_blob_access.go", "cls_blob_access.go", + "cs_blob_access.go", "fsac_blob_access.go", "icas_blob_access.go", "iscc_blob_access.go", @@ -34,7 +34,7 @@ go_library( go_test( name = "grpcclients_test", - srcs = ["cas_blob_access_test.go"], + srcs = ["cs_blob_access_test.go"], deps = [ ":grpcclients", "//internal/mock", diff --git a/pkg/blobstore/grpcclients/cls_blob_access.go b/pkg/blobstore/grpcclients/cls_blob_access.go index a27e5ea84..e10404c19 100644 
--- a/pkg/blobstore/grpcclients/cls_blob_access.go +++ b/pkg/blobstore/grpcclients/cls_blob_access.go @@ -10,8 +10,6 @@ import ( "github.com/buildbarn/bb-storage/pkg/digest" "google.golang.org/grpc" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" ) type clsBlobAccess struct { @@ -67,21 +65,18 @@ func (ba *clsBlobAccess) Put(ctx context.Context, digest digest.Digest, b buffer } func (ba *clsBlobAccess) FindMissing(ctx context.Context, digests digest.Set) (digest.Set, error) { - missing := digest.NewSetBuilder(digests.Length()) - for _, d := range digests.Items() { - _, err := ba.contentAddressableStorageClient.SplitBlob(ctx, &remoteexecution.SplitBlobRequest{ - InstanceName: d.GetInstanceName().String(), - BlobDigest: d.GetProto(), - DigestFunction: d.GetDigestFunction().GetEnumValue(), - ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, - }) - if status.Code(err) == codes.NotFound { - missing.Add(d) - } else if err != nil { - return digest.EmptySet, err - } - } - return missing.Build(), nil + // Semantically an REv2 server which supports the Split and Splice + // APIs should be able to answer the SplitBlob call for any blob + // which it has in its storage. Thus we can safely say that we are + // able to Get a chunk list from an upstream server as long as it + // has the blob. We can therefore reuse the existing + // FindMissingBlobs API for this purpose. + // + // In Buildbarn we implement this on the server side by routing + // FindMissingBlobs requests for blobs larger than the maximum + // chunk size to the Chunk List Storage (CLS), and requests for + // all other blobs to the Chunk Storage (CS).
+ return findMissingBlobsInternal(ctx, digests, ba.contentAddressableStorageClient) } func (ba *clsBlobAccess) GetCapabilities(ctx context.Context, instanceName digest.InstanceName) (*remoteexecution.ServerCapabilities, error) { diff --git a/pkg/blobstore/grpcclients/cas_blob_access.go b/pkg/blobstore/grpcclients/cs_blob_access.go similarity index 98% rename from pkg/blobstore/grpcclients/cas_blob_access.go rename to pkg/blobstore/grpcclients/cs_blob_access.go index 8c7b3abca..5f19d6b8e 100644 --- a/pkg/blobstore/grpcclients/cas_blob_access.go +++ b/pkg/blobstore/grpcclients/cs_blob_access.go @@ -33,7 +33,7 @@ type casBlobAccess struct { zstdPool bb_zstd.Pool } -// NewCASBlobAccess creates a BlobAccess handle that relays any requests +// NewCSBlobAccess creates a BlobAccess handle that relays any requests // to a gRPC service that implements the bytestream.ByteStream and // remoteexecution.ContentAddressableStorage services. Those are the // services that Bazel uses to access blobs stored in the Content @@ -41,7 +41,7 @@ type casBlobAccess struct { // // If zstdPool is non-nil, the client will use ZSTD compression for // ByteStream operations if the server supports it. 
-func NewCASBlobAccess(client grpc.ClientConnInterface, uuidGenerator util.UUIDGenerator, readChunkSize int, zstdPool bb_zstd.Pool) blobstore.BlobAccess { +func NewCSBlobAccess(client grpc.ClientConnInterface, uuidGenerator util.UUIDGenerator, readChunkSize int, zstdPool bb_zstd.Pool) blobstore.BlobAccess { return &casBlobAccess{ byteStreamClient: bytestream.NewByteStreamClient(client), contentAddressableStorageClient: remoteexecution.NewContentAddressableStorageClient(client), diff --git a/pkg/blobstore/grpcclients/cas_blob_access_test.go b/pkg/blobstore/grpcclients/cs_blob_access_test.go similarity index 95% rename from pkg/blobstore/grpcclients/cas_blob_access_test.go rename to pkg/blobstore/grpcclients/cs_blob_access_test.go index 4465d1ded..de60aa128 100644 --- a/pkg/blobstore/grpcclients/cas_blob_access_test.go +++ b/pkg/blobstore/grpcclients/cs_blob_access_test.go @@ -37,12 +37,12 @@ func newTestZstdPool(maxEncoders, maxDecoders int64) bb_zstd.Pool { ) } -func TestCASBlobAccessPut(t *testing.T) { +func TestCSBlobAccessPut(t *testing.T) { ctrl, ctx := gomock.WithContext(context.Background(), t) client := mock.NewMockClientConnInterface(ctrl) uuidGenerator := mock.NewMockUUIDGenerator(ctrl) - blobAccess := grpcclients.NewCASBlobAccess(client, uuidGenerator.Call, 10, nil) + blobAccess := grpcclients.NewCSBlobAccess(client, uuidGenerator.Call, 10, nil) blobDigest := digest.MustNewDigest("hello", remoteexecution.DigestFunction_MD5, "8b1a9953c4611296a827abf8c47804d7", 5) uuid := uuid.Must(uuid.Parse("7d659e5f-0e4b-48f0-ad9f-3489db6e103b")) @@ -180,7 +180,7 @@ func TestCASBlobAccessGet(t *testing.T) { client := mock.NewMockClientConnInterface(ctrl) uuidGenerator := mock.NewMockUUIDGenerator(ctrl) - blobAccess := grpcclients.NewCASBlobAccess(client, uuidGenerator.Call, 10, nil) + blobAccess := grpcclients.NewCSBlobAccess(client, uuidGenerator.Call, 10, nil) t.Run("Success", func(t *testing.T) { blobDigest := digest.MustNewDigest("hello", 
remoteexecution.DigestFunction_MD5, "8b1a9953c4611296a827abf8c47804d7", 5) @@ -276,12 +276,12 @@ func TestCASBlobAccessGet(t *testing.T) { }) } -func TestCASBlobAccessGetCapabilities(t *testing.T) { +func TestCSBlobAccessGetCapabilities(t *testing.T) { ctrl, ctx := gomock.WithContext(context.Background(), t) client := mock.NewMockClientConnInterface(ctrl) uuidGenerator := mock.NewMockUUIDGenerator(ctrl) - blobAccess := grpcclients.NewCASBlobAccess(client, uuidGenerator.Call, 10, nil) + blobAccess := grpcclients.NewCSBlobAccess(client, uuidGenerator.Call, 10, nil) t.Run("BackendFailure", func(t *testing.T) { client.EXPECT().Invoke( @@ -377,12 +377,12 @@ func TestCASBlobAccessGetCapabilities(t *testing.T) { }) } -func TestCASBlobAccessPutWithCompression(t *testing.T) { +func TestCSBlobAccessPutWithCompression(t *testing.T) { ctrl, ctx := gomock.WithContext(context.Background(), t) client := mock.NewMockClientConnInterface(ctrl) uuidGenerator := mock.NewMockUUIDGenerator(ctrl) - blobAccess := grpcclients.NewCASBlobAccess(client, uuidGenerator.Call, 10, newTestZstdPool(16, 16)) + blobAccess := grpcclients.NewCSBlobAccess(client, uuidGenerator.Call, 10, newTestZstdPool(16, 16)) expectGetCapabilitiesWithZSTD(client) @@ -455,12 +455,12 @@ func expectGetCapabilitiesWithZSTD(client *mock.MockClientConnInterface) { }).AnyTimes() } -func TestCASBlobAccessGetWithCompression(t *testing.T) { +func TestCSBlobAccessGetWithCompression(t *testing.T) { ctrl, ctx := gomock.WithContext(context.Background(), t) client := mock.NewMockClientConnInterface(ctrl) uuidGenerator := mock.NewMockUUIDGenerator(ctrl) - blobAccess := grpcclients.NewCASBlobAccess(client, uuidGenerator.Call, 100, newTestZstdPool(16, 16)) + blobAccess := grpcclients.NewCSBlobAccess(client, uuidGenerator.Call, 100, newTestZstdPool(16, 16)) expectGetCapabilitiesWithZSTD(client) @@ -500,14 +500,14 @@ func TestCASBlobAccessGetWithCompression(t *testing.T) { }) } -func TestCASBlobAccessPutPoolExhaustion(t *testing.T) { 
+func TestCSBlobAccessPutPoolExhaustion(t *testing.T) { // Create a pool with only 1 concurrent encoder to test backpressure. pool := bb_zstd.NewBoundedPool(1, 1, nil, nil) ctrl, ctx := gomock.WithContext(context.Background(), t) client := mock.NewMockClientConnInterface(ctrl) uuidGenerator := mock.NewMockUUIDGenerator(ctrl) - blobAccess := grpcclients.NewCASBlobAccess(client, uuidGenerator.Call, 10, pool) + blobAccess := grpcclients.NewCSBlobAccess(client, uuidGenerator.Call, 10, pool) expectGetCapabilitiesWithZSTD(client) @@ -579,7 +579,7 @@ func TestCASBlobAccessPutPoolExhaustion(t *testing.T) { wg.Wait() } -func TestCASBlobAccessPutPoolReleasesEncoder(t *testing.T) { +func TestCSBlobAccessPutPoolReleasesEncoder(t *testing.T) { // Pool with 1 encoder: if encoder isn't released after the first Put, // the second Put would deadlock. pool := bb_zstd.NewBoundedPool(1, 1, nil, nil) @@ -587,7 +587,7 @@ func TestCASBlobAccessPutPoolReleasesEncoder(t *testing.T) { ctrl, ctx := gomock.WithContext(context.Background(), t) client := mock.NewMockClientConnInterface(ctrl) uuidGenerator := mock.NewMockUUIDGenerator(ctrl) - blobAccess := grpcclients.NewCASBlobAccess(client, uuidGenerator.Call, 10, pool) + blobAccess := grpcclients.NewCSBlobAccess(client, uuidGenerator.Call, 10, pool) expectGetCapabilitiesWithZSTD(client) @@ -619,7 +619,7 @@ func TestCASBlobAccessPutPoolReleasesEncoder(t *testing.T) { } } -func TestCASBlobAccessGetPoolReleasesDecoder(t *testing.T) { +func TestCSBlobAccessGetPoolReleasesDecoder(t *testing.T) { // Pool with 1 decoder: if decoder isn't released after the first Get, // the second Get would deadlock. 
pool := bb_zstd.NewBoundedPool(1, 1, nil, nil) @@ -627,7 +627,7 @@ func TestCASBlobAccessGetPoolReleasesDecoder(t *testing.T) { ctrl, ctx := gomock.WithContext(context.Background(), t) client := mock.NewMockClientConnInterface(ctrl) uuidGenerator := mock.NewMockUUIDGenerator(ctrl) - blobAccess := grpcclients.NewCASBlobAccess(client, uuidGenerator.Call, 100, pool) + blobAccess := grpcclients.NewCSBlobAccess(client, uuidGenerator.Call, 100, pool) expectGetCapabilitiesWithZSTD(client) diff --git a/pkg/blobstore/grpcservers/BUILD.bazel b/pkg/blobstore/grpcservers/BUILD.bazel index 556ad91e8..2d8196a52 100644 --- a/pkg/blobstore/grpcservers/BUILD.bazel +++ b/pkg/blobstore/grpcservers/BUILD.bazel @@ -40,6 +40,7 @@ go_test( ":grpcservers", "//internal/mock", "//pkg/blobstore/buffer", + "//pkg/blobstore/cdc", "//pkg/digest", "//pkg/proto/icas", "//pkg/testutil", diff --git a/pkg/blobstore/grpcservers/byte_stream_server.go b/pkg/blobstore/grpcservers/byte_stream_server.go index 0f9cb18a2..332614693 100644 --- a/pkg/blobstore/grpcservers/byte_stream_server.go +++ b/pkg/blobstore/grpcservers/byte_stream_server.go @@ -18,19 +18,21 @@ import ( ) type byteStreamServer struct { - blobAccess blobstore.BlobAccess - readChunkSize int - zstdPool bb_zstd.Pool + contentAddressableStorage blobstore.BlobAccess + maximumMessageSizeBytes int + readChunkSize int + zstdPool bb_zstd.Pool } // NewByteStreamServer creates a GRPC service for reading blobs from and // writing blobs to a BlobAccess. It is used by Bazel to access the // Content Addressable Storage (CAS). 
-func NewByteStreamServer(blobAccess blobstore.BlobAccess, readChunkSize int, zstdPool bb_zstd.Pool) bytestream.ByteStreamServer { +func NewByteStreamServer(contentAddressableStorage blobstore.BlobAccess, readChunkSize, maximumMessageSizeBytes int, zstdPool bb_zstd.Pool) bytestream.ByteStreamServer { return &byteStreamServer{ - blobAccess: blobAccess, - readChunkSize: readChunkSize, - zstdPool: zstdPool, + contentAddressableStorage: contentAddressableStorage, + readChunkSize: readChunkSize, + maximumMessageSizeBytes: maximumMessageSizeBytes, + zstdPool: zstdPool, } } @@ -38,14 +40,14 @@ func (s *byteStreamServer) Read(in *bytestream.ReadRequest, out bytestream.ByteS if in.ReadLimit != 0 { return status.Error(codes.Unimplemented, "This service does not support downloading partial files") } - digest, compressor, err := digest.NewDigestFromByteStreamReadPath(in.ResourceName) + d, compressor, err := digest.NewDigestFromByteStreamReadPath(in.ResourceName) if err != nil { return err } ctx := out.Context() switch compressor { case remoteexecution.Compressor_IDENTITY: - r := s.blobAccess.Get(ctx, digest).ToChunkReader(in.ReadOffset, s.readChunkSize) + r := s.contentAddressableStorage.Get(ctx, d).ToChunkReader(in.ReadOffset, s.readChunkSize) defer r.Close() for { @@ -62,14 +64,12 @@ func (s *byteStreamServer) Read(in *bytestream.ReadRequest, out bytestream.ByteS } case remoteexecution.Compressor_ZSTD: - b := s.blobAccess.Get(ctx, digest) encoder, err := s.zstdPool.NewEncoder(ctx, &readStreamWriter{out: out}) if err != nil { - b.Discard() return status.Errorf(codes.ResourceExhausted, "Failed to acquire ZSTD encoder: %v", err) } defer encoder.Close() - return b.IntoWriter(encoder) + return s.contentAddressableStorage.Get(ctx, d).IntoWriter(encoder) default: return status.Errorf(codes.Unimplemented, "This service does not support downloading compression type: %s", compressor) } @@ -138,37 +138,54 @@ func (s *byteStreamServer) Write(stream bytestream.ByteStream_WriteServer) 
error } return err } - digest, compressor, err := digest.NewDigestFromByteStreamWritePath(request.ResourceName) + + d, compressor, err := digest.NewDigestFromByteStreamWritePath(request.ResourceName) if err != nil { return err } + + ctx := stream.Context() switch compressor { case remoteexecution.Compressor_IDENTITY: - return s.writeIdentity(stream, request, digest) + r := &byteStreamWriteServerChunkReader{stream: stream} + if err := r.setRequest(request); err != nil { + return err + } + + b := buffer.NewCASBufferFromChunkReader(d, r, buffer.UserProvided) + if err := s.contentAddressableStorage.Put(stream.Context(), d, b); err != nil { + return err + } + + return stream.SendAndClose(&bytestream.WriteResponse{ + CommittedSize: d.GetSizeBytes(), + }) case remoteexecution.Compressor_ZSTD: - return s.writeZstd(stream, request, digest) + streamReader := &zstdWriteStreamReader{ + stream: stream, + nextOffset: int64(len(request.Data)), + finished: request.FinishWrite, + pendingData: request.Data, + } + + zstdReader, err := bb_zstd.NewReadCloser(ctx, s.zstdPool, streamReader) + if err != nil { + return util.StatusWrap(err, "Failed to acquire ZSTD decoder") + } + + b := buffer.NewCASBufferFromReader(d, zstdReader, buffer.UserProvided) + if err := s.contentAddressableStorage.Put(ctx, d, b); err != nil { + return err + } + + return stream.SendAndClose(&bytestream.WriteResponse{ + CommittedSize: streamReader.nextOffset, + }) default: return status.Errorf(codes.Unimplemented, "This service does not support uploading compression type: %s", compressor) } } -func (s *byteStreamServer) writeIdentity(stream bytestream.ByteStream_WriteServer, request *bytestream.WriteRequest, digest digest.Digest) error { - r := &byteStreamWriteServerChunkReader{stream: stream} - if err := r.setRequest(request); err != nil { - return err - } - if err := s.blobAccess.Put( - stream.Context(), - digest, - buffer.NewCASBufferFromChunkReader(digest, r, buffer.UserProvided), - ); err != nil { - return err 
- } - return stream.SendAndClose(&bytestream.WriteResponse{ - CommittedSize: digest.GetSizeBytes(), - }) -} - type zstdWriteStreamReader struct { stream bytestream.ByteStream_WriteServer nextOffset int64 @@ -211,32 +228,6 @@ func (zstdWriteStreamReader) Close() error { return nil } -func (s *byteStreamServer) writeZstd(stream bytestream.ByteStream_WriteServer, request *bytestream.WriteRequest, digest digest.Digest) error { - ctx := stream.Context() - streamReader := &zstdWriteStreamReader{ - stream: stream, - nextOffset: int64(len(request.Data)), - finished: request.FinishWrite, - pendingData: request.Data, - } - - zstdReader, err := bb_zstd.NewReadCloser(ctx, s.zstdPool, streamReader) - if err != nil { - return util.StatusWrap(err, "Failed to acquire ZSTD decoder") - } - - if err := s.blobAccess.Put( - ctx, - digest, - buffer.NewCASBufferFromReader(digest, zstdReader, buffer.UserProvided), - ); err != nil { - return err - } - return stream.SendAndClose(&bytestream.WriteResponse{ - CommittedSize: streamReader.nextOffset, - }) -} - func (byteStreamServer) QueryWriteStatus(ctx context.Context, in *bytestream.QueryWriteStatusRequest) (*bytestream.QueryWriteStatusResponse, error) { return nil, status.Error(codes.Unimplemented, "This service does not support querying write status") } diff --git a/pkg/blobstore/grpcservers/byte_stream_server_test.go b/pkg/blobstore/grpcservers/byte_stream_server_test.go index 542eb40f3..c6be37e8f 100644 --- a/pkg/blobstore/grpcservers/byte_stream_server_test.go +++ b/pkg/blobstore/grpcservers/byte_stream_server_test.go @@ -11,6 +11,7 @@ import ( remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" "github.com/buildbarn/bb-storage/internal/mock" "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/blobstore/cdc" "github.com/buildbarn/bb-storage/pkg/blobstore/grpcservers" "github.com/buildbarn/bb-storage/pkg/digest" "github.com/buildbarn/bb-storage/pkg/testutil" @@ 
-33,8 +34,23 @@ func TestByteStreamServer(t *testing.T) { // Create an RPC server/client pair. l := bufconn.Listen(1 << 20) server := grpc.NewServer() - blobAccess := mock.NewMockBlobAccess(ctrl) - bytestream.RegisterByteStreamServer(server, grpcservers.NewByteStreamServer(blobAccess, 10, bb_zstd.NewUnboundedPool( + chunkStorage := mock.NewMockBlobAccess(ctrl) + chunkListStorage := mock.NewMockBlobAccess(ctrl) + + chunkListStorage.EXPECT().GetCapabilities(gomock.Any(), gomock.Any()).Return( + &remoteexecution.ServerCapabilities{ + CacheCapabilities: &remoteexecution.CacheCapabilities{ + RepMaxCdcParams: &remoteexecution.RepMaxCdcParams{ + MinChunkSizeBytes: 256 * 1024, + HorizonSizeBytes: 8 * 256 * 1024, + }, + }, + }, nil, + ).AnyTimes() + maximumMessageSizeBytes := 1 << 20 + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(chunkListStorage, maximumMessageSizeBytes) + contentAddressableStorageChunker := cdc.NewCasChunkingBlobAccess(chunkStorage, chunkListStorage, parameterProvider, maximumMessageSizeBytes) + bytestream.RegisterByteStreamServer(server, grpcservers.NewByteStreamServer(contentAddressableStorageChunker, 10, maximumMessageSizeBytes, bb_zstd.NewUnboundedPool( []zstd.EOption{zstd.WithEncoderConcurrency(1)}, []zstd.DOption{zstd.WithDecoderConcurrency(1)}, ))) @@ -91,7 +107,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("ReadSuccessEmptyInstance", func(t *testing.T) { // Attempt to fetch the small blob without an instance name. - blobAccess.EXPECT().Get( + chunkStorage.EXPECT().Get( gomock.Any(), digest.MustNewDigest("", remoteexecution.DigestFunction_MD5, "09f7e02f1290be211da707a266f153b3", 5), ).Return(buffer.NewValidatedBufferFromByteSlice([]byte("Hello"))) @@ -109,7 +125,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("ReadSuccessNonEmptyInstance", func(t *testing.T) { // Attempt to fetch the large blob with an instance name. 
- blobAccess.EXPECT().Get( + chunkStorage.EXPECT().Get( gomock.Any(), digest.MustNewDigest("debian8", remoteexecution.DigestFunction_MD5, "3538d378083b9afa5ffad767f7269509", 22), ).Return(buffer.NewValidatedBufferFromByteSlice([]byte("This is a long message"))) @@ -134,7 +150,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("ReadZSTDCompression", func(t *testing.T) { // Test reading with ZSTD compression. originalData := []byte("This is a test message that should be compressed with ZSTD") - blobAccess.EXPECT().Get( + chunkStorage.EXPECT().Get( gomock.Any(), digest.MustNewDigest("", remoteexecution.DigestFunction_SHA256, "8b2c3f8a9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5f61", 58), ).Return(buffer.NewValidatedBufferFromByteSlice(originalData)) @@ -170,7 +186,7 @@ func TestByteStreamServer(t *testing.T) { originalData[i] = byte(i % 256) } - blobAccess.EXPECT().Get( + chunkStorage.EXPECT().Get( gomock.Any(), digest.MustNewDigest("", remoteexecution.DigestFunction_SHA256, "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2", 100000), ).Return(buffer.NewValidatedBufferFromByteSlice(originalData)) @@ -214,7 +230,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("ReadNegativeReadOffset", func(t *testing.T) { // Attempt to fetch a blob with a negative offset. - blobAccess.EXPECT().Get( + chunkStorage.EXPECT().Get( gomock.Any(), digest.MustNewDigest("ubuntu1804", remoteexecution.DigestFunction_MD5, "6fc422233a40a75a1f028e11c3cd1140", 7), ).Return(buffer.NewValidatedBufferFromByteSlice([]byte("Goodbye"))) @@ -231,7 +247,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("ReadOffsetBeyondEnd", func(t *testing.T) { // Attempt to fetch a blob with a offset beyond the size // of the blob. 
- blobAccess.EXPECT().Get( + chunkStorage.EXPECT().Get( gomock.Any(), digest.MustNewDigest("ubuntu1804", remoteexecution.DigestFunction_MD5, "ad3c8ac9eef32188da352082244b3598", 13), ).Return(buffer.NewValidatedBufferFromByteSlice([]byte("short message"))) @@ -247,7 +263,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("ReadSuccessWithOffset", func(t *testing.T) { // Attempt to fetch a lblob with an instance name and offset. - blobAccess.EXPECT().Get( + chunkStorage.EXPECT().Get( gomock.Any(), digest.MustNewDigest("ubuntu1804", remoteexecution.DigestFunction_MD5, "da39a3ee5e6b4b0d3255bfef95601890", 19), ).Return(buffer.NewValidatedBufferFromByteSlice([]byte("This offset message"))) @@ -269,7 +285,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("ReadNonexistentBlob", func(t *testing.T) { // Attempt to fetch a nonexistent blob. - blobAccess.EXPECT().Get( + chunkStorage.EXPECT().Get( gomock.Any(), digest.MustNewDigest("fedora28", remoteexecution.DigestFunction_MD5, "09f34d28e9c8bb445ec996388968a9e8", 7), ).Return(buffer.NewBufferFromError(status.Error(codes.NotFound, "Blob not found"))) @@ -304,7 +320,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("WriteSuccessEmptyInstance", func(t *testing.T) { // Attempt to write a blob without an instance name. 
- blobAccess.EXPECT().Put( + chunkStorage.EXPECT().Put( gomock.Any(), digest.MustNewDigest("", remoteexecution.DigestFunction_MD5, "581c1053f832a1c719fb6528a588ccfd", 14), gomock.Any(), @@ -345,7 +361,7 @@ func TestByteStreamServer(t *testing.T) { generator.Write(originalData) actualDigest := generator.Sum() - blobAccess.EXPECT().Put( + chunkStorage.EXPECT().Put( gomock.Any(), actualDigest, gomock.Any(), @@ -382,7 +398,7 @@ func TestByteStreamServer(t *testing.T) { generator.Write(originalData) actualDigest := generator.Sum() - blobAccess.EXPECT().Put( + chunkStorage.EXPECT().Put( gomock.Any(), actualDigest, gomock.Any(), @@ -431,7 +447,7 @@ func TestByteStreamServer(t *testing.T) { generator.Write(originalData) actualDigest := generator.Sum() - blobAccess.EXPECT().Put( + chunkStorage.EXPECT().Put( gomock.Any(), actualDigest, gomock.Any(), @@ -485,7 +501,7 @@ func TestByteStreamServer(t *testing.T) { // Test writing with invalid ZSTD data. invalidData := []byte("This is not valid ZSTD compressed data") - blobAccess.EXPECT().Put( + chunkStorage.EXPECT().Put( gomock.Any(), digest.MustNewDigest("", remoteexecution.DigestFunction_SHA256, "d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5", 10), gomock.Any(), @@ -520,7 +536,7 @@ func TestByteStreamServer(t *testing.T) { generator.Write(originalData) actualDigest := generator.Sum() - blobAccess.EXPECT().Put( + chunkStorage.EXPECT().Put( gomock.Any(), actualDigest, gomock.Any(), @@ -553,7 +569,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("WriteSuccessWithoutFinish", func(t *testing.T) { // Attempt to write without finishing properly. - blobAccess.EXPECT().Put( + chunkStorage.EXPECT().Put( gomock.Any(), digest.MustNewDigest("", remoteexecution.DigestFunction_SHA1, "f10e562d8825ec2e17e0d9f58646f8084a658cfa", 6), gomock.Any(), @@ -575,7 +591,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("WriteFailFinishTwice", func(t *testing.T) { // Attempted to write while finishing twice. 
- blobAccess.EXPECT().Put( + chunkStorage.EXPECT().Put( gomock.Any(), digest.MustNewDigest("fedora28", remoteexecution.DigestFunction_MD5, "cbd8f7984c654c25512e3d9241ae569f", 3), gomock.Any(), @@ -603,7 +619,7 @@ func TestByteStreamServer(t *testing.T) { t.Run("WriteFailBadOffset", func(t *testing.T) { // Attempted to write with a bad write offset. - blobAccess.EXPECT().Put( + chunkStorage.EXPECT().Put( gomock.Any(), digest.MustNewDigest("windows10", remoteexecution.DigestFunction_MD5, "68e109f0f40ca72a15e05cc22786f8e6", 10), gomock.Any(), diff --git a/pkg/blobstore/grpcservers/content_addressable_storage_server.go b/pkg/blobstore/grpcservers/content_addressable_storage_server.go index 79397391c..e744ba641 100644 --- a/pkg/blobstore/grpcservers/content_addressable_storage_server.go +++ b/pkg/blobstore/grpcservers/content_addressable_storage_server.go @@ -14,8 +14,8 @@ import ( ) type contentAddressableStorageServer struct { - contentAddressableStorage blobstore.BlobAccess chunkListStorage blobstore.BlobAccess + contentAddressableStorage blobstore.BlobAccess maximumMessageSizeBytes int64 } @@ -23,8 +23,8 @@ type contentAddressableStorageServer struct { // the contents of a Bazel Content Addressable Storage (CAS) to Bazel. 
func NewContentAddressableStorageServer(contentAddressableStorage, chunkListStorage blobstore.BlobAccess, maximumMessageSizeBytes int64) remoteexecution.ContentAddressableStorageServer { return &contentAddressableStorageServer{ - contentAddressableStorage: contentAddressableStorage, chunkListStorage: chunkListStorage, + contentAddressableStorage: contentAddressableStorage, maximumMessageSizeBytes: maximumMessageSizeBytes, } } @@ -43,53 +43,26 @@ func (s *contentAddressableStorageServer) FindMissingBlobs(ctx context.Context, } inDigests := digest.NewSetBuilder(len(in.BlobDigests)) - for _, partialDigest := range in.BlobDigests { - digest, err := digestFunction.NewDigestFromProto(partialDigest) + for _, inDigest := range in.BlobDigests { + digest, err := digestFunction.NewDigestFromProto(inDigest) if err != nil { return nil, err } inDigests.Add(digest) } - outDigests, err := s.contentAddressableStorage.FindMissing(ctx, inDigests.Build()) + + missing, err := s.contentAddressableStorage.FindMissing(ctx, inDigests.Build()) if err != nil { return nil, err } - partialDigests := make([]*remoteexecution.Digest, 0, outDigests.Length()) - for _, outDigest := range outDigests.Items() { - partialDigests = append(partialDigests, outDigest.GetProto()) - } - // Server is configured with Chunk List Storage (CLS) so we must - // verify the CLS as well. Note that in this version of bb-storage a - // missing chunk list for a blob does not imply that the blob is - // missing. It is merely required to manage the life time of chunk - // lists. In a future version of bb-storage FMB calls will go to - // either the chunk storage or the chunk list storage. 
- if s.chunkListStorage != nil { - capabilities, err := s.chunkListStorage.GetCapabilities(ctx, instanceName) - if err != nil { - return nil, err - } - if capabilities.GetCacheCapabilities().GetRepMaxCdcParams() == nil { - return nil, status.Error(codes.Internal, "This server implementation is only compatible with RepMaxCDC") - } - minChunkSize := capabilities.GetCacheCapabilities().GetRepMaxCdcParams().GetMinChunkSizeBytes() - maxChunkSize := 2*minChunkSize - 1 - bigBlobDigests := digest.NewSetBuilder(0) - for _, partialDigest := range in.BlobDigests { - if partialDigest.GetSizeBytes() > int64(maxChunkSize) { - digest, err := digestFunction.NewDigestFromProto(partialDigest) - if err != nil { - return nil, err - } - bigBlobDigests.Add(digest) - } - } - _, _ = s.chunkListStorage.FindMissing(ctx, bigBlobDigests.Build()) + outDigests := make([]*remoteexecution.Digest, 0, missing.Length()) + for _, outDigest := range missing.Items() { + outDigests = append(outDigests, outDigest.GetProto()) } return &remoteexecution.FindMissingBlobsResponse{ - MissingBlobDigests: partialDigests, + MissingBlobDigests: outDigests, }, nil } @@ -106,6 +79,7 @@ func (s *contentAddressableStorageServer) BatchReadBlobs(ctx context.Context, in return nil, err } + // TODO: Compensate for message overhead. 
bytesRemaining := s.maximumMessageSizeBytes digests := make([]digest.Digest, 0, len(in.Digests)) for _, reqDigest := range in.Digests { @@ -183,10 +157,6 @@ func (contentAddressableStorageServer) GetTree(in *remoteexecution.GetTreeReques } func (s *contentAddressableStorageServer) SpliceBlob(ctx context.Context, in *remoteexecution.SpliceBlobRequest) (*remoteexecution.SpliceBlobResponse, error) { - if s.chunkListStorage == nil { - return nil, status.Error(codes.Unimplemented, "This service does not support SpliceBlob") - } - instanceName, err := digest.NewInstanceName(in.InstanceName) if err != nil { return nil, util.StatusWrapf(err, "Invalid instance name %#v", in.InstanceName) @@ -216,10 +186,6 @@ func (s *contentAddressableStorageServer) SpliceBlob(ctx context.Context, in *re } func (s *contentAddressableStorageServer) SplitBlob(ctx context.Context, in *remoteexecution.SplitBlobRequest) (*remoteexecution.SplitBlobResponse, error) { - if s.chunkListStorage == nil { - return nil, status.Error(codes.Unimplemented, "This service does not support SplitBlob") - } - instanceName, err := digest.NewInstanceName(in.InstanceName) if err != nil { return nil, util.StatusWrapf(err, "Invalid instance name %#v", in.InstanceName) diff --git a/pkg/blobstore/grpcservers/content_addressable_storage_server_test.go b/pkg/blobstore/grpcservers/content_addressable_storage_server_test.go index 7c5ed83b4..7619b3dcb 100644 --- a/pkg/blobstore/grpcservers/content_addressable_storage_server_test.go +++ b/pkg/blobstore/grpcservers/content_addressable_storage_server_test.go @@ -7,6 +7,7 @@ import ( remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" "github.com/buildbarn/bb-storage/internal/mock" "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/blobstore/cdc" "github.com/buildbarn/bb-storage/pkg/blobstore/grpcservers" "github.com/buildbarn/bb-storage/pkg/digest" "github.com/buildbarn/bb-storage/pkg/testutil" @@ 
-44,18 +45,32 @@ func TestContentAddressableStorageServerBatchReadBlobsSuccess(t *testing.T) { InstanceName: "ubuntu1804", } - contentAddressableStorage := mock.NewMockBlobAccess(ctrl) + chunkStorage := mock.NewMockBlobAccess(ctrl) + chunkListStorage := mock.NewMockBlobAccess(ctrl) + chunkListStorage.EXPECT().GetCapabilities(gomock.Any(), gomock.Any()).Return( + &remoteexecution.ServerCapabilities{ + CacheCapabilities: &remoteexecution.CacheCapabilities{ + RepMaxCdcParams: &remoteexecution.RepMaxCdcParams{ + MinChunkSizeBytes: 256 * 1024, + HorizonSizeBytes: 8 * 256 * 1024, + }, + }, + }, nil, + ).AnyTimes() a := make([]byte, 123) buf := buffer.NewValidatedBufferFromByteSlice(a) - contentAddressableStorage.EXPECT().Get(ctx, digest1).Return(buf) + chunkStorage.EXPECT().Get(ctx, digest1).Return(buf) b := make([]byte, 234) buf2 := buffer.NewValidatedBufferFromByteSlice(b) - contentAddressableStorage.EXPECT().Get(ctx, digest2).Return(buf2) + chunkStorage.EXPECT().Get(ctx, digest2).Return(buf2) buf3 := buffer.NewBufferFromError(status.Error(codes.NotFound, "The object you requested could not be found")) - contentAddressableStorage.EXPECT().Get(ctx, digest3).Return(buf3) + chunkStorage.EXPECT().Get(ctx, digest3).Return(buf3) - contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, nil, 4<<20) + maximumMessageSizeBytes := 4 << 20 + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(chunkListStorage, maximumMessageSizeBytes) + casChunker := cdc.NewCasChunkingBlobAccess(chunkStorage, chunkListStorage, parameterProvider, maximumMessageSizeBytes) + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(casChunker, chunkListStorage, int64(maximumMessageSizeBytes)) response, err := contentAddressableStorageServer.BatchReadBlobs(ctx, request) require.NoError(t, err) @@ -106,9 +121,23 @@ func TestContentAddressableStorageServerBatchReadBlobsFailure(t *testing.T) { InstanceName: 
"ubuntu1804", } - contentAddressableStorage := mock.NewMockBlobAccess(ctrl) + chunkStorage := mock.NewMockBlobAccess(ctrl) + chunkListStorage := mock.NewMockBlobAccess(ctrl) + chunkListStorage.EXPECT().GetCapabilities(gomock.Any(), gomock.Any()).Return( + &remoteexecution.ServerCapabilities{ + CacheCapabilities: &remoteexecution.CacheCapabilities{ + RepMaxCdcParams: &remoteexecution.RepMaxCdcParams{ + MinChunkSizeBytes: 64, + HorizonSizeBytes: 8 * 64, + }, + }, + }, nil, + ).AnyTimes() - contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, nil, 200) + maximumMessageSizeBytes := 200 + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(chunkListStorage, maximumMessageSizeBytes) + casChunker := cdc.NewCasChunkingBlobAccess(chunkStorage, chunkListStorage, parameterProvider, maximumMessageSizeBytes) + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(casChunker, chunkListStorage, int64(maximumMessageSizeBytes)) _, err := contentAddressableStorageServer.BatchReadBlobs(ctx, request) testutil.RequireEqualStatus(t, status.Error(codes.InvalidArgument, "Attempted to read a total of at least 357 bytes, while a maximum of 200 bytes is permitted"), err) @@ -128,30 +157,29 @@ func TestContentAddressableStorageServerFindMissingBlobs(t *testing.T) { }, } - contentAddressableStorage := mock.NewMockBlobAccess(ctrl) + chunkStorage := mock.NewMockBlobAccess(ctrl) chunkListStorage := mock.NewMockBlobAccess(ctrl) - setBuilder := digest.NewSetBuilder(2) - digestSet := setBuilder.Add(digest1).Add(digest2).Build() - - // Missing chunk lists is not an error, nor does it imply a missing - // blob at this stage. 
- contentAddressableStorage.EXPECT().FindMissing(ctx, digestSet).Return(digest.EmptySet, nil) - chunkListStorage.EXPECT().GetCapabilities(ctx, digest1.GetInstanceName()).Return( + chunkListStorage.EXPECT().GetCapabilities(gomock.Any(), gomock.Any()).Return( &remoteexecution.ServerCapabilities{ CacheCapabilities: &remoteexecution.CacheCapabilities{ - SplitBlobSupport: true, - SpliceBlobSupport: true, RepMaxCdcParams: &remoteexecution.RepMaxCdcParams{ MinChunkSizeBytes: 64, - HorizonSizeBytes: 128, + HorizonSizeBytes: 8 * 64, }, }, - }, - nil, - ) - chunkListStorage.EXPECT().FindMissing(ctx, digest2.ToSingletonSet()).Return(digest2.ToSingletonSet(), nil) + }, nil, + ).AnyTimes() + + // Digest1 is small so will be routed directly to chunk storage, + // while digest2 is large and will be routed to the chunk list + // storage. + chunkStorage.EXPECT().FindMissing(ctx, digest1.ToSingletonSet()).Return(digest.EmptySet, nil) + chunkListStorage.EXPECT().FindMissing(ctx, digest2.ToSingletonSet()).Return(digest.EmptySet, nil) - contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, chunkListStorage, 200) + maximumMessageSizeBytes := 200 + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(chunkListStorage, maximumMessageSizeBytes) + casChunker := cdc.NewCasChunkingBlobAccess(chunkStorage, chunkListStorage, parameterProvider, maximumMessageSizeBytes) + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(casChunker, chunkListStorage, int64(maximumMessageSizeBytes)) response, err := contentAddressableStorageServer.FindMissingBlobs(ctx, request) require.NoError(t, err) @@ -170,8 +198,18 @@ func TestContentAddressableStorageServerSplitBlob(t *testing.T) { DigestFunction: remoteexecution.DigestFunction_SHA256, } - contentAddressableStorage := mock.NewMockBlobAccess(ctrl) + chunkStorage := mock.NewMockBlobAccess(ctrl) chunkListStorage := mock.NewMockBlobAccess(ctrl) + 
chunkListStorage.EXPECT().GetCapabilities(gomock.Any(), gomock.Any()).Return( + &remoteexecution.ServerCapabilities{ + CacheCapabilities: &remoteexecution.CacheCapabilities{ + RepMaxCdcParams: &remoteexecution.RepMaxCdcParams{ + MinChunkSizeBytes: 64, + HorizonSizeBytes: 8 * 64, + }, + }, + }, nil, + ).AnyTimes() instanceName, err := digest.NewInstanceName(request.InstanceName) require.NoError(t, err) @@ -198,7 +236,11 @@ func TestContentAddressableStorageServerSplitBlob(t *testing.T) { ), ) - contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, chunkListStorage, 200) + maximumMessageSizeBytes := 200 + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(chunkListStorage, maximumMessageSizeBytes) + casChunker := cdc.NewCasChunkingBlobAccess(chunkStorage, chunkListStorage, parameterProvider, maximumMessageSizeBytes) + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(casChunker, chunkListStorage, int64(maximumMessageSizeBytes)) + _, err = contentAddressableStorageServer.SplitBlob(ctx, request) require.NoError(t, err) } @@ -224,8 +266,18 @@ func TestContentAddressableStorageServerSpliceBlob(t *testing.T) { InstanceName: "my_instance_name", } - contentAddressableStorage := mock.NewMockBlobAccess(ctrl) + chunkStorage := mock.NewMockBlobAccess(ctrl) chunkListStorage := mock.NewMockBlobAccess(ctrl) + chunkListStorage.EXPECT().GetCapabilities(gomock.Any(), gomock.Any()).Return( + &remoteexecution.ServerCapabilities{ + CacheCapabilities: &remoteexecution.CacheCapabilities{ + RepMaxCdcParams: &remoteexecution.RepMaxCdcParams{ + MinChunkSizeBytes: 64, + HorizonSizeBytes: 8 * 64, + }, + }, + }, nil, + ).AnyTimes() instanceName, err := digest.NewInstanceName(request.InstanceName) require.NoError(t, err) @@ -238,7 +290,10 @@ func TestContentAddressableStorageServerSpliceBlob(t *testing.T) { ChunkDigests: request.ChunkDigests, }, buffer.UserProvided)).Return(nil) - 
contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(contentAddressableStorage, chunkListStorage, 200) + maximumMessageSizeBytes := 200 + parameterProvider := cdc.NewParameterProviderFromCapabilitiesProvider(chunkListStorage, maximumMessageSizeBytes) + casChunker := cdc.NewCasChunkingBlobAccess(chunkStorage, chunkListStorage, parameterProvider, maximumMessageSizeBytes) + contentAddressableStorageServer := grpcservers.NewContentAddressableStorageServer(casChunker, chunkListStorage, int64(maximumMessageSizeBytes)) response, err := contentAddressableStorageServer.SpliceBlob(ctx, request) require.NoError(t, err) require.Equal(t, request.BlobDigest, response.BlobDigest) diff --git a/pkg/blobstore/grpcservers/integration/BUILD.bazel b/pkg/blobstore/grpcservers/integration/BUILD.bazel new file mode 100644 index 000000000..4381b6dbf --- /dev/null +++ b/pkg/blobstore/grpcservers/integration/BUILD.bazel @@ -0,0 +1,34 @@ +load("@rules_go//go:def.bzl", "go_test") + +go_test( + name = "integration_test", + timeout = "short", + srcs = [ + "byte_stream_server_test.go", + "content_addressable_storage_server_test.go", + "utils_test.go", + ], + data = [ + "//cmd/bb_replicator", + "//cmd/bb_storage", + ], + env = { + "BB_STORAGE_RUNFILE_PATH": "$(rlocationpath //cmd/bb_storage:bb_storage)", + "BB_REPLICATOR_RUNFILE_PATH": "$(rlocationpath //cmd/bb_replicator:bb_replicator)", + }, + deps = [ + "//pkg/digest", + "//pkg/util", + "@bazel_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", + "@com_github_google_uuid//:uuid", + "@com_github_klauspost_compress//zstd", + "@com_github_stretchr_testify//require", + "@org_golang_google_genproto_googleapis_bytestream//:bytestream", + "@org_golang_google_grpc//:grpc", + "@org_golang_google_grpc//codes", + "@org_golang_google_grpc//credentials/insecure", + "@org_golang_google_grpc//status", + "@org_golang_google_protobuf//proto", + "@rules_go//go/runfiles", + ], +) diff --git 
a/pkg/blobstore/grpcservers/integration/byte_stream_server_test.go b/pkg/blobstore/grpcservers/integration/byte_stream_server_test.go new file mode 100644 index 000000000..01208d1c9 --- /dev/null +++ b/pkg/blobstore/grpcservers/integration/byte_stream_server_test.go @@ -0,0 +1,51 @@ +package integration + +import ( + "context" + "testing" + "time" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/stretchr/testify/require" +) + +func TestByteStreamAPI(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + blobData := makeRandomData(t, 2*maximumMessageSizeBytes, 0) + digest := computeDigest(blobData) + + tests := []struct { + name string + compressor remoteexecution.Compressor_Value + data []byte + }{ + {name: "IDENTITY", compressor: remoteexecution.Compressor_IDENTITY, data: blobData}, + {name: "ZSTD", compressor: remoteexecution.Compressor_ZSTD, data: zstdEncode(blobData)}, + } + + t.Run("Write and read back binary data", func(t *testing.T) { + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + closer, _, _, _, bsClient := setupServers(t) + defer closer() + // Write blob. + err := bytestreamWriteBlob(ctx, bsClient, test.data, digest, test.compressor) + require.NoError(t, err, "Could not write blob") + + // Read back uncompressed. + receivedData, err := bytestreamReadBlob(ctx, bsClient, digest, remoteexecution.Compressor_IDENTITY) + require.NoError(t, err, "Could not read back uploaded data") + require.Equal(t, blobData, receivedData, "Downloaded payload does not match uploaded data") + + // Read back compressed. 
+ receivedZstdData, err := bytestreamReadBlob(ctx, bsClient, digest, remoteexecution.Compressor_ZSTD) + require.NoError(t, err, "Could not read back uploaded data") + decompressedData, err := zstdDecode(receivedZstdData) + require.NoError(t, err, "Failed to decompress ZSTD payload") + require.Equal(t, blobData, decompressedData, "Downloaded compressed payload does not match uploaded data") + }) + } + }) +} diff --git a/pkg/blobstore/grpcservers/integration/content_addressable_storage_server_test.go b/pkg/blobstore/grpcservers/integration/content_addressable_storage_server_test.go new file mode 100644 index 000000000..595aeb808 --- /dev/null +++ b/pkg/blobstore/grpcservers/integration/content_addressable_storage_server_test.go @@ -0,0 +1,415 @@ +package integration + +import ( + "context" + "testing" + "time" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/stretchr/testify/require" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" +) + +func TestContentAddressableStorageAPI(t *testing.T) { + blobData := makeRandomData(t, maximumMessageSizeBytes/2, 0) + blobDigest := computeDigest(blobData) + + t.Run("GetCapabilities", func(t *testing.T) { + closer, capabilitiesClient, _, _, _ := setupServers(t) + defer closer() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + capabilities, err := capabilitiesClient.GetCapabilities(ctx, &remoteexecution.GetCapabilitiesRequest{ + InstanceName: "allowed_instance", + }) + require.NoError(t, err) + + cacheCaps := capabilities.GetCacheCapabilities() + require.NotNil(t, cacheCaps) + require.Contains(t, cacheCaps.DigestFunctions, remoteexecution.DigestFunction_SHA256) + + require.True(t, cacheCaps.SpliceBlobSupport) + require.True(t, cacheCaps.SplitBlobSupport) + + chunkingParameters := cacheCaps.GetRepMaxCdcParams() + require.NotNil(t, 
chunkingParameters) + require.Equal(t, minChunkSizeBytes, int(chunkingParameters.MinChunkSizeBytes)) + require.Equal(t, 8*minChunkSizeBytes, int(chunkingParameters.HorizonSizeBytes)) + + _, err = capabilitiesClient.GetCapabilities(ctx, &remoteexecution.GetCapabilitiesRequest{ + InstanceName: "forbidden_instance", + }) + require.Error(t, err) + status, ok := status.FromError(err) + require.True(t, ok) + require.Equal(t, codes.PermissionDenied, status.Code()) + }) + + t.Run("UploadSplitFMBDownload", func(t *testing.T) { + closer, _, casClient, _, _ := setupServers(t) + defer closer() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + // Upload a test blob. + err := batchUploadBlob(ctx, casClient, blobData, blobDigest) + require.NoError(t, err, "Failed to upload test data") + + // Ask server to split. + chunkDigests, err := splitBlob(ctx, casClient, blobDigest) + require.NoError(t, err, "Failed to split uploaded blob") + + // Check that all digests exist in CAS. + allDigests := append(chunkDigests, blobDigest) + missing, err := findMissingBlobs(ctx, casClient, allDigests) + require.NoError(t, err, "Failed to find missing blobs") + require.Empty(t, missing, "Blobs were unexpectedly missing") + + // Read back chunks and stitch them together. + chunks, err := batchDownloadBlobs(ctx, casClient, chunkDigests) + require.NoError(t, err, "Failed to download blobs") + rebuiltBlob := make([]byte, 0, len(blobData)) + for _, chunk := range chunks { + rebuiltBlob = append(rebuiltBlob, chunk...) + } + require.Equal(t, blobData, rebuiltBlob, "Blob did not stitch back into expected result") + + // Read back blob. 
+ blobs, err := batchDownloadBlobs(ctx, casClient, []digest.Digest{blobDigest}) + require.NoError(t, err) + require.Len(t, blobs, 1) + require.Equal(t, blobData, blobs[0]) + }) +} + +func TestRepMaxCDCSplitAndSpliceBehaviors(t *testing.T) { + t.Run("RoundTripSplitThenSplice", func(t *testing.T) { + closer, _, casClient, _, _ := setupServers(t) + defer closer() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + dataSize := (minChunkSizeBytes * 4) + 128 + data := makeRandomData(t, int(dataSize), 0) + blobDigest := computeDigest(data) + err := batchUploadBlob(ctx, casClient, data, blobDigest) + require.NoError(t, err) + + digests, err := splitBlob(ctx, casClient, blobDigest) + require.NoError(t, err, "Unexpected error when splitting blob.") + + err = spliceBlob(ctx, casClient, blobDigest, digests) + require.NoError(t, err, "Unexpected error when splicing blob.") + }) + + t.Run("SpliceNonStandardChunkingThenSplit", func(t *testing.T) { + closer, _, casClient, _, _ := setupServers(t) + defer closer() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + blobData := makeRandomData(t, int(minChunkSizeBytes*2), 0) + + chunk1 := blobData[:1] + chunk2 := blobData[1:] + + digest1 := computeDigest(chunk1) + digest2 := computeDigest(chunk2) + blobDigest := computeDigest(blobData) + chunkDigests := []digest.Digest{digest1, digest2} + + err := batchUploadBlob(ctx, casClient, chunk1, digest1) + require.NoError(t, err, "Unexpected error when uploading chunk1.") + + err = batchUploadBlob(ctx, casClient, chunk2, digest2) + require.NoError(t, err, "Unexpected error when uploading chunk2.") + + err = spliceBlob(ctx, casClient, blobDigest, chunkDigests) + require.NoError(t, err, "Unexpected error when splicing chunks.") + + digests, err := splitBlob(ctx, casClient, blobDigest) + require.NoError(t, err, "Unexpected error when splitting recently spliced blob.") + require.NotEqual(t, chunkDigests, 
digests, "Split should not return non standard split result.") + + chunks, err := batchDownloadBlobs(ctx, casClient, digests) + require.NoError(t, err, "Unexpected error when downloading chunks of split blob.") + + rebuiltBlob := make([]byte, 0, len(blobData)) + for _, chunk := range chunks { + rebuiltBlob = append(rebuiltBlob, chunk...) + } + require.Equal(t, blobData, rebuiltBlob, "Blob did not stitch back into expected result") + }) + + t.Run("SpliceAlreadyExistsOrNoop", func(t *testing.T) { + closer, _, casClient, _, _ := setupServers(t) + defer closer() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + blobData := []byte("This blob will be fully uploaded before we try to splice it.") + blobDigest := computeDigest(blobData) + err := batchUploadBlob(ctx, casClient, blobData, blobDigest) + require.NoError(t, err, "Unexpected error when uploading blob.") + + chunk1 := blobData[:1] + chunk2 := blobData[1:] + digest1 := computeDigest(chunk1) + digest2 := computeDigest(chunk2) + chunkDigests := []digest.Digest{digest1, digest2} + + err = batchUploadBlob(ctx, casClient, chunk1, digest1) + require.NoError(t, err, "Unexpected error when uploading chunk1.") + + err = batchUploadBlob(ctx, casClient, chunk2, digest2) + require.NoError(t, err, "Unexpected error when uploading chunk2.") + + err = spliceBlob(ctx, casClient, blobDigest, chunkDigests) + if err != nil { + require.Equal(t, codes.AlreadyExists, status.Code(err), "Expected OK or ALREADY_EXISTS") + } + }) + + t.Run("ValidationSpliceBlobRejections", func(t *testing.T) { + closer, _, casClient, _, _ := setupServers(t) + defer closer() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + validData := makeRandomData(t, 512, 0) + validDigest := computeDigest(validData) + err := batchUploadBlob(ctx, casClient, validData, validDigest) + require.NoError(t, err, "Unexpected error when uploading blob.") + + ghostDigest := 
computeDigest([]byte("I do not exist in storage")) + + tests := []struct { + name string + blobDigest digest.Digest + chunkDigests []digest.Digest + expectError codes.Code + }{ + { + name: "Missing Chunk", + blobDigest: ghostDigest, + chunkDigests: []digest.Digest{ghostDigest}, + expectError: codes.NotFound, + }, + { + name: "Digest Mismatch", + blobDigest: computeDigest([]byte("Fake target")), + chunkDigests: []digest.Digest{validDigest}, + expectError: codes.InvalidArgument, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := spliceBlob(ctx, casClient, tc.blobDigest, tc.chunkDigests) + require.Error(t, err) + require.Equal(t, tc.expectError, status.Code(err), "Not the expected error: %s", err.Error()) + }) + } + }) + + t.Run("SpliceSplicedBlob", func(t *testing.T) { + closer, _, casClient, _, _ := setupServers(t) + defer closer() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + blobData := makeRandomData(t, 2*minChunkSizeBytes+1, 0) + blobDigest := computeDigest(blobData) + chunk1 := blobData[:1] + chunk2 := blobData[1:] + digest1 := computeDigest(chunk1) + digest2 := computeDigest(chunk2) + chunkDigests := []digest.Digest{digest1, digest2} + + err := batchUploadBlob(ctx, casClient, chunk1, digest1) + require.NoError(t, err, "Unexpected error when uploading chunk1.") + + err = batchUploadBlob(ctx, casClient, chunk2, digest2) + require.NoError(t, err, "Unexpected error when uploading chunk2.") + + err = spliceBlob(ctx, casClient, blobDigest, chunkDigests) + require.NoError(t, err, "Unexpected error when splicing blob.") + }) +} + +func TestActionCacheAPI(t *testing.T) { + smallData := []byte("small file contents") + smallDigest := computeDigest(smallData) + + bigData := makeRandomData(t, int(minChunkSizeBytes*4), 0) + bigDigest := computeDigest(bigData) + + tree := &remoteexecution.Tree{ + Root: &remoteexecution.Directory{ + Files: []*remoteexecution.FileNode{ + {Name: "big.bin", 
Digest: bigDigest.GetProto(), IsExecutable: true}, + {Name: "small.txt", Digest: smallDigest.GetProto()}, + }, + }, + } + treeData, err := proto.Marshal(tree) + require.NoError(t, err) + treeDigest := computeDigest(treeData) + + actionResult := &remoteexecution.ActionResult{ + OutputDirectories: []*remoteexecution.OutputDirectory{ + {Path: "build_output", TreeDigest: treeDigest.GetProto()}, + }, + ExitCode: 0, + } + + actionDigest := computeDigest(makeRandomData(t, 128, 0)) + + t.Run("CompleteActionResult", func(t *testing.T) { + closer, _, casClient, acClient, _ := setupServers(t) + defer closer() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + require.NoError(t, batchUploadBlob(ctx, casClient, smallData, smallDigest)) + require.NoError(t, batchUploadBlob(ctx, casClient, bigData, bigDigest)) + require.NoError(t, batchUploadBlob(ctx, casClient, treeData, treeDigest)) + + err := updateActionResult(ctx, acClient, actionDigest, actionResult) + require.NoError(t, err) + + getResp, err := getActionResult(ctx, acClient, actionDigest) + require.NoError(t, err) + require.Equal(t, actionResult.ExitCode, getResp.ExitCode) + require.Len(t, getResp.OutputDirectories, 1) + require.Equal(t, treeDigest.GetProto().Hash, getResp.OutputDirectories[0].TreeDigest.Hash) + }) + + t.Run("IncompleteActionResult", func(t *testing.T) { + closer, _, casClient, acClient, _ := setupServers(t) + defer closer() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + // bigData is deliberately not uploaded here, so the tree refers to a blob that is absent from the CAS. + require.NoError(t, batchUploadBlob(ctx, casClient, smallData, smallDigest)) + require.NoError(t, batchUploadBlob(ctx, casClient, treeData, treeDigest)) + + err := updateActionResult(ctx, acClient, actionDigest, actionResult) + require.NoError(t, err, "Unexpected error updating action result") + + _, err = getActionResult(ctx, acClient, actionDigest) + require.Error(t, err, "Incomplete action result 
should fail") + require.Equal(t, codes.NotFound, status.Code(err)) + }) +} + +func TestAPIAuthorizationRejections(t *testing.T) { + closer, capClient, casClient, acClient, bsClient := setupServers(t) + defer closer() + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + forbiddenInstance := "forbidden_instance" + data := []byte("top secret data") + forbiddenDigest := computeDigestWithInstanceName(data, forbiddenInstance) + chunkDigest1 := computeDigestWithInstanceName([]byte("top "), forbiddenInstance) + chunkDigest2 := computeDigestWithInstanceName([]byte("secret data"), forbiddenInstance) + + dummyActionResult := &remoteexecution.ActionResult{ + ExitCode: 0, + } + + tests := []struct { + name string + call func() error + }{ + { + name: "GetCapabilities", + call: func() error { + _, err := capClient.GetCapabilities(ctx, &remoteexecution.GetCapabilitiesRequest{ + InstanceName: forbiddenInstance, + }) + return err + }, + }, + { + name: "FindMissingBlobs", + call: func() error { + _, err := findMissingBlobs(ctx, casClient, []digest.Digest{forbiddenDigest}) + return err + }, + }, + { + name: "BatchUpdateBlobs", + call: func() error { + return batchUploadBlob(ctx, casClient, data, forbiddenDigest) + }, + }, + { + name: "BatchReadBlobs", + call: func() error { + _, err := batchDownloadBlobs(ctx, casClient, []digest.Digest{forbiddenDigest}) + return err + }, + }, + { + name: "SplitBlob", + call: func() error { + _, err := splitBlob(ctx, casClient, forbiddenDigest) + return err + }, + }, + { + name: "SpliceBlob", + call: func() error { + return spliceBlob(ctx, casClient, forbiddenDigest, []digest.Digest{chunkDigest1, chunkDigest2}) + }, + }, + { + name: "ByteStream Write", + call: func() error { + return bytestreamWriteBlob(ctx, bsClient, data, forbiddenDigest, remoteexecution.Compressor_IDENTITY) + }, + }, + { + name: "ByteStream Read", + call: func() error { + _, err := bytestreamReadBlob(ctx, bsClient, forbiddenDigest, 
remoteexecution.Compressor_IDENTITY) + return err + }, + }, + { + name: "ActionCache UpdateActionResult", + call: func() error { + return updateActionResult(ctx, acClient, forbiddenDigest, dummyActionResult) + }, + }, + { + name: "ActionCache GetActionResult", + call: func() error { + _, err := getActionResult(ctx, acClient, forbiddenDigest) + return err + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := tc.call() + require.Error(t, err, "Expected API call to fail for forbidden instance") + require.Equal(t, codes.PermissionDenied, status.Code(err), "Expected PermissionDenied, got %v", status.Code(err)) + }) + } +} diff --git a/pkg/blobstore/grpcservers/integration/utils_test.go b/pkg/blobstore/grpcservers/integration/utils_test.go new file mode 100644 index 000000000..02df58c5a --- /dev/null +++ b/pkg/blobstore/grpcservers/integration/utils_test.go @@ -0,0 +1,625 @@ +package integration + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "math/rand" + "os" + "os/exec" + "strings" + "testing" + "time" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/bazelbuild/rules_go/go/runfiles" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/buildbarn/bb-storage/pkg/util" + "github.com/google/uuid" + "github.com/klauspost/compress/zstd" + "github.com/stretchr/testify/require" + "google.golang.org/genproto/googleapis/bytestream" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/status" +) + +type storageAddresses struct { + storageShardSocketPaths []string + localCacheSocketPath string +} + +const ( + maximumMessageSizeBytes = 2 << 20 // 2MiB + minChunkSizeBytes = 256 << 10 // 256KiB + bytestreamWriteChunkSizeBytes = 1 << 20 // 1MiB +) + +func storageConfig(listenSocketPath string) string { + return fmt.Sprintf(` +local listenPath = '%s'; +local 
maximumMessageSizeBytes = %d; +local minChunkSizeBytes = %d; + +local inMemoryStorage = { + keyLocationMapInMemory: { entries: 1024 * 1024 }, + keyLocationMapMaximumGetAttempts: 32, + keyLocationMapMaximumPutAttempts: 64, + oldBlocks: 1, + currentBlocks: 1, + newBlocks: 1, + blocksInMemory: { blockSizeBytes: 32 * 1024 * 1024 }, +}; + +{ + grpcServers: [{ + listenPaths: [listenPath], + authenticationPolicy: { allow: {} }, + }], + maximumMessageSizeBytes: maximumMessageSizeBytes, + contentAddressableStorage: { + chunkStorage: { + backend: { 'local': inMemoryStorage }, + getAuthorizer: { allow: {} }, + putAuthorizer: { allow: {} }, + findMissingAuthorizer: { allow: {} }, + }, + chunkListStorage: { + backend: { + 'local': inMemoryStorage + { chunkingParameters: { minChunkSizeBytes: minChunkSizeBytes, horizonSizeBytes: 8*minChunkSizeBytes } }, + }, + getAuthorizer: { allow: {} }, + putAuthorizer: { allow: {} }, + findMissingAuthorizer: { allow: {} }, + }, + }, + actionCache: { + backend: { 'local': inMemoryStorage }, + getAuthorizer: { allow: {} }, + putAuthorizer: { allow: {} }, + }, +} +`, escapeJSON(listenSocketPath), maximumMessageSizeBytes, minChunkSizeBytes) +} + +func replicatorConfig(listenSocketPath string, params storageAddresses) string { + upstreamsJSON, err := json.Marshal(params.storageShardSocketPaths) + if err != nil { + panic(err) + } + return fmt.Sprintf(` +local listenPath = '%s'; +local upstreamAddresses = %s; +local localCacheAddress = "unix:%s"; +local maximumMessageSizeBytes = %d; + +local topology = { + sharding: { + shards: { + ["shard" + i]: { backend: { grpc: { client: { address: "unix:" + upstreamAddresses[i] } } }, weight: 1 } + for i in std.range(0, std.length(upstreamAddresses) - 1) + } + } +}; + +{ + grpcServers: [{ + listenPaths: [listenPath], + authenticationPolicy: { allow: {} }, + }], + maximumMessageSizeBytes: maximumMessageSizeBytes, + source: topology, + sink: { grpc: { client: { address: localCacheAddress } } }, + replicator: { + 
deduplicating: { + concurrencyLimiting: { + base: { 'local': {} }, + maximumConcurrency: 1, + }, + }, + } +} +`, escapeJSON(listenSocketPath), upstreamsJSON, escapeJSON(params.localCacheSocketPath), maximumMessageSizeBytes) +} + +func frontendConfig(listenSocketPath string, params storageAddresses, replicatorSocketPath string) string { + upstreamsJSON, err := json.Marshal(params.storageShardSocketPaths) + if err != nil { + panic(err) + } + return fmt.Sprintf(` +local listenPath = '%s'; +local upstreamAddresses = %s; +local localCacheAddress = "unix:%s"; +local replicatorAddress = "unix:%s"; +local maximumMessageSizeBytes = %d; + +local topology = { + sharding: { + shards: { + ["shard" + i]: { backend: { grpc: { client: { address: "unix:" + upstreamAddresses[i] } } }, weight: 1 } + for i in std.range(0, std.length(upstreamAddresses) - 1) + } + } +}; + +local readCaching(inner, replicator) = { + readCaching: { + fast: { grpc: { client: { address: localCacheAddress } } }, + slow: inner, + replicator: replicator, + }, +}; + +local authorizer = { + jmespathExpression: { + expression: 'contains(authenticationMetadata.private.mayAccess, instanceName)' + }, +}; + +local csReplicator = { remote: { address: replicatorAddress } }; +local simpleReplicator = { 'local': {} }; + +{ + grpcServers: [{ + listenPaths: [listenPath], + authenticationPolicy: { + allow: { + private: { + mayAccess: ['allowed_instance'], + }, + }, + }, + }], + supportedCompressors: ['ZSTD'], + zstdPool: { + maximumEncoders: 16, + maximumDecoders: 16, + encoderWindowSizeBytes: 8 * 1024 * 1024, + decoderWindowSizeBytes: 8 * 1024 * 1024, + }, + maximumMessageSizeBytes: maximumMessageSizeBytes, + contentAddressableStorage: { + chunkStorage: { + backend: readCaching(topology, csReplicator), + getAuthorizer: authorizer, + putAuthorizer: authorizer, + findMissingAuthorizer: authorizer, + }, + chunkListStorage: { + backend: readCaching({ chunkListValidating: { backend: topology } }, simpleReplicator), + 
getAuthorizer: authorizer, + putAuthorizer: authorizer, + findMissingAuthorizer: authorizer, + }, + contentDefinedChunkingParameterCache: { + cacheSize: 10, + cacheDuration: '60s', + cacheReplacementPolicy: 'LEAST_RECENTLY_USED', + }, + }, + actionCache: { + backend: readCaching( + { + completenessChecking: { + backend: topology, + maximumTotalTreeSizeBytes: 64 * 1024 * 1024, + }, + }, + simpleReplicator, + ), + getAuthorizer: authorizer, + putAuthorizer: authorizer, + }, +} +`, escapeJSON(listenSocketPath), upstreamsJSON, escapeJSON(params.localCacheSocketPath), escapeJSON(replicatorSocketPath), maximumMessageSizeBytes) +} + +func writeConfigFile(name, content string) (file *os.File, err error) { + if file, err = os.CreateTemp("", name); err != nil { + return nil, err + } + if _, err = file.WriteString(content); err != nil { + return nil, err + } + if err = file.Close(); err != nil { + return nil, err + } + return file, nil +} + +func getBinaryPath(envVar string) (string, error) { + rf, err := runfiles.New() + if err != nil { + return "", util.StatusWrap(err, "Failed to initialize runfiles") + } + + runfilePath := os.Getenv(envVar) + if runfilePath == "" { + return "", util.StatusWrapf(err, "'%s' environment variable is not set", envVar) + } + + return rf.Rlocation(runfilePath) +} + +func setupServers(t *testing.T) (func(), remoteexecution.CapabilitiesClient, remoteexecution.ContentAddressableStorageClient, remoteexecution.ActionCacheClient, bytestream.ByteStreamClient) { + storageBinaryPath, err := getBinaryPath("BB_STORAGE_RUNFILE_PATH") + require.NoError(t, err, "Could not get storage binary path") + replicatorBinaryPath, err := getBinaryPath("BB_REPLICATOR_RUNFILE_PATH") + require.NoError(t, err, "Could not get replicator binary path") + + storageSocketPaths := make([]string, 2) + storageClosers := make([]func(), 2) + for i := 0; i < 2; i++ { + storageSocketPaths[i] = createSocketPath(t) + storageClosers[i] = setupServer(t, storageBinaryPath, 
fmt.Sprintf("storage%d", i), storageConfig(storageSocketPaths[i])) + require.Eventually(t, func() bool { + _, err := os.Stat(storageSocketPaths[i]) + return err == nil + }, 2*time.Second, 10*time.Millisecond, "Storage server %d did not start.", i) + } + + localCacheSocketPath := createSocketPath(t) + closeLocalCache := setupServer(t, storageBinaryPath, "localCache", storageConfig(localCacheSocketPath)) + require.Eventually(t, func() bool { + _, err := os.Stat(localCacheSocketPath) + return err == nil + }, 2*time.Second, 10*time.Millisecond, "Local cache storage server did not start.") + + storageAddresses := storageAddresses{ + storageShardSocketPaths: storageSocketPaths, + localCacheSocketPath: localCacheSocketPath, + } + + replicatorSocketPath := createSocketPath(t) + closeReplicator := setupServer(t, replicatorBinaryPath, "replicator", replicatorConfig(replicatorSocketPath, storageAddresses)) + require.Eventually(t, func() bool { + _, err := os.Stat(replicatorSocketPath) + return err == nil + }, 2*time.Second, 10*time.Millisecond, "Replicator did not start.") + + frontendSocketPath := createSocketPath(t) + closeFrontend := setupServer(t, storageBinaryPath, "frontend", frontendConfig(frontendSocketPath, storageAddresses, replicatorSocketPath)) + require.Eventually(t, func() bool { + _, err := os.Stat(frontendSocketPath) + return err == nil + }, 2*time.Second, 10*time.Millisecond, "Frontend server did not start.") + + conn, err := grpc.NewClient(fmt.Sprintf("unix:%s", frontendSocketPath), grpc.WithTransportCredentials(insecure.NewCredentials())) + require.NoError(t, err) + + return func() { + conn.Close() + closeFrontend() + closeReplicator() + closeLocalCache() + for _, closer := range storageClosers { + closer() + } + }, + remoteexecution.NewCapabilitiesClient(conn), + remoteexecution.NewContentAddressableStorageClient(conn), + remoteexecution.NewActionCacheClient(conn), + bytestream.NewByteStreamClient(conn) +} + +func setupServer(t *testing.T, binaryPath, 
name, config string) func() { + configFile, err := writeConfigFile(name, config) + require.NoError(t, err, "Could not write config file") + + cmd := exec.Command(binaryPath, configFile.Name()) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err = cmd.Start() + require.NoError(t, err) + + return func() { + cmd.Process.Kill() + cmd.Wait() + os.Remove(configFile.Name()) + } +} + +func createSocketPath(t *testing.T) string { + t.Helper() + socketFile, err := os.CreateTemp("", "bb_*.sock") + require.NoError(t, err) + socketPath := socketFile.Name() + socketFile.Close() + err = os.Remove(socketPath) + require.NoError(t, err) + return socketPath +} + +func computeDigestWithInstanceName(data []byte, instanceName string) digest.Digest { + hash := sha256.Sum256(data) + return digest.MustNewDigest( + instanceName, + remoteexecution.DigestFunction_SHA256, + hex.EncodeToString(hash[:]), + int64(len(data)), + ) +} + +func computeDigest(data []byte) digest.Digest { + return computeDigestWithInstanceName(data, "allowed_instance") +} + +func escapeJSON(s string) string { + s = strings.ReplaceAll(s, "\\", "\\\\") + s = strings.ReplaceAll(s, "\"", "\\\"") + s = strings.ReplaceAll(s, "'", "\\'") + s = strings.ReplaceAll(s, "\n", "\\n") + return s +} + +func makeRandomData(t *testing.T, size int, seed int64) []byte { + t.Helper() + data := make([]byte, size) + r := rand.New(rand.NewSource(seed)) + _, err := r.Read(data) + require.NoError(t, err) + return data +} + +func zstdEncode(data []byte) []byte { + encoder, err := zstd.NewWriter(nil) + if err != nil { + panic(err) + } + defer encoder.Close() + return encoder.EncodeAll(data, nil) +} + +func zstdDecode(data []byte) ([]byte, error) { + decoder, err := zstd.NewReader(nil) + if err != nil { + panic(err) + } + defer decoder.Close() + ret, err := decoder.DecodeAll(data, nil) + if err != nil { + return nil, err + } + return ret, nil +} + +func batchUploadBlob(ctx context.Context, client 
remoteexecution.ContentAddressableStorageClient, data []byte, d digest.Digest) error { + req := &remoteexecution.BatchUpdateBlobsRequest{ + InstanceName: d.GetInstanceName().String(), + Requests: []*remoteexecution.BatchUpdateBlobsRequest_Request{ + { + Digest: d.GetProto(), + Data: data, + }, + }, + } + + responses, err := client.BatchUpdateBlobs(ctx, req) + if err != nil { + return err + } + for _, response := range responses.Responses { + return status.ErrorProto(response.Status) + } + return err +} + +func batchDownloadBlobs(ctx context.Context, client remoteexecution.ContentAddressableStorageClient, digests []digest.Digest) ([][]byte, error) { + if len(digests) == 0 { + return nil, nil + } + + dataMap := make(map[string][]byte, len(digests)) + + batchStart := 0 + for batchStart < len(digests) { + batchSize := int64(0) + batchEnd := batchStart + + // Slide the window forward until we hit our byte limit. We + // always include at least one digest, even if it exceeds the + // limit. + for batchEnd < len(digests) { + size := digests[batchEnd].GetSizeBytes() + if batchEnd > batchStart && batchSize+size > maximumMessageSizeBytes/2 { + break + } + batchSize += size + batchEnd++ + } + + // Prepare the batch request. + var batchProtos []*remoteexecution.Digest + for _, d := range digests[batchStart:batchEnd] { + batchProtos = append(batchProtos, d.GetProto()) + } + + req := &remoteexecution.BatchReadBlobsRequest{ + InstanceName: digests[0].GetInstanceName().String(), + Digests: batchProtos, + } + + // Execute the RPC for this specific batch. + res, err := client.BatchReadBlobs(ctx, req) + if err != nil { + return nil, err + } + + for _, r := range res.Responses { + if r.Status != nil && r.Status.Code != int32(codes.OK) { + return nil, status.ErrorProto(r.Status) + } + dataMap[r.Digest.Hash] = r.Data + } + + // Move the window forward for the next batch. + batchStart = batchEnd + } + + // Map the responses back to the requested order. 
+ var downloadedData [][]byte + for _, d := range digests { + data, ok := dataMap[d.GetHashString()] + if !ok { + return nil, status.Errorf(codes.NotFound, "Digest %s was not returned in BatchReadBlobs response", d.GetHashString()) + } + downloadedData = append(downloadedData, data) + } + + return downloadedData, nil +} + +func findMissingBlobs(ctx context.Context, client remoteexecution.ContentAddressableStorageClient, digests []digest.Digest) ([]digest.Digest, error) { + if len(digests) == 0 { + return nil, nil + } + + var digestProtos []*remoteexecution.Digest + for _, d := range digests { + digestProtos = append(digestProtos, d.GetProto()) + } + + req := &remoteexecution.FindMissingBlobsRequest{ + InstanceName: digests[0].GetInstanceName().String(), + BlobDigests: digestProtos, + } + + res, err := client.FindMissingBlobs(ctx, req) + if err != nil { + return nil, err + } + + var missingDigests []digest.Digest + digestFunction := digests[0].GetDigestFunction() + for _, p := range res.MissingBlobDigests { + d, err := digestFunction.NewDigestFromProto(p) + if err != nil { + return nil, util.StatusWrap(err, "Failed to parse missing digest from proto") + } + missingDigests = append(missingDigests, d) + } + + return missingDigests, nil +} + +func splitBlob(ctx context.Context, client remoteexecution.ContentAddressableStorageClient, d digest.Digest) ([]digest.Digest, error) { + req := &remoteexecution.SplitBlobRequest{ + InstanceName: d.GetInstanceName().String(), + BlobDigest: d.GetProto(), + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + } + + res, err := client.SplitBlob(ctx, req) + if err != nil { + return nil, err + } + + chunkDigests := make([]digest.Digest, 0, len(res.ChunkDigests)) + digestFunction := d.GetDigestFunction() + for _, chunkProto := range res.ChunkDigests { + chunkDigest, err := digestFunction.NewDigestFromProto(chunkProto) + if err != nil { + return nil, util.StatusWrap(err, "Failed to parse chunk digest from proto") + } + 
chunkDigests = append(chunkDigests, chunkDigest) + } + + return chunkDigests, nil +} + +func spliceBlob(ctx context.Context, client remoteexecution.ContentAddressableStorageClient, blob digest.Digest, chunks []digest.Digest) error { + chunkDigests := make([]*remoteexecution.Digest, 0, len(chunks)) + for _, d := range chunks { + chunkDigests = append(chunkDigests, d.GetProto()) + } + + req := &remoteexecution.SpliceBlobRequest{ + InstanceName: blob.GetInstanceName().String(), + BlobDigest: blob.GetProto(), + ChunkDigests: chunkDigests, + ChunkingFunction: remoteexecution.ChunkingFunction_REP_MAX_CDC, + } + + _, err := client.SpliceBlob(ctx, req) + return err +} + +func bytestreamWriteBlob(ctx context.Context, client bytestream.ByteStreamClient, data []byte, digest digest.Digest, compressor remoteexecution.Compressor_Value) error { + writeStream, err := client.Write(ctx) + if err != nil { + return err + } + offset := int64(0) + dataSize := int64(len(data)) + + for offset < dataSize { + end := offset + bytestreamWriteChunkSizeBytes + if end > dataSize { + end = dataSize + } + + chunk := data[offset:end] + isLast := end == dataSize + + err := writeStream.Send(&bytestream.WriteRequest{ + ResourceName: digest.GetByteStreamWritePath(uuid.New(), compressor), + WriteOffset: offset, + FinishWrite: isLast, + Data: chunk, + }) + if err == io.EOF { + _, innerErr := writeStream.CloseAndRecv() + if innerErr != nil { + return util.StatusFromMultiple([]error{innerErr, err}) + } + } else if err != nil { + return err + } + offset = end + } + _, err = writeStream.CloseAndRecv() + return err +} + +func bytestreamReadBlob(ctx context.Context, client bytestream.ByteStreamClient, digest digest.Digest, compressor remoteexecution.Compressor_Value) ([]byte, error) { + readReq := &bytestream.ReadRequest{ + ResourceName: digest.GetByteStreamReadPath(compressor), + } + readStream, err := client.Read(ctx, readReq) + if err != nil { + return nil, err + } + defer readStream.CloseSend() + + 
receivedData := make([]byte, 0, digest.GetSizeBytes()) + for { + res, err := readStream.Recv() + if err == io.EOF { + return receivedData, nil + } else if err != nil { + return nil, err + } + receivedData = append(receivedData, res.Data...) + } +} + +func updateActionResult(ctx context.Context, acClient remoteexecution.ActionCacheClient, actionDigest digest.Digest, result *remoteexecution.ActionResult) error { + _, err := acClient.UpdateActionResult(ctx, &remoteexecution.UpdateActionResultRequest{ + InstanceName: actionDigest.GetInstanceName().String(), + ActionDigest: actionDigest.GetProto(), + ActionResult: result, + }) + return err +} + +func getActionResult(ctx context.Context, acClient remoteexecution.ActionCacheClient, actionDigest digest.Digest) (*remoteexecution.ActionResult, error) { + return acClient.GetActionResult(ctx, &remoteexecution.GetActionResultRequest{ + InstanceName: actionDigest.GetInstanceName().String(), + ActionDigest: actionDigest.GetProto(), + }) +} diff --git a/pkg/proto/configuration/bb_replicator/bb_replicator.proto b/pkg/proto/configuration/bb_replicator/bb_replicator.proto index 363383881..cda2231e9 100644 --- a/pkg/proto/configuration/bb_replicator/bb_replicator.proto +++ b/pkg/proto/configuration/bb_replicator/bb_replicator.proto @@ -16,10 +16,10 @@ message ApplicationConfiguration { // gRPC servers to spawn to listen for requests from clients. repeated buildbarn.configuration.grpc.ServerConfiguration grpc_servers = 2; - // Content Addressable Storage where data needs to be read. + // Chunk Storage where data needs to be read. buildbarn.configuration.blobstore.BlobAccessConfiguration source = 3; - // Content Addressable Storage where data needs to be written. + // Chunk Storage where data needs to be written. buildbarn.configuration.blobstore.BlobAccessConfiguration sink = 4; // Configuration for replication. 
diff --git a/pkg/proto/configuration/bb_storage/BUILD.bazel b/pkg/proto/configuration/bb_storage/BUILD.bazel index 71f4077f8..9d3e38435 100644 --- a/pkg/proto/configuration/bb_storage/BUILD.bazel +++ b/pkg/proto/configuration/bb_storage/BUILD.bazel @@ -11,6 +11,7 @@ proto_library( "//pkg/proto/configuration/auth:auth_proto", "//pkg/proto/configuration/blobstore:blobstore_proto", "//pkg/proto/configuration/builder:builder_proto", + "//pkg/proto/configuration/digest:digest_proto", "//pkg/proto/configuration/global:global_proto", "//pkg/proto/configuration/grpc:grpc_proto", "//pkg/proto/configuration/zstd:zstd_proto", @@ -27,6 +28,7 @@ go_proto_library( "//pkg/proto/configuration/auth", "//pkg/proto/configuration/blobstore", "//pkg/proto/configuration/builder", + "//pkg/proto/configuration/digest", "//pkg/proto/configuration/global", "//pkg/proto/configuration/grpc", "//pkg/proto/configuration/zstd", diff --git a/pkg/proto/configuration/bb_storage/bb_storage.pb.go b/pkg/proto/configuration/bb_storage/bb_storage.pb.go index fb4668e07..542716145 100644 --- a/pkg/proto/configuration/bb_storage/bb_storage.pb.go +++ b/pkg/proto/configuration/bb_storage/bb_storage.pb.go @@ -11,6 +11,7 @@ import ( auth "github.com/buildbarn/bb-storage/pkg/proto/configuration/auth" blobstore "github.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore" builder "github.com/buildbarn/bb-storage/pkg/proto/configuration/builder" + digest "github.com/buildbarn/bb-storage/pkg/proto/configuration/digest" global "github.com/buildbarn/bb-storage/pkg/proto/configuration/global" grpc "github.com/buildbarn/bb-storage/pkg/proto/configuration/grpc" zstd "github.com/buildbarn/bb-storage/pkg/proto/configuration/zstd" @@ -34,8 +35,7 @@ type ApplicationConfiguration struct { Schedulers map[string]*builder.SchedulerConfiguration `protobuf:"bytes,5,rep,name=schedulers,proto3" json:"schedulers,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` MaximumMessageSizeBytes int64 
`protobuf:"varint,8,opt,name=maximum_message_size_bytes,json=maximumMessageSizeBytes,proto3" json:"maximum_message_size_bytes,omitempty"` Global *global.Configuration `protobuf:"bytes,9,opt,name=global,proto3" json:"global,omitempty"` - ContentAddressableStorage *ScannableBlobAccessConfiguration `protobuf:"bytes,17,opt,name=content_addressable_storage,json=contentAddressableStorage,proto3" json:"content_addressable_storage,omitempty"` - ChunkListStorage *ScannableBlobAccessConfiguration `protobuf:"bytes,22,opt,name=chunk_list_storage,json=chunkListStorage,proto3" json:"chunk_list_storage,omitempty"` + ContentAddressableStorage *ContentAddressableStorageConfiguration `protobuf:"bytes,17,opt,name=content_addressable_storage,json=contentAddressableStorage,proto3" json:"content_addressable_storage,omitempty"` ActionCache *NonScannableBlobAccessConfiguration `protobuf:"bytes,18,opt,name=action_cache,json=actionCache,proto3" json:"action_cache,omitempty"` IndirectContentAddressableStorage *ScannableBlobAccessConfiguration `protobuf:"bytes,10,opt,name=indirect_content_addressable_storage,json=indirectContentAddressableStorage,proto3" json:"indirect_content_addressable_storage,omitempty"` InitialSizeClassCache *NonScannableBlobAccessConfiguration `protobuf:"bytes,11,opt,name=initial_size_class_cache,json=initialSizeClassCache,proto3" json:"initial_size_class_cache,omitempty"` @@ -105,20 +105,13 @@ func (x *ApplicationConfiguration) GetGlobal() *global.Configuration { return nil } -func (x *ApplicationConfiguration) GetContentAddressableStorage() *ScannableBlobAccessConfiguration { +func (x *ApplicationConfiguration) GetContentAddressableStorage() *ContentAddressableStorageConfiguration { if x != nil { return x.ContentAddressableStorage } return nil } -func (x *ApplicationConfiguration) GetChunkListStorage() *ScannableBlobAccessConfiguration { - if x != nil { - return x.ChunkListStorage - } - return nil -} - func (x *ApplicationConfiguration) GetActionCache() 
*NonScannableBlobAccessConfiguration { if x != nil { return x.ActionCache @@ -168,6 +161,66 @@ func (x *ApplicationConfiguration) GetZstdPool() *zstd.PoolConfiguration { return nil } +type ContentAddressableStorageConfiguration struct { + state protoimpl.MessageState `protogen:"open.v1"` + ChunkStorage *ScannableBlobAccessConfiguration `protobuf:"bytes,1,opt,name=chunk_storage,json=chunkStorage,proto3" json:"chunk_storage,omitempty"` + ChunkListStorage *ScannableBlobAccessConfiguration `protobuf:"bytes,2,opt,name=chunk_list_storage,json=chunkListStorage,proto3" json:"chunk_list_storage,omitempty"` + ContentDefinedChunkingParameterCache *digest.ExistenceCacheConfiguration `protobuf:"bytes,3,opt,name=content_defined_chunking_parameter_cache,json=contentDefinedChunkingParameterCache,proto3" json:"content_defined_chunking_parameter_cache,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ContentAddressableStorageConfiguration) Reset() { + *x = ContentAddressableStorageConfiguration{} + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ContentAddressableStorageConfiguration) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ContentAddressableStorageConfiguration) ProtoMessage() {} + +func (x *ContentAddressableStorageConfiguration) ProtoReflect() protoreflect.Message { + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ContentAddressableStorageConfiguration.ProtoReflect.Descriptor instead. 
+func (*ContentAddressableStorageConfiguration) Descriptor() ([]byte, []int) { + return file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_rawDescGZIP(), []int{1} +} + +func (x *ContentAddressableStorageConfiguration) GetChunkStorage() *ScannableBlobAccessConfiguration { + if x != nil { + return x.ChunkStorage + } + return nil +} + +func (x *ContentAddressableStorageConfiguration) GetChunkListStorage() *ScannableBlobAccessConfiguration { + if x != nil { + return x.ChunkListStorage + } + return nil +} + +func (x *ContentAddressableStorageConfiguration) GetContentDefinedChunkingParameterCache() *digest.ExistenceCacheConfiguration { + if x != nil { + return x.ContentDefinedChunkingParameterCache + } + return nil +} + type NonScannableBlobAccessConfiguration struct { state protoimpl.MessageState `protogen:"open.v1"` Backend *blobstore.BlobAccessConfiguration `protobuf:"bytes,1,opt,name=backend,proto3" json:"backend,omitempty"` @@ -179,7 +232,7 @@ type NonScannableBlobAccessConfiguration struct { func (x *NonScannableBlobAccessConfiguration) Reset() { *x = NonScannableBlobAccessConfiguration{} - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes[1] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes[2] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -191,7 +244,7 @@ func (x *NonScannableBlobAccessConfiguration) String() string { func (*NonScannableBlobAccessConfiguration) ProtoMessage() {} func (x *NonScannableBlobAccessConfiguration) ProtoReflect() protoreflect.Message { - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes[1] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes[2] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ 
-204,7 +257,7 @@ func (x *NonScannableBlobAccessConfiguration) ProtoReflect() protoreflect.Messag // Deprecated: Use NonScannableBlobAccessConfiguration.ProtoReflect.Descriptor instead. func (*NonScannableBlobAccessConfiguration) Descriptor() ([]byte, []int) { - return file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_rawDescGZIP(), []int{1} + return file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_rawDescGZIP(), []int{2} } func (x *NonScannableBlobAccessConfiguration) GetBackend() *blobstore.BlobAccessConfiguration { @@ -240,7 +293,7 @@ type ScannableBlobAccessConfiguration struct { func (x *ScannableBlobAccessConfiguration) Reset() { *x = ScannableBlobAccessConfiguration{} - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes[2] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes[3] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -252,7 +305,7 @@ func (x *ScannableBlobAccessConfiguration) String() string { func (*ScannableBlobAccessConfiguration) ProtoMessage() {} func (x *ScannableBlobAccessConfiguration) ProtoReflect() protoreflect.Message { - mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes[2] + mi := &file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes[3] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -265,7 +318,7 @@ func (x *ScannableBlobAccessConfiguration) ProtoReflect() protoreflect.Message { // Deprecated: Use ScannableBlobAccessConfiguration.ProtoReflect.Descriptor instead. 
func (*ScannableBlobAccessConfiguration) Descriptor() ([]byte, []int) { - return file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_rawDescGZIP(), []int{2} + return file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_rawDescGZIP(), []int{3} } func (x *ScannableBlobAccessConfiguration) GetBackend() *blobstore.BlobAccessConfiguration { @@ -300,16 +353,15 @@ var File_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_s const file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_rawDesc = "" + "\n" + - "Sgithub.com/buildbarn/bb-storage/pkg/proto/configuration/bb_storage/bb_storage.proto\x12\"buildbarn.configuration.bb_storage\x1a6build/bazel/remote/execution/v2/remote_execution.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/auth/auth.proto\x1aQgithub.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore/blobstore.proto\x1aMgithub.com/buildbarn/bb-storage/pkg/proto/configuration/builder/builder.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/global/global.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/grpc/grpc.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/zstd/zstd.proto\"\xb1\f\n" + + 
"Sgithub.com/buildbarn/bb-storage/pkg/proto/configuration/bb_storage/bb_storage.proto\x12\"buildbarn.configuration.bb_storage\x1a6build/bazel/remote/execution/v2/remote_execution.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/auth/auth.proto\x1aQgithub.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore/blobstore.proto\x1aMgithub.com/buildbarn/bb-storage/pkg/proto/configuration/builder/builder.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/digest/digest.proto\x1aKgithub.com/buildbarn/bb-storage/pkg/proto/configuration/global/global.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/grpc/grpc.proto\x1aGgithub.com/buildbarn/bb-storage/pkg/proto/configuration/zstd/zstd.proto\"\xc9\v\n" + "\x18ApplicationConfiguration\x12T\n" + "\fgrpc_servers\x18\x04 \x03(\v21.buildbarn.configuration.grpc.ServerConfigurationR\vgrpcServers\x12l\n" + "\n" + "schedulers\x18\x05 \x03(\v2L.buildbarn.configuration.bb_storage.ApplicationConfiguration.SchedulersEntryR\n" + "schedulers\x12;\n" + "\x1amaximum_message_size_bytes\x18\b \x01(\x03R\x17maximumMessageSizeBytes\x12E\n" + - "\x06global\x18\t \x01(\v2-.buildbarn.configuration.global.ConfigurationR\x06global\x12\x84\x01\n" + - "\x1bcontent_addressable_storage\x18\x11 \x01(\v2D.buildbarn.configuration.bb_storage.ScannableBlobAccessConfigurationR\x19contentAddressableStorage\x12r\n" + - "\x12chunk_list_storage\x18\x16 \x01(\v2D.buildbarn.configuration.bb_storage.ScannableBlobAccessConfigurationR\x10chunkListStorage\x12j\n" + + "\x06global\x18\t \x01(\v2-.buildbarn.configuration.global.ConfigurationR\x06global\x12\x8a\x01\n" + + "\x1bcontent_addressable_storage\x18\x11 \x01(\v2J.buildbarn.configuration.bb_storage.ContentAddressableStorageConfigurationR\x19contentAddressableStorage\x12j\n" + "\faction_cache\x18\x12 \x01(\v2G.buildbarn.configuration.bb_storage.NonScannableBlobAccessConfigurationR\vactionCache\x12\x95\x01\n" + "$indirect_content_addressable_storage\x18\n" + " 
\x01(\v2D.buildbarn.configuration.bb_storage.ScannableBlobAccessConfigurationR!indirectContentAddressableStorage\x12\x80\x01\n" + @@ -320,7 +372,11 @@ const file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb "\tzstd_pool\x18\x15 \x01(\v2/.buildbarn.configuration.zstd.PoolConfigurationR\bzstdPool\x1av\n" + "\x0fSchedulersEntry\x12\x10\n" + "\x03key\x18\x01 \x01(\tR\x03key\x12M\n" + - "\x05value\x18\x02 \x01(\v27.buildbarn.configuration.builder.SchedulerConfigurationR\x05value:\x028\x01J\x04\b\x01\x10\x02J\x04\b\x02\x10\x03J\x04\b\x03\x10\x04J\x04\b\x06\x10\aJ\x04\b\a\x10\bJ\x04\b\f\x10\rJ\x04\b\r\x10\x0eJ\x04\b\x0e\x10\x0fJ\x04\b\x0f\x10\x10\"\xb7\x02\n" + + "\x05value\x18\x02 \x01(\v27.buildbarn.configuration.builder.SchedulerConfigurationR\x05value:\x028\x01J\x04\b\x01\x10\x02J\x04\b\x02\x10\x03J\x04\b\x03\x10\x04J\x04\b\x06\x10\aJ\x04\b\a\x10\bJ\x04\b\f\x10\rJ\x04\b\r\x10\x0eJ\x04\b\x0e\x10\x0fJ\x04\b\x0f\x10\x10J\x04\b\x16\x10\x17\"\x9d\x03\n" + + "&ContentAddressableStorageConfiguration\x12i\n" + + "\rchunk_storage\x18\x01 \x01(\v2D.buildbarn.configuration.bb_storage.ScannableBlobAccessConfigurationR\fchunkStorage\x12r\n" + + "\x12chunk_list_storage\x18\x02 \x01(\v2D.buildbarn.configuration.bb_storage.ScannableBlobAccessConfigurationR\x10chunkListStorage\x12\x93\x01\n" + + "(content_defined_chunking_parameter_cache\x18\x03 \x01(\v2;.buildbarn.configuration.digest.ExistenceCacheConfigurationR$contentDefinedChunkingParameterCache\"\xb7\x02\n" + "#NonScannableBlobAccessConfiguration\x12T\n" + "\abackend\x18\x01 \x01(\v2:.buildbarn.configuration.blobstore.BlobAccessConfigurationR\abackend\x12\\\n" + "\x0eget_authorizer\x18\x02 \x01(\v25.buildbarn.configuration.auth.AuthorizerConfigurationR\rgetAuthorizer\x12\\\n" + @@ -343,46 +399,50 @@ func file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_ return file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_rawDescData } -var 
file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_msgTypes = make([]protoimpl.MessageInfo, 5) var file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_goTypes = []any{ - (*ApplicationConfiguration)(nil), // 0: buildbarn.configuration.bb_storage.ApplicationConfiguration - (*NonScannableBlobAccessConfiguration)(nil), // 1: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration - (*ScannableBlobAccessConfiguration)(nil), // 2: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration - nil, // 3: buildbarn.configuration.bb_storage.ApplicationConfiguration.SchedulersEntry - (*grpc.ServerConfiguration)(nil), // 4: buildbarn.configuration.grpc.ServerConfiguration - (*global.Configuration)(nil), // 5: buildbarn.configuration.global.Configuration - (*auth.AuthorizerConfiguration)(nil), // 6: buildbarn.configuration.auth.AuthorizerConfiguration - (v2.Compressor_Value)(0), // 7: build.bazel.remote.execution.v2.Compressor.Value - (*zstd.PoolConfiguration)(nil), // 8: buildbarn.configuration.zstd.PoolConfiguration - (*blobstore.BlobAccessConfiguration)(nil), // 9: buildbarn.configuration.blobstore.BlobAccessConfiguration - (*builder.SchedulerConfiguration)(nil), // 10: buildbarn.configuration.builder.SchedulerConfiguration + (*ApplicationConfiguration)(nil), // 0: buildbarn.configuration.bb_storage.ApplicationConfiguration + (*ContentAddressableStorageConfiguration)(nil), // 1: buildbarn.configuration.bb_storage.ContentAddressableStorageConfiguration + (*NonScannableBlobAccessConfiguration)(nil), // 2: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration + (*ScannableBlobAccessConfiguration)(nil), // 3: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration + nil, // 4: 
buildbarn.configuration.bb_storage.ApplicationConfiguration.SchedulersEntry + (*grpc.ServerConfiguration)(nil), // 5: buildbarn.configuration.grpc.ServerConfiguration + (*global.Configuration)(nil), // 6: buildbarn.configuration.global.Configuration + (*auth.AuthorizerConfiguration)(nil), // 7: buildbarn.configuration.auth.AuthorizerConfiguration + (v2.Compressor_Value)(0), // 8: build.bazel.remote.execution.v2.Compressor.Value + (*zstd.PoolConfiguration)(nil), // 9: buildbarn.configuration.zstd.PoolConfiguration + (*digest.ExistenceCacheConfiguration)(nil), // 10: buildbarn.configuration.digest.ExistenceCacheConfiguration + (*blobstore.BlobAccessConfiguration)(nil), // 11: buildbarn.configuration.blobstore.BlobAccessConfiguration + (*builder.SchedulerConfiguration)(nil), // 12: buildbarn.configuration.builder.SchedulerConfiguration } var file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_depIdxs = []int32{ - 4, // 0: buildbarn.configuration.bb_storage.ApplicationConfiguration.grpc_servers:type_name -> buildbarn.configuration.grpc.ServerConfiguration - 3, // 1: buildbarn.configuration.bb_storage.ApplicationConfiguration.schedulers:type_name -> buildbarn.configuration.bb_storage.ApplicationConfiguration.SchedulersEntry - 5, // 2: buildbarn.configuration.bb_storage.ApplicationConfiguration.global:type_name -> buildbarn.configuration.global.Configuration - 2, // 3: buildbarn.configuration.bb_storage.ApplicationConfiguration.content_addressable_storage:type_name -> buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration - 2, // 4: buildbarn.configuration.bb_storage.ApplicationConfiguration.chunk_list_storage:type_name -> buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration - 1, // 5: buildbarn.configuration.bb_storage.ApplicationConfiguration.action_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration - 2, // 6: 
buildbarn.configuration.bb_storage.ApplicationConfiguration.indirect_content_addressable_storage:type_name -> buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration - 1, // 7: buildbarn.configuration.bb_storage.ApplicationConfiguration.initial_size_class_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration - 1, // 8: buildbarn.configuration.bb_storage.ApplicationConfiguration.file_system_access_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration - 6, // 9: buildbarn.configuration.bb_storage.ApplicationConfiguration.execute_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 7, // 10: buildbarn.configuration.bb_storage.ApplicationConfiguration.supported_compressors:type_name -> build.bazel.remote.execution.v2.Compressor.Value - 8, // 11: buildbarn.configuration.bb_storage.ApplicationConfiguration.zstd_pool:type_name -> buildbarn.configuration.zstd.PoolConfiguration - 9, // 12: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 6, // 13: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.get_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 6, // 14: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.put_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 9, // 15: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration - 6, // 16: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.get_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 6, // 17: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.put_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 6, // 18: 
buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.find_missing_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration - 10, // 19: buildbarn.configuration.bb_storage.ApplicationConfiguration.SchedulersEntry.value:type_name -> buildbarn.configuration.builder.SchedulerConfiguration - 20, // [20:20] is the sub-list for method output_type - 20, // [20:20] is the sub-list for method input_type - 20, // [20:20] is the sub-list for extension type_name - 20, // [20:20] is the sub-list for extension extendee - 0, // [0:20] is the sub-list for field type_name + 5, // 0: buildbarn.configuration.bb_storage.ApplicationConfiguration.grpc_servers:type_name -> buildbarn.configuration.grpc.ServerConfiguration + 4, // 1: buildbarn.configuration.bb_storage.ApplicationConfiguration.schedulers:type_name -> buildbarn.configuration.bb_storage.ApplicationConfiguration.SchedulersEntry + 6, // 2: buildbarn.configuration.bb_storage.ApplicationConfiguration.global:type_name -> buildbarn.configuration.global.Configuration + 1, // 3: buildbarn.configuration.bb_storage.ApplicationConfiguration.content_addressable_storage:type_name -> buildbarn.configuration.bb_storage.ContentAddressableStorageConfiguration + 2, // 4: buildbarn.configuration.bb_storage.ApplicationConfiguration.action_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration + 3, // 5: buildbarn.configuration.bb_storage.ApplicationConfiguration.indirect_content_addressable_storage:type_name -> buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration + 2, // 6: buildbarn.configuration.bb_storage.ApplicationConfiguration.initial_size_class_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration + 2, // 7: buildbarn.configuration.bb_storage.ApplicationConfiguration.file_system_access_cache:type_name -> buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration + 7, // 8: 
buildbarn.configuration.bb_storage.ApplicationConfiguration.execute_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 8, // 9: buildbarn.configuration.bb_storage.ApplicationConfiguration.supported_compressors:type_name -> build.bazel.remote.execution.v2.Compressor.Value + 9, // 10: buildbarn.configuration.bb_storage.ApplicationConfiguration.zstd_pool:type_name -> buildbarn.configuration.zstd.PoolConfiguration + 3, // 11: buildbarn.configuration.bb_storage.ContentAddressableStorageConfiguration.chunk_storage:type_name -> buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration + 3, // 12: buildbarn.configuration.bb_storage.ContentAddressableStorageConfiguration.chunk_list_storage:type_name -> buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration + 10, // 13: buildbarn.configuration.bb_storage.ContentAddressableStorageConfiguration.content_defined_chunking_parameter_cache:type_name -> buildbarn.configuration.digest.ExistenceCacheConfiguration + 11, // 14: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 7, // 15: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.get_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 7, // 16: buildbarn.configuration.bb_storage.NonScannableBlobAccessConfiguration.put_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 11, // 17: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.backend:type_name -> buildbarn.configuration.blobstore.BlobAccessConfiguration + 7, // 18: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.get_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 7, // 19: buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.put_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 7, // 20: 
buildbarn.configuration.bb_storage.ScannableBlobAccessConfiguration.find_missing_authorizer:type_name -> buildbarn.configuration.auth.AuthorizerConfiguration + 12, // 21: buildbarn.configuration.bb_storage.ApplicationConfiguration.SchedulersEntry.value:type_name -> buildbarn.configuration.builder.SchedulerConfiguration + 22, // [22:22] is the sub-list for method output_type + 22, // [22:22] is the sub-list for method input_type + 22, // [22:22] is the sub-list for extension type_name + 22, // [22:22] is the sub-list for extension extendee + 0, // [0:22] is the sub-list for field type_name } func init() { @@ -398,7 +458,7 @@ func file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_rawDesc), len(file_github_com_buildbarn_bb_storage_pkg_proto_configuration_bb_storage_bb_storage_proto_rawDesc)), NumEnums: 0, - NumMessages: 4, + NumMessages: 5, NumExtensions: 0, NumServices: 0, }, diff --git a/pkg/proto/configuration/bb_storage/bb_storage.proto b/pkg/proto/configuration/bb_storage/bb_storage.proto index 55f431b9b..58ee1a17b 100644 --- a/pkg/proto/configuration/bb_storage/bb_storage.proto +++ b/pkg/proto/configuration/bb_storage/bb_storage.proto @@ -6,6 +6,7 @@ import "build/bazel/remote/execution/v2/remote_execution.proto"; import "github.com/buildbarn/bb-storage/pkg/proto/configuration/auth/auth.proto"; import "github.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore/blobstore.proto"; import "github.com/buildbarn/bb-storage/pkg/proto/configuration/builder/builder.proto"; +import "github.com/buildbarn/bb-storage/pkg/proto/configuration/digest/digest.proto"; import "github.com/buildbarn/bb-storage/pkg/proto/configuration/global/global.proto"; import "github.com/buildbarn/bb-storage/pkg/proto/configuration/grpc/grpc.proto"; import 
"github.com/buildbarn/bb-storage/pkg/proto/configuration/zstd/zstd.proto"; @@ -57,13 +58,9 @@ message ApplicationConfiguration { // Common configuration options that apply to all Buildbarn binaries. buildbarn.configuration.global.Configuration global = 9; - // Optional: Blobstore configuration for the Content Addressable - // Storage (CAS). - ScannableBlobAccessConfiguration content_addressable_storage = 17; - - // Optional: Blobstore configuration for the Content List Storage - // (CLS). - ScannableBlobAccessConfiguration chunk_list_storage = 22; + // Optional: Blobstore configurations for the Content + // AddressableContentAddressa Storage (CAS). + ContentAddressableStorageConfiguration content_addressable_storage = 17; // Optional: Blobstore configuration for the Action Cache (AC). NonScannableBlobAccessConfiguration action_cache = 18; @@ -113,6 +110,29 @@ message ApplicationConfiguration { // ByteStream server, and enables ZSTD compression for ByteStream // operations where the server supports it. buildbarn.configuration.zstd.PoolConfiguration zstd_pool = 21; + + // Was 'chunk_list_storage'. Has been moved into the + // content_addressable_storage. + reserved 22; +} + +// Storage configuration for a content addressable storage. +message ContentAddressableStorageConfiguration { + // Blob Access Configuration for the chunk storage of the content + // addressable storage. + ScannableBlobAccessConfiguration chunk_storage = 1; + + // Blob Access Configuration for the chunk list storage of the content + // addressable storage. + ScannableBlobAccessConfiguration chunk_list_storage = 2; + + // Optional: In memory cache for getting the content defined chunking + // parameters for a specific instance name. Useful for blob access + // configurations where the parameters are fetched from an external + // server (i.e. the GrpcBlobAccessConfiguration). Can be set to a + // relatively low value like '60s'. 
+ buildbarn.configuration.digest.ExistenceCacheConfiguration + content_defined_chunking_parameter_cache = 3; } // Storage configuration for backends which don't allow batch digest