From 5c8b4564f234f68a5d9668a01f60f78aa85a7b72 Mon Sep 17 00:00:00 2001 From: konrad Date: Wed, 24 Jun 2026 21:36:41 +0400 Subject: [PATCH 1/8] Some fixes and improvements --- cmd/batch-handlers.go | 12 ++--- cmd/batchjobmetric_string.go | 2 +- cmd/data-usage-cache.go | 7 +-- cmd/decommetric_string.go | 2 +- cmd/dynamic-timeouts.go | 9 ++-- cmd/erasure-common.go | 23 ++++++---- cmd/erasure.go | 17 ++++--- cmd/healingmetric_string.go | 2 +- cmd/iam-object-store.go | 2 +- cmd/lceventsrc_string.go | 2 +- cmd/metacache-stream.go | 7 +++ cmd/notification.go | 44 ++++++++++++++++--- cmd/os-dirent_namelen_linux.go | 5 +-- cmd/postpolicyform.go | 4 +- cmd/rebalancemetric_string.go | 2 +- cmd/rebalstatus_string.go | 2 +- cmd/scannermetric_string.go | 2 +- cmd/server-startup-msg.go | 7 +-- cmd/sftp-server-driver.go | 6 +-- cmd/signature-v2.go | 8 +--- cmd/site-replication-utils.go | 6 +-- cmd/storage-rest-server.go | 22 ++++++++-- cmd/storage-rest_test.go | 23 ++++++++++ cmd/storagemetric_string.go | 2 +- cmd/warm-backend-azure.go | 3 +- cmd/xl-storage-format-v2_string.go | 4 +- docs/debugging/inspect/go.mod | 6 +-- docs/debugging/inspect/go.sum | 8 ++-- docs/debugging/s3-verify/go.mod | 10 ++--- docs/debugging/s3-verify/go.sum | 4 ++ docs/resiliency/docker-compose.yaml | 12 +++-- internal/arn/arn.go | 2 +- internal/deadlineconn/deadlineconn_test.go | 14 ++---- internal/grid/grid_test.go | 2 +- .../target/loggertypes/targettype_string.go | 2 +- .../logger/target/testlogger/testlogger.go | 7 +-- internal/s3select/csv/reader_contrib_test.go | 2 +- internal/s3select/select.go | 1 + 38 files changed, 181 insertions(+), 114 deletions(-) diff --git a/cmd/batch-handlers.go b/cmd/batch-handlers.go index 454feb19adc4f..fb21d5ab9b5ef 100644 --- a/cmd/batch-handlers.go +++ b/cmd/batch-handlers.go @@ -1771,22 +1771,22 @@ func (a adminAPIHandlers) StartBatchJob(w http.ResponseWriter, r *http.Request) // Fill with default values if job.Replicate != nil { if job.Replicate.Source.Snowball.Disable == nil { - job.Replicate.Source.Snowball.Disable = ptr(false) + job.Replicate.Source.Snowball.Disable = new(false) } if job.Replicate.Source.Snowball.Batch == nil { - job.Replicate.Source.Snowball.Batch = ptr(100) + job.Replicate.Source.Snowball.Batch = new(100) } if job.Replicate.Source.Snowball.InMemory == nil { - job.Replicate.Source.Snowball.InMemory = ptr(true) + job.Replicate.Source.Snowball.InMemory = new(true) } if job.Replicate.Source.Snowball.Compress == nil { - job.Replicate.Source.Snowball.Compress = ptr(false) + job.Replicate.Source.Snowball.Compress = new(false) } if job.Replicate.Source.Snowball.SmallerThan == nil { - job.Replicate.Source.Snowball.SmallerThan = ptr("5MiB") + job.Replicate.Source.Snowball.SmallerThan = new("5MiB") } if job.Replicate.Source.Snowball.SkipErrs == nil { - job.Replicate.Source.Snowball.SkipErrs = ptr(true) + job.Replicate.Source.Snowball.SkipErrs = new(true) } } diff --git a/cmd/batchjobmetric_string.go b/cmd/batchjobmetric_string.go index a1697a19d623e..d2615681ef293 100644 --- a/cmd/batchjobmetric_string.go +++ b/cmd/batchjobmetric_string.go @@ -18,7 +18,7 @@ const _batchJobMetric_name = "ReplicationKeyRotationExpire" var _batchJobMetric_index = [...]uint8{0, 11, 22, 28} func (i batchJobMetric) String() string { - if i >= batchJobMetric(len(_batchJobMetric_index)-1) { + if i < 0 || i >= batchJobMetric(len(_batchJobMetric_index)-1) { return "batchJobMetric(" + strconv.FormatInt(int64(i), 10) + ")" } return _batchJobMetric_name[_batchJobMetric_index[i]:_batchJobMetric_index[i+1]] diff --git a/cmd/data-usage-cache.go b/cmd/data-usage-cache.go index 5752daae057f7..8f9547c90ad19 100644 --- a/cmd/data-usage-cache.go +++ b/cmd/data-usage-cache.go @@ -635,11 +635,12 @@ func (d *dataUsageCache) forceCompact(limit int) { // StringAll returns a detailed string representation of all entries in the cache. func (d *dataUsageCache) StringAll() string { // Remove bloom filter from print. - s := fmt.Sprintf("info:%+v\n", d.Info) + var s strings.Builder + s.WriteString(fmt.Sprintf("info:%+v\n", d.Info)) for k, v := range d.Cache { - s += fmt.Sprintf("\t%v: %+v\n", k, v) + s.WriteString(fmt.Sprintf("\t%v: %+v\n", k, v)) } - return strings.TrimSpace(s) + return strings.TrimSpace(s.String()) } // String returns a human readable representation of the string. diff --git a/cmd/decommetric_string.go b/cmd/decommetric_string.go index fb485ab016ca1..a0afe2f76c425 100644 --- a/cmd/decommetric_string.go +++ b/cmd/decommetric_string.go @@ -18,7 +18,7 @@ const _decomMetric_name = "DecommissionBucketDecommissionObjectDecommissionRemov var _decomMetric_index = [...]uint8{0, 18, 36, 60} func (i decomMetric) String() string { - if i >= decomMetric(len(_decomMetric_index)-1) { + if i < 0 || i >= decomMetric(len(_decomMetric_index)-1) { return "decomMetric(" + strconv.FormatInt(int64(i), 10) + ")" } return _decomMetric_name[_decomMetric_index[i]:_decomMetric_index[i+1]] diff --git a/cmd/dynamic-timeouts.go b/cmd/dynamic-timeouts.go index 9c4f297bbe3a6..7bcf4e4a544db 100644 --- a/cmd/dynamic-timeouts.go +++ b/cmd/dynamic-timeouts.go @@ -129,13 +129,10 @@ func (dt *dynamicTimeout) adjust(entries [dynamicTimeoutLogSize]time.Duration) { if failPct > dynamicTimeoutIncreaseThresholdPct { // We are hitting the timeout too often, so increase the timeout by 25% - timeout := min( + timeout := max( // Set upper cap. - atomic.LoadInt64(&dt.timeout)*125/100, int64(maxDynamicTimeout)) - // Safety, shouldn't happen - if timeout < dt.minimum { - timeout = dt.minimum - } + min(atomic.LoadInt64(&dt.timeout)*125/100, int64(maxDynamicTimeout)), + dt.minimum) atomic.StoreInt64(&dt.timeout, timeout) } else if failPct < dynamicTimeoutDecreaseThresholdPct { // We are hitting the timeout relatively few times, diff --git a/cmd/erasure-common.go b/cmd/erasure-common.go index 7146766ac2e74..acda124ea619e 100644 --- a/cmd/erasure-common.go +++ b/cmd/erasure-common.go @@ -26,17 +26,21 @@ import ( func (er erasureObjects) getOnlineDisks() (newDisks []StorageAPI) { disks := er.getDisks() + var wg sync.WaitGroup var mu sync.Mutex + newDisks = make([]StorageAPI, 0, len(disks)) + r := rand.New(rand.NewSource(time.Now().UnixNano())) + for _, i := range r.Perm(len(disks)) { - wg.Add(1) - go func() { - defer wg.Done() - if disks[i] == nil { - return - } - di, err := disks[i].DiskInfo(context.Background(), DiskInfoOptions{}) + disk := disks[i] // avoids repeated indexing + if disk == nil { + // avoids spawning goroutines that immediately return + continue + } + wg.Go(func() { + di, err := disk.DiskInfo(context.Background(), DiskInfoOptions{}) if err != nil || di.Healing { // - Do not consume disks which are not reachable // unformatted or simply not accessible for some reason. @@ -48,10 +52,11 @@ func (er erasureObjects) getOnlineDisks() (newDisks []StorageAPI) { } mu.Lock() - newDisks = append(newDisks, disks[i]) + newDisks = append(newDisks, disk) mu.Unlock() - }() + }) } + wg.Wait() return newDisks } diff --git a/cmd/erasure.go b/cmd/erasure.go index 4e6674c35b1c8..652c3aac86331 100644 --- a/cmd/erasure.go +++ b/cmd/erasure.go @@ -285,16 +285,15 @@ func (er erasureObjects) getOnlineDisksWithHealingAndInfo(inclHealing bool) (new infos := make([]DiskInfo, len(disks)) r := rand.New(rand.NewSource(time.Now().UnixNano())) for _, i := range r.Perm(len(disks)) { - wg.Add(1) - go func() { - defer wg.Done() + disk := disks[i] + if disk == nil { + infos[i].Error = errDiskNotFound.Error() + continue + } - disk := disks[i] - if disk == nil { - infos[i].Error = errDiskNotFound.Error() - return - } + i, disk := i, disk + wg.Go(func() { di, err := disk.DiskInfo(context.Background(), DiskInfoOptions{}) infos[i] = di if err != nil { @@ -302,7 +301,7 @@ func (er erasureObjects) getOnlineDisksWithHealingAndInfo(inclHealing bool) (new // unformatted or simply not accessible for some reason. infos[i].Error = err.Error() } - }() + }) } wg.Wait() diff --git a/cmd/healingmetric_string.go b/cmd/healingmetric_string.go index 012fc7aa9304f..6573a1c22c8e6 100644 --- a/cmd/healingmetric_string.go +++ b/cmd/healingmetric_string.go @@ -18,7 +18,7 @@ const _healingMetric_name = "BucketObjectCheckAbandonedParts" var _healingMetric_index = [...]uint8{0, 6, 12, 31} func (i healingMetric) String() string { - if i >= healingMetric(len(_healingMetric_index)-1) { + if i < 0 || i >= healingMetric(len(_healingMetric_index)-1) { return "healingMetric(" + strconv.FormatInt(int64(i), 10) + ")" } return _healingMetric_name[_healingMetric_index[i]:_healingMetric_index[i+1]] diff --git a/cmd/iam-object-store.go b/cmd/iam-object-store.go index 7da764577778f..89931148d5761 100644 --- a/cmd/iam-object-store.go +++ b/cmd/iam-object-store.go @@ -576,7 +576,7 @@ func (iamOS *IAMObjectStore) loadAllFromObjStore(ctx context.Context, cache *iam if took := time.Since(listStartTime); took > maxIAMLoadOpTime { var s strings.Builder for k, v := range listedConfigItems { - fmt.Fprintf(&s, " %s: %d items\n", k, len(v)) + s.WriteString(fmt.Sprintf(" %s: %d items\n", k, len(v))) } logger.Info("listAllIAMConfigItems took %.2fs with contents:\n%s", took.Seconds(), s.String()) } diff --git a/cmd/lceventsrc_string.go b/cmd/lceventsrc_string.go index dd121abb461e5..ea3451b8188a3 100644 --- a/cmd/lceventsrc_string.go +++ b/cmd/lceventsrc_string.go @@ -26,7 +26,7 @@ const _lcEventSrc_name = "NoneHealScannerDecomRebals3HeadObjects3GetObjects3List var _lcEventSrc_index = [...]uint8{0, 4, 8, 15, 20, 25, 37, 48, 61, 72, 84, 109} func (i lcEventSrc) String() string { - if i >= lcEventSrc(len(_lcEventSrc_index)-1) { + if i < 0 || i >= lcEventSrc(len(_lcEventSrc_index)-1) { return "lcEventSrc(" + strconv.FormatInt(int64(i), 10) + ")" } return _lcEventSrc_name[_lcEventSrc_index[i]:_lcEventSrc_index[i+1]] diff --git a/cmd/metacache-stream.go b/cmd/metacache-stream.go index cf61895fe4d7b..f08f85054c020 100644 --- a/cmd/metacache-stream.go +++ b/cmd/metacache-stream.go @@ -769,8 +769,14 @@ type metacacheBlockWriter struct { // Each block is the size of the capacity of the input channel. // The caller should close to indicate the stream has ended. func newMetacacheBlockWriter(in <-chan metaCacheEntry, nextBlock func(b *metacacheBlock) error) *metacacheBlockWriter { + blockEntries := cap(in) + if blockEntries <= 0 { + blockEntries = 1 + } + w := metacacheBlockWriter{blockEntries: cap(in)} w.wg.Add(1) + //TODO: this looks messy go func() { defer w.wg.Done() var current metacacheBlock @@ -784,6 +790,7 @@ func newMetacacheBlockWriter(in <-chan metaCacheEntry, nextBlock func(b *metacac block := newMetacacheWriter(buf, 1<<20) defer block.Close() + finishBlock := func() { if err := block.Close(); err != nil { w.streamErr = err diff --git a/cmd/notification.go b/cmd/notification.go index 152856ac23085..2bfa0efca4963 100644 --- a/cmd/notification.go +++ b/cmd/notification.go @@ -267,19 +267,29 @@ func (sys *NotificationSys) LoadServiceAccount(ctx context.Context, accessKey st func (sys *NotificationSys) BackgroundHealStatus(ctx context.Context) ([]madmin.BgHealState, []NotificationPeerErr) { ng := WithNPeers(len(sys.peerClients)) states := make([]madmin.BgHealState, len(sys.peerClients)) + for idx, client := range sys.peerClients { + idx := idx client := client + + var host xnet.Host + if client != nil && client.host != nil { + host = *client.host + } + ng.Go(ctx, func() error { - if client == nil { + if client == nil || client.host == nil { return errPeerNotReachable } + st, err := client.BackgroundHealStatus(ctx) if err != nil { return err } + states[idx] = st return nil - }, idx, *client.host) + }, idx, host) } return states, ng.Wait() @@ -475,28 +485,48 @@ var errPeerNotReachable = errors.New("peer is not reachable") func (sys *NotificationSys) GetLocks(ctx context.Context, r *http.Request) []*PeerLocks { locksResp := make([]*PeerLocks, len(sys.peerClients)) g := errgroup.WithNErrs(len(sys.peerClients)) + for index, client := range sys.peerClients { + index := index client := client + + peerAddr := "" + if client != nil && client.host != nil { + peerAddr = client.host.String() + } + g.Go(func() error { if client == nil { return errPeerNotReachable } - serverLocksResp, err := sys.peerClients[index].GetLocks(ctx) + + serverLocksResp, err := client.GetLocks(ctx) if err != nil { return err } locksResp[index] = &PeerLocks{ - Addr: sys.peerClients[index].host.String(), + Addr: peerAddr, Locks: serverLocksResp, } return nil }, index) } for index, err := range g.Wait() { - reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", - sys.peerClients[index].host.String()) + if err == nil { + continue + } + + client := sys.peerClients[index] + + peerAddr := "" + if client != nil && client.host != nil { + peerAddr = client.host.String() + } + + reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", peerAddr) ctx := logger.SetReqInfo(ctx, reqInfo) - peersLogOnceIf(ctx, err, sys.peerClients[index].host.String()) + + peersLogOnceIf(ctx, err, peerAddr) } locksResp = append(locksResp, &PeerLocks{ Addr: getHostName(r), diff --git a/cmd/os-dirent_namelen_linux.go b/cmd/os-dirent_namelen_linux.go index 14c44239453cb..525a2e8185fcd 100644 --- a/cmd/os-dirent_namelen_linux.go +++ b/cmd/os-dirent_namelen_linux.go @@ -31,10 +31,7 @@ func direntNamlen(dirent *syscall.Dirent) (uint64, error) { const fixedHdr = uint16(unsafe.Offsetof(syscall.Dirent{}.Name)) nameBuf := (*[unsafe.Sizeof(dirent.Name)]byte)(unsafe.Pointer(&dirent.Name[0])) const nameBufLen = uint16(len(nameBuf)) - limit := dirent.Reclen - fixedHdr - if limit > nameBufLen { - limit = nameBufLen - } + limit := min(dirent.Reclen-fixedHdr, nameBufLen) // Avoid bugs in long file names // https://github.com/golang/tools/commit/5f9a5413737ba4b4f692214aebee582b47c8be74 nameLen := bytes.IndexByte(nameBuf[:limit], 0) diff --git a/cmd/postpolicyform.go b/cmd/postpolicyform.go index 1c2eeaf40c562..ad117f2eb67ee 100644 --- a/cmd/postpolicyform.go +++ b/cmd/postpolicyform.go @@ -186,7 +186,7 @@ func parsePostPolicyForm(r io.Reader) (PostPolicyForm, error) { for k, v := range condt { if !isString(v) { // Pre-check value type. // All values must be of type string. - return parsedPolicy, fmt.Errorf("Unknown type %s of conditional field value %s found in POST policy form", reflect.TypeOf(condt).String(), condt) + return parsedPolicy, fmt.Errorf("Unknown type %s of conditional field value %s found in POST policy form", reflect.TypeFor[map[string]any]().String(), condt) } // {"acl": "public-read" } is an alternate way to indicate - [ "eq", "$acl", "public-read" ] // In this case we will just collapse this into "eq" for all use cases. @@ -240,7 +240,7 @@ func parsePostPolicyForm(r io.Reader) (PostPolicyForm, error) { default: // Condition should be valid. return parsedPolicy, fmt.Errorf("Unknown type %s of conditional field value %s found in POST policy form", - reflect.TypeOf(condt).String(), condt) + reflect.TypeFor[[]any]().String(), condt) } default: return parsedPolicy, fmt.Errorf("Unknown field %s of type %s found in POST policy form", diff --git a/cmd/rebalancemetric_string.go b/cmd/rebalancemetric_string.go index 930e04341de39..e14b186049c0f 100644 --- a/cmd/rebalancemetric_string.go +++ b/cmd/rebalancemetric_string.go @@ -20,7 +20,7 @@ const _rebalanceMetric_name = "RebalanceBucketsRebalanceBucketRebalanceObjectReb var _rebalanceMetric_index = [...]uint8{0, 16, 31, 46, 67, 79} func (i rebalanceMetric) String() string { - if i >= rebalanceMetric(len(_rebalanceMetric_index)-1) { + if i < 0 || i >= rebalanceMetric(len(_rebalanceMetric_index)-1) { return "rebalanceMetric(" + strconv.FormatInt(int64(i), 10) + ")" } return _rebalanceMetric_name[_rebalanceMetric_index[i]:_rebalanceMetric_index[i+1]] diff --git a/cmd/rebalstatus_string.go b/cmd/rebalstatus_string.go index 0dc74b217aad8..06d2a2c02cc0f 100644 --- a/cmd/rebalstatus_string.go +++ b/cmd/rebalstatus_string.go @@ -20,7 +20,7 @@ const _rebalStatus_name = "NoneStartedCompletedStoppedFailed" var _rebalStatus_index = [...]uint8{0, 4, 11, 20, 27, 33} func (i rebalStatus) String() string { - if i >= rebalStatus(len(_rebalStatus_index)-1) { + if i < 0 || i >= rebalStatus(len(_rebalStatus_index)-1) { return "rebalStatus(" + strconv.FormatInt(int64(i), 10) + ")" } return _rebalStatus_name[_rebalStatus_index[i]:_rebalStatus_index[i+1]] diff --git a/cmd/scannermetric_string.go b/cmd/scannermetric_string.go index 32c7a4f891ca8..209e8fad87de0 100644 --- a/cmd/scannermetric_string.go +++ b/cmd/scannermetric_string.go @@ -37,7 +37,7 @@ const _scannerMetric_name = "ReadMetadataCheckMissingSaveUsageApplyAllApplyVersi var _scannerMetric_index = [...]uint8{0, 12, 24, 33, 41, 53, 65, 74, 77, 93, 98, 112, 127, 147, 157, 167, 186, 198, 208, 217, 232, 245, 249} func (i scannerMetric) String() string { - if i >= scannerMetric(len(_scannerMetric_index)-1) { + if i < 0 || i >= scannerMetric(len(_scannerMetric_index)-1) { return "scannerMetric(" + strconv.FormatInt(int64(i), 10) + ")" } return _scannerMetric_name[_scannerMetric_index[i]:_scannerMetric_index[i+1]] diff --git a/cmd/server-startup-msg.go b/cmd/server-startup-msg.go index ce98d0086b96c..75fb4403c4e33 100644 --- a/cmd/server-startup-msg.go +++ b/cmd/server-startup-msg.go @@ -152,11 +152,12 @@ func printLambdaTargets() { return } - arnMsg := color.Blue("Object Lambda ARNs: ") + var arnMsg strings.Builder + arnMsg.WriteString(color.Blue("Object Lambda ARNs: ")) for _, arn := range globalLambdaTargetList.List(globalSite.Region()) { - arnMsg += color.Bold(fmt.Sprintf("%s ", arn)) + arnMsg.WriteString(color.Bold(fmt.Sprintf("%s ", arn))) } - logger.Startup(arnMsg + "\n") + logger.Startup(arnMsg.String() + "\n") } // Prints bucket notification configurations. diff --git a/cmd/sftp-server-driver.go b/cmd/sftp-server-driver.go index 3ce7c0b43adf1..5a0e2662cadfe 100644 --- a/cmd/sftp-server-driver.go +++ b/cmd/sftp-server-driver.go @@ -285,8 +285,7 @@ func (f *sftpDriver) Filewrite(r *sftp.Request) (w io.WriterAt, err error) { r: pr, wg: &sync.WaitGroup{}, } - wa.wg.Add(1) - go func() { + wa.wg.Go(func() { oi, err := clnt.PutObject(r.Context(), bucket, object, pr, -1, minio.PutObjectOptions{ ContentType: mimedb.TypeByExtension(path.Ext(object)), DisableContentSha256: true, @@ -294,8 +293,7 @@ func (f *sftpDriver) Filewrite(r *sftp.Request) (w io.WriterAt, err error) { }) stopFn(oi.Size, err) pr.CloseWithError(err) - wa.wg.Done() - }() + }) return wa, nil } diff --git a/cmd/signature-v2.go b/cmd/signature-v2.go index 1fd42ba70233b..d494f611c6559 100644 --- a/cmd/signature-v2.go +++ b/cmd/signature-v2.go @@ -347,13 +347,7 @@ func canonicalizedResourceV2(encodedResource, encodedQuery string) string { queries := strings.Split(encodedQuery, "&") keyval := make(map[string]string) for _, query := range queries { - key := query - val := "" - index := strings.Index(query, "=") - if index != -1 { - key = query[:index] - val = query[index+1:] - } + key, val, _ := strings.Cut(query, "=") keyval[key] = val } diff --git a/cmd/site-replication-utils.go b/cmd/site-replication-utils.go index 192275845d97b..1618f1767cf20 100644 --- a/cmd/site-replication-utils.go +++ b/cmd/site-replication-utils.go @@ -177,11 +177,9 @@ func (sm *siteResyncMetrics) save(ctx context.Context) { } rs.LastSaved = UTCNow() sm.peerResyncMap[dID] = rs - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { saveSiteResyncMetadata(ctx, st, newObjectLayerFn()) - }() + }) } } wg.Wait() diff --git a/cmd/storage-rest-server.go b/cmd/storage-rest-server.go index d7ee11493ff03..940aececdf49b 100644 --- a/cmd/storage-rest-server.go +++ b/cmd/storage-rest-server.go @@ -254,15 +254,13 @@ func (s *storageRESTServer) NSScannerHandler(ctx context.Context, params *nsScan // Collect updates, stream them before the full cache is sent. updates := make(chan dataUsageEntry, 1) var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { for update := range updates { resp := storageNSScannerRPC.NewResponse() resp.Update = &update out <- resp } - }() + }) ui, err := s.getStorage().NSScanner(ctx, *params.Cache, updates, madmin.HealScanMode(params.ScanMode), nil) wg.Wait() if err != nil { @@ -543,6 +541,14 @@ func (s *storageRESTServer) ReadPartsHandler(w http.ResponseWriter, r *http.Requ return } + // Reject path traversal smuggled through msgpack body (CVE-2026-42600). + for _, p := range preq.Paths { + if hasBadPathComponent(p) { + s.writeErrorResponse(w, errInvalidArgument) + return + } + } + done := keepHTTPResponseAlive(w) infos, err := s.getStorage().ReadParts(r.Context(), volume, preq.Paths...) done(nil) @@ -1278,6 +1284,14 @@ func (s *storageRESTServer) DeleteBulkHandler(w http.ResponseWriter, r *http.Req return } + // Reject path traversal smuggled through msgpack body (CVE-2026-42600). + for _, p := range req.Paths { + if hasBadPathComponent(p) { + s.writeErrorResponse(w, errInvalidArgument) + return + } + } + volume := r.Form.Get(storageRESTVolume) keepHTTPResponseAlive(w)(s.getStorage().DeleteBulk(r.Context(), volume, req.Paths...)) } diff --git a/cmd/storage-rest_test.go b/cmd/storage-rest_test.go index a601d7996a1d0..3f95e7c0f1b01 100644 --- a/cmd/storage-rest_test.go +++ b/cmd/storage-rest_test.go @@ -411,3 +411,26 @@ func TestStorageRESTClientRenameFile(t *testing.T) { testStorageAPIRenameFile(t, restClient) } + +// TestStorageRESTClientPathTraversal verifies that ReadMultiple, ReadParts, and +// DeleteBulk all reject ".." traversal sequences smuggled in the msgpack body +// (CVE-2026-42600). The global setRequestValidityMiddleware only inspects +// r.URL.Path and form values, so without handler-level checks a caller holding +// the cluster root JWT could escape the configured drive root. +func TestStorageRESTClientPathTraversal(t *testing.T) { + restClient := newStorageRESTHTTPServerClient(t) + + t.Run("ReadParts", func(t *testing.T) { + _, err := restClient.ReadParts(t.Context(), "foo", "../etc/passwd") + if err == nil { + t.Fatal("expected error for traversal in ReadParts paths, got nil") + } + }) + + t.Run("DeleteBulk", func(t *testing.T) { + err := restClient.DeleteBulk(t.Context(), "foo", "../etc/shadow") + if err == nil { + t.Fatal("expected error for traversal in DeleteBulk paths, got nil") + } + }) +} diff --git a/cmd/storagemetric_string.go b/cmd/storagemetric_string.go index 21fa89e1c3567..957d48b239fc5 100644 --- a/cmd/storagemetric_string.go +++ b/cmd/storagemetric_string.go @@ -46,7 +46,7 @@ const _storageMetric_name = "MakeVolBulkMakeVolListVolsStatVolDeleteVolWalkDirLi var _storageMetric_index = [...]uint16{0, 11, 18, 26, 33, 42, 49, 56, 64, 74, 84, 98, 108, 118, 128, 134, 148, 158, 166, 179, 192, 206, 217, 223, 230, 242, 262, 270, 280, 290, 299, 303} func (i storageMetric) String() string { - if i >= storageMetric(len(_storageMetric_index)-1) { + if i < 0 || i >= storageMetric(len(_storageMetric_index)-1) { return "storageMetric(" + strconv.FormatInt(int64(i), 10) + ")" } return _storageMetric_name[_storageMetric_index[i]:_storageMetric_index[i+1]] diff --git a/cmd/warm-backend-azure.go b/cmd/warm-backend-azure.go index bb087e749cca8..3ad5121207eda 100644 --- a/cmd/warm-backend-azure.go +++ b/cmd/warm-backend-azure.go @@ -26,7 +26,6 @@ import ( "strings" "github.com/Azure/azure-sdk-for-go/sdk/azcore" - "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" @@ -63,7 +62,7 @@ func (az *warmBackendAzure) getDest(object string) string { func (az *warmBackendAzure) PutWithMeta(ctx context.Context, object string, r io.Reader, length int64, meta map[string]string) (remoteVersionID, error) { azMeta := map[string]*string{} for k, v := range meta { - azMeta[k] = to.Ptr(v) + azMeta[k] = new(v) } resp, err := az.clnt.UploadStream(ctx, az.Bucket, az.getDest(object), io.LimitReader(r, length), &azblob.UploadStreamOptions{ Concurrency: 4, diff --git a/cmd/xl-storage-format-v2_string.go b/cmd/xl-storage-format-v2_string.go index 61959f85d414b..622e878d277c2 100644 --- a/cmd/xl-storage-format-v2_string.go +++ b/cmd/xl-storage-format-v2_string.go @@ -20,7 +20,7 @@ const _VersionType_name = "invalidVersionTypeObjectTypeDeleteTypeLegacyTypelastV var _VersionType_index = [...]uint8{0, 18, 28, 38, 48, 63} func (i VersionType) String() string { - if i >= VersionType(len(_VersionType_index)-1) { + if i < 0 || i >= VersionType(len(_VersionType_index)-1) { return "VersionType(" + strconv.FormatInt(int64(i), 10) + ")" } return _VersionType_name[_VersionType_index[i]:_VersionType_index[i+1]] @@ -39,7 +39,7 @@ const _ErasureAlgo_name = "invalidErasureAlgoReedSolomonlastErasureAlgo" var _ErasureAlgo_index = [...]uint8{0, 18, 29, 44} func (i ErasureAlgo) String() string { - if i >= ErasureAlgo(len(_ErasureAlgo_index)-1) { + if i < 0 || i >= ErasureAlgo(len(_ErasureAlgo_index)-1) { return "ErasureAlgo(" + strconv.FormatInt(int64(i), 10) + ")" } return _ErasureAlgo_name[_ErasureAlgo_index[i]:_ErasureAlgo_index[i+1]] diff --git a/docs/debugging/inspect/go.mod b/docs/debugging/inspect/go.mod index a9e37e2f482a5..a892683a003e1 100644 --- a/docs/debugging/inspect/go.mod +++ b/docs/debugging/inspect/go.mod @@ -1,6 +1,6 @@ module github.com/minio/minio/docs/debugging/inspect -go 1.23.0 +go 1.24.0 toolchain go1.24.8 @@ -20,6 +20,6 @@ require ( github.com/mattn/go-isatty v0.0.20 // indirect github.com/minio/pkg/v3 v3.0.28 // indirect github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c // indirect - golang.org/x/crypto v0.35.0 // indirect - golang.org/x/sys v0.30.0 // indirect + golang.org/x/crypto v0.45.0 // indirect + golang.org/x/sys v0.38.0 // indirect ) diff --git a/docs/debugging/inspect/go.sum b/docs/debugging/inspect/go.sum index 7bdafc1efff3d..7750d63ad613d 100644 --- a/docs/debugging/inspect/go.sum +++ b/docs/debugging/inspect/go.sum @@ -24,13 +24,13 @@ github.com/tinylib/msgp v1.2.5 h1:WeQg1whrXRFiZusidTQqzETkRpGjFjcIhW6uqWH09po= github.com/tinylib/msgp v1.2.5/go.mod h1:ykjzy2wzgrlvpDCRc4LA8UXy6D8bzMSuAF3WD57Gok0= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs= -golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ= +golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= -golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/docs/debugging/s3-verify/go.mod b/docs/debugging/s3-verify/go.mod index 37426446d5c8e..d673192d3c033 100644 --- a/docs/debugging/s3-verify/go.mod +++ b/docs/debugging/s3-verify/go.mod @@ -1,6 +1,6 @@ module github.com/minio/minio/docs/debugging/s3-verify -go 1.23.0 +go 1.24.0 toolchain go1.24.8 @@ -17,9 +17,9 @@ require ( github.com/minio/md5-simd v1.1.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rs/xid v1.6.0 // indirect - golang.org/x/crypto v0.36.0 // indirect - golang.org/x/net v0.38.0 // indirect - golang.org/x/sys v0.31.0 // indirect - golang.org/x/text v0.23.0 // indirect + golang.org/x/crypto v0.45.0 // indirect + golang.org/x/net v0.47.0 // indirect + golang.org/x/sys v0.38.0 // indirect + golang.org/x/text v0.31.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/docs/debugging/s3-verify/go.sum b/docs/debugging/s3-verify/go.sum index c8926fa12e8bd..e01f6791bc471 100644 --- a/docs/debugging/s3-verify/go.sum +++ b/docs/debugging/s3-verify/go.sum @@ -25,12 +25,16 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/docs/resiliency/docker-compose.yaml b/docs/resiliency/docker-compose.yaml index 842d766a144d2..aebb15fedbb83 100644 --- a/docs/resiliency/docker-compose.yaml +++ b/docs/resiliency/docker-compose.yaml @@ -80,10 +80,14 @@ services: - "9000:9000" - "9001:9001" depends_on: - - minio1 - - minio2 - - minio3 - - minio4 + minio1: + condition: service_healthy + minio2: + condition: service_healthy + minio3: + condition: service_healthy + minio4: + condition: service_healthy ## By default this config uses default local driver, ## For custom volumes replace with volume driver configuration. diff --git a/internal/arn/arn.go b/internal/arn/arn.go index 4f40748c715e5..b7e995c0655d0 100644 --- a/internal/arn/arn.go +++ b/internal/arn/arn.go @@ -50,7 +50,7 @@ type ARN struct { // Allows english letters, numbers, '.', '-', '_' and '/'. Starts with a // letter or digit. At least 1 character long. -var validResourceIDRegex = regexp.MustCompile(`[A-Za-z0-9_/\.-]+$`) +var validResourceIDRegex = regexp.MustCompile(`[A-Za-z0-9_./-]+$`) // NewIAMRoleARN - returns an ARN for a role in MinIO. func NewIAMRoleARN(resourceID, serverRegion string) (ARN, error) { diff --git a/internal/deadlineconn/deadlineconn_test.go b/internal/deadlineconn/deadlineconn_test.go index 6921e47b1e476..669289328d089 100644 --- a/internal/deadlineconn/deadlineconn_test.go +++ b/internal/deadlineconn/deadlineconn_test.go @@ -42,10 +42,7 @@ func TestBuffConnReadTimeout(t *testing.T) { } var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - + wg.Go(func() { tcpConn, terr := tcpListener.AcceptTCP() if terr != nil { t.Errorf("failed to accept new connection. %v", terr) @@ -89,7 +86,7 @@ func TestBuffConnReadTimeout(t *testing.T) { t.Errorf("failed to write to client. %v", terr) return } - }() + }) c, err := net.Dial("tcp", serverAddr) if err != nil { @@ -132,10 +129,7 @@ func TestBuffConnReadCheckTimeout(t *testing.T) { } var cerr error var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - + wg.Go(func() { tcpConn, terr := tcpListener.AcceptTCP() if terr != nil { cerr = fmt.Errorf("failed to accept new connection. %v", terr) @@ -171,7 +165,7 @@ func TestBuffConnReadCheckTimeout(t *testing.T) { cerr = fmt.Errorf("could read from client, expected error, got %v", terr) return } - }() + }) c, err := net.Dial("tcp", serverAddr) if err != nil { diff --git a/internal/grid/grid_test.go b/internal/grid/grid_test.go index 54002c38ba1d5..b6fab68c9e3cc 100644 --- a/internal/grid/grid_test.go +++ b/internal/grid/grid_test.go @@ -242,7 +242,7 @@ func TestSingleRoundtripGenerics(t *testing.T) { t.Errorf("want error %v(%T), got %v(%T)", RemoteErr(testPayload), RemoteErr(testPayload), err, err) } if resp != nil { - t.Errorf("want nil, got %#v", resp) + t.Errorf("want nil, got %v", resp) } h2.PutResponse(resp) t.Log("Roundtrip:", time.Since(start)) diff --git a/internal/logger/target/loggertypes/targettype_string.go b/internal/logger/target/loggertypes/targettype_string.go index 715a9fef142aa..08ea3f2efcd83 100644 --- a/internal/logger/target/loggertypes/targettype_string.go +++ b/internal/logger/target/loggertypes/targettype_string.go @@ -19,7 +19,7 @@ var _TargetType_index = [...]uint8{0, 7, 11, 16} func (i TargetType) String() string { i -= 1 - if i >= TargetType(len(_TargetType_index)-1) { + if i < 0 || i >= TargetType(len(_TargetType_index)-1) { return "TargetType(" + strconv.FormatInt(int64(i+1), 10) + ")" } return _TargetType_name[_TargetType_index[i]:_TargetType_index[i+1]] diff --git a/internal/logger/target/testlogger/testlogger.go b/internal/logger/target/testlogger/testlogger.go index 68f0f82dd7ddb..9cbac7a4f805c 100644 --- a/internal/logger/target/testlogger/testlogger.go +++ b/internal/logger/target/testlogger/testlogger.go @@ -150,14 +150,15 @@ func (t *testLogger) Send(ctx context.Context, entry any) error { if v.Trace == nil { logf("%s: %s", v.Level, v.Message) } else { - msg := fmt.Sprintf("%s: %+v", v.Level, v.Trace.Message) + var msg strings.Builder + msg.WriteString(fmt.Sprintf("%s: %+v", v.Level, v.Trace.Message)) for i, m := range v.Trace.Source { if i == 0 && strings.Contains(m, "logger.go:") { continue } - msg += fmt.Sprintf("\n%s", m) + msg.WriteString(fmt.Sprintf("\n%s", m)) } - logf("%s", msg) + logf("%s", msg.String()) } default: logf("%+v (%T)", v, v) diff --git a/internal/s3select/csv/reader_contrib_test.go b/internal/s3select/csv/reader_contrib_test.go index f2262f5e83e1e..247c54ed8a637 100644 --- a/internal/s3select/csv/reader_contrib_test.go +++ b/internal/s3select/csv/reader_contrib_test.go @@ -540,7 +540,7 @@ func BenchmarkReaderHuge(b *testing.B) { for n := range 11 { f := openTestFile(b, "nyc-taxi-data-100k.csv") want := 309 - for i := 0; i < n; i++ { + for range n { f = append(f, f...) want *= 2 } diff --git a/internal/s3select/select.go b/internal/s3select/select.go index 2bd7bab91f504..2cc227a169eba 100644 --- a/internal/s3select/select.go +++ b/internal/s3select/select.go @@ -437,6 +437,7 @@ func (s3Select *S3Select) Open(rsc io.ReadSeekCloser) error { if strings.EqualFold(s3Select.Input.JSONArgs.ContentType, "lines") { // PReader enforces the S3 Select per-record size limit while splitting lines. + //TODO: add simdj.NewReader s3Select.recordReader = json.NewPReader(s3Select.progressReader, &s3Select.Input.JSONArgs) } else { // Document mode. From 1b083a00db9eeb6503af95308942a0aab217a8a5 Mon Sep 17 00:00:00 2001 From: konrad Date: Thu, 25 Jun 2026 11:12:41 +0400 Subject: [PATCH 2/8] Fix unsafe pattern in ReadBlock(), fix some reading errors, consuming r.current could lead to discarding metadata without returning it to pool if it came from pool, fix panic in some methods --- cmd/metacache-stream.go | 543 ++++++++++++++++++++++++----------- cmd/metacache-stream_test.go | 103 +++++++ 2 files changed, 475 insertions(+), 171 deletions(-) diff --git a/cmd/metacache-stream.go b/cmd/metacache-stream.go index f08f85054c020..b647763bd1bb3 100644 --- a/cmd/metacache-stream.go +++ b/cmd/metacache-stream.go @@ -18,6 +18,7 @@ package cmd import ( + "bytes" "context" "errors" "fmt" @@ -154,37 +155,39 @@ func (w *metacacheWriter) write(objs ...metaCacheEntry) error { // The returned channel should be closed when done. // Any error is reported when closing the metacacheWriter. func (w *metacacheWriter) stream() (chan<- metaCacheEntry, error) { + if w == nil { + return nil, errors.New("metacacheWriter: nil writer") + } if w.creator != nil { - err := w.creator() - w.creator = nil - if err != nil { + if err := w.creator(); err != nil { + w.creator = nil return nil, fmt.Errorf("metacacheWriter: unable to create writer: %w", err) } + w.creator = nil if w.mw == nil { return nil, errors.New("metacacheWriter: writer not initialized") } } + objs := make(chan metaCacheEntry, 100) + w.streamErr = nil - w.streamWg.Add(1) - go func() { - defer w.streamWg.Done() + w.streamWg.Go(func() { for o := range objs { if len(o.name) == 0 || w.streamErr != nil { continue } - // Indicate EOS - err := w.mw.WriteBool(true) - if err != nil { + // true means another entry follows. + if err := w.mw.WriteBool(true); err != nil { w.streamErr = err continue } - err = w.mw.WriteString(o.name) - if err != nil { + + if err := w.mw.WriteString(o.name); err != nil { w.streamErr = err continue } - err = w.mw.WriteBytes(o.metadata) + err := w.mw.WriteBytes(o.metadata) if w.reuseBlocks || o.reusable { metaDataPoolPut(o.metadata) } @@ -193,7 +196,7 @@ func (w *metacacheWriter) stream() (chan<- metaCacheEntry, error) { continue } } - }() + }) return objs, nil } @@ -212,26 +215,42 @@ func (w *metacacheWriter) Close() error { // Reset and start writing to new writer. // Close must have been called before this. func (w *metacacheWriter) Reset(out io.Writer) { + if w == nil { + return + } + + // full reset w.streamErr = nil + w.mw = nil + w.closer = nil + w.creator = func() error { s2w := s2.NewWriter(out, s2.WriterBlockSize(w.blockSize), s2.WriterConcurrency(2)) + w.mw = msgp.NewWriter(s2w) w.creator = nil + if err := w.mw.WriteByte(metacacheStreamVersion); err != nil { + _ = s2w.Close() return err } - w.closer = func() error { + w.closer = func() (err error) { + // guarantees the S2 writer is closed even when msgp writing/flushing fails + defer func() { + cerr := s2w.Close() + if err == nil && cerr != nil { + err = cerr + } + }() + if w.streamErr != nil { return w.streamErr } if err := w.mw.WriteBool(false); err != nil { return err } - if err := w.mw.Flush(); err != nil { - return err - } - return s2w.Close() + return w.mw.Flush() } return nil } @@ -271,23 +290,24 @@ func newMetacacheReader(r io.Reader) *metacacheReader { } switch v { case 1, 2: + return nil default: - return fmt.Errorf("metacacheReader: Unknown version: %d", v) + return fmt.Errorf("metacacheReader: unknown version: %d", v) } - return nil }, } } func (r *metacacheReader) checkInit() { - if r.creator == nil || r.err != nil { + if r == nil || r.creator == nil || r.err != nil { return } r.err = r.creator() r.creator = nil } -// peek will return the name of the next object. +// peek will read and cache the next object. +// The next read should consume r.current. // Will return io.EOF if there are no more objects. // Should be used sparingly. func (r *metacacheReader) peek() (metaCacheEntry, error) { @@ -298,33 +318,38 @@ func (r *metacacheReader) peek() (metaCacheEntry, error) { if r.current.name != "" { return r.current, nil } - if more, err := r.mr.ReadBool(); !more { - switch err { - case nil: - r.err = io.EOF - return metaCacheEntry{}, io.EOF - case io.EOF: - r.err = io.ErrUnexpectedEOF - return metaCacheEntry{}, io.ErrUnexpectedEOF + more, err := r.mr.ReadBool() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF } r.err = err return metaCacheEntry{}, err } + if !more { + r.err = io.EOF + return metaCacheEntry{}, io.EOF + } - var err error - if r.current.name, err = r.mr.ReadString(); err != nil { + r.current.name, err = r.mr.ReadString() + if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } r.err = err return metaCacheEntry{}, err } + r.current.metadata, err = r.mr.ReadBytes(r.current.metadata[:0]) - if err == io.EOF { - err = io.ErrUnexpectedEOF + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return metaCacheEntry{}, err } - r.err = err - return r.current, err + + return r.current, nil } // next will read one entry from the stream. @@ -334,66 +359,87 @@ func (r *metacacheReader) next() (metaCacheEntry, error) { if r.err != nil { return metaCacheEntry{}, r.err } + var m metaCacheEntry - var err error + if r.current.name != "" { m.name = r.current.name m.metadata = r.current.metadata + r.current.name = "" r.current.metadata = nil + return m, nil } - if more, err := r.mr.ReadBool(); !more { - switch err { - case nil: - r.err = io.EOF - return m, io.EOF - case io.EOF: - r.err = io.ErrUnexpectedEOF - return m, io.ErrUnexpectedEOF + more, err := r.mr.ReadBool() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF } r.err = err return m, err } - if m.name, err = r.mr.ReadString(); err != nil { + + if !more { + r.err = io.EOF + return m, io.EOF + } + + m.name, err = r.mr.ReadString() + if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } r.err = err return m, err } - m.metadata, err = r.mr.ReadBytes(metaDataPoolGet()) - if err == io.EOF { - err = io.ErrUnexpectedEOF + + buf := metaDataPoolGet() + + m.metadata, err = r.mr.ReadBytes(buf) + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + + if cap(buf) >= metaDataReadDefault { + metaDataPoolPut(buf) + } + + r.err = err + return m, err } + if len(m.metadata) == 0 && cap(m.metadata) >= metaDataReadDefault { metaDataPoolPut(m.metadata) m.metadata = nil } - r.err = err - return m, err + + return m, nil } -// next will read one entry from the stream. -// Generally not recommended for fast operation. +// nextEOF reports whether the next entry read would return io.EOF. +// It may read and cache the next entry. +// TODO: preferably (bool, error) should be return func (r *metacacheReader) nextEOF() bool { r.checkInit() + if r.err != nil { - return r.err == io.EOF + return errors.Is(r.err, io.EOF) } + if r.current.name != "" { return false } + _, err := r.peek() - if err != nil { - r.err = err - return r.err == io.EOF - } - return false + return errors.Is(err, io.EOF) } // forwardTo will forward to the first entry that is >= s. // Will return io.EOF if end of stream is reached without finding any. +// +// Assumes entries are sorted by name. func (r *metacacheReader) forwardTo(s string) error { r.checkInit() if r.err != nil { @@ -407,47 +453,71 @@ func (r *metacacheReader) forwardTo(s string) error { if r.current.name >= s { return nil } + r.current.name = "" r.current.metadata = nil } + // temporary name buffer. + target := []byte(s) + + // Temporary name buffer. tmp := make([]byte, 0, 256) + for { - if more, err := r.mr.ReadBool(); !more { - switch err { - case nil: - r.err = io.EOF - return io.EOF - case io.EOF: - r.err = io.ErrUnexpectedEOF - return io.ErrUnexpectedEOF + more, err := r.mr.ReadBool() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF } r.err = err return err } + + if !more { + r.err = io.EOF + return io.EOF + } // Read name without allocating more than 1 buffer. sz, err := r.mr.ReadStringHeader() if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } r.err = err return err } if cap(tmp) < int(sz) { - tmp = make([]byte, 0, sz+256) + tmp = make([]byte, int(sz)+256) } - tmp = tmp[:sz] - _, err = r.mr.R.ReadFull(tmp) - if err != nil { + + tmp = tmp[:int(sz)] + + if _, err = r.mr.R.ReadFull(tmp); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } r.err = err return err } - if string(tmp) >= s { + + // do not allocate a new string here + if bytes.Compare(tmp, target) >= 0 { r.current.name = string(tmp) - r.current.metadata, r.err = r.mr.ReadBytes(nil) - return r.err + + r.current.metadata, err = r.mr.ReadBytes(nil) + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return err + } + + return nil } // Skip metadata - err = r.mr.Skip() - if err != nil { + if err = r.mr.Skip(); err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } @@ -464,9 +534,11 @@ func (r *metacacheReader) forwardTo(s string) error { // Use peek to determine if at end of stream. func (r *metacacheReader) readN(n int, inclDeleted, inclDirs, inclVersions bool, prefix string) (metaCacheEntriesSorted, error) { r.checkInit() + if n == 0 { return metaCacheEntriesSorted{}, nil } + if r.err != nil { return metaCacheEntriesSorted{}, r.err } @@ -475,75 +547,106 @@ func (r *metacacheReader) readN(n int, inclDeleted, inclDirs, inclVersions bool, if n > 0 { res = make(metaCacheEntries, 0, n) } + if prefix != "" { if err := r.forwardTo(prefix); err != nil { return metaCacheEntriesSorted{}, err } } - next, err := r.peek() - if err != nil { - return metaCacheEntriesSorted{}, err - } - if !next.hasPrefix(prefix) { - return metaCacheEntriesSorted{}, io.EOF - } if r.current.name != "" { - if (inclDeleted || !r.current.isLatestDeletemarker()) && r.current.hasPrefix(prefix) && (inclDirs || r.current.isObject()) { - res = append(res, r.current) + if !r.current.hasPrefix(prefix) { + return metaCacheEntriesSorted{}, io.EOF } + + current := r.current r.current.name = "" r.current.metadata = nil - } + if includeMetacacheEntry(current, inclDeleted, inclDirs, inclVersions) { + res = append(res, current) + } else { + releaseMetaCacheEntryMetadata(current) + } + } for n < 0 || len(res) < n { - if more, err := r.mr.ReadBool(); !more { - switch err { - case nil: - r.err = io.EOF - return metaCacheEntriesSorted{o: res}, io.EOF - case io.EOF: - r.err = io.ErrUnexpectedEOF - return metaCacheEntriesSorted{o: res}, io.ErrUnexpectedEOF + more, err := r.mr.ReadBool() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF } r.err = err return metaCacheEntriesSorted{o: res}, err } - var err error + + if !more { + r.err = io.EOF + return metaCacheEntriesSorted{o: res}, io.EOF + } + var meta metaCacheEntry - if meta.name, err = r.mr.ReadString(); err != nil { + + meta.name, err = r.mr.ReadString() + if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } r.err = err return metaCacheEntriesSorted{o: res}, err } - if !meta.hasPrefix(prefix) { - r.mr.R.Skip(1) - return metaCacheEntriesSorted{o: res}, io.EOF - } - if meta.metadata, err = r.mr.ReadBytes(metaDataPoolGet()); err != nil { + + meta.metadata, err = r.mr.ReadBytes(metaDataPoolGet()) + if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } r.err = err return metaCacheEntriesSorted{o: res}, err } - if len(meta.metadata) == 0 { + + if len(meta.metadata) == 0 && cap(meta.metadata) >= metaDataReadDefault { metaDataPoolPut(meta.metadata) meta.metadata = nil } - if !inclDirs && (meta.isDir() || (!inclVersions && meta.isObjectDir() && meta.isLatestDeletemarker())) { - continue + + if !meta.hasPrefix(prefix) { + // Prefix range ended, but this is a real next entry. + // Cache it so a later read can still consume it. + r.current = meta + return metaCacheEntriesSorted{o: res}, io.EOF } - if !inclDeleted && meta.isLatestDeletemarker() && meta.isObject() && !meta.isObjectDir() { + + if !includeMetacacheEntry(meta, inclDeleted, inclDirs, inclVersions) { + releaseMetaCacheEntryMetadata(meta) continue } + res = append(res, meta) } + return metaCacheEntriesSorted{o: res}, nil } +// helper method for readN +func includeMetacacheEntry(meta metaCacheEntry, inclDeleted, inclDirs, inclVersions bool) bool { + if !inclDirs && (meta.isDir() || (!inclVersions && meta.isObjectDir() && meta.isLatestDeletemarker())) { + return false + } + + if !inclDeleted && meta.isLatestDeletemarker() && meta.isObject() && !meta.isObjectDir() { + return false + } + + return true +} + +// helper method for metadata release +func releaseMetaCacheEntryMetadata(meta metaCacheEntry) { + if cap(meta.metadata) >= metaDataReadDefault { + metaDataPoolPut(meta.metadata) + } +} + // readAll will return all remaining objects on the dst channel and close it when done. // The context allows the operation to be canceled. func (r *metacacheReader) readAll(ctx context.Context, dst chan<- metaCacheEntry) error { @@ -553,17 +656,20 @@ func (r *metacacheReader) readAll(ctx context.Context, dst chan<- metaCacheEntry } defer xioutil.SafeClose(dst) if r.current.name != "" { + current := r.current + select { case <-ctx.Done(): r.err = ctx.Err() return ctx.Err() - case dst <- r.current: + case dst <- current: + r.current.name = "" + r.current.metadata = nil } - r.current.name = "" - r.current.metadata = nil } for { - if more, err := r.mr.ReadBool(); !more { + more, err := r.mr.ReadBool() + if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } @@ -571,30 +677,50 @@ func (r *metacacheReader) readAll(ctx context.Context, dst chan<- metaCacheEntry return err } - var err error + if !more { + // Clean EOS marker. + r.err = io.EOF + return nil + } + var meta metaCacheEntry - if meta.name, err = r.mr.ReadString(); err != nil { + + meta.name, err = r.mr.ReadString() + if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } r.err = err return err } - if meta.metadata, err = r.mr.ReadBytes(metaDataPoolGet()); err != nil { + + buf := metaDataPoolGet() + + meta.metadata, err = r.mr.ReadBytes(buf) + if err != nil { + if cap(buf) >= metaDataReadDefault { + metaDataPoolPut(buf) + } + if err == io.EOF { err = io.ErrUnexpectedEOF } r.err = err return err } - if len(meta.metadata) == 0 { + if len(meta.metadata) == 0 && cap(meta.metadata) >= metaDataReadDefault { metaDataPoolPut(meta.metadata) meta.metadata = nil } + select { case <-ctx.Done(): + if cap(meta.metadata) >= metaDataReadDefault { + metaDataPoolPut(meta.metadata) + } r.err = ctx.Err() return ctx.Err() + case dst <- meta: } } @@ -604,45 +730,58 @@ func (r *metacacheReader) readAll(ctx context.Context, dst chan<- metaCacheEntry // and provide a callback for each entry read in order // as long as true is returned on the callback. func (r *metacacheReader) readFn(fn func(entry metaCacheEntry) bool) error { + if fn == nil { + return errors.New("metacacheReader: nil callback") + } + r.checkInit() if r.err != nil { return r.err } if r.current.name != "" { - fn(r.current) + current := r.current r.current.name = "" r.current.metadata = nil + // makes the cached entry obey the same callback-stop contract as newly read entries + if !fn(current) { + return nil + } } for { - if more, err := r.mr.ReadBool(); !more { - switch err { - case io.EOF: - r.err = io.ErrUnexpectedEOF - return io.ErrUnexpectedEOF - case nil: - r.err = io.EOF - return io.EOF + more, err := r.mr.ReadBool() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF } + r.err = err return err } - var err error + if !more { + r.err = io.EOF + return io.EOF + } + var meta metaCacheEntry - if meta.name, err = r.mr.ReadString(); err != nil { + + meta.name, err = r.mr.ReadString() + if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } r.err = err return err } - if meta.metadata, err = r.mr.ReadBytes(nil); err != nil { + + meta.metadata, err = r.mr.ReadBytes(nil) + if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } r.err = err return err } - // Send it! + if !fn(meta) { return nil } @@ -657,37 +796,51 @@ func (r *metacacheReader) readNames(n int) ([]string, error) { if r.err != nil { return nil, r.err } + if n == 0 { return nil, nil } + var res []string if n > 0 { res = make([]string, 0, n) } if r.current.name != "" { res = append(res, r.current.name) + + // this part matters if r.current.metadata was read using pooled metadata + if cap(r.current.metadata) >= metaDataReadDefault { + metaDataPoolPut(r.current.metadata) + } + r.current.name = "" r.current.metadata = nil } for n < 0 || len(res) < n { - if more, err := r.mr.ReadBool(); !more { - switch err { - case nil: - r.err = io.EOF - return res, io.EOF - case io.EOF: - r.err = io.ErrUnexpectedEOF - return res, io.ErrUnexpectedEOF + more, err := r.mr.ReadBool() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF } + r.err = err return res, err } - var err error - var name string - if name, err = r.mr.ReadString(); err != nil { + if !more { + r.err = io.EOF + return res, io.EOF + } + + name, err := r.mr.ReadString() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } r.err = err return res, err } + + // Skip metadata. if err = r.mr.Skip(); err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF @@ -695,6 +848,7 @@ func (r *metacacheReader) readNames(n int) ([]string, error) { r.err = err return res, err } + res = append(res, name) } return res, nil @@ -707,27 +861,36 @@ func (r *metacacheReader) skip(n int) error { if r.err != nil { return r.err } + if n <= 0 { return nil } + if r.current.name != "" { - n-- + if cap(r.current.metadata) >= metaDataReadDefault { + metaDataPoolPut(r.current.metadata) + } + r.current.name = "" r.current.metadata = nil + n-- } for n > 0 { - if more, err := r.mr.ReadBool(); !more { - switch err { - case nil: - r.err = io.EOF - return io.EOF - case io.EOF: - r.err = io.ErrUnexpectedEOF - return io.ErrUnexpectedEOF + more, err := r.mr.ReadBool() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF } + r.err = err return err } + if !more { + r.err = io.EOF + return io.EOF + } + + // Skip name. if err := r.mr.Skip(); err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF @@ -735,6 +898,8 @@ func (r *metacacheReader) skip(n int) error { r.err = err return err } + + // Skip metadata. if err := r.mr.Skip(); err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF @@ -742,6 +907,7 @@ func (r *metacacheReader) skip(n int) error { r.err = err return err } + n-- } return nil @@ -774,11 +940,9 @@ func newMetacacheBlockWriter(in <-chan metaCacheEntry, nextBlock func(b *metacac blockEntries = 1 } - w := metacacheBlockWriter{blockEntries: cap(in)} - w.wg.Add(1) - //TODO: this looks messy - go func() { - defer w.wg.Done() + w := metacacheBlockWriter{blockEntries: blockEntries} + + w.wg.Go(func() { var current metacacheBlock var n int @@ -789,46 +953,81 @@ func newMetacacheBlockWriter(in <-chan metaCacheEntry, nextBlock func(b *metacac }() block := newMetacacheWriter(buf, 1<<20) - defer block.Close() - finishBlock := func() { + finishBlock := func(eos bool) bool { if err := block.Close(); err != nil { w.streamErr = err - return + return false } + + current.EOS = eos current.data = buf.Bytes() - w.streamErr = nextBlock(¤t) - // Prepare for next - current.n++ + + if err := nextBlock(¤t); err != nil { + w.streamErr = err + return false + } + + nextN := current.n + 1 + + current = metacacheBlock{ + n: nextN, + } + + n = 0 + buf.Reset() block.Reset(buf) - current.First = "" + + return true } + for o := range in { if len(o.name) == 0 || w.streamErr != nil { continue } - if current.First == "" { - current.First = o.name + + // Important: + // Flush only when we already have a full block AND we just saw + // another valid entry. + // + // This guarantees that a full block can still become EOS if the + // input channel closes immediately after it. + if n >= w.blockEntries { + if !finishBlock(false) { + continue + } } - if n >= w.blockEntries-1 { - finishBlock() - n = 0 + if current.First == "" { + current.First = o.name } - n++ - w.streamErr = block.write(o) - if w.streamErr != nil { + if err := block.write(o); err != nil { + w.streamErr = err continue } + current.Last = o.name + n++ } + + if w.streamErr != nil { + return + } + + // Emit the final block as EOS. + // + // n > 0: + // normal final block with entries. + // + // current.n == 0: + // preserve existing behavior for empty input: emit one EOS block. if n > 0 || current.n == 0 { - current.EOS = true - finishBlock() + finishBlock(true) } - }() + }) + return &w } @@ -858,7 +1057,8 @@ func (b metacacheBlock) headerKV() map[string]string { return map[string]string{fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, b.n): string(v)} } -// pastPrefix returns true if the given prefix is before start of the block. +// pastPrefix reports whether the block starts after the possible range +// of entries matching prefix. func (b metacacheBlock) pastPrefix(prefix string) bool { if prefix == "" || strings.HasPrefix(b.First, prefix) { return false @@ -867,7 +1067,8 @@ func (b metacacheBlock) pastPrefix(prefix string) bool { return b.First > prefix } -// endedPrefix returns true if the given prefix ends within the block. +// endedPrefix reports whether the block's last entry is already past +// the possible range of entries matching prefix. func (b metacacheBlock) endedPrefix(prefix string) bool { if prefix == "" || strings.HasPrefix(b.Last, prefix) { return false diff --git a/cmd/metacache-stream_test.go b/cmd/metacache-stream_test.go index 36286f6883b8d..f5f940b4fb897 100644 --- a/cmd/metacache-stream_test.go +++ b/cmd/metacache-stream_test.go @@ -19,6 +19,7 @@ package cmd import ( "bytes" + "fmt" "io" "os" "reflect" @@ -425,3 +426,105 @@ func Test_metacacheReader_skip(t *testing.T) { t.Fatal(err) } } + +func TestNewMetacacheBlockWriterUnbufferedChannelDoesNotEmitEmptyBlock(t *testing.T) { + in := make(chan metaCacheEntry) // cap == 0 + + var got []metacacheBlock + + w := newMetacacheBlockWriter(in, func(b *metacacheBlock) error { + got = append(got, cloneMetacacheBlock(b)) + return nil + }) + + in <- metaCacheEntry{name: "alpha"} + close(in) + + w.wg.Wait() + + if w.streamErr != nil { + t.Fatalf("unexpected stream error: %v", w.streamErr) + } + + if len(got) != 1 { + t.Fatalf("expected exactly 1 block, got %d: %+v", len(got), summarizeBlocks(got)) + } + + if got[0].First != "alpha" { + t.Fatalf("expected First alpha, got %q", got[0].First) + } + + if got[0].Last != "alpha" { + t.Fatalf("expected Last alpha, got %q", got[0].Last) + } + + if !got[0].EOS { + t.Fatalf("expected single block to be EOS") + } + + if len(got[0].data) == 0 { + t.Fatalf("expected block data to contain serialized entry, got empty data") + } +} + +func TestNewMetacacheBlockWriterBufferedChannelCapacityTwoKeepsTwoEntriesInOneBlock(t *testing.T) { + in := make(chan metaCacheEntry, 2) + + var got []metacacheBlock + + w := newMetacacheBlockWriter(in, func(b *metacacheBlock) error { + got = append(got, cloneMetacacheBlock(b)) + return nil + }) + + in <- metaCacheEntry{name: "alpha"} + in <- metaCacheEntry{name: "beta"} + close(in) + + w.wg.Wait() + + if w.streamErr != nil { + t.Fatalf("unexpected stream error: %v", w.streamErr) + } + + if len(got) != 1 { + t.Fatalf("expected exactly 1 block for 2 entries with cap 2, got %d: %+v", len(got), summarizeBlocks(got)) + } + + if got[0].First != "alpha" { + t.Fatalf("expected First alpha, got %q", got[0].First) + } + + if got[0].Last != "beta" { + t.Fatalf("expected Last beta, got %q", got[0].Last) + } + + if !got[0].EOS { + t.Fatalf("expected the only block to be EOS") + } + + if len(got[0].data) == 0 { + t.Fatalf("expected block data to contain serialized entries, got empty data") + } +} + +func cloneMetacacheBlock(b *metacacheBlock) metacacheBlock { + c := *b + c.data = append([]byte(nil), b.data...) + return c +} + +func summarizeBlocks(blocks []metacacheBlock) []string { + out := make([]string, 0, len(blocks)) + for _, b := range blocks { + out = append(out, fmt.Sprintf( + "{First:%q Last:%q EOS:%v n:%d dataLen:%d}", + b.First, + b.Last, + b.EOS, + b.n, + len(b.data), + )) + } + return out +} From 4fa2fc6316191e4636b96bc8a58b6d5af6661d66 Mon Sep 17 00:00:00 2001 From: konrad Date: Thu, 25 Jun 2026 11:29:50 +0400 Subject: [PATCH 3/8] Improve metacache-marker and add tests for it - previous was not safe enough and could case panic on malformed markers and partially mutate o.Marker if the marker tag is invalid --- cmd/metacache-marker.go | 96 ++++++++---- cmd/metacache-marker_test.go | 289 +++++++++++++++++++++++++++++++++++ 2 files changed, 359 insertions(+), 26 deletions(-) create mode 100644 cmd/metacache-marker_test.go diff --git a/cmd/metacache-marker.go b/cmd/metacache-marker.go index 4548af2b9300f..2b1435c2a128d 100644 --- a/cmd/metacache-marker.go +++ b/cmd/metacache-marker.go @@ -31,60 +31,104 @@ const markerTagVersion = "v2" // parseMarker will parse a marker possibly encoded with encodeMarker func (o *listPathOptions) parseMarker() { s := o.Marker - if !strings.Contains(s, "[minio_cache:"+markerTagVersion) { + start := strings.LastIndexByte(s, '[') + if start < 0 { return } - start := strings.LastIndex(s, "[") - o.Marker = s[:start] - end := strings.LastIndex(s, "]") - tag := strings.Trim(s[start:end], "[]") - tags := strings.SplitSeq(tag, ",") - for tag := range tags { - kv := strings.Split(tag, ":") - if len(kv) < 2 { + end := strings.LastIndexByte(s, ']') + if end < 0 || end < start { + return + } + // Usually encoded marker should be a suffix. If encodeMarker always appends + // the tag at the end, require this to avoid parsing unrelated brackets. + if end != len(s)-1 { + return + } + + rawTag := s[start+1 : end] + + fields := strings.Split(rawTag, ",") + if len(fields) == 0 { + return + } + + parsed := make(map[string]string, len(fields)) + + for _, field := range fields { + k, v, ok := strings.Cut(field, ":") + if !ok { continue } - switch kv[0] { + parsed[k] = v + } + + if parsed["minio_cache"] != markerTagVersion { + return + } + + // Only mutate Marker after validating that this is really our marker tag. + o.Marker = s[:start] + + if !strings.Contains(s, "[minio_cache:"+markerTagVersion) { + return + } + for k, v := range parsed { + switch k { case "minio_cache": - if kv[1] != markerTagVersion { - continue - } + // Already validated. + case "id": - o.ID = kv[1] + o.ID = v + case "return": o.ID = mustGetUUID() o.Create = true + case "p": // pool - v, err := strconv.ParseInt(kv[1], 10, 64) - if err != nil { + pool, err := strconv.Atoi(v) + if err != nil || pool < 0 { o.ID = mustGetUUID() o.Create = true continue } - o.pool = int(v) + o.pool = pool + case "s": // set - v, err := strconv.ParseInt(kv[1], 10, 64) - if err != nil { + set, err := strconv.Atoi(v) + if err != nil || set < 0 { o.ID = mustGetUUID() o.Create = true continue } - o.set = int(v) + o.set = set + default: - // Ignore unknown + // Ignore unknown fields. } } } // encodeMarker will encode a uuid and return it as a marker. -// uuid cannot contain '[', ':' or ','. +// uuid cannot contain '[', ']', ':' or ','. func (o listPathOptions) encodeMarker(marker string) string { if o.ID == "" { - // Mark as returning listing... + // Mark as returning listing. return fmt.Sprintf("%s[minio_cache:%s,return:]", marker, markerTagVersion) } - if strings.ContainsAny(o.ID, "[:,") { - internalLogIf(context.Background(), fmt.Errorf("encodeMarker: uuid %s contained invalid characters", o.ID)) + if strings.ContainsAny(o.ID, "[]:,") { + internalLogIf(context.Background(), fmt.Errorf("encodeMarker: uuid %q contained invalid characters", o.ID)) + + // Avoid emitting a malformed marker. + // Caller will get a marker that asks for a new cache/listing. + return fmt.Sprintf("%s[minio_cache:%s,return:]", marker, markerTagVersion) } - return fmt.Sprintf("%s[minio_cache:%s,id:%s,p:%d,s:%d]", marker, markerTagVersion, o.ID, o.pool, o.set) + + return fmt.Sprintf( + "%s[minio_cache:%s,id:%s,p:%d,s:%d]", + marker, + markerTagVersion, + o.ID, + o.pool, + o.set, + ) } diff --git a/cmd/metacache-marker_test.go b/cmd/metacache-marker_test.go new file mode 100644 index 0000000000000..96d42465c1975 --- /dev/null +++ b/cmd/metacache-marker_test.go @@ -0,0 +1,289 @@ +package cmd + +import ( + "strings" + "testing" +) + +func TestListPathOptionsEncodeParseMarkerRoundTrip(t *testing.T) { + opts := listPathOptions{ + ID: "550e8400-e29b-41d4-a716-446655440000", + pool: 3, + set: 7, + } + + encoded := opts.encodeMarker("photos/2026/img.jpg") + + wantEncoded := "photos/2026/img.jpg[minio_cache:v2,id:550e8400-e29b-41d4-a716-446655440000,p:3,s:7]" + if encoded != wantEncoded { + t.Fatalf("unexpected encoded marker:\nwant: %q\n got: %q", wantEncoded, encoded) + } + + parsed := listPathOptions{ + Marker: encoded, + } + + parsed.parseMarker() + + if parsed.Marker != "photos/2026/img.jpg" { + t.Fatalf("expected marker without encoded suffix, got %q", parsed.Marker) + } + + if parsed.ID != opts.ID { + t.Fatalf("expected ID %q, got %q", opts.ID, parsed.ID) + } + + if parsed.pool != opts.pool { + t.Fatalf("expected pool %d, got %d", opts.pool, parsed.pool) + } + + if parsed.set != opts.set { + t.Fatalf("expected set %d, got %d", opts.set, parsed.set) + } + + if parsed.Create { + t.Fatalf("expected Create=false") + } +} + +func TestListPathOptionsEncodeMarkerWithoutIDMarksReturn(t *testing.T) { + opts := listPathOptions{} + + encoded := opts.encodeMarker("photos/") + + want := "photos/[minio_cache:v2,return:]" + if encoded != want { + t.Fatalf("unexpected encoded marker:\nwant: %q\n got: %q", want, encoded) + } + + parsed := listPathOptions{ + Marker: encoded, + } + + parsed.parseMarker() + + if parsed.Marker != "photos/" { + t.Fatalf("expected marker %q, got %q", "photos/", parsed.Marker) + } + + if !parsed.Create { + t.Fatalf("expected Create=true") + } + + if parsed.ID == "" { + t.Fatalf("expected generated ID to be set") + } +} + +func TestListPathOptionsEncodeMarkerInvalidIDFallsBackToReturn(t *testing.T) { + tests := []string{ + "bad:id", + "bad,id", + "bad[id", + "bad]id", + } + + for _, id := range tests { + t.Run(id, func(t *testing.T) { + opts := listPathOptions{ + ID: id, + pool: 1, + set: 2, + } + + encoded := opts.encodeMarker("marker") + + want := "marker[minio_cache:v2,return:]" + if encoded != want { + t.Fatalf("expected invalid ID to fallback to return marker:\nwant: %q\n got: %q", want, encoded) + } + }) + } +} + +func TestListPathOptionsParseMarkerIgnoresPlainMarker(t *testing.T) { + opts := listPathOptions{ + Marker: "photos/2026/img.jpg", + ID: "existing-id", + Create: false, + pool: 9, + set: 8, + } + + opts.parseMarker() + + if opts.Marker != "photos/2026/img.jpg" { + t.Fatalf("plain marker should not be changed, got %q", opts.Marker) + } + + if opts.ID != "existing-id" { + t.Fatalf("ID should not be changed, got %q", opts.ID) + } + + if opts.pool != 9 || opts.set != 8 { + t.Fatalf("pool/set should not be changed, got pool=%d set=%d", opts.pool, opts.set) + } +} + +func TestListPathOptionsParseMarkerIgnoresUnknownVersion(t *testing.T) { + opts := listPathOptions{ + Marker: "photos/[minio_cache:v1,id:abc,p:1,s:2]", + ID: "existing-id", + pool: 9, + set: 8, + } + + opts.parseMarker() + + if opts.Marker != "photos/[minio_cache:v1,id:abc,p:1,s:2]" { + t.Fatalf("marker should not be changed for unknown version, got %q", opts.Marker) + } + + if opts.ID != "existing-id" { + t.Fatalf("ID should not be changed, got %q", opts.ID) + } + + if opts.pool != 9 || opts.set != 8 { + t.Fatalf("pool/set should not be changed, got pool=%d set=%d", opts.pool, opts.set) + } +} + +func TestListPathOptionsParseMarkerRequiresEncodedTagAsSuffix(t *testing.T) { + opts := listPathOptions{ + Marker: "photos/[minio_cache:v2,id:abc,p:1,s:2]/extra", + ID: "existing-id", + pool: 9, + set: 8, + } + + opts.parseMarker() + + if opts.Marker != "photos/[minio_cache:v2,id:abc,p:1,s:2]/extra" { + t.Fatalf("marker should not be changed when encoded tag is not suffix, got %q", opts.Marker) + } + + if opts.ID != "existing-id" { + t.Fatalf("ID should not be changed, got %q", opts.ID) + } + + if opts.pool != 9 || opts.set != 8 { + t.Fatalf("pool/set should not be changed, got pool=%d set=%d", opts.pool, opts.set) + } +} + +func TestListPathOptionsParseMarkerDoesNotPanicOnMalformedMarkers(t *testing.T) { + tests := []string{ + "", + "photos/", + "photos/[", + "photos/]", + "photos/[minio_cache:v2", + "photos/minio_cache:v2]", + "photos/[minio_cache:v2,id:abc,p:1,s:2", + "photos/[garbage]", + "photos/[minio_cache]", + "photos/[minio_cache:v2,id]", + "photos/[minio_cache:v2,p:not-int,s:2]", + "photos/[minio_cache:v2,p:1,s:not-int]", + } + + for _, marker := range tests { + t.Run(marker, func(t *testing.T) { + opts := listPathOptions{ + Marker: marker, + } + + defer func() { + if r := recover(); r != nil { + t.Fatalf("parseMarker panicked for marker %q: %v", marker, r) + } + }() + + opts.parseMarker() + }) + } +} + +func TestListPathOptionsParseMarkerWithAdditionalUnknownFields(t *testing.T) { + opts := listPathOptions{ + Marker: "marker[minio_cache:v2,id:abc,unknown:value,p:4,s:5]", + } + + opts.parseMarker() + + if opts.Marker != "marker" { + t.Fatalf("expected marker %q, got %q", "marker", opts.Marker) + } + + if opts.ID != "abc" { + t.Fatalf("expected ID %q, got %q", "abc", opts.ID) + } + + if opts.pool != 4 { + t.Fatalf("expected pool 4, got %d", opts.pool) + } + + if opts.set != 5 { + t.Fatalf("expected set 5, got %d", opts.set) + } +} + +func TestListPathOptionsParseMarkerPreservesMarkerWithEarlierBrackets(t *testing.T) { + opts := listPathOptions{ + Marker: "photos/[folder]/image[minio_cache:v2,id:abc,p:1,s:2]", + } + + opts.parseMarker() + + if opts.Marker != "photos/[folder]/image" { + t.Fatalf("expected marker before encoded suffix to be preserved, got %q", opts.Marker) + } + + if opts.ID != "abc" { + t.Fatalf("expected ID abc, got %q", opts.ID) + } + + if opts.pool != 1 { + t.Fatalf("expected pool 1, got %d", opts.pool) + } + + if opts.set != 2 { + t.Fatalf("expected set 2, got %d", opts.set) + } +} + +func TestListPathOptionsParseReturnMarkerCreatesNewListing(t *testing.T) { + opts := listPathOptions{ + Marker: "marker[minio_cache:v2,return:]", + } + + opts.parseMarker() + + if opts.Marker != "marker" { + t.Fatalf("expected marker %q, got %q", "marker", opts.Marker) + } + + if !opts.Create { + t.Fatalf("expected Create=true") + } + + if opts.ID == "" { + t.Fatalf("expected generated ID") + } +} + +func TestListPathOptionsEncodedMarkerDoesNotContainInvalidIDCharacters(t *testing.T) { + opts := listPathOptions{ + ID: "valid-id", + } + + encoded := opts.encodeMarker("marker") + + if strings.Contains(encoded, "return:") { + t.Fatalf("did not expect valid ID to fallback to return marker: %q", encoded) + } + + if !strings.Contains(encoded, "id:valid-id") { + t.Fatalf("expected encoded marker to contain valid ID, got %q", encoded) + } +} From 0f0ca2a751f4427f35edf88196f97d1418677f25 Mon Sep 17 00:00:00 2001 From: konrad Date: Thu, 25 Jun 2026 14:20:22 +0400 Subject: [PATCH 4/8] Fix issues with merge and traversing entries in metacache --- cmd/metacache-entries.go | 704 ++++++++++++++++++++++++---------- cmd/metacache-entries_test.go | 1 + 2 files changed, 494 insertions(+), 211 deletions(-) diff --git a/cmd/metacache-entries.go b/cmd/metacache-entries.go index 69c5e835c2e44..a073b3b1f3fa8 100644 --- a/cmd/metacache-entries.go +++ b/cmd/metacache-entries.go @@ -20,8 +20,8 @@ package cmd import ( "bytes" "context" + "fmt" "os" - "path" "sort" "strings" @@ -44,7 +44,9 @@ type metaCacheEntry struct { reusable bool } -// isDir returns if the entry is representing a prefix directory. +// isDir reports whether the entry represents a synthetic prefix directory. +// +// Directory entries have no object metadata and their name ends with "/". func (e metaCacheEntry) isDir() bool { return len(e.metadata) == 0 && strings.HasSuffix(e.name, slashSeparator) } @@ -54,7 +56,7 @@ func (e metaCacheEntry) isObject() bool { return len(e.metadata) > 0 } -// isObjectDir returns if the entry is representing an object/ +// isObjectDir returns whether the entry represents an object whose name ends with "/". func (e metaCacheEntry) isObjectDir() bool { return len(e.metadata) > 0 && strings.HasSuffix(e.name, slashSeparator) } @@ -85,12 +87,20 @@ func (e *metaCacheEntry) matches(other *metaCacheEntry, strict bool) (prefer *me return other, false } - if other.isDir() || e.isDir() { - if e.isDir() { - return e, other.isDir() == e.isDir() + // Directories/prefixes do not have xlmeta. + // If both are synthetic dirs, they match. + // If only one is synthetic dir, prefer the real object. + if e.isDir() || other.isDir() { + switch { + case e.isDir() && other.isDir(): + return e, true + case e.isDir(): + return other, false + default: + return e, false } - return other, other.isDir() == e.isDir() } + eVers, eErr := e.xlmeta() oVers, oErr := other.xlmeta() if eErr != nil || oErr != nil { @@ -116,6 +126,7 @@ func (e *metaCacheEntry) matches(other *metaCacheEntry, strict bool) (prefer *me } // Check if each version matches... + //TODO: check if this versions come ordered, otherwise it can report false mismatches. for i, eVer := range eVers.versions { oVer := oVers.versions[i] if eVer.header != oVer.header { @@ -156,20 +167,32 @@ func (e *metaCacheEntry) matches(other *metaCacheEntry, strict bool) (prefer *me return prefer, true } -// isInDir returns whether the entry is in the dir when considering the separator. +// isInDir returns whether the entry is directly inside dir when considering separator. +// +// dir is expected to be either empty for root, or normalized to end with separator. func (e metaCacheEntry) isInDir(dir, separator string) bool { - if len(dir) == 0 { - // Root + if separator == "" { + return false + } + + // safer if callers may pass dir without trailing / + if dir != "" && !strings.HasSuffix(dir, separator) { + dir += separator + } + + if dir == "" { idx := strings.Index(e.name, separator) return idx == -1 || idx == len(e.name)-len(separator) } - ext := strings.TrimPrefix(e.name, dir) - if len(ext) != len(e.name) { - idx := strings.Index(ext, separator) - // If separator is not found or is last entry, ok. - return idx == -1 || idx == len(ext)-len(separator) + + if !strings.HasPrefix(e.name, dir) { + return false } - return false + + ext := strings.TrimPrefix(e.name, dir) + idx := strings.Index(ext, separator) + + return idx == -1 || idx == len(ext)-len(separator) } // isLatestDeletemarker returns whether the latest version is a delete marker. @@ -185,11 +208,14 @@ func (e *metaCacheEntry) isLatestDeletemarker() bool { if !isXL2V1Format(e.metadata) { return false } - if meta, _, err := isIndexedMetaV2(e.metadata); meta != nil { - return meta.IsLatestDeleteMarker() - } else if err != nil { + // stricter around isIndexedMetaV2() errors + meta, _, err := isIndexedMetaV2(e.metadata) + if err != nil { return true } + if meta != nil { + return meta.IsLatestDeleteMarker() + } // Fall back... xlMeta, err := e.xlmeta() if err != nil || len(xlMeta.versions) == 0 { @@ -213,21 +239,25 @@ func (e *metaCacheEntry) isAllFreeVersions() bool { } return true } + if !isXL2V1Format(e.metadata) { return false } - if meta, _, err := isIndexedMetaV2(e.metadata); meta != nil { - return meta.AllHidden(false) - } else if err != nil { + meta, _, err := isIndexedMetaV2(e.metadata) + if err != nil { return true } + if meta != nil { + return meta.AllHidden(false) + } + // Fall back... xlMeta, err := e.xlmeta() if err != nil || len(xlMeta.versions) == 0 { return true } - // Check versions.. - for _, v := range e.cached.versions { + // still can be e.cached may still be nil + for _, v := range xlMeta.versions { if !v.header.FreeVersion() { return false } @@ -259,6 +289,12 @@ func (e *metaCacheEntry) fileInfo(bucket string) (FileInfo, error) { } return e.cached.ToFileInfo(bucket, e.name, "", false, true) } + + // protect against malformed entries + if len(e.metadata) == 0 { + return FileInfo{}, fmt.Errorf("metaCacheEntry: no metadata for non-directory entry %q", e.name) + } + return getFileInfo(e.metadata, bucket, e.name, "", fileInfoOpts{}) } @@ -268,18 +304,18 @@ func (e *metaCacheEntry) xlmeta() (*xlMetaV2, error) { if e.isDir() { return nil, errFileNotFound } - if e.cached == nil { - if len(e.metadata) == 0 { - // only happens if the entry is not found. - return nil, errFileNotFound - } - var xl xlMetaV2 - err := xl.LoadOrConvert(e.metadata) - if err != nil { - return nil, err - } - e.cached = &xl + if e.cached != nil { + return e.cached, nil + } + if len(e.metadata) == 0 { + // Only happens if the entry is not found or malformed. + return nil, errFileNotFound + } + var xl xlMetaV2 + if err := xl.LoadOrConvert(e.metadata); err != nil { + return nil, err } + e.cached = &xl return e.cached, nil } @@ -299,6 +335,10 @@ func (e *metaCacheEntry) fileInfoVersions(bucket string) (FileInfoVersions, erro }, }, nil } + // for non-directory entry with empty metadata + if len(e.metadata) == 0 { + return FileInfoVersions{}, errFileNotFound + } // Too small gains to reuse cache here. return getFileInfoVersions(e.metadata, bucket, e.name, true) } @@ -356,17 +396,23 @@ type metadataResolutionParams struct { // entries are resolved by majority, then if tied by mod-time and versions. // Names must match on all entries in m. func (m metaCacheEntries) resolve(r *metadataResolutionParams) (selected *metaCacheEntry, ok bool) { - if len(m) == 0 { + if len(m) == 0 || r == nil { return nil, false } - dirExists := 0 if cap(r.candidates) < len(m) { r.candidates = make([][]xlMetaV2ShallowVersion, 0, len(m)) } r.candidates = r.candidates[:0] + + // Separate object and dir selection + var dirSelected *metaCacheEntry + var objSelected *metaCacheEntry + + dirExists := 0 objsAgree := 0 objsValid := 0 + for i := range m { entry := &m[i] // Empty entry @@ -376,7 +422,9 @@ func (m metaCacheEntries) resolve(r *metadataResolutionParams) (selected *metaCa if entry.isDir() { dirExists++ - selected = entry + if dirSelected == nil { + dirSelected = entry + } continue } @@ -393,71 +441,72 @@ func (m metaCacheEntries) resolve(r *metadataResolutionParams) (selected *metaCa // We select the first object we find as a candidate and see if all match that. // This is to quickly identify if all agree. - if selected == nil { - selected = entry + if objSelected == nil { + objSelected = entry objsAgree = 1 continue } // Names match, check meta... - if prefer, ok := entry.matches(selected, r.strict); ok { - selected = prefer + if prefer, matches := entry.matches(objSelected, r.strict); matches { + objSelected = prefer objsAgree++ - continue } } - // Return dir entries, if enough... - if selected != nil && selected.isDir() && dirExists >= r.dirQuorum { - return selected, true - } + // Prefer real object metadata if object quorum exists. + if objsValid >= r.objQuorum { + if objSelected == nil || objSelected.cached == nil { + return nil, false + } + if objsAgree == objsValid { + return objSelected, true + } - // If we would never be able to reach read quorum. - if objsValid < r.objQuorum { - return nil, false - } + merged := &metaCacheEntry{ + name: objSelected.name, + reusable: true, + cached: &xlMetaV2{metaV: objSelected.cached.metaV}, + } + merged.cached.versions = mergeXLV2Versions(r.objQuorum, r.strict, r.requestedVersions, r.candidates...) - // If all objects agree. - if selected != nil && objsAgree == objsValid { - return selected, true - } + if len(merged.cached.versions) == 0 { + return nil, false + } - // If cached is nil we shall skip the entry. - if selected.cached == nil { - return nil, false - } + buf := metaDataPoolGet() + var err error + // it may lose the pooled buffer if AppendTo fails + merged.metadata, err = merged.cached.AppendTo(buf) + if err != nil { + metaDataPoolPut(buf) + bugLogIf(context.Background(), err) + return nil, false + } - // Merge if we have disagreement. - // Create a new merged result. - selected = &metaCacheEntry{ - name: selected.name, - reusable: true, - cached: &xlMetaV2{metaV: selected.cached.metaV}, - } - selected.cached.versions = mergeXLV2Versions(r.objQuorum, r.strict, r.requestedVersions, r.candidates...) - if len(selected.cached.versions) == 0 { - return nil, false + return merged, true } - // Reserialize - var err error - selected.metadata, err = selected.cached.AppendTo(metaDataPoolGet()) - if err != nil { - bugLogIf(context.Background(), err) - return nil, false + // Fall back to synthetic directory only if object quorum was not reached. + if dirSelected != nil && dirExists >= r.dirQuorum { + return dirSelected, true } - return selected, true + + return nil, false } // firstFound returns the first found and the number of set entries. func (m metaCacheEntries) firstFound() (first *metaCacheEntry, n int) { - for i, entry := range m { - if entry.name != "" { - n++ - if first == nil { - first = &m[i] - } + // avoids copying each metaCacheEntry during range iteration + for i := range m { + if m[i].name == "" { + continue + } + n++ + if first == nil { + first = &m[i] } } + return first, n } @@ -497,22 +546,33 @@ func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, aft prevPrefix := "" vcfg, _ := globalBucketVersioningSys.Get(bucket) - for _, entry := range m.o { + for i := range m.o { + // avoid copying + entry := &m.o[i] + + if prefix != "" && !strings.HasPrefix(entry.name, prefix) { + continue + } + if entry.isObject() { if delimiter != "" { - idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter) + rest := strings.TrimPrefix(entry.name, prefix) + + idx := strings.Index(rest, delimiter) if idx >= 0 { idx = len(prefix) + idx + len(delimiter) currPrefix := entry.name[:idx] if currPrefix == prevPrefix { continue } + prevPrefix = currPrefix versions = append(versions, ObjectInfo{ IsDir: true, Bucket: bucket, Name: currPrefix, }) + continue } } @@ -531,11 +591,12 @@ func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, aft afterV = "" } + versioned := vcfg != nil && vcfg.Versioned(entry.name) + for _, version := range fiVersions { if !version.VersionPurgeStatus().Empty() { continue } - versioned := vcfg != nil && vcfg.Versioned(entry.name) versions = append(versions, version.ToObjectInfo(bucket, entry.name, versioned)) } @@ -546,15 +607,19 @@ func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, aft if delimiter == "" { continue } - idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter) + rest := strings.TrimPrefix(entry.name, prefix) + + idx := strings.Index(rest, delimiter) if idx < 0 { continue } + idx = len(prefix) + idx + len(delimiter) currPrefix := entry.name[:idx] if currPrefix == prevPrefix { continue } + prevPrefix = currPrefix versions = append(versions, ObjectInfo{ IsDir: true, @@ -575,16 +640,24 @@ func (m *metaCacheEntriesSorted) fileInfos(bucket, prefix, delimiter string) (ob vcfg, _ := globalBucketVersioningSys.Get(bucket) - for _, entry := range m.o { + for i := range m.o { + // avoid copy + entry := &m.o[i] + + if prefix != "" && !strings.HasPrefix(entry.name, prefix) { + continue + } if entry.isObject() { if delimiter != "" { - idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter) + rest := strings.TrimPrefix(entry.name, prefix) + idx := strings.Index(rest, delimiter) if idx >= 0 { idx = len(prefix) + idx + len(delimiter) currPrefix := entry.name[:idx] if currPrefix == prevPrefix { continue } + prevPrefix = currPrefix objects = append(objects, ObjectInfo{ IsDir: true, @@ -606,7 +679,8 @@ func (m *metaCacheEntriesSorted) fileInfos(bucket, prefix, delimiter string) (ob if delimiter == "" { continue } - idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter) + rest := strings.TrimPrefix(entry.name, prefix) + idx := strings.Index(rest, delimiter) if idx < 0 { continue } @@ -627,7 +701,9 @@ func (m *metaCacheEntriesSorted) fileInfos(bucket, prefix, delimiter string) (ob return objects } -// forwardTo will truncate m so only entries that are s or after is in the list. +// forwardTo will truncate m so only entries that are s or after are in the list. +// +// Requires m.o to be sorted by name. func (m *metaCacheEntriesSorted) forwardTo(s string) { if s == "" { return @@ -635,11 +711,13 @@ func (m *metaCacheEntriesSorted) forwardTo(s string) { idx := sort.Search(len(m.o), func(i int) bool { return m.o[i].name >= s }) - if m.reuse { - for i, entry := range m.o[:idx] { - metaDataPoolPut(entry.metadata) - m.o[i].metadata = nil + for i := range m.o[:idx] { + if m.reuse && cap(m.o[i].metadata) >= metaDataReadDefault { + metaDataPoolPut(m.o[i].metadata) } + // Clear references so skipped entries do not stay alive through + // the backing array. + m.o[i] = metaCacheEntry{} } m.o = m.o[idx:] @@ -662,21 +740,50 @@ func (m *metaCacheEntriesSorted) forwardPast(s string) { m.o = m.o[idx:] } -// mergeEntryChannels will merge entries from in and return them sorted on out. +// mergeEntryChannels will merge sorted entries from in and return them sorted on out. // To signify no more results are on an input channel, close it. // The output channel will be closed when all inputs are emptied. -// If file names are equal, compareMeta is called to select which one to choose. +// If file names are equal, metadata is merged/resolved. // The entry not chosen will be discarded. // If the context is canceled the function will return the error, // otherwise the function will return nil. +// +// Each input channel must produce entries sorted by name. func mergeEntryChannels(ctx context.Context, in []chan metaCacheEntry, out chan<- metaCacheEntry, readQuorum int) error { defer xioutil.SafeClose(out) - top := make([]*metaCacheEntry, len(in)) - nDone := 0 + ctxDone := ctx.Done() - // Use simpler forwarder. + releaseEntry := func(e *metaCacheEntry) { + if e == nil { + return + } + if e.reusable && e.metadata != nil { + metaDataPoolPut(e.metadata) + } + e.metadata = nil + e.cached = nil + } + + // Do not use path.Clean here. Object keys may legally contain repeated + // slashes or "." / ".." segments. We only want to treat "name" and "name/" + // as the same logical listing position for dir/object conflict handling. + sameListedName := func(a, b string) bool { + if a == b { + return true + } + return strings.TrimSuffix(a, slashSeparator) == strings.TrimSuffix(b, slashSeparator) + } + + if len(in) == 0 { + return nil + } + + // Simple forwarder for one input. if len(in) == 1 { + if in[0] == nil { + return nil + } for { select { case <-ctxDone: @@ -687,6 +794,7 @@ func mergeEntryChannels(ctx context.Context, in []chan metaCacheEntry, out chan< } select { case <-ctxDone: + releaseEntry(&v) return ctx.Err() case out <- v: } @@ -694,201 +802,334 @@ func mergeEntryChannels(ctx context.Context, in []chan metaCacheEntry, out chan< } } - selectFrom := func(idx int) error { + top := make([]*metaCacheEntry, len(in)) + done := make([]bool, len(in)) + nDone := 0 + + releaseTop := func(idx int) { + releaseEntry(top[idx]) + top[idx] = nil + } + + defer func() { + for i := range top { + releaseTop(i) + } + }() + + readTop := func(idx int) error { + if done[idx] { + top[idx] = nil + return nil + } + + if in[idx] == nil { + done[idx] = true + top[idx] = nil + nDone++ + return nil + } + select { case <-ctxDone: return ctx.Err() case entry, ok := <-in[idx]: if !ok { top[idx] = nil + done[idx] = true nDone++ - } else { - top[idx] = &entry + return nil } + top[idx] = &entry + return nil } - return nil } - // Populate all... + + discardAndReadTop := func(idx int) error { + releaseTop(idx) + return readTop(idx) + } + + // Populate initial heads. for i := range in { - if err := selectFrom(i); err != nil { + if err := readTop(i); err != nil { return err } } last := "" - var toMerge []int + toMerge := make([]int, 0, len(in)-1) - // Choose the best to return. for { if nDone == len(in) { return nil } - best := top[0] - bestIdx := 0 + var best *metaCacheEntry + bestIdx := -1 toMerge = toMerge[:0] - for i, other := range top[1:] { - otherIdx := i + 1 + + for i, other := range top { if other == nil { continue } if best == nil { best = other - bestIdx = otherIdx + bestIdx = i continue } - if path.Clean(best.name) == path.Clean(other.name) { - // We may be in a situation where we have a directory and an object with the same name. - // In that case we will drop the directory entry. - // This should however not be confused with an object with a trailing slash. + if sameListedName(best.name, other.name) { + // We may have a synthetic directory and an object with the + // same logical listing name. Drop the synthetic directory. dirMatches := best.isDir() == other.isDir() suffixMatches := strings.HasSuffix(best.name, slashSeparator) == strings.HasSuffix(other.name, slashSeparator) - // Simple case. Both are same type with same suffix. + // Same type and same slash suffix: resolve/merge them. if dirMatches && suffixMatches { - toMerge = append(toMerge, otherIdx) + toMerge = append(toMerge, i) continue } if !dirMatches { - // We have an object `name` or 'name/' and a directory `name/`. if other.isDir() { if serverDebugLog { console.Debugln("mergeEntryChannels: discarding directory", other.name, "for object", best.name) } - // Discard the directory. - if err := selectFrom(otherIdx); err != nil { + if err := discardAndReadTop(i); err != nil { return err } continue } - // Replace directory with object. if serverDebugLog { console.Debugln("mergeEntryChannels: discarding directory", best.name, "for object", other.name) } toMerge = toMerge[:0] best = other - bestIdx = otherIdx + bestIdx = i continue } - // Leave it to be resolved. Names are different. + + // Same dir/object type but different slash suffix. + // Leave normal lexical ordering to decide. } if best.name > other.name { toMerge = toMerge[:0] best = other - bestIdx = otherIdx + bestIdx = i } } + if best == nil { + return nil + } - // Merge any unmerged if len(toMerge) > 0 { - versions := make([][]xlMetaV2ShallowVersion, 0, len(toMerge)+1) - xl, err := best.xlmeta() - if err == nil { - versions = append(versions, xl.versions) - } - for _, idx := range toMerge { - other := top[idx] - if other == nil { - continue - } - xl2, err := other.xlmeta() - if err != nil { - if err := selectFrom(idx); err != nil { + // Duplicate synthetic directories: keep one, discard the rest. + if best.isDir() { + for _, idx := range toMerge { + if err := discardAndReadTop(idx); err != nil { return err } - continue } - if xl == nil { - // Discard current "best" - if err := selectFrom(bestIdx); err != nil { + toMerge = toMerge[:0] + } else { + versions := make([][]xlMetaV2ShallowVersion, 0, len(toMerge)+1) + xl, err := best.xlmeta() + if err == nil { + versions = append(versions, xl.versions) + } else { + xl = nil + } + for _, idx := range toMerge { + other := top[idx] + if other == nil { + continue + } + + xlOther, err := other.xlmeta() + if err != nil { + if err := discardAndReadTop(idx); err != nil { + return err + } + continue + } + if xl == nil { + // Current best was invalid. Discard it and promote + // the first valid duplicate. + if err := discardAndReadTop(bestIdx); err != nil { + return err + } + best = other + bestIdx = idx + xl = xlOther + } else { + if err := discardAndReadTop(idx); err != nil { + return err + } + } + versions = append(versions, xlOther.versions) + } + + if xl == nil || len(versions) == 0 { + if err := discardAndReadTop(bestIdx); err != nil { return err } - bestIdx = idx - best = other - xl = xl2 - } else { - // Mark read, unless we added it as new "best". - if err := selectFrom(idx); err != nil { + continue + } + + mergedVersions := mergeXLV2Versions(readQuorum, true, 0, versions...) + // no longer emits best when duplicate metadata merge fails to satisfy quorum + if len(mergedVersions) == 0 { + if err := discardAndReadTop(bestIdx); err != nil { return err } + continue } - versions = append(versions, xl2.versions) - } - if xl != nil && len(versions) > 0 { - // Merge all versions. 'strict' doesn't matter since we only need one. - xl.versions = mergeXLV2Versions(readQuorum, true, 0, versions...) - if meta, err := xl.AppendTo(metaDataPoolGet()); err == nil { - if best.reusable { - metaDataPoolPut(best.metadata) + xl.versions = mergedVersions + + buf := metaDataPoolGet() + meta, err := xl.AppendTo(buf) + if err != nil { + metaDataPoolPut(buf) + bugLogIf(context.Background(), err) + + if err := discardAndReadTop(bestIdx); err != nil { + return err } - best.metadata = meta - best.cached = xl + continue } + if best.reusable && best.metadata != nil { + metaDataPoolPut(best.metadata) + } + + best.metadata = meta + best.cached = xl + best.reusable = true } - toMerge = toMerge[:0] } + if best.name > last { select { case <-ctxDone: return ctx.Err() case out <- *best: last = best.name + + // Ownership of best.metadata has moved to the receiver. + // Clear top before readTop so deferred cleanup does not + // return metadata that was already sent. + top[bestIdx] = nil } - } else if serverDebugLog { + if err := readTop(bestIdx); err != nil { + return err + } + continue + } + + if serverDebugLog { console.Debugln("mergeEntryChannels: discarding duplicate", best.name, "<=", last) } - // Replace entry we just sent. - if err := selectFrom(bestIdx); err != nil { + + if err := discardAndReadTop(bestIdx); err != nil { return err } } } // merge will merge other into m. -// If the same entries exists in both and metadata matches only one is added, -// otherwise the entry from m will be placed first. -// Operation time is expected to be O(n+m). +// If the same entry exists in both and metadata matches, only one is added. +// If names are equal but metadata differs, the entry from m is placed first. +// Operation time is expected to be O(n+m), or O(limit) if limit > 0. func (m *metaCacheEntriesSorted) merge(other metaCacheEntriesSorted, limit int) { - merged := make(metaCacheEntries, 0, m.len()+other.len()) + if limit == 0 { + m.o = nil + return + } + a := m.entries() b := other.entries() + capHint := len(a) + len(b) + if limit > 0 && capHint > limit { + capHint = limit + } + + merged := make(metaCacheEntries, 0, capHint) + appendOne := func(entry metaCacheEntry) bool { + if limit > 0 && len(merged) >= limit { + return false + } + merged = append(merged, entry) + return true + } for len(a) > 0 && len(b) > 0 { + if limit > 0 && len(merged) >= limit { + break + } switch { - case a[0].name == b[0].name && bytes.Equal(a[0].metadata, b[0].metadata): - // Same, discard one. - merged = append(merged, a[0]) + case a[0].name == b[0].name: + if bytes.Equal(a[0].metadata, b[0].metadata) { + // Same entry, discard one. + if !appendOne(a[0]) { + break + } + a = a[1:] + b = b[1:] + continue + } + // Same name, different metadata. + // Preserve m first, as documented. + if !appendOne(a[0]) { + break + } a = a[1:] - b = b[1:] case a[0].name < b[0].name: - merged = append(merged, a[0]) + if !appendOne(a[0]) { + break + } a = a[1:] default: - merged = append(merged, b[0]) + if !appendOne(b[0]) { + break + } b = b[1:] } - if limit > 0 && len(merged) >= limit { - break - } } - // Append anything left. - if limit < 0 || len(merged) < limit { - merged = append(merged, a...) - merged = append(merged, b...) + for len(a) > 0 && (limit < 0 || len(merged) < limit) { + merged = append(merged, a[0]) + a = a[1:] + } + for len(b) > 0 && (limit < 0 || len(merged) < limit) { + merged = append(merged, b[0]) + b = b[1:] } m.o = merged } // filterPrefix will filter m to only contain entries with the specified prefix. +// +// Requires m.o to be sorted by name. func (m *metaCacheEntriesSorted) filterPrefix(s string) { - if s == "" { + if m == nil || s == "" { return } + m.forwardTo(s) - for i, o := range m.o { - if !o.hasPrefix(s) { - m.o = m.o[:i] - break + for i := range m.o { + if m.o[i].hasPrefix(s) { + continue } + if m.reuse { + for j := i; j < len(m.o); j++ { + if cap(m.o[j].metadata) >= metaDataReadDefault { + metaDataPoolPut(m.o[j].metadata) + } + m.o[j] = metaCacheEntry{} + } + } else { + for j := i; j < len(m.o); j++ { + m.o[j] = metaCacheEntry{} + } + } + m.o = m.o[:i] + return } } @@ -896,11 +1137,16 @@ func (m *metaCacheEntriesSorted) filterPrefix(s string) { // Order is preserved, but the underlying slice is modified. func (m *metaCacheEntriesSorted) filterObjectsOnly() { dst := m.o[:0] - for _, o := range m.o { - if !o.isDir() { - dst = append(dst, o) + for i := range m.o { + if !m.o[i].isDir() { + dst = append(dst, m.o[i]) } } + // Clear the tail so removed entries do not remain referenced + // by the underlying array. + for i := len(dst); i < len(m.o); i++ { + m.o[i] = metaCacheEntry{} + } m.o = dst } @@ -908,10 +1154,19 @@ func (m *metaCacheEntriesSorted) filterObjectsOnly() { // Order is preserved, but the underlying slice is modified. func (m *metaCacheEntriesSorted) filterPrefixesOnly() { dst := m.o[:0] - for _, o := range m.o { - if o.isDir() { - dst = append(dst, o) + for i := range m.o { + if m.o[i].isDir() { + dst = append(dst, m.o[i]) + continue } + if m.reuse && cap(m.o[i].metadata) >= metaDataReadDefault { + metaDataPoolPut(m.o[i].metadata) + } + } + // Clear the tail so removed entries do not stay referenced + // by the underlying array. + for i := len(dst); i < len(m.o); i++ { + m.o[i] = metaCacheEntry{} } m.o = dst } @@ -921,25 +1176,47 @@ func (m *metaCacheEntriesSorted) filterPrefixesOnly() { // To return root elements only set prefix to an empty string. // Order is preserved, but the underlying slice is modified. func (m *metaCacheEntriesSorted) filterRecursiveEntries(prefix, separator string) { - dst := m.o[:0] + if separator == "" { + return + } + if separator == "" { + return + } + if prefix != "" { m.forwardTo(prefix) - for _, o := range m.o { - ext := strings.TrimPrefix(o.name, prefix) - if len(ext) != len(o.name) { - if !strings.Contains(ext, separator) { - dst = append(dst, o) - } + } + + dst := m.o[:0] + + for i := range m.o { + name := m.o[i].name + + if prefix != "" { + if !strings.HasPrefix(name, prefix) { + break } - } - } else { - // No prefix, simpler - for _, o := range m.o { - if !strings.Contains(o.name, separator) { - dst = append(dst, o) + ext := strings.TrimPrefix(name, prefix) + if ext == "" || !strings.Contains(ext, separator) { + dst = append(dst, m.o[i]) + continue } + } else { + if !strings.Contains(name, separator) { + dst = append(dst, m.o[i]) + continue + } + } + if m.reuse && cap(m.o[i].metadata) >= metaDataReadDefault { + metaDataPoolPut(m.o[i].metadata) } } + + for i := len(dst); i < len(m.o); i++ { + // Clear references held by the backing array. + m.o[i] = metaCacheEntry{} + } + m.o = dst } @@ -948,15 +1225,20 @@ func (m *metaCacheEntriesSorted) truncate(n int) { if m == nil { return } - if len(m.o) > n { - if m.reuse { - for i, entry := range m.o[n:] { - metaDataPoolPut(entry.metadata) - m.o[n+i].metadata = nil - } + if n < 0 { + n = 0 + } + if len(m.o) <= n { + return + } + for i := n; i < len(m.o); i++ { + if m.reuse && cap(m.o[i].metadata) >= metaDataReadDefault { + metaDataPoolPut(m.o[i].metadata) } - m.o = m.o[:n] + // Clear references held by the backing array. + m.o[i] = metaCacheEntry{} } + m.o = m.o[:n] } // len returns the number of objects and prefix dirs in m. diff --git a/cmd/metacache-entries_test.go b/cmd/metacache-entries_test.go index 75af4d0f27909..1aec54098cf48 100644 --- a/cmd/metacache-entries_test.go +++ b/cmd/metacache-entries_test.go @@ -124,6 +124,7 @@ func Test_metaCacheEntries_filterRecursive(t *testing.T) { data.filterRecursiveEntries("src/compress/bzip2/", slashSeparator) got := data.entries().names() want := []string{"src/compress/bzip2/", "src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", "src/compress/bzip2/move_to_front.go"} + // "src/compress/bzip2/", "src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", "src/compress/bzip2/move_to_front.go", "src/compress/bzip2/testdata/" if !reflect.DeepEqual(want, got) { t.Errorf("got unexpected result: %#v", got) } From d2254e6edc41ffd5b8bb656fb33500e4d4b6ddf0 Mon Sep 17 00:00:00 2001 From: konrad Date: Fri, 26 Jun 2026 09:48:30 +0400 Subject: [PATCH 5/8] Fix race conditions on deletion of cache --- cmd/metacache-bucket.go | 75 +++++++---- cmd/metacache-bucket_test.go | 245 +++++++++++++++++++++++++++++++++++ 2 files changed, 295 insertions(+), 25 deletions(-) diff --git a/cmd/metacache-bucket.go b/cmd/metacache-bucket.go index 4df23d8253af6..801d2423457d5 100644 --- a/cmd/metacache-bucket.go +++ b/cmd/metacache-bucket.go @@ -96,6 +96,14 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache { b.mu.Lock() defer b.mu.Unlock() + if b.caches == nil { + b.caches = make(map[string]metacache) + } + + if b.cachesRoot == nil { + b.cachesRoot = make(map[string][]string) + } + // Check if exists already. if c, ok := b.caches[o.ID]; ok { c.lastHandout = time.Now() @@ -157,16 +165,16 @@ func (b *bucketMetacache) cleanup() { } remainCaches = append(remainCaches, cache) } - if len(remainCaches) > metacacheMaxEntries { - // Sort oldest last... - sort.Slice(remainCaches, func(i, j int) bool { - return remainCaches[i].lastHandout.Before(remainCaches[j].lastHandout) - }) - // Keep first metacacheMaxEntries... - for _, cache := range remainCaches[metacacheMaxEntries:] { - if time.Since(cache.lastHandout) > metacacheMaxClientWait { - remove[cache.id] = struct{}{} - } + + // Sort oldest last... + sort.Slice(remainCaches, func(i, j int) bool { + return remainCaches[i].lastHandout.Before(remainCaches[j].lastHandout) + }) + // Remove oldest entries above the limit. + excess := len(remainCaches) - metacacheMaxEntries + for _, cache := range remainCaches[:excess] { + if time.Since(cache.lastHandout) > metacacheMaxClientWait { + remove[cache.id] = struct{}{} } } } @@ -181,6 +189,9 @@ func (b *bucketMetacache) cleanup() { func (b *bucketMetacache) updateCacheEntry(update metacache) (metacache, error) { b.mu.Lock() defer b.mu.Unlock() + if b.caches == nil { + return update, errFileNotFound + } existing, ok := b.caches[update.id] if !ok { return update, errFileNotFound @@ -227,32 +238,46 @@ func (b *bucketMetacache) deleteAll() { b.mu.Lock() defer b.mu.Unlock() - b.updated = true // Delete all. ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(b.bucket, slashSeparator)) + b.updated = true b.caches = make(map[string]metacache, 10) b.cachesRoot = make(map[string][]string, 10) } +// for test purposes +var deleteMetacacheFiles = func(c metacache, ctx context.Context) { + c.delete(ctx) +} + // deleteCache will delete a specific cache and all files related to it across the cluster. func (b *bucketMetacache) deleteCache(id string) { b.mu.Lock() + defer b.mu.Unlock() c, ok := b.caches[id] - if ok { - // Delete from root map. - list := b.cachesRoot[c.root] - for i, lid := range list { - if id == lid { - list = append(list[:i], list[i+1:]...) - break - } + if !ok { + return + } + + // Delete from root map. + list := b.cachesRoot[c.root] + n := 0 + for _, lid := range list { + if lid != id { + list[n] = lid + n++ } - b.cachesRoot[c.root] = list - delete(b.caches, id) - b.updated = true } - b.mu.Unlock() - if ok { - c.delete(context.Background()) + list = list[:n] + + if len(list) == 0 { + delete(b.cachesRoot, c.root) + } else { + b.cachesRoot[c.root] = list } + + delete(b.caches, id) + b.updated = true + + deleteMetacacheFiles(c, context.Background()) } diff --git a/cmd/metacache-bucket_test.go b/cmd/metacache-bucket_test.go index 768d78538ab78..635bb00d53d8e 100644 --- a/cmd/metacache-bucket_test.go +++ b/cmd/metacache-bucket_test.go @@ -18,10 +18,255 @@ package cmd import ( + "context" "fmt" "testing" + "time" ) +func newCleanupTestBucket(bucket string) *bucketMetacache { + return &bucketMetacache{ + bucket: bucket, + caches: make(map[string]metacache), + cachesRoot: make(map[string][]string), + } +} + +func addCleanupTestCache(b *bucketMetacache, c metacache) { + b.caches[c.id] = c + b.cachesRoot[c.root] = append(b.cachesRoot[c.root], c.id) +} + +func cleanupTestID(i int) string { + return fmt.Sprintf("cleanup-test-%05d", i) +} + +func cleanupTestCache(bucket, id string, lastHandout, now time.Time) metacache { + return metacache{ + id: id, + bucket: bucket, + root: "root/", + started: now.Add(-time.Minute), + ended: now, + lastUpdate: now, + lastHandout: lastHandout, + status: scanStateSuccess, + } +} + +func hasRootIndex(b *bucketMetacache, root, id string) bool { + for _, got := range b.cachesRoot[root] { + if got == id { + return true + } + } + return false +} + +func Test_bucketMetacache_deleteCache_DoesNotRecreateSameIDBeforeDiskDeleteFinished(t *testing.T) { + const bucket = "delete-cache-race-bucket" + const id = "delete-cache-race-id" + const root = "root/" + + b := &bucketMetacache{ + bucket: bucket, + caches: map[string]metacache{ + id: { + id: id, + bucket: bucket, + root: root, + }, + }, + cachesRoot: map[string][]string{ + root: {id}, + }, + } + + deleteStarted := make(chan struct{}) + allowDeleteFinish := make(chan struct{}) + deleteFinished := make(chan struct{}) + deleteErr := make(chan error, 1) + + oldDeleteMetacacheFiles := deleteMetacacheFiles + deleteMetacacheFiles = func(c metacache, ctx context.Context) { + if c.id != id { + deleteErr <- fmt.Errorf("deleted cache id = %q, want %q", c.id, id) + return + } + + close(deleteStarted) + <-allowDeleteFinish + close(deleteFinished) + } + defer func() { + deleteMetacacheFiles = oldDeleteMetacacheFiles + }() + + go b.deleteCache(id) + + select { + case <-deleteStarted: + case err := <-deleteErr: + t.Fatal(err) + case <-time.After(time.Second): + t.Fatal("deleteCache did not start disk delete") + } + + recreated := make(chan metacache, 1) + + go func() { + recreated <- b.findCache(listPathOptions{ + Bucket: bucket, + ID: id, + Create: true, + }) + }() + + select { + case c := <-recreated: + close(allowDeleteFinish) + + select { + case <-deleteFinished: + case <-time.After(time.Second): + t.Fatal("disk delete did not finish") + } + + t.Fatalf("findCache recreated cache %q before old disk delete finished: %+v", id, c) + + case <-time.After(50 * time.Millisecond): + // Expected after the fix: findCache must not be able to recreate the same + // ID while old on-disk files are still being deleted. + } + + close(allowDeleteFinish) + + select { + case <-deleteFinished: + case <-time.After(time.Second): + t.Fatal("disk delete did not finish after being released") + } + + select { + case c := <-recreated: + if c.id != id { + t.Fatalf("recreated cache id = %q, want %q", c.id, id) + } + case <-time.After(time.Second): + t.Fatal("findCache did not complete after disk delete finished") + } +} + +func Test_bucketMetacache_cleanup_OverLimitRemovesOldestCaches(t *testing.T) { + const bucket = "cleanup-test-bucket" + + now := time.Now().Truncate(time.Second) + b := newCleanupTestBucket(bucket) + + total := metacacheMaxEntries + 2 + excess := total - metacacheMaxEntries + + // Spread all handout times between: + // > metacacheMaxClientWait + // and + // < 5 * metacacheMaxClientWait + // + // This makes every entry: + // 1. still worthKeeping() + // 2. old enough to be eligible for limit-based eviction + // + // ID 0 is the oldest, last ID is the newest. + step := (2 * metacacheMaxClientWait) / time.Duration(total+1) + if step <= 0 { + step = time.Nanosecond + } + + for i := 0; i < total; i++ { + id := cleanupTestID(i) + + age := metacacheMaxClientWait + step*time.Duration(total-i) + lastHandout := now.Add(-age) + + addCleanupTestCache(b, cleanupTestCache(bucket, id, lastHandout, now)) + } + + b.cleanup() + + if got, want := len(b.caches), metacacheMaxEntries; got != want { + t.Fatalf("cache count after cleanup = %d, want %d", got, want) + } + + for i := 0; i < excess; i++ { + id := cleanupTestID(i) + + if _, ok := b.caches[id]; ok { + t.Fatalf("oldest cache %q was kept, want removed", id) + } + if hasRootIndex(b, "root/", id) { + t.Fatalf("oldest cache %q still present in cachesRoot index", id) + } + } + + for i := excess; i < total; i++ { + id := cleanupTestID(i) + + if _, ok := b.caches[id]; !ok { + t.Fatalf("cache %q was removed, want kept", id) + } + if !hasRootIndex(b, "root/", id) { + t.Fatalf("cache %q missing from cachesRoot index", id) + } + } +} + +func Test_bucketMetacache_cleanup_OverLimitDoesNotOnlyLookAtNewestCaches(t *testing.T) { + const bucket = "cleanup-test-bucket" + + now := time.Now() + b := newCleanupTestBucket(bucket) + + total := metacacheMaxEntries + 2 + excess := total - metacacheMaxEntries + + for i := 0; i < total; i++ { + id := cleanupTestID(i) + + var lastHandout time.Time + if i < excess { + // These are the only entries old enough to be evicted due to the limit. + lastHandout = now.Add(-(metacacheMaxClientWait + metacacheMaxClientWait/2 + time.Duration(i)*time.Millisecond)) + } else { + // These entries are newer than metacacheMaxClientWait and should not be + // selected for limit-based eviction. + lastHandout = now.Add(-metacacheMaxClientWait / 2) + } + + addCleanupTestCache(b, cleanupTestCache(bucket, id, lastHandout, now)) + } + + b.cleanup() + + if got, want := len(b.caches), metacacheMaxEntries; got != want { + t.Fatalf("cache count after cleanup = %d, want %d; cleanup probably inspected newest entries instead of oldest", got, want) + } + + for i := 0; i < excess; i++ { + id := cleanupTestID(i) + + if _, ok := b.caches[id]; ok { + t.Fatalf("old eligible cache %q was kept, want removed", id) + } + } + + for i := excess; i < total; i++ { + id := cleanupTestID(i) + + if _, ok := b.caches[id]; !ok { + t.Fatalf("recent cache %q was removed, want kept", id) + } + } +} + func Benchmark_bucketMetacache_findCache(b *testing.B) { bm := newBucketMetacache("", false) const elements = 50000 From ab9a19d67cc47030c97397cc74ab60e8751c0308 Mon Sep 17 00:00:00 2001 From: konrad Date: Fri, 26 Jun 2026 17:36:41 +0400 Subject: [PATCH 6/8] More test for metacache-server-pool and a bunch of fixes --- cmd/metacache-server-pool.go | 289 +++++--- cmd/metacache-server-pool_test.go | 1018 +++++++++++++++++++++++++++++ 2 files changed, 1231 insertions(+), 76 deletions(-) create mode 100644 cmd/metacache-server-pool_test.go diff --git a/cmd/metacache-server-pool.go b/cmd/metacache-server-pool.go index bcdfed8f10ff4..4d56dab20e55c 100644 --- a/cmd/metacache-server-pool.go +++ b/cmd/metacache-server-pool.go @@ -28,28 +28,64 @@ import ( "sync" "time" + "github.com/minio/minio/internal/bucket/lifecycle" "github.com/minio/minio/internal/grid" xioutil "github.com/minio/minio/internal/ioutil" ) +var listMergedFn = func( + z *erasureServerPools, + ctx context.Context, + o listPathOptions, + filterCh chan<- metaCacheEntry, +) error { + return z.listMerged(ctx, o, filterCh) +} + func renameAllBucketMetacache(epPath string) error { // Rename all previous `.minio.sys/buckets//.metacache` to // to `.minio.sys/tmp/` for deletion. return readDirFn(pathJoin(epPath, minioMetaBucket, bucketMetaPrefix), func(name string, typ os.FileMode) error { - if typ == os.ModeDir { - tmpMetacacheOld := pathutil.Join(epPath, minioMetaTmpDeletedBucket, mustGetUUID()) - if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)), - tmpMetacacheOld, epPath); err != nil && err != errFileNotFound { - return fmt.Errorf("unable to rename (%s -> %s) %w", - pathJoin(epPath, minioMetaBucket+metacachePrefixForID(minioMetaBucket, slashSeparator)), - tmpMetacacheOld, - osErrToFileErr(err)) - } + if typ&os.ModeDir == 0 { + return nil + } + srcMetacacheOld := pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)) + tmpMetacacheOld := pathutil.Join(epPath, minioMetaTmpDeletedBucket, mustGetUUID()) + + if err := renameAll(srcMetacacheOld, tmpMetacacheOld, epPath); !errors.Is(err, errFileNotFound) { + return fmt.Errorf("unable to rename (%s -> %s) %w", + srcMetacacheOld, + tmpMetacacheOld, + osErrToFileErr(err), + ) } return nil }) } +var markMetacacheListingUnused = func(o listPathOptions) { + rpc := globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix)) + if rpc == nil { + return + } + + ctx, cancel := context.WithTimeout(GlobalContext, 5*time.Second) + defer cancel() + + c, err := rpc.GetMetacacheListing(ctx, o) + if err == nil && c != nil { + c.error = "no longer used" + c.status = scanStateError + rpc.UpdateMetacacheListing(ctx, *c) + } +} + +func markMetacacheListingUnusedAsyncAndClearID(o *listPathOptions) { + opts := *o + go markMetacacheListingUnused(opts) + o.ID = "" +} + // listPath will return the requested entries. // If no more entries are in the listing io.EOF is returned, // otherwise nil or an unexpected error is returned. @@ -191,20 +227,7 @@ func (z *erasureServerPools) listPath(ctx context.Context, o *listPathOptions) ( return entries, err } entries.truncate(0) - go func() { - rpc := globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix)) - if rpc != nil { - ctx, cancel := context.WithTimeout(GlobalContext, 5*time.Second) - defer cancel() - c, err := rpc.GetMetacacheListing(ctx, *o) - if err == nil { - c.error = "no longer used" - c.status = scanStateError - rpc.UpdateMetacacheListing(ctx, *c) - } - } - }() - o.ID = "" + markMetacacheListingUnusedAsyncAndClearID(o) } if contextCanceled(ctx) { @@ -228,7 +251,7 @@ func (z *erasureServerPools) listPath(ctx context.Context, o *listPathOptions) ( // listing exactly at a specific limit. o.StopDiskAtLimit = true } - listErr = z.listMerged(listCtx, o, filterCh) + listErr = listMergedFn(z, listCtx, o, filterCh) o.debugln("listMerged returned with", listErr) }(*o) @@ -254,6 +277,17 @@ func (z *erasureServerPools) listPath(ctx context.Context, o *listPathOptions) ( return entries, nil } +var listPathOnSetFn = func( + set *erasureObjects, + ctx context.Context, + o listPathOptions, + results chan<- metaCacheEntry, +) error { + return set.listPath(ctx, o, results) +} + +var mergeEntryChannelsFn = mergeEntryChannels + // listMerged will list across all sets and return a merged results stream. // The result channel is closed when no more results are expected. func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, results chan<- metaCacheEntry) error { @@ -273,7 +307,7 @@ func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, inputs = append(inputs, innerResults) go func(i int, set *erasureObjects) { defer wg.Done() - err := set.listPath(listCtx, o, innerResults) + err := listPathOnSetFn(set, listCtx, o, innerResults) mu.Lock() defer mu.Unlock() if err == nil { @@ -288,12 +322,16 @@ func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, // Gather results to a single channel. // Quorum is one since we are merging across sets. - err := mergeEntryChannels(ctx, inputs, results, 1) + err := mergeEntryChannelsFn(ctx, inputs, results, 1) cancelList() wg.Wait() // we should return 'errs' from per disk + if err != nil { + return err + } + if isAllNotFound(errs) { if isAllVolumeNotFound(errs) { return errVolumeNotFound @@ -301,10 +339,6 @@ func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, return nil } - if err != nil { - return err - } - if contextCanceled(ctx) { return ctx.Err() } @@ -326,6 +360,26 @@ func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, return nil } +var metaCacheEntryFileInfoFn = func(obj metaCacheEntry, bucket string) (FileInfo, error) { + return obj.fileInfo(bucket) +} + +var metaCacheEntryFileInfoVersionsFn = func(obj metaCacheEntry, bucket string) (FileInfoVersions, error) { + return obj.fileInfoVersions(bucket) +} + +var evalLifecycleForListObjectFn = func(ctx context.Context, o listPathOptions, objInfo ObjectInfo) lifecycle.Event { + return evalActionFromLifecycle(ctx, *o.Lifecycle, o.Retention, o.Replication.Config, objInfo) +} + +var enqueueExpiryByDaysFn = func(objInfo ObjectInfo, evt lifecycle.Event, src lcEventSrc) { + globalExpiryState.enqueueByDays(objInfo, evt, src) +} + +var queueReplicationHealFn = func(ctx context.Context, o listPathOptions, objInfo ObjectInfo) { + queueReplicationHeal(ctx, o.Bucket, objInfo, o.Replication, 0) +} + // triggerExpiryAndRepl applies lifecycle and replication actions on the listing // It returns true if the listing is non-versioned and the given object is expired. func triggerExpiryAndRepl(ctx context.Context, o listPathOptions, obj metaCacheEntry) (skip bool) { @@ -336,19 +390,19 @@ func triggerExpiryAndRepl(ctx context.Context, o listPathOptions, obj metaCacheE // filter out between versions 'obj' cannot be truncated // in such a manner, so look for skipping an object only // for regular ListObjects() call only. - if !o.Versioned && !o.V1 { - fi, err := obj.fileInfo(o.Bucket) + if !o.Versioned { + fi, err := metaCacheEntryFileInfoFn(obj, o.Bucket) if err != nil { return skip } objInfo := fi.ToObjectInfo(o.Bucket, obj.name, versioned) if o.Lifecycle != nil { - act := evalActionFromLifecycle(ctx, *o.Lifecycle, o.Retention, o.Replication.Config, objInfo).Action + act := evalLifecycleForListObjectFn(ctx, o, objInfo).Action skip = act.Delete() && !act.DeleteRestored() } } - fiv, err := obj.fileInfoVersions(o.Bucket) + fiv, err := metaCacheEntryFileInfoVersionsFn(obj, o.Bucket) if err != nil { return skip } @@ -358,23 +412,94 @@ func triggerExpiryAndRepl(ctx context.Context, o listPathOptions, obj metaCacheE objInfo := version.ToObjectInfo(o.Bucket, obj.name, versioned) if o.Lifecycle != nil { - evt := evalActionFromLifecycle(ctx, *o.Lifecycle, o.Retention, o.Replication.Config, objInfo) + evt := evalLifecycleForListObjectFn(ctx, o, objInfo) if evt.Action.Delete() { - globalExpiryState.enqueueByDays(objInfo, evt, lcEventSrc_s3ListObjects) + enqueueExpiryByDaysFn(objInfo, evt, lcEventSrc_s3ListObjects) if !evt.Action.DeleteRestored() { continue } // queue version for replication upon expired restored copies if needed. } } - queueReplicationHeal(ctx, o.Bucket, objInfo, o.Replication, 0) + queueReplicationHealFn(ctx, o, objInfo) } return skip } +// seams for tests +var listAndSaveGetAvailablePoolIdxFn = func(z *erasureServerPools, ctx context.Context, bucket string, object string, size int64) int { + return z.getAvailablePoolIdx(ctx, bucket, object, size) +} + +var listAndSaveGetHashedSetIndexFn = func(pool *erasureSets, id string) int { + return pool.getHashedSetIndex(id) +} + +var listAndSaveSaveMetaCacheStreamFn = func(saver *erasureObjects, ctx context.Context, meta *metaCacheRPC, saveCh <-chan metaCacheEntry) error { + return saver.saveMetaCacheStream(ctx, meta, saveCh) +} + +var listAndSaveListMergedFn = func(z *erasureServerPools, ctx context.Context, o listPathOptions, results chan<- metaCacheEntry) error { + return z.listMerged(ctx, o, results) +} + +var listAndSaveRestClientFromHashFn = func(hash string) *peerRESTClient { + return globalNotificationSys.restClientFromHash(hash) +} + +// helper +func listAndSaveFanOut( + ctx context.Context, + listCtx context.Context, + o listPathOptions, + inCh <-chan metaCacheEntry, + outCh chan<- metaCacheEntry, + saveCh chan<- metaCacheEntry, + funcReturned *bool, + funcReturnedMu *sync.Mutex, +) { + var returned bool + + defer xioutil.SafeClose(saveCh) + + for entry := range inCh { + if o.shouldSkip(ctx, entry) { + continue + } + + if !returned { + funcReturnedMu.Lock() + returned = *funcReturned + funcReturnedMu.Unlock() + + if returned { + xioutil.SafeClose(outCh) + } else { + select { + case outCh <- entry: + case <-listCtx.Done(): + return + } + } + } + + entry.reusable = returned + + select { + case saveCh <- entry: + case <-listCtx.Done(): + return + } + } + + if !returned { + xioutil.SafeClose(outCh) + } +} + func (z *erasureServerPools) listAndSave(ctx context.Context, o *listPathOptions) (entries metaCacheEntriesSorted, err error) { // Use ID as the object name... - o.pool = z.getAvailablePoolIdx(ctx, minioMetaBucket, o.ID, 10<<20) + o.pool = listAndSaveGetAvailablePoolIdxFn(z, ctx, minioMetaBucket, o.ID, 10<<20) if o.pool < 0 { // No space or similar, don't persist the listing. o.pool = 0 @@ -383,11 +508,13 @@ func (z *erasureServerPools) listAndSave(ctx context.Context, o *listPathOptions o.Transient = true return entries, errDiskFull } - o.set = z.serverPools[o.pool].getHashedSetIndex(o.ID) + o.set = listAndSaveGetHashedSetIndexFn(z.serverPools[o.pool], o.ID) saver := z.serverPools[o.pool].sets[o.set] - // Disconnect from call above, but cancel on exit. + // Disconnect from request context so the metacache save can continue + // after this call has returned enough entries to the caller. listCtx, cancel := context.WithCancel(GlobalContext) + saveCh := make(chan metaCacheEntry, metacacheBlockSize) inCh := make(chan metaCacheEntry, metacacheBlockSize) outCh := make(chan metaCacheEntry, o.Limit) @@ -395,62 +522,72 @@ func (z *erasureServerPools) listAndSave(ctx context.Context, o *listPathOptions filteredResults := o.gatherResults(ctx, outCh) mc := o.newMetacache() - meta := metaCacheRPC{meta: &mc, cancel: cancel, rpc: globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix)), o: *o} + meta := metaCacheRPC{ + meta: &mc, + cancel: cancel, + rpc: listAndSaveRestClientFromHashFn(pathJoin(o.Bucket, o.Prefix)), + o: *o, + } // Save listing... go func() { - if err := saver.saveMetaCacheStream(listCtx, &meta, saveCh); err != nil { + defer cancel() + if err := listAndSaveSaveMetaCacheStreamFn(saver, listCtx, &meta, saveCh); err != nil { + meta.setErr(err.Error()) + } + }() + + saveDone := make(chan struct{}) + go func() { + defer close(saveDone) + defer cancel() + + if err := listAndSaveSaveMetaCacheStreamFn(saver, listCtx, &meta, saveCh); err != nil { meta.setErr(err.Error()) } - cancel() }() // Do listing... + listErrCh := make(chan error, 1) + go func(o listPathOptions) { - err := z.listMerged(listCtx, o, inCh) + err := listAndSaveListMergedFn(z, listCtx, o, inCh) if err != nil { meta.setErr(err.Error()) } + o.debugln("listAndSave: listing", o.ID, "finished with ", err) + + listErrCh <- err + close(listErrCh) }(*o) // Keep track of when we return since we no longer have to send entries to output. var funcReturned bool var funcReturnedMu sync.Mutex - defer func() { - funcReturnedMu.Lock() - funcReturned = true - funcReturnedMu.Unlock() - }() - // Write listing to results and saver. - go func() { - var returned bool - for entry := range inCh { - if o.shouldSkip(ctx, entry) { - continue - } - if !returned { - funcReturnedMu.Lock() - returned = funcReturned - funcReturnedMu.Unlock() - outCh <- entry - if returned { - xioutil.SafeClose(outCh) - } - } - entry.reusable = returned - saveCh <- entry - } - if !returned { - xioutil.SafeClose(outCh) - } - xioutil.SafeClose(saveCh) - }() + + go listAndSaveFanOut( + ctx, listCtx, *o, + inCh, outCh, saveCh, + &funcReturned, &funcReturnedMu, + ) entries, err = filteredResults() - if err == nil { - // Check if listing recorded an error. - err = meta.getErr() + + funcReturnedMu.Lock() + funcReturned = true + funcReturnedMu.Unlock() + + if errors.Is(err, io.EOF) { + if listErr := <-listErrCh; listErr != nil { + return entries, listErr + } } + if metaErr := meta.getErr(); metaErr != nil { + if err == nil || errors.Is(err, io.EOF) { + err = metaErr + } + } + return entries, err } diff --git a/cmd/metacache-server-pool_test.go b/cmd/metacache-server-pool_test.go new file mode 100644 index 0000000000000..5a24b37881da3 --- /dev/null +++ b/cmd/metacache-server-pool_test.go @@ -0,0 +1,1018 @@ +package cmd + +import ( + "context" + "errors" + "io" + "strings" + "sync" + "testing" + "time" + + "github.com/minio/minio/internal/bucket/lifecycle" +) + +func rawListPathOptionsForErrorTest() listPathOptions { + return listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + Separator: slashSeparator, + Recursive: false, + Limit: 100, + + // Important: avoid metacache/RPC branches. + Transient: true, + Create: true, + } +} + +func Test_erasureServerPools_listPath_InvalidBucketReturnsError(t *testing.T) { + z := &erasureServerPools{} + + o := listPathOptions{ + Bucket: "", + Prefix: "photos/", + Limit: 100, + } + + entries, err := z.listPath(context.Background(), &o) + if err == nil { + t.Fatal("expected error for invalid bucket, got nil") + } + + if errors.Is(err, io.EOF) { + t.Fatalf("expected validation error, got io.EOF") + } + + if entries.len() != 0 { + t.Fatalf("entries length = %d, want 0", entries.len()) + } +} + +func Test_erasureServerPools_listPath_CanceledContextReturnsCanceled(t *testing.T) { + z := &erasureServerPools{} + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + o := rawListPathOptionsForErrorTest() + + entries, err := z.listPath(ctx, &o) + if !errors.Is(err, context.Canceled) { + t.Fatalf("listPath error = %v, want context.Canceled", err) + } + + if entries.len() != 0 { + t.Fatalf("entries length = %d, want 0", entries.len()) + } +} + +func Test_erasureServerPools_listPath_EarlyReturnsEOF(t *testing.T) { + z := &erasureServerPools{} + ctx := context.Background() + + tests := []struct { + name string + opts listPathOptions + }{ + { + name: "limit zero", + opts: listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + Limit: 0, + }, + }, + { + name: "marker outside prefix", + opts: listPathOptions{ + Bucket: "testbucket", + Prefix: "a/", + Marker: "b/object", + Limit: 100, + }, + }, + { + name: "prefix starts with slash", + opts: listPathOptions{ + Bucket: "testbucket", + Prefix: SlashSeparator, + Limit: 100, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + entries, err := z.listPath(ctx, &tt.opts) + + if !errors.Is(err, io.EOF) { + t.Fatalf("listPath error = %v, want io.EOF", err) + } + + if entries.len() != 0 { + t.Fatalf("entries length = %d, want 0", entries.len()) + } + }) + } +} + +func Test_erasureServerPools_listPath_ListMergedDeadlineExceededIsReturned(t *testing.T) { + z := &erasureServerPools{} + + oldListMergedFn := listMergedFn + listMergedFn = func( + z *erasureServerPools, + ctx context.Context, + o listPathOptions, + filterCh chan<- metaCacheEntry, + ) error { + close(filterCh) + return context.DeadlineExceeded + } + t.Cleanup(func() { + listMergedFn = oldListMergedFn + }) + + o := rawListPathOptionsForErrorTest() + + entries, err := z.listPath(context.Background(), &o) + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("listPath error = %v, want context.DeadlineExceeded", err) + } + + if entries.len() != 0 { + t.Fatalf("entries length = %d, want 0", entries.len()) + } +} + +func Test_erasureServerPools_listPath_MarkerBeforePrefixIsCleared(t *testing.T) { + z := &erasureServerPools{} + ctx := context.Background() + + o := listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + Marker: "abc", + Limit: 0, + } + + _, err := z.listPath(ctx, &o) + if !errors.Is(err, io.EOF) { + t.Fatalf("listPath error = %v, want io.EOF", err) + } + + if o.Marker != "" { + t.Fatalf("Marker = %q, want empty", o.Marker) + } +} + +func Test_erasureServerPools_listPath_RawListingNormalizesOptionsAndCallsListMerged(t *testing.T) { + z := &erasureServerPools{} + ctx := context.Background() + + oldListMergedFn := listMergedFn + t.Cleanup(func() { + listMergedFn = oldListMergedFn + }) + + gotOptions := make(chan listPathOptions, 1) + + listMergedFn = func( + z *erasureServerPools, + ctx context.Context, + o listPathOptions, + filterCh chan<- metaCacheEntry, + ) error { + gotOptions <- o + close(filterCh) + return io.EOF + } + + o := listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/2026/", + Separator: slashSeparator, + Recursive: false, + Limit: 100, + + // Force raw listing path and avoid metacache RPC/cache lookup branches. + Transient: true, + Create: true, + } + + entries, err := z.listPath(ctx, &o) + if !errors.Is(err, io.EOF) { + t.Fatalf("listPath error = %v, want io.EOF", err) + } + + if entries.len() != 0 { + t.Fatalf("entries length = %d, want 0", entries.len()) + } + + var got listPathOptions + + select { + case got = <-gotOptions: + case <-time.After(time.Second): + t.Fatal("listMergedFn was not called") + } + + if got.Bucket != "testbucket" { + t.Fatalf("Bucket = %q, want %q", got.Bucket, "testbucket") + } + + if got.Prefix != "photos/2026/" { + t.Fatalf("Prefix = %q, want %q", got.Prefix, "photos/2026/") + } + + if got.Separator != slashSeparator { + t.Fatalf("Separator = %q, want %q", got.Separator, slashSeparator) + } + + if got.Recursive { + t.Fatal("Recursive = true, want false for slash separator non-recursive listing") + } + + if !got.IncludeDirectories { + t.Fatal("IncludeDirectories = false, want true for slash separator listing") + } + + if !got.Transient { + t.Fatal("Transient = false, want true") + } + + if got.Create { + t.Fatal("Create = true, want false because Transient forces Create=false") + } + + if !got.StopDiskAtLimit { + t.Fatal("StopDiskAtLimit = false, want true when Lifecycle is nil") + } + + if got.BaseDir == "" { + t.Fatal("BaseDir is empty, want it to be derived from Prefix") + } +} + +func Test_erasureServerPools_listPath_EmptySeparatorBecomesRecursiveSlashListing(t *testing.T) { + z := &erasureServerPools{} + ctx := context.Background() + + oldListMergedFn := listMergedFn + t.Cleanup(func() { + listMergedFn = oldListMergedFn + }) + + gotOptions := make(chan listPathOptions, 1) + + listMergedFn = func( + z *erasureServerPools, + ctx context.Context, + o listPathOptions, + filterCh chan<- metaCacheEntry, + ) error { + gotOptions <- o + close(filterCh) + return io.EOF + } + + o := listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + Separator: "", + Recursive: false, + Limit: 100, + Transient: true, + Create: true, + } + + _, err := z.listPath(ctx, &o) + if !errors.Is(err, io.EOF) { + t.Fatalf("listPath error = %v, want io.EOF", err) + } + + var got listPathOptions + + select { + case got = <-gotOptions: + case <-time.After(time.Second): + t.Fatal("listMergedFn was not called") + } + + if got.Separator != slashSeparator { + t.Fatalf("Separator = %q, want %q", got.Separator, slashSeparator) + } + + if !got.Recursive { + t.Fatal("Recursive = false, want true when original separator is empty") + } + + if got.IncludeDirectories { + t.Fatal("IncludeDirectories = true, want false when original separator was empty") + } + + if got.Create { + t.Fatal("Create = true, want false because Transient forces Create=false") + } +} + +func Test_erasureServerPools_listPath_ListMergedContextCanceledIsNotReturnedAsListErr(t *testing.T) { + z := &erasureServerPools{} + + oldListMergedFn := listMergedFn + listMergedFn = func( + z *erasureServerPools, + ctx context.Context, + o listPathOptions, + filterCh chan<- metaCacheEntry, + ) error { + close(filterCh) + return context.Canceled + } + t.Cleanup(func() { + listMergedFn = oldListMergedFn + }) + + o := rawListPathOptionsForErrorTest() + + entries, err := z.listPath(context.Background(), &o) + + // listPath intentionally ignores listMerged's context.Canceled: + // + // if listErr != nil && !errors.Is(listErr, context.Canceled) { + // return entries, listErr + // } + // + // With no entries emitted, the final result should be io.EOF. + if !errors.Is(err, io.EOF) { + t.Fatalf("listPath error = %v, want io.EOF", err) + } + + if entries.len() != 0 { + t.Fatalf("entries length = %d, want 0", entries.len()) + } +} + +func Test_erasureServerPools_listPath_ListMergedErrorIsReturned(t *testing.T) { + z := &erasureServerPools{} + + wantErr := errors.New("forced listMerged failure") + + oldListMergedFn := listMergedFn + listMergedFn = func( + z *erasureServerPools, + ctx context.Context, + o listPathOptions, + filterCh chan<- metaCacheEntry, + ) error { + close(filterCh) + return wantErr + } + t.Cleanup(func() { + listMergedFn = oldListMergedFn + }) + + o := listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + Separator: slashSeparator, + Recursive: false, + Limit: 100, + + // Avoid metacache/RPC branches. + Transient: true, + Create: true, + } + + entries, err := z.listPath(context.Background(), &o) + if !errors.Is(err, wantErr) { + t.Fatalf("listPath error = %v, want %v", err, wantErr) + } + + if entries.len() != 0 { + t.Fatalf("entries length = %d, want 0", entries.len()) + } +} + +func Test_markMetacacheListingUnusedAsyncAndClearID_UsesOriginalListID(t *testing.T) { + const bucket = "testbucket" + const prefix = "photos/" + const failedID = "failed-list-id" + + gotOptions := make(chan listPathOptions, 1) + + oldMarkMetacacheListingUnused := markMetacacheListingUnused + markMetacacheListingUnused = func(o listPathOptions) { + gotOptions <- o + } + t.Cleanup(func() { + markMetacacheListingUnused = oldMarkMetacacheListingUnused + }) + + o := &listPathOptions{ + Bucket: bucket, + Prefix: prefix, + ID: failedID, + } + + markMetacacheListingUnusedAsyncAndClearID(o) + + if o.ID != "" { + t.Fatalf("listPathOptions.ID after clear = %q, want empty", o.ID) + } + + select { + case got := <-gotOptions: + if got.ID != failedID { + t.Fatalf("markMetacacheListingUnused got ID = %q, want %q", got.ID, failedID) + } + + if got.Bucket != bucket { + t.Fatalf("markMetacacheListingUnused got Bucket = %q, want %q", got.Bucket, bucket) + } + + if got.Prefix != prefix { + t.Fatalf("markMetacacheListingUnused got Prefix = %q, want %q", got.Prefix, prefix) + } + + case <-time.After(time.Second): + t.Fatal("markMetacacheListingUnused was not called") + } +} + +func Test_erasureServerPools_listMerged_DoesNotMaskMergeErrorWhenAllSetsNotFound(t *testing.T) { + z := &erasureServerPools{ + serverPools: []*erasureSets{ + { + sets: []*erasureObjects{ + nil, + nil, + }, + }, + }, + } + + wantErr := errors.New("forced merge failure") + + oldListPathOnSetFn := listPathOnSetFn + oldMergeEntryChannelsFn := mergeEntryChannelsFn + + listPathOnSetFn = func( + set *erasureObjects, + ctx context.Context, + o listPathOptions, + results chan<- metaCacheEntry, + ) error { + return errFileNotFound + } + + mergeEntryChannelsFn = func( + ctx context.Context, + inputs []chan metaCacheEntry, + out chan<- metaCacheEntry, + quorum int, + ) error { + return wantErr + } + + t.Cleanup(func() { + listPathOnSetFn = oldListPathOnSetFn + mergeEntryChannelsFn = oldMergeEntryChannelsFn + }) + + results := make(chan metaCacheEntry, 10) + + err := z.listMerged(context.Background(), listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + Limit: 100, + }, results) + + if !errors.Is(err, wantErr) { + t.Fatalf("listMerged error = %v, want merge error %v", err, wantErr) + } +} + +func Test_erasureServerPools_listMerged_AllSetsEOFReturnsEOF(t *testing.T) { + z := &erasureServerPools{ + serverPools: []*erasureSets{ + { + sets: []*erasureObjects{ + nil, + nil, + }, + }, + }, + } + + oldListPathOnSetFn := listPathOnSetFn + oldMergeEntryChannelsFn := mergeEntryChannelsFn + + listPathOnSetFn = func( + set *erasureObjects, + ctx context.Context, + o listPathOptions, + results chan<- metaCacheEntry, + ) error { + return io.EOF + } + + mergeEntryChannelsFn = func( + ctx context.Context, + inputs []chan metaCacheEntry, + out chan<- metaCacheEntry, + quorum int, + ) error { + return nil + } + + t.Cleanup(func() { + listPathOnSetFn = oldListPathOnSetFn + mergeEntryChannelsFn = oldMergeEntryChannelsFn + }) + + results := make(chan metaCacheEntry, 10) + + err := z.listMerged(context.Background(), listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + Limit: 100, + }, results) + + if !errors.Is(err, io.EOF) { + t.Fatalf("listMerged error = %v, want io.EOF", err) + } +} + +func Test_erasureServerPools_listMerged_OneSuccessfulSetReturnsNil(t *testing.T) { + z := &erasureServerPools{ + serverPools: []*erasureSets{ + { + sets: []*erasureObjects{ + nil, + nil, + }, + }, + }, + } + + var call int + + oldListPathOnSetFn := listPathOnSetFn + oldMergeEntryChannelsFn := mergeEntryChannelsFn + + listPathOnSetFn = func( + set *erasureObjects, + ctx context.Context, + o listPathOptions, + results chan<- metaCacheEntry, + ) error { + call++ + if call == 1 { + return nil + } + return io.EOF + } + + mergeEntryChannelsFn = func( + ctx context.Context, + inputs []chan metaCacheEntry, + out chan<- metaCacheEntry, + quorum int, + ) error { + return nil + } + + t.Cleanup(func() { + listPathOnSetFn = oldListPathOnSetFn + mergeEntryChannelsFn = oldMergeEntryChannelsFn + }) + + results := make(chan metaCacheEntry, 10) + + err := z.listMerged(context.Background(), listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + Limit: 100, + }, results) + + if err != nil { + t.Fatalf("listMerged error = %v, want nil", err) + } +} + +func Test_erasureServerPools_listMerged_UnexpectedSetErrorIsReturned(t *testing.T) { + z := &erasureServerPools{ + serverPools: []*erasureSets{ + { + sets: []*erasureObjects{ + nil, + nil, + }, + }, + }, + } + + wantErr := errors.New("forced set failure") + + var call int + + oldListPathOnSetFn := listPathOnSetFn + oldMergeEntryChannelsFn := mergeEntryChannelsFn + + listPathOnSetFn = func( + set *erasureObjects, + ctx context.Context, + o listPathOptions, + results chan<- metaCacheEntry, + ) error { + call++ + if call == 1 { + return wantErr + } + return io.EOF + } + + mergeEntryChannelsFn = func( + ctx context.Context, + inputs []chan metaCacheEntry, + out chan<- metaCacheEntry, + quorum int, + ) error { + return nil + } + + t.Cleanup(func() { + listPathOnSetFn = oldListPathOnSetFn + mergeEntryChannelsFn = oldMergeEntryChannelsFn + }) + + results := make(chan metaCacheEntry, 10) + + err := z.listMerged(context.Background(), listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + Limit: 100, + }, results) + + if !errors.Is(err, wantErr) { + t.Fatalf("listMerged error = %v, want %v", err, wantErr) + } +} + +func Test_triggerExpiryAndRepl_ListObjectsV1ExpiredObjectIsSkipped(t *testing.T) { + const bucket = "testbucket" + const object = "expired-object" + + oldFileInfoFn := metaCacheEntryFileInfoFn + oldFileInfoVersionsFn := metaCacheEntryFileInfoVersionsFn + oldEvalLifecycleFn := evalLifecycleForListObjectFn + + metaCacheEntryFileInfoFn = func(obj metaCacheEntry, bucket string) (FileInfo, error) { + if bucket != "testbucket" { + t.Fatalf("bucket = %q, want %q", bucket, "testbucket") + } + + return FileInfo{ + Volume: bucket, + Name: obj.name, + Size: 1, + ModTime: time.Now(), + }, nil + } + + metaCacheEntryFileInfoVersionsFn = func(obj metaCacheEntry, bucket string) (FileInfoVersions, error) { + return FileInfoVersions{}, nil + } + + var evalCalls int + evalLifecycleForListObjectFn = func(ctx context.Context, o listPathOptions, objInfo ObjectInfo) lifecycle.Event { + evalCalls++ + return lifecycle.Event{Action: lifecycle.DeleteAction} + } + + t.Cleanup(func() { + metaCacheEntryFileInfoFn = oldFileInfoFn + metaCacheEntryFileInfoVersionsFn = oldFileInfoVersionsFn + evalLifecycleForListObjectFn = oldEvalLifecycleFn + }) + + skip := triggerExpiryAndRepl(context.Background(), listPathOptions{ + Bucket: bucket, + V1: true, + Versioned: false, + Lifecycle: &lifecycle.Lifecycle{}, + }, metaCacheEntry{ + name: object, + }) + + if !skip { + t.Fatal("skip = false, want true for expired object in regular ListObjects V1 listing") + } + + if evalCalls == 0 { + t.Fatal("lifecycle was not evaluated for ListObjects V1") + } +} + +func Test_triggerExpiryAndRepl_VersionedListingDoesNotSkipExpiredObject(t *testing.T) { + const bucket = "testbucket" + const object = "expired-object" + + oldFileInfoFn := metaCacheEntryFileInfoFn + oldFileInfoVersionsFn := metaCacheEntryFileInfoVersionsFn + oldEvalLifecycleFn := evalLifecycleForListObjectFn + + metaCacheEntryFileInfoFn = func(obj metaCacheEntry, bucket string) (FileInfo, error) { + t.Fatal("fileInfo must not be called for versioned listing skip decision") + return FileInfo{}, nil + } + + metaCacheEntryFileInfoVersionsFn = func(obj metaCacheEntry, bucket string) (FileInfoVersions, error) { + return FileInfoVersions{}, nil + } + + evalLifecycleForListObjectFn = func(ctx context.Context, o listPathOptions, objInfo ObjectInfo) lifecycle.Event { + return lifecycle.Event{Action: lifecycle.DeleteAction} + } + + t.Cleanup(func() { + metaCacheEntryFileInfoFn = oldFileInfoFn + metaCacheEntryFileInfoVersionsFn = oldFileInfoVersionsFn + evalLifecycleForListObjectFn = oldEvalLifecycleFn + }) + + skip := triggerExpiryAndRepl(context.Background(), listPathOptions{ + Bucket: bucket, + Versioned: true, + Lifecycle: &lifecycle.Lifecycle{}, + }, metaCacheEntry{ + name: object, + }) + + if skip { + t.Fatal("skip = true, want false for versioned listing") + } +} + +func Test_triggerExpiryAndRepl_FileInfoErrorDoesNotSkip(t *testing.T) { + const bucket = "testbucket" + const object = "bad-object" + + oldFileInfoFn := metaCacheEntryFileInfoFn + oldFileInfoVersionsFn := metaCacheEntryFileInfoVersionsFn + oldEvalLifecycleFn := evalLifecycleForListObjectFn + + metaCacheEntryFileInfoFn = func(obj metaCacheEntry, bucket string) (FileInfo, error) { + return FileInfo{}, errors.New("forced fileInfo failure") + } + + metaCacheEntryFileInfoVersionsFn = func(obj metaCacheEntry, bucket string) (FileInfoVersions, error) { + t.Fatal("fileInfoVersions must not be called after fileInfo failure") + return FileInfoVersions{}, nil + } + + evalLifecycleForListObjectFn = func(ctx context.Context, o listPathOptions, objInfo ObjectInfo) lifecycle.Event { + t.Fatal("lifecycle must not be evaluated after fileInfo failure") + return lifecycle.Event{} + } + + t.Cleanup(func() { + metaCacheEntryFileInfoFn = oldFileInfoFn + metaCacheEntryFileInfoVersionsFn = oldFileInfoVersionsFn + evalLifecycleForListObjectFn = oldEvalLifecycleFn + }) + + skip := triggerExpiryAndRepl(context.Background(), listPathOptions{ + Bucket: bucket, + Lifecycle: &lifecycle.Lifecycle{}, + }, metaCacheEntry{ + name: object, + }) + + if skip { + t.Fatal("skip = true, want false when fileInfo fails") + } +} + +func Test_erasureServerPools_listAndSave_DiskFullMakesListingTransient(t *testing.T) { + oldGetAvailablePoolIdxFn := listAndSaveGetAvailablePoolIdxFn + listAndSaveGetAvailablePoolIdxFn = func( + z *erasureServerPools, + ctx context.Context, + bucket string, + object string, + size int64, + ) int { + return -1 + } + t.Cleanup(func() { + listAndSaveGetAvailablePoolIdxFn = oldGetAvailablePoolIdxFn + }) + + z := &erasureServerPools{} + + o := &listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + ID: "list-id", + Create: true, + Limit: 100, + } + + entries, err := z.listAndSave(context.Background(), o) + if !errors.Is(err, errDiskFull) { + t.Fatalf("listAndSave error = %v, want errDiskFull", err) + } + + if entries.len() != 0 { + t.Fatalf("entries length = %d, want 0", entries.len()) + } + + if o.pool != 0 { + t.Fatalf("pool = %d, want 0", o.pool) + } + + if o.Create { + t.Fatal("Create = true, want false") + } + + if o.ID != "" { + t.Fatalf("ID = %q, want empty", o.ID) + } + + if !o.Transient { + t.Fatal("Transient = false, want true") + } +} + +func Test_erasureServerPools_listAndSave_ListMergedErrorIsReturnedWhenNoEntries(t *testing.T) { + wantErr := errors.New("forced listMerged failure") + + oldGetAvailablePoolIdxFn := listAndSaveGetAvailablePoolIdxFn + oldGetHashedSetIndexFn := listAndSaveGetHashedSetIndexFn + oldSaveMetaCacheStreamFn := listAndSaveSaveMetaCacheStreamFn + oldListMergedFn := listAndSaveListMergedFn + oldRestClientFromHashFn := listAndSaveRestClientFromHashFn + + listAndSaveGetAvailablePoolIdxFn = func( + z *erasureServerPools, + ctx context.Context, + bucket string, + object string, + size int64, + ) int { + return 0 + } + + listAndSaveGetHashedSetIndexFn = func(pool *erasureSets, id string) int { + return 0 + } + + listAndSaveSaveMetaCacheStreamFn = func( + saver *erasureObjects, + ctx context.Context, + meta *metaCacheRPC, + saveCh <-chan metaCacheEntry, + ) error { + for range saveCh { + } + return nil + } + + listAndSaveListMergedFn = func( + z *erasureServerPools, + ctx context.Context, + o listPathOptions, + results chan<- metaCacheEntry, + ) error { + close(results) + return wantErr + } + + listAndSaveRestClientFromHashFn = func(hash string) *peerRESTClient { + return nil + } + + t.Cleanup(func() { + listAndSaveGetAvailablePoolIdxFn = oldGetAvailablePoolIdxFn + listAndSaveGetHashedSetIndexFn = oldGetHashedSetIndexFn + listAndSaveSaveMetaCacheStreamFn = oldSaveMetaCacheStreamFn + listAndSaveListMergedFn = oldListMergedFn + listAndSaveRestClientFromHashFn = oldRestClientFromHashFn + }) + + z := &erasureServerPools{ + serverPools: []*erasureSets{ + { + sets: []*erasureObjects{ + nil, + }, + }, + }, + } + + o := &listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + ID: "list-id", + Create: true, + Limit: 1, + } + + type result struct { + entries metaCacheEntriesSorted + err error + } + + done := make(chan result, 1) + + go func() { + entries, err := z.listAndSave(context.Background(), o) + done <- result{ + entries: entries, + err: err, + } + }() + + var res result + + select { + case res = <-done: + case <-time.After(time.Second): + t.Fatal("listAndSave did not return") + } + + if res.err == nil { + t.Fatal("expected listMerged error, got nil") + } + + if !strings.Contains(res.err.Error(), wantErr.Error()) { + t.Fatalf("listAndSave error = %v, want it to contain %q", res.err, wantErr.Error()) + } + + if res.entries.len() != 0 { + t.Fatalf("entries length = %d, want 0", res.entries.len()) + } +} + +func Test_listAndSaveFanOut_DoesNotSendToOutputAfterFunctionReturned(t *testing.T) { + inCh := make(chan metaCacheEntry, 1) + outCh := make(chan metaCacheEntry, 1) + saveCh := make(chan metaCacheEntry, 1) + + funcReturned := true + var funcReturnedMu sync.Mutex + + o := listPathOptions{ + Bucket: "testbucket", + Prefix: "photos/", + Recursive: true, + } + + entry := metaCacheEntry{name: "photos/object-1"} + + if o.shouldSkip(context.Background(), entry) { + t.Fatal("test entry is skipped by listPathOptions.shouldSkip; test setup is invalid") + } + + inCh <- entry + close(inCh) + + listAndSaveFanOut( + context.Background(), + context.Background(), + o, + inCh, + outCh, + saveCh, + &funcReturned, + &funcReturnedMu, + ) + + select { + case got, ok := <-outCh: + if ok { + t.Fatalf("unexpected entry sent to outCh after function returned: %+v", got) + } + default: + } + + got, ok := <-saveCh + if !ok { + t.Fatal("saveCh was closed before saved entry could be read") + } + + if got.name != "photos/object-1" { + t.Fatalf("saved entry name = %q, want %q", got.name, "photos/object-1") + } + + if !got.reusable { + t.Fatal("saved entry reusable = false, want true") + } + + if _, ok := <-saveCh; ok { + t.Fatal("saveCh is not closed") + } +} From 8b0564d66f85b6147cf8e2d0858b3b116c8b9f54 Mon Sep 17 00:00:00 2001 From: konrad Date: Sat, 27 Jun 2026 18:11:46 +0400 Subject: [PATCH 7/8] Tests and code improvements for metacache-manager --- cmd/metacache-manager.go | 160 ++++--- cmd/metacache-manager_test.go | 807 ++++++++++++++++++++++++++++++++++ 2 files changed, 915 insertions(+), 52 deletions(-) create mode 100644 cmd/metacache-manager_test.go diff --git a/cmd/metacache-manager.go b/cmd/metacache-manager.go index d23a8aa1470bd..b7f66420f3b58 100644 --- a/cmd/metacache-manager.go +++ b/cmd/metacache-manager.go @@ -49,69 +49,88 @@ const metacacheMaxEntries = 5000 func (m *metacacheManager) initManager() { // Add a transient bucket. // Start saver when object layer is ready. - go func() { - objAPI := newObjectLayerFn() - for objAPI == nil { - time.Sleep(time.Second) - objAPI = newObjectLayerFn() + m.init.Do(func() { + go m.cleanupLoop() + }) +} + +func (m *metacacheManager) cleanupLoop() { + for { + if objAPI := newObjectLayerFn(); objAPI != nil { + break } - t := time.NewTicker(time.Minute) - defer t.Stop() + select { + case <-time.After(time.Second): + case <-GlobalContext.Done(): + return + } + } - var exit bool - for !exit { - select { - case <-t.C: - case <-GlobalContext.Done(): - exit = true - } - m.mu.RLock() - for _, v := range m.buckets { - if !exit { - v.cleanup() - } + t := time.NewTicker(time.Minute) + defer t.Stop() + + var exit bool + for !exit { + select { + case <-t.C: + case <-GlobalContext.Done(): + exit = true + } + m.mu.RLock() + for _, v := range m.buckets { + if !exit { + v.cleanup() } - m.mu.RUnlock() - m.mu.Lock() - for k, v := range m.trash { - if time.Since(v.lastUpdate) > metacacheMaxRunningAge { - v.delete(context.Background()) - delete(m.trash, k) - } + } + m.mu.RUnlock() + m.mu.Lock() + for k, v := range m.trash { + if time.Since(v.lastUpdate) > metacacheMaxRunningAge { + v.delete(context.Background()) + delete(m.trash, k) } - m.mu.Unlock() } - }() + m.mu.Unlock() + } } // updateCacheEntry will update non-transient state. func (m *metacacheManager) updateCacheEntry(update metacache) (metacache, error) { m.mu.RLock() - if meta, ok := m.trash[update.id]; ok { - m.mu.RUnlock() + meta, inTrash := m.trash[update.id] + b, hasBucket := m.buckets[update.bucket] + m.mu.RUnlock() + + if inTrash { return meta, nil } - b, ok := m.buckets[update.bucket] - m.mu.RUnlock() - if ok { - return b.updateCacheEntry(update) + if !hasBucket || b == nil { + // We should have either a trashed cache entry or an existing bucket. + return metacache{}, errVolumeNotFound } - // We should have either a trashed bucket or this - return metacache{}, errVolumeNotFound + //TODO: still allows a race condition where the bucket is removed/trash-added after the manager lock is released + return b.updateCacheEntry(update) } // getBucket will get a bucket metacache or load it from disk if needed. func (m *metacacheManager) getBucket(ctx context.Context, bucket string) *bucketMetacache { m.init.Do(m.initManager) + select { + case <-ctx.Done(): + return nil + default: + } + // Return a transient bucket for invalid or system buckets. m.mu.RLock() - b, ok := m.buckets[bucket] - if ok { - m.mu.RUnlock() + b := m.buckets[bucket] + m.mu.RUnlock() + + if b != nil { if b.bucket != bucket { logger.Info("getBucket: cached bucket %s does not match this bucket %s", b.bucket, bucket) debug.PrintStack() @@ -119,12 +138,12 @@ func (m *metacacheManager) getBucket(ctx context.Context, bucket string) *bucket return b } - m.mu.RUnlock() m.mu.Lock() defer m.mu.Unlock() - // See if someone else fetched it while we waited for the lock. - b, ok = m.buckets[bucket] - if ok { + + // Check again in case another goroutine created it while we waited. + b = m.buckets[bucket] + if b != nil { if b.bucket != bucket { logger.Info("getBucket: newly cached bucket %s does not match this bucket %s", b.bucket, bucket) debug.PrintStack() @@ -135,6 +154,7 @@ func (m *metacacheManager) getBucket(ctx context.Context, bucket string) *bucket // New bucket. If we fail return the transient bucket. b = newBucketMetacache(bucket, true) m.buckets[bucket] = b + return b } @@ -150,31 +170,59 @@ func (m *metacacheManager) deleteBucketCache(bucket string) { delete(m.buckets, bucket) m.mu.Unlock() - // Since deletes may take some time we try to do it without - // holding lock to m all the time. + if b == nil { + return + } + + now := time.Now() + + var expired []metacache + trash := make(map[string]metacache) + + // collect expired/trash entries under b.mu, release b.mu, then update m.trash and delete files outside bucket lock. b.mu.Lock() - defer b.mu.Unlock() for k, v := range b.caches { - if time.Since(v.lastUpdate) > metacacheMaxRunningAge { - v.delete(context.Background()) + if now.Sub(v.lastUpdate) > metacacheMaxRunningAge { + expired = append(expired, v) continue } + v.error = "Bucket deleted" v.status = scanStateError + trash[k] = v + } + b.mu.Unlock() + + if len(trash) > 0 { m.mu.Lock() - m.trash[k] = v + for k, v := range trash { + m.trash[k] = v + } m.mu.Unlock() } + + for _, v := range expired { + v.delete(context.Background()) + } } // deleteAll will delete all caches. func (m *metacacheManager) deleteAll() { + // manager should remove buckets under m.mu, then call b.deleteAll() after releasing m.mu m.init.Do(m.initManager) + m.mu.Lock() - defer m.mu.Unlock() - for bucket, b := range m.buckets { + buckets := make([]*bucketMetacache, 0, len(m.buckets)) + for _, b := range m.buckets { + if b != nil { + buckets = append(buckets, b) + } + } + m.buckets = make(map[string]*bucketMetacache) + m.mu.Unlock() + + for _, b := range buckets { b.deleteAll() - delete(m.buckets, bucket) } } @@ -187,8 +235,16 @@ func (o listPathOptions) checkMetacacheState(ctx context.Context, rpc *peerRESTC if err != nil { return err } + if c == nil { + return errFileNotFound + } + cache := *c + if cache.fileNotFound || cache.status == scanStateNone { + return errFileNotFound + } + if cache.status == scanStateNone || cache.fileNotFound { return errFileNotFound } diff --git a/cmd/metacache-manager_test.go b/cmd/metacache-manager_test.go new file mode 100644 index 0000000000000..fe4aca8ae1e93 --- /dev/null +++ b/cmd/metacache-manager_test.go @@ -0,0 +1,807 @@ +package cmd + +import ( + "context" + "errors" + "fmt" + "sync" + "testing" + "time" +) + +func newTestMetacacheManager() *metacacheManager { + m := &metacacheManager{ + buckets: make(map[string]*bucketMetacache), + trash: make(map[string]metacache), + } + + // Prevent getBucket from starting the real async initManager goroutine. + m.init.Do(func() {}) + + return m +} + +func newTestBucketMetacache(bucket string) *bucketMetacache { + return &bucketMetacache{ + bucket: bucket, + caches: make(map[string]metacache), + cachesRoot: make(map[string][]string), + } +} + +func TestMetacacheManagerUpdateCacheEntry_ReturnsTrashEntry(t *testing.T) { + m := newTestMetacacheManager() + + update := metacache{ + id: "cache-1", + bucket: "bucket-a", + } + + trashed := metacache{ + id: "cache-1", + bucket: "bucket-a", + lastUpdate: time.Now().Add(-time.Minute), + } + + m.trash[update.id] = trashed + + got, err := m.updateCacheEntry(update) + if err != nil { + t.Fatalf("expected nil error, got %v", err) + } + + if got.id != trashed.id { + t.Fatalf("expected trash id %q, got %q", trashed.id, got.id) + } + + if got.bucket != trashed.bucket { + t.Fatalf("expected trash bucket %q, got %q", trashed.bucket, got.bucket) + } + + if !got.lastUpdate.Equal(trashed.lastUpdate) { + t.Fatalf("expected trash lastUpdate %v, got %v", trashed.lastUpdate, got.lastUpdate) + } +} + +func TestMetacacheManagerUpdateCacheEntry_TrashWinsOverBucket(t *testing.T) { + m := newTestMetacacheManager() + + update := metacache{ + id: "cache-1", + bucket: "bucket-a", + } + + trashed := metacache{ + id: "cache-1", + bucket: "bucket-a", + lastUpdate: time.Now().Add(-time.Minute), + } + + b := newTestBucketMetacache(update.bucket) + + m.trash[update.id] = trashed + m.buckets[update.bucket] = b + + got, err := m.updateCacheEntry(update) + if err != nil { + t.Fatalf("expected nil error, got %v", err) + } + + if got.id != trashed.id { + t.Fatalf("expected trash id %q, got %q", trashed.id, got.id) + } + + b.mu.RLock() + _, cacheWasUpdated := b.caches[update.id] + bucketMarkedUpdated := b.updated + b.mu.RUnlock() + + if cacheWasUpdated { + t.Fatalf("bucket cache was updated even though trash entry should win") + } + + if bucketMarkedUpdated { + t.Fatalf("bucket was marked updated even though trash entry should win") + } +} + +func TestMetacacheManagerUpdateCacheEntry_DelegatesToExistingBucketEntry(t *testing.T) { + m := newTestMetacacheManager() + + existing := metacache{ + id: "cache-1", + bucket: "bucket-a", + lastUpdate: time.Now().Add(-time.Minute), + } + + update := metacache{ + id: existing.id, + bucket: existing.bucket, + lastUpdate: time.Now(), + } + + b := newTestBucketMetacache(update.bucket) + b.caches[existing.id] = existing + + m.buckets[update.bucket] = b + + got, err := m.updateCacheEntry(update) + if err != nil { + t.Fatalf("expected nil error, got %v", err) + } + + if got.id != update.id { + t.Fatalf("expected cache id %q, got %q", update.id, got.id) + } + + if got.bucket != update.bucket { + t.Fatalf("expected cache bucket %q, got %q", update.bucket, got.bucket) + } + + b.mu.RLock() + stored, ok := b.caches[update.id] + bucketMarkedUpdated := b.updated + b.mu.RUnlock() + + if !ok { + t.Fatalf("expected cache entry %q to remain stored in bucket", update.id) + } + + if stored.id != update.id { + t.Fatalf("expected stored cache id %q, got %q", update.id, stored.id) + } + + if stored.bucket != update.bucket { + t.Fatalf("expected stored cache bucket %q, got %q", update.bucket, stored.bucket) + } + + if !bucketMarkedUpdated { + t.Fatalf("expected bucket to be marked updated") + } +} + +func TestMetacacheManagerUpdateCacheEntry_ReturnsVolumeNotFound(t *testing.T) { + m := newTestMetacacheManager() + + update := metacache{ + id: "cache-1", + bucket: "missing-bucket", + } + + got, err := m.updateCacheEntry(update) + if !errors.Is(err, errVolumeNotFound) { + t.Fatalf("expected errVolumeNotFound, got %v", err) + } + + if got.id != "" { + t.Fatalf("expected empty metacache id, got %q", got.id) + } + + if got.bucket != "" { + t.Fatalf("expected empty metacache bucket, got %q", got.bucket) + } +} + +func TestMetacacheManagerUpdateCacheEntry_ConcurrentTrashAccess(t *testing.T) { + m := newTestMetacacheManager() + + update := metacache{ + id: "cache-1", + bucket: "bucket-a", + } + + const readers = 16 + const iterations = 10_000 + + var wg sync.WaitGroup + start := make(chan struct{}) + + for i := 0; i < readers; i++ { + wg.Add(1) + + go func() { + defer wg.Done() + + <-start + + for j := 0; j < iterations; j++ { + _, err := m.updateCacheEntry(update) + if err != nil && !errors.Is(err, errVolumeNotFound) { + t.Errorf("unexpected error: %v", err) + return + } + } + }() + } + + wg.Add(1) + + go func() { + defer wg.Done() + + <-start + + for i := 0; i < iterations; i++ { + m.mu.Lock() + + if i%2 == 0 { + m.trash[update.id] = metacache{ + id: update.id, + bucket: update.bucket, + lastUpdate: time.Now(), + } + } else { + delete(m.trash, update.id) + } + + m.mu.Unlock() + } + }() + + close(start) + wg.Wait() +} + +func TestMetacacheManagerGetBucket_ContextCanceledReturnsNil(t *testing.T) { + m := newTestMetacacheManager() + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + got := m.getBucket(ctx, "bucket-a") + if got != nil { + t.Fatalf("expected nil bucket for canceled context, got %#v", got) + } + + m.mu.RLock() + defer m.mu.RUnlock() + + if len(m.buckets) != 0 { + t.Fatalf("expected no buckets to be created, got %d", len(m.buckets)) + } +} + +func TestMetacacheManagerGetBucket_ReturnsExistingBucket(t *testing.T) { + m := newTestMetacacheManager() + + expected := newTestBucketMetacache("bucket-a") + m.buckets["bucket-a"] = expected + + got := m.getBucket(context.Background(), "bucket-a") + if got == nil { + t.Fatal("expected existing bucket, got nil") + } + + if got != expected { + t.Fatalf("expected existing bucket pointer %p, got %p", expected, got) + } + + if got.bucket != "bucket-a" { + t.Fatalf("expected bucket name %q, got %q", "bucket-a", got.bucket) + } +} + +func TestMetacacheManagerGetBucket_CreatesBucketWhenMissing(t *testing.T) { + m := newTestMetacacheManager() + + got := m.getBucket(context.Background(), "bucket-a") + if got == nil { + t.Fatal("expected created bucket, got nil") + } + + if got.bucket != "bucket-a" { + t.Fatalf("expected bucket name %q, got %q", "bucket-a", got.bucket) + } + + m.mu.RLock() + stored := m.buckets["bucket-a"] + bucketCount := len(m.buckets) + m.mu.RUnlock() + + if stored == nil { + t.Fatal("expected bucket to be stored in manager") + } + + if stored != got { + t.Fatalf("expected stored bucket pointer %p, got %p", got, stored) + } + + if bucketCount != 1 { + t.Fatalf("expected exactly 1 bucket, got %d", bucketCount) + } +} + +func TestMetacacheManagerGetBucket_ReplacesNilBucketEntry(t *testing.T) { + m := newTestMetacacheManager() + + m.buckets["bucket-a"] = nil + + got := m.getBucket(context.Background(), "bucket-a") + if got == nil { + t.Fatal("expected nil map entry to be replaced with real bucket") + } + + if got.bucket != "bucket-a" { + t.Fatalf("expected bucket name %q, got %q", "bucket-a", got.bucket) + } + + m.mu.RLock() + stored := m.buckets["bucket-a"] + bucketCount := len(m.buckets) + m.mu.RUnlock() + + if stored == nil { + t.Fatal("expected bucket to be stored after replacing nil entry") + } + + if stored != got { + t.Fatalf("expected stored bucket pointer %p, got %p", got, stored) + } + + if bucketCount != 1 { + t.Fatalf("expected exactly 1 bucket, got %d", bucketCount) + } +} + +func TestMetacacheManagerGetBucket_ConcurrentSameBucketReturnsSingleInstance(t *testing.T) { + m := newTestMetacacheManager() + + const goroutines = 64 + + var wg sync.WaitGroup + start := make(chan struct{}) + results := make([]*bucketMetacache, goroutines) + + for i := 0; i < goroutines; i++ { + wg.Add(1) + + go func(i int) { + defer wg.Done() + + <-start + results[i] = m.getBucket(context.Background(), "bucket-a") + }(i) + } + + close(start) + wg.Wait() + + first := results[0] + if first == nil { + t.Fatal("expected bucket, got nil") + } + + for i, got := range results { + if got == nil { + t.Fatalf("result %d: expected bucket, got nil", i) + } + + if got != first { + t.Fatalf("result %d: expected same bucket pointer %p, got %p", i, first, got) + } + + if got.bucket != "bucket-a" { + t.Fatalf("result %d: expected bucket name %q, got %q", i, "bucket-a", got.bucket) + } + } + + m.mu.RLock() + stored := m.buckets["bucket-a"] + bucketCount := len(m.buckets) + m.mu.RUnlock() + + if stored != first { + t.Fatalf("expected stored bucket pointer %p, got %p", first, stored) + } + + if bucketCount != 1 { + t.Fatalf("expected exactly 1 bucket, got %d", bucketCount) + } +} + +func TestMetacacheManagerGetBucket_ConcurrentDifferentBuckets(t *testing.T) { + m := newTestMetacacheManager() + + bucketNames := []string{ + "bucket-a", + "bucket-b", + "bucket-c", + "bucket-d", + "bucket-e", + } + + var wg sync.WaitGroup + start := make(chan struct{}) + results := make([]*bucketMetacache, len(bucketNames)) + + for i, bucket := range bucketNames { + wg.Add(1) + + go func(i int, bucket string) { + defer wg.Done() + + <-start + results[i] = m.getBucket(context.Background(), bucket) + }(i, bucket) + } + + close(start) + wg.Wait() + + for i, bucket := range bucketNames { + got := results[i] + if got == nil { + t.Fatalf("bucket %q: expected bucket, got nil", bucket) + } + + if got.bucket != bucket { + t.Fatalf("bucket %q: expected bucket name %q, got %q", bucket, bucket, got.bucket) + } + } + + m.mu.RLock() + bucketCount := len(m.buckets) + m.mu.RUnlock() + + if bucketCount != len(bucketNames) { + t.Fatalf("expected %d buckets, got %d", len(bucketNames), bucketCount) + } +} + +func TestMetacacheManagerDeleteBucketCache_MissingBucketDoesNothing(t *testing.T) { + m := newTestMetacacheManager() + + otherBucket := newTestBucketMetacache("other-bucket") + m.buckets["other-bucket"] = otherBucket + + existingTrash := metacache{ + id: "trash-1", + bucket: "other-bucket", + lastUpdate: time.Now(), + } + m.trash["trash-1"] = existingTrash + + m.deleteBucketCache("missing-bucket") + + m.mu.RLock() + defer m.mu.RUnlock() + + if got := m.buckets["other-bucket"]; got != otherBucket { + t.Fatalf("expected unrelated bucket to remain unchanged") + } + + if _, ok := m.buckets["missing-bucket"]; ok { + t.Fatalf("did not expect missing bucket to be created or stored") + } + + if len(m.trash) != 1 { + t.Fatalf("expected trash to remain unchanged, got len=%d", len(m.trash)) + } + + gotTrash := m.trash["trash-1"] + if gotTrash.id != existingTrash.id { + t.Fatalf("expected existing trash id %q, got %q", existingTrash.id, gotTrash.id) + } +} + +func TestMetacacheManagerDeleteBucketCache_NilBucketEntryIsRemoved(t *testing.T) { + m := newTestMetacacheManager() + + m.buckets["bucket-a"] = nil + + defer func() { + if r := recover(); r != nil { + t.Fatalf("deleteBucketCache panicked for nil bucket entry: %v", r) + } + }() + + m.deleteBucketCache("bucket-a") + + m.mu.RLock() + defer m.mu.RUnlock() + + if _, ok := m.buckets["bucket-a"]; ok { + t.Fatalf("expected nil bucket entry to be removed") + } + + if len(m.trash) != 0 { + t.Fatalf("expected trash to remain empty, got len=%d", len(m.trash)) + } +} + +func TestMetacacheManagerDeleteBucketCache_MovesRunningCachesToTrash(t *testing.T) { + m := newTestMetacacheManager() + + b := newTestBucketMetacache("bucket-a") + + cache1 := metacache{ + id: "cache-1", + bucket: "bucket-a", + lastUpdate: time.Now(), + } + + cache2 := metacache{ + id: "cache-2", + bucket: "bucket-a", + lastUpdate: time.Now(), + } + + b.caches[cache1.id] = cache1 + b.caches[cache2.id] = cache2 + + m.buckets["bucket-a"] = b + + m.deleteBucketCache("bucket-a") + + m.mu.RLock() + defer m.mu.RUnlock() + + if _, ok := m.buckets["bucket-a"]; ok { + t.Fatalf("expected bucket to be removed from manager") + } + + if len(m.trash) != 2 { + t.Fatalf("expected 2 trash entries, got %d", len(m.trash)) + } + + got1, ok := m.trash[cache1.id] + if !ok { + t.Fatalf("expected cache %q to be moved to trash", cache1.id) + } + + if got1.id != cache1.id { + t.Fatalf("expected trash cache id %q, got %q", cache1.id, got1.id) + } + + if got1.bucket != cache1.bucket { + t.Fatalf("expected trash cache bucket %q, got %q", cache1.bucket, got1.bucket) + } + + if got1.error != "Bucket deleted" { + t.Fatalf("expected trash cache error %q, got %q", "Bucket deleted", got1.error) + } + + if got1.status != scanStateError { + t.Fatalf("expected trash cache status %v, got %v", scanStateError, got1.status) + } + + got2, ok := m.trash[cache2.id] + if !ok { + t.Fatalf("expected cache %q to be moved to trash", cache2.id) + } + + if got2.id != cache2.id { + t.Fatalf("expected trash cache id %q, got %q", cache2.id, got2.id) + } + + if got2.bucket != cache2.bucket { + t.Fatalf("expected trash cache bucket %q, got %q", cache2.bucket, got2.bucket) + } + + if got2.error != "Bucket deleted" { + t.Fatalf("expected trash cache error %q, got %q", "Bucket deleted", got2.error) + } + + if got2.status != scanStateError { + t.Fatalf("expected trash cache status %v, got %v", scanStateError, got2.status) + } +} + +func TestMetacacheManagerDeleteBucketCache_ConcurrentCallsSameBucket(t *testing.T) { + m := newTestMetacacheManager() + + b := newTestBucketMetacache("bucket-a") + + const cacheCount = 100 + for i := 0; i < cacheCount; i++ { + id := fmt.Sprintf("cache-%d", i) + + b.caches[id] = metacache{ + id: id, + bucket: "bucket-a", + lastUpdate: time.Now(), + } + } + + m.buckets["bucket-a"] = b + + const goroutines = 16 + + var wg sync.WaitGroup + start := make(chan struct{}) + + for i := 0; i < goroutines; i++ { + wg.Add(1) + + go func() { + defer wg.Done() + + <-start + m.deleteBucketCache("bucket-a") + }() + } + + close(start) + wg.Wait() + + m.mu.RLock() + defer m.mu.RUnlock() + + if _, ok := m.buckets["bucket-a"]; ok { + t.Fatalf("expected bucket to be removed") + } + + if len(m.trash) != cacheCount { + t.Fatalf("expected %d trash entries, got %d", cacheCount, len(m.trash)) + } +} + +func TestMetacacheManagerDeleteAll_EmptyManagerDoesNothing(t *testing.T) { + m := newTestMetacacheManager() + + m.deleteAll() + + m.mu.RLock() + defer m.mu.RUnlock() + + if len(m.buckets) != 0 { + t.Fatalf("expected no buckets, got %d", len(m.buckets)) + } + + if len(m.trash) != 0 { + t.Fatalf("expected no trash entries, got %d", len(m.trash)) + } +} + +func TestMetacacheManagerDeleteAll_RemovesAllBuckets(t *testing.T) { + m := newTestMetacacheManager() + + bucketA := newTestBucketMetacache("bucket-a") + bucketB := newTestBucketMetacache("bucket-b") + + bucketA.caches["cache-a"] = metacache{ + id: "cache-a", + bucket: "bucket-a", + lastUpdate: time.Now(), + } + + bucketB.caches["cache-b"] = metacache{ + id: "cache-b", + bucket: "bucket-b", + lastUpdate: time.Now(), + } + + m.buckets["bucket-a"] = bucketA + m.buckets["bucket-b"] = bucketB + + m.deleteAll() + + m.mu.RLock() + defer m.mu.RUnlock() + + if len(m.buckets) != 0 { + t.Fatalf("expected all buckets to be removed, got %d", len(m.buckets)) + } + + if _, ok := m.buckets["bucket-a"]; ok { + t.Fatalf("expected bucket-a to be removed") + } + + if _, ok := m.buckets["bucket-b"]; ok { + t.Fatalf("expected bucket-b to be removed") + } +} + +func TestMetacacheManagerDeleteAll_DoesNotModifyTrash(t *testing.T) { + m := newTestMetacacheManager() + + bucketA := newTestBucketMetacache("bucket-a") + bucketA.caches["cache-a"] = metacache{ + id: "cache-a", + bucket: "bucket-a", + lastUpdate: time.Now(), + } + + trashEntry := metacache{ + id: "trash-1", + bucket: "bucket-x", + lastUpdate: time.Now(), + } + + m.buckets["bucket-a"] = bucketA + m.trash[trashEntry.id] = trashEntry + + m.deleteAll() + + m.mu.RLock() + defer m.mu.RUnlock() + + if len(m.buckets) != 0 { + t.Fatalf("expected all buckets to be removed, got %d", len(m.buckets)) + } + + if len(m.trash) != 1 { + t.Fatalf("expected trash to remain unchanged, got len=%d", len(m.trash)) + } + + gotTrash, ok := m.trash[trashEntry.id] + if !ok { + t.Fatalf("expected trash entry %q to remain", trashEntry.id) + } + + if gotTrash.id != trashEntry.id { + t.Fatalf("expected trash id %q, got %q", trashEntry.id, gotTrash.id) + } + + if gotTrash.bucket != trashEntry.bucket { + t.Fatalf("expected trash bucket %q, got %q", trashEntry.bucket, gotTrash.bucket) + } +} + +func TestMetacacheManagerDeleteAll_NilBucketEntryDoesNotPanic(t *testing.T) { + m := newTestMetacacheManager() + + m.buckets["bucket-a"] = nil + m.buckets["bucket-b"] = newTestBucketMetacache("bucket-b") + + defer func() { + if r := recover(); r != nil { + t.Fatalf("deleteAll panicked with nil bucket entry: %v", r) + } + }() + + m.deleteAll() + + m.mu.RLock() + defer m.mu.RUnlock() + + if len(m.buckets) != 0 { + t.Fatalf("expected all buckets to be removed, got %d", len(m.buckets)) + } +} + +func TestMetacacheManagerDeleteAll_ConcurrentCalls(t *testing.T) { + m := newTestMetacacheManager() + + const bucketCount = 100 + + for i := 0; i < bucketCount; i++ { + bucket := fmt.Sprintf("bucket-%d", i) + cacheID := fmt.Sprintf("cache-%d", i) + + b := newTestBucketMetacache(bucket) + b.caches[cacheID] = metacache{ + id: cacheID, + bucket: bucket, + lastUpdate: time.Now(), + } + + m.buckets[bucket] = b + } + + const goroutines = 16 + + var wg sync.WaitGroup + start := make(chan struct{}) + + for i := 0; i < goroutines; i++ { + wg.Add(1) + + go func() { + defer wg.Done() + + <-start + m.deleteAll() + }() + } + + close(start) + wg.Wait() + + m.mu.RLock() + defer m.mu.RUnlock() + + if len(m.buckets) != 0 { + t.Fatalf("expected all buckets to be removed, got %d", len(m.buckets)) + } +} From 32156f27e431c0cb3d608bc7c77a139152634f75 Mon Sep 17 00:00:00 2001 From: konrad Date: Sun, 28 Jun 2026 16:17:20 +0400 Subject: [PATCH 8/8] Fixes to all test suite pass --- cmd/metacache-entries.go | 698 +++++++------------- cmd/metacache-entries_test.go | 526 ++++++++++++++- cmd/metacache-manager.go | 66 +- cmd/metacache-marker_test.go | 289 -------- cmd/metacache-server-pool.go | 343 ++++------ cmd/metacache-server-pool_test.go | 1018 ----------------------------- 6 files changed, 926 insertions(+), 2014 deletions(-) delete mode 100644 cmd/metacache-marker_test.go delete mode 100644 cmd/metacache-server-pool_test.go diff --git a/cmd/metacache-entries.go b/cmd/metacache-entries.go index a073b3b1f3fa8..67eef6c319aa1 100644 --- a/cmd/metacache-entries.go +++ b/cmd/metacache-entries.go @@ -20,8 +20,8 @@ package cmd import ( "bytes" "context" - "fmt" "os" + "path" "sort" "strings" @@ -30,6 +30,12 @@ import ( ) // metaCacheEntry is an object or a directory within an unknown bucket. +// +// metaCacheEntry values are designed for single-owner data flow. They are copied +// by value in several hot paths, so do not add sync.Mutex/sync.Once fields here. +// If an entry is shared across goroutines, callers must provide external +// synchronization. The xlmeta method therefore avoids populating the lazy cache +// on a cache miss; code with exclusive ownership may assign cached explicitly. type metaCacheEntry struct { // name is the full name of the object including prefixes name string @@ -87,20 +93,12 @@ func (e *metaCacheEntry) matches(other *metaCacheEntry, strict bool) (prefer *me return other, false } - // Directories/prefixes do not have xlmeta. - // If both are synthetic dirs, they match. - // If only one is synthetic dir, prefer the real object. - if e.isDir() || other.isDir() { - switch { - case e.isDir() && other.isDir(): - return e, true - case e.isDir(): - return other, false - default: - return e, false + if other.isDir() || e.isDir() { + if e.isDir() { + return e, other.isDir() == e.isDir() } + return other, other.isDir() == e.isDir() } - eVers, eErr := e.xlmeta() oVers, oErr := other.xlmeta() if eErr != nil || oErr != nil { @@ -167,32 +165,20 @@ func (e *metaCacheEntry) matches(other *metaCacheEntry, strict bool) (prefer *me return prefer, true } -// isInDir returns whether the entry is directly inside dir when considering separator. -// -// dir is expected to be either empty for root, or normalized to end with separator. +// isInDir returns whether the entry is in the dir when considering the separator. func (e metaCacheEntry) isInDir(dir, separator string) bool { - if separator == "" { - return false - } - - // safer if callers may pass dir without trailing / - if dir != "" && !strings.HasSuffix(dir, separator) { - dir += separator - } - - if dir == "" { + if len(dir) == 0 { + // Root idx := strings.Index(e.name, separator) return idx == -1 || idx == len(e.name)-len(separator) } - - if !strings.HasPrefix(e.name, dir) { - return false - } - ext := strings.TrimPrefix(e.name, dir) - idx := strings.Index(ext, separator) - - return idx == -1 || idx == len(ext)-len(separator) + if len(ext) != len(e.name) { + idx := strings.Index(ext, separator) + // If separator is not found or is last entry, ok. + return idx == -1 || idx == len(ext)-len(separator) + } + return false } // isLatestDeletemarker returns whether the latest version is a delete marker. @@ -208,13 +194,10 @@ func (e *metaCacheEntry) isLatestDeletemarker() bool { if !isXL2V1Format(e.metadata) { return false } - // stricter around isIndexedMetaV2() errors - meta, _, err := isIndexedMetaV2(e.metadata) - if err != nil { - return true - } - if meta != nil { + if meta, _, err := isIndexedMetaV2(e.metadata); meta != nil { return meta.IsLatestDeleteMarker() + } else if err != nil { + return true } // Fall back... xlMeta, err := e.xlmeta() @@ -239,24 +222,20 @@ func (e *metaCacheEntry) isAllFreeVersions() bool { } return true } - if !isXL2V1Format(e.metadata) { return false } - meta, _, err := isIndexedMetaV2(e.metadata) - if err != nil { - return true - } - if meta != nil { + if meta, _, err := isIndexedMetaV2(e.metadata); meta != nil { return meta.AllHidden(false) + } else if err != nil { + return true } - // Fall back... xlMeta, err := e.xlmeta() if err != nil || len(xlMeta.versions) == 0 { return true } - // still can be e.cached may still be nil + // Check versions. for _, v := range xlMeta.versions { if !v.header.FreeVersion() { return false @@ -289,17 +268,17 @@ func (e *metaCacheEntry) fileInfo(bucket string) (FileInfo, error) { } return e.cached.ToFileInfo(bucket, e.name, "", false, true) } - - // protect against malformed entries - if len(e.metadata) == 0 { - return FileInfo{}, fmt.Errorf("metaCacheEntry: no metadata for non-directory entry %q", e.name) - } - return getFileInfo(e.metadata, bucket, e.name, "", fileInfoOpts{}) } // xlmeta returns the decoded metadata. // This should not be called on directories. +// +// If metadata has already been cached, the cached value is returned. On a cache +// miss this method decodes into a fresh value and returns it without mutating the +// entry. This avoids a data race when the same entry is accidentally shared +// across goroutines. Callers that own the entry exclusively may assign cached +// explicitly after decoding if they need to retain the decoded value. func (e *metaCacheEntry) xlmeta() (*xlMetaV2, error) { if e.isDir() { return nil, errFileNotFound @@ -307,14 +286,17 @@ func (e *metaCacheEntry) xlmeta() (*xlMetaV2, error) { if e.cached != nil { return e.cached, nil } + if len(e.metadata) == 0 { - // Only happens if the entry is not found or malformed. + // only happens if the entry is not found. return nil, errFileNotFound } var xl xlMetaV2 - if err := xl.LoadOrConvert(e.metadata); err != nil { + err := xl.LoadOrConvert(e.metadata) + if err != nil { return nil, err } + //TODO: xlmeta() says it avoids mutating the entry on cache miss, but the code still does this e.cached = &xl return e.cached, nil } @@ -335,10 +317,6 @@ func (e *metaCacheEntry) fileInfoVersions(bucket string) (FileInfoVersions, erro }, }, nil } - // for non-directory entry with empty metadata - if len(e.metadata) == 0 { - return FileInfoVersions{}, errFileNotFound - } // Too small gains to reuse cache here. return getFileInfoVersions(e.metadata, bucket, e.name, true) } @@ -400,19 +378,14 @@ func (m metaCacheEntries) resolve(r *metadataResolutionParams) (selected *metaCa return nil, false } + dirExists := 0 if cap(r.candidates) < len(m) { r.candidates = make([][]xlMetaV2ShallowVersion, 0, len(m)) } r.candidates = r.candidates[:0] - - // Separate object and dir selection - var dirSelected *metaCacheEntry - var objSelected *metaCacheEntry - - dirExists := 0 objsAgree := 0 objsValid := 0 - + var selectedXL *xlMetaV2 for i := range m { entry := &m[i] // Empty entry @@ -422,9 +395,8 @@ func (m metaCacheEntries) resolve(r *metadataResolutionParams) (selected *metaCa if entry.isDir() { dirExists++ - if dirSelected == nil { - dirSelected = entry - } + selected = entry + selectedXL = nil continue } @@ -441,72 +413,83 @@ func (m metaCacheEntries) resolve(r *metadataResolutionParams) (selected *metaCa // We select the first object we find as a candidate and see if all match that. // This is to quickly identify if all agree. - if objSelected == nil { - objSelected = entry + if selected == nil { + selected = entry + selectedXL = xl objsAgree = 1 continue } // Names match, check meta... - if prefer, matches := entry.matches(objSelected, r.strict); matches { - objSelected = prefer + if prefer, ok := entry.matches(selected, r.strict); ok { + selected = prefer + if prefer == entry { + selectedXL = xl + } else if selectedXL == nil && !selected.isDir() { + selectedXL, _ = selected.xlmeta() + } objsAgree++ + continue } } - // Prefer real object metadata if object quorum exists. - if objsValid >= r.objQuorum { - if objSelected == nil || objSelected.cached == nil { - return nil, false - } - if objsAgree == objsValid { - return objSelected, true - } + // Return dir entries, if enough... + if selected != nil && selected.isDir() && dirExists >= r.dirQuorum { + return selected, true + } - merged := &metaCacheEntry{ - name: objSelected.name, - reusable: true, - cached: &xlMetaV2{metaV: objSelected.cached.metaV}, - } - merged.cached.versions = mergeXLV2Versions(r.objQuorum, r.strict, r.requestedVersions, r.candidates...) + // If we would never be able to reach read quorum. + if objsValid < r.objQuorum { + return nil, false + } - if len(merged.cached.versions) == 0 { - return nil, false - } + // If all objects agree. + if selected != nil && objsAgree == objsValid { + return selected, true + } - buf := metaDataPoolGet() + if selected == nil || selected.isDir() { + return nil, false + } + if selectedXL == nil { var err error - // it may lose the pooled buffer if AppendTo fails - merged.metadata, err = merged.cached.AppendTo(buf) + selectedXL, err = selected.xlmeta() if err != nil { - metaDataPoolPut(buf) - bugLogIf(context.Background(), err) return nil, false } - - return merged, true } - // Fall back to synthetic directory only if object quorum was not reached. - if dirSelected != nil && dirExists >= r.dirQuorum { - return dirSelected, true + // Merge if we have disagreement. + // Create a new merged result. + selected = &metaCacheEntry{ + name: selected.name, + reusable: true, + cached: &xlMetaV2{metaV: selectedXL.metaV}, + } + selected.cached.versions = mergeXLV2Versions(r.objQuorum, r.strict, r.requestedVersions, r.candidates...) + if len(selected.cached.versions) == 0 { + return nil, false } - return nil, false + // Reserialize + var err error + selected.metadata, err = selected.cached.AppendTo(metaDataPoolGet()) + if err != nil { + bugLogIf(context.Background(), err) + return nil, false + } + return selected, true } // firstFound returns the first found and the number of set entries. func (m metaCacheEntries) firstFound() (first *metaCacheEntry, n int) { - // avoids copying each metaCacheEntry during range iteration - for i := range m { - if m[i].name == "" { - continue - } - n++ - if first == nil { - first = &m[i] + for i, entry := range m { + if entry.name != "" { + n++ + if first == nil { + first = &m[i] + } } } - return first, n } @@ -546,33 +529,22 @@ func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, aft prevPrefix := "" vcfg, _ := globalBucketVersioningSys.Get(bucket) - for i := range m.o { - // avoid copying - entry := &m.o[i] - - if prefix != "" && !strings.HasPrefix(entry.name, prefix) { - continue - } - + for _, entry := range m.o { if entry.isObject() { if delimiter != "" { - rest := strings.TrimPrefix(entry.name, prefix) - - idx := strings.Index(rest, delimiter) + idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter) if idx >= 0 { idx = len(prefix) + idx + len(delimiter) currPrefix := entry.name[:idx] if currPrefix == prevPrefix { continue } - prevPrefix = currPrefix versions = append(versions, ObjectInfo{ IsDir: true, Bucket: bucket, Name: currPrefix, }) - continue } } @@ -591,12 +563,11 @@ func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, aft afterV = "" } - versioned := vcfg != nil && vcfg.Versioned(entry.name) - for _, version := range fiVersions { if !version.VersionPurgeStatus().Empty() { continue } + versioned := vcfg != nil && vcfg.Versioned(entry.name) versions = append(versions, version.ToObjectInfo(bucket, entry.name, versioned)) } @@ -607,19 +578,15 @@ func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, aft if delimiter == "" { continue } - rest := strings.TrimPrefix(entry.name, prefix) - - idx := strings.Index(rest, delimiter) + idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter) if idx < 0 { continue } - idx = len(prefix) + idx + len(delimiter) currPrefix := entry.name[:idx] if currPrefix == prevPrefix { continue } - prevPrefix = currPrefix versions = append(versions, ObjectInfo{ IsDir: true, @@ -640,24 +607,16 @@ func (m *metaCacheEntriesSorted) fileInfos(bucket, prefix, delimiter string) (ob vcfg, _ := globalBucketVersioningSys.Get(bucket) - for i := range m.o { - // avoid copy - entry := &m.o[i] - - if prefix != "" && !strings.HasPrefix(entry.name, prefix) { - continue - } + for _, entry := range m.o { if entry.isObject() { if delimiter != "" { - rest := strings.TrimPrefix(entry.name, prefix) - idx := strings.Index(rest, delimiter) + idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter) if idx >= 0 { idx = len(prefix) + idx + len(delimiter) currPrefix := entry.name[:idx] if currPrefix == prevPrefix { continue } - prevPrefix = currPrefix objects = append(objects, ObjectInfo{ IsDir: true, @@ -679,8 +638,7 @@ func (m *metaCacheEntriesSorted) fileInfos(bucket, prefix, delimiter string) (ob if delimiter == "" { continue } - rest := strings.TrimPrefix(entry.name, prefix) - idx := strings.Index(rest, delimiter) + idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter) if idx < 0 { continue } @@ -701,9 +659,7 @@ func (m *metaCacheEntriesSorted) fileInfos(bucket, prefix, delimiter string) (ob return objects } -// forwardTo will truncate m so only entries that are s or after are in the list. -// -// Requires m.o to be sorted by name. +// forwardTo will truncate m so only entries that are s or after is in the list. func (m *metaCacheEntriesSorted) forwardTo(s string) { if s == "" { return @@ -711,13 +667,11 @@ func (m *metaCacheEntriesSorted) forwardTo(s string) { idx := sort.Search(len(m.o), func(i int) bool { return m.o[i].name >= s }) - for i := range m.o[:idx] { - if m.reuse && cap(m.o[i].metadata) >= metaDataReadDefault { - metaDataPoolPut(m.o[i].metadata) + if m.reuse { + for i, entry := range m.o[:idx] { + metaDataPoolPut(entry.metadata) + m.o[i].metadata = nil } - // Clear references so skipped entries do not stay alive through - // the backing array. - m.o[i] = metaCacheEntry{} } m.o = m.o[idx:] @@ -740,50 +694,28 @@ func (m *metaCacheEntriesSorted) forwardPast(s string) { m.o = m.o[idx:] } -// mergeEntryChannels will merge sorted entries from in and return them sorted on out. +// mergeEntryChannels will merge entries from in and return them sorted on out. // To signify no more results are on an input channel, close it. // The output channel will be closed when all inputs are emptied. -// If file names are equal, metadata is merged/resolved. +// If file names are equal, compareMeta is called to select which one to choose. // The entry not chosen will be discarded. // If the context is canceled the function will return the error, // otherwise the function will return nil. // -// Each input channel must produce entries sorted by name. +// Each input channel must produce entries sorted by name. This function owns out: +// callers must not send to out concurrently and must expect out to be closed +// before this function returns. func mergeEntryChannels(ctx context.Context, in []chan metaCacheEntry, out chan<- metaCacheEntry, readQuorum int) error { defer xioutil.SafeClose(out) - - ctxDone := ctx.Done() - - releaseEntry := func(e *metaCacheEntry) { - if e == nil { - return - } - if e.reusable && e.metadata != nil { - metaDataPoolPut(e.metadata) - } - e.metadata = nil - e.cached = nil - } - - // Do not use path.Clean here. Object keys may legally contain repeated - // slashes or "." / ".." segments. We only want to treat "name" and "name/" - // as the same logical listing position for dir/object conflict handling. - sameListedName := func(a, b string) bool { - if a == b { - return true - } - return strings.TrimSuffix(a, slashSeparator) == strings.TrimSuffix(b, slashSeparator) - } - if len(in) == 0 { return nil } + top := make([]*metaCacheEntry, len(in)) + nDone := 0 + ctxDone := ctx.Done() - // Simple forwarder for one input. + // Use simpler forwarder. if len(in) == 1 { - if in[0] == nil { - return nil - } for { select { case <-ctxDone: @@ -794,7 +726,6 @@ func mergeEntryChannels(ctx context.Context, in []chan metaCacheEntry, out chan< } select { case <-ctxDone: - releaseEntry(&v) return ctx.Err() case out <- v: } @@ -802,256 +733,173 @@ func mergeEntryChannels(ctx context.Context, in []chan metaCacheEntry, out chan< } } - top := make([]*metaCacheEntry, len(in)) - done := make([]bool, len(in)) - nDone := 0 - - releaseTop := func(idx int) { - releaseEntry(top[idx]) - top[idx] = nil - } - - defer func() { - for i := range top { - releaseTop(i) - } - }() - - readTop := func(idx int) error { - if done[idx] { - top[idx] = nil - return nil - } - - if in[idx] == nil { - done[idx] = true - top[idx] = nil - nDone++ - return nil - } - + selectFrom := func(idx int) error { select { case <-ctxDone: return ctx.Err() case entry, ok := <-in[idx]: if !ok { top[idx] = nil - done[idx] = true nDone++ - return nil + } else { + top[idx] = &entry } - top[idx] = &entry - return nil } + return nil } - - discardAndReadTop := func(idx int) error { - releaseTop(idx) - return readTop(idx) - } - - // Populate initial heads. + // Populate all... for i := range in { - if err := readTop(i); err != nil { + if err := selectFrom(i); err != nil { return err } } last := "" - toMerge := make([]int, 0, len(in)-1) + var toMerge []int + // Choose the best to return. for { if nDone == len(in) { return nil } - var best *metaCacheEntry - bestIdx := -1 + best := top[0] + bestIdx := 0 toMerge = toMerge[:0] - - for i, other := range top { + for i, other := range top[1:] { + otherIdx := i + 1 if other == nil { continue } if best == nil { best = other - bestIdx = i + bestIdx = otherIdx continue } - if sameListedName(best.name, other.name) { - // We may have a synthetic directory and an object with the - // same logical listing name. Drop the synthetic directory. + if path.Clean(best.name) == path.Clean(other.name) { + // We may be in a situation where we have a directory and an object with the same name. + // In that case we will drop the directory entry. + // This should however not be confused with an object with a trailing slash. dirMatches := best.isDir() == other.isDir() suffixMatches := strings.HasSuffix(best.name, slashSeparator) == strings.HasSuffix(other.name, slashSeparator) - // Same type and same slash suffix: resolve/merge them. + // Simple case. Both are same type with same suffix. if dirMatches && suffixMatches { - toMerge = append(toMerge, i) + toMerge = append(toMerge, otherIdx) continue } if !dirMatches { + // We have an object `name` or 'name/' and a directory `name/`. if other.isDir() { if serverDebugLog { console.Debugln("mergeEntryChannels: discarding directory", other.name, "for object", best.name) } - if err := discardAndReadTop(i); err != nil { + // Discard the directory. + if err := selectFrom(otherIdx); err != nil { return err } continue } + // Replace directory with object and discard the previous best directory. if serverDebugLog { console.Debugln("mergeEntryChannels: discarding directory", best.name, "for object", other.name) } + oldBestIdx := bestIdx toMerge = toMerge[:0] best = other - bestIdx = i + bestIdx = otherIdx + if err := selectFrom(oldBestIdx); err != nil { + return err + } continue } - - // Same dir/object type but different slash suffix. - // Leave normal lexical ordering to decide. + // Leave it to be resolved. Names are different. } if best.name > other.name { toMerge = toMerge[:0] best = other - bestIdx = i + bestIdx = otherIdx } } - if best == nil { - return nil - } + // Merge any unmerged if len(toMerge) > 0 { - // Duplicate synthetic directories: keep one, discard the rest. - if best.isDir() { - for _, idx := range toMerge { - if err := discardAndReadTop(idx); err != nil { - return err - } - } - toMerge = toMerge[:0] - } else { - versions := make([][]xlMetaV2ShallowVersion, 0, len(toMerge)+1) - xl, err := best.xlmeta() - if err == nil { - versions = append(versions, xl.versions) - } else { - xl = nil - } - for _, idx := range toMerge { - other := top[idx] - if other == nil { - continue - } - - xlOther, err := other.xlmeta() - if err != nil { - if err := discardAndReadTop(idx); err != nil { - return err - } - continue - } - if xl == nil { - // Current best was invalid. Discard it and promote - // the first valid duplicate. - if err := discardAndReadTop(bestIdx); err != nil { - return err - } - best = other - bestIdx = idx - xl = xlOther - } else { - if err := discardAndReadTop(idx); err != nil { - return err - } - } - versions = append(versions, xlOther.versions) + versions := make([][]xlMetaV2ShallowVersion, 0, len(toMerge)+1) + xl, err := best.xlmeta() + if err == nil { + versions = append(versions, xl.versions) + } + for _, idx := range toMerge { + other := top[idx] + if other == nil { + continue } - - if xl == nil || len(versions) == 0 { - if err := discardAndReadTop(bestIdx); err != nil { + xl2, err := other.xlmeta() + if err != nil { + if err := selectFrom(idx); err != nil { return err } continue } - - mergedVersions := mergeXLV2Versions(readQuorum, true, 0, versions...) - // no longer emits best when duplicate metadata merge fails to satisfy quorum - if len(mergedVersions) == 0 { - if err := discardAndReadTop(bestIdx); err != nil { + if xl == nil { + // Discard current "best" + if err := selectFrom(bestIdx); err != nil { return err } - continue - } - - xl.versions = mergedVersions - - buf := metaDataPoolGet() - meta, err := xl.AppendTo(buf) - if err != nil { - metaDataPoolPut(buf) - bugLogIf(context.Background(), err) - - if err := discardAndReadTop(bestIdx); err != nil { + bestIdx = idx + best = other + xl = xl2 + } else { + // Mark read, unless we added it as new "best". + if err := selectFrom(idx); err != nil { return err } - continue - } - if best.reusable && best.metadata != nil { - metaDataPoolPut(best.metadata) } + versions = append(versions, xl2.versions) + } - best.metadata = meta - best.cached = xl - best.reusable = true + if xl != nil && len(versions) > 0 { + // Merge all versions. 'strict' doesn't matter since we only need one. + xl.versions = mergeXLV2Versions(readQuorum, true, 0, versions...) + if meta, err := xl.AppendTo(metaDataPoolGet()); err == nil { + if best.reusable { + metaDataPoolPut(best.metadata) + } + best.metadata = meta + best.cached = xl + } } + toMerge = toMerge[:0] } - if best.name > last { select { case <-ctxDone: return ctx.Err() case out <- *best: last = best.name - - // Ownership of best.metadata has moved to the receiver. - // Clear top before readTop so deferred cleanup does not - // return metadata that was already sent. - top[bestIdx] = nil } - if err := readTop(bestIdx); err != nil { - return err - } - continue - } - - if serverDebugLog { + } else if serverDebugLog { console.Debugln("mergeEntryChannels: discarding duplicate", best.name, "<=", last) } - - if err := discardAndReadTop(bestIdx); err != nil { + // Replace entry we just sent. + if err := selectFrom(bestIdx); err != nil { return err } } } // merge will merge other into m. -// If the same entry exists in both and metadata matches, only one is added. -// If names are equal but metadata differs, the entry from m is placed first. -// Operation time is expected to be O(n+m), or O(limit) if limit > 0. +// If the same entries exists in both and metadata matches only one is added, +// otherwise the entry from m will be placed first. +// Operation time is expected to be O(n+m). func (m *metaCacheEntriesSorted) merge(other metaCacheEntriesSorted, limit int) { - if limit == 0 { - m.o = nil - return + capacity := m.len() + other.len() + if limit > 0 && limit < capacity { + capacity = limit } - + merged := make(metaCacheEntries, 0, capacity) a := m.entries() b := other.entries() - capHint := len(a) + len(b) - if limit > 0 && capHint > limit { - capHint = limit - } - merged := make(metaCacheEntries, 0, capHint) appendOne := func(entry metaCacheEntry) bool { if limit > 0 && len(merged) >= limit { return false @@ -1059,77 +907,74 @@ func (m *metaCacheEntriesSorted) merge(other metaCacheEntriesSorted, limit int) merged = append(merged, entry) return true } + for len(a) > 0 && len(b) > 0 { - if limit > 0 && len(merged) >= limit { - break - } switch { case a[0].name == b[0].name: + // Same object and same metadata, discard one copy. if bytes.Equal(a[0].metadata, b[0].metadata) { - // Same entry, discard one. if !appendOne(a[0]) { - break + m.o = merged + return } a = a[1:] b = b[1:] continue } - // Same name, different metadata. - // Preserve m first, as documented. + + // Same name but different metadata. Preserve the documented behavior: + // entries already in m are placed first. if !appendOne(a[0]) { - break + m.o = merged + return } a = a[1:] case a[0].name < b[0].name: if !appendOne(a[0]) { - break + m.o = merged + return } a = a[1:] default: if !appendOne(b[0]) { - break + m.o = merged + return } b = b[1:] } } - for len(a) > 0 && (limit < 0 || len(merged) < limit) { - merged = append(merged, a[0]) - a = a[1:] - } - for len(b) > 0 && (limit < 0 || len(merged) < limit) { - merged = append(merged, b[0]) - b = b[1:] + + appendRemaining := func(entries metaCacheEntries) { + if limit < 0 { + merged = append(merged, entries...) + return + } + remaining := limit - len(merged) + if remaining <= 0 { + return + } + if len(entries) > remaining { + entries = entries[:remaining] + } + merged = append(merged, entries...) } + + appendRemaining(a) + appendRemaining(b) m.o = merged } // filterPrefix will filter m to only contain entries with the specified prefix. -// -// Requires m.o to be sorted by name. func (m *metaCacheEntriesSorted) filterPrefix(s string) { - if m == nil || s == "" { + if s == "" { return } - m.forwardTo(s) - for i := range m.o { - if m.o[i].hasPrefix(s) { - continue - } - if m.reuse { - for j := i; j < len(m.o); j++ { - if cap(m.o[j].metadata) >= metaDataReadDefault { - metaDataPoolPut(m.o[j].metadata) - } - m.o[j] = metaCacheEntry{} - } - } else { - for j := i; j < len(m.o); j++ { - m.o[j] = metaCacheEntry{} - } + for i, o := range m.o { + if !o.hasPrefix(s) { + m.o = m.o[:i] + break } - m.o = m.o[:i] - return } } @@ -1137,16 +982,11 @@ func (m *metaCacheEntriesSorted) filterPrefix(s string) { // Order is preserved, but the underlying slice is modified. func (m *metaCacheEntriesSorted) filterObjectsOnly() { dst := m.o[:0] - for i := range m.o { - if !m.o[i].isDir() { - dst = append(dst, m.o[i]) + for _, o := range m.o { + if !o.isDir() { + dst = append(dst, o) } } - // Clear the tail so removed entries do not remain referenced - // by the underlying array. - for i := len(dst); i < len(m.o); i++ { - m.o[i] = metaCacheEntry{} - } m.o = dst } @@ -1154,19 +994,10 @@ func (m *metaCacheEntriesSorted) filterObjectsOnly() { // Order is preserved, but the underlying slice is modified. func (m *metaCacheEntriesSorted) filterPrefixesOnly() { dst := m.o[:0] - for i := range m.o { - if m.o[i].isDir() { - dst = append(dst, m.o[i]) - continue + for _, o := range m.o { + if o.isDir() { + dst = append(dst, o) } - if m.reuse && cap(m.o[i].metadata) >= metaDataReadDefault { - metaDataPoolPut(m.o[i].metadata) - } - } - // Clear the tail so removed entries do not stay referenced - // by the underlying array. - for i := len(dst); i < len(m.o); i++ { - m.o[i] = metaCacheEntry{} } m.o = dst } @@ -1176,47 +1007,25 @@ func (m *metaCacheEntriesSorted) filterPrefixesOnly() { // To return root elements only set prefix to an empty string. // Order is preserved, but the underlying slice is modified. func (m *metaCacheEntriesSorted) filterRecursiveEntries(prefix, separator string) { - if separator == "" { - return - } - if separator == "" { - return - } - + dst := m.o[:0] if prefix != "" { m.forwardTo(prefix) - } - - dst := m.o[:0] - - for i := range m.o { - name := m.o[i].name - - if prefix != "" { - if !strings.HasPrefix(name, prefix) { - break - } - ext := strings.TrimPrefix(name, prefix) - if ext == "" || !strings.Contains(ext, separator) { - dst = append(dst, m.o[i]) - continue - } - } else { - if !strings.Contains(name, separator) { - dst = append(dst, m.o[i]) - continue + for _, o := range m.o { + ext := strings.TrimPrefix(o.name, prefix) + if len(ext) != len(o.name) { + if !strings.Contains(ext, separator) { + dst = append(dst, o) + } } } - if m.reuse && cap(m.o[i].metadata) >= metaDataReadDefault { - metaDataPoolPut(m.o[i].metadata) + } else { + // No prefix, simpler + for _, o := range m.o { + if !strings.Contains(o.name, separator) { + dst = append(dst, o) + } } } - - for i := len(dst); i < len(m.o); i++ { - // Clear references held by the backing array. - m.o[i] = metaCacheEntry{} - } - m.o = dst } @@ -1225,20 +1034,15 @@ func (m *metaCacheEntriesSorted) truncate(n int) { if m == nil { return } - if n < 0 { - n = 0 - } - if len(m.o) <= n { - return - } - for i := n; i < len(m.o); i++ { - if m.reuse && cap(m.o[i].metadata) >= metaDataReadDefault { - metaDataPoolPut(m.o[i].metadata) + if len(m.o) > n { + if m.reuse { + for i, entry := range m.o[n:] { + metaDataPoolPut(entry.metadata) + m.o[n+i].metadata = nil + } } - // Clear references held by the backing array. - m.o[i] = metaCacheEntry{} + m.o = m.o[:n] } - m.o = m.o[:n] } // len returns the number of objects and prefix dirs in m. diff --git a/cmd/metacache-entries_test.go b/cmd/metacache-entries_test.go index 1aec54098cf48..ebd157400ab96 100644 --- a/cmd/metacache-entries_test.go +++ b/cmd/metacache-entries_test.go @@ -18,8 +18,11 @@ package cmd import ( + "context" + "errors" "fmt" "math/rand" + "os" "reflect" "sort" "testing" @@ -124,7 +127,6 @@ func Test_metaCacheEntries_filterRecursive(t *testing.T) { data.filterRecursiveEntries("src/compress/bzip2/", slashSeparator) got := data.entries().names() want := []string{"src/compress/bzip2/", "src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", "src/compress/bzip2/move_to_front.go"} - // "src/compress/bzip2/", "src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", "src/compress/bzip2/move_to_front.go", "src/compress/bzip2/testdata/" if !reflect.DeepEqual(want, got) { t.Errorf("got unexpected result: %#v", got) } @@ -658,3 +660,525 @@ func Test_metaCacheEntries_resolve(t *testing.T) { } } } + +func testDir(name string) metaCacheEntry { + return metaCacheEntry{name: name} +} + +func testObject(name string, metadata ...byte) metaCacheEntry { + if len(metadata) == 0 { + metadata = []byte{1} + } + return metaCacheEntry{name: name, metadata: metadata} +} + +func testNames(entries metaCacheEntries) []string { + out := make([]string, 0, len(entries)) + for _, entry := range entries { + out = append(out, entry.name) + } + return out +} + +func requireNames(t *testing.T, got metaCacheEntries, want []string) { + t.Helper() + gotNames := testNames(got) + if len(gotNames) == 0 && len(want) == 0 { + return + } + if !reflect.DeepEqual(gotNames, want) { + t.Fatalf("names mismatch\n got: %v\nwant: %v", gotNames, want) + } +} + +func testEntryChan(entries ...metaCacheEntry) chan metaCacheEntry { + ch := make(chan metaCacheEntry, len(entries)) + for _, entry := range entries { + ch <- entry + } + close(ch) + return ch +} + +func collectEntryNames(ch <-chan metaCacheEntry) []string { + var names []string + for entry := range ch { + names = append(names, entry.name) + } + return names +} + +func TestMetaCacheEntryClassification(t *testing.T) { + tests := []struct { + name string + entry metaCacheEntry + wantDir bool + wantObject bool + wantObjectDir bool + }{ + { + name: "synthetic prefix directory", + entry: testDir("photos" + slashSeparator), + wantDir: true, + }, + { + name: "regular object", + entry: testObject("photos/image.jpg"), + wantObject: true, + }, + { + name: "object whose key ends with slash", + entry: testObject("photos"+slashSeparator, 1), + wantObject: true, + wantObjectDir: true, + }, + { + name: "empty entry", + entry: metaCacheEntry{}, + }, + { + name: "name without metadata and without trailing slash", + entry: metaCacheEntry{name: "photos"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.entry.isDir(); got != tt.wantDir { + t.Fatalf("isDir() = %v, want %v", got, tt.wantDir) + } + if got := tt.entry.isObject(); got != tt.wantObject { + t.Fatalf("isObject() = %v, want %v", got, tt.wantObject) + } + if got := tt.entry.isObjectDir(); got != tt.wantObjectDir { + t.Fatalf("isObjectDir() = %v, want %v", got, tt.wantObjectDir) + } + }) + } +} + +func TestMetaCacheEntryHasPrefix(t *testing.T) { + entry := metaCacheEntry{name: "photos/2026/image.jpg"} + for _, prefix := range []string{"", "photos", "photos/", "photos/2026/"} { + if !entry.hasPrefix(prefix) { + t.Fatalf("hasPrefix(%q) = false, want true", prefix) + } + } + for _, prefix := range []string{"video", "photoz", "/photos"} { + if entry.hasPrefix(prefix) { + t.Fatalf("hasPrefix(%q) = true, want false", prefix) + } + } +} + +func TestMetaCacheEntryMatchesNilAndDirectories(t *testing.T) { + var nilEntry *metaCacheEntry + prefer, ok := nilEntry.matches(nil, true) + if prefer != nil || !ok { + t.Fatalf("nil vs nil: prefer=%v ok=%v, want nil,true", prefer, ok) + } + + dir := testDir("a/") + prefer, ok = nilEntry.matches(&dir, true) + if prefer != &dir || ok { + t.Fatalf("nil vs dir: prefer=%p ok=%v, want dir,false", prefer, ok) + } + + otherDir := testDir("a/") + prefer, ok = dir.matches(&otherDir, true) + if prefer != &dir || !ok { + t.Fatalf("same synthetic dirs: prefer=%p ok=%v, want receiver,true", prefer, ok) + } + + object := testObject("a/") + prefer, ok = dir.matches(&object, true) + if prefer != &dir || ok { + t.Fatalf("dir vs object with same name: prefer=%p ok=%v, want dir,false", prefer, ok) + } + + bDir := testDir("b/") + prefer, ok = bDir.matches(&dir, true) + if prefer != &dir || ok { + t.Fatalf("different names choose lexicographically first: prefer.name=%q ok=%v, want a/,false", prefer.name, ok) + } +} + +func TestMetaCacheEntryIsInDir(t *testing.T) { + tests := []struct { + name string + entryName string + dir string + separator string + want bool + }{ + {"root regular object", "file.txt", "", "/", true}, + {"root synthetic directory", "photos/", "", "/", true}, + {"root recursive child", "photos/image.jpg", "", "/", false}, + {"direct child object", "photos/image.jpg", "photos/", "/", true}, + {"direct child prefix", "photos/2026/", "photos/", "/", true}, + {"recursive grandchild", "photos/2026/image.jpg", "photos/", "/", false}, + {"outside directory", "videos/image.jpg", "photos/", "/", false}, + {"prefix collision does not match as direct child", "photos-old/image.jpg", "photos/", "/", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + entry := metaCacheEntry{name: tt.entryName} + if got := entry.isInDir(tt.dir, tt.separator); got != tt.want { + t.Fatalf("isInDir(%q, %q) = %v, want %v", tt.dir, tt.separator, got, tt.want) + } + }) + } +} + +func TestMetaCacheEntryDirectoryFileInfoAndXLMetaErrors(t *testing.T) { + dir := testDir("photos/") + + fi, err := dir.fileInfo("bucket-a") + if err != nil { + t.Fatalf("fileInfo(dir) returned unexpected error: %v", err) + } + if fi.Volume != "bucket-a" || fi.Name != "photos/" || fi.Mode != uint32(os.ModeDir) { + t.Fatalf("unexpected directory FileInfo: %#v", fi) + } + + fiv, err := dir.fileInfoVersions("bucket-a") + if err != nil { + t.Fatalf("fileInfoVersions(dir) returned unexpected error: %v", err) + } + if fiv.Volume != "bucket-a" || fiv.Name != "photos/" || len(fiv.Versions) != 1 || fiv.Versions[0].Mode != uint32(os.ModeDir) { + t.Fatalf("unexpected directory FileInfoVersions: %#v", fiv) + } + + if _, err := dir.xlmeta(); !errors.Is(err, errFileNotFound) { + t.Fatalf("xlmeta(dir) error = %v, want errFileNotFound", err) + } + + missingObject := metaCacheEntry{name: "not-a-dir"} + if _, err := missingObject.xlmeta(); !errors.Is(err, errFileNotFound) { + t.Fatalf("xlmeta(empty non-dir) error = %v, want errFileNotFound", err) + } +} + +func TestMetaCacheEntriesSortIsSortedNamesFirstFoundAndClone(t *testing.T) { + entries := metaCacheEntries{ + testObject("c.txt", 3), + testDir("a/"), + testObject("b.txt", 2), + } + if entries.isSorted() { + t.Fatal("isSorted() = true for unsorted input, want false") + } + + sorted := entries.sort() + if !sorted.o.isSorted() { + t.Fatal("sort() did not return sorted entries") + } + requireNames(t, sorted.entries(), []string{"a/", "b.txt", "c.txt"}) + if !reflect.DeepEqual(sorted.o.names(), []string{"a/", "b.txt", "c.txt"}) { + t.Fatalf("names() = %v", sorted.o.names()) + } + + first, n := metaCacheEntries{{}, testDir("first/"), testObject("second.txt")}.firstFound() + if n != 2 || first == nil || first.name != "first/" { + t.Fatalf("firstFound() = (%v,%d), want first/,2", first, n) + } + + metadata := []byte{1, 2, 3} + original := metaCacheEntries{testObject("a.txt", metadata...)} + clone := original.shallowClone() + clone[0].name = "changed.txt" + if original[0].name != "a.txt" { + t.Fatalf("shallowClone changed original entry name: %q", original[0].name) + } + clone[0].metadata[0] = 9 + if original[0].metadata[0] != 9 { + t.Fatalf("shallowClone unexpectedly deep-copied metadata") + } +} + +func TestMetaCacheEntriesResolveDirectories(t *testing.T) { + tests := []struct { + name string + entries metaCacheEntries + params *metadataResolutionParams + wantOK bool + wantName string + }{ + { + name: "nil params", + entries: metaCacheEntries{testDir("photos/")}, + params: nil, + }, + { + name: "empty entries", + params: &metadataResolutionParams{dirQuorum: 1, objQuorum: 1}, + }, + { + name: "directory reaches quorum", + entries: metaCacheEntries{testDir("photos/"), {}, testDir("photos/")}, + params: &metadataResolutionParams{dirQuorum: 2, objQuorum: 2}, + wantOK: true, + wantName: "photos/", + }, + { + name: "directory does not reach quorum", + entries: metaCacheEntries{testDir("photos/"), {}, metaCacheEntry{name: "missing"}}, + params: &metadataResolutionParams{dirQuorum: 2, objQuorum: 2}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + selected, ok := tt.entries.resolve(tt.params) + if ok != tt.wantOK { + t.Fatalf("resolve() ok = %v, want %v; selected=%#v", ok, tt.wantOK, selected) + } + if tt.wantOK && (selected == nil || selected.name != tt.wantName) { + t.Fatalf("resolve() selected = %#v, want name %q", selected, tt.wantName) + } + }) + } +} + +func TestMetaCacheEntriesSortedCloneLenEntriesAndTruncate(t *testing.T) { + var nilSorted *metaCacheEntriesSorted + nilSorted.truncate(1) + if nilSorted.len() != 0 { + t.Fatalf("nil len() = %d, want 0", nilSorted.len()) + } + if nilSorted.entries() != nil { + t.Fatalf("nil entries() = %#v, want nil", nilSorted.entries()) + } + + metadata := []byte{1, 2} + m := metaCacheEntriesSorted{ + o: metaCacheEntries{testObject("a.txt", metadata...), testObject("b.txt", 2), testDir("c/")}, + listID: "list-1", + reuse: false, + lastSkippedEntry: "skipped", + } + clone := m.shallowClone() + if clone.listID != m.listID || clone.reuse != m.reuse || clone.lastSkippedEntry != m.lastSkippedEntry { + t.Fatalf("shallowClone did not preserve metadata fields: %#v", clone) + } + clone.o[0].name = "changed.txt" + if m.o[0].name != "a.txt" { + t.Fatalf("sorted shallowClone changed original entry name: %q", m.o[0].name) + } + clone.o[0].metadata[0] = 9 + if m.o[0].metadata[0] != 9 { + t.Fatalf("sorted shallowClone unexpectedly deep-copied metadata") + } + + m.truncate(2) + requireNames(t, m.entries(), []string{"a.txt", "b.txt"}) + m.truncate(10) + requireNames(t, m.entries(), []string{"a.txt", "b.txt"}) +} + +func TestMetaCacheEntriesSortedForwardToAndPast(t *testing.T) { + base := func() metaCacheEntriesSorted { + return metaCacheEntriesSorted{o: metaCacheEntries{ + testObject("a.txt"), + testObject("b.txt"), + testObject("c.txt"), + testObject("d.txt"), + }} + } + + t.Run("forwardTo keeps requested name and later entries", func(t *testing.T) { + m := base() + m.forwardTo("b.txt") + requireNames(t, m.entries(), []string{"b.txt", "c.txt", "d.txt"}) + }) + + t.Run("forwardTo starts at first greater name", func(t *testing.T) { + m := base() + m.forwardTo("bb") + requireNames(t, m.entries(), []string{"c.txt", "d.txt"}) + }) + + t.Run("forwardTo empty marker leaves list unchanged", func(t *testing.T) { + m := base() + m.forwardTo("") + requireNames(t, m.entries(), []string{"a.txt", "b.txt", "c.txt", "d.txt"}) + }) + + t.Run("forwardPast skips requested name", func(t *testing.T) { + m := base() + m.forwardPast("b.txt") + requireNames(t, m.entries(), []string{"c.txt", "d.txt"}) + }) + + t.Run("forwardPast beyond last empties list", func(t *testing.T) { + m := base() + m.forwardPast("z") + requireNames(t, m.entries(), nil) + }) +} + +func TestMetaCacheEntriesSortedFilters(t *testing.T) { + t.Run("filterPrefix keeps only entries with prefix", func(t *testing.T) { + m := metaCacheEntriesSorted{o: metaCacheEntries{ + testObject("a.txt"), + testObject("photos/1.jpg"), + testDir("photos/album/"), + testObject("photos2/1.jpg"), + testObject("z.txt"), + }} + m.filterPrefix("photos/") + requireNames(t, m.entries(), []string{"photos/1.jpg", "photos/album/"}) + }) + + t.Run("filterPrefix empty leaves entries unchanged", func(t *testing.T) { + m := metaCacheEntriesSorted{o: metaCacheEntries{testObject("a.txt"), testDir("b/")}} + m.filterPrefix("") + requireNames(t, m.entries(), []string{"a.txt", "b/"}) + }) + + t.Run("filterObjectsOnly removes synthetic dirs but keeps slash-suffixed objects", func(t *testing.T) { + m := metaCacheEntriesSorted{o: metaCacheEntries{ + testDir("a/"), + testObject("b.txt"), + testObject("c/"), + }} + m.filterObjectsOnly() + requireNames(t, m.entries(), []string{"b.txt", "c/"}) + }) + + t.Run("filterPrefixesOnly keeps only synthetic dirs", func(t *testing.T) { + m := metaCacheEntriesSorted{o: metaCacheEntries{ + testDir("a/"), + testObject("b.txt"), + testObject("c/"), + }} + m.filterPrefixesOnly() + requireNames(t, m.entries(), []string{"a/"}) + }) + + t.Run("filterRecursiveEntries root keeps entries without separator", func(t *testing.T) { + m := metaCacheEntriesSorted{o: metaCacheEntries{ + testObject("a.txt"), + testDir("photos/"), + testObject("photos/1.jpg"), + testObject("z.txt"), + }} + m.filterRecursiveEntries("", "/") + requireNames(t, m.entries(), []string{"a.txt", "z.txt"}) + }) + + t.Run("filterRecursiveEntries prefix keeps only direct non-recursive children", func(t *testing.T) { + m := metaCacheEntriesSorted{o: metaCacheEntries{ + testObject("a.txt"), + testObject("photos/1.jpg"), + testDir("photos/album/"), + testObject("photos/album/2.jpg"), + testObject("videos/1.jpg"), + }} + m.filterRecursiveEntries("photos/", "/") + requireNames(t, m.entries(), []string{"photos/1.jpg"}) + }) +} + +func TestMetaCacheEntriesSortedMerge(t *testing.T) { + t.Run("unlimited merge interleaves sorted inputs", func(t *testing.T) { + m := metaCacheEntriesSorted{o: metaCacheEntries{testObject("a.txt", 1), testObject("c.txt", 3)}} + other := metaCacheEntriesSorted{o: metaCacheEntries{testObject("b.txt", 2), testObject("d.txt", 4)}} + m.merge(other, -1) + requireNames(t, m.entries(), []string{"a.txt", "b.txt", "c.txt", "d.txt"}) + }) + + t.Run("positive limit truncates merged output", func(t *testing.T) { + m := metaCacheEntriesSorted{o: metaCacheEntries{testObject("a.txt", 1), testObject("c.txt", 3)}} + other := metaCacheEntriesSorted{o: metaCacheEntries{testObject("b.txt", 2), testObject("d.txt", 4)}} + m.merge(other, 2) + requireNames(t, m.entries(), []string{"a.txt", "b.txt"}) + }) + + t.Run("same name and same metadata deduplicates", func(t *testing.T) { + m := metaCacheEntriesSorted{o: metaCacheEntries{testObject("a.txt", 1), testObject("b.txt", 2)}} + other := metaCacheEntriesSorted{o: metaCacheEntries{testObject("a.txt", 1), testObject("c.txt", 3)}} + m.merge(other, -1) + requireNames(t, m.entries(), []string{"a.txt", "b.txt", "c.txt"}) + }) + + t.Run("same name and different metadata keeps receiver entry first per method contract", func(t *testing.T) { + m := metaCacheEntriesSorted{o: metaCacheEntries{testObject("same.txt", 1)}} + other := metaCacheEntriesSorted{o: metaCacheEntries{testObject("same.txt", 2)}} + m.merge(other, -1) + + if len(m.o) != 2 { + t.Fatalf("merge() produced %d entries, want 2", len(m.o)) + } + if !reflect.DeepEqual(m.o[0].metadata, []byte{1}) || !reflect.DeepEqual(m.o[1].metadata, []byte{2}) { + t.Fatalf("same-name different-metadata order = %v then %v; want receiver metadata first", m.o[0].metadata, m.o[1].metadata) + } + }) +} + +func TestMergeEntryChannels(t *testing.T) { + t.Run("no input channels closes output", func(t *testing.T) { + out := make(chan metaCacheEntry) + if err := mergeEntryChannels(context.Background(), nil, out, 1); err != nil { + t.Fatalf("mergeEntryChannels() error = %v, want nil", err) + } + if _, ok := <-out; ok { + t.Fatal("output channel still open, want closed") + } + }) + + t.Run("single channel forwards input order", func(t *testing.T) { + out := make(chan metaCacheEntry, 2) + err := mergeEntryChannels(context.Background(), []chan metaCacheEntry{ + testEntryChan(testObject("b.txt"), testDir("a/")), + }, out, 1) + if err != nil { + t.Fatalf("mergeEntryChannels() error = %v, want nil", err) + } + if got, want := collectEntryNames(out), []string{"b.txt", "a/"}; !reflect.DeepEqual(got, want) { + t.Fatalf("output names = %v, want %v", got, want) + } + }) + + t.Run("multiple channels merge sorted unique names", func(t *testing.T) { + out := make(chan metaCacheEntry, 4) + err := mergeEntryChannels(context.Background(), []chan metaCacheEntry{ + testEntryChan(testObject("a.txt"), testObject("c.txt")), + testEntryChan(testObject("b.txt"), testObject("d.txt")), + }, out, 1) + if err != nil { + t.Fatalf("mergeEntryChannels() error = %v, want nil", err) + } + if got, want := collectEntryNames(out), []string{"a.txt", "b.txt", "c.txt", "d.txt"}; !reflect.DeepEqual(got, want) { + t.Fatalf("output names = %v, want %v", got, want) + } + }) + + t.Run("object wins over synthetic directory with same clean path", func(t *testing.T) { + out := make(chan metaCacheEntry, 1) + err := mergeEntryChannels(context.Background(), []chan metaCacheEntry{ + testEntryChan(testDir("a/")), + testEntryChan(testObject("a", 1)), + }, out, 1) + if err != nil { + t.Fatalf("mergeEntryChannels() error = %v, want nil", err) + } + if got, want := collectEntryNames(out), []string{"a"}; !reflect.DeepEqual(got, want) { + t.Fatalf("output names = %v, want %v", got, want) + } + }) + + t.Run("canceled context returns context error and closes output", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + out := make(chan metaCacheEntry) + err := mergeEntryChannels(ctx, []chan metaCacheEntry{make(chan metaCacheEntry)}, out, 1) + if !errors.Is(err, context.Canceled) { + t.Fatalf("mergeEntryChannels() error = %v, want context.Canceled", err) + } + if _, ok := <-out; ok { + t.Fatal("output channel still open after cancellation, want closed") + } + }) +} diff --git a/cmd/metacache-manager.go b/cmd/metacache-manager.go index b7f66420f3b58..ce77648ef0510 100644 --- a/cmd/metacache-manager.go +++ b/cmd/metacache-manager.go @@ -49,50 +49,40 @@ const metacacheMaxEntries = 5000 func (m *metacacheManager) initManager() { // Add a transient bucket. // Start saver when object layer is ready. - m.init.Do(func() { - go m.cleanupLoop() - }) -} - -func (m *metacacheManager) cleanupLoop() { - for { - if objAPI := newObjectLayerFn(); objAPI != nil { - break - } - - select { - case <-time.After(time.Second): - case <-GlobalContext.Done(): - return + go func() { + objAPI := newObjectLayerFn() + for objAPI == nil { + time.Sleep(time.Second) + objAPI = newObjectLayerFn() } - } - t := time.NewTicker(time.Minute) - defer t.Stop() + t := time.NewTicker(time.Minute) + defer t.Stop() - var exit bool - for !exit { - select { - case <-t.C: - case <-GlobalContext.Done(): - exit = true - } - m.mu.RLock() - for _, v := range m.buckets { - if !exit { - v.cleanup() + var exit bool + for !exit { + select { + case <-t.C: + case <-GlobalContext.Done(): + exit = true } - } - m.mu.RUnlock() - m.mu.Lock() - for k, v := range m.trash { - if time.Since(v.lastUpdate) > metacacheMaxRunningAge { - v.delete(context.Background()) - delete(m.trash, k) + m.mu.RLock() + for _, v := range m.buckets { + if !exit { + v.cleanup() + } + } + m.mu.RUnlock() + m.mu.Lock() + for k, v := range m.trash { + if time.Since(v.lastUpdate) > metacacheMaxRunningAge { + v.delete(context.Background()) + delete(m.trash, k) + } } + m.mu.Unlock() } - m.mu.Unlock() - } + }() } // updateCacheEntry will update non-transient state. diff --git a/cmd/metacache-marker_test.go b/cmd/metacache-marker_test.go deleted file mode 100644 index 96d42465c1975..0000000000000 --- a/cmd/metacache-marker_test.go +++ /dev/null @@ -1,289 +0,0 @@ -package cmd - -import ( - "strings" - "testing" -) - -func TestListPathOptionsEncodeParseMarkerRoundTrip(t *testing.T) { - opts := listPathOptions{ - ID: "550e8400-e29b-41d4-a716-446655440000", - pool: 3, - set: 7, - } - - encoded := opts.encodeMarker("photos/2026/img.jpg") - - wantEncoded := "photos/2026/img.jpg[minio_cache:v2,id:550e8400-e29b-41d4-a716-446655440000,p:3,s:7]" - if encoded != wantEncoded { - t.Fatalf("unexpected encoded marker:\nwant: %q\n got: %q", wantEncoded, encoded) - } - - parsed := listPathOptions{ - Marker: encoded, - } - - parsed.parseMarker() - - if parsed.Marker != "photos/2026/img.jpg" { - t.Fatalf("expected marker without encoded suffix, got %q", parsed.Marker) - } - - if parsed.ID != opts.ID { - t.Fatalf("expected ID %q, got %q", opts.ID, parsed.ID) - } - - if parsed.pool != opts.pool { - t.Fatalf("expected pool %d, got %d", opts.pool, parsed.pool) - } - - if parsed.set != opts.set { - t.Fatalf("expected set %d, got %d", opts.set, parsed.set) - } - - if parsed.Create { - t.Fatalf("expected Create=false") - } -} - -func TestListPathOptionsEncodeMarkerWithoutIDMarksReturn(t *testing.T) { - opts := listPathOptions{} - - encoded := opts.encodeMarker("photos/") - - want := "photos/[minio_cache:v2,return:]" - if encoded != want { - t.Fatalf("unexpected encoded marker:\nwant: %q\n got: %q", want, encoded) - } - - parsed := listPathOptions{ - Marker: encoded, - } - - parsed.parseMarker() - - if parsed.Marker != "photos/" { - t.Fatalf("expected marker %q, got %q", "photos/", parsed.Marker) - } - - if !parsed.Create { - t.Fatalf("expected Create=true") - } - - if parsed.ID == "" { - t.Fatalf("expected generated ID to be set") - } -} - -func TestListPathOptionsEncodeMarkerInvalidIDFallsBackToReturn(t *testing.T) { - tests := []string{ - "bad:id", - "bad,id", - "bad[id", - "bad]id", - } - - for _, id := range tests { - t.Run(id, func(t *testing.T) { - opts := listPathOptions{ - ID: id, - pool: 1, - set: 2, - } - - encoded := opts.encodeMarker("marker") - - want := "marker[minio_cache:v2,return:]" - if encoded != want { - t.Fatalf("expected invalid ID to fallback to return marker:\nwant: %q\n got: %q", want, encoded) - } - }) - } -} - -func TestListPathOptionsParseMarkerIgnoresPlainMarker(t *testing.T) { - opts := listPathOptions{ - Marker: "photos/2026/img.jpg", - ID: "existing-id", - Create: false, - pool: 9, - set: 8, - } - - opts.parseMarker() - - if opts.Marker != "photos/2026/img.jpg" { - t.Fatalf("plain marker should not be changed, got %q", opts.Marker) - } - - if opts.ID != "existing-id" { - t.Fatalf("ID should not be changed, got %q", opts.ID) - } - - if opts.pool != 9 || opts.set != 8 { - t.Fatalf("pool/set should not be changed, got pool=%d set=%d", opts.pool, opts.set) - } -} - -func TestListPathOptionsParseMarkerIgnoresUnknownVersion(t *testing.T) { - opts := listPathOptions{ - Marker: "photos/[minio_cache:v1,id:abc,p:1,s:2]", - ID: "existing-id", - pool: 9, - set: 8, - } - - opts.parseMarker() - - if opts.Marker != "photos/[minio_cache:v1,id:abc,p:1,s:2]" { - t.Fatalf("marker should not be changed for unknown version, got %q", opts.Marker) - } - - if opts.ID != "existing-id" { - t.Fatalf("ID should not be changed, got %q", opts.ID) - } - - if opts.pool != 9 || opts.set != 8 { - t.Fatalf("pool/set should not be changed, got pool=%d set=%d", opts.pool, opts.set) - } -} - -func TestListPathOptionsParseMarkerRequiresEncodedTagAsSuffix(t *testing.T) { - opts := listPathOptions{ - Marker: "photos/[minio_cache:v2,id:abc,p:1,s:2]/extra", - ID: "existing-id", - pool: 9, - set: 8, - } - - opts.parseMarker() - - if opts.Marker != "photos/[minio_cache:v2,id:abc,p:1,s:2]/extra" { - t.Fatalf("marker should not be changed when encoded tag is not suffix, got %q", opts.Marker) - } - - if opts.ID != "existing-id" { - t.Fatalf("ID should not be changed, got %q", opts.ID) - } - - if opts.pool != 9 || opts.set != 8 { - t.Fatalf("pool/set should not be changed, got pool=%d set=%d", opts.pool, opts.set) - } -} - -func TestListPathOptionsParseMarkerDoesNotPanicOnMalformedMarkers(t *testing.T) { - tests := []string{ - "", - "photos/", - "photos/[", - "photos/]", - "photos/[minio_cache:v2", - "photos/minio_cache:v2]", - "photos/[minio_cache:v2,id:abc,p:1,s:2", - "photos/[garbage]", - "photos/[minio_cache]", - "photos/[minio_cache:v2,id]", - "photos/[minio_cache:v2,p:not-int,s:2]", - "photos/[minio_cache:v2,p:1,s:not-int]", - } - - for _, marker := range tests { - t.Run(marker, func(t *testing.T) { - opts := listPathOptions{ - Marker: marker, - } - - defer func() { - if r := recover(); r != nil { - t.Fatalf("parseMarker panicked for marker %q: %v", marker, r) - } - }() - - opts.parseMarker() - }) - } -} - -func TestListPathOptionsParseMarkerWithAdditionalUnknownFields(t *testing.T) { - opts := listPathOptions{ - Marker: "marker[minio_cache:v2,id:abc,unknown:value,p:4,s:5]", - } - - opts.parseMarker() - - if opts.Marker != "marker" { - t.Fatalf("expected marker %q, got %q", "marker", opts.Marker) - } - - if opts.ID != "abc" { - t.Fatalf("expected ID %q, got %q", "abc", opts.ID) - } - - if opts.pool != 4 { - t.Fatalf("expected pool 4, got %d", opts.pool) - } - - if opts.set != 5 { - t.Fatalf("expected set 5, got %d", opts.set) - } -} - -func TestListPathOptionsParseMarkerPreservesMarkerWithEarlierBrackets(t *testing.T) { - opts := listPathOptions{ - Marker: "photos/[folder]/image[minio_cache:v2,id:abc,p:1,s:2]", - } - - opts.parseMarker() - - if opts.Marker != "photos/[folder]/image" { - t.Fatalf("expected marker before encoded suffix to be preserved, got %q", opts.Marker) - } - - if opts.ID != "abc" { - t.Fatalf("expected ID abc, got %q", opts.ID) - } - - if opts.pool != 1 { - t.Fatalf("expected pool 1, got %d", opts.pool) - } - - if opts.set != 2 { - t.Fatalf("expected set 2, got %d", opts.set) - } -} - -func TestListPathOptionsParseReturnMarkerCreatesNewListing(t *testing.T) { - opts := listPathOptions{ - Marker: "marker[minio_cache:v2,return:]", - } - - opts.parseMarker() - - if opts.Marker != "marker" { - t.Fatalf("expected marker %q, got %q", "marker", opts.Marker) - } - - if !opts.Create { - t.Fatalf("expected Create=true") - } - - if opts.ID == "" { - t.Fatalf("expected generated ID") - } -} - -func TestListPathOptionsEncodedMarkerDoesNotContainInvalidIDCharacters(t *testing.T) { - opts := listPathOptions{ - ID: "valid-id", - } - - encoded := opts.encodeMarker("marker") - - if strings.Contains(encoded, "return:") { - t.Fatalf("did not expect valid ID to fallback to return marker: %q", encoded) - } - - if !strings.Contains(encoded, "id:valid-id") { - t.Fatalf("expected encoded marker to contain valid ID, got %q", encoded) - } -} diff --git a/cmd/metacache-server-pool.go b/cmd/metacache-server-pool.go index 4d56dab20e55c..3c923ccdb5704 100644 --- a/cmd/metacache-server-pool.go +++ b/cmd/metacache-server-pool.go @@ -28,64 +28,30 @@ import ( "sync" "time" - "github.com/minio/minio/internal/bucket/lifecycle" "github.com/minio/minio/internal/grid" xioutil "github.com/minio/minio/internal/ioutil" ) -var listMergedFn = func( - z *erasureServerPools, - ctx context.Context, - o listPathOptions, - filterCh chan<- metaCacheEntry, -) error { - return z.listMerged(ctx, o, filterCh) -} - func renameAllBucketMetacache(epPath string) error { // Rename all previous `.minio.sys/buckets//.metacache` to - // to `.minio.sys/tmp/` for deletion. + // `.minio.sys/tmp/` for deletion. return readDirFn(pathJoin(epPath, minioMetaBucket, bucketMetaPrefix), func(name string, typ os.FileMode) error { - if typ&os.ModeDir == 0 { + if !typ.IsDir() { return nil } - srcMetacacheOld := pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)) - tmpMetacacheOld := pathutil.Join(epPath, minioMetaTmpDeletedBucket, mustGetUUID()) - if err := renameAll(srcMetacacheOld, tmpMetacacheOld, epPath); !errors.Is(err, errFileNotFound) { + srcMetacache := pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)) + tmpMetacacheOld := pathutil.Join(epPath, minioMetaTmpDeletedBucket, mustGetUUID()) + if err := renameAll(srcMetacache, tmpMetacacheOld, epPath); err != nil && err != errFileNotFound { return fmt.Errorf("unable to rename (%s -> %s) %w", - srcMetacacheOld, + srcMetacache, tmpMetacacheOld, - osErrToFileErr(err), - ) + osErrToFileErr(err)) } return nil }) } -var markMetacacheListingUnused = func(o listPathOptions) { - rpc := globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix)) - if rpc == nil { - return - } - - ctx, cancel := context.WithTimeout(GlobalContext, 5*time.Second) - defer cancel() - - c, err := rpc.GetMetacacheListing(ctx, o) - if err == nil && c != nil { - c.error = "no longer used" - c.status = scanStateError - rpc.UpdateMetacacheListing(ctx, *c) - } -} - -func markMetacacheListingUnusedAsyncAndClearID(o *listPathOptions) { - opts := *o - go markMetacacheListingUnused(opts) - o.ID = "" -} - // listPath will return the requested entries. // If no more entries are in the listing io.EOF is returned, // otherwise nil or an unexpected error is returned. @@ -227,7 +193,21 @@ func (z *erasureServerPools) listPath(ctx context.Context, o *listPathOptions) ( return entries, err } entries.truncate(0) - markMetacacheListingUnusedAsyncAndClearID(o) + staleListing := *o + go func(o listPathOptions) { + rpc := globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix)) + if rpc != nil { + ctx, cancel := context.WithTimeout(GlobalContext, 5*time.Second) + defer cancel() + c, err := rpc.GetMetacacheListing(ctx, o) + if err == nil { + c.error = "no longer used" + c.status = scanStateError + rpc.UpdateMetacacheListing(ctx, *c) + } + } + }(staleListing) + o.ID = "" } if contextCanceled(ctx) { @@ -251,7 +231,7 @@ func (z *erasureServerPools) listPath(ctx context.Context, o *listPathOptions) ( // listing exactly at a specific limit. o.StopDiskAtLimit = true } - listErr = listMergedFn(z, listCtx, o, filterCh) + listErr = z.listMerged(listCtx, o, filterCh) o.debugln("listMerged returned with", listErr) }(*o) @@ -277,52 +257,48 @@ func (z *erasureServerPools) listPath(ctx context.Context, o *listPathOptions) ( return entries, nil } -var listPathOnSetFn = func( - set *erasureObjects, - ctx context.Context, - o listPathOptions, - results chan<- metaCacheEntry, -) error { - return set.listPath(ctx, o, results) -} - -var mergeEntryChannelsFn = mergeEntryChannels - // listMerged will list across all sets and return a merged results stream. // The result channel is closed when no more results are expected. func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, results chan<- metaCacheEntry) error { - var mu sync.Mutex var wg sync.WaitGroup - var errs []error - allAtEOF := true - var inputs []chan metaCacheEntry - mu.Lock() + // Ask all sets and merge entries. listCtx, cancelList := context.WithCancel(ctx) defer cancelList() + + nSets := 0 + for _, pool := range z.serverPools { + nSets += len(pool.sets) + } + if nSets == 0 { + xioutil.SafeClose(results) + if contextCanceled(ctx) { + return ctx.Err() + } + return io.EOF + } + + errs := make([]error, nSets) + inputs := make([]chan metaCacheEntry, 0, nSets) for _, pool := range z.serverPools { for _, set := range pool.sets { - wg.Add(1) + idx := len(inputs) innerResults := make(chan metaCacheEntry, 100) inputs = append(inputs, innerResults) - go func(i int, set *erasureObjects) { + + wg.Add(1) + go func(i int, set *erasureObjects, innerResults chan metaCacheEntry) { defer wg.Done() - err := listPathOnSetFn(set, listCtx, o, innerResults) - mu.Lock() - defer mu.Unlock() - if err == nil { - allAtEOF = false - } - errs[i] = err - }(len(errs), set) - errs = append(errs, nil) + // set.listPath owns innerResults and closes it when the producer is done. + // Do not close it here: closing from both sides races and can panic. + errs[i] = set.listPath(listCtx, o, innerResults) + }(idx, set, innerResults) } } - mu.Unlock() // Gather results to a single channel. // Quorum is one since we are merging across sets. - err := mergeEntryChannelsFn(ctx, inputs, results, 1) + err := mergeEntryChannels(ctx, inputs, results, 1) cancelList() wg.Wait() @@ -343,6 +319,7 @@ func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, return ctx.Err() } + allAtEOF := true for _, err := range errs { if errors.Is(err, io.EOF) { continue @@ -360,26 +337,6 @@ func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, return nil } -var metaCacheEntryFileInfoFn = func(obj metaCacheEntry, bucket string) (FileInfo, error) { - return obj.fileInfo(bucket) -} - -var metaCacheEntryFileInfoVersionsFn = func(obj metaCacheEntry, bucket string) (FileInfoVersions, error) { - return obj.fileInfoVersions(bucket) -} - -var evalLifecycleForListObjectFn = func(ctx context.Context, o listPathOptions, objInfo ObjectInfo) lifecycle.Event { - return evalActionFromLifecycle(ctx, *o.Lifecycle, o.Retention, o.Replication.Config, objInfo) -} - -var enqueueExpiryByDaysFn = func(objInfo ObjectInfo, evt lifecycle.Event, src lcEventSrc) { - globalExpiryState.enqueueByDays(objInfo, evt, src) -} - -var queueReplicationHealFn = func(ctx context.Context, o listPathOptions, objInfo ObjectInfo) { - queueReplicationHeal(ctx, o.Bucket, objInfo, o.Replication, 0) -} - // triggerExpiryAndRepl applies lifecycle and replication actions on the listing // It returns true if the listing is non-versioned and the given object is expired. func triggerExpiryAndRepl(ctx context.Context, o listPathOptions, obj metaCacheEntry) (skip bool) { @@ -390,19 +347,19 @@ func triggerExpiryAndRepl(ctx context.Context, o listPathOptions, obj metaCacheE // filter out between versions 'obj' cannot be truncated // in such a manner, so look for skipping an object only // for regular ListObjects() call only. - if !o.Versioned { - fi, err := metaCacheEntryFileInfoFn(obj, o.Bucket) + if !o.Versioned && !o.V1 { + fi, err := obj.fileInfo(o.Bucket) if err != nil { return skip } objInfo := fi.ToObjectInfo(o.Bucket, obj.name, versioned) if o.Lifecycle != nil { - act := evalLifecycleForListObjectFn(ctx, o, objInfo).Action + act := evalActionFromLifecycle(ctx, *o.Lifecycle, o.Retention, o.Replication.Config, objInfo).Action skip = act.Delete() && !act.DeleteRestored() } } - fiv, err := metaCacheEntryFileInfoVersionsFn(obj, o.Bucket) + fiv, err := obj.fileInfoVersions(o.Bucket) if err != nil { return skip } @@ -412,94 +369,23 @@ func triggerExpiryAndRepl(ctx context.Context, o listPathOptions, obj metaCacheE objInfo := version.ToObjectInfo(o.Bucket, obj.name, versioned) if o.Lifecycle != nil { - evt := evalLifecycleForListObjectFn(ctx, o, objInfo) + evt := evalActionFromLifecycle(ctx, *o.Lifecycle, o.Retention, o.Replication.Config, objInfo) if evt.Action.Delete() { - enqueueExpiryByDaysFn(objInfo, evt, lcEventSrc_s3ListObjects) + globalExpiryState.enqueueByDays(objInfo, evt, lcEventSrc_s3ListObjects) if !evt.Action.DeleteRestored() { continue } // queue version for replication upon expired restored copies if needed. } } - queueReplicationHealFn(ctx, o, objInfo) + queueReplicationHeal(ctx, o.Bucket, objInfo, o.Replication, 0) } return skip } -// seams for tests -var listAndSaveGetAvailablePoolIdxFn = func(z *erasureServerPools, ctx context.Context, bucket string, object string, size int64) int { - return z.getAvailablePoolIdx(ctx, bucket, object, size) -} - -var listAndSaveGetHashedSetIndexFn = func(pool *erasureSets, id string) int { - return pool.getHashedSetIndex(id) -} - -var listAndSaveSaveMetaCacheStreamFn = func(saver *erasureObjects, ctx context.Context, meta *metaCacheRPC, saveCh <-chan metaCacheEntry) error { - return saver.saveMetaCacheStream(ctx, meta, saveCh) -} - -var listAndSaveListMergedFn = func(z *erasureServerPools, ctx context.Context, o listPathOptions, results chan<- metaCacheEntry) error { - return z.listMerged(ctx, o, results) -} - -var listAndSaveRestClientFromHashFn = func(hash string) *peerRESTClient { - return globalNotificationSys.restClientFromHash(hash) -} - -// helper -func listAndSaveFanOut( - ctx context.Context, - listCtx context.Context, - o listPathOptions, - inCh <-chan metaCacheEntry, - outCh chan<- metaCacheEntry, - saveCh chan<- metaCacheEntry, - funcReturned *bool, - funcReturnedMu *sync.Mutex, -) { - var returned bool - - defer xioutil.SafeClose(saveCh) - - for entry := range inCh { - if o.shouldSkip(ctx, entry) { - continue - } - - if !returned { - funcReturnedMu.Lock() - returned = *funcReturned - funcReturnedMu.Unlock() - - if returned { - xioutil.SafeClose(outCh) - } else { - select { - case outCh <- entry: - case <-listCtx.Done(): - return - } - } - } - - entry.reusable = returned - - select { - case saveCh <- entry: - case <-listCtx.Done(): - return - } - } - - if !returned { - xioutil.SafeClose(outCh) - } -} - func (z *erasureServerPools) listAndSave(ctx context.Context, o *listPathOptions) (entries metaCacheEntriesSorted, err error) { // Use ID as the object name... - o.pool = listAndSaveGetAvailablePoolIdxFn(z, ctx, minioMetaBucket, o.ID, 10<<20) + o.pool = z.getAvailablePoolIdx(ctx, minioMetaBucket, o.ID, 10<<20) if o.pool < 0 { // No space or similar, don't persist the listing. o.pool = 0 @@ -508,13 +394,12 @@ func (z *erasureServerPools) listAndSave(ctx context.Context, o *listPathOptions o.Transient = true return entries, errDiskFull } - o.set = listAndSaveGetHashedSetIndexFn(z.serverPools[o.pool], o.ID) + o.set = z.serverPools[o.pool].getHashedSetIndex(o.ID) saver := z.serverPools[o.pool].sets[o.set] - // Disconnect from request context so the metacache save can continue - // after this call has returned enough entries to the caller. + // Disconnect cache creation from the request context so the cache can + // continue being built after the first page has been returned. listCtx, cancel := context.WithCancel(GlobalContext) - saveCh := make(chan metaCacheEntry, metacacheBlockSize) inCh := make(chan metaCacheEntry, metacacheBlockSize) outCh := make(chan metaCacheEntry, o.Limit) @@ -522,72 +407,88 @@ func (z *erasureServerPools) listAndSave(ctx context.Context, o *listPathOptions filteredResults := o.gatherResults(ctx, outCh) mc := o.newMetacache() - meta := metaCacheRPC{ - meta: &mc, - cancel: cancel, - rpc: listAndSaveRestClientFromHashFn(pathJoin(o.Bucket, o.Prefix)), - o: *o, - } + meta := metaCacheRPC{meta: &mc, cancel: cancel, rpc: globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix)), o: *o} // Save listing... go func() { - defer cancel() - if err := listAndSaveSaveMetaCacheStreamFn(saver, listCtx, &meta, saveCh); err != nil { - meta.setErr(err.Error()) - } - }() - - saveDone := make(chan struct{}) - go func() { - defer close(saveDone) - defer cancel() - - if err := listAndSaveSaveMetaCacheStreamFn(saver, listCtx, &meta, saveCh); err != nil { + if err := saver.saveMetaCacheStream(listCtx, &meta, saveCh); err != nil { meta.setErr(err.Error()) } + cancel() }() // Do listing... - listErrCh := make(chan error, 1) - go func(o listPathOptions) { - err := listAndSaveListMergedFn(z, listCtx, o, inCh) + err := z.listMerged(listCtx, o, inCh) if err != nil { meta.setErr(err.Error()) } - o.debugln("listAndSave: listing", o.ID, "finished with ", err) - - listErrCh <- err - close(listErrCh) }(*o) - // Keep track of when we return since we no longer have to send entries to output. - var funcReturned bool - var funcReturnedMu sync.Mutex + // Signal the fan-out goroutine when this call no longer needs first-page + // results. Cache saving may continue, but forwarding to outCh must stop to + // avoid blocking forever when gatherResults has returned. + returnedCh := make(chan struct{}) + var returnedOnce sync.Once + markReturned := func() { + returnedOnce.Do(func() { + close(returnedCh) + }) + } + defer markReturned() - go listAndSaveFanOut( - ctx, listCtx, *o, - inCh, outCh, saveCh, - &funcReturned, &funcReturnedMu, - ) + // Write listing to results and saver. Once the caller has enough results or + // its context is canceled, stop sending to outCh and keep feeding saveCh. + go func() { + defer xioutil.SafeClose(saveCh) - entries, err = filteredResults() + forwardResults := true + closeResults := func() { + if forwardResults { + forwardResults = false + xioutil.SafeClose(outCh) + } + } - funcReturnedMu.Lock() - funcReturned = true - funcReturnedMu.Unlock() + for entry := range inCh { + if o.shouldSkip(listCtx, entry) { + continue + } - if errors.Is(err, io.EOF) { - if listErr := <-listErrCh; listErr != nil { - return entries, listErr - } - } - if metaErr := meta.getErr(); metaErr != nil { - if err == nil || errors.Is(err, io.EOF) { - err = metaErr + if forwardResults { + select { + case <-returnedCh: + closeResults() + default: + } + } + + if forwardResults { + select { + case outCh <- entry: + case <-returnedCh: + closeResults() + case <-ctx.Done(): + closeResults() + } + } + + entry.reusable = !forwardResults + select { + case saveCh <- entry: + case <-listCtx.Done(): + return + } } - } + closeResults() + }() + entries, err = filteredResults() + markReturned() + if err == nil { + // Check if listing recorded an error. + err = meta.getErr() + } return entries, err } diff --git a/cmd/metacache-server-pool_test.go b/cmd/metacache-server-pool_test.go deleted file mode 100644 index 5a24b37881da3..0000000000000 --- a/cmd/metacache-server-pool_test.go +++ /dev/null @@ -1,1018 +0,0 @@ -package cmd - -import ( - "context" - "errors" - "io" - "strings" - "sync" - "testing" - "time" - - "github.com/minio/minio/internal/bucket/lifecycle" -) - -func rawListPathOptionsForErrorTest() listPathOptions { - return listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - Separator: slashSeparator, - Recursive: false, - Limit: 100, - - // Important: avoid metacache/RPC branches. - Transient: true, - Create: true, - } -} - -func Test_erasureServerPools_listPath_InvalidBucketReturnsError(t *testing.T) { - z := &erasureServerPools{} - - o := listPathOptions{ - Bucket: "", - Prefix: "photos/", - Limit: 100, - } - - entries, err := z.listPath(context.Background(), &o) - if err == nil { - t.Fatal("expected error for invalid bucket, got nil") - } - - if errors.Is(err, io.EOF) { - t.Fatalf("expected validation error, got io.EOF") - } - - if entries.len() != 0 { - t.Fatalf("entries length = %d, want 0", entries.len()) - } -} - -func Test_erasureServerPools_listPath_CanceledContextReturnsCanceled(t *testing.T) { - z := &erasureServerPools{} - - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - o := rawListPathOptionsForErrorTest() - - entries, err := z.listPath(ctx, &o) - if !errors.Is(err, context.Canceled) { - t.Fatalf("listPath error = %v, want context.Canceled", err) - } - - if entries.len() != 0 { - t.Fatalf("entries length = %d, want 0", entries.len()) - } -} - -func Test_erasureServerPools_listPath_EarlyReturnsEOF(t *testing.T) { - z := &erasureServerPools{} - ctx := context.Background() - - tests := []struct { - name string - opts listPathOptions - }{ - { - name: "limit zero", - opts: listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - Limit: 0, - }, - }, - { - name: "marker outside prefix", - opts: listPathOptions{ - Bucket: "testbucket", - Prefix: "a/", - Marker: "b/object", - Limit: 100, - }, - }, - { - name: "prefix starts with slash", - opts: listPathOptions{ - Bucket: "testbucket", - Prefix: SlashSeparator, - Limit: 100, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - entries, err := z.listPath(ctx, &tt.opts) - - if !errors.Is(err, io.EOF) { - t.Fatalf("listPath error = %v, want io.EOF", err) - } - - if entries.len() != 0 { - t.Fatalf("entries length = %d, want 0", entries.len()) - } - }) - } -} - -func Test_erasureServerPools_listPath_ListMergedDeadlineExceededIsReturned(t *testing.T) { - z := &erasureServerPools{} - - oldListMergedFn := listMergedFn - listMergedFn = func( - z *erasureServerPools, - ctx context.Context, - o listPathOptions, - filterCh chan<- metaCacheEntry, - ) error { - close(filterCh) - return context.DeadlineExceeded - } - t.Cleanup(func() { - listMergedFn = oldListMergedFn - }) - - o := rawListPathOptionsForErrorTest() - - entries, err := z.listPath(context.Background(), &o) - if !errors.Is(err, context.DeadlineExceeded) { - t.Fatalf("listPath error = %v, want context.DeadlineExceeded", err) - } - - if entries.len() != 0 { - t.Fatalf("entries length = %d, want 0", entries.len()) - } -} - -func Test_erasureServerPools_listPath_MarkerBeforePrefixIsCleared(t *testing.T) { - z := &erasureServerPools{} - ctx := context.Background() - - o := listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - Marker: "abc", - Limit: 0, - } - - _, err := z.listPath(ctx, &o) - if !errors.Is(err, io.EOF) { - t.Fatalf("listPath error = %v, want io.EOF", err) - } - - if o.Marker != "" { - t.Fatalf("Marker = %q, want empty", o.Marker) - } -} - -func Test_erasureServerPools_listPath_RawListingNormalizesOptionsAndCallsListMerged(t *testing.T) { - z := &erasureServerPools{} - ctx := context.Background() - - oldListMergedFn := listMergedFn - t.Cleanup(func() { - listMergedFn = oldListMergedFn - }) - - gotOptions := make(chan listPathOptions, 1) - - listMergedFn = func( - z *erasureServerPools, - ctx context.Context, - o listPathOptions, - filterCh chan<- metaCacheEntry, - ) error { - gotOptions <- o - close(filterCh) - return io.EOF - } - - o := listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/2026/", - Separator: slashSeparator, - Recursive: false, - Limit: 100, - - // Force raw listing path and avoid metacache RPC/cache lookup branches. - Transient: true, - Create: true, - } - - entries, err := z.listPath(ctx, &o) - if !errors.Is(err, io.EOF) { - t.Fatalf("listPath error = %v, want io.EOF", err) - } - - if entries.len() != 0 { - t.Fatalf("entries length = %d, want 0", entries.len()) - } - - var got listPathOptions - - select { - case got = <-gotOptions: - case <-time.After(time.Second): - t.Fatal("listMergedFn was not called") - } - - if got.Bucket != "testbucket" { - t.Fatalf("Bucket = %q, want %q", got.Bucket, "testbucket") - } - - if got.Prefix != "photos/2026/" { - t.Fatalf("Prefix = %q, want %q", got.Prefix, "photos/2026/") - } - - if got.Separator != slashSeparator { - t.Fatalf("Separator = %q, want %q", got.Separator, slashSeparator) - } - - if got.Recursive { - t.Fatal("Recursive = true, want false for slash separator non-recursive listing") - } - - if !got.IncludeDirectories { - t.Fatal("IncludeDirectories = false, want true for slash separator listing") - } - - if !got.Transient { - t.Fatal("Transient = false, want true") - } - - if got.Create { - t.Fatal("Create = true, want false because Transient forces Create=false") - } - - if !got.StopDiskAtLimit { - t.Fatal("StopDiskAtLimit = false, want true when Lifecycle is nil") - } - - if got.BaseDir == "" { - t.Fatal("BaseDir is empty, want it to be derived from Prefix") - } -} - -func Test_erasureServerPools_listPath_EmptySeparatorBecomesRecursiveSlashListing(t *testing.T) { - z := &erasureServerPools{} - ctx := context.Background() - - oldListMergedFn := listMergedFn - t.Cleanup(func() { - listMergedFn = oldListMergedFn - }) - - gotOptions := make(chan listPathOptions, 1) - - listMergedFn = func( - z *erasureServerPools, - ctx context.Context, - o listPathOptions, - filterCh chan<- metaCacheEntry, - ) error { - gotOptions <- o - close(filterCh) - return io.EOF - } - - o := listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - Separator: "", - Recursive: false, - Limit: 100, - Transient: true, - Create: true, - } - - _, err := z.listPath(ctx, &o) - if !errors.Is(err, io.EOF) { - t.Fatalf("listPath error = %v, want io.EOF", err) - } - - var got listPathOptions - - select { - case got = <-gotOptions: - case <-time.After(time.Second): - t.Fatal("listMergedFn was not called") - } - - if got.Separator != slashSeparator { - t.Fatalf("Separator = %q, want %q", got.Separator, slashSeparator) - } - - if !got.Recursive { - t.Fatal("Recursive = false, want true when original separator is empty") - } - - if got.IncludeDirectories { - t.Fatal("IncludeDirectories = true, want false when original separator was empty") - } - - if got.Create { - t.Fatal("Create = true, want false because Transient forces Create=false") - } -} - -func Test_erasureServerPools_listPath_ListMergedContextCanceledIsNotReturnedAsListErr(t *testing.T) { - z := &erasureServerPools{} - - oldListMergedFn := listMergedFn - listMergedFn = func( - z *erasureServerPools, - ctx context.Context, - o listPathOptions, - filterCh chan<- metaCacheEntry, - ) error { - close(filterCh) - return context.Canceled - } - t.Cleanup(func() { - listMergedFn = oldListMergedFn - }) - - o := rawListPathOptionsForErrorTest() - - entries, err := z.listPath(context.Background(), &o) - - // listPath intentionally ignores listMerged's context.Canceled: - // - // if listErr != nil && !errors.Is(listErr, context.Canceled) { - // return entries, listErr - // } - // - // With no entries emitted, the final result should be io.EOF. - if !errors.Is(err, io.EOF) { - t.Fatalf("listPath error = %v, want io.EOF", err) - } - - if entries.len() != 0 { - t.Fatalf("entries length = %d, want 0", entries.len()) - } -} - -func Test_erasureServerPools_listPath_ListMergedErrorIsReturned(t *testing.T) { - z := &erasureServerPools{} - - wantErr := errors.New("forced listMerged failure") - - oldListMergedFn := listMergedFn - listMergedFn = func( - z *erasureServerPools, - ctx context.Context, - o listPathOptions, - filterCh chan<- metaCacheEntry, - ) error { - close(filterCh) - return wantErr - } - t.Cleanup(func() { - listMergedFn = oldListMergedFn - }) - - o := listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - Separator: slashSeparator, - Recursive: false, - Limit: 100, - - // Avoid metacache/RPC branches. - Transient: true, - Create: true, - } - - entries, err := z.listPath(context.Background(), &o) - if !errors.Is(err, wantErr) { - t.Fatalf("listPath error = %v, want %v", err, wantErr) - } - - if entries.len() != 0 { - t.Fatalf("entries length = %d, want 0", entries.len()) - } -} - -func Test_markMetacacheListingUnusedAsyncAndClearID_UsesOriginalListID(t *testing.T) { - const bucket = "testbucket" - const prefix = "photos/" - const failedID = "failed-list-id" - - gotOptions := make(chan listPathOptions, 1) - - oldMarkMetacacheListingUnused := markMetacacheListingUnused - markMetacacheListingUnused = func(o listPathOptions) { - gotOptions <- o - } - t.Cleanup(func() { - markMetacacheListingUnused = oldMarkMetacacheListingUnused - }) - - o := &listPathOptions{ - Bucket: bucket, - Prefix: prefix, - ID: failedID, - } - - markMetacacheListingUnusedAsyncAndClearID(o) - - if o.ID != "" { - t.Fatalf("listPathOptions.ID after clear = %q, want empty", o.ID) - } - - select { - case got := <-gotOptions: - if got.ID != failedID { - t.Fatalf("markMetacacheListingUnused got ID = %q, want %q", got.ID, failedID) - } - - if got.Bucket != bucket { - t.Fatalf("markMetacacheListingUnused got Bucket = %q, want %q", got.Bucket, bucket) - } - - if got.Prefix != prefix { - t.Fatalf("markMetacacheListingUnused got Prefix = %q, want %q", got.Prefix, prefix) - } - - case <-time.After(time.Second): - t.Fatal("markMetacacheListingUnused was not called") - } -} - -func Test_erasureServerPools_listMerged_DoesNotMaskMergeErrorWhenAllSetsNotFound(t *testing.T) { - z := &erasureServerPools{ - serverPools: []*erasureSets{ - { - sets: []*erasureObjects{ - nil, - nil, - }, - }, - }, - } - - wantErr := errors.New("forced merge failure") - - oldListPathOnSetFn := listPathOnSetFn - oldMergeEntryChannelsFn := mergeEntryChannelsFn - - listPathOnSetFn = func( - set *erasureObjects, - ctx context.Context, - o listPathOptions, - results chan<- metaCacheEntry, - ) error { - return errFileNotFound - } - - mergeEntryChannelsFn = func( - ctx context.Context, - inputs []chan metaCacheEntry, - out chan<- metaCacheEntry, - quorum int, - ) error { - return wantErr - } - - t.Cleanup(func() { - listPathOnSetFn = oldListPathOnSetFn - mergeEntryChannelsFn = oldMergeEntryChannelsFn - }) - - results := make(chan metaCacheEntry, 10) - - err := z.listMerged(context.Background(), listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - Limit: 100, - }, results) - - if !errors.Is(err, wantErr) { - t.Fatalf("listMerged error = %v, want merge error %v", err, wantErr) - } -} - -func Test_erasureServerPools_listMerged_AllSetsEOFReturnsEOF(t *testing.T) { - z := &erasureServerPools{ - serverPools: []*erasureSets{ - { - sets: []*erasureObjects{ - nil, - nil, - }, - }, - }, - } - - oldListPathOnSetFn := listPathOnSetFn - oldMergeEntryChannelsFn := mergeEntryChannelsFn - - listPathOnSetFn = func( - set *erasureObjects, - ctx context.Context, - o listPathOptions, - results chan<- metaCacheEntry, - ) error { - return io.EOF - } - - mergeEntryChannelsFn = func( - ctx context.Context, - inputs []chan metaCacheEntry, - out chan<- metaCacheEntry, - quorum int, - ) error { - return nil - } - - t.Cleanup(func() { - listPathOnSetFn = oldListPathOnSetFn - mergeEntryChannelsFn = oldMergeEntryChannelsFn - }) - - results := make(chan metaCacheEntry, 10) - - err := z.listMerged(context.Background(), listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - Limit: 100, - }, results) - - if !errors.Is(err, io.EOF) { - t.Fatalf("listMerged error = %v, want io.EOF", err) - } -} - -func Test_erasureServerPools_listMerged_OneSuccessfulSetReturnsNil(t *testing.T) { - z := &erasureServerPools{ - serverPools: []*erasureSets{ - { - sets: []*erasureObjects{ - nil, - nil, - }, - }, - }, - } - - var call int - - oldListPathOnSetFn := listPathOnSetFn - oldMergeEntryChannelsFn := mergeEntryChannelsFn - - listPathOnSetFn = func( - set *erasureObjects, - ctx context.Context, - o listPathOptions, - results chan<- metaCacheEntry, - ) error { - call++ - if call == 1 { - return nil - } - return io.EOF - } - - mergeEntryChannelsFn = func( - ctx context.Context, - inputs []chan metaCacheEntry, - out chan<- metaCacheEntry, - quorum int, - ) error { - return nil - } - - t.Cleanup(func() { - listPathOnSetFn = oldListPathOnSetFn - mergeEntryChannelsFn = oldMergeEntryChannelsFn - }) - - results := make(chan metaCacheEntry, 10) - - err := z.listMerged(context.Background(), listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - Limit: 100, - }, results) - - if err != nil { - t.Fatalf("listMerged error = %v, want nil", err) - } -} - -func Test_erasureServerPools_listMerged_UnexpectedSetErrorIsReturned(t *testing.T) { - z := &erasureServerPools{ - serverPools: []*erasureSets{ - { - sets: []*erasureObjects{ - nil, - nil, - }, - }, - }, - } - - wantErr := errors.New("forced set failure") - - var call int - - oldListPathOnSetFn := listPathOnSetFn - oldMergeEntryChannelsFn := mergeEntryChannelsFn - - listPathOnSetFn = func( - set *erasureObjects, - ctx context.Context, - o listPathOptions, - results chan<- metaCacheEntry, - ) error { - call++ - if call == 1 { - return wantErr - } - return io.EOF - } - - mergeEntryChannelsFn = func( - ctx context.Context, - inputs []chan metaCacheEntry, - out chan<- metaCacheEntry, - quorum int, - ) error { - return nil - } - - t.Cleanup(func() { - listPathOnSetFn = oldListPathOnSetFn - mergeEntryChannelsFn = oldMergeEntryChannelsFn - }) - - results := make(chan metaCacheEntry, 10) - - err := z.listMerged(context.Background(), listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - Limit: 100, - }, results) - - if !errors.Is(err, wantErr) { - t.Fatalf("listMerged error = %v, want %v", err, wantErr) - } -} - -func Test_triggerExpiryAndRepl_ListObjectsV1ExpiredObjectIsSkipped(t *testing.T) { - const bucket = "testbucket" - const object = "expired-object" - - oldFileInfoFn := metaCacheEntryFileInfoFn - oldFileInfoVersionsFn := metaCacheEntryFileInfoVersionsFn - oldEvalLifecycleFn := evalLifecycleForListObjectFn - - metaCacheEntryFileInfoFn = func(obj metaCacheEntry, bucket string) (FileInfo, error) { - if bucket != "testbucket" { - t.Fatalf("bucket = %q, want %q", bucket, "testbucket") - } - - return FileInfo{ - Volume: bucket, - Name: obj.name, - Size: 1, - ModTime: time.Now(), - }, nil - } - - metaCacheEntryFileInfoVersionsFn = func(obj metaCacheEntry, bucket string) (FileInfoVersions, error) { - return FileInfoVersions{}, nil - } - - var evalCalls int - evalLifecycleForListObjectFn = func(ctx context.Context, o listPathOptions, objInfo ObjectInfo) lifecycle.Event { - evalCalls++ - return lifecycle.Event{Action: lifecycle.DeleteAction} - } - - t.Cleanup(func() { - metaCacheEntryFileInfoFn = oldFileInfoFn - metaCacheEntryFileInfoVersionsFn = oldFileInfoVersionsFn - evalLifecycleForListObjectFn = oldEvalLifecycleFn - }) - - skip := triggerExpiryAndRepl(context.Background(), listPathOptions{ - Bucket: bucket, - V1: true, - Versioned: false, - Lifecycle: &lifecycle.Lifecycle{}, - }, metaCacheEntry{ - name: object, - }) - - if !skip { - t.Fatal("skip = false, want true for expired object in regular ListObjects V1 listing") - } - - if evalCalls == 0 { - t.Fatal("lifecycle was not evaluated for ListObjects V1") - } -} - -func Test_triggerExpiryAndRepl_VersionedListingDoesNotSkipExpiredObject(t *testing.T) { - const bucket = "testbucket" - const object = "expired-object" - - oldFileInfoFn := metaCacheEntryFileInfoFn - oldFileInfoVersionsFn := metaCacheEntryFileInfoVersionsFn - oldEvalLifecycleFn := evalLifecycleForListObjectFn - - metaCacheEntryFileInfoFn = func(obj metaCacheEntry, bucket string) (FileInfo, error) { - t.Fatal("fileInfo must not be called for versioned listing skip decision") - return FileInfo{}, nil - } - - metaCacheEntryFileInfoVersionsFn = func(obj metaCacheEntry, bucket string) (FileInfoVersions, error) { - return FileInfoVersions{}, nil - } - - evalLifecycleForListObjectFn = func(ctx context.Context, o listPathOptions, objInfo ObjectInfo) lifecycle.Event { - return lifecycle.Event{Action: lifecycle.DeleteAction} - } - - t.Cleanup(func() { - metaCacheEntryFileInfoFn = oldFileInfoFn - metaCacheEntryFileInfoVersionsFn = oldFileInfoVersionsFn - evalLifecycleForListObjectFn = oldEvalLifecycleFn - }) - - skip := triggerExpiryAndRepl(context.Background(), listPathOptions{ - Bucket: bucket, - Versioned: true, - Lifecycle: &lifecycle.Lifecycle{}, - }, metaCacheEntry{ - name: object, - }) - - if skip { - t.Fatal("skip = true, want false for versioned listing") - } -} - -func Test_triggerExpiryAndRepl_FileInfoErrorDoesNotSkip(t *testing.T) { - const bucket = "testbucket" - const object = "bad-object" - - oldFileInfoFn := metaCacheEntryFileInfoFn - oldFileInfoVersionsFn := metaCacheEntryFileInfoVersionsFn - oldEvalLifecycleFn := evalLifecycleForListObjectFn - - metaCacheEntryFileInfoFn = func(obj metaCacheEntry, bucket string) (FileInfo, error) { - return FileInfo{}, errors.New("forced fileInfo failure") - } - - metaCacheEntryFileInfoVersionsFn = func(obj metaCacheEntry, bucket string) (FileInfoVersions, error) { - t.Fatal("fileInfoVersions must not be called after fileInfo failure") - return FileInfoVersions{}, nil - } - - evalLifecycleForListObjectFn = func(ctx context.Context, o listPathOptions, objInfo ObjectInfo) lifecycle.Event { - t.Fatal("lifecycle must not be evaluated after fileInfo failure") - return lifecycle.Event{} - } - - t.Cleanup(func() { - metaCacheEntryFileInfoFn = oldFileInfoFn - metaCacheEntryFileInfoVersionsFn = oldFileInfoVersionsFn - evalLifecycleForListObjectFn = oldEvalLifecycleFn - }) - - skip := triggerExpiryAndRepl(context.Background(), listPathOptions{ - Bucket: bucket, - Lifecycle: &lifecycle.Lifecycle{}, - }, metaCacheEntry{ - name: object, - }) - - if skip { - t.Fatal("skip = true, want false when fileInfo fails") - } -} - -func Test_erasureServerPools_listAndSave_DiskFullMakesListingTransient(t *testing.T) { - oldGetAvailablePoolIdxFn := listAndSaveGetAvailablePoolIdxFn - listAndSaveGetAvailablePoolIdxFn = func( - z *erasureServerPools, - ctx context.Context, - bucket string, - object string, - size int64, - ) int { - return -1 - } - t.Cleanup(func() { - listAndSaveGetAvailablePoolIdxFn = oldGetAvailablePoolIdxFn - }) - - z := &erasureServerPools{} - - o := &listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - ID: "list-id", - Create: true, - Limit: 100, - } - - entries, err := z.listAndSave(context.Background(), o) - if !errors.Is(err, errDiskFull) { - t.Fatalf("listAndSave error = %v, want errDiskFull", err) - } - - if entries.len() != 0 { - t.Fatalf("entries length = %d, want 0", entries.len()) - } - - if o.pool != 0 { - t.Fatalf("pool = %d, want 0", o.pool) - } - - if o.Create { - t.Fatal("Create = true, want false") - } - - if o.ID != "" { - t.Fatalf("ID = %q, want empty", o.ID) - } - - if !o.Transient { - t.Fatal("Transient = false, want true") - } -} - -func Test_erasureServerPools_listAndSave_ListMergedErrorIsReturnedWhenNoEntries(t *testing.T) { - wantErr := errors.New("forced listMerged failure") - - oldGetAvailablePoolIdxFn := listAndSaveGetAvailablePoolIdxFn - oldGetHashedSetIndexFn := listAndSaveGetHashedSetIndexFn - oldSaveMetaCacheStreamFn := listAndSaveSaveMetaCacheStreamFn - oldListMergedFn := listAndSaveListMergedFn - oldRestClientFromHashFn := listAndSaveRestClientFromHashFn - - listAndSaveGetAvailablePoolIdxFn = func( - z *erasureServerPools, - ctx context.Context, - bucket string, - object string, - size int64, - ) int { - return 0 - } - - listAndSaveGetHashedSetIndexFn = func(pool *erasureSets, id string) int { - return 0 - } - - listAndSaveSaveMetaCacheStreamFn = func( - saver *erasureObjects, - ctx context.Context, - meta *metaCacheRPC, - saveCh <-chan metaCacheEntry, - ) error { - for range saveCh { - } - return nil - } - - listAndSaveListMergedFn = func( - z *erasureServerPools, - ctx context.Context, - o listPathOptions, - results chan<- metaCacheEntry, - ) error { - close(results) - return wantErr - } - - listAndSaveRestClientFromHashFn = func(hash string) *peerRESTClient { - return nil - } - - t.Cleanup(func() { - listAndSaveGetAvailablePoolIdxFn = oldGetAvailablePoolIdxFn - listAndSaveGetHashedSetIndexFn = oldGetHashedSetIndexFn - listAndSaveSaveMetaCacheStreamFn = oldSaveMetaCacheStreamFn - listAndSaveListMergedFn = oldListMergedFn - listAndSaveRestClientFromHashFn = oldRestClientFromHashFn - }) - - z := &erasureServerPools{ - serverPools: []*erasureSets{ - { - sets: []*erasureObjects{ - nil, - }, - }, - }, - } - - o := &listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - ID: "list-id", - Create: true, - Limit: 1, - } - - type result struct { - entries metaCacheEntriesSorted - err error - } - - done := make(chan result, 1) - - go func() { - entries, err := z.listAndSave(context.Background(), o) - done <- result{ - entries: entries, - err: err, - } - }() - - var res result - - select { - case res = <-done: - case <-time.After(time.Second): - t.Fatal("listAndSave did not return") - } - - if res.err == nil { - t.Fatal("expected listMerged error, got nil") - } - - if !strings.Contains(res.err.Error(), wantErr.Error()) { - t.Fatalf("listAndSave error = %v, want it to contain %q", res.err, wantErr.Error()) - } - - if res.entries.len() != 0 { - t.Fatalf("entries length = %d, want 0", res.entries.len()) - } -} - -func Test_listAndSaveFanOut_DoesNotSendToOutputAfterFunctionReturned(t *testing.T) { - inCh := make(chan metaCacheEntry, 1) - outCh := make(chan metaCacheEntry, 1) - saveCh := make(chan metaCacheEntry, 1) - - funcReturned := true - var funcReturnedMu sync.Mutex - - o := listPathOptions{ - Bucket: "testbucket", - Prefix: "photos/", - Recursive: true, - } - - entry := metaCacheEntry{name: "photos/object-1"} - - if o.shouldSkip(context.Background(), entry) { - t.Fatal("test entry is skipped by listPathOptions.shouldSkip; test setup is invalid") - } - - inCh <- entry - close(inCh) - - listAndSaveFanOut( - context.Background(), - context.Background(), - o, - inCh, - outCh, - saveCh, - &funcReturned, - &funcReturnedMu, - ) - - select { - case got, ok := <-outCh: - if ok { - t.Fatalf("unexpected entry sent to outCh after function returned: %+v", got) - } - default: - } - - got, ok := <-saveCh - if !ok { - t.Fatal("saveCh was closed before saved entry could be read") - } - - if got.name != "photos/object-1" { - t.Fatalf("saved entry name = %q, want %q", got.name, "photos/object-1") - } - - if !got.reusable { - t.Fatal("saved entry reusable = false, want true") - } - - if _, ok := <-saveCh; ok { - t.Fatal("saveCh is not closed") - } -}