Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions client/resource_group/controller/global_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,10 +322,6 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
stateUpdateTicker.Reset(time.Millisecond * 100)
})

_, metaRevision, err := c.provider.LoadResourceGroups(ctx)
if err != nil {
log.Warn("load resource group revision failed", zap.Error(err))
}
resp, err := c.provider.Get(ctx, []byte(controllerConfigPath))
if err != nil {
log.Warn("load resource group revision failed", zap.Error(err))
Expand All @@ -335,7 +331,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
if !c.ruConfig.isSingleGroupByKeyspace {
// Use WithPrevKV() to get the previous key-value pair when get Delete Event.
prefix := pd.GroupSettingsPathPrefixBytes(c.keyspaceID)
watchMetaChannel, err = c.provider.Watch(ctx, prefix, opt.WithRev(metaRevision), opt.WithPrefix(), opt.WithPrevKV())
watchMetaChannel, err = c.provider.Watch(ctx, prefix, opt.WithPrefix(), opt.WithPrevKV())
Copy link
Copy Markdown
Contributor

@lhy1024 lhy1024 Apr 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The initial watch here no longer uses a revision, which leaves a startup window. If a resource group changes after the controller starts but before the watch is actually established, the local cache can miss that update. The reconnect path already rebuilds from a snapshot/revision, but the initial startup path still needs the same barrier.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but we just need the latest event and can ignore all the mvvc events

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree we do not need the whole MVCC history. The issue is that without a startup barrier, we may miss the latest update itself if it happens before the watch is established and no later event arrives. In that case the local cache can stay stale indefinitely.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lhy1024 In the previous implementation, the returned ResourceGroup was actually discarded and never stored in the local cache. Therefore, using the latest version should result in the same behavior as before.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is indeed the same behavior.

Does this issue need to be fixed? Alternatively, what was the original reasoning or consideration regarding this matter? @disksing

if err != nil {
log.Warn("watch resource group meta failed", zap.Error(err))
}
Expand Down Expand Up @@ -363,7 +359,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
if !c.ruConfig.isSingleGroupByKeyspace && watchMetaChannel == nil {
// Use WithPrevKV() to get the previous key-value pair when get Delete Event.
prefix := pd.GroupSettingsPathPrefixBytes(c.keyspaceID)
watchMetaChannel, err = c.provider.Watch(ctx, prefix, opt.WithRev(metaRevision), opt.WithPrefix(), opt.WithPrevKV())
watchMetaChannel, err = c.provider.Watch(ctx, prefix, opt.WithPrefix(), opt.WithPrevKV())
if err != nil {
log.Warn("watch resource group meta failed", zap.Error(err))
watchRetryTimer.Reset(watchRetryInterval)
Expand Down Expand Up @@ -417,7 +413,6 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
continue
}
for _, item := range resp {
metaRevision = item.Kv.ModRevision
group := &rmpb.ResourceGroup{}
switch item.Type {
case meta_storagepb.Event_PUT:
Expand Down
Loading