Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions pkg/mcs/resourcemanager/metadataapi/config_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ type ConfigStore interface {
GetResourceGroupList(uint32, bool) ([]*rmserver.ResourceGroup, error)
DeleteResourceGroup(uint32, string) error
GetControllerConfig() *rmserver.ControllerConfig
UpdateControllerConfigItems(map[string]any) error
UpdateControllerConfigItem(string, any) error
SetKeyspaceServiceLimit(uint32, float64) error
LookupKeyspaceID(context.Context, string) (uint32, error)
Expand Down Expand Up @@ -94,6 +95,11 @@ func (s *ManagerStore) UpdateControllerConfigItem(key string, value any) error {
return s.manager.UpdateControllerConfigItem(key, value)
}

// UpdateControllerConfigItems updates controller config items atomically.
func (s *ManagerStore) UpdateControllerConfigItems(items map[string]any) error {
return s.manager.UpdateControllerConfigItems(items)
}

// SetKeyspaceServiceLimit sets keyspace service limit.
func (s *ManagerStore) SetKeyspaceServiceLimit(keyspaceID uint32, limit float64) error {
return s.manager.SetKeyspaceServiceLimit(keyspaceID, limit)
Expand Down Expand Up @@ -242,15 +248,13 @@ func (s *ConfigService) SetControllerConfig(c *gin.Context) {
}
resolvedConf[key] = v
}
for key, v := range resolvedConf {
if err := s.configStore.UpdateControllerConfigItem(key, v); err != nil {
if rmserver.IsMetadataWriteDisabledError(err) {
c.String(http.StatusForbidden, err.Error())
return
}
c.String(http.StatusBadRequest, err.Error())
if err := s.configStore.UpdateControllerConfigItems(resolvedConf); err != nil {
if rmserver.IsMetadataWriteDisabledError(err) {
c.String(http.StatusForbidden, err.Error())
return
}
c.String(http.StatusBadRequest, err.Error())
return
Comment on lines +251 to +257
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don't collapse persistence failures into 400 Bad Request.

Line 251 now calls the batch path, and that path can fail after validation when SaveControllerConfig hits storage. Returning 400 for every non-permission error will mislabel etcd/write failures as client input problems; please distinguish validation errors from persistence errors here, even if that means wrapping save failures from the store layer. As per coding guidelines, "HTTP handlers must validate payloads and return proper status codes; avoid panics".

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@pkg/mcs/resourcemanager/metadataapi/config_service.go` around lines 251 -
257, The current handler collapses all non-permission errors from
s.configStore.UpdateControllerConfigItems(resolvedConf) into 400 Bad Request;
change the error handling to distinguish validation errors from
persistence/storage failures (e.g., errors returned by SaveControllerConfig in
the store). Specifically, detect validation-related errors (the same error
type/value returned by your validation code) and continue to return 400 for
those, but treat store persistence/etcd/write errors (wrap/inspect errors coming
from UpdateControllerConfigItems/SaveControllerConfig or provide a
store.IsPersistenceError helper) as server-side failures and return an
appropriate 5xx (e.g., 500 or 503) with a clear log message; keep the existing
IsMetadataWriteDisabledError check for forbidden. Ensure the store layer
wraps/save failures so the handler can reliably distinguish the error kinds.

}
c.String(http.StatusOK, "Success!")
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/mcs/resourcemanager/metadataapi/config_service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,13 @@ func (*testStore) GetControllerConfig() *rmserver.ControllerConfig {
return &rmserver.ControllerConfig{}
}

func (s *testStore) UpdateControllerConfigItems(items map[string]any) error {
for key := range items {
s.updatedControllerConfigItems = append(s.updatedControllerConfigItems, key)
}
return nil
}

func (s *testStore) UpdateControllerConfigItem(key string, _ any) error {
s.updatedControllerConfigItems = append(s.updatedControllerConfigItems, key)
return nil
Expand Down
86 changes: 55 additions & 31 deletions pkg/mcs/resourcemanager/server/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -467,49 +467,46 @@ func (m *Manager) initReserved() {

// UpdateControllerConfigItem updates the controller config item.
func (m *Manager) UpdateControllerConfigItem(key string, value any) error {
return m.UpdateControllerConfigItems(map[string]any{key: value})
}

// UpdateControllerConfigItems updates controller config items atomically.
func (m *Manager) UpdateControllerConfigItems(items map[string]any) error {
if !m.writeRole.AllowsMetadataWrite() {
return errMetadataWriteDisabled
}
kp := strings.Split(key, ".")
if len(kp) == 0 {
return errors.Errorf("invalid key %s", key)
}
m.Lock()
controllerConfig := cloneControllerConfig(m.controllerConfig)
var config any
switch kp[0] {
case "request-unit":
config = &controllerConfig.RequestUnit
default:
config = controllerConfig
}
updated, found, err := jsonutil.AddKeyValue(config, kp[len(kp)-1], value)
if err != nil {
m.Unlock()
return err
updatedItems := make([]struct {
key string
value any
}, 0, len(items))
for key, value := range items {
updated, err := applyControllerConfigItem(controllerConfig, key, value)
if err != nil {
m.Unlock()
return err
}
if updated {
updatedItems = append(updatedItems, struct {
key string
value any
}{key: key, value: value})
}
}

if !found {
if len(updatedItems) == 0 {
m.Unlock()
return errors.Errorf("config item %s not found", key)
return nil
}
// Validate RUVersionPolicy after any update, regardless of the key path,
// since the default branch merges into the full ControllerConfig.
if err := controllerConfig.RUVersionPolicy.validate(); err != nil {
if err := m.storage.SaveControllerConfig(controllerConfig); err != nil {
m.Unlock()
log.Error("save controller config failed", zap.Error(err))
return err
}
if updated {
if err := m.storage.SaveControllerConfig(controllerConfig); err != nil {
m.Unlock()
log.Error("save controller config failed", zap.Error(err))
return err
}
m.controllerConfig = controllerConfig
}
m.controllerConfig = controllerConfig
m.Unlock()
if updated {
log.Info("updated controller config item", zap.String("key", key), zap.Any("value", value))
for _, item := range updatedItems {
log.Info("updated controller config item", zap.String("key", item.key), zap.Any("value", item.value))
}
return nil
}
Expand All @@ -521,6 +518,33 @@ func (m *Manager) GetControllerConfig() *ControllerConfig {
return cloneControllerConfig(m.controllerConfig)
}

func applyControllerConfigItem(config *ControllerConfig, key string, value any) (bool, error) {
kp := strings.Split(key, ".")
if len(kp) == 0 {
return false, errors.Errorf("invalid key %s", key)
}
var target any
switch kp[0] {
case "request-unit":
target = &config.RequestUnit
default:
target = config
}
updated, found, err := jsonutil.AddKeyValue(target, kp[len(kp)-1], value)
if err != nil {
return false, err
}
if !found {
return false, errors.Errorf("config item %s not found", key)
}
// Validate RUVersionPolicy after any update, regardless of the key path,
// since the default branch merges into the full ControllerConfig.
if err := config.RUVersionPolicy.validate(); err != nil {
return false, err
}
return updated, nil
}

// AddResourceGroup puts a resource group.
// NOTE: AddResourceGroup should also be idempotent because tidb depends
// on this retry mechanism.
Expand Down
21 changes: 21 additions & 0 deletions pkg/mcs/resourcemanager/server/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,27 @@ func TestKeyspaceServiceLimit(t *testing.T) {
re.Equal(DefaultResourceGroupName, krgm.getMutableResourceGroup(DefaultResourceGroupName).Name)
}

func TestUpdateControllerConfigItemsAtomic(t *testing.T) {
re := require.New(t)
m := prepareManager()

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
re.NoError(m.Init(ctx))

before := *m.GetControllerConfig()
err := m.UpdateControllerConfigItems(map[string]any{
"request-unit.write-base-cost": 2.0,
"ltb-max-wait-duration": "not-a-duration",
})
re.Error(err)

after := m.GetControllerConfig()
re.Equal(before.RequestUnit.WriteBaseCost, after.RequestUnit.WriteBaseCost)
re.Equal(before.LTBMaxWaitDuration, after.LTBMaxWaitDuration)
re.Equal(before.EnableControllerTraceLog, after.EnableControllerTraceLog)
}

func TestKeyspaceNameLookup(t *testing.T) {
re := require.New(t)
m := prepareManager()
Expand Down
25 changes: 23 additions & 2 deletions tests/integrations/mcs/resourcemanager/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,21 @@ func (suite *resourceManagerAPITestSuite) TestControllerConfigAPI() {
re.Equal(2.0, config.RequestUnit.WriteBaseCost)
}

func (suite *resourceManagerAPITestSuite) TestControllerConfigAPIAllOrNothing() {
re := suite.Require()

before := suite.mustGetControllerConfig(re)
resp, statusCode := tryToSetControllerConfig(re, suite.cluster.GetLeaderServer().GetAddr(), map[string]any{
"enable-controller-trace-log": "true",
"ltb-max-wait-duration": "not-a-duration",
})
re.Equal(http.StatusBadRequest, statusCode)
re.Contains(resp, "time:")
Comment on lines +318 to +323
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Use a real boolean here to keep the regression deterministic.

Line 319 sends "true" as a string, so this request can fail on either field instead of exercising the intended valid+invalid mix. That makes the "time:" assertion order-dependent and weakens the atomicity regression.

Suggested fix
 	resp, statusCode := tryToSetControllerConfig(re, suite.cluster.GetLeaderServer().GetAddr(), map[string]any{
-		"enable-controller-trace-log": "true",
+		"enable-controller-trace-log": true,
 		"ltb-max-wait-duration":       "not-a-duration",
 	})
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
resp, statusCode := tryToSetControllerConfig(re, suite.cluster.GetLeaderServer().GetAddr(), map[string]any{
"enable-controller-trace-log": "true",
"ltb-max-wait-duration": "not-a-duration",
})
re.Equal(http.StatusBadRequest, statusCode)
re.Contains(resp, "time:")
resp, statusCode := tryToSetControllerConfig(re, suite.cluster.GetLeaderServer().GetAddr(), map[string]any{
"enable-controller-trace-log": true,
"ltb-max-wait-duration": "not-a-duration",
})
re.Equal(http.StatusBadRequest, statusCode)
re.Contains(resp, "time:")
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/integrations/mcs/resourcemanager/api_test.go` around lines 318 - 323,
The test sends "true" as a string for the "enable-controller-trace-log" field
which makes the request's failure non-deterministic; update the call to
tryToSetControllerConfig so the map value for "enable-controller-trace-log" is a
boolean true (not the string "true") while keeping "ltb-max-wait-duration" as
the invalid string "not-a-duration" so the request deterministically exercises
the valid+invalid mix; locate the call to tryToSetControllerConfig in the test
and change that map entry accordingly.


after := suite.mustGetControllerConfig(re)
re.Equal(before, after)
}

func (suite *resourceManagerAPITestSuite) mustGetControllerConfig(re *require.Assertions) *server.ControllerConfig {
bodyBytes := suite.mustSendRequest(re, http.MethodGet, "/config/controller", nil)
config := &server.ControllerConfig{}
Expand All @@ -344,8 +359,14 @@ func (suite *resourceManagerAPITestSuite) mustGetControllerConfig(re *require.As
}

func (suite *resourceManagerAPITestSuite) mustSetControllerConfig(re *require.Assertions, config map[string]any) {
bodyBytes := suite.mustSendRequest(re, http.MethodPost, "/config/controller", config)
re.Equal("Success!", string(bodyBytes))
body, statusCode := tryToSetControllerConfig(re, suite.cluster.GetLeaderServer().GetAddr(), config)
re.Equal(http.StatusOK, statusCode, body)
re.Equal("Success!", body)
}

func tryToSetControllerConfig(re *require.Assertions, leaderAddr string, config map[string]any) (string, int) {
bodyBytes, statusCode := sendRequest(re, leaderAddr, http.MethodPost, "/config/controller", nil, config)
return string(bodyBytes), statusCode
}

func (suite *resourceManagerAPITestSuite) TestKeyspaceServiceLimitAPI() {
Expand Down
Loading