Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions pkg/storage/stores/shipper/indexshipper/tsdb/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/multierror"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
Expand Down Expand Up @@ -77,16 +78,18 @@ func NewTSDBManager(
}
}

func (m *tsdbManager) Start() (err error) {
func (m *tsdbManager) Start() error {
var (
buckets, indices, loadingErrors int
)

var multiErr multierror.MultiError

defer func() {
level.Info(m.log).Log(
"msg", "loaded leftover local indices",
"err", err,
"successful", err == nil,
"err", multiErr.Err(),
"successful", multiErr.Err() == nil,
"buckets", buckets,
"indices", indices,
"failures", loadingErrors,
Expand All @@ -97,7 +100,8 @@ func (m *tsdbManager) Start() (err error) {
mulitenantDir := managerMultitenantDir(m.dir)
files, err := os.ReadDir(mulitenantDir)
if err != nil {
return err
multiErr.Add(err)
return multiErr.Err()
}

for _, f := range files {
Expand Down Expand Up @@ -134,22 +138,24 @@ func (m *tsdbManager) Start() (err error) {

if err != nil {
level.Warn(m.log).Log(
"msg", "",
"msg", "failed to load shippable TSDB file",
"tsdbPath", prefixed.Path(),
"err", err.Error(),
)
multiErr.Add(err)
loadingErrors++
continue
}

if err := m.shipper.AddIndex(bucket, "", loaded); err != nil {
multiErr.Add(err)
loadingErrors++
return err
continue
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a change of behavior. Should we treat both loading failures and add-to-index failures as errors and fail the start-up?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now it does not return on the first error, but continues the operation with the remaining TSDBs. However, the error is still returned at the end.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a need to load more TSDBs if we are going to fail the service anyway?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least we should try IMO

}
}

}

return nil
return multiErr.Err()
}

type chunkInfo struct {
Expand Down
Loading