Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions src/Nethermind/Nethermind.Db.Test/DbTrackerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
using Nethermind.Core;
using Nethermind.Core.Test;
using Nethermind.Core.Test.Builders;
using Nethermind.Db.FullPruning;
using Nethermind.Db.Rocks;
using Nethermind.Db.Rocks.Config;
using Nethermind.Init.Modules;
Expand All @@ -26,6 +27,26 @@ namespace Nethermind.Db.Test;

public class DbTrackerTests
{
// Names mutated across tests are reset here so test order does not cause flakiness.
private static readonly string[] TouchedMetricKeys =
{
"TestDb", "GoodDb", "ThrowingDb", "SkippedDb", "TrackedDb", "PrunedState",
};

[TearDown]
public void TearDown()
{
foreach (string key in TouchedMetricKeys)
{
((IDictionary<string, long>)Metrics.DbReads).Remove(key);
((IDictionary<string, long>)Metrics.DbWrites).Remove(key);
((IDictionary<string, long>)Metrics.DbSize).Remove(key);
((IDictionary<string, long>)Metrics.DbMemtableSize).Remove(key);
((IDictionary<string, long>)Metrics.DbBlockCacheSize).Remove(key);
((IDictionary<string, long>)Metrics.DbIndexFilterSize).Remove(key);
}
}

[Test]
public void TestTrackOnlyCreatedDb()
{
Expand Down Expand Up @@ -77,6 +98,123 @@ public void TestUpdateDbMetric(bool isProcessing)
Assert.That(Metrics.DbReads["TestDb"], isProcessing ? Is.EqualTo(0) : Is.EqualTo(10));
}

[Test]
public void TestSkipMetricsTracking()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Low: This test verifies the SkipMetricsTracking flag mechanism, but there's no test for the actual bug scenario: create a FullPruningDb, trigger a pruning cycle (via TryStartPruningCommit), then assert that the tracker's GatherMetric() reads from the new inner DB and not the discarded one. The unit test is good, but an integration test would close the loop on correctness.

{
using IContainer container = new ContainerBuilder()
.AddSingleton<DbMonitoringModule.DbTracker>()
.AddSingleton<IDbConfig>(new DbConfig())
.AddSingleton<HyperClockCacheWrapper>()
.AddSingleton<IMetricsConfig>(new MetricsConfig())
.AddSingleton<ILogManager>(LimboLogs.Instance)
.AddSingleton<IMonitoringService>(NoopMonitoringService.Instance)
.AddDecorator<IDbFactory, DbMonitoringModule.DbTracker.DbFactoryInterceptor>()
.AddSingleton<IDbFactory, MemDbFactory>()
.Build();

IDbFactory dbFactory = container.Resolve<IDbFactory>();
DbMonitoringModule.DbTracker tracker = container.Resolve<DbMonitoringModule.DbTracker>();

DbSettings skipped = new("SkippedDb", "SkippedDb") { SkipMetricsTracking = true };
DbSettings tracked = new("TrackedDb", "TrackedDb");

dbFactory.CreateDb(skipped);
dbFactory.CreateDb(tracked);

List<KeyValuePair<string, IDbMeta>> entries = tracker.GetAllDbMeta().ToList();
entries.Should().ContainSingle().Which.Key.Should().Be("TrackedDb");
}

[Parallelizable(ParallelScope.None)]
[Test]
public void ExceptionInGatherMetricDoesNotAbortOtherDbs()
{
IMonitoringService monitoringService = Substitute.For<IMonitoringService>();
Action updateAction = null!;
monitoringService
.When(m => m.AddMetricsUpdateAction(Arg.Any<Action>()))
.Do(c => updateAction = (Action)c[0]);

IDbFactory fakeDbFactory = Substitute.For<IDbFactory>();

using IContainer container = new ContainerBuilder()
.AddSingleton<DbMonitoringModule.DbTracker>()
.AddSingleton<IDbConfig>(new DbConfig())
.AddSingleton<HyperClockCacheWrapper>()
.AddSingleton<IMetricsConfig>(new MetricsConfig())
.AddSingleton<ILogManager>(LimboLogs.Instance)
.AddSingleton<IMonitoringService>(monitoringService)
.AddDecorator<IDbFactory, DbMonitoringModule.DbTracker.DbFactoryInterceptor>()
.AddSingleton<IDbFactory>(fakeDbFactory)
.Build();

ThrowingDb throwingDb = new();
FakeDb goodDb = new(new IDbMeta.DbMetric { TotalReads = 42 });
fakeDbFactory.CreateDb(Arg.Is<DbSettings>(s => s.DbName == "ThrowingDb")).Returns(throwingDb);
fakeDbFactory.CreateDb(Arg.Is<DbSettings>(s => s.DbName == "GoodDb")).Returns(goodDb);

IDbFactory intercepted = container.Resolve<IDbFactory>();
intercepted.CreateDb(new DbSettings("ThrowingDb", "ThrowingDb"));
intercepted.CreateDb(new DbSettings("GoodDb", "GoodDb"));

Metrics.DbReads["GoodDb"] = 0;

updateAction!();

Assert.That(Metrics.DbReads.ContainsKey("GoodDb"), Is.True);
Assert.That(Metrics.DbReads["GoodDb"], Is.EqualTo(42));
Comment on lines +160 to +165
}

[Parallelizable(ParallelScope.None)]
[Test]
public void FullPruningDbTrackedWrapper_SurvivesPruningCycle()
{
IMonitoringService monitoringService = Substitute.For<IMonitoringService>();
Action updateAction = null!;
monitoringService
.When(m => m.AddMetricsUpdateAction(Arg.Any<Action>()))
.Do(c => updateAction = (Action)c[0]);

// Inner factory returns a new FakeDb per call with a distinct size, so we can tell which
// inner DB the FullPruningDb wrapper is currently pointing at.
IDbFactory innerFactory = Substitute.For<IDbFactory>();
FakeDb innerDbV0 = new(new IDbMeta.DbMetric { Size = 100 });
FakeDb innerDbV1 = new(new IDbMeta.DbMetric { Size = 200 });
innerFactory.CreateDb(Arg.Any<DbSettings>()).Returns(innerDbV0, innerDbV1);

// DbMetricIntervalSeconds = 0 disables the interval guard so we can update twice in a row.
MetricsConfig metricsConfig = new() { DbMetricIntervalSeconds = 0 };

using IContainer container = new ContainerBuilder()
.AddSingleton<DbMonitoringModule.DbTracker>()
.AddSingleton<IDbConfig>(new DbConfig())
.AddSingleton<HyperClockCacheWrapper>()
.AddSingleton<IMetricsConfig>(metricsConfig)
.AddSingleton<ILogManager>(LimboLogs.Instance)
.AddSingleton<IMonitoringService>(monitoringService)
.Build();

DbMonitoringModule.DbTracker tracker = container.Resolve<DbMonitoringModule.DbTracker>();
FullPruningDb pruningDb = new(new DbSettings("PrunedState", "PrunedState"), innerFactory);

// Mirror WorldStateModule's behavior: register the outer wrapper, not the inner DBs.
tracker.AddDb("PrunedState", pruningDb);

updateAction!();
Assert.That(Metrics.DbSize["PrunedState"], Is.EqualTo(100));

// Trigger and commit a full pruning cycle; pruningDb._currentDb now points to innerDbV1.
pruningDb.TryStartPruning(out IPruningContext context).Should().BeTrue();
context.Commit();
context.Dispose();

updateAction!();

// After pruning, the wrapper delegates GatherMetric() to the new inner DB. No stale entry.
Assert.That(Metrics.DbSize["PrunedState"], Is.EqualTo(200));
tracker.GetAllDbMeta().Should().ContainSingle().Which.Key.Should().Be("PrunedState");
}

[Parallelizable(ParallelScope.None)]
[Test]
public void DoesNotUpdateIfIntervalHasNotPassed()
Expand Down Expand Up @@ -149,4 +287,9 @@ private class FakeDb(IDbMeta.DbMetric metric) : TestMemDb, IDbMeta

internal void SetMetric(IDbMeta.DbMetric metric) => _metric = metric;
}

private class ThrowingDb : TestMemDb, IDbMeta
{
public override IDbMeta.DbMetric GatherMetric() => throw new InvalidOperationException("Simulated GatherMetric failure");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<ProjectReference Include="..\Nethermind.Db.Rocks\Nethermind.Db.Rocks.csproj" />
<ProjectReference Include="..\Nethermind.Db.Rpc\Nethermind.Db.Rpc.csproj" />
<ProjectReference Include="..\Nethermind.Db\Nethermind.Db.csproj" />
<ProjectReference Include="..\Nethermind.Init\Nethermind.Init.csproj" />
</ItemGroup>
<ItemGroup>
<None Update="InputFiles\CompactionStatsExample_MissingIntervalCompaction.txt">
Expand Down
2 changes: 2 additions & 0 deletions src/Nethermind/Nethermind.Db/DbSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ public class DbSettings(string name, string path)

public bool DeleteOnStart { get; set; }
public bool CanDeleteFolder { get; set; } = true;
/// <summary>When <c>true</c>, this database will not be registered with the metrics tracker.</summary>
public bool SkipMetricsTracking { get; set; }
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Low: Missing XML documentation on this public property. The coding style guide (coding-style.md) requires <summary> docs on all public APIs. Consider:

Suggested change
public bool SkipMetricsTracking { get; set; }
/// <summary>When true, this database will not be registered with the metrics tracker.</summary>
public bool SkipMetricsTracking { get; set; }

(Other properties in this file are also undocumented — consistent with the existing pattern, but worth fixing here since this one has non-obvious semantics.)


public IMergeOperator? MergeOperator { get; set; }
public Dictionary<string, IMergeOperator>? ColumnsMergeOperators { get; set; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ private DbSettings GetRocksDbSettings(DbSettings originalSetting)
string dbPath = firstDb ? originalSetting.DbPath : _fileSystem.Path.Combine(originalSetting.DbPath, _index.ToString());
DbSettings dbSettings = originalSetting.Clone(dbName, dbPath);
dbSettings.CanDeleteFolder = !firstDb; // we cannot delete main db folder, only indexed subfolders
// Inner DBs are tracked via the FullPruningDb wrapper (registered with a stable name)
// so we skip tracking these indexed sub-DBs to avoid stale references after pruning.
dbSettings.SkipMetricsTracking = true;
return dbSettings;
}

Expand Down
52 changes: 42 additions & 10 deletions src/Nethermind/Nethermind.Init/Modules/DbMonitoringModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ protected override void Load(ContainerBuilder builder)
public class DbTracker
{
private readonly ConcurrentDictionary<string, IDbMeta> _createdDbs = new();
private readonly HashSet<string> _failingDbs = new();
private readonly int _intervalSec;
private readonly Lazy<HyperClockCacheWrapper> _sharedBlockCache;
private long _lastDbMetricsUpdate = 0;
Expand Down Expand Up @@ -84,14 +85,27 @@ private void UpdateDbMetrics()

foreach (KeyValuePair<string, IDbMeta> kv in GetAllDbMeta())
{
// Note: At the moment, the metric for a columns db is combined across column.
IDbMeta.DbMetric dbMetric = kv.Value.GatherMetric();
Db.Metrics.DbSize[kv.Key] = dbMetric.Size;
Db.Metrics.DbBlockCacheSize[kv.Key] = dbMetric.CacheSize;
Db.Metrics.DbMemtableSize[kv.Key] = dbMetric.MemtableSize;
Db.Metrics.DbIndexFilterSize[kv.Key] = dbMetric.IndexSize;
Db.Metrics.DbReads[kv.Key] = dbMetric.TotalReads;
Db.Metrics.DbWrites[kv.Key] = dbMetric.TotalWrites;
try
{
// Note: At the moment, the metric for a columns db is combined across column.
IDbMeta.DbMetric dbMetric = kv.Value.GatherMetric();
Db.Metrics.DbSize[kv.Key] = dbMetric.Size;
Db.Metrics.DbBlockCacheSize[kv.Key] = dbMetric.CacheSize;
Db.Metrics.DbMemtableSize[kv.Key] = dbMetric.MemtableSize;
Db.Metrics.DbIndexFilterSize[kv.Key] = dbMetric.IndexSize;
Db.Metrics.DbReads[kv.Key] = dbMetric.TotalReads;
Db.Metrics.DbWrites[kv.Key] = dbMetric.TotalWrites;
if (_failingDbs.Remove(kv.Key) && _logger.IsInfo)
_logger.Info($"DB metric collection recovered for '{kv.Key}'");
}
catch (Exception e)
{
// Remove stale entries so Prometheus does not report old values indefinitely.
RemoveStaleMetricEntry(kv.Key);
// Log only on the first failure of a streak; recovery is logged when GatherMetric succeeds again.
if (_failingDbs.Add(kv.Key) && _logger.IsWarn)
_logger.Warn($"Failed to gather metrics for DB '{kv.Key}': {e.Message}");
}
Comment on lines +88 to +108
}

Db.Metrics.DbBlockCacheSize["Shared"] = _sharedBlockCache.Value.GetUsage();
Expand All @@ -104,12 +118,30 @@ private void UpdateDbMetrics()
}
}

// Uses IDictionary<TKey,TValue>.Remove so it works for both the default NonBlocking.ConcurrentDictionary
// and the plain Dictionary used under the ZK_EVM compile flag.
private static void RemoveStaleMetricEntry(string name)
{
IDictionary<string, long> reads = Db.Metrics.DbReads;
IDictionary<string, long> writes = Db.Metrics.DbWrites;
IDictionary<string, long> size = Db.Metrics.DbSize;
IDictionary<string, long> memtable = Db.Metrics.DbMemtableSize;
IDictionary<string, long> blockCache = Db.Metrics.DbBlockCacheSize;
IDictionary<string, long> indexFilter = Db.Metrics.DbIndexFilterSize;
reads.Remove(name);
writes.Remove(name);
size.Remove(name);
memtable.Remove(name);
blockCache.Remove(name);
indexFilter.Remove(name);
}

public class DbFactoryInterceptor(DbTracker tracker, IDbFactory baseFactory) : IDbFactory
{
public IDb CreateDb(DbSettings dbSettings)
{
IDb db = baseFactory.CreateDb(dbSettings);
if (db is IDbMeta dbMeta)
if (!dbSettings.SkipMetricsTracking && db is IDbMeta dbMeta)
{
tracker.AddDb(dbSettings.DbName, dbMeta);
}
Expand All @@ -119,7 +151,7 @@ public IDb CreateDb(DbSettings dbSettings)
public IColumnsDb<T> CreateColumnsDb<T>(DbSettings dbSettings) where T : struct, Enum
{
IColumnsDb<T> db = baseFactory.CreateColumnsDb<T>(dbSettings);
if (db is IDbMeta dbMeta)
if (!dbSettings.SkipMetricsTracking && db is IDbMeta dbMeta)
{
tracker.AddDb(dbSettings.DbName, dbMeta);
}
Expand Down
10 changes: 9 additions & 1 deletion src/Nethermind/Nethermind.Init/Modules/WorldStateModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,20 @@ protected override void Load(ContainerBuilder builder)
DbSettings stateDbSettings = new(GetTitleDbName(DbNames.State), DbNames.State);
IFileSystem fileSystem = ctx.Resolve<IFileSystem>();
IDbFactory dbFactory = ctx.Resolve<IDbFactory>();
return new FullPruningDb(
FullPruningDb db = new(
stateDbSettings,
dbFactory is not MemDbFactory
? new FullPruningInnerDbFactory(dbFactory, fileSystem, stateDbSettings.DbPath)
: dbFactory,
() => Interlocked.Increment(ref Nethermind.Db.Metrics.StateDbInPruningWrites));
// Register the outer wrapper so GatherMetric() always reflects the currently active
// inner DB, even across full-pruning cycles. The inner DBs are not tracked:
// - via FullPruningInnerDbFactory they get SkipMetricsTracking = true so the
// DbFactoryInterceptor skips registration.
// - via the MemDbFactory branch they're MemDbs created outside any interceptor and
// therefore never reach the tracker either.
ctx.ResolveOptional<DbMonitoringModule.DbTracker>()?.AddDb(stateDbSettings.DbName, db);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Low: AddDb uses TryAdd internally, which silently discards duplicate names. If this lambda ever re-runs (unlikely for a singleton, but possible in test teardown/rebuild cycles), the stale FullPruningDb instance remains in the tracker. Not a problem in practice, but worth noting if the FullPruningDb instance is ever recreated under the same name.

return db;
})

.AddSingleton<INodeStorageFactory>(ctx =>
Expand Down
Loading