Skip to content
1 change: 1 addition & 0 deletions ddtrace/internal/settings/_supported_configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@
"DD_PROFILING_EXCEPTION_ENABLED",
"DD_PROFILING_EXCEPTION_SAMPLING_INTERVAL",
"DD_PROFILING_FILE_PATH",
"DD_PROFILING_GC_ENABLED",
"DD_PROFILING_HEAP_ENABLED",
"DD_PROFILING_HEAP_SAMPLE_SIZE",
"DD_PROFILING_IGNORE_PROFILER",
Expand Down
17 changes: 16 additions & 1 deletion ddtrace/internal/settings/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def _derive_default_heap_sample_size(
try:
from ddtrace.vendor import psutil

total_mem = psutil.swap_memory().total + psutil.virtual_memory().total
total_mem = psutil.swap_memory().total + psutil.virtual_memory().total # type: ignore[no-untyped-call]
except Exception:
logger.warning(
"Unable to get total memory available, using default value of %d KB",
Expand Down Expand Up @@ -496,6 +496,18 @@ class ProfilingConfigPytorch(DDConfig):
)


class ProfilingConfigGC(DDConfig):
__item__ = __prefix__ = "gc"

enabled = DDConfig.v(
bool,
"enabled",
default=True,
help_type="Boolean",
help="Whether to enable the GC collector (pause durations, collection counts).",
)


class ProfilingConfigException(DDConfig):
__item__ = __prefix__ = "exception"

Expand Down Expand Up @@ -534,6 +546,7 @@ class ProfilingConfigException(DDConfig):
ProfilingConfig.include(ProfilingConfigLock, namespace="lock")
ProfilingConfig.include(ProfilingConfigMemory, namespace="memory")
ProfilingConfig.include(ProfilingConfigHeap, namespace="heap")
ProfilingConfig.include(ProfilingConfigGC, namespace="gc")
ProfilingConfig.include(ProfilingConfigPytorch, namespace="pytorch")
ProfilingConfig.include(ProfilingConfigException, namespace="exception")

Expand Down Expand Up @@ -584,6 +597,8 @@ def config_str(config: ProfilingConfig) -> str:
configured_features.append("mem")
if config.heap.sample_size > 0:
configured_features.append("heap")
if config.gc.enabled:
configured_features.append("gc")
if config.pytorch.enabled:
configured_features.append("pytorch")
if config.exception.enabled:
Expand Down
111 changes: 111 additions & 0 deletions ddtrace/profiling/collector/gc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from __future__ import annotations

import gc
import logging
import threading
import time
from typing import Callable

from ddtrace.internal.datadog.profiling import ddup
from ddtrace.profiling import collector


LOG = logging.getLogger(__name__)

_GEN_NAMES: tuple[str, ...] = (
"gc.collect[gen=0]",
"gc.collect[gen=1]",
"gc.collect[gen=2]",
)


class GCCollector(collector.Collector):
"""Collect GC pause durations, collection counts, and configuration state.

Hooks gc.callbacks for per-collection events and emits a snapshot sample
once per profile flush interval with cumulative counts and config state.

Data emitted:
- Wall time samples (push_walltime) attributed to synthetic gc.collect[gen=N]
frames — appear in the Wall Time profile view.
- Alloc samples (push_alloc) with collected-object count — appear in the
Alloc profile view under the same frames.
- A gc.config snapshot sample per flush carrying explicit gc.collect() call
count in the sample count field.
"""

def _start_service(self) -> None:
self._start_ns: dict[int, int] = {}
self._explicit_count: int = 0
self._count_lock = threading.Lock()
self._orig_collect: Callable[..., int] = gc.collect
gc.collect = self._patched_collect
gc.callbacks.append(self._on_gc)
LOG.debug("GCCollector started")

def _stop_service(self) -> None:
try:
gc.callbacks.remove(self._on_gc)
except ValueError:
pass
gc.collect = self._orig_collect

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Guard gc.collect restoration against later patches

When another library or test replaces gc.collect while the profiler is running, stopping this collector unconditionally restores the function captured at profiler startup and silently discards that later patch. Because this collector is enabled by default and mutates a process-wide stdlib function, this can break code that wraps gc.collect after profiling starts; only restore when gc.collect is still this collector's wrapper, or otherwise chain/coordinate the wrapper safely.

Useful? React with 👍 / 👎.

LOG.debug("GCCollector stopped")

def _patched_collect(self, generation: int = 2) -> int:
with self._count_lock:
self._explicit_count += 1
return self._orig_collect(generation)
Comment on lines +54 to +57

Comment on lines +54 to +58
def _on_gc(self, phase: str, info: dict[str, int]) -> None:
gen = info.get("generation", 0)
if phase == "start":
self._start_ns[gen] = time.monotonic_ns()
elif phase == "stop":
start = self._start_ns.pop(gen, None)
if start is None:
return
pause_ns = time.monotonic_ns() - start
frame_name = _GEN_NAMES[gen] if gen < len(_GEN_NAMES) else "gc.collect"

handle = ddup.SampleHandle()
handle.push_walltime(pause_ns, 1)
handle.push_frame(frame_name, "gc", 0, 0)
handle.push_monotonic_ns(time.monotonic_ns())
Comment thread
Copilot marked this conversation as resolved.
handle.flush_sample()

collected = info.get("collected", 0)
if collected > 0:
handle2 = ddup.SampleHandle()
handle2.push_alloc(collected, 1)
handle2.push_frame(frame_name, "gc", 0, 0)
handle2.push_monotonic_ns(time.monotonic_ns())
handle2.flush_sample()

def snapshot(self) -> None: # type: ignore[override]
with self._count_lock:
explicit = self._explicit_count
self._explicit_count = 0

handle = ddup.SampleHandle()
# Use count field to carry explicit gc.collect() tally for this interval.
# A zero walltime with count > 0 is the established pattern for pure-count
# samples (same as lock release-time samples with zero duration).
handle.push_walltime(0, explicit)
handle.push_frame("gc.config", "gc", 0, 0)
handle.push_monotonic_ns(time.monotonic_ns())
handle.flush_sample()

if LOG.isEnabledFor(logging.DEBUG):
thresholds = gc.get_threshold()
enabled = gc.isenabled()
freeze_count = gc.get_freeze_count() if hasattr(gc, "get_freeze_count") else 0
stats = gc.get_stats()
total_collections = sum(s.get("collections", 0) for s in stats)
LOG.debug(
"GCCollector snapshot: enabled=%s thresholds=%s freeze=%d total_collections=%d explicit_collect=%d",
enabled,
thresholds,
freeze_count,
total_collections,
explicit,
)
11 changes: 11 additions & 0 deletions ddtrace/profiling/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from ddtrace.profiling import scheduler
from ddtrace.profiling.collector import asyncio
from ddtrace.profiling.collector import exception
from ddtrace.profiling.collector import gc as gc_collector
from ddtrace.profiling.collector import memalloc
from ddtrace.profiling.collector import pytorch
from ddtrace.profiling.collector import stack
Expand Down Expand Up @@ -175,6 +176,7 @@ def __init__(
_memory_collector_enabled: bool = profiling_config.memory.enabled,
_stack_collector_enabled: bool = profiling_config.stack.enabled,
_lock_collector_enabled: bool = profiling_config.lock.enabled,
_gc_collector_enabled: bool = profiling_config.gc.enabled,
_pytorch_collector_enabled: bool = profiling_config.pytorch.enabled,
_exception_profiling_enabled: bool = profiling_config.exception.enabled,
enable_code_provenance: bool = profiling_config.code_provenance,
Expand All @@ -191,6 +193,7 @@ def __init__(
self._memory_collector_enabled: bool = _memory_collector_enabled
self._stack_collector_enabled: bool = _stack_collector_enabled
self._lock_collector_enabled: bool = _lock_collector_enabled
self._gc_collector_enabled: bool = _gc_collector_enabled
self._pytorch_collector_enabled: bool = _pytorch_collector_enabled
self._exception_profiling_enabled: bool = _exception_profiling_enabled
self.enable_code_provenance: bool = enable_code_provenance
Expand Down Expand Up @@ -266,6 +269,14 @@ def __post_init__(self) -> None:
except Exception:
LOG.error("Failed to start exception collector, disabling.", exc_info=True)

if self._gc_collector_enabled:
LOG.debug("Profiling collector (gc) enabled")
try:
self._collectors.append(gc_collector.GCCollector())
LOG.debug("Profiling collector (gc) initialized")
except Exception:
LOG.error("Failed to start gc collector, disabling.", exc_info=True)

if self._stack_collector_enabled:
LOG.debug("Profiling collector (stack) enabled")
try:
Expand Down
7 changes: 7 additions & 0 deletions supported-configurations.json
Original file line number Diff line number Diff line change
Expand Up @@ -2710,6 +2710,13 @@
"default": null
}
],
"DD_PROFILING_GC_ENABLED": [
{
"implementation": "C",
"type": "boolean",
"default": "true"
}
],
"DD_PROFILING_HEAP_ENABLED": [
{
"implementation": "C",
Expand Down
12 changes: 12 additions & 0 deletions tests/profiling/collector/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,15 @@ def test_capture_sampler_pure_python_fallback() -> None:
sys.modules[mod_name] = saved_module
sys.modules.pop(collector_mod, None)
importlib.import_module(collector_mod)
# Re-attach any already-imported direct submodules as attributes of the
# freshly-reimported package. Python only does this automatically on a
# fresh import of the submodule itself; it does not update attributes on
# a parent package object that was replaced in sys.modules mid-flight.
_parent = sys.modules[collector_mod]
_prefix = collector_mod + "."
_depth = collector_mod.count(".") + 1
for _key in list(sys.modules):
if _key.startswith(_prefix) and _key.count(".") == _depth:
_attr = _key.rsplit(".", 1)[-1]
if not hasattr(_parent, _attr):
setattr(_parent, _attr, sys.modules[_key])
Loading
Loading