Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/playbook-reference/actions/remediation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Robusta includes actions that modify Kubernetes resources in your cluster. See a

.. robusta-action:: playbooks.robusta_playbooks.pod_actions.delete_pod

.. robusta-action:: playbooks.robusta_playbooks.pod_actions.delete_alert_pod on_prometheus_alert

.. robusta-action:: playbooks.robusta_playbooks.job_actions.delete_job on_job_failure

.. robusta-action:: playbooks.robusta_playbooks.autoscaler.alert_on_hpa_reached_limit on_horizontalpodautoscaler_update
Expand Down
55 changes: 54 additions & 1 deletion playbooks/robusta_playbooks/pod_actions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
from robusta.api import ActionException, ErrorCodes, PodEvent, action
import logging
from typing import Optional

from robusta.api import (
ActionException,
ActionParams,
ErrorCodes,
PodEvent,
PrometheusKubernetesAlert,
RateLimiter,
action,
)


@action
Expand All @@ -10,3 +21,45 @@ def delete_pod(event: PodEvent):
raise ActionException(ErrorCodes.RESOURCE_NOT_FOUND, "Failed to get the pod for deletion")

event.get_pod().delete()


class DeleteAlertPodParams(ActionParams):
    """
    :var rate_limit: Optional minimum interval, in seconds, between pod deletions for the same alert label value. Leave unset to disable rate limiting.
    :var rate_limit_field: Name of the alert label whose value is used to build the rate limit key. Required whenever rate_limit is set.
    """

    # Both settings default to None, i.e. no rate limiting is configured.
    rate_limit: Optional[int] = None
    rate_limit_field: Optional[str] = None


@action
def delete_alert_pod(event: PrometheusKubernetesAlert, params: DeleteAlertPodParams):
    """
    Delete the pod that a Prometheus alert refers to.

    Deletion can optionally be rate limited, with the limit scoped by the
    value of a chosen alert label.

    Raises ActionException with RESOURCE_NOT_FOUND when the event has no pod,
    and with ILLEGAL_ACTION_PARAMS when rate_limit is set but rate_limit_field
    is not.
    """
    target_pod = event.get_pod()
    if not target_pod:
        raise ActionException(ErrorCodes.RESOURCE_NOT_FOUND, "Failed to get the pod for deletion")

    period = params.rate_limit
    if period is not None:
        label_name = params.rate_limit_field
        if not label_name:
            raise ActionException(
                ErrorCodes.ILLEGAL_ACTION_PARAMS,
                "rate_limit_field must be set when rate_limit is configured",
            )

        label_value = event.alert.labels.get(label_name)
        if label_value is not None:
            limiter_key = f"{label_name}:{label_value}"
            # A falsy result from the limiter is treated as "already ran within
            # the period", so the pod is left alone this time.
            allowed = RateLimiter.mark_and_test("delete_alert_pod", limiter_key, period)
            if not allowed:
                logging.info(f"delete_alert_pod rate limited for {limiter_key}; skipping deletion")
                return
        else:
            # Without the label there is no key to limit on; the deletion
            # still goes ahead, only the rate limit check is skipped.
            logging.warning(
                f"delete_alert_pod: alert missing label '{label_name}'; skipping rate limit check"
            )

    target_pod.delete()
Loading