Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/playbook-reference/actions/remediation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Robusta includes actions that modify Kubernetes resources in your cluster. See a

.. robusta-action:: playbooks.robusta_playbooks.pod_actions.delete_pod

.. robusta-action:: playbooks.robusta_playbooks.pod_actions.delete_alert_pod on_prometheus_alert

.. robusta-action:: playbooks.robusta_playbooks.job_actions.delete_job on_job_failure

.. robusta-action:: playbooks.robusta_playbooks.autoscaler.alert_on_hpa_reached_limit on_horizontalpodautoscaler_update
Expand Down
55 changes: 54 additions & 1 deletion playbooks/robusta_playbooks/pod_actions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
from robusta.api import ActionException, ErrorCodes, PodEvent, action
import logging
from typing import Optional

from robusta.api import (
ActionException,
ActionParams,
ErrorCodes,
PodEvent,
PrometheusKubernetesAlert,
RateLimiter,
action,
)


@action
Expand All @@ -10,3 +21,45 @@ def delete_pod(event: PodEvent):
raise ActionException(ErrorCodes.RESOURCE_NOT_FOUND, "Failed to get the pod for deletion")

event.get_pod().delete()


class DeleteAlertPodParams(ActionParams):
    """
    :var rate_limit: Optional length, in seconds, of the rate limit period. When set, the action runs at most once per period for each distinct value of the chosen alert label.
    :var rate_limit_field: Name of the alert label whose value is used to build the rate limit key.
    """

    # Both settings default to None, i.e. they are unset unless the playbook provides them.
    rate_limit: Optional[int] = None
    rate_limit_field: Optional[str] = None


@action
def delete_alert_pod(event: PrometheusKubernetesAlert, params: DeleteAlertPodParams):
    """
    Delete the pod that a Prometheus alert refers to.

    Deletion can optionally be rate limited; the limit is tracked separately
    for each value of the alert label named by ``rate_limit_field``.
    """
    target_pod = event.get_pod()
    if not target_pod:
        raise ActionException(ErrorCodes.RESOURCE_NOT_FOUND, "Failed to get the pod for deletion")

    period_seconds = params.rate_limit
    if period_seconds is not None:
        label_name = params.rate_limit_field
        # A rate limit without a label to scope it by is a configuration error.
        if not label_name:
            raise ActionException(
                ErrorCodes.ILLEGAL_ACTION_PARAMS,
                "rate_limit_field must be set when rate_limit is configured",
            )

        label_value = event.alert.labels.get(label_name)
        if label_value is None:
            # No label value means no key to limit on: the pod is still deleted below.
            logging.warning(
                f"delete_alert_pod: alert missing label '{label_name}'; skipping rate limit check"
            )
        else:
            limiter_key = f"{label_name}:{label_value}"
            allowed = RateLimiter.mark_and_test("delete_alert_pod", limiter_key, period_seconds)
            if not allowed:
                logging.info(f"delete_alert_pod rate limited for {limiter_key}; skipping deletion")
                return

    target_pod.delete()
Loading