From 62b981b55d8b2a74d1c0fb60c6a55a1a8c4dc488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20C=C3=A1mara?= Date: Tue, 19 May 2026 10:15:33 +0200 Subject: [PATCH 1/5] Create buildkite pipeline to periodically clean up logstash artifact snyk scans --- .../snyk/cleanup-stale-projects/cleanup.py | 285 ++++++++++++++++++ .../cleanup-stale-projects/generate-steps.py | 36 +++ .../snyk/cleanup-stale-projects/run.sh | 16 + .../snyk_cleanup_stale_projects_pipeline.yml | 9 + catalog-info.yaml | 47 +++ 5 files changed, 393 insertions(+) create mode 100644 .buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py create mode 100644 .buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py create mode 100755 .buildkite/scripts/snyk/cleanup-stale-projects/run.sh create mode 100644 .buildkite/snyk_cleanup_stale_projects_pipeline.yml diff --git a/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py b/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py new file mode 100644 index 0000000000..c7dbf8ec98 --- /dev/null +++ b/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 +""" +Cleanup stale Snyk projects created by the Logstash artifact scan pipeline. + +Queries the Snyk REST API for projects imported by the service account that +haven't been monitored recently, and performs either deactivation or deletion. 
+ +Usage: + python3 cleanup.py --action deactivate + python3 cleanup.py --action delete + +Environment variables: + SNYK_TOKEN - Snyk API token (required) + STALENESS_DAYS - Number of days before a project is considered stale (default: 2) + DRY_RUN - If "true", only log actions without performing them (default: "false") +""" + +import argparse +import os +import subprocess +import sys +from datetime import datetime, timedelta, timezone + +import requests +from requests.adapters import HTTPAdapter, Retry + +SNYK_REST_BASE = "https://api.snyk.io" +SNYK_REST_VERSION = "2024-10-15" +# Only clean up projects created by the artifact scan pipeline +ARTIFACT_SCAN_REMOTE_REPO_URL = "logstash-artifact" +IN_BUILDKITE = os.environ.get("BUILDKITE") == "true" + +class Annotation: + """Lazily creates a Buildkite annotation on first item; skips entirely if unused.""" + + def __init__(self, context: str, header: str, style: str): + self._context = context + self._header = header + self._style = style + self._initialized = False + + def add(self, message: str): + if not self._initialized: + annotate_build(self._context, f"{self._header}\n{message}", + self._style, append=False) + self._initialized = True + else: + annotate_build(self._context, message, self._style, append=True) + +def annotate_build(context: str, message: str, style: str, append: bool) -> None: + if IN_BUILDKITE: + cmd = ["buildkite-agent", "annotate", message, "--context", context, "--style", style] + if append: + cmd.append("--append") + subprocess.run(cmd) + +def get_env(): + token = os.environ.get("SNYK_TOKEN") + if not token: + print("Error: SNYK_TOKEN environment variable is required", file=sys.stderr) + sys.exit(1) + + staleness_days = int(os.environ.get("STALENESS_DAYS", "2")) + dry_run = os.environ.get("DRY_RUN", "false").lower() == "true" + + return token, staleness_days, dry_run + + +def create_session(token: str) -> requests.Session: + session = requests.Session() + retries = Retry(total=5, 
backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) + session.mount("https://", HTTPAdapter(max_retries=retries)) + session.headers.update({ + "Authorization": f"token {token}", + "Content-Type": "application/vnd.api+json", + }) + return session + + +def resolve_org_id(session: requests.Session) -> str: + """Resolve the org UUID from the slug 'logstash' via GET /rest/orgs.""" + url = f"{SNYK_REST_BASE}/rest/orgs" + resp = session.get(url, params={"version": SNYK_REST_VERSION}) + resp.raise_for_status() + data = resp.json() + + for org in data.get("data", []): + if org.get("attributes", {}).get("slug") == "logstash": + org_id = org["id"] + print(f"Resolved org 'logstash' UUID: {org_id}") + return org_id + + print("Error: Could not find logstash org", file=sys.stderr) + sys.exit(1) + + +def list_projects(session: requests.Session, org_id: str, **params) -> list: + """List all projects with pagination.""" + url = f"{SNYK_REST_BASE}/rest/orgs/{org_id}/projects" + query = { + "version": SNYK_REST_VERSION, + "limit": 100, + "expand": "target", + } + query.update(params) + + projects = [] + while url: + resp = session.get(url, params=query) + resp.raise_for_status() + data = resp.json() + projects.extend(data.get("data", [])) + + next_link = data.get("links", {}).get("next") + if next_link: + url = f"{SNYK_REST_BASE}{next_link}" if next_link.startswith("/") else next_link + query = {} + else: + url = None + + return projects + + +def list_stale_artifact_projects(session: requests.Session, org_id: str, + cutoff_str: str) -> list: + """List active artifact-scan projects monitored before the cutoff date.""" + projects = list_projects(session, org_id, cli_monitored_before=cutoff_str) + print(f"Total projects monitored before cutoff: {len(projects)}") + return [ + p for p in projects + if p.get("attributes", {}).get("status") == "active" + and (p.get("relationships", {}).get("target", {}).get("data", {}) + .get("attributes", {}).get("display_name", "")) == 
ARTIFACT_SCAN_REMOTE_REPO_URL + ] + + +def list_inactive_artifact_projects(session: requests.Session, org_id: str, + cutoff_str: str) -> list: + """List inactive artifact-scan projects monitored before the cutoff date.""" + projects = list_projects(session, org_id, cli_monitored_before=cutoff_str) + return [ + p for p in projects + if p.get("attributes", {}).get("status") == "inactive" + and (p.get("relationships", {}).get("target", {}).get("data", {}) + .get("attributes", {}).get("display_name", "")) == ARTIFACT_SCAN_REMOTE_REPO_URL + ] + + +def deactivate_project(session: requests.Session, org_id: str, project_id: str, project_name: str, dry_run: bool) -> bool: + """Deactivate a single project. Returns True on success, False on failure.""" + if dry_run: + print(f" [DRY RUN] Would deactivate: {project_name} ({project_id})") + return True + + url = f"{SNYK_REST_BASE}/v1/org/{org_id}/project/{project_id}/deactivate" + try: + resp = session.post(url) + if resp.status_code in (200, 422): + print(f" Deactivated: {project_name} ({project_id})") + return True + resp.raise_for_status() + except requests.HTTPError as e: + print(f" Failed to deactivate: {project_name} ({project_id}): {e}", file=sys.stderr) + return False + + +def delete_project(session: requests.Session, org_id: str, project_id: str, project_name: str, dry_run: bool) -> bool: + """Delete a single project. 
Returns True on success, False on failure.""" + if dry_run: + print(f" [DRY RUN] Would delete: {project_name} ({project_id})") + return True + + url = f"{SNYK_REST_BASE}/rest/orgs/{org_id}/projects/{project_id}" + try: + resp = session.delete(url, params={"version": SNYK_REST_VERSION}) + if resp.status_code == 204: + print(f" Deleted: {project_name} ({project_id})") + return True + resp.raise_for_status() + except requests.HTTPError as e: + print(f" Failed to delete: {project_name} ({project_id}): {e}", file=sys.stderr) + return False + + +def delete_target(session: requests.Session, org_id: str, target_id: str, dry_run: bool) -> bool: + """Delete an empty target. Returns True on success, False on failure.""" + if dry_run: + print(f" [DRY RUN] Would delete target: {target_id}") + return True + + url = f"{SNYK_REST_BASE}/rest/orgs/{org_id}/targets/{target_id}" + try: + resp = session.delete(url, params={"version": SNYK_REST_VERSION}) + if resp.status_code == 204: + print(f" Deleted target: {target_id}") + return True + resp.raise_for_status() + except requests.HTTPError as e: + print(f" Failed to delete target: {target_id}: {e}", file=sys.stderr) + return False + + +def parse_args(): + parser = argparse.ArgumentParser(description="Cleanup stale Snyk artifact-scan projects") + parser.add_argument("--action", required=True, choices=["deactivate", "delete"], + help="Action to perform: deactivate stale projects or delete inactive projects") + return parser.parse_args() + + +def action_deactivate(session: requests.Session, org_id: str, staleness_days: int, dry_run: bool): + """Find and deactivate stale active artifact-scan projects.""" + cutoff_str = (datetime.now(timezone.utc) - timedelta(days=staleness_days)).isoformat() + print(f"Cutoff date: {cutoff_str}") + + stale_projects = list_stale_artifact_projects(session, org_id, cutoff_str) + print(f"Found {len(stale_projects)} stale active artifact-scan project(s)") + + success = 
Annotation("successfully_deactivated_projects", + "
Deactivated projects:", "success") + failure = Annotation("unsuccessfully_deactivated_projects", + "
Projects failed to deactivate:", "error") + + for project in stale_projects: + project_id = project["id"] + project_name = project.get("attributes", {}).get("name", "unknown") + if deactivate_project(session, org_id, project_id, project_name, dry_run): + success.add(f"{project_name} ({project_id})
") + else: + failure.add(f"{project_name} ({project_id})
") + + +def action_delete(session: requests.Session, org_id: str, staleness_days: int, dry_run: bool): + """Find and delete inactive artifact-scan projects, then clean up empty targets.""" + cutoff_str = (datetime.now(timezone.utc) - timedelta(days=staleness_days)).isoformat() + inactive_projects = list_inactive_artifact_projects(session, org_id, cutoff_str) + print(f"Found {len(inactive_projects)} inactive artifact-scan project(s) to delete") + + proj_success = Annotation("successfully_deleted_projects", + "
Deleted projects:", "success") + proj_failure = Annotation("unsuccessfully_deleted_projects", + "
Projects failed to delete:", "error") + tgt_success = Annotation("successfully_deleted_targets", + "
Deleted targets:", "success") + tgt_failure = Annotation("unsuccessfully_deleted_targets", + "
Targets failed to delete:", "error") + + # Delete projects and collect target IDs for cleanup + target_ids = set() + for project in inactive_projects: + project_id = project["id"] + project_name = project.get("attributes", {}).get("name", "unknown") + target_ref = project.get("relationships", {}).get("target", {}).get("data", {}).get("id") + if delete_project(session, org_id, project_id, project_name, dry_run): + proj_success.add(f"{project_name} ({project_id})
") + if target_ref: + target_ids.add(target_ref) + else: + proj_failure.add(f"{project_name} ({project_id})
") + + # Clean up empty targets + for target_id in target_ids: + if delete_target(session, org_id, target_id, dry_run): + tgt_success.add(f"{target_id}
") + else: + tgt_failure.add(f"{target_id}
") + + +def main(): + args = parse_args() + token, staleness_days, dry_run = get_env() + session = create_session(token) + + mode_label = "[DRY RUN] " if dry_run else "" + print(f"{mode_label}Action: {args.action} | Staleness threshold: {staleness_days} days") + + org_id = resolve_org_id(session) + + if args.action == "deactivate": + action_deactivate(session, org_id, staleness_days, dry_run) + elif args.action == "delete": + action_delete(session, org_id, staleness_days, dry_run) + + +if __name__ == "__main__": + main() diff --git a/.buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py b/.buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py new file mode 100644 index 0000000000..471d3f2878 --- /dev/null +++ b/.buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +""" +Generates Buildkite pipeline steps for Snyk stale project cleanup. +Produces two sequential steps: deactivate stale projects, then delete inactive ones. 
+""" + +import os +import yaml + +YAML_HEADER = '# yaml-language-server: $schema=https://raw.githubusercontent.com/buildkite/pipeline-schema/main/schema.json\n' +SCRIPT_PATH = ".buildkite/scripts/snyk/cleanup-stale-projects/run.sh" + + +def generate_pipeline() -> dict: + return { + "steps": [ + { + "label": ":deactivate: Deactivate stale artifact-scan projects", + "key": "deactivate-stale-projects", + "command": f"{SCRIPT_PATH} deactivate", + "retry": {"automatic": [{"limit": 2}]}, + }, + { + "label": ":wastebasket: Delete inactive artifact-scan projects", + "key": "delete-inactive-projects", + "depends_on": "deactivate-stale-projects", + "command": f"{SCRIPT_PATH} delete", + "retry": {"automatic": [{"limit": 2}]}, + }, + ] + } + + +if __name__ == "__main__": + pipeline = generate_pipeline() + print(YAML_HEADER + yaml.dump(pipeline, default_flow_style=False, sort_keys=False)) diff --git a/.buildkite/scripts/snyk/cleanup-stale-projects/run.sh b/.buildkite/scripts/snyk/cleanup-stale-projects/run.sh new file mode 100755 index 0000000000..bb5a8a4c12 --- /dev/null +++ b/.buildkite/scripts/snyk/cleanup-stale-projects/run.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Cleans up stale Snyk projects created by the Logstash artifact scan pipeline. +# Uses the same Vault credentials as scan-artifact.sh. 
+# Usage: ./run.sh + +set -euo pipefail + +ACTION="${1:?Usage: $0 }" + +source .buildkite/scripts/common/vm-agent.sh + +echo "--- Retrieving Snyk token from Vault" +export SNYK_TOKEN=$(vault read -field=token secret/ci/elastic-logstash/snyk-creds) + +echo "--- Running stale project cleanup (action: ${ACTION})" +python3 .buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py --action "${ACTION}" diff --git a/.buildkite/snyk_cleanup_stale_projects_pipeline.yml b/.buildkite/snyk_cleanup_stale_projects_pipeline.yml new file mode 100644 index 0000000000..606f9a85ab --- /dev/null +++ b/.buildkite/snyk_cleanup_stale_projects_pipeline.yml @@ -0,0 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/buildkite/pipeline-schema/main/schema.json + +steps: + - label: ":pipeline: Cleanup stale Snyk artifact-scan projects" + command: | + set -euo pipefail + python3 -m pip install pyyaml requests + python3 .buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py > steps.yml + buildkite-agent pipeline upload < steps.yml diff --git a/catalog-info.yaml b/catalog-info.yaml index 4668aef65e..2e12093204 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -29,6 +29,7 @@ spec: - resource:logstash-snyk-report - resource:logstash-plugins-snyk-report - resource:logstash-artifact-snyk-scan + - resource:logstash-artifact-snyk-cleanup - resource:logstash-artifacts-acceptance - resource:logstash-dra-snapshot-pipeline - resource:logstash-dra-staging-pipeline @@ -219,6 +220,52 @@ spec: cronline: "@daily" message: "Run the Logstash Artifacts Snyk report every day." 
+# *********************************** +# Declare artifacts-snyk-cleanup pipeline +# *********************************** +--- +# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json +apiVersion: backstage.io/v1alpha1 +kind: Resource +metadata: + name: logstash-artifact-snyk-cleanup + description: 'Cleanup stale Snyk artifact-scan projects.' +spec: + type: buildkite-pipeline + owner: group:logstash + system: platform-ingest + implementation: + apiVersion: buildkite.elastic.dev/v1 + kind: Pipeline + metadata: + name: logstash-artifact-snyk-cleanup-ci + description: ':logstash: Cleanup stale Snyk artifact-scan projects :pipeline:' + spec: + repository: elastic/logstash + pipeline_file: ".buildkite/snyk_cleanup_stale_projects_pipeline.yml" + maximum_timeout_in_minutes: 60 + provider_settings: + trigger_mode: none # don't trigger jobs + env: + ELASTIC_SLACK_NOTIFICATIONS_ENABLED: 'true' + SLACK_NOTIFICATIONS_CHANNEL: '#logstash-build' + SLACK_NOTIFICATIONS_ON_SUCCESS: 'false' + SLACK_NOTIFICATIONS_SKIP_FOR_RETRIES: 'true' + teams: + ingest-fp: + access_level: MANAGE_BUILD_AND_READ + logstash: + access_level: MANAGE_BUILD_AND_READ + ingest-eng-prod: + access_level: MANAGE_BUILD_AND_READ + everyone: + access_level: READ_ONLY + schedules: + Daily Artifacts Snyk cleanup: + branch: main + cronline: "@daily" + message: "Cleanup stale Snyk artifact-scan projects every day." 
+ # *********************************** # Declare artifacts acceptance test pipeline # *********************************** From 26532869c678f0fde53e1bf49c7e5f8126f31b59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20C=C3=A1mara?= Date: Thu, 21 May 2026 14:17:43 +0200 Subject: [PATCH 2/5] Change staleness condition to delete projects whose version is no longer tracked --- .../snyk/cleanup-stale-projects/cleanup.py | 195 ++++++------------ .../cleanup-stale-projects/generate-steps.py | 17 +- .../snyk/cleanup-stale-projects/run.sh | 9 +- 3 files changed, 67 insertions(+), 154 deletions(-) diff --git a/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py b/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py index c7dbf8ec98..dd60ee6ef5 100644 --- a/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py +++ b/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py @@ -2,32 +2,27 @@ """ Cleanup stale Snyk projects created by the Logstash artifact scan pipeline. -Queries the Snyk REST API for projects imported by the service account that -haven't been monitored recently, and performs either deactivation or deletion. - -Usage: - python3 cleanup.py --action deactivate - python3 cleanup.py --action delete +Fetches the current active Logstash versions from logstash-versions.yml and +deletes any Snyk artifact-scan projects whose version is no longer tracked. 
Environment variables: - SNYK_TOKEN - Snyk API token (required) - STALENESS_DAYS - Number of days before a project is considered stale (default: 2) - DRY_RUN - If "true", only log actions without performing them (default: "false") + SNYK_TOKEN - Snyk API token (required) + DRY_RUN - If "true", only log actions without performing them (default: "false") """ -import argparse import os import subprocess import sys -from datetime import datetime, timedelta, timezone import requests +import yaml from requests.adapters import HTTPAdapter, Retry SNYK_REST_BASE = "https://api.snyk.io" SNYK_REST_VERSION = "2024-10-15" # Only clean up projects created by the artifact scan pipeline ARTIFACT_SCAN_REMOTE_REPO_URL = "logstash-artifact" +VERSIONS_URL = "https://raw.githubusercontent.com/logstash-plugins/.ci/1.x/logstash-versions.yml" IN_BUILDKITE = os.environ.get("BUILDKITE") == "true" class Annotation: @@ -60,10 +55,9 @@ def get_env(): print("Error: SNYK_TOKEN environment variable is required", file=sys.stderr) sys.exit(1) - staleness_days = int(os.environ.get("STALENESS_DAYS", "2")) dry_run = os.environ.get("DRY_RUN", "false").lower() == "true" - return token, staleness_days, dry_run + return token, dry_run def create_session(token: str) -> requests.Session: @@ -77,6 +71,27 @@ def create_session(token: str) -> requests.Session: return session +def fetch_active_versions() -> set: + """Fetch current active versions from logstash-versions.yml.""" + + try: + resp = requests.get(VERSIONS_URL, timeout=30) + resp.raise_for_status() + data = yaml.safe_load(resp.text) + except Exception as e: + print(f"Error: Failed to fetch logstash versions: {e}", file=sys.stderr) + sys.exit(1) + + versions = set() + for section in ("releases", "snapshots"): + if section in data: + for version in data[section].values(): + versions.add(version) + + print(f"Active versions from logstash-versions.yml: {sorted(versions)}") + return versions + + def resolve_org_id(session: requests.Session) -> str: 
"""Resolve the org UUID from the slug 'logstash' via GET /rest/orgs.""" url = f"{SNYK_REST_BASE}/rest/orgs" @@ -121,49 +136,17 @@ def list_projects(session: requests.Session, org_id: str, **params) -> list: return projects -def list_stale_artifact_projects(session: requests.Session, org_id: str, - cutoff_str: str) -> list: - """List active artifact-scan projects monitored before the cutoff date.""" - projects = list_projects(session, org_id, cli_monitored_before=cutoff_str) - print(f"Total projects monitored before cutoff: {len(projects)}") - return [ - p for p in projects - if p.get("attributes", {}).get("status") == "active" - and (p.get("relationships", {}).get("target", {}).get("data", {}) - .get("attributes", {}).get("display_name", "")) == ARTIFACT_SCAN_REMOTE_REPO_URL - ] - - -def list_inactive_artifact_projects(session: requests.Session, org_id: str, - cutoff_str: str) -> list: - """List inactive artifact-scan projects monitored before the cutoff date.""" - projects = list_projects(session, org_id, cli_monitored_before=cutoff_str) +def list_artifact_projects(session: requests.Session, org_id: str) -> list: + """List all artifact-scan projects (target display_name = logstash-artifact).""" + projects = list_projects(session, org_id) + print(f"Total projects fetched: {len(projects)}") return [ p for p in projects - if p.get("attributes", {}).get("status") == "inactive" - and (p.get("relationships", {}).get("target", {}).get("data", {}) + if (p.get("relationships", {}).get("target", {}).get("data", {}) .get("attributes", {}).get("display_name", "")) == ARTIFACT_SCAN_REMOTE_REPO_URL ] -def deactivate_project(session: requests.Session, org_id: str, project_id: str, project_name: str, dry_run: bool) -> bool: - """Deactivate a single project. 
Returns True on success, False on failure.""" - if dry_run: - print(f" [DRY RUN] Would deactivate: {project_name} ({project_id})") - return True - - url = f"{SNYK_REST_BASE}/v1/org/{org_id}/project/{project_id}/deactivate" - try: - resp = session.post(url) - if resp.status_code in (200, 422): - print(f" Deactivated: {project_name} ({project_id})") - return True - resp.raise_for_status() - except requests.HTTPError as e: - print(f" Failed to deactivate: {project_name} ({project_id}): {e}", file=sys.stderr) - return False - - def delete_project(session: requests.Session, org_id: str, project_id: str, project_name: str, dry_run: bool) -> bool: """Delete a single project. Returns True on success, False on failure.""" if dry_run: @@ -182,103 +165,43 @@ def delete_project(session: requests.Session, org_id: str, project_id: str, proj return False -def delete_target(session: requests.Session, org_id: str, target_id: str, dry_run: bool) -> bool: - """Delete an empty target. Returns True on success, False on failure.""" - if dry_run: - print(f" [DRY RUN] Would delete target: {target_id}") - return True - - url = f"{SNYK_REST_BASE}/rest/orgs/{org_id}/targets/{target_id}" - try: - resp = session.delete(url, params={"version": SNYK_REST_VERSION}) - if resp.status_code == 204: - print(f" Deleted target: {target_id}") - return True - resp.raise_for_status() - except requests.HTTPError as e: - print(f" Failed to delete target: {target_id}: {e}", file=sys.stderr) - return False - - -def parse_args(): - parser = argparse.ArgumentParser(description="Cleanup stale Snyk artifact-scan projects") - parser.add_argument("--action", required=True, choices=["deactivate", "delete"], - help="Action to perform: deactivate stale projects or delete inactive projects") - return parser.parse_args() - - -def action_deactivate(session: requests.Session, org_id: str, staleness_days: int, dry_run: bool): - """Find and deactivate stale active artifact-scan projects.""" - cutoff_str = 
(datetime.now(timezone.utc) - timedelta(days=staleness_days)).isoformat() - print(f"Cutoff date: {cutoff_str}") +def main(): + token, dry_run = get_env() + session = create_session(token) - stale_projects = list_stale_artifact_projects(session, org_id, cutoff_str) - print(f"Found {len(stale_projects)} stale active artifact-scan project(s)") + mode_label = "[DRY RUN] " if dry_run else "" + print(f"{mode_label}Starting cleanup of stale Snyk artifact-scan projects") - success = Annotation("successfully_deactivated_projects", - "
Deactivated projects:", "success") - failure = Annotation("unsuccessfully_deactivated_projects", - "
Projects failed to deactivate:", "error") + active_versions = fetch_active_versions() + org_id = resolve_org_id(session) - for project in stale_projects: - project_id = project["id"] - project_name = project.get("attributes", {}).get("name", "unknown") - if deactivate_project(session, org_id, project_id, project_name, dry_run): - success.add(f"{project_name} ({project_id})
") - else: - failure.add(f"{project_name} ({project_id})
") + all_projects = list_artifact_projects(session, org_id) + print(f"Total artifact-scan projects in Snyk: {len(all_projects)}") + stale_projects = [ + p for p in all_projects + if p.get("attributes", {}).get("target_reference", "") not in active_versions + ] + print(f"Stale projects to delete (version not in logstash-versions.yml): {len(stale_projects)}") -def action_delete(session: requests.Session, org_id: str, staleness_days: int, dry_run: bool): - """Find and delete inactive artifact-scan projects, then clean up empty targets.""" - cutoff_str = (datetime.now(timezone.utc) - timedelta(days=staleness_days)).isoformat() - inactive_projects = list_inactive_artifact_projects(session, org_id, cutoff_str) - print(f"Found {len(inactive_projects)} inactive artifact-scan project(s) to delete") + if not stale_projects: + print("No stale projects found. Nothing to do.") + return proj_success = Annotation("successfully_deleted_projects", - "
Deleted projects:", "success") + "
Deleted projects:", "success") proj_failure = Annotation("unsuccessfully_deleted_projects", - "
Projects failed to delete:", "error") - tgt_success = Annotation("successfully_deleted_targets", - "
Deleted targets:", "success") - tgt_failure = Annotation("unsuccessfully_deleted_targets", - "
Targets failed to delete:", "error") - - # Delete projects and collect target IDs for cleanup - target_ids = set() - for project in inactive_projects: + "
Projects failed to delete:", "error") + + for project in stale_projects: project_id = project["id"] project_name = project.get("attributes", {}).get("name", "unknown") - target_ref = project.get("relationships", {}).get("target", {}).get("data", {}).get("id") - if delete_project(session, org_id, project_id, project_name, dry_run): - proj_success.add(f"{project_name} ({project_id})
") - if target_ref: - target_ids.add(target_ref) + target_ref = project.get("attributes", {}).get("target_reference", "unknown") + label = f"{project_name} (version: {target_ref})" + if delete_project(session, org_id, project_id, label, dry_run): + proj_success.add(f"{label}
") else: - proj_failure.add(f"{project_name} ({project_id})
") - - # Clean up empty targets - for target_id in target_ids: - if delete_target(session, org_id, target_id, dry_run): - tgt_success.add(f"{target_id}
") - else: - tgt_failure.add(f"{target_id}
") - - -def main(): - args = parse_args() - token, staleness_days, dry_run = get_env() - session = create_session(token) - - mode_label = "[DRY RUN] " if dry_run else "" - print(f"{mode_label}Action: {args.action} | Staleness threshold: {staleness_days} days") - - org_id = resolve_org_id(session) - - if args.action == "deactivate": - action_deactivate(session, org_id, staleness_days, dry_run) - elif args.action == "delete": - action_delete(session, org_id, staleness_days, dry_run) + proj_failure.add(f"{label}
") if __name__ == "__main__": diff --git a/.buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py b/.buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py index 471d3f2878..d64537d7b6 100644 --- a/.buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py +++ b/.buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py @@ -1,10 +1,8 @@ #!/usr/bin/env python3 """ -Generates Buildkite pipeline steps for Snyk stale project cleanup. -Produces two sequential steps: deactivate stale projects, then delete inactive ones. +Generates the Buildkite pipeline step for Snyk stale project cleanup. """ -import os import yaml YAML_HEADER = '# yaml-language-server: $schema=https://raw.githubusercontent.com/buildkite/pipeline-schema/main/schema.json\n' @@ -15,16 +13,9 @@ def generate_pipeline() -> dict: return { "steps": [ { - "label": ":deactivate: Deactivate stale artifact-scan projects", - "key": "deactivate-stale-projects", - "command": f"{SCRIPT_PATH} deactivate", - "retry": {"automatic": [{"limit": 2}]}, - }, - { - "label": ":wastebasket: Delete inactive artifact-scan projects", - "key": "delete-inactive-projects", - "depends_on": "deactivate-stale-projects", - "command": f"{SCRIPT_PATH} delete", + "label": ":wastebasket: Delete stale artifact-scan projects", + "key": "delete-stale-projects", + "command": SCRIPT_PATH, "retry": {"automatic": [{"limit": 2}]}, }, ] diff --git a/.buildkite/scripts/snyk/cleanup-stale-projects/run.sh b/.buildkite/scripts/snyk/cleanup-stale-projects/run.sh index bb5a8a4c12..3745312bcc 100755 --- a/.buildkite/scripts/snyk/cleanup-stale-projects/run.sh +++ b/.buildkite/scripts/snyk/cleanup-stale-projects/run.sh @@ -1,16 +1,15 @@ #!/bin/bash # Cleans up stale Snyk projects created by the Logstash artifact scan pipeline. +# Fetches active versions from logstash-versions.yml and deletes Snyk projects +# whose version is no longer tracked. # Uses the same Vault credentials as scan-artifact.sh. 
-# Usage: ./run.sh set -euo pipefail -ACTION="${1:?Usage: $0 }" - source .buildkite/scripts/common/vm-agent.sh echo "--- Retrieving Snyk token from Vault" export SNYK_TOKEN=$(vault read -field=token secret/ci/elastic-logstash/snyk-creds) -echo "--- Running stale project cleanup (action: ${ACTION})" -python3 .buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py --action "${ACTION}" +echo "--- Running stale project cleanup" +python3 .buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py From 4c2c643b1e42769efefec8985fc0d3b33aad1c19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20C=C3=A1mara?= Date: Thu, 21 May 2026 14:30:39 +0200 Subject: [PATCH 3/5] Fetch projects by target_id for faster execution --- .../snyk/cleanup-stale-projects/cleanup.py | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py b/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py index dd60ee6ef5..5473462771 100644 --- a/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py +++ b/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py @@ -110,12 +110,11 @@ def resolve_org_id(session: requests.Session) -> str: def list_projects(session: requests.Session, org_id: str, **params) -> list: - """List all projects with pagination.""" + """List projects with pagination.""" url = f"{SNYK_REST_BASE}/rest/orgs/{org_id}/projects" query = { "version": SNYK_REST_VERSION, "limit": 100, - "expand": "target", } query.update(params) @@ -136,15 +135,31 @@ def list_projects(session: requests.Session, org_id: str, **params) -> list: return projects +def resolve_target_id(session: requests.Session, org_id: str) -> str: + """Find the target ID for the 'logstash-artifact' target.""" + url = f"{SNYK_REST_BASE}/rest/orgs/{org_id}/targets" + resp = session.get(url, params={ + "version": SNYK_REST_VERSION, + "display_name": ARTIFACT_SCAN_REMOTE_REPO_URL, + "source_types": "cli", + }) + resp.raise_for_status() 
+ data = resp.json() + + for target in data.get("data", []): + if target.get("attributes", {}).get("display_name") == ARTIFACT_SCAN_REMOTE_REPO_URL: + target_id = target["id"] + print(f"Resolved target '{ARTIFACT_SCAN_REMOTE_REPO_URL}' UUID: {target_id}") + return target_id + + print(f"Error: Could not find target '{ARTIFACT_SCAN_REMOTE_REPO_URL}'", file=sys.stderr) + sys.exit(1) + + def list_artifact_projects(session: requests.Session, org_id: str) -> list: - """List all artifact-scan projects (target display_name = logstash-artifact).""" - projects = list_projects(session, org_id) - print(f"Total projects fetched: {len(projects)}") - return [ - p for p in projects - if (p.get("relationships", {}).get("target", {}).get("data", {}) - .get("attributes", {}).get("display_name", "")) == ARTIFACT_SCAN_REMOTE_REPO_URL - ] + """List artifact-scan projects by target ID.""" + target_id = resolve_target_id(session, org_id) + return list_projects(session, org_id, target_id=[target_id]) def delete_project(session: requests.Session, org_id: str, project_id: str, project_name: str, dry_run: bool) -> bool: From 80b688096d7c10a7bcc5f873fcb535fa3974a27f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20C=C3=A1mara?= Date: Thu, 21 May 2026 14:44:55 +0200 Subject: [PATCH 4/5] Re-organized files and folder and changed schedule to weekly --- .../cleanup-stale-projects.py} | 0 .../cleanup-stale-projects/generate-steps.py | 27 ------------------- .../snyk/cleanup-stale-projects/run.sh | 15 ----------- .../snyk_cleanup_stale_projects_pipeline.yml | 5 ++-- catalog-info.yaml | 6 ++--- 5 files changed, 6 insertions(+), 47 deletions(-) rename .buildkite/scripts/snyk/{cleanup-stale-projects/cleanup.py => cleanup-artifact-stale-projects/cleanup-stale-projects.py} (100%) delete mode 100644 .buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py delete mode 100755 .buildkite/scripts/snyk/cleanup-stale-projects/run.sh diff --git 
a/.buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py b/.buildkite/scripts/snyk/cleanup-artifact-stale-projects/cleanup-stale-projects.py similarity index 100% rename from .buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py rename to .buildkite/scripts/snyk/cleanup-artifact-stale-projects/cleanup-stale-projects.py diff --git a/.buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py b/.buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py deleted file mode 100644 index d64537d7b6..0000000000 --- a/.buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python3 -""" -Generates the Buildkite pipeline step for Snyk stale project cleanup. -""" - -import yaml - -YAML_HEADER = '# yaml-language-server: $schema=https://raw.githubusercontent.com/buildkite/pipeline-schema/main/schema.json\n' -SCRIPT_PATH = ".buildkite/scripts/snyk/cleanup-stale-projects/run.sh" - - -def generate_pipeline() -> dict: - return { - "steps": [ - { - "label": ":wastebasket: Delete stale artifact-scan projects", - "key": "delete-stale-projects", - "command": SCRIPT_PATH, - "retry": {"automatic": [{"limit": 2}]}, - }, - ] - } - - -if __name__ == "__main__": - pipeline = generate_pipeline() - print(YAML_HEADER + yaml.dump(pipeline, default_flow_style=False, sort_keys=False)) diff --git a/.buildkite/scripts/snyk/cleanup-stale-projects/run.sh b/.buildkite/scripts/snyk/cleanup-stale-projects/run.sh deleted file mode 100755 index 3745312bcc..0000000000 --- a/.buildkite/scripts/snyk/cleanup-stale-projects/run.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -# Cleans up stale Snyk projects created by the Logstash artifact scan pipeline. -# Fetches active versions from logstash-versions.yml and deletes Snyk projects -# whose version is no longer tracked. -# Uses the same Vault credentials as scan-artifact.sh. 
- -set -euo pipefail - -source .buildkite/scripts/common/vm-agent.sh - -echo "--- Retrieving Snyk token from Vault" -export SNYK_TOKEN=$(vault read -field=token secret/ci/elastic-logstash/snyk-creds) - -echo "--- Running stale project cleanup" -python3 .buildkite/scripts/snyk/cleanup-stale-projects/cleanup.py diff --git a/.buildkite/snyk_cleanup_stale_projects_pipeline.yml b/.buildkite/snyk_cleanup_stale_projects_pipeline.yml index 606f9a85ab..5fddf096e6 100644 --- a/.buildkite/snyk_cleanup_stale_projects_pipeline.yml +++ b/.buildkite/snyk_cleanup_stale_projects_pipeline.yml @@ -4,6 +4,7 @@ steps: - label: ":pipeline: Cleanup stale Snyk artifact-scan projects" command: | set -euo pipefail + source .buildkite/scripts/common/vm-agent.sh python3 -m pip install pyyaml requests - python3 .buildkite/scripts/snyk/cleanup-stale-projects/generate-steps.py > steps.yml - buildkite-agent pipeline upload < steps.yml + export SNYK_TOKEN=$(vault read -field=token secret/ci/elastic-logstash/snyk-creds) + python3 .buildkite/scripts/snyk/cleanup-artifact-stale-projects/cleanup-stale-projects.py diff --git a/catalog-info.yaml b/catalog-info.yaml index 2e12093204..d8c9a49f6d 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -261,10 +261,10 @@ spec: everyone: access_level: READ_ONLY schedules: - Daily Artifacts Snyk cleanup: + Weekly Artifacts Snyk cleanup: branch: main - cronline: "@daily" - message: "Cleanup stale Snyk artifact-scan projects every day." + cronline: "@weekly" + message: "Cleanup stale Snyk artifact-scan projects weekly." 
# *********************************** # Declare artifacts acceptance test pipeline From b7c9af5c38ba762ef00302e8763e378bd516c5ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20C=C3=A1mara?= Date: Thu, 21 May 2026 16:50:01 +0200 Subject: [PATCH 5/5] delete unnecessary pagination --- .../cleanup-stale-projects.py | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/.buildkite/scripts/snyk/cleanup-artifact-stale-projects/cleanup-stale-projects.py b/.buildkite/scripts/snyk/cleanup-artifact-stale-projects/cleanup-stale-projects.py index 5473462771..0ef4f418d3 100644 --- a/.buildkite/scripts/snyk/cleanup-artifact-stale-projects/cleanup-stale-projects.py +++ b/.buildkite/scripts/snyk/cleanup-artifact-stale-projects/cleanup-stale-projects.py @@ -110,7 +110,7 @@ def resolve_org_id(session: requests.Session) -> str: def list_projects(session: requests.Session, org_id: str, **params) -> list: - """List projects with pagination.""" + """List projects for the given org.""" url = f"{SNYK_REST_BASE}/rest/orgs/{org_id}/projects" query = { "version": SNYK_REST_VERSION, @@ -118,21 +118,9 @@ def list_projects(session: requests.Session, org_id: str, **params) -> list: } query.update(params) - projects = [] - while url: - resp = session.get(url, params=query) - resp.raise_for_status() - data = resp.json() - projects.extend(data.get("data", [])) - - next_link = data.get("links", {}).get("next") - if next_link: - url = f"{SNYK_REST_BASE}{next_link}" if next_link.startswith("/") else next_link - query = {} - else: - url = None - - return projects + resp = session.get(url, params=query) + resp.raise_for_status() + return resp.json().get("data", []) def resolve_target_id(session: requests.Session, org_id: str) -> str: