Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,18 @@ artifactory-cleanup --help
configuration for multiple repositories and some of them are not found.
- Use `--worker-count=<WORKER_NUM>` to increase the number of workers. By default, it's 1. It's useful when you have a lot of
artifacts and you want to speed up the process.
- Use `page_size` on a policy to fetch artifacts in pages instead of a single request. This can prevent timeouts on
large repositories. By default all artifacts are fetched at once.

```yaml
- name: Remove all files from repo-name-here older than 7 days
page_size: 5000 # fetch 5000 artifacts per request
rules:
- rule: Repo
name: "reponame"
- rule: DeleteOlderThan
days: 7
```

## Commands ##

Expand Down
6 changes: 4 additions & 2 deletions artifactory_cleanup/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,9 @@ def get_root_schema(self, rules):
policy_schema = cfgv.Map(
"Policy",
"name",
cfgv.NoAdditionalKeys(["name", "rules"]),
cfgv.NoAdditionalKeys(["name", "page_size", "rules"]),
cfgv.Required("name", cfgv.check_string),
cfgv.Optional("page_size", cfgv.check_int, None),
cfgv.RequiredRecurse("rules", cfgv.Array(rule_schema)),
)

Expand Down Expand Up @@ -151,6 +152,7 @@ def get_policies(self) -> List[CleanupPolicy]:

for policy_data in config["artifactory-cleanup"]["policies"]:
policy_name = policy_data["name"]
page_size = policy_data.get("page_size")
rules = []
for rule_data in policy_data["rules"]:
try:
Expand All @@ -164,7 +166,7 @@ def get_policies(self) -> List[CleanupPolicy]:
sys.exit(1)

rules.append(rule)
policy = CleanupPolicy(policy_name, *rules)
policy = CleanupPolicy(policy_name, *rules, page_size=page_size)
policies.append(policy)
return policies

Expand Down
64 changes: 56 additions & 8 deletions artifactory_cleanup/rules/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,15 @@ def aql_add_filter(self, filters: List) -> List:
"""
return filters

def aql_add_include(self, includes: set) -> set:
"""
Declare which extra AQL fields this rule needs in `.include()`.

The field `*` (all basic fields: repo, path, name, size, created, etc.) is always included.
Override this method and add `property` or `stat` only when your filter actually reads those fields.
"""
return includes

def aql_add_text(self, aql: str) -> str:
"""
You can change AQL text after applying all rules filters.
Expand Down Expand Up @@ -167,7 +176,7 @@ class CleanupPolicy(object):
session: Optional[BaseUrlSession] = None
today: date = None

def __init__(self, name: str, *rules: Rule):
def __init__(self, name: str, *rules: Rule, page_size: Optional[int] = None):
if not isinstance(name, str):
raise ValueError(
"Bad CleanupPolicy, first argument must be name.\n"
Expand All @@ -177,6 +186,7 @@ def __init__(self, name: str, *rules: Rule):
self.name = name
self.rules = list(rules)
self.aql_text = None
self.page_size = page_size

# init object if passed not initialized class
# for `rules.repo` rule, see above in the docstring
Expand Down Expand Up @@ -252,8 +262,16 @@ def _get_aql_text(self, find_filters: Dict) -> str:
"""
Collect from all rules additional texts of requests
"""
includes = {"*",}
for rule in self.rules:
includes = rule.aql_add_include(includes)

include_fields = sorted(includes)
include_str = ", ".join(f'"{f}"' for f in include_fields)
print(f"AQL include fields: {include_str}")

filters_text = json.dumps(find_filters)
aql = f'{self.DOMAIN}.find({filters_text}).include("*", "property", "stat")'
aql = f'{self.DOMAIN}.find({filters_text}).include({include_str})'

for rule in self.rules:
before_aql = aql
Expand All @@ -267,15 +285,45 @@ def _get_aql_text(self, find_filters: Dict) -> str:

def get_artifacts(self) -> ArtifactsList:
"""
Get artifacts from Artifactory by AQL filters that we collect from all rules in the policy
Get artifacts from Artifactory by AQL filters that we collect from all rules in the policy.

By default all artifacts are fetched in a single request. Pass ``page_size`` to
``CleanupPolicy`` to enable paginated fetching, which can avoid timeouts on large
repositories.

:return list of artifacts
"""
assert self.aql_text, "Call build_aql_query before calling get_artifacts"
r = self.session.post("/api/search/aql", data=self.aql_text)
r.raise_for_status()
content = r.json()
artifacts = content["results"]
return ArtifactsList.from_response(artifacts)

if self.page_size is None:
r = self.session.post("/api/search/aql", data=self.aql_text)
r.raise_for_status()
content = r.json()
artifacts = content["results"]
return ArtifactsList.from_response(artifacts)

all_artifacts: List[Dict] = []
offset = 0

while True:
paginated_aql = (
'{aql}.sort({{"$asc": ["created"]}}).offset({offset}).limit({limit})'
.format(aql=self.aql_text, offset=offset, limit=self.page_size)
)
r = self.session.post("/api/search/aql", data=paginated_aql)
r.raise_for_status()
content = r.json()
page = content["results"]
all_artifacts.extend(page)
print(
f"Fetched page: {len(page)} artifacts (offset={offset}), "
f"total so far: {len(all_artifacts)}"
)
if len(page) < self.page_size:
break
offset += self.page_size

return ArtifactsList.from_response(all_artifacts)

def filter(self, artifacts: ArtifactsList) -> ArtifactsList:
"""
Expand Down
5 changes: 5 additions & 0 deletions artifactory_cleanup/rules/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ class DeleteLeastRecentlyUsedFiles(Rule):
def __init__(self, keep: int):
self.keep = keep

def aql_add_include(self, includes: set) -> set:
# Reads artifact["stats"]["downloaded"] via sort_by_usage() in filter()
includes.add("stat")
return includes

def filter(self, artifacts: ArtifactsList) -> ArtifactsList:
# List will contain fresh files at the beginning
artifacts.sort(key=utils.sort_by_usage, reverse=True)
Expand Down
5 changes: 5 additions & 0 deletions artifactory_cleanup/rules/keep.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ class KeepLatestNupkgNVersions(Rule):
def __init__(self, count: int):
self.count = count

def aql_add_include(self, includes: set) -> set:
# Reads nuget.id and nuget.version from artifact properties in filter()
includes.add("property")
return includes

def filter(self, artifacts):
artifact_grouped = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

Expand Down
5 changes: 5 additions & 0 deletions artifactory_cleanup/rules/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ def __init__(self, property_key, property_value):
self.property_key = property_key
self.property_value = str(property_value)

def aql_add_include(self, includes: set) -> set:
# Reads artifact["properties"] in filter()
includes.add("property")
return includes

def filter(self, artifacts):
good_artifact = [
x
Expand Down
Loading