Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
277 changes: 148 additions & 129 deletions ld_refresh.py
Original file line number Diff line number Diff line change
@@ -1,129 +1,148 @@
from asyncio.log import logger
import os
import logging
import requests
import time
from API_calls.StudentDatafromLDRESTAPI import build_team_students_map
from config.quality_model_config import load_qualitymodel_map, choose_qualitymodel
from config.load_config_file import get_available_events
from database.mongo_client import db

API_URL = os.getenv("EVAL_API_URL", "http://localhost:5001/api/event")

QM_MAP = load_qualitymodel_map()

EVENT_TYPES = ["push", "task", "userstory"]


def team_is_active(team_id: str) -> bool:
"""Function to check if a team has any activity in the database.
If not active, it will not trigger any events."""
coll = f"metrics.{team_id}" # We will only check the metrics collection, because if it has metrics, the factors and indicators collections will also have data.
return (
coll in db.list_collection_names() and db[coll].estimated_document_count() > 0
)


def trigger_team_event(team_id: str, event_type: str) -> None:
"""Function to trigger an event for a team.
It will send a POST request to the API with the event data."""
payload = {
"event_type": event_type,
"prj": team_id,
"author_login": "system",
"quality_model": choose_qualitymodel(team_id, None, QM_MAP),
}
start = time.perf_counter()
r = requests.post(
API_URL, json=payload, timeout=(0.2, 1)
) # connect timeout 0.2s, read timeout 1s

logger.info(
f"Triggered event {event_type} for team {team_id} with status code {r.status_code} in {time.perf_counter() - start:.2f} seconds."
)

def delete_orphan_collections_from_mongo(actual_teams):
for prefix in [
"metrics",
"factors",
"strategic_indicators",
]: # afegeix tots els prefixes que toquin
collections = db.list_collection_names()
for coll in collections:
if coll.startswith(prefix + "."):
team_collection = coll.split(".", 1)[1]
if team_collection not in actual_teams:
db.drop_collection(coll)


def delete_orphan_student_documents(team_students_map):
"""
Elimina documentos de estudiantes que ya no existen en el mapa de estudiantes.
Busca en las colecciones metrics, factors y strategic_indicators.
"""
for team_id, sources in team_students_map.items():
# Obtener lista de estudiantes válidos: incluye nombres reales (EXCEL) + usernames (GITHUB + TAIGA)
valid_students = []
valid_students.extend(sources.get("EXCEL", []))
valid_students.extend(sources.get("GITHUB", []))
valid_students.extend(sources.get("TAIGA", []))

# Eliminar duplicados
valid_students = list(set(valid_students))

# Limpiar en cada tipo de colección
for prefix in ["metrics", "factors", "strategic_indicators"]:
collection_name = f"{prefix}.{team_id}"

if collection_name not in db.list_collection_names():
continue

collection = db[collection_name]

# Buscar documentos con student_name que no esté en la lista de válidos
# Los documentos de equipo no tienen student_name, así que los ignoramos
orphan_docs = collection.find(
{"student_name": {"$exists": True, "$nin": valid_students}}
)

deleted_count = 0
for doc in orphan_docs:
doc.get("student_name")
# Intentar obtener el nombre de la métrica/factor/indicador
(
doc.get("metric_name")
or doc.get("factor_name")
or doc.get("indicator_name")
or doc.get("name", "documento")
)

collection.delete_one({"_id": doc["_id"]})
deleted_count += 1


def run_daily_refresh() -> None:
"""Function to run the daily refresh of events."""
TEAM_STUDENTS = build_team_students_map()
actual_teams = list(TEAM_STUDENTS.keys())

# 1. Eliminar colecciones de equipos que ya no existen
delete_orphan_collections_from_mongo(actual_teams)

# 2. Eliminar documentos de estudiantes que ya no están en los equipos
# delete_orphan_student_documents(TEAM_STUDENTS)

# 3. Recalcular métricas para todos los equipos activos
for team in TEAM_STUDENTS.keys(): # Get all the teams from the TEAM_STUDENTS map
"""
if not team_is_active(team):
logging.info("Equipo %s sin actividad previa; se omite.", team) # If the team is not active, skip it
continue
"""
events = get_available_events()
for event in events: # If the team is active, trigger all the events
trigger_team_event(team, event)


if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")
run_daily_refresh()
from asyncio.log import logger
import os
import logging
import requests
import time
from API_calls.StudentDatafromLDRESTAPI import build_team_students_map
from config.quality_model_config import load_qualitymodel_map, choose_qualitymodel
from config.load_config_file import get_available_events
from database.mongo_client import db

API_URL = os.getenv("EVAL_API_URL", "http://localhost:5001/api/event")

QM_MAP = load_qualitymodel_map()

EVENT_TYPES = ["push", "task", "userstory"]


def team_is_active(team_id: str) -> bool:
"""Function to check if a team has any activity in the database.
If not active, it will not trigger any events."""
coll = f"metrics.{team_id}" # We will only check the metrics collection, because if it has metrics, the factors and indicators collections will also have data.
return (
coll in db.list_collection_names() and db[coll].estimated_document_count() > 0
)


def trigger_team_event(team_id: str, event_type: str) -> None:
"""Function to trigger an event for a team.
It will send a POST request to the API with the event data."""
payload = {
"event_type": event_type,
"prj": team_id,
"author_login": "system",
"quality_model": choose_qualitymodel(team_id, None, QM_MAP),
}
start = time.perf_counter()
r = requests.post(
API_URL, json=payload, timeout=(0.2, 1)
) # connect timeout 0.2s, read timeout 1s

logger.info(
f"Triggered event {event_type} for team {team_id} with status code {r.status_code} in {time.perf_counter() - start:.2f} seconds."
)

def delete_orphan_collections_from_mongo(actual_teams):
for prefix in [
"metrics",
"factors",
"strategic_indicators",
]: # afegeix tots els prefixes que toquin
collections = db.list_collection_names()
for coll in collections:
if coll.startswith(prefix + "."):
team_collection = coll.split(".", 1)[1]
if team_collection not in actual_teams:
db.drop_collection(coll)


def _valid_students_for_event(team_sources: dict, event_type: str) -> list:
"""
Return valid student identifiers for a concrete event type.
We must validate against the event data source only.
"""
meta = get_event_meta(event_type)
if not meta:
return []

data_source = meta.get("data_source")
if not data_source:
return []

valid = team_sources.get(data_source, [])
# Keep deterministic order while removing duplicates
return list(dict.fromkeys(valid))

def delete_orphan_student_documents(team_students_map):
"""
Elimina documentos de estudiantes que ya no existen en el mapa de estudiantes.
Busca en las colecciones metrics, factors y strategic_indicators.
La validación se hace por event_type y por su data source asociado,
para evitar mezclar nombres EXCEL con usernames de TAIGA/GITHUB.
"""
for team_id, sources in team_students_map.items():
# Fallback list to protect legacy docs that may not have event_type.
all_valid_students = list(dict.fromkeys(
sources.get("EXCEL", []) + sources.get("GITHUB", []) + sources.get("TAIGA", [])
))

# Limpiar en cada tipo de colección
for prefix in ["metrics", "factors", "strategic_indicators"]:
collection_name = f"{prefix}.{team_id}"

if collection_name not in db.list_collection_names():
continue

collection = db[collection_name]

deleted_count = 0

# 1) Remove docs with known event_type and invalid student for that source
for event_type in get_available_events():
valid_for_event = _valid_students_for_event(sources, event_type)
if not valid_for_event:
continue

result = collection.delete_many({
"student_name": {"$exists": True, "$nin": valid_for_event},
"event_type": event_type
})
deleted_count += result.deleted_count

# 2) Legacy docs without event_type: apply broad fallback validation
legacy_result = collection.delete_many({
"student_name": {"$exists": True, "$nin": all_valid_students},
"event_type": {"$exists": False}
})
deleted_count += legacy_result.deleted_count

if deleted_count > 0:
logging.info("Deleted %s orphan student documents from %s", deleted_count, collection_name)


def run_daily_refresh() -> None:
"""Function to run the daily refresh of events."""
TEAM_STUDENTS = build_team_students_map()
actual_teams = list(TEAM_STUDENTS.keys())

# 1. Eliminar colecciones de equipos que ya no existen
delete_orphan_collections_from_mongo(actual_teams)

# 2. Eliminar documentos de estudiantes que ya no están en los equipos
delete_orphan_student_documents(TEAM_STUDENTS)

# 3. Recalcular métricas para todos los equipos activos
for team in TEAM_STUDENTS.keys(): # Get all the teams from the TEAM_STUDENTS map
"""
if not team_is_active(team):
logging.info("Equipo %s sin actividad previa; se omite.", team) # If the team is not active, skip it
continue
"""
events = get_available_events()
for event in events: # If the team is active, trigger all the events
trigger_team_event(team, event)


if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")
run_daily_refresh()
Loading