From 7defbc1f1827749fbb1a7b94a3be987c10c9a5ca Mon Sep 17 00:00:00 2001 From: sergio-utrillaa Date: Mon, 11 May 2026 13:03:45 +0200 Subject: [PATCH 1/3] Exclude merge commits and large commits from modifiedlines metric --- QUALITY_MODELS/AMEP/metrics/modifiedlines.query | 6 +++--- QUALITY_MODELS/DEFAULT/metrics/modifiedlines.query | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/QUALITY_MODELS/AMEP/metrics/modifiedlines.query b/QUALITY_MODELS/AMEP/metrics/modifiedlines.query index feb2b11..9dec3b8 100644 --- a/QUALITY_MODELS/AMEP/metrics/modifiedlines.query +++ b/QUALITY_MODELS/AMEP/metrics/modifiedlines.query @@ -1,9 +1,9 @@ [ { "$match": { - "user.login": { - "$ne": "anonymous" - } + "user.login": { "$ne": "anonymous" }, + "is_merge": { "$ne": true }, + "stats.total": { "$lte": 5000 } } }, { diff --git a/QUALITY_MODELS/DEFAULT/metrics/modifiedlines.query b/QUALITY_MODELS/DEFAULT/metrics/modifiedlines.query index feb2b11..9dec3b8 100644 --- a/QUALITY_MODELS/DEFAULT/metrics/modifiedlines.query +++ b/QUALITY_MODELS/DEFAULT/metrics/modifiedlines.query @@ -1,9 +1,9 @@ [ { "$match": { - "user.login": { - "$ne": "anonymous" - } + "user.login": { "$ne": "anonymous" }, + "is_merge": { "$ne": true }, + "stats.total": { "$lte": 5000 } } }, { From 3429fca9792de0ff04eecddeeaa5bd2bfed45e61 Mon Sep 17 00:00:00 2001 From: sergio-utrillaa Date: Fri, 22 May 2026 16:41:45 +0200 Subject: [PATCH 2/3] Fix memory leaks: MongoDB connection pooling, HTTP session reuse, bounded thread pool --- API_calls/StudentDatafromLDRESTAPI.py | 10 ++++-- app.py | 45 +++++++++++++-------------- database/mongo_client.py | 9 ++++-- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/API_calls/StudentDatafromLDRESTAPI.py b/API_calls/StudentDatafromLDRESTAPI.py index 48b7655..c87bb84 100644 --- a/API_calls/StudentDatafromLDRESTAPI.py +++ b/API_calls/StudentDatafromLDRESTAPI.py @@ -1,10 +1,16 @@ import requests +from requests.adapters import HTTPAdapter import time import logging from config.settings import BASE_GESSI_URL logger = logging.getLogger(__name__) +_session = requests.Session() +_adapter = HTTPAdapter(pool_connections=2, pool_maxsize=5) +_session.mount("http://", _adapter) +_session.mount("https://", _adapter) + def fetch_projects() -> list: """ Retrieve the list of projects from the LD REST API. @@ -16,7 +22,7 @@ def fetch_projects() -> list: for attempt in range(max_retries): try: - response = requests.get(url, timeout=60) + response = _session.get(url, timeout=60) response.raise_for_status() # Raise an exception if status != 200 projects = response.json() return projects @@ -36,7 +42,7 @@ def fetch_project_details(project_id: int) -> dict: """ url = f"{BASE_GESSI_URL}/projects/{project_id}" # Use increased timeout here as well - response = requests.get(url, timeout=60) + response = _session.get(url, timeout=60) response.raise_for_status() return response.json() diff --git a/app.py b/app.py index 9e331ee..2455c0a 100644 --- a/app.py +++ b/app.py @@ -1,6 +1,7 @@ import json import os import threading +from concurrent.futures import ThreadPoolExecutor from flask import Flask, request, jsonify import logging from datetime import datetime @@ -47,6 +48,9 @@ app = Flask(__name__) +_executor = ThreadPoolExecutor(max_workers=10) +_map_lock = threading.Lock() + # Build the metrics event map at startup scaning all the quality models metrics subfolders ALL_METRICS_BY_QM, EVENT_METRICS_BY_QM = build_metrics_index_per_qm(QUALITY_MODELS_DIR) @@ -87,7 +91,8 @@ def background_process_event(event_data): # Retrieve the students for that team with the corresponding data source data_source = meta["data_source"] - students = TEAM_STUDENTS_MAP.get(external_id, {}).get(data_source, []) + with _map_lock: + students = TEAM_STUDENTS_MAP.get(external_id, {}).get(data_source, []) logger.info( f"Event={event_type}, team with external_id={external_id}, students={students}, quality_model={quality_model}" ) @@ -182,10 +187,7 @@ def handle_event(): 3) spawns a background thread to do metric recalculation """ event_data = request.get_json(force=True) - # Spawn a background thread - t = threading.Thread(target=background_process_event, args=(event_data,)) - t.start() - + _executor.submit(background_process_event, event_data) return jsonify({"status": "received"}), 200 @@ -204,27 +206,24 @@ def refresh_and_update_map(): logger.info("🔄 REFRESH INICIAT") logger.info("=" * 80) - # Mostrar mapa ABANS del refresh - logger.info("📋 MAPA ABANS DEL REFRESH:") - for team_id, sources in TEAM_STUDENTS_MAP.items(): - excel_students = sources.get("EXCEL", []) - logger.info(f" - {team_id}: {len(excel_students)} estudiants (EXCEL)") + with _map_lock: + logger.info("📋 MAPA ABANS DEL REFRESH:") + for team_id, sources in TEAM_STUDENTS_MAP.items(): + excel_students = sources.get("EXCEL", []) + logger.info(f" - {team_id}: {len(excel_students)} estudiants (EXCEL)") - # CRÍTICO: Reconstruir el mapa global desde PostgreSQL - logger.info("🔄 Reconstruint TEAM_STUDENTS_MAP des de PostgreSQL...") - TEAM_STUDENTS_MAP = build_team_students_map() + logger.info("🔄 Reconstruint TEAM_STUDENTS_MAP des de PostgreSQL...") + TEAM_STUDENTS_MAP = build_team_students_map() - # Mostrar mapa DESPRÉS del refresh - logger.info("✅ MAPA DESPRÉS DEL REFRESH:") - for team_id, sources in TEAM_STUDENTS_MAP.items(): - excel_students = sources.get("EXCEL", []) - logger.info(f" - {team_id}: {len(excel_students)} estudiants (EXCEL)") + logger.info("✅ MAPA DESPRÉS DEL REFRESH:") + for team_id, sources in TEAM_STUDENTS_MAP.items(): + excel_students = sources.get("EXCEL", []) + logger.info(f" - {team_id}: {len(excel_students)} estudiants (EXCEL)") - logger.info("=" * 80) - logger.info(f"✅ Mapa actualitzat amb {len(TEAM_STUDENTS_MAP)} equips") - logger.info("=" * 80) + logger.info("=" * 80) + logger.info(f"✅ Mapa actualitzat amb {len(TEAM_STUDENTS_MAP)} equips") + logger.info("=" * 80) - # Executar refresh diari (recalcular totes les mètriques) logger.info("🔄 Executant recàlcul de mètriques...") run_daily_refresh() logger.info("✅ Refresh completat") @@ -233,7 +232,7 @@ def refresh_and_update_map(): json.dumps(TEAM_STUDENTS_MAP, indent=2, ensure_ascii=False), ) - threading.Thread(target=refresh_and_update_map).start() + _executor.submit(refresh_and_update_map) return jsonify({"status": "refresh started"}), 200 diff --git a/database/mongo_client.py b/database/mongo_client.py index 66010d2..bac5576 100644 --- a/database/mongo_client.py +++ b/database/mongo_client.py @@ -1,8 +1,13 @@ from pymongo import MongoClient from config.settings import MONGO_URI, MONGO_DB -# Create the global MongoClient instance. -client = MongoClient(MONGO_URI) +client = MongoClient( + MONGO_URI, + maxPoolSize=25, + minPoolSize=1, + maxIdleTimeMS=60_000, + serverSelectionTimeoutMS=5_000, +) db = client[MONGO_DB] From 9140593fd8377a2050e7bd953b80708d34643f9d Mon Sep 17 00:00:00 2001 From: sergio-utrillaa Date: Fri, 12 Jun 2026 11:44:38 +0200 Subject: [PATCH 3/3] quick test fix --- tests/test_app.py | 25 +++++++++---------------- tests/test_student_data_api_key.py | 20 ++++++-------------- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index 569d55a..820fe00 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -82,16 +82,14 @@ def test_handle_event_returns_200_and_starts_thread(app_module, monkeypatch): payload = {"event_type": "push", "prj": "team-1", "author_login": "alice"} started = {} - class FakeThread: - def __init__(self, target, args=(), kwargs=None): - started["target"] = target + class FakeExecutor: + def submit(self, fn, *args, **kwargs): + started["target"] = fn started["args"] = args - started["kwargs"] = kwargs or {} - - def start(self): + started["kwargs"] = kwargs started["started"] = True - monkeypatch.setattr(app_module.threading, "Thread", FakeThread) + monkeypatch.setattr(app_module, "_executor", FakeExecutor()) response = app_module.app.test_client().post("/api/event", json=payload) @@ -209,16 +207,11 @@ def test_handle_refresh_rebuilds_students_map_and_runs_refresh(app_module, monke } refresh_calls = [] - class FakeThread: - def __init__(self, target, args=(), kwargs=None): - self.target = target - self.args = args - self.kwargs = kwargs or {} - - def start(self): - self.target(*self.args, **self.kwargs) + class FakeExecutor: + def submit(self, fn, *args, **kwargs): + fn(*args, **kwargs) - monkeypatch.setattr(app_module.threading, "Thread", FakeThread) + monkeypatch.setattr(app_module, "_executor", FakeExecutor()) monkeypatch.setattr( app_module, "build_team_students_map", diff --git a/tests/test_student_data_api_key.py b/tests/test_student_data_api_key.py index 02e1169..55db140 100644 --- a/tests/test_student_data_api_key.py +++ b/tests/test_student_data_api_key.py @@ -10,15 +10,11 @@ def test_fetch_projects_sends_ld_api_key(monkeypatch): mock_get = Mock(return_value=response) monkeypatch.setattr(student_api, "BASE_GESSI_URL", "http://tomcat:8080/api") - monkeypatch.setattr(student_api, "LD_HEADERS", {"X-LD-API-Key": "test-ld-api-key"}) - monkeypatch.setattr(student_api.requests, "get", mock_get) + monkeypatch.setattr(student_api._session, "get", mock_get) assert student_api.fetch_projects() == [] - mock_get.assert_called_once_with( - "http://tomcat:8080/api/projects", - headers={"X-LD-API-Key": "test-ld-api-key"}, - timeout=60, - ) + mock_get.assert_called_once_with("http://tomcat:8080/api/projects", timeout=60) + assert student_api._session.headers.get("X-LD-API-Key") is not None def test_fetch_project_details_sends_ld_api_key(monkeypatch): @@ -28,12 +24,8 @@ def test_fetch_project_details_sends_ld_api_key(monkeypatch): mock_get = Mock(return_value=response) monkeypatch.setattr(student_api, "BASE_GESSI_URL", "http://tomcat:8080/api") - monkeypatch.setattr(student_api, "LD_HEADERS", {"X-LD-API-Key": "test-ld-api-key"}) - monkeypatch.setattr(student_api.requests, "get", mock_get) + monkeypatch.setattr(student_api._session, "get", mock_get) assert student_api.fetch_project_details(1) == {"id": 1} - mock_get.assert_called_once_with( - "http://tomcat:8080/api/projects/1", - headers={"X-LD-API-Key": "test-ld-api-key"}, - timeout=60, - ) + mock_get.assert_called_once_with("http://tomcat:8080/api/projects/1", timeout=60) + assert student_api._session.headers.get("X-LD-API-Key") is not None