From 7defbc1f1827749fbb1a7b94a3be987c10c9a5ca Mon Sep 17 00:00:00 2001 From: sergio-utrillaa Date: Mon, 11 May 2026 13:03:45 +0200 Subject: [PATCH 1/4] Exclude merge commits and large commits from modifiedlines metric --- QUALITY_MODELS/AMEP/metrics/modifiedlines.query | 6 +++--- QUALITY_MODELS/DEFAULT/metrics/modifiedlines.query | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/QUALITY_MODELS/AMEP/metrics/modifiedlines.query b/QUALITY_MODELS/AMEP/metrics/modifiedlines.query index feb2b11..9dec3b8 100644 --- a/QUALITY_MODELS/AMEP/metrics/modifiedlines.query +++ b/QUALITY_MODELS/AMEP/metrics/modifiedlines.query @@ -1,9 +1,9 @@ [ { "$match": { - "user.login": { - "$ne": "anonymous" - } + "user.login": { "$ne": "anonymous" }, + "is_merge": { "$ne": true }, + "stats.total": { "$lte": 5000 } } }, { diff --git a/QUALITY_MODELS/DEFAULT/metrics/modifiedlines.query b/QUALITY_MODELS/DEFAULT/metrics/modifiedlines.query index feb2b11..9dec3b8 100644 --- a/QUALITY_MODELS/DEFAULT/metrics/modifiedlines.query +++ b/QUALITY_MODELS/DEFAULT/metrics/modifiedlines.query @@ -1,9 +1,9 @@ [ { "$match": { - "user.login": { - "$ne": "anonymous" - } + "user.login": { "$ne": "anonymous" }, + "is_merge": { "$ne": true }, + "stats.total": { "$lte": 5000 } } }, { From f656abaaf106f1686a2ba40bc5924bd05fc504d6 Mon Sep 17 00:00:00 2001 From: EncryptEx <41539618+EncryptEx@users.noreply.github.com> Date: Wed, 20 May 2026 12:27:43 +0200 Subject: [PATCH 2/4] feat: added apikey --- API_calls/StudentDatafromLDRESTAPI.py | 8 +++--- README.md | 3 ++- config/settings.py | 17 +++++++++++- template.env | 16 +++++++++++ tests/conftest.py | 3 +++ tests/test_student_data_api_key.py | 39 +++++++++++++++++++++++++++ 6 files changed, 81 insertions(+), 5 deletions(-) create mode 100644 template.env create mode 100644 tests/test_student_data_api_key.py diff --git a/API_calls/StudentDatafromLDRESTAPI.py b/API_calls/StudentDatafromLDRESTAPI.py index 48b7655..00c2046 100644 --- a/API_calls/StudentDatafromLDRESTAPI.py +++ b/API_calls/StudentDatafromLDRESTAPI.py @@ -1,10 +1,12 @@ import requests import time import logging -from config.settings import BASE_GESSI_URL +from config.settings import BASE_GESSI_URL, LD_API_KEY, LD_API_KEY_HEADER logger = logging.getLogger(__name__) +LD_HEADERS = {LD_API_KEY_HEADER: LD_API_KEY} + def fetch_projects() -> list: """ Retrieve the list of projects from the LD REST API. @@ -16,7 +18,7 @@ def fetch_projects() -> list: for attempt in range(max_retries): try: - response = requests.get(url, timeout=60) + response = requests.get(url, headers=LD_HEADERS, timeout=60) response.raise_for_status() # Raise an exception if status != 200 projects = response.json() return projects @@ -36,7 +38,7 @@ def fetch_project_details(project_id: int) -> dict: """ url = f"{BASE_GESSI_URL}/projects/{project_id}" # Use increased timeout here as well - response = requests.get(url, timeout=60) + response = requests.get(url, headers=LD_HEADERS, timeout=60) response.raise_for_status() return response.json() diff --git a/README.md b/README.md index 5210ac5..6fc8542 100644 --- a/README.md +++ b/README.md @@ -98,10 +98,11 @@ docker compose up -d --build ld_eval | `MONGO_USER` / `MONGO_PASS` | Credentials (leave blank for local dev) | | `MONGO_AUTHSRC` | Auth DB (usually `admin`) | | `BASE_GESSI_URL` | REST endpoint of the public dashboard | +| `LD_API_KEY` | Shared Learning Dashboard API key sent as `X-LD-API-Key` | | `QUALITY_MODELS_DIR` | Path to `QUALITY_MODELS` folder | | Scheduler: `_Start_scheduler_date`, `_End_scheduler_date`, `_Hour_scheduler` … | Daily refresh window (see `config_files/config_variables.py`) | -All vars can be placed in `.env` and are loaded automatically. +All vars can be placed in `.env` and are loaded automatically. Generate `LD_API_KEY` once from the repository root with `python3 -c 'import secrets; print(secrets.token_urlsafe(48))'` and copy the same value into local consumer `.env` files when running outside Docker. --- diff --git a/config/settings.py b/config/settings.py index 302986a..6fbe1ef 100644 --- a/config/settings.py +++ b/config/settings.py @@ -4,14 +4,29 @@ # Determine the base directory (adjust if needed) BASE_DIR = Path(__file__).resolve().parent.parent +ROOT_DIR = BASE_DIR.parent -# Load environment variables from the .env file +# Load centralized root configuration first, then module-local values for +# standalone runs. Existing environment variables keep priority. +load_dotenv(ROOT_DIR / ".env") load_dotenv(BASE_DIR / ".env") +def _require_env(name: str) -> str: + value = os.getenv(name) + if not value: + raise RuntimeError( + f"Missing required environment variable: {name}. " + f"Please set it in the root .env or this module's .env file." + ) + return value + + QUALITY_MODELS_DIR = os.getenv("QUALITY_MODELS_DIR", "QUALITY_MODELS") BASE_GESSI_URL = os.getenv("BASE_GESSI_URL", "") +LD_API_KEY = _require_env("LD_API_KEY") +LD_API_KEY_HEADER = "X-LD-API-Key" # Mongo database settings diff --git a/template.env b/template.env new file mode 100644 index 0000000..2c282d7 --- /dev/null +++ b/template.env @@ -0,0 +1,16 @@ +# LD Eval local configuration. +# Docker receives LD_API_KEY from the root .env; use this file only for standalone runs. + +BASE_GESSI_URL=http://localhost:8888/api + +# Generate once in the repository root: +# python3 -c 'import secrets; print(secrets.token_urlsafe(48))' +# Copy the same value from the root .env. +LD_API_KEY= + +MONGO_HOST=mongodb +MONGO_PORT=27017 +MONGO_DB=mongo +MONGO_USER= +MONGO_PASS= +MONGO_AUTHSRC=mongo diff --git a/tests/conftest.py b/tests/conftest.py index e855bd4..3d8e8c5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,11 @@ import sys +import os from pathlib import Path ROOT_DIR = Path(__file__).resolve().parents[1] +os.environ.setdefault("LD_API_KEY", "test-ld-api-key") + if str(ROOT_DIR) not in sys.path: sys.path.insert(0, str(ROOT_DIR)) diff --git a/tests/test_student_data_api_key.py b/tests/test_student_data_api_key.py new file mode 100644 index 0000000..02e1169 --- /dev/null +++ b/tests/test_student_data_api_key.py @@ -0,0 +1,39 @@ +from unittest.mock import Mock + +import API_calls.StudentDatafromLDRESTAPI as student_api + + +def test_fetch_projects_sends_ld_api_key(monkeypatch): + response = Mock() + response.json.return_value = [] + response.raise_for_status.return_value = None + mock_get = Mock(return_value=response) + + monkeypatch.setattr(student_api, "BASE_GESSI_URL", "http://tomcat:8080/api") + monkeypatch.setattr(student_api, "LD_HEADERS", {"X-LD-API-Key": "test-ld-api-key"}) + monkeypatch.setattr(student_api.requests, "get", mock_get) + + assert student_api.fetch_projects() == [] + mock_get.assert_called_once_with( + "http://tomcat:8080/api/projects", + headers={"X-LD-API-Key": "test-ld-api-key"}, + timeout=60, + ) + + +def test_fetch_project_details_sends_ld_api_key(monkeypatch): + response = Mock() + response.json.return_value = {"id": 1} + response.raise_for_status.return_value = None + mock_get = Mock(return_value=response) + + monkeypatch.setattr(student_api, "BASE_GESSI_URL", "http://tomcat:8080/api") + monkeypatch.setattr(student_api, "LD_HEADERS", {"X-LD-API-Key": "test-ld-api-key"}) + monkeypatch.setattr(student_api.requests, "get", mock_get) + + assert student_api.fetch_project_details(1) == {"id": 1} + mock_get.assert_called_once_with( + "http://tomcat:8080/api/projects/1", + headers={"X-LD-API-Key": "test-ld-api-key"}, + timeout=60, + ) From 3429fca9792de0ff04eecddeeaa5bd2bfed45e61 Mon Sep 17 00:00:00 2001 From: sergio-utrillaa Date: Fri, 22 May 2026 16:41:45 +0200 Subject: [PATCH 3/4] Fix memory leaks: MongoDB connection pooling, HTTP session reuse, bounded thread pool --- API_calls/StudentDatafromLDRESTAPI.py | 10 ++++-- app.py | 45 +++++++++++++-------------- database/mongo_client.py | 9 ++++-- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/API_calls/StudentDatafromLDRESTAPI.py b/API_calls/StudentDatafromLDRESTAPI.py index 48b7655..c87bb84 100644 --- a/API_calls/StudentDatafromLDRESTAPI.py +++ b/API_calls/StudentDatafromLDRESTAPI.py @@ -1,10 +1,16 @@ import requests +from requests.adapters import HTTPAdapter import time import logging from config.settings import BASE_GESSI_URL logger = logging.getLogger(__name__) +_session = requests.Session() +_adapter = HTTPAdapter(pool_connections=2, pool_maxsize=5) +_session.mount("http://", _adapter) +_session.mount("https://", _adapter) + def fetch_projects() -> list: """ Retrieve the list of projects from the LD REST API. @@ -16,7 +22,7 @@ def fetch_projects() -> list: for attempt in range(max_retries): try: - response = requests.get(url, timeout=60) + response = _session.get(url, timeout=60) response.raise_for_status() # Raise an exception if status != 200 projects = response.json() return projects @@ -36,7 +42,7 @@ def fetch_project_details(project_id: int) -> dict: """ url = f"{BASE_GESSI_URL}/projects/{project_id}" # Use increased timeout here as well - response = requests.get(url, timeout=60) + response = _session.get(url, timeout=60) response.raise_for_status() return response.json() diff --git a/app.py b/app.py index 9e331ee..2455c0a 100644 --- a/app.py +++ b/app.py @@ -1,6 +1,7 @@ import json import os import threading +from concurrent.futures import ThreadPoolExecutor from flask import Flask, request, jsonify import logging from datetime import datetime @@ -47,6 +48,9 @@ app = Flask(__name__) +_executor = ThreadPoolExecutor(max_workers=10) +_map_lock = threading.Lock() + # Build the metrics event map at startup scaning all the quality models metrics subfolders ALL_METRICS_BY_QM, EVENT_METRICS_BY_QM = build_metrics_index_per_qm(QUALITY_MODELS_DIR) @@ -87,7 +91,8 @@ def background_process_event(event_data): # Retrieve the students for that team with the corresponding data source data_source = meta["data_source"] - students = TEAM_STUDENTS_MAP.get(external_id, {}).get(data_source, []) + with _map_lock: + students = TEAM_STUDENTS_MAP.get(external_id, {}).get(data_source, []) logger.info( f"Event={event_type}, team with external_id={external_id}, students={students}, quality_model={quality_model}" ) @@ -182,10 +187,7 @@ def handle_event(): 3) spawns a background thread to do metric recalculation """ event_data = request.get_json(force=True) - # Spawn a background thread - t = threading.Thread(target=background_process_event, args=(event_data,)) - t.start() - + _executor.submit(background_process_event, event_data) return jsonify({"status": "received"}), 200 @@ -204,27 +206,24 @@ def refresh_and_update_map(): logger.info("🔄 REFRESH INICIAT") logger.info("=" * 80) - # Mostrar mapa ABANS del refresh - logger.info("📋 MAPA ABANS DEL REFRESH:") - for team_id, sources in TEAM_STUDENTS_MAP.items(): - excel_students = sources.get("EXCEL", []) - logger.info(f" - {team_id}: {len(excel_students)} estudiants (EXCEL)") + with _map_lock: + logger.info("📋 MAPA ABANS DEL REFRESH:") + for team_id, sources in TEAM_STUDENTS_MAP.items(): + excel_students = sources.get("EXCEL", []) + logger.info(f" - {team_id}: {len(excel_students)} estudiants (EXCEL)") - # CRÍTICO: Reconstruir el mapa global desde PostgreSQL - logger.info("🔄 Reconstruint TEAM_STUDENTS_MAP des de PostgreSQL...") - TEAM_STUDENTS_MAP = build_team_students_map() + logger.info("🔄 Reconstruint TEAM_STUDENTS_MAP des de PostgreSQL...") + TEAM_STUDENTS_MAP = build_team_students_map() - # Mostrar mapa DESPRÉS del refresh - logger.info("✅ MAPA DESPRÉS DEL REFRESH:") - for team_id, sources in TEAM_STUDENTS_MAP.items(): - excel_students = sources.get("EXCEL", []) - logger.info(f" - {team_id}: {len(excel_students)} estudiants (EXCEL)") + logger.info("✅ MAPA DESPRÉS DEL REFRESH:") + for team_id, sources in TEAM_STUDENTS_MAP.items(): + excel_students = sources.get("EXCEL", []) + logger.info(f" - {team_id}: {len(excel_students)} estudiants (EXCEL)") - logger.info("=" * 80) - logger.info(f"✅ Mapa actualitzat amb {len(TEAM_STUDENTS_MAP)} equips") - logger.info("=" * 80) + logger.info("=" * 80) + logger.info(f"✅ Mapa actualitzat amb {len(TEAM_STUDENTS_MAP)} equips") + logger.info("=" * 80) - # Executar refresh diari (recalcular totes les mètriques) logger.info("🔄 Executant recàlcul de mètriques...") run_daily_refresh() logger.info("✅ Refresh completat") @@ -233,7 +232,7 @@ def refresh_and_update_map(): json.dumps(TEAM_STUDENTS_MAP, indent=2, ensure_ascii=False), ) - threading.Thread(target=refresh_and_update_map).start() + _executor.submit(refresh_and_update_map) return jsonify({"status": "refresh started"}), 200 diff --git a/database/mongo_client.py b/database/mongo_client.py index 66010d2..bac5576 100644 --- a/database/mongo_client.py +++ b/database/mongo_client.py @@ -1,8 +1,13 @@ from pymongo import MongoClient from config.settings import MONGO_URI, MONGO_DB -# Create the global MongoClient instance. -client = MongoClient(MONGO_URI) +client = MongoClient( + MONGO_URI, + maxPoolSize=25, + minPoolSize=1, + maxIdleTimeMS=60_000, + serverSelectionTimeoutMS=5_000, +) db = client[MONGO_DB] From 9140593fd8377a2050e7bd953b80708d34643f9d Mon Sep 17 00:00:00 2001 From: sergio-utrillaa Date: Fri, 12 Jun 2026 11:44:38 +0200 Subject: [PATCH 4/4] quick test fix --- tests/test_app.py | 25 +++++++++---------------- tests/test_student_data_api_key.py | 20 ++++++-------------- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index 569d55a..820fe00 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -82,16 +82,14 @@ def test_handle_event_returns_200_and_starts_thread(app_module, monkeypatch): payload = {"event_type": "push", "prj": "team-1", "author_login": "alice"} started = {} - class FakeThread: - def __init__(self, target, args=(), kwargs=None): - started["target"] = target + class FakeExecutor: + def submit(self, fn, *args, **kwargs): + started["target"] = fn started["args"] = args - started["kwargs"] = kwargs or {} - - def start(self): + started["kwargs"] = kwargs started["started"] = True - monkeypatch.setattr(app_module.threading, "Thread", FakeThread) + monkeypatch.setattr(app_module, "_executor", FakeExecutor()) response = app_module.app.test_client().post("/api/event", json=payload) @@ -209,16 +207,11 @@ def test_handle_refresh_rebuilds_students_map_and_runs_refresh(app_module, monke } refresh_calls = [] - class FakeThread: - def __init__(self, target, args=(), kwargs=None): - self.target = target - self.args = args - self.kwargs = kwargs or {} - - def start(self): - self.target(*self.args, **self.kwargs) + class FakeExecutor: + def submit(self, fn, *args, **kwargs): + fn(*args, **kwargs) - monkeypatch.setattr(app_module.threading, "Thread", FakeThread) + monkeypatch.setattr(app_module, "_executor", FakeExecutor()) monkeypatch.setattr( app_module, "build_team_students_map", diff --git a/tests/test_student_data_api_key.py b/tests/test_student_data_api_key.py index 02e1169..55db140 100644 --- a/tests/test_student_data_api_key.py +++ b/tests/test_student_data_api_key.py @@ -10,15 +10,11 @@ def test_fetch_projects_sends_ld_api_key(monkeypatch): mock_get = Mock(return_value=response) monkeypatch.setattr(student_api, "BASE_GESSI_URL", "http://tomcat:8080/api") - monkeypatch.setattr(student_api, "LD_HEADERS", {"X-LD-API-Key": "test-ld-api-key"}) - monkeypatch.setattr(student_api.requests, "get", mock_get) + monkeypatch.setattr(student_api._session, "get", mock_get) assert student_api.fetch_projects() == [] - mock_get.assert_called_once_with( - "http://tomcat:8080/api/projects", - headers={"X-LD-API-Key": "test-ld-api-key"}, - timeout=60, - ) + mock_get.assert_called_once_with("http://tomcat:8080/api/projects", timeout=60) + assert student_api._session.headers.get("X-LD-API-Key") is not None def test_fetch_project_details_sends_ld_api_key(monkeypatch): @@ -28,12 +24,8 @@ def test_fetch_project_details_sends_ld_api_key(monkeypatch): mock_get = Mock(return_value=response) monkeypatch.setattr(student_api, "BASE_GESSI_URL", "http://tomcat:8080/api") - monkeypatch.setattr(student_api, "LD_HEADERS", {"X-LD-API-Key": "test-ld-api-key"}) - monkeypatch.setattr(student_api.requests, "get", mock_get) + monkeypatch.setattr(student_api._session, "get", mock_get) assert student_api.fetch_project_details(1) == {"id": 1} - mock_get.assert_called_once_with( - "http://tomcat:8080/api/projects/1", - headers={"X-LD-API-Key": "test-ld-api-key"}, - timeout=60, - ) + mock_get.assert_called_once_with("http://tomcat:8080/api/projects/1", timeout=60) + assert student_api._session.headers.get("X-LD-API-Key") is not None