From e38bc1ade98bd13736f85669757b56c7a4fe4438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Tue, 28 Apr 2026 15:44:36 +0200 Subject: [PATCH 01/22] remove report id from legacy measurement sumission function --- .../src/ooniprobe/routers/reports.py | 151 ++++++++++-------- .../ooniprobe/tests/integ/test_reports.py | 27 +++- 2 files changed, 111 insertions(+), 67 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index 208efe39f..da2a6ae5c 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -1,5 +1,6 @@ import io import logging +from dataclasses import dataclass from datetime import datetime, timezone from hashlib import sha512 from typing import Any, Dict, List, Tuple @@ -107,41 +108,14 @@ async def receive_measurement( content_encoding: str = Header(default=None), ) -> ReceiveMeasurementResponse | Dict[str, Any]: """ - Submit measurement + Submit measurement. + + The `report_id` path parameter is accepted for URL backwards + compatibility but is ignored: metadata used to identify + and validate the measurement is read from the body """ setnocacheresponse(response) empty_measurement = {} - try: - rid_timestamp, test_name, cc, asn, format_cid, rand = report_id.split("_") - except Exception as e: - log.info( - f"Unexpected report_id {report_id[:200]}. Error: {e}", - ) - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_report_id").inc() - raise error("Incorrect format") - - # TODO validate the timestamp? - good = len(cc) == 2 and test_name.isalnum() and 1 < len(test_name) < 30 - if not good: - log.info("Unexpected report_id %r", report_id[:200]) - error("Incorrect format") - - try: - asn_i = int(asn) - except ValueError as e: - log.info(f"ASN value not parsable {asn}. Error: {e}") - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_asn").inc() - error("Incorrect format") - - if asn_i == 0: - log.info("Discarding ASN == 0") - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="asn_0").inc() - return empty_measurement - - if cc.upper() == "ZZ": - log.info("Discarding CC == ZZ") - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="cc_zz").inc() - return empty_measurement with Metrics.READ_BODY_TIMING.time(): data = await request.body() @@ -157,13 +131,37 @@ async def receive_measurement( error("Incorrect format") try: - data = await run_in_threadpool(_set_unverified_flag, data) + data, metadata = await run_in_threadpool(_process_measurement_body, data) except Exception as e: log.info("Failed to parse and modify measurement body") log.exception(e) Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_json").inc() error("Incorrect format") + test_name = metadata.test_name + cc = metadata.probe_cc + asn = metadata.probe_asn + asn_i = normalize_asn(asn) + + # Same validation as old report_id + good = len(cc) == 2 and test_name.isalnum() and 1 < len(test_name) < 30 + if not good: + log.error( + f"Bad metadata in measurement body: test_name={test_name[:30]}, cc={cc}" + ) + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_metadata").inc() + error("Incorrect format") + + if asn_i == 0: + log.info("Discarding ASN == 0") + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="asn_0").inc() + return empty_measurement + + if cc == "ZZ": + log.info("Discarding CC == ZZ") + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="cc_zz").inc() + return empty_measurement + # Write the whole body of the measurement in a directory based on a 1-hour # time window now = datetime.now(timezone.utc) @@ -204,7 +202,7 @@ async def receive_measurement( cc, asn, msmt_uid, - data, + metadata, ) except Exception as e: log.error(f"Error checking for geoip anomalies: {e}") @@ -232,13 +230,61 @@ async def receive_measurement( Metrics.SEND_S3_CNT.labels(status="fail").inc() return empty_measurement +@dataclass +class MeasurementMetadata: + """ + Metadata extracted from a measurement body in a single parse pass. + """ + test_name: str # lowercase, alnum, no underscores + probe_cc: str # uppercase, or "ZZ" + probe_asn: str # E.g. "AS15704", or "AS0" + platform: str + software_name: str + software_version: str + + +def _process_measurement_body(data: bytes) -> Tuple[bytes, MeasurementMetadata]: + """ + - Parse the measurement body + - extract some metadata fields + - set `is_verified="u"` + - re-serialize. + """ -def _set_unverified_flag(data: bytes) -> bytes: with Metrics.DESERIALIZE_BODY_TIMING.time(): - measurement = ujson.loads(data) - measurement["is_verified"] = "u" + json = ujson.loads(data) + + assert isinstance(json, dict) + + content = json.get("content") + assert isinstance(content, dict) + + test_name = str(content.get("test_name") or "").lower().replace("_", "") + + cc = str(content.get("probe_cc") or "").upper().replace("_", "") + if len(cc) != 2: + cc = "ZZ" + + raw_asn = str(content.get("probe_asn") or "AS0").upper() + if len(raw_asn) > 12 or len(raw_asn) < 3 or not raw_asn.startswith("AS"): + raw_asn = "AS0" + + annotations = content.get("annotations", {}) + assert isinstance(annotations, dict) + + metadata = MeasurementMetadata( + test_name=test_name, + probe_cc=cc, + probe_asn=raw_asn, + platform=str(annotations.get("platform") or ""), + software_name=str(content.get("software_name") or ""), + software_version=str(content.get("software_version") or ""), + ) + + json["is_verified"] = "u" + with Metrics.SERIALIZE_BODY_TIMING.time(): - return ujson.dumps(measurement).encode("utf-8") + return ujson.dumps(json).encode("utf-8"), metadata def _check_and_register_geoip_anomaly( @@ -248,12 +294,10 @@ def _check_and_register_geoip_anomaly( cc: str, asn: str, msmt_uid: str, - data: bytes, + metadata: MeasurementMetadata, ) -> None: actual_cc, actual_asn = get_cc_asn(request, asn_cc_reader) if actual_cc != cc or normalize_asn(actual_asn) != normalize_asn(asn): - # expensive: parses measurement body and sends anomaly to clickhouse - platform, software_name, software_version = _parse_metadata(data) register_geoip_anomaly( cc, actual_cc, @@ -261,33 +305,14 @@ def _check_and_register_geoip_anomaly( actual_asn, clickhouse, msmt_uid, - platform, - software_name, - software_version, + metadata.platform, + metadata.software_name, + metadata.software_version, ) else: Metrics.PROBE_CC_ASN_MATCH.inc() -def _parse_metadata(data: bytes) -> Tuple[str, str, str]: - """ - Parse measurement body, and return the following metadata: - - platform, software_name, software_version - """ - try: - body = ujson.loads(data.decode("utf-8")) - except Exception as e: - log.error(f"Couldn't parse json body: {e}") - return ("", "", "") - content = body.get("content", {}) - annotations = content.get("annotations", {}) - platform = annotations.get("platform", "") - software_name = content.get("software_name", "") - software_version = content.get("software_version", "") - return (platform, software_name, software_version) - - @timer(name="close_report") @router.post("/report/{report_id}/close", tags=["reports"]) def close_report(report_id): diff --git a/ooniapi/services/ooniprobe/tests/integ/test_reports.py b/ooniapi/services/ooniprobe/tests/integ/test_reports.py index 463759d1e..c97f37c76 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_reports.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_reports.py @@ -42,6 +42,8 @@ async def test_collector_open_report(client): @pytest.mark.asyncio async def test_collector_upload_msmt_bogus(client): + # The path component is ignored, but a body without valid probe_cc / + # probe_asn / test_name still has to be rejected. j = dict(format="json", content=dict(test_keys={})) resp = client.post("/report/bogus", json=j) assert resp.status_code == 400, resp @@ -69,7 +71,15 @@ async def test_collector_upload_msmt_valid(client): } assert len(rid) == 61, rid - upload_payload = {"format": "json", "content": {"test_keys": {}}} + upload_payload = { + "format": "json", + "content": { + "test_keys": {}, + "probe_cc": "IE", + "probe_asn": "AS34245", + "test_name": "web_connectivity", + }, + } c = postj(client, f"/report/{rid}", upload_payload) expected_hash = _get_hash_of(upload_payload) assert c["measurement_uid"].endswith(f"_IE_webconnectivity_{expected_hash}"), c @@ -80,16 +90,25 @@ async def test_collector_upload_msmt_valid(client): @pytest.mark.asyncio async def test_collector_upload_msmt_valid_zstd(client): - rid = "20230101T000000Z_integtest_IT_1_n1_integtest0000000" - msmt_payload = {"test_keys": {}} + rid = "ignored-by-the-server" + msmt_payload = { + "format": "json", + "content": { + "test_keys": {}, + "probe_cc": "IT", + "probe_asn": "AS1", + "test_name": "integtest", + }, + } zmsmt = zstd.compress(json.dumps(msmt_payload).encode()) headers = [("Content-Encoding", "zstd")] c = post(client, f"/report/{rid}", zmsmt, headers=headers) - assert len(c) == 1 + assert "measurement_uid" in c, c expected_hash = _get_hash_of(msmt_payload) assert c["measurement_uid"].endswith(f"_IT_integtest_{expected_hash}"), c + def _get_hash_of(msmt: dict) -> str: payload = copy.deepcopy(msmt) payload["is_verified"] = "u" From 393d254252743f3a8ead07cde5d09eaa78fb387c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Tue, 28 Apr 2026 16:58:45 +0200 Subject: [PATCH 02/22] Remove report_id from anonymous credentials submit --- .../src/ooniprobe/routers/reports.py | 38 +--------- .../ooniprobe/routers/v1/probe_services.py | 73 +++++++++---------- .../services/ooniprobe/src/ooniprobe/utils.py | 41 ++++++++++- .../ooniprobe/tests/integ/test_geolookup.py | 13 ++-- .../services/ooniprobe/tests/test_anoncred.py | 29 +++----- 5 files changed, 97 insertions(+), 97 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index da2a6ae5c..fca2bd8b8 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -1,6 +1,5 @@ import io import logging -from dataclasses import dataclass from datetime import datetime, timezone from hashlib import sha512 from typing import Any, Dict, List, Tuple @@ -18,9 +17,11 @@ from ..dependencies import ASNCCReaderDep, SettingsDep from ..metrics import Metrics from ..utils import ( + MeasurementMetadata, error, generate_report_id, get_cc_asn, + metadata_from_measurement_content, normalize_asn, register_geoip_anomaly, ) @@ -230,19 +231,6 @@ async def receive_measurement( Metrics.SEND_S3_CNT.labels(status="fail").inc() return empty_measurement -@dataclass -class MeasurementMetadata: - """ - Metadata extracted from a measurement body in a single parse pass. - """ - test_name: str # lowercase, alnum, no underscores - probe_cc: str # uppercase, or "ZZ" - probe_asn: str # E.g. "AS15704", or "AS0" - platform: str - software_name: str - software_version: str - - def _process_measurement_body(data: bytes) -> Tuple[bytes, MeasurementMetadata]: """ - Parse the measurement body @@ -259,27 +247,7 @@ def _process_measurement_body(data: bytes) -> Tuple[bytes, MeasurementMetadata]: content = json.get("content") assert isinstance(content, dict) - test_name = str(content.get("test_name") or "").lower().replace("_", "") - - cc = str(content.get("probe_cc") or "").upper().replace("_", "") - if len(cc) != 2: - cc = "ZZ" - - raw_asn = str(content.get("probe_asn") or "AS0").upper() - if len(raw_asn) > 12 or len(raw_asn) < 3 or not raw_asn.startswith("AS"): - raw_asn = "AS0" - - annotations = content.get("annotations", {}) - assert isinstance(annotations, dict) - - metadata = MeasurementMetadata( - test_name=test_name, - probe_cc=cc, - probe_asn=raw_asn, - platform=str(annotations.get("platform") or ""), - software_name=str(content.get("software_name") or ""), - software_version=str(content.get("software_version") or ""), - ) + metadata = metadata_from_measurement_content(content) json["is_verified"] = "u" diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py index 90f8cc321..7e188e03d 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py @@ -52,6 +52,7 @@ generate_report_id, geolookup_probe, get_cc_asn, + metadata_from_measurement_content, normalize_asn, register_geoip_anomaly, ) @@ -872,9 +873,8 @@ class SubmitMeasurementResponse(BaseModel): ) -@router.post("/submit_measurement/{report_id}", tags=["anonymous_credentials"]) +@router.post("/submit_measurement", tags=["anonymous_credentials"]) async def submit_measurement( - report_id: str, request: Request, submit_request: SubmitMeasurementRequest, response: Response, @@ -900,30 +900,20 @@ async def submit_measurement( - status code 4xx or 5xx: the measurement was not processed nor stored """ setnocacheresponse(response) - try: - rid_timestamp, test_name, cc, asn, format_cid, rand = report_id.split("_") - except Exception: - err_msg = f"Incorrect format: unexpected report_id {report_id[:200]}" - log.info(err_msg) - raise HTTPException( - status_code=400, - detail={"error": "incorrect_format", "message": err_msg}, - ) - # TODO validate the timestamp? + metadata = metadata_from_measurement_content(submit_request.content) + + test_name = metadata.test_name + cc = metadata.probe_cc + asn = metadata.probe_asn + asn_i = normalize_asn(asn) + good = len(cc) == 2 and test_name.isalnum() and 1 < len(test_name) < 30 if not good: - err_msg = f"Incorrect format: unexpected report_id {report_id[:200]}" - log.info(err_msg) - raise HTTPException( - status_code=400, - detail={"error": "incorrect_format", "message": err_msg}, + err_msg = ( + "Incorrect format: bad metadata in measurement body " + f"(test_name={test_name[:30]}, cc={cc})" ) - - try: - asn_i = int(asn) - except ValueError: - err_msg = f"Incorrect format: ASN value not parsable {asn}" log.info(err_msg) raise HTTPException( status_code=400, @@ -933,23 +923,30 @@ async def submit_measurement( if asn_i == 0: log.info("Discarding ASN == 0") Metrics.MSMNT_DISCARD_ASN0.inc() - raise HTTPException(400, detail = {"error" : "asn_0", "message" : "Measurement discarded, ASN == 0"}) + raise HTTPException( + 400, + detail={ + "error": "asn_0", + "message": "Measurement discarded, ASN == 0", + }, + ) - if cc.upper() == "ZZ": + if cc == "ZZ": log.info("Discarding CC == ZZ") Metrics.MSMNT_DISCARD_CC_ZZ.inc() - raise HTTPException(400, detail = {"error" : "cc_zz", "message" : "Measurement discarded, CC == ZZ"}) + raise HTTPException( + 400, + detail={ + "error": "cc_zz", + "message": "Measurement discarded, CC == ZZ", + }, + ) # Anonymous credentials verification verification_status, submit_error, submit_response = _verify_submit( submit_request, manifest, settings ) - annotations = submit_request.content.get("annotations", {}) - platform = annotations.get("platform") or "" - software_name = submit_request.content.get("software_name") or "" - software_version = submit_request.content.get("software_version") or "" - data = submit_request.model_dump() # Add verification-related data. @@ -1010,9 +1007,9 @@ async def submit_measurement( cc, asn, msmt_uid, - platform, - software_name, - software_version, + metadata.platform, + metadata.software_name, + metadata.software_version, ) except Exception as e: log.error(f"Error checking for geoip anomalies: {e}") @@ -1028,19 +1025,21 @@ async def submit_measurement( Metrics.SEND_FASTPATH_CNT.labels(status="fail").inc() # wasn't possible to send msmnt to fastpath, try to send it to s3 - data_buff.seek(0) + ts_prefix = now.strftime("%Y%m%d%H") + tn = test_name.replace("_", "") + s3_key = f"postcans/{ts_prefix}/{ts_prefix}_{cc}_{tn}/{msmt_uid}.post" try: await run_in_threadpool( request.app.state.s3_client.upload_fileobj, - data_buff, + io.BytesIO(data_bin), Bucket=settings.failed_reports_bucket, - Key=report_id, + Key=s3_key, ) except Exception as exc: log.error(f"Unable to upload measurement to s3. Error: {exc}") Metrics.SEND_S3_FAILURE.inc() - log.error(f"Unable to send report to fastpath. report_id: {report_id}") + log.error(f"Unable to send report to fastpath. measurement_uid: {msmt_uid}") Metrics.MISSED_MSMNTS.inc() return SubmitMeasurementResponse( measurement_uid=msmt_uid, diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py index 296484787..e71223698 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py @@ -4,6 +4,7 @@ Insert VPN credentials into database. """ +from dataclasses import dataclass import io import itertools import logging @@ -11,7 +12,7 @@ from base64 import b64encode from datetime import datetime, timezone from os import urandom -from typing import Any, Dict, List, Tuple, TypedDict +from typing import Any, Dict, List, Mapping, Tuple, TypedDict import requests import pem @@ -132,6 +133,44 @@ def generate_report_id(test_name, settings: Settings, cc: str, asn_i: int) -> st return rid +@dataclass +class MeasurementMetadata: + """Metadata extracted from a measurement body's `content` object.""" + + test_name: str # lowercase, underscores stripped (matches report id segment) + probe_cc: str # uppercase, 2-letter or "ZZ" when unknown + probe_asn: str # canonical "AS..." string or "AS0" + platform: str + software_name: str + software_version: str + + +def metadata_from_measurement_content(content: Mapping[str, Any]) -> MeasurementMetadata: + """Normalize fields the same way as `generate_report_id` / `open_report` do.""" + test_name = str(content.get("test_name") or "").lower().replace("_", "") + + cc = str(content.get("probe_cc") or "").upper().replace("_", "") + if len(cc) != 2: + cc = "ZZ" + + raw_asn = str(content.get("probe_asn") or "AS0").upper() + if len(raw_asn) > 12 or len(raw_asn) < 3 or not raw_asn.startswith("AS"): + raw_asn = "AS0" + + annotations = content.get("annotations", {}) + if not isinstance(annotations, dict): + annotations = {} + + return MeasurementMetadata( + test_name=test_name, + probe_cc=cc, + probe_asn=raw_asn, + platform=str(annotations.get("platform") or ""), + software_name=str(content.get("software_name") or ""), + software_version=str(content.get("software_version") or ""), + ) + + def extract_probe_ipaddr(request: Request) -> str: real_ip_headers = ["X-Forwarded-For", "X-Real-IP"] diff --git a/ooniapi/services/ooniprobe/tests/integ/test_geolookup.py b/ooniapi/services/ooniprobe/tests/integ/test_geolookup.py index 362985675..ef18f415e 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_geolookup.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_geolookup.py @@ -152,14 +152,13 @@ async def test_geoip_mismatch_anoncred(client, clickhouse_db, clean_faulty_measu # Open a report for the anoncred submit endpoint report_req = make_report_request(probe_cc="VE", probe_asn="AS65550") - c = postj(client, "/report", report_req) - rid = c["report_id"] + postj(client, "/report", report_req) # Create anoncred user and submit_request user, manifest_version, emission_day = setup_user(client) submit_request = make_submit_request(user, "VE", "AS65550") - # Build measurement body used by /api/v1/submit_measurement/{rid} + # Build measurement body for `/api/v1/submit_measurement` msm = make_measurement( submit_request.nym, submit_request.request, @@ -176,7 +175,7 @@ async def test_geoip_mismatch_anoncred(client, clickhouse_db, clean_faulty_measu # matching cc and asn postj( client, - f"/api/v1/submit_measurement/{rid}", + "/api/v1/submit_measurement", msm, headers={"X-Forwarded-For": "123.123.123.123"}, ) @@ -186,7 +185,7 @@ async def test_geoip_mismatch_anoncred(client, clickhouse_db, clean_faulty_measu # cc mismatch only postj( client, - f"/api/v1/submit_measurement/{rid}", + "/api/v1/submit_measurement", msm, headers={"X-Forwarded-For": "123.123.123.124"}, ) @@ -197,7 +196,7 @@ async def test_geoip_mismatch_anoncred(client, clickhouse_db, clean_faulty_measu # ASN mismatch only postj( client, - f"/api/v1/submit_measurement/{rid}", + "/api/v1/submit_measurement", msm, headers={"X-Forwarded-For": "123.123.123.125"}, ) @@ -208,7 +207,7 @@ async def test_geoip_mismatch_anoncred(client, clickhouse_db, clean_faulty_measu # both cc and ASN mismatch postj( client, - f"/api/v1/submit_measurement/{rid}", + "/api/v1/submit_measurement", msm, headers={"X-Forwarded-For": "123.123.123.126"}, ) diff --git a/ooniapi/services/ooniprobe/tests/test_anoncred.py b/ooniapi/services/ooniprobe/tests/test_anoncred.py index 6cdc499ae..9bcc357db 100644 --- a/ooniapi/services/ooniprobe/tests/test_anoncred.py +++ b/ooniapi/services/ooniprobe/tests/test_anoncred.py @@ -80,9 +80,7 @@ async def test_registration_errors(client): @pytest.mark.asyncio async def test_submission_basic(client): # open report - j = make_report_request("IE", "AS34245") - resp = postj(client, "/report", json=j) - rid = resp.pop("report_id") + postj(client, "/report", json=make_report_request("IE", "AS34245")) # Create user user, manifest_version, emission_day = setup_user(client) @@ -91,7 +89,7 @@ async def test_submission_basic(client): msm = make_measurement(submit_request.nym, submit_request.request, manifest_version) - c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + c = postj(client, "/api/v1/submit_measurement", msm) assert c["verification_status"] == "verified", c assert c['submit_response'], "Submit response should not be null if the proof was verified" @@ -105,8 +103,7 @@ async def test_submission_non_verified(client): """ j = make_report_request() - resp = postj(client, "/report", json=j) - rid = resp.pop("report_id") + postj(client, "/report", json=j) msm = { "format": "json", @@ -119,14 +116,14 @@ async def test_submission_non_verified(client): } # no anoncred fields -> processed but not verified, no error - c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + c = postj(client, "/api/v1/submit_measurement", msm) assert c["verification_status"] == "unverified" assert c["submit_response"] is None assert c["error"] is None # unknown manifest -> processed but not verified, manifest error msm["manifest_version"] = "does-not-exist" - c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + c = postj(client, "/api/v1/submit_measurement", msm) assert c["verification_status"] == "unverified" assert c["submit_response"] is None assert c["error"] == "manifest_not_found" @@ -135,7 +132,7 @@ async def test_submission_non_verified(client): user, manifest_version, _ = setup_user(client) msm["nym"] = "dummy-nym" msm["manifest_version"] = manifest_version - c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + c = postj(client, "/api/v1/submit_measurement", msm) assert c["verification_status"] == "unverified" assert c["submit_response"] is None assert c["error"] == "incomplete_anonc_fields" @@ -146,14 +143,14 @@ async def test_submission_non_verified(client): msm["zkp_request"] = submit_request.request msm["manifest_version"] = manifest_version msm["protocol_version"] = "0.0.1" - c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + c = postj(client, "/api/v1/submit_measurement", msm) assert c["verification_status"] == "unverified" assert c["submit_response"] is None assert c["error"] == "protocol_version_too_old" # unparsable protocol version -> invalid protocol version error msm["protocol_version"] = "abc" - c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + c = postj(client, "/api/v1/submit_measurement", msm) assert c["verification_status"] == "unverified" assert c["submit_response"] is None assert c["error"] == "invalid_protocol_version" @@ -415,14 +412,13 @@ async def test_credential_update_with_submission(client, client_with_original_ma # first submit: should just work out of the box j = make_report_request() - resp = postj(client, "/report", json=j) - rid = resp.pop("report_id") + postj(client, "/report", json=j) submit_request = make_submit_request(user, "IE", "AS34245") msm = make_measurement(submit_request.nym, submit_request.request, manifest_version) - c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + c = postj(client, "/api/v1/submit_measurement", msm) assert c["verification_status"] == "verified" @@ -439,14 +435,13 @@ async def test_credential_update_with_submission(client, client_with_original_ma user.handle_credential_update_response(result['update_response']) # should not crash j = make_report_request() - resp = postj(client, "/report", json=j) - rid = resp.pop("report_id") + postj(client, "/report", json=j) submit_request = make_submit_request(user, "IE", "AS34245") msm = make_measurement(submit_request.nym, submit_request.request, manifest_version) - c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + c = postj(client, "/api/v1/submit_measurement", msm) def make_measurement( From c804c8401e41ca45f74edd21c3a1e2ab443d0922 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Wed, 29 Apr 2026 12:02:09 +0200 Subject: [PATCH 03/22] Disaggregate bad measurements count by type of error --- .../ooniprobe/src/ooniprobe/routers/reports.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index fca2bd8b8..dcb261597 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -145,12 +145,23 @@ async def receive_measurement( asn_i = normalize_asn(asn) # Same validation as old report_id - good = len(cc) == 2 and test_name.isalnum() and 1 < len(test_name) < 30 - if not good: + cc_ok = len(cc) == 2 + test_name_alnum_ok = test_name.isalnum() + test_name_len_ok = 1 < len(test_name) < 30 + if not (cc_ok and test_name_alnum_ok and test_name_len_ok): log.error( f"Bad metadata in measurement body: test_name={test_name[:30]}, cc={cc}" ) - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_metadata").inc() + if not cc_ok: + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_cc").inc() + if not test_name_alnum_ok: + Metrics.BAD_MEASUREMENTS_CNT.labels( + reason="tn_not_alnum" + ).inc() + if not test_name_len_ok: + Metrics.BAD_MEASUREMENTS_CNT.labels( + reason="tn_len" + ).inc() error("Incorrect format") if asn_i == 0: From 77a5ecf1904ae20633d1732a7919731f4b326f5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 30 Apr 2026 11:42:57 +0200 Subject: [PATCH 04/22] disaggregate type of error in anonc submission path --- .../ooniprobe/routers/v1/probe_services.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py index 7e188e03d..e90898d11 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py @@ -908,16 +908,26 @@ async def submit_measurement( asn = metadata.probe_asn asn_i = normalize_asn(asn) - good = len(cc) == 2 and test_name.isalnum() and 1 < len(test_name) < 30 - if not good: - err_msg = ( - "Incorrect format: bad metadata in measurement body " - f"(test_name={test_name[:30]}, cc={cc})" + cc_ok = len(cc) == 2 + test_name_alnum_ok = test_name.isalnum() + test_name_len_ok = 1 < len(test_name) < 30 + if not (cc_ok and test_name_alnum_ok and test_name_len_ok): + log.error( + f"Bad metadata in measurement body: test_name={test_name[:30]}, cc={cc}" ) - log.info(err_msg) + if not cc_ok: + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_cc").inc() + if not test_name_alnum_ok: + Metrics.BAD_MEASUREMENTS_CNT.labels( + reason="tn_not_alnum" + ).inc() + if not test_name_len_ok: + Metrics.BAD_MEASUREMENTS_CNT.labels( + reason="tn_len" + ).inc() raise HTTPException( status_code=400, - detail={"error": "incorrect_format", "message": err_msg}, + detail={"error": "incorrect_format", "message": "Incorrect format"}, ) if asn_i == 0: From dff6de54205344c59a186f1cb8efd420ee4e8932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 30 Apr 2026 12:02:59 +0200 Subject: [PATCH 05/22] Refactor metadata checking in submission --- .../src/ooniprobe/routers/reports.py | 32 +--------- .../ooniprobe/routers/v1/probe_services.py | 46 +------------- .../services/ooniprobe/src/ooniprobe/utils.py | 63 +++++++++++++++++++ 3 files changed, 67 insertions(+), 74 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index dcb261597..cb903bd4b 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -18,6 +18,7 @@ from ..metrics import Metrics from ..utils import ( MeasurementMetadata, + check_measurement_meta, error, generate_report_id, get_cc_asn, @@ -142,37 +143,8 @@ async def receive_measurement( test_name = metadata.test_name cc = metadata.probe_cc asn = metadata.probe_asn - asn_i = normalize_asn(asn) - - # Same validation as old report_id - cc_ok = len(cc) == 2 - test_name_alnum_ok = test_name.isalnum() - test_name_len_ok = 1 < len(test_name) < 30 - if not (cc_ok and test_name_alnum_ok and test_name_len_ok): - log.error( - f"Bad metadata in measurement body: test_name={test_name[:30]}, cc={cc}" - ) - if not cc_ok: - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_cc").inc() - if not test_name_alnum_ok: - Metrics.BAD_MEASUREMENTS_CNT.labels( - reason="tn_not_alnum" - ).inc() - if not test_name_len_ok: - Metrics.BAD_MEASUREMENTS_CNT.labels( - reason="tn_len" - ).inc() - error("Incorrect format") - - if asn_i == 0: - log.info("Discarding ASN == 0") - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="asn_0").inc() - return empty_measurement - if cc == "ZZ": - log.info("Discarding CC == ZZ") - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="cc_zz").inc() - return empty_measurement + check_measurement_meta(test_name, cc, asn) # Write the whole body of the measurement in a directory based on a 1-hour # time window diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py index e90898d11..3f9018d43 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py @@ -48,6 +48,7 @@ PsiphonConfigDep ) from ...utils import ( + check_measurement_meta, extract_probe_ipaddr, generate_report_id, geolookup_probe, @@ -906,51 +907,8 @@ async def submit_measurement( test_name = metadata.test_name cc = metadata.probe_cc asn = metadata.probe_asn - asn_i = normalize_asn(asn) - cc_ok = len(cc) == 2 - test_name_alnum_ok = test_name.isalnum() - test_name_len_ok = 1 < len(test_name) < 30 - if not (cc_ok and test_name_alnum_ok and test_name_len_ok): - log.error( - f"Bad metadata in measurement body: test_name={test_name[:30]}, cc={cc}" - ) - if not cc_ok: - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_cc").inc() - if not test_name_alnum_ok: - Metrics.BAD_MEASUREMENTS_CNT.labels( - reason="tn_not_alnum" - ).inc() - if not test_name_len_ok: - Metrics.BAD_MEASUREMENTS_CNT.labels( - reason="tn_len" - ).inc() - raise HTTPException( - status_code=400, - detail={"error": "incorrect_format", "message": "Incorrect format"}, - ) - - if asn_i == 0: - log.info("Discarding ASN == 0") - Metrics.MSMNT_DISCARD_ASN0.inc() - raise HTTPException( - 400, - detail={ - "error": "asn_0", - "message": "Measurement discarded, ASN == 0", - }, - ) - - if cc == "ZZ": - log.info("Discarding CC == ZZ") - Metrics.MSMNT_DISCARD_CC_ZZ.inc() - raise HTTPException( - 400, - detail={ - "error": "cc_zz", - "message": "Measurement discarded, CC == ZZ", - }, - ) + check_measurement_meta(test_name, cc, asn) # Anonymous credentials verification verification_status, submit_error, submit_response = _verify_submit( diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py index e71223698..725d3cd5e 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py @@ -211,6 +211,69 @@ def normalize_asn(asn: str) -> int: return 0 +def check_measurement_meta( + test_name: str, + probe_cc: str, + probe_asn: str, +) -> None: + """ + Checks metadata consistency, raising an HTTPException and stopping + ingestion when an inconsistency is detected + """ + asn_i = normalize_asn(probe_asn) + + cc_ok = len(probe_cc) == 2 + test_name_alnum_ok = test_name.isalnum() + test_name_len_ok = 1 < len(test_name) < 30 + if not (cc_ok and test_name_alnum_ok and test_name_len_ok): + log.error( + "Bad metadata in measurement body: test_name=" + f"{test_name[:30]}, cc={probe_cc}" + ) + reason: str | None = None + if not cc_ok: + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_cc").inc() + reason = reason or "bad_cc" + if not test_name_alnum_ok: + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="tn_not_alnum").inc() + reason = reason or "tn_not_alnum" + if not test_name_len_ok: + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="tn_len").inc() + reason = reason or "tn_len" + assert reason is not None + raise HTTPException( + status_code=400, + detail={ + "error": reason, + "message": "Incorrect format", + }, + ) + + if asn_i == 0: + log.info("Discarding ASN == 0") + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="asn_0").inc() + Metrics.MSMNT_DISCARD_ASN0.inc() + raise HTTPException( + 400, + detail={ + "error": "asn_0", + "message": "Measurement discarded, ASN == 0", + }, + ) + + if probe_cc == "ZZ": + log.info("Discarding CC == ZZ") + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="cc_zz").inc() + Metrics.MSMNT_DISCARD_CC_ZZ.inc() + raise HTTPException( + 400, + detail={ + "error": "cc_zz", + "message": "Measurement discarded, CC == ZZ", + }, + ) + + def get_cc_asn( request: Request, asn_cc_reader: ASNCCReaderDep ) -> Tuple[str, str]: From dc5ee4ead3ba8813e54ba67ad5e55c6778dfd460 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 30 Apr 2026 12:43:28 +0200 Subject: [PATCH 06/22] simplify typing --- ooniapi/services/ooniprobe/src/ooniprobe/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py index 725d3cd5e..319a621bc 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py @@ -12,7 +12,7 @@ from base64 import b64encode from datetime import datetime, timezone from os import urandom -from typing import Any, Dict, List, Mapping, Tuple, TypedDict +from typing import Any, Dict, List, Tuple, TypedDict import requests import pem @@ -145,7 +145,7 @@ class MeasurementMetadata: software_version: str -def metadata_from_measurement_content(content: Mapping[str, Any]) -> MeasurementMetadata: +def metadata_from_measurement_content(content: dict[str, Any]) -> MeasurementMetadata: """Normalize fields the same way as `generate_report_id` / `open_report` do.""" test_name = str(content.get("test_name") or "").lower().replace("_", "") From 198bb1e63835f50378227a8b5bf77b70d22b084d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 30 Apr 2026 12:46:53 +0200 Subject: [PATCH 07/22] Improve docstring --- .../services/ooniprobe/src/ooniprobe/utils.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py index 319a621bc..6047391da 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py @@ -146,7 +146,10 @@ class MeasurementMetadata: def metadata_from_measurement_content(content: dict[str, Any]) -> MeasurementMetadata: - """Normalize fields the same way as `generate_report_id` / `open_report` do.""" + """ + Parses metadata from the `content` key in a measurement body, then + formats fields the same way as `generate_report_id` / `open_report` do. + """ test_name = str(content.get("test_name") or "").lower().replace("_", "") cc = str(content.get("probe_cc") or "").upper().replace("_", "") @@ -201,7 +204,9 @@ def error(msg: str | Dict[str, Any], status_code: int = 400): def normalize_asn(asn: str) -> int: - """Return ASN as int (strip 'AS' prefix if present). Invalid values return 0.""" + """ + Return ASN as int (strip 'AS' prefix if present). Invalid values return 0. + """ s = str(asn).strip() s = s[2:] if s.startswith("AS") else s try: @@ -277,7 +282,8 @@ def check_measurement_meta( def get_cc_asn( request: Request, asn_cc_reader: ASNCCReaderDep ) -> Tuple[str, str]: - """Geo-lookup the request's source IP and return (cc, asn). + """ + Geo-lookup the request's source IP and return (cc, asn). Falls back to ("ZZ", "AS0") when the lookup fails. """ @@ -300,7 +306,9 @@ def register_geoip_anomaly( software_name: str, software_version: str, ) -> None: - """Record a geoip mismatch in faulty_measurements.""" + """ + Record a geoip mismatch in faulty_measurements. + """ sub_asn = normalize_asn(asn) actual_asn_int = normalize_asn(actual_asn) if actual_cc != cc: From 5374c5ce6681416f11b67ddd0831d2f3579218fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 30 Apr 2026 12:52:48 +0200 Subject: [PATCH 08/22] remove unused function --- ooniapi/services/ooniprobe/tests/integ/test_reports.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ooniapi/services/ooniprobe/tests/integ/test_reports.py b/ooniapi/services/ooniprobe/tests/integ/test_reports.py index e2af5f9a7..380171fa3 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_reports.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_reports.py @@ -111,13 +111,6 @@ async def test_collector_upload_msmt_valid_zstd(client): expected_hash = get_msmt_hash(msmt_payload) assert c["measurement_uid"].endswith(f"_IT_integtest_{expected_hash}"), c - -def _get_hash_of(msmt: dict) -> str: - payload = copy.deepcopy(msmt) - payload["is_verified"] = "u" - d = ujson.dumps(payload).encode() - return sha512(d).hexdigest()[:16] - @pytest.mark.asyncio async def test_fastpath_fallback(client_with_mocked_fastpath): """When the first fastpath URL fails, the second one in the list From 90d52f34350a8495996a82480c75967a4c5b09c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 30 Apr 2026 12:54:37 +0200 Subject: [PATCH 09/22] remove unused imports --- ooniapi/services/ooniprobe/tests/integ/test_reports.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ooniapi/services/ooniprobe/tests/integ/test_reports.py b/ooniapi/services/ooniprobe/tests/integ/test_reports.py index 380171fa3..c25f10653 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_reports.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_reports.py @@ -1,5 +1,3 @@ -import copy -from hashlib import sha512 import json import zstd import pytest From eea7d171b4312e81e44f1a73adcd5745d342e243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 7 May 2026 13:20:08 +0200 Subject: [PATCH 10/22] Set report_id in measurement submission path and add test for checking this --- .../src/ooniprobe/routers/reports.py | 16 +++- .../services/ooniprobe/src/ooniprobe/utils.py | 2 +- .../ooniprobe/tests/integ/test_reports.py | 74 +++++++++++++++---- 3 files changed, 73 insertions(+), 19 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index f7980a6ee..f1cc5d015 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -10,6 +10,7 @@ from pydantic import Field from starlette.concurrency import run_in_threadpool +from ..common.config import Settings from ..common.dependencies import ClickhouseDep from ..common.metrics import timer from ..common.routers import BaseModel @@ -133,7 +134,9 @@ async def receive_measurement( error("Incorrect format") try: - data, metadata = await run_in_threadpool(_process_measurement_body, data) + data, metadata = await run_in_threadpool( + _process_measurement_body, data, settings + ) except Exception as e: log.info("Failed to parse and modify measurement body") log.exception(e) @@ -224,11 +227,14 @@ async def receive_measurement( Metrics.SEND_S3_CNT.labels(status="fail").inc() return empty_measurement -def _process_measurement_body(data: bytes) -> Tuple[bytes, MeasurementMetadata]: +def _process_measurement_body( + data: bytes, settings: Settings +) -> Tuple[bytes, MeasurementMetadata]: """ - Parse the measurement body - extract some metadata fields - set `is_verified="u"` + - set report_id - re-serialize. """ @@ -243,6 +249,12 @@ def _process_measurement_body(data: bytes) -> Tuple[bytes, MeasurementMetadata]: metadata = metadata_from_measurement_content(content) json["is_verified"] = "u" + json["report_id"] = generate_report_id( + metadata.test_name, + settings, + metadata.probe_cc, + normalize_asn(metadata.probe_asn) + ) with Metrics.SERIALIZE_BODY_TIMING.time(): return ujson.dumps(json).encode("utf-8"), metadata diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py index 6047391da..e7a281111 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py @@ -207,7 +207,7 @@ def normalize_asn(asn: str) -> int: """ Return ASN as int (strip 'AS' prefix if present). Invalid values return 0. """ - s = str(asn).strip() + s = str(asn).strip().upper() s = s[2:] if s.startswith("AS") else s try: return int(s) diff --git a/ooniapi/services/ooniprobe/tests/integ/test_reports.py b/ooniapi/services/ooniprobe/tests/integ/test_reports.py index c25f10653..3575df39c 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_reports.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_reports.py @@ -1,4 +1,5 @@ import json +import re import zstd import pytest import ujson @@ -82,8 +83,9 @@ async def test_collector_upload_msmt_valid(client): }, } c = postj(client, f"/report/{rid}", upload_payload) - expected_hash = get_msmt_hash(upload_payload) - assert c["measurement_uid"].endswith(f"_IE_webconnectivity_{expected_hash}"), c + assert re.fullmatch( + r"\d{14}\.\d+_IE_webconnectivity_[0-9a-f]{16}", c["measurement_uid"] + ), c c = postj(client, f"/report/{rid}/close", json={}) assert c == {}, c @@ -106,8 +108,9 @@ async def test_collector_upload_msmt_valid_zstd(client): c = post(client, f"/report/{rid}", zmsmt, headers=headers) assert "measurement_uid" in c, c - expected_hash = get_msmt_hash(msmt_payload) - assert c["measurement_uid"].endswith(f"_IT_integtest_{expected_hash}"), c + assert re.fullmatch( + r"\d{14}\.\d+_IT_integtest_[0-9a-f]{16}", c["measurement_uid"] + ), c @pytest.mark.asyncio async def test_fastpath_fallback(client_with_mocked_fastpath): @@ -136,18 +139,56 @@ async def test_fastpath_fallback(client_with_mocked_fastpath): msmt_uid = body["measurement_uid"] assert msmt_uid, body - expected_hash = get_msmt_hash(msmt_payload) - assert msmt_uid.endswith(f"_IT_integtest_{expected_hash}"), msmt_uid - - # check saved data expected_url = f"{success_url}/{msmt_uid}" assert list(mock_fastpath.uploads.keys()) == [expected_url] stored = ujson.loads(mock_fastpath.uploads[expected_url]) - assert get_msmt_hash(stored) == expected_hash + expected_hash = get_msmt_hash(stored) + assert msmt_uid.endswith(f"_IT_integtest_{expected_hash}"), msmt_uid assert stored["is_verified"] == "u" +@pytest.mark.asyncio +async def test_fastpath_payload_has_report_id(client_with_two_working_fastpaths): + """ + The body forwarded to the fastpath must include a freshly generated + `report_id` derived from the measurement body's metadata, regardless of + the (ignored) `report_id` in the URL path. + """ + client, mock_fastpath, first_url, _ = client_with_two_working_fastpaths + + og_rid = "ignored-by-the-server" + msmt_payload = { + "format": "json", + "content": { + "test_keys": {}, + "annotations": {"platform": "test_platform"}, + "software_name": "test_software", + "software_version": "0.0.0", + "probe_cc": "IT", + "probe_asn": "AS1", + "test_name": "integtest", + }, + } + resp = client.post(f"/report/{og_rid}", json=msmt_payload) + assert resp.status_code == 200, resp.text + msmt_uid = resp.json().get("measurement_uid") + assert msmt_uid + + expected_url = f"{first_url}/{msmt_uid}" + assert expected_url in mock_fastpath.uploads, mock_fastpath.uploads + + stored = ujson.loads(mock_fastpath.uploads[expected_url]) + rid = stored.get("report_id") + assert isinstance(rid, str) and rid, stored + assert rid != og_rid + # collector_id is "1" in test_settings; report id format: + # ____n_ + assert re.fullmatch( + r"\d{8}T\d{6}Z_integtest_IT_1_n1_[A-Za-z0-9oo]{16}", rid + ), rid + + @pytest.mark.asyncio async def test_fastpath_only_submits_once_on_success(client_with_two_working_fastpaths): """ @@ -174,13 +215,14 @@ async def test_fastpath_only_submits_once_on_success(client_with_two_working_fas msmt_uid = body.get("measurement_uid") assert msmt_uid, body - # Sanity-check the bytes that were forwarded to the fastpath - expected_hash = get_msmt_hash(msmt_payload) - assert msmt_uid.endswith(f"_IT_integtest_{expected_hash}"), msmt_uid - - # Only the first fastpath URL should have received the measurement expected_url = f"{first_url}/{msmt_uid}" - assert list(mock_fastpath.uploads.keys()) == [expected_url], ( + uploaded_urls = list(mock_fastpath.uploads.keys()) + assert uploaded_urls == [expected_url], ( "measurement should be forwarded to the first fastpath URL only, " - f"got {list(mock_fastpath.uploads.keys())}" + f"got {uploaded_urls}" ) + + # Sanity-check the bytes that were forwarded to the fastpath + stored = ujson.loads(mock_fastpath.uploads[expected_url]) + expected_hash = get_msmt_hash(stored) + assert msmt_uid.endswith(f"_IT_integtest_{expected_hash}"), msmt_uid From a973560b2294cd87d3ad6817e5e1e97b2247bdd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 7 May 2026 16:11:55 +0200 Subject: [PATCH 11/22] Add check to verify that submit_measurement always sets the report_id --- .../ooniprobe/routers/v1/probe_services.py | 4 + ooniapi/services/ooniprobe/tests/conftest.py | 85 +++++++++++-------- .../ooniprobe/tests/integ/test_reports.py | 10 +-- .../services/ooniprobe/tests/test_anoncred.py | 72 +++++++++++++--- 4 files changed, 116 insertions(+), 55 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py index d7364689e..824c599ae 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py @@ -910,6 +910,9 @@ async def submit_measurement( check_measurement_meta(test_name, cc, asn) + # generate the new report_id + rid = generate_report_id(test_name, settings, cc, normalize_asn(asn)) + # Anonymous credentials verification verification_status, submit_error, submit_response = _verify_submit( submit_request, manifest, settings @@ -920,6 +923,7 @@ async def submit_measurement( # Add verification-related data. # use one-letter code for DB, human readable for clients data["is_verified"] = verification_status.code + data["report_id"] = rid data_buff = io.BytesIO() stream = io.TextIOWrapper(data_buff, "utf-8") ujson.dump(data, stream) diff --git a/ooniapi/services/ooniprobe/tests/conftest.py b/ooniapi/services/ooniprobe/tests/conftest.py index e5a7aeb40..7cfbdc315 100644 --- a/ooniapi/services/ooniprobe/tests/conftest.py +++ b/ooniapi/services/ooniprobe/tests/conftest.py @@ -1,6 +1,7 @@ import json import pathlib import time +from contextlib import asynccontextmanager from datetime import datetime from pathlib import Path from typing import Any, Dict @@ -334,26 +335,18 @@ def close(self): pass -@pytest_asyncio.fixture -async def client_with_mocked_fastpath( - clickhouse_server, test_settings, geoip_db_dir, test_creds +@asynccontextmanager +async def _client_with_mocked_fastpath_urls( + test_settings, geoip_db_dir, test_creds, fastpath_urls ): """ - Client variant to test fastpath behaviour, see the mocked fastpath client - above. - - Yields `(client, mock_fastpath, success_url)`. + Shared setup for the client_with_*_mocked_fastpath* fixtures: applies + dependency overrides, installs a `MockFastpathClient` on app state, and + yields `(client, mock_fastpath)` for the test to use. """ - fail_url = "http://fastpath.ooni/bad" - success_url = "http://fastpath.ooni/good" - _, public_key = test_creds - settings = test_settings().model_copy( - update={ - "fastpath_urls": [fail_url, success_url], - } - ) + settings = test_settings().model_copy(update={"fastpath_urls": fastpath_urls}) app.dependency_overrides[get_settings] = lambda: settings app.dependency_overrides[get_s3_client] = get_s3_client_mock app.dependency_overrides[get_tor_targets_from_s3] = get_tor_targets_from_s3_mock @@ -365,7 +358,41 @@ async def client_with_mocked_fastpath( async with lifespan(app, settings, repeating_tasks_active=False): with TestClient(app) as client: app.state.fastpath_client = mock_fastpath - yield client, mock_fastpath, success_url + yield client, mock_fastpath + + +@pytest_asyncio.fixture +async def client_with_mocked_fastpath( + clickhouse_server, test_settings, geoip_db_dir, test_creds +): + """ + Client with one healthy mocked fastpath URL. + + Yields `(client, mock_fastpath, url)`. + """ + url = "http://fastpath.ooni/good" + async with _client_with_mocked_fastpath_urls( + test_settings, geoip_db_dir, test_creds, [url] + ) as (client, mock_fastpath): + yield client, mock_fastpath, url + + +@pytest_asyncio.fixture +async def client_with_one_good_mocked_fastpath( + clickhouse_server, test_settings, geoip_db_dir, test_creds +): + """ + Client with one failing and one healthy mocked fastpath URL, used to + test the fallback path. + + Yields `(client, mock_fastpath, success_url)`. + """ + fail_url = "http://fastpath.ooni/bad" + success_url = "http://fastpath.ooni/good" + async with _client_with_mocked_fastpath_urls( + test_settings, geoip_db_dir, test_creds, [fail_url, success_url] + ) as (client, mock_fastpath): + yield client, mock_fastpath, success_url @pytest_asyncio.fixture @@ -373,29 +400,13 @@ async def client_with_two_working_fastpaths( clickhouse_server, test_settings, geoip_db_dir, test_creds ): """ - Client variant to test successful fastpath submissions with two healthy fastpath instances + Client with two healthy mocked fastpath URLs. Yields `(client, mock_fastpath, first_url, second_url)`. """ first_url = "http://fastpath-a.ooni/good" second_url = "http://fastpath-b.ooni/good" - - _, public_key = test_creds - - settings = test_settings().model_copy( - update={ - "fastpath_urls": [first_url, second_url], - } - ) - app.dependency_overrides[get_settings] = lambda: settings - app.dependency_overrides[get_s3_client] = get_s3_client_mock - app.dependency_overrides[get_tor_targets_from_s3] = get_tor_targets_from_s3_mock - app.dependency_overrides[get_psiphon_config_from_s3] = get_psiphon_config_from_s3_mock - app.dependency_overrides[_get_manifest] = make_manifest_mock_fn(public_key) - try_update(geoip_db_dir) - - mock_fastpath = MockFastpathClient() - async with lifespan(app, settings, repeating_tasks_active=False): - with TestClient(app) as client: - app.state.fastpath_client = mock_fastpath - yield client, mock_fastpath, first_url, second_url + async with _client_with_mocked_fastpath_urls( + test_settings, geoip_db_dir, test_creds, [first_url, second_url] + ) as (client, mock_fastpath): + yield client, mock_fastpath, first_url, second_url diff --git a/ooniapi/services/ooniprobe/tests/integ/test_reports.py b/ooniapi/services/ooniprobe/tests/integ/test_reports.py index 3575df39c..544bd8365 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_reports.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_reports.py @@ -113,11 +113,11 @@ async def test_collector_upload_msmt_valid_zstd(client): ), c @pytest.mark.asyncio -async def test_fastpath_fallback(client_with_mocked_fastpath): +async def test_fastpath_fallback(client_with_one_good_mocked_fastpath): """When the first fastpath URL fails, the second one in the list should still receive the measurement. """ - client, mock_fastpath, success_url = client_with_mocked_fastpath + client, mock_fastpath, success_url = client_with_one_good_mocked_fastpath rid = "20230101T000000Z_integtest_IT_1_n1_integtest0000000" msmt_payload = { @@ -149,13 +149,13 @@ async def test_fastpath_fallback(client_with_mocked_fastpath): @pytest.mark.asyncio -async def test_fastpath_payload_has_report_id(client_with_two_working_fastpaths): +async def test_fastpath_payload_has_report_id(client_with_mocked_fastpath): """ The body forwarded to the fastpath must include a freshly generated `report_id` derived from the measurement body's metadata, regardless of the (ignored) `report_id` in the URL path. """ - client, mock_fastpath, first_url, _ = client_with_two_working_fastpaths + client, mock_fastpath, fastpath_url = client_with_mocked_fastpath og_rid = "ignored-by-the-server" msmt_payload = { @@ -175,7 +175,7 @@ async def test_fastpath_payload_has_report_id(client_with_two_working_fastpaths) msmt_uid = resp.json().get("measurement_uid") assert msmt_uid - expected_url = f"{first_url}/{msmt_uid}" + expected_url = f"{fastpath_url}/{msmt_uid}" assert expected_url in mock_fastpath.uploads, mock_fastpath.uploads stored = ujson.loads(mock_fastpath.uploads[expected_url]) diff --git a/ooniapi/services/ooniprobe/tests/test_anoncred.py b/ooniapi/services/ooniprobe/tests/test_anoncred.py index 54b0d8c6a..4b6d3ec08 100644 --- a/ooniapi/services/ooniprobe/tests/test_anoncred.py +++ b/ooniapi/services/ooniprobe/tests/test_anoncred.py @@ -1,3 +1,4 @@ +import re from typing import Any, Dict import ooniauth_py import pytest @@ -95,12 +96,12 @@ async def test_submission_basic(client): @pytest.mark.asyncio -async def test_fastpath_fallback(client_with_mocked_fastpath): +async def test_fastpath_fallback(client_with_one_good_mocked_fastpath): """ When the first fastpath URL fails, the second one in the list should still receive a verified anonymous-credentials measurement. """ - client, mock_fastpath, success_url = client_with_mocked_fastpath + client, mock_fastpath, success_url = client_with_one_good_mocked_fastpath # build a verifiable submission user, manifest_version, _ = setup_user(client) @@ -116,20 +117,64 @@ async def test_fastpath_fallback(client_with_mocked_fastpath): msmt_uid = c["measurement_uid"] assert msmt_uid, c - # Verified anonymous-credential submissions inject "t" as the - # `is_verified` flag in the stored payload before hashing. - expected_hash = get_msmt_hash(msm, is_verified="t") - assert msmt_uid.endswith(f"_IE_webconnectivity_{expected_hash}"), msmt_uid - - # Check response bytes expected_url = f"{success_url}/{msmt_uid}" assert list(mock_fastpath.uploads.keys()) == [expected_url] + # Verified anonymous-credential submissions inject "t" as the + # `is_verified` flag in the stored payload before hashing. The bytes the + # server hashes also include a freshly generated random `report_id`, so we + # compute the expected hash from the stored body. stored = ujson.loads(mock_fastpath.uploads[expected_url]) - assert get_msmt_hash(stored, is_verified="t") == expected_hash + expected_hash = get_msmt_hash(stored, is_verified="t") + assert msmt_uid.endswith(f"_IE_webconnectivity_{expected_hash}"), msmt_uid assert stored["is_verified"] == "t" +@pytest.mark.asyncio +async def test_fastpath_payload_has_report_id(client_with_mocked_fastpath): + """ + The body forwarded to the fastpath must always include a freshly + generated `report_id`, even if not provided by the client + """ + client, mock_fastpath, fastpath_url = client_with_mocked_fastpath + + # 1) Verified anonymous-credentials submission + user, manifest_version, _ = setup_user(client) + submit_request = make_submit_request(user, "IE", "AS34245") + msm_verified = make_measurement( + submit_request.nym, submit_request.request, manifest_version + ) + c = postj(client, "/api/v1/submit_measurement", msm_verified) + assert c["verification_status"] == "verified", c + verified_uid = c["measurement_uid"] + + # 2) Unverified submission (missing anoncred fields) + msm_unverified = { + "format": "json", + "content": { + "test_name": "web_connectivity", + "probe_asn": "AS34245", + "probe_cc": "IE", + "test_start_time": "2020-09-09 14:11:11", + }, + } + c = postj(client, "/api/v1/submit_measurement", msm_unverified) + assert c["verification_status"] == "unverified", c + unverified_uid = c["measurement_uid"] + + # ____ni_ + rid_re = re.compile( + r"\d{8}T\d{6}Z_webconnectivity_IE_34245_n1_[A-Za-z0-9oo]{16}" + ) + for uid in (verified_uid, unverified_uid): + url = f"{fastpath_url}/{uid}" + assert url in mock_fastpath.uploads, mock_fastpath.uploads + stored = ujson.loads(mock_fastpath.uploads[url]) + rid = stored.get("report_id") + assert isinstance(rid, str) and rid, stored + assert rid_re.fullmatch(rid), rid + + @pytest.mark.asyncio async def test_fastpath_only_submits_once_on_success(client_with_two_working_fastpaths): """ @@ -148,10 +193,6 @@ async def test_fastpath_only_submits_once_on_success(client_with_two_working_fas msmt_uid = c["measurement_uid"] assert msmt_uid, c - # Sanity-check the bytes that were forwarded to the fastpath - expected_hash = get_msmt_hash(msm, is_verified="t") - assert msmt_uid.endswith(f"_IE_webconnectivity_{expected_hash}"), msmt_uid - # Only the first fastpath URL should have received the measurement expected_url = f"{first_url}/{msmt_uid}" assert list(mock_fastpath.uploads.keys()) == [expected_url], ( @@ -159,6 +200,11 @@ async def test_fastpath_only_submits_once_on_success(client_with_two_working_fas f"got {list(mock_fastpath.uploads.keys())}" ) + # Sanity-check the bytes that were forwarded to the fastpath + stored = ujson.loads(mock_fastpath.uploads[expected_url]) + expected_hash = get_msmt_hash(stored, is_verified="t") + assert msmt_uid.endswith(f"_IE_webconnectivity_{expected_hash}"), msmt_uid + @pytest.mark.asyncio async def test_submission_non_verified(client): From 0582837d443dd5a974fd12765753f18b7b53c581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Fri, 8 May 2026 16:25:58 +0200 Subject: [PATCH 12/22] Add report_id to the content key instead of root of json --- ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py | 2 +- .../ooniprobe/src/ooniprobe/routers/v1/probe_services.py | 2 +- ooniapi/services/ooniprobe/tests/integ/test_reports.py | 2 +- ooniapi/services/ooniprobe/tests/test_anoncred.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index f1cc5d015..da18bc84f 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -249,7 +249,7 @@ def _process_measurement_body( metadata = metadata_from_measurement_content(content) json["is_verified"] = "u" - json["report_id"] = generate_report_id( + json["content"]["report_id"] = generate_report_id( metadata.test_name, settings, metadata.probe_cc, diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py index 824c599ae..34c944acd 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py @@ -923,7 +923,7 @@ async def submit_measurement( # Add verification-related data. # use one-letter code for DB, human readable for clients data["is_verified"] = verification_status.code - data["report_id"] = rid + data["content"]["report_id"] = rid data_buff = io.BytesIO() stream = io.TextIOWrapper(data_buff, "utf-8") ujson.dump(data, stream) diff --git a/ooniapi/services/ooniprobe/tests/integ/test_reports.py b/ooniapi/services/ooniprobe/tests/integ/test_reports.py index 544bd8365..c77c0e509 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_reports.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_reports.py @@ -179,7 +179,7 @@ async def test_fastpath_payload_has_report_id(client_with_mocked_fastpath): assert expected_url in mock_fastpath.uploads, mock_fastpath.uploads stored = ujson.loads(mock_fastpath.uploads[expected_url]) - rid = stored.get("report_id") + rid = stored.get("content", {}).get("report_id") assert isinstance(rid, str) and rid, stored assert rid != og_rid # collector_id is "1" in test_settings; report id format: diff --git a/ooniapi/services/ooniprobe/tests/test_anoncred.py b/ooniapi/services/ooniprobe/tests/test_anoncred.py index 4b6d3ec08..b24e662ab 100644 --- a/ooniapi/services/ooniprobe/tests/test_anoncred.py +++ b/ooniapi/services/ooniprobe/tests/test_anoncred.py @@ -170,7 +170,7 @@ async def test_fastpath_payload_has_report_id(client_with_mocked_fastpath): url = f"{fastpath_url}/{uid}" assert url in mock_fastpath.uploads, mock_fastpath.uploads stored = ujson.loads(mock_fastpath.uploads[url]) - rid = stored.get("report_id") + rid = stored.get("content", {}).get("report_id") assert isinstance(rid, str) and rid, stored assert rid_re.fullmatch(rid), rid From 2e9739ddab625a8642230f9007f3d26ba73a6231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 14 May 2026 10:59:42 +0200 Subject: [PATCH 13/22] Refactor validation into check function --- .../services/ooniprobe/src/ooniprobe/utils.py | 184 ++++++++++-------- 1 file changed, 101 insertions(+), 83 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py index e7a281111..d712a00d3 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py @@ -4,18 +4,18 @@ Insert VPN credentials into database. """ -from dataclasses import dataclass import io import itertools import logging -import ujson from base64 import b64encode +from dataclasses import dataclass from datetime import datetime, timezone from os import urandom from typing import Any, Dict, List, Tuple, TypedDict -import requests import pem +import requests +import ujson from fastapi import HTTPException, Request from mypy_boto3_s3 import S3Client from sqlalchemy.orm import Session @@ -150,122 +150,100 @@ def metadata_from_measurement_content(content: dict[str, Any]) -> MeasurementMet Parses metadata from the `content` key in a measurement body, then formats fields the same way as `generate_report_id` / `open_report` do. """ - test_name = str(content.get("test_name") or "").lower().replace("_", "") - - cc = str(content.get("probe_cc") or "").upper().replace("_", "") - if len(cc) != 2: - cc = "ZZ" - - raw_asn = str(content.get("probe_asn") or "AS0").upper() - if len(raw_asn) > 12 or len(raw_asn) < 3 or not raw_asn.startswith("AS"): - raw_asn = "AS0" annotations = content.get("annotations", {}) if not isinstance(annotations, dict): annotations = {} return MeasurementMetadata( - test_name=test_name, - probe_cc=cc, - probe_asn=raw_asn, - platform=str(annotations.get("platform") or ""), - software_name=str(content.get("software_name") or ""), - software_version=str(content.get("software_version") or ""), + test_name=content.get("test_name", ""), + probe_cc=content.get("probe_cc", ""), + probe_asn=content.get("probe_asn", ""), + platform=annotations.get("platform", ""), + software_name=content.get("software_name", ""), + software_version=content.get("software_version", ""), ) - -def extract_probe_ipaddr(request: Request) -> str: - - real_ip_headers = ["X-Forwarded-For", "X-Real-IP"] - - for h in real_ip_headers: - if h in request.headers: - return get_first_ip(request.headers.getlist(h)[0]) - - return request.client.host if request.client else "" - - -def geolookup_probe(ipaddr: str, asn_cc_reader: ASNCCReaderDep) -> Tuple[str, str, str]: - entry = asn_cc_reader.get(ipaddr) - try: - cc = entry['country']['iso_code'] - asn = entry['autonomous_system_number'] - as_org = entry.get('autonomous_system_organization', "0") - return (cc, f"AS{asn}", as_org) - except KeyError: - raise AddressNotFoundError - except Exception as e: - log.error(f"Error looking up {ipaddr}: {e}") - raise AddressNotFoundError - - -def error(msg: str | Dict[str, Any], status_code: int = 400): - raise HTTPException(status_code=status_code, detail=msg) - - -def normalize_asn(asn: str) -> int: - """ - Return ASN as int (strip 'AS' prefix if present). Invalid values return 0. - """ - s = str(asn).strip().upper() - s = s[2:] if s.startswith("AS") else s - try: - return int(s) - except (ValueError, TypeError) as e: - log.error(f"Invalid asn: {e}") - return 0 - - def check_measurement_meta( test_name: str, probe_cc: str, probe_asn: str, -) -> None: +): """ Checks metadata consistency, raising an HTTPException and stopping - ingestion when an inconsistency is detected + ingestion when an inconsistency is detected. + + This metadata is expected to come from the measurement body """ - asn_i = normalize_asn(probe_asn) - cc_ok = len(probe_cc) == 2 + cc_ok = ( + len(probe_cc) == 2 and + probe_cc.isupper() and + probe_cc.isalnum() + ) test_name_alnum_ok = test_name.isalnum() test_name_len_ok = 1 < len(test_name) < 30 - if not (cc_ok and test_name_alnum_ok and test_name_len_ok): + test_name_lower_ok = test_name.islower() + asn_starts_as_ok = probe_asn.startswith("AS") + asn_len_ok = len(probe_asn) >= 3 and len(probe_asn) <= 12 + + if not ( + cc_ok and test_name_alnum_ok and test_name_len_ok and + test_name_lower_ok and asn_starts_as_ok and asn_len_ok + ): + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_metadata").inc() log.error( "Bad metadata in measurement body: test_name=" f"{test_name[:30]}, cc={probe_cc}" ) - reason: str | None = None + reasons = [] if not cc_ok: - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_cc").inc() - reason = reason or "bad_cc" + reasons.append("bad_cc") if not test_name_alnum_ok: - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="tn_not_alnum").inc() - reason = reason or "tn_not_alnum" + reasons.append("tn_not_alnum") if not test_name_len_ok: - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="tn_len").inc() - reason = reason or "tn_len" - assert reason is not None + reasons.append("tn_len") + if not test_name_lower_ok: + reasons.append("tn_no_lower") + if not asn_len_ok: + reasons.append("asn_len") + if not asn_starts_as_ok: + reasons.append("asn_no_as_prefix") + raise HTTPException( status_code=400, detail={ - "error": reason, - "message": "Incorrect format", + "error": "bad_metadata", + "message": f"Errors: {reasons}" }, ) - if asn_i == 0: - log.info("Discarding ASN == 0") - Metrics.BAD_MEASUREMENTS_CNT.labels(reason="asn_0").inc() - Metrics.MSMNT_DISCARD_ASN0.inc() + error = None + asn = str(probe_asn).strip().upper().lstrip("AS") + try: + asn_i = int(asn) + if asn_i == 0: + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="asn_0").inc() + Metrics.MSMNT_DISCARD_ASN0.inc() + log.info("Discarding ASN == 0") + error = "asn_0" + message = "Measurement discarded, ASN == 0" + except Exception as e: + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="asn_invalid").inc() + log.info(f"Discarding ASN == {probe_asn}, error: {e}") + error = "asn_invalid" + message = f"Measurement discarded, ASN == {probe_asn}" + + if error: raise HTTPException( 400, detail={ - "error": "asn_0", - "message": "Measurement discarded, ASN == 0", - }, + "error": error, + "message" : message + } ) + # Check probe_cc for ZZ cases if probe_cc == "ZZ": log.info("Discarding CC == ZZ") Metrics.BAD_MEASUREMENTS_CNT.labels(reason="cc_zz").inc() @@ -278,6 +256,46 @@ def check_measurement_meta( }, ) +def extract_probe_ipaddr(request: Request) -> str: + + real_ip_headers = ["X-Forwarded-For", "X-Real-IP"] + + for h in real_ip_headers: + if h in request.headers: + return get_first_ip(request.headers.getlist(h)[0]) + + return request.client.host if request.client else "" + + +def geolookup_probe(ipaddr: str, asn_cc_reader: ASNCCReaderDep) -> Tuple[str, str, str]: + entry = asn_cc_reader.get(ipaddr) + try: + cc = entry['country']['iso_code'] + asn = entry['autonomous_system_number'] + as_org = entry.get('autonomous_system_organization', "0") + return (cc, f"AS{asn}", as_org) + except KeyError: + raise AddressNotFoundError + except Exception as e: + log.error(f"Error looking up {ipaddr}: {e}") + raise AddressNotFoundError + + +def error(msg: str | Dict[str, Any], status_code: int = 400): + raise HTTPException(status_code=status_code, detail=msg) + + +def normalize_asn(asn: str) -> int: + """ + Return ASN as int (strip 'AS' prefix if present). Invalid values return 0. + """ + s = str(asn).strip().upper() + s = s[2:] if s.startswith("as") else s + try: + return int(s) + except (ValueError, TypeError) as e: + log.error(f"Invalid asn: {e}") + return 0 def get_cc_asn( request: Request, asn_cc_reader: ASNCCReaderDep From 18431549d4a43c269fee8dfd684d53f94ff1aa47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 14 May 2026 11:46:33 +0200 Subject: [PATCH 14/22] Add report_id to submission result --- .../src/ooniprobe/routers/v1/probe_services.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py index 34c944acd..16028bbe5 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py @@ -1,4 +1,4 @@ -import asyncio +from sqlalchemy import desc import io import logging import random @@ -873,6 +873,12 @@ class SubmitMeasurementResponse(BaseModel): default=None, ) + report_id: str = Field( + description="Report ID generated by the server. Note that this is " + "unique to every measurement, so it's not useful for grouping " + "measurements that were taken together" + ) + @router.post("/submit_measurement", tags=["anonymous_credentials"]) async def submit_measurement( @@ -995,6 +1001,7 @@ async def submit_measurement( verification_status=verification_status, submit_response=submit_response, error=submit_error, + report_id = rid ) Metrics.SEND_FASTPATH_CNT.labels(status="fail").inc() @@ -1021,6 +1028,7 @@ async def submit_measurement( verification_status=verification_status, submit_response=submit_response, error=submit_error or "submission_delivery_failed", + report_id = rid ) def _verify_submit( From 22bc4f44ef712ce7907be6ee7a6931ffb211ec35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 14 May 2026 12:30:23 +0200 Subject: [PATCH 15/22] Move reformating to function constructing msm uid --- .../services/ooniprobe/src/ooniprobe/routers/reports.py | 1 + .../ooniprobe/src/ooniprobe/routers/v1/probe_services.py | 1 + ooniapi/services/ooniprobe/src/ooniprobe/utils.py | 8 ++------ 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index da18bc84f..87db6a9e6 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -157,6 +157,7 @@ async def receive_measurement( ts = now.strftime("%Y%m%d%H%M%S.%f") # msmt_uid is a unique id based on upload time, cc, testname and hash + test_name = test_name.replace("_", "") msmt_uid = f"{ts}_{cc}_{test_name}_{h}" Metrics.MSMNT_RECEIVED_CNT.inc() diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py index 16028bbe5..7c789284a 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py @@ -944,6 +944,7 @@ async def submit_measurement( ts = now.strftime("%Y%m%d%H%M%S.%f") # msmt_uid is a unique id based on upload time, cc, testname and hash + test_name = test_name.replace("_","") msmt_uid = f"{ts}_{cc}_{test_name}_{h}" Metrics.MSMNT_RECEIVED_CNT.inc() diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py index d712a00d3..488987064 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py @@ -181,14 +181,13 @@ def check_measurement_meta( probe_cc.isupper() and probe_cc.isalnum() ) - test_name_alnum_ok = test_name.isalnum() test_name_len_ok = 1 < len(test_name) < 30 test_name_lower_ok = test_name.islower() asn_starts_as_ok = probe_asn.startswith("AS") asn_len_ok = len(probe_asn) >= 3 and len(probe_asn) <= 12 if not ( - cc_ok and test_name_alnum_ok and test_name_len_ok and + cc_ok and test_name_len_ok and test_name_lower_ok and asn_starts_as_ok and asn_len_ok ): Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_metadata").inc() @@ -199,8 +198,6 @@ def check_measurement_meta( reasons = [] if not cc_ok: reasons.append("bad_cc") - if not test_name_alnum_ok: - reasons.append("tn_not_alnum") if not test_name_len_ok: reasons.append("tn_len") if not test_name_lower_ok: @@ -289,8 +286,7 @@ def normalize_asn(asn: str) -> int: """ Return ASN as int (strip 'AS' prefix if present). Invalid values return 0. """ - s = str(asn).strip().upper() - s = s[2:] if s.startswith("as") else s + s = str(asn).strip().upper().lstrip("AS") try: return int(s) except (ValueError, TypeError) as e: From b383f67790989cc33ad668381d1142b2b8f6b146 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 14 May 2026 13:59:13 +0200 Subject: [PATCH 16/22] Restore report_id parsing; add consistency check between report_id and body --- .../src/ooniprobe/routers/reports.py | 68 +++++++++++++++---- 1 file changed, 55 insertions(+), 13 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index 87db6a9e6..e7e9ff900 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -119,6 +119,35 @@ async def receive_measurement( """ setnocacheresponse(response) empty_measurement = {} + try: + rid_timestamp, test_name, cc, asn, format_cid, rand = report_id.split("_") + except Exception as e: + log.info( + f"Unexpected report_id {report_id[:200]}. Error: {e}", + ) + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_report_id").inc() + error("Incorrect format") + + good = len(cc) == 2 and test_name.isalnum() and 1 < len(test_name) < 30 + if not good: + log.info("Unexpected report_id %r", report_id[:200]) + error("Incorrect format") + try: + asn_i = int(asn) + except ValueError as e: + log.info(f"ASN value not parsable {asn}. Error: {e}") + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_asn").inc() + error("Incorrect format") + + if asn_i == 0: + log.info("Discarding ASN == 0") + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="asn_0").inc() + return empty_measurement + + if cc.upper() == "ZZ": + log.info("Discarding CC == ZZ") + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="cc_zz").inc() + return empty_measurement with Metrics.READ_BODY_TIMING.time(): data = await request.body() @@ -135,7 +164,7 @@ async def receive_measurement( try: data, metadata = await run_in_threadpool( - _process_measurement_body, data, settings + _process_measurement_body, data ) except Exception as e: log.info("Failed to parse and modify measurement body") @@ -143,10 +172,9 @@ async def receive_measurement( Metrics.BAD_MEASUREMENTS_CNT.labels(reason="bad_json").inc() error("Incorrect format") - test_name = metadata.test_name - cc = metadata.probe_cc - asn = metadata.probe_asn - + # Raise an exception in case the report_id is not consistent with the body, + # we flag this behavior as faulty data + _compare_report_id_to_body_meta(cc, asn, test_name, metadata) check_measurement_meta(test_name, cc, asn) # Write the whole body of the measurement in a directory based on a 1-hour @@ -229,13 +257,12 @@ async def receive_measurement( return empty_measurement def _process_measurement_body( - data: bytes, settings: Settings + data: bytes ) -> Tuple[bytes, MeasurementMetadata]: """ - Parse the measurement body - extract some metadata fields - set `is_verified="u"` - - set report_id - re-serialize. """ @@ -250,16 +277,31 @@ def _process_measurement_body( metadata = metadata_from_measurement_content(content) json["is_verified"] = "u" - json["content"]["report_id"] = generate_report_id( - metadata.test_name, - settings, - metadata.probe_cc, - normalize_asn(metadata.probe_asn) - ) with Metrics.SERIALIZE_BODY_TIMING.time(): return ujson.dumps(json).encode("utf-8"), metadata +def _compare_report_id_to_body_meta(cc: str, asn: str, test_name: str, metadata: MeasurementMetadata): + """ + Compare the metadata reported by a report_id against the metadata in a + measurement body + + Raise HTTPException on errors + """ + if cc.upper() != metadata.probe_cc.upper(): + log.info(f"CC mismatch: {cc} vs {metadata}") + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="cc_mismatch").inc() + error("Inconsistent measurement") + + if asn.upper().lstrip("AS") != metadata.probe_asn.upper().lstrip("AS"): + log.info(f"ASN mismatch: {asn} vs {metadata.probe_asn}") + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="asn_mismatch").inc() + error("Inconsistent measurement") + + if test_name != metadata.test_name.replace("_",""): + log.info(f"Test name mismatch: {test_name} vs {metadata.test_name}") + Metrics.BAD_MEASUREMENTS_CNT.labels(reason="test_name_mismatch").inc() + error("Inconsistent measurement") def _check_and_register_geoip_anomaly( request: Request, From 45e46d9da2cfcec3d7a68476042dcaf18d665edb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 14 May 2026 16:07:08 +0200 Subject: [PATCH 17/22] Update tests --- .../src/ooniprobe/routers/reports.py | 5 +- .../services/ooniprobe/src/ooniprobe/utils.py | 6 +- .../ooniprobe/tests/integ/test_reports.py | 73 ++++--------------- 3 files changed, 21 insertions(+), 63 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index e7e9ff900..9d631735b 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -175,7 +175,7 @@ async def receive_measurement( # Raise an exception in case the report_id is not consistent with the body, # we flag this behavior as faulty data _compare_report_id_to_body_meta(cc, asn, test_name, metadata) - check_measurement_meta(test_name, cc, asn) + check_measurement_meta(metadata.test_name, metadata.probe_cc, metadata.probe_asn) # Write the whole body of the measurement in a directory based on a 1-hour # time window @@ -185,7 +185,6 @@ async def receive_measurement( ts = now.strftime("%Y%m%d%H%M%S.%f") # msmt_uid is a unique id based on upload time, cc, testname and hash - test_name = test_name.replace("_", "") msmt_uid = f"{ts}_{cc}_{test_name}_{h}" Metrics.MSMNT_RECEIVED_CNT.inc() @@ -217,7 +216,7 @@ async def receive_measurement( if success: # Geoip anomaly detection runs only when the measurement was successfully - # submitted to the fastpath, so retries don't cause duplicate anomaly entries. + # submitted to the fastpath with Metrics.COMPARE_CC_TIMING.time(): try: await run_in_threadpool( diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py index 488987064..8a32ae0c5 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py @@ -137,9 +137,9 @@ def generate_report_id(test_name, settings: Settings, cc: str, asn_i: int) -> st class MeasurementMetadata: """Metadata extracted from a measurement body's `content` object.""" - test_name: str # lowercase, underscores stripped (matches report id segment) - probe_cc: str # uppercase, 2-letter or "ZZ" when unknown - probe_asn: str # canonical "AS..." string or "AS0" + test_name: str + probe_cc: str + probe_asn: str platform: str software_name: str software_version: str diff --git a/ooniapi/services/ooniprobe/tests/integ/test_reports.py b/ooniapi/services/ooniprobe/tests/integ/test_reports.py index c77c0e509..7c7f2a605 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_reports.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_reports.py @@ -1,5 +1,4 @@ import json -import re import zstd import pytest import ujson @@ -44,8 +43,6 @@ async def test_collector_open_report(client): @pytest.mark.asyncio async def test_collector_upload_msmt_bogus(client): - # The path component is ignored, but a body without valid probe_cc / - # probe_asn / test_name still has to be rejected. j = dict(format="json", content=dict(test_keys={})) resp = client.post("/report/bogus", json=j) assert resp.status_code == 400, resp @@ -93,7 +90,7 @@ async def test_collector_upload_msmt_valid(client): @pytest.mark.asyncio async def test_collector_upload_msmt_valid_zstd(client): - rid = "ignored-by-the-server" + rid = "20230101T000000Z_integtest_IT_1_n1_integtest0000000" msmt_payload = { "format": "json", "content": { @@ -124,12 +121,12 @@ async def test_fastpath_fallback(client_with_one_good_mocked_fastpath): "format": "json", "content": { "test_keys": {}, - "annotations": {"platform": "test_platform"}, - "software_name": "test_software", - "software_version": "0.0.0", "probe_cc": "IT", "probe_asn": "AS1", "test_name": "integtest", + "annotations": {"platform": "test_platform"}, + "software_name": "test_software", + "software_version": "0.0.0", }, } resp = client.post(f"/report/{rid}", json=msmt_payload) @@ -139,56 +136,17 @@ async def test_fastpath_fallback(client_with_one_good_mocked_fastpath): msmt_uid = body["measurement_uid"] assert msmt_uid, body + expected_hash = get_msmt_hash(msmt_payload) + assert msmt_uid.endswith(f"_IT_integtest_{expected_hash}"), msmt_uid + expected_url = f"{success_url}/{msmt_uid}" assert list(mock_fastpath.uploads.keys()) == [expected_url] stored = ujson.loads(mock_fastpath.uploads[expected_url]) - expected_hash = get_msmt_hash(stored) - assert msmt_uid.endswith(f"_IT_integtest_{expected_hash}"), msmt_uid + assert get_msmt_hash(stored) == expected_hash assert stored["is_verified"] == "u" -@pytest.mark.asyncio -async def test_fastpath_payload_has_report_id(client_with_mocked_fastpath): - """ - The body forwarded to the fastpath must include a freshly generated - `report_id` derived from the measurement body's metadata, regardless of - the (ignored) `report_id` in the URL path. - """ - client, mock_fastpath, fastpath_url = client_with_mocked_fastpath - - og_rid = "ignored-by-the-server" - msmt_payload = { - "format": "json", - "content": { - "test_keys": {}, - "annotations": {"platform": "test_platform"}, - "software_name": "test_software", - "software_version": "0.0.0", - "probe_cc": "IT", - "probe_asn": "AS1", - "test_name": "integtest", - }, - } - resp = client.post(f"/report/{og_rid}", json=msmt_payload) - assert resp.status_code == 200, resp.text - msmt_uid = resp.json().get("measurement_uid") - assert msmt_uid - - expected_url = f"{fastpath_url}/{msmt_uid}" - assert expected_url in mock_fastpath.uploads, mock_fastpath.uploads - - stored = ujson.loads(mock_fastpath.uploads[expected_url]) - rid = stored.get("content", {}).get("report_id") - assert isinstance(rid, str) and rid, stored - assert rid != og_rid - # collector_id is "1" in test_settings; report id format: - # ____n_ - assert re.fullmatch( - r"\d{8}T\d{6}Z_integtest_IT_1_n1_[A-Za-z0-9oo]{16}", rid - ), rid - - @pytest.mark.asyncio async def test_fastpath_only_submits_once_on_success(client_with_two_working_fastpaths): """ @@ -201,12 +159,12 @@ async def test_fastpath_only_submits_once_on_success(client_with_two_working_fas "format": "json", "content": { "test_keys": {}, - "annotations": {"platform": "test_platform"}, - "software_name": "test_software", - "software_version": "0.0.0", "probe_cc": "IT", "probe_asn": "AS1", "test_name": "integtest", + "annotations": {"platform": "test_platform"}, + "software_name": "test_software", + "software_version": "0.0.0", }, } resp = client.post(f"/report/{rid}", json=msmt_payload) @@ -215,14 +173,15 @@ async def test_fastpath_only_submits_once_on_success(client_with_two_working_fas msmt_uid = body.get("measurement_uid") assert msmt_uid, body + expected_hash = get_msmt_hash(msmt_payload) + assert msmt_uid.endswith(f"_IT_integtest_{expected_hash}"), msmt_uid + expected_url = f"{first_url}/{msmt_uid}" uploaded_urls = list(mock_fastpath.uploads.keys()) - assert uploaded_urls == [expected_url], ( + assert uploaded_urls == [expected_url], ( "measurement should be forwarded to the first fastpath URL only, " f"got {uploaded_urls}" ) - # Sanity-check the bytes that were forwarded to the fastpath stored = ujson.loads(mock_fastpath.uploads[expected_url]) - expected_hash = get_msmt_hash(stored) - assert msmt_uid.endswith(f"_IT_integtest_{expected_hash}"), msmt_uid + assert get_msmt_hash(stored) == expected_hash From acbf2ba715da56f271dc9ea9aee83d8477bd11e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 14 May 2026 16:23:24 +0200 Subject: [PATCH 18/22] Add tests to check metadata consistency against report_id --- .../ooniprobe/tests/integ/test_reports.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/ooniapi/services/ooniprobe/tests/integ/test_reports.py b/ooniapi/services/ooniprobe/tests/integ/test_reports.py index 7c7f2a605..4cbddb3a8 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_reports.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_reports.py @@ -1,4 +1,5 @@ import json +import re import zstd import pytest import ujson @@ -88,6 +89,35 @@ async def test_collector_upload_msmt_valid(client): assert c == {}, c +@pytest.mark.parametrize( + "content_overrides", + [ + {"probe_cc": "DE"}, + {"probe_asn": "AS2"}, + {"test_name": "ndt"}, + ], +) +@pytest.mark.asyncio +async def test_collector_upload_msmt_rejects_body_path_metadata_mismatch( + client, content_overrides +): + """_compare_report_id_to_body_meta must reject uploads when report_id and body disagree""" + rid = "20230101T000000Z_integtest_IT_1_n1_integtest0000000" + msmt_payload = { + "format": "json", + "content": { + "test_keys": {}, + "probe_cc": "IT", + "probe_asn": "AS1", + "test_name": "integtest", + **content_overrides, + }, + } + resp = client.post(f"/report/{rid}", json=msmt_payload) + assert resp.status_code == 400, resp.json() + assert resp.json()["detail"] == "Inconsistent measurement" + + @pytest.mark.asyncio async def test_collector_upload_msmt_valid_zstd(client): rid = "20230101T000000Z_integtest_IT_1_n1_integtest0000000" From 72c2b814583274ed00d3c81fdb64e81066828c31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 14 May 2026 16:33:10 +0200 Subject: [PATCH 19/22] Remove outdated comment --- ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index 9d631735b..e9440e386 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -112,10 +112,6 @@ async def receive_measurement( ) -> ReceiveMeasurementResponse | Dict[str, Any]: """ Submit measurement. - - The `report_id` path parameter is accepted for URL backwards - compatibility but is ignored: metadata used to identify - and validate the measurement is read from the body """ setnocacheresponse(response) empty_measurement = {} From 41d6bff7803bf12465ee7d898fed36162231ee79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 14 May 2026 16:33:42 +0200 Subject: [PATCH 20/22] Remove uneccesary identation --- ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index e9440e386..74fe5f73f 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -116,7 +116,7 @@ async def receive_measurement( setnocacheresponse(response) empty_measurement = {} try: - rid_timestamp, test_name, cc, asn, format_cid, rand = report_id.split("_") + rid_timestamp, test_name, cc, asn, format_cid, rand = report_id.split("_") except Exception as e: log.info( f"Unexpected report_id {report_id[:200]}. Error: {e}", From 8d7dba6ff214f8f5de57bfce74b08ca2d29edaa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Thu, 14 May 2026 16:38:46 +0200 Subject: [PATCH 21/22] Improve test name and docs --- ooniapi/services/ooniprobe/tests/integ/test_reports.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ooniapi/services/ooniprobe/tests/integ/test_reports.py b/ooniapi/services/ooniprobe/tests/integ/test_reports.py index 4cbddb3a8..0e0fdee30 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_reports.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_reports.py @@ -98,10 +98,10 @@ async def test_collector_upload_msmt_valid(client): ], ) @pytest.mark.asyncio -async def test_collector_upload_msmt_rejects_body_path_metadata_mismatch( +async def test_rejects_body_report_id_mismatch( client, content_overrides ): - """_compare_report_id_to_body_meta must reject uploads when report_id and body disagree""" + """Measurement upload must reject when report_id and body disagree""" rid = "20230101T000000Z_integtest_IT_1_n1_integtest0000000" msmt_payload = { "format": "json", From 0d2a8e2254f0b5550a26c22e88cb53193a391d5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Tue, 19 May 2026 12:34:36 +0200 Subject: [PATCH 22/22] Remove redundant test_name reformat --- .../ooniprobe/src/ooniprobe/routers/v1/probe_services.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py index 7c789284a..364f5e9a9 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py @@ -1009,7 +1009,6 @@ async def submit_measurement( # wasn't possible to send msmnt to fastpath, try to send it to s3 ts_prefix = now.strftime("%Y%m%d%H") - tn = test_name.replace("_", "") s3_key = f"postcans/{ts_prefix}/{ts_prefix}_{cc}_{tn}/{msmt_uid}.post" try: await run_in_threadpool(