Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ jobs:
-e AWS_REGION=us-east-1 \
-e AWS_ACCESS_KEY_ID=dummy \
-e AWS_SECRET_ACCESS_KEY=dummy \
-e POSTGRES_HOST=postgres \
-e POSTGRES_PASSWORD=PdwPNS2mDN73Vfbc \
-e POSTGRES_DB=polis-test \
-e SKIP_GOLDEN=1 \
delphi \
bash -c " \
Expand Down
2 changes: 2 additions & 0 deletions delphi/polismath/database/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ def initialize(self) -> None:
pool_size=self.config.pool_size,
max_overflow=self.config.max_overflow,
pool_recycle=300, # Recycle connections after 5 minutes
connect_args={"connect_timeout": 5},
pool_pre_ping=True,
)

# Create session factory
Expand Down
2 changes: 1 addition & 1 deletion delphi/polismath/regression/comparer.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def compare_with_golden(
results = {
"dataset": dataset_name,
"stages_compared": {},
"timing_stats_compared": {} if benchmark else None,
"timing_stats_compared": {},
"overall_match": True,
"metadata": golden["metadata"]
}
Expand Down
2 changes: 1 addition & 1 deletion delphi/polismath/regression/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def compute_all_stages(
votes_dict: Dict,
fixed_timestamp: int,
skip_intermediate_stages: bool = False,
) -> Dict[str, Dict[str, Any]]:
) -> Dict[str, Any]:
"""
Compute all conversation stages with timing information.

Expand Down
3 changes: 2 additions & 1 deletion delphi/polismath/run_math_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def connect_to_db():
database_url = os.environ.get("DATABASE_URL")
if database_url:
logger.info(f"Using DATABASE_URL: {database_url.split('@')[1] if '@' in database_url else '(hidden)'}")
conn = psycopg2.connect(database_url)
conn = psycopg2.connect(database_url, connect_timeout=5)
else:
# Fall back to individual connection parameters
conn = psycopg2.connect(
Expand All @@ -61,6 +61,7 @@ def connect_to_db():
password=os.environ.get("DATABASE_PASSWORD", ""),
host=os.environ.get("DATABASE_HOST", "localhost"),
port=os.environ.get("DATABASE_PORT", 5432),
connect_timeout=5,
)

logger.info("Connected to database successfully")
Expand Down
1 change: 1 addition & 0 deletions delphi/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ dev = [
"pytest-asyncio>=0.21.0",
"pytest-cov>=4.0.0",
"pytest-check>=2.0.0",
"pytest-timestamper>=0.0.10", # Timestamps in -v mode to detect hung tests
"httpx>=0.23.0",
"pytest-xdist>=3.8.0",
"moto>=4.1.0",
Expand Down
2 changes: 1 addition & 1 deletion delphi/scripts/regression_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def get_db_connection():
"DATABASE_URL environment variable is not set. "
"Please set it to a valid Postgres connection string (e.g., postgres://user:pass@host:port/dbname)"
)
return psycopg2.connect(database_url)
return psycopg2.connect(database_url, connect_timeout=5)

# TODO: Uncomment once sorted the difference between env var names between delphi and rest of polis
## Fallback to individual parameters
Expand Down
75 changes: 75 additions & 0 deletions delphi/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Command line options --include-local and --datasets for dataset selection
- Fixtures for accessing dataset information
- @pytest.mark.use_discovered_datasets for dynamic dataset parametrization
- require_dynamodb() and require_s3() helpers for failing fast when services are unavailable
- Session-scoped conversation cache for efficient test execution
"""

Expand All @@ -22,6 +23,80 @@
from tests.common_utils import load_votes, load_comments


def require_dynamodb(
    endpoint: str | None = None,
    timeout: float = 3.0,
) -> None:
    """Fail the current test right away when DynamoDB cannot be reached.

    Issues a single ``list_tables`` probe configured with short connect/read
    timeouts and no retries, so an unreachable service surfaces as a fast
    test failure instead of an indefinite hang.
    """
    import os

    import boto3
    from botocore.config import Config

    target = endpoint or os.environ.get(
        "DYNAMODB_ENDPOINT", "http://localhost:8000"
    )
    # Zero retries + tight timeouts: we want a quick yes/no, not resilience.
    probe_config = Config(
        connect_timeout=timeout,
        read_timeout=timeout,
        retries={"max_attempts": 0},
    )
    ddb = boto3.client(
        "dynamodb",
        endpoint_url=target,
        region_name="us-east-1",
        aws_access_key_id="dummy",
        aws_secret_access_key="dummy",
        config=probe_config,
    )
    try:
        ddb.list_tables(Limit=1)
    except Exception as exc:
        pytest.fail(f"DynamoDB is not available at {target}: {exc}")


def require_s3(
    endpoint: str | None = None,
    timeout: float = 3.0,
) -> None:
    """Skip the current test when S3/MinIO cannot be reached.

    Probes the service with a single ``list_buckets`` call using short
    timeouts and zero retries. Skips rather than fails because MinIO is a
    local development service that is not expected to exist in CI.
    """
    import os

    import boto3
    from botocore.config import Config

    target = endpoint or os.environ.get(
        "AWS_S3_ENDPOINT", "http://host.docker.internal:9000"
    )
    # Tight timeouts and no retries: the goal is a fast availability check.
    probe_config = Config(
        connect_timeout=timeout,
        read_timeout=timeout,
        retries={"max_attempts": 0},
        signature_version="s3v4",
    )
    s3 = boto3.client(
        "s3",
        endpoint_url=target,
        region_name="us-east-1",
        aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID", "minioadmin"),
        aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY", "minioadmin"),
        config=probe_config,
        verify=False,
    )
    try:
        s3.list_buckets()
    except Exception as exc:
        pytest.skip(f"S3/MinIO is not available at {target}: {exc}")


# =============================================================================
# Session-scoped Conversation Cache
# =============================================================================
Expand Down
3 changes: 2 additions & 1 deletion delphi/tests/profile_postgres_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def connect_to_db():
dbname="polis_subset",
user="christian",
password="christian",
host="localhost"
host="localhost",
connect_timeout=5,
)
print("Connected to database successfully")
return conn
Expand Down
22 changes: 13 additions & 9 deletions delphi/tests/test_501_calculate_comment_extremity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
def test_calculate_and_store_extremity_with_mocks():
"""
Tests the main logic of calculate_and_store_extremity by mocking its dependencies.
- Mocks GroupDataProcessor to avoid database calls.
- Mocks GroupDataProcessor and PostgresClient to avoid database calls.
- Mocks check_existing_extremity_values to force recalculation.
- Verifies that the function correctly processes the mock output.
"""
Expand All @@ -29,12 +29,14 @@ def test_calculate_and_store_extremity_with_mocks():
{'comment_id': 102, 'comment_extremity': 0.25},
{'comment_id': 103, 'comment_extremity': 0.50},
# A comment that might be missing the extremity value
{'comment_id': 104},
{'comment_id': 104},
]
}

# 2. Patch the dependencies within the script's namespace
with mock.patch.object(extremity_module, 'GroupDataProcessor') as MockGroupDataProcessor, \
# Also patch PostgresClient to avoid DB connection attempts when Docker is unavailable
with mock.patch.object(extremity_module, 'PostgresClient') as MockPostgresClient, \
mock.patch.object(extremity_module, 'GroupDataProcessor') as MockGroupDataProcessor, \
mock.patch.object(extremity_module, 'check_existing_extremity_values', return_value={}) as mock_check_existing:

# Configure the mock instance of GroupDataProcessor
Expand Down Expand Up @@ -68,14 +70,16 @@ def test_check_for_existing_values(monkeypatch):
conversation_id = 54321
existing_values = {201: 0.9, 202: 0.1}

# Patch the check function and the GroupDataProcessor class
with mock.patch.object(extremity_module, 'check_existing_extremity_values', return_value=existing_values) as mock_check_existing, \
# Patch PostgresClient, GroupDataProcessor and the check function
# PostgresClient must be mocked to avoid DB connection attempts when Docker is unavailable
with mock.patch.object(extremity_module, 'PostgresClient') as MockPostgresClient, \
mock.patch.object(extremity_module, 'check_existing_extremity_values', return_value=existing_values) as mock_check_existing, \
mock.patch.object(extremity_module, 'GroupDataProcessor') as MockGroupDataProcessor:

# Configure the mock instance that the class will produce upon instantiation
mock_processor_instance = mock.MagicMock()
MockGroupDataProcessor.return_value = mock_processor_instance

# Call the function with force_recalculation=False
result = calculate_and_store_extremity(conversation_id, force_recalculation=False)

Expand All @@ -84,9 +88,9 @@ def test_check_for_existing_values(monkeypatch):

# Assert that the check for existing values was performed
mock_check_existing.assert_called_once_with(conversation_id)

# Assert that GroupDataProcessor was instantiated (due to the script's structure)
MockGroupDataProcessor.assert_called_once()

# Crucially, assert that the expensive calculation method was NOT called on the instance
mock_processor_instance.get_export_data.assert_not_called()
4 changes: 3 additions & 1 deletion delphi/tests/test_batch_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ class TestBatchIdStorage:
@pytest.fixture(scope="class")
def dynamodb_resource(self):
"""Set up DynamoDB resource connection."""
from tests.conftest import require_dynamodb
require_dynamodb()
logger.debug("Setting up DynamoDB resource connection")
return boto3.resource(
'dynamodb',
endpoint_url= os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000'),
endpoint_url=os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000'),
region_name='us-east-1',
aws_access_key_id='fakeMyKeyId',
aws_secret_access_key='fakeSecretAccessKey'
Expand Down
3 changes: 3 additions & 0 deletions delphi/tests/test_math_pipeline_runs_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
@pytest.fixture(scope="module")
def dynamodb_resource():
"""Create a resource connection to the test DynamoDB."""
from tests.conftest import require_dynamodb
require_dynamodb()

endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000')
if not endpoint_url:
pytest.fail("DYNAMODB_ENDPOINT not set. Cannot connect to test DynamoDB.")
Expand Down
2 changes: 2 additions & 0 deletions delphi/tests/test_minio_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@

def test_s3_access():
"""Test S3/MinIO access by listing bucket contents"""
from tests.conftest import require_s3

# Get S3 settings from environment or use defaults
endpoint_url = os.environ.get("AWS_S3_ENDPOINT", "http://host.docker.internal:9000")
require_s3(endpoint=endpoint_url)
access_key = os.environ.get("AWS_ACCESS_KEY_ID", "minioadmin")
secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY", "minioadmin")
bucket_name = os.environ.get("AWS_S3_BUCKET_NAME", "delphi")
Expand Down
2 changes: 1 addition & 1 deletion delphi/tests/test_pakistan_conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def test_pakistan_conversation_batch():
conn = connect_to_db()
if not conn:
logger.error(f"[{time.time() - start_time:.2f}s] Database connection failed")
pytest.skip("Could not connect to PostgreSQL database")
pytest.fail("Could not connect to PostgreSQL database")

try:
# Create a new conversation
Expand Down
36 changes: 27 additions & 9 deletions delphi/tests/test_postgres_real_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,28 @@ def write_to_dynamodb(dynamodb_client, conversation_id, conv):


def connect_to_db():
"""Connect to PostgreSQL database."""
"""Connect to PostgreSQL database using DATABASE_URL or DATABASE_* env vars."""
from dotenv import find_dotenv, load_dotenv

# Walk up directories to find .env (works both locally and in containers)
load_dotenv(find_dotenv())

database_url = os.environ.get('DATABASE_URL')
if not database_url:
# Fall back to individual DATABASE_* vars (same pattern as postgres.py)
host = os.environ.get('DATABASE_HOST')
port = os.environ.get('DATABASE_PORT', '5432')
name = os.environ.get('DATABASE_NAME')
user = os.environ.get('DATABASE_USER')
password = os.environ.get('DATABASE_PASSWORD', '')
if host and name and user:
database_url = f"postgres://{user}:{password}@{host}:{port}/{name}"
else:
print("Neither DATABASE_URL nor DATABASE_HOST/NAME/USER are set")
return None

try:
conn = psycopg2.connect(
database=os.environ.get('POSTGRES_DB', 'polismath'),
user=os.environ.get('POSTGRES_USER', 'postgres'),
password=os.environ.get('POSTGRES_PASSWORD', 'postgres'),
host=os.environ.get('POSTGRES_HOST', 'localhost'),
port=os.environ.get('POSTGRES_PORT', '5432')
)
conn = psycopg2.connect(database_url, connect_timeout=5)
print("Connected to database successfully")
return conn
except Exception as e:
Expand Down Expand Up @@ -516,6 +529,9 @@ def test_conversation_from_postgres():
"""
Test processing a conversation with data from PostgreSQL.
"""
from tests.conftest import require_dynamodb
require_dynamodb()

import time
start_time = time.time()

Expand All @@ -526,7 +542,7 @@ def test_conversation_from_postgres():
conn = connect_to_db()
if not conn:
print(f"[{time.time() - start_time:.2f}s] Database connection failed")
pytest.skip("Could not connect to PostgreSQL database")
pytest.fail("Could not connect to PostgreSQL database")

try:
# Get popular conversations
Expand Down Expand Up @@ -801,6 +817,8 @@ def test_dynamodb_direct():
Test writing directly to DynamoDB without PostgreSQL.
This is useful for directly testing the DynamoDB functionality.
"""
from tests.conftest import require_dynamodb
require_dynamodb()
print("\nTesting direct DynamoDB write functionality with new schema")

try:
Expand Down
12 changes: 9 additions & 3 deletions delphi/tests/test_umap_narrative_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
if umap_narrative_dir not in sys.path:
sys.path.insert(0, umap_narrative_dir)

# Now we can import the main function from the script we want to test
from run_pipeline import main as run_pipeline_main
# NOTE: Import run_pipeline inside tests, not at module level.
# Module-level import loads heavy dependencies (SentenceTransformer, UMAP, evoc)
# during test collection. With xdist, multiple workers collecting simultaneously
# *might* cause lock contention and hangs. A previous session reported indefinite
# hangs under xdist, but we couldn't reproduce it. Deferred import is defensive.

@pytest.fixture(autouse=True)
def setup_and_teardown(tmp_path, monkeypatch):
Expand All @@ -32,6 +35,9 @@ def test_pipeline_calls_correct_functions(tmp_path):
that they are called correctly, instead of asserting on file creation.
This avoids failures related to external library rendering issues.
"""
# Import inside test to avoid potential xdist collection-time hang (see module comment)
from run_pipeline import main as run_pipeline_main

zid = "12345"
test_args = [
"run_pipeline.py",
Expand Down Expand Up @@ -59,7 +65,7 @@ def process_comments_side_effect(*args, **kwargs):
mock.patch('run_pipeline.create_basic_layer_visualization') as mock_create_basic, \
mock.patch('run_pipeline.create_named_layer_visualization') as mock_create_named, \
mock.patch('run_pipeline.create_enhanced_multilayer_index') as mock_create_index:

# Ensure the mocked visualization function returns a mock file path
mock_create_named.return_value = "mock/path/to/file.html"

Expand Down
Loading