Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ jobs:
-e AWS_REGION=us-east-1 \
-e AWS_ACCESS_KEY_ID=dummy \
-e AWS_SECRET_ACCESS_KEY=dummy \
-e POSTGRES_HOST=postgres \
-e POSTGRES_PASSWORD=PdwPNS2mDN73Vfbc \
-e POSTGRES_DB=polis-test \
-e SKIP_GOLDEN=1 \
delphi \
bash -c " \
Expand Down
2 changes: 2 additions & 0 deletions delphi/polismath/database/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ def initialize(self) -> None:
pool_size=self.config.pool_size,
max_overflow=self.config.max_overflow,
pool_recycle=300, # Recycle connections after 5 minutes
connect_args={"connect_timeout": 5},
pool_pre_ping=True,
)

# Create session factory
Expand Down
2 changes: 1 addition & 1 deletion delphi/polismath/regression/comparer.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def compare_with_golden(
results = {
"dataset": dataset_name,
"stages_compared": {},
"timing_stats_compared": {} if benchmark else None,
"timing_stats_compared": {},
"overall_match": True,
"metadata": golden["metadata"]
}
Expand Down
2 changes: 1 addition & 1 deletion delphi/polismath/regression/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def compute_all_stages(
votes_dict: Dict,
fixed_timestamp: int,
skip_intermediate_stages: bool = False,
) -> Dict[str, Dict[str, Any]]:
) -> Dict[str, Any]:
"""
Compute all conversation stages with timing information.

Expand Down
3 changes: 2 additions & 1 deletion delphi/polismath/run_math_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def connect_to_db():
database_url = os.environ.get("DATABASE_URL")
if database_url:
logger.info(f"Using DATABASE_URL: {database_url.split('@')[1] if '@' in database_url else '(hidden)'}")
conn = psycopg2.connect(database_url)
conn = psycopg2.connect(database_url, connect_timeout=5)
else:
# Fall back to individual connection parameters
conn = psycopg2.connect(
Expand All @@ -61,6 +61,7 @@ def connect_to_db():
password=os.environ.get("DATABASE_PASSWORD", ""),
host=os.environ.get("DATABASE_HOST", "localhost"),
port=os.environ.get("DATABASE_PORT", 5432),
connect_timeout=5,
)

logger.info("Connected to database successfully")
Expand Down
1 change: 1 addition & 0 deletions delphi/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ dev = [
"pytest-asyncio>=0.21.0",
"pytest-cov>=4.0.0",
"pytest-check>=2.0.0",
"pytest-timestamper>=0.0.10", # Timestamps in -v mode to detect hung tests
"httpx>=0.23.0",
"pytest-xdist>=3.8.0",
"moto>=4.1.0",
Expand Down
2 changes: 1 addition & 1 deletion delphi/scripts/regression_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def get_db_connection():
"DATABASE_URL environment variable is not set. "
"Please set it to a valid Postgres connection string (e.g., postgres://user:pass@host:port/dbname)"
)
return psycopg2.connect(database_url)
return psycopg2.connect(database_url, connect_timeout=5)

# TODO: Uncomment once sorted the difference between env var names between delphi and rest of polis
## Fallback to individual parameters
Expand Down
75 changes: 75 additions & 0 deletions delphi/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Command line options --include-local and --datasets for dataset selection
- Fixtures for accessing dataset information
- @pytest.mark.use_discovered_datasets for dynamic dataset parametrization
- require_dynamodb() and require_s3() helpers for failing fast when services are unavailable
- Session-scoped conversation cache for efficient test execution
"""

Expand All @@ -22,6 +23,80 @@
from tests.common_utils import load_votes, load_comments


def require_dynamodb(
    endpoint: str | None = None,
    timeout: float = 3.0,
) -> None:
    """Fail the current test right away when DynamoDB cannot be reached.

    Issues a single ``list_tables`` probe configured with short connect/read
    timeouts and no retries, so an unreachable service surfaces as a fast
    test failure instead of an indefinite hang.
    """
    import os

    import boto3
    from botocore.config import Config

    target = endpoint or os.environ.get(
        "DYNAMODB_ENDPOINT", "http://localhost:8000"
    )
    # Zero retries + tight timeouts: we want a quick yes/no, not resilience.
    probe_config = Config(
        connect_timeout=timeout,
        read_timeout=timeout,
        retries={"max_attempts": 0},
    )
    ddb = boto3.client(
        "dynamodb",
        endpoint_url=target,
        region_name="us-east-1",
        aws_access_key_id="dummy",
        aws_secret_access_key="dummy",
        config=probe_config,
    )
    try:
        ddb.list_tables(Limit=1)
    except Exception as exc:
        pytest.fail(f"DynamoDB is not available at {target}: {exc}")


def require_s3(
    endpoint: str | None = None,
    timeout: float = 3.0,
) -> None:
    """Skip the current test when S3/MinIO cannot be reached.

    Probes the service with a single ``list_buckets`` call using short
    timeouts and zero retries. Skips rather than fails because MinIO is a
    local development service that is not expected to exist in CI.
    """
    import os

    import boto3
    from botocore.config import Config

    target = endpoint or os.environ.get(
        "AWS_S3_ENDPOINT", "http://host.docker.internal:9000"
    )
    # Tight timeouts and no retries: the goal is a fast availability check.
    probe_config = Config(
        connect_timeout=timeout,
        read_timeout=timeout,
        retries={"max_attempts": 0},
        signature_version="s3v4",
    )
    s3 = boto3.client(
        "s3",
        endpoint_url=target,
        region_name="us-east-1",
        aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID", "minioadmin"),
        aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY", "minioadmin"),
        config=probe_config,
        verify=False,
    )
    try:
        s3.list_buckets()
    except Exception as exc:
        pytest.skip(f"S3/MinIO is not available at {target}: {exc}")


# =============================================================================
# Session-scoped Conversation Cache
# =============================================================================
Expand Down
3 changes: 2 additions & 1 deletion delphi/tests/profile_postgres_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def connect_to_db():
dbname="polis_subset",
user="christian",
password="christian",
host="localhost"
host="localhost",
connect_timeout=5,
)
print("Connected to database successfully")
return conn
Expand Down
22 changes: 13 additions & 9 deletions delphi/tests/test_501_calculate_comment_extremity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
def test_calculate_and_store_extremity_with_mocks():
"""
Tests the main logic of calculate_and_store_extremity by mocking its dependencies.
- Mocks GroupDataProcessor to avoid database calls.
- Mocks GroupDataProcessor and PostgresClient to avoid database calls.
- Mocks check_existing_extremity_values to force recalculation.
- Verifies that the function correctly processes the mock output.
"""
Expand All @@ -29,12 +29,14 @@ def test_calculate_and_store_extremity_with_mocks():
{'comment_id': 102, 'comment_extremity': 0.25},
{'comment_id': 103, 'comment_extremity': 0.50},
# A comment that might be missing the extremity value
{'comment_id': 104},
{'comment_id': 104},
]
}

# 2. Patch the dependencies within the script's namespace
with mock.patch.object(extremity_module, 'GroupDataProcessor') as MockGroupDataProcessor, \
# Also patch PostgresClient to avoid DB connection attempts when Docker is unavailable
with mock.patch.object(extremity_module, 'PostgresClient') as MockPostgresClient, \
mock.patch.object(extremity_module, 'GroupDataProcessor') as MockGroupDataProcessor, \
mock.patch.object(extremity_module, 'check_existing_extremity_values', return_value={}) as mock_check_existing:

# Configure the mock instance of GroupDataProcessor
Expand Down Expand Up @@ -68,14 +70,16 @@ def test_check_for_existing_values(monkeypatch):
conversation_id = 54321
existing_values = {201: 0.9, 202: 0.1}

# Patch the check function and the GroupDataProcessor class
with mock.patch.object(extremity_module, 'check_existing_extremity_values', return_value=existing_values) as mock_check_existing, \
# Patch PostgresClient, GroupDataProcessor and the check function
# PostgresClient must be mocked to avoid DB connection attempts when Docker is unavailable
with mock.patch.object(extremity_module, 'PostgresClient') as MockPostgresClient, \
mock.patch.object(extremity_module, 'check_existing_extremity_values', return_value=existing_values) as mock_check_existing, \
mock.patch.object(extremity_module, 'GroupDataProcessor') as MockGroupDataProcessor:

# Configure the mock instance that the class will produce upon instantiation
mock_processor_instance = mock.MagicMock()
MockGroupDataProcessor.return_value = mock_processor_instance

# Call the function with force_recalculation=False
result = calculate_and_store_extremity(conversation_id, force_recalculation=False)

Expand All @@ -84,9 +88,9 @@ def test_check_for_existing_values(monkeypatch):

# Assert that the check for existing values was performed
mock_check_existing.assert_called_once_with(conversation_id)

# Assert that GroupDataProcessor was instantiated (due to the script's structure)
MockGroupDataProcessor.assert_called_once()

# Crucially, assert that the expensive calculation method was NOT called on the instance
mock_processor_instance.get_export_data.assert_not_called()
4 changes: 3 additions & 1 deletion delphi/tests/test_batch_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ class TestBatchIdStorage:
@pytest.fixture(scope="class")
def dynamodb_resource(self):
"""Set up DynamoDB resource connection."""
from tests.conftest import require_dynamodb
require_dynamodb()
logger.debug("Setting up DynamoDB resource connection")
return boto3.resource(
'dynamodb',
endpoint_url= os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000'),
endpoint_url=os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000'),
region_name='us-east-1',
aws_access_key_id='fakeMyKeyId',
aws_secret_access_key='fakeSecretAccessKey'
Expand Down
3 changes: 3 additions & 0 deletions delphi/tests/test_math_pipeline_runs_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
@pytest.fixture(scope="module")
def dynamodb_resource():
"""Create a resource connection to the test DynamoDB."""
from tests.conftest import require_dynamodb
require_dynamodb()

endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000')
if not endpoint_url:
pytest.fail("DYNAMODB_ENDPOINT not set. Cannot connect to test DynamoDB.")
Expand Down
2 changes: 2 additions & 0 deletions delphi/tests/test_minio_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@

def test_s3_access():
"""Test S3/MinIO access by listing bucket contents"""
from tests.conftest import require_s3

# Get S3 settings from environment or use defaults
endpoint_url = os.environ.get("AWS_S3_ENDPOINT", "http://host.docker.internal:9000")
require_s3(endpoint=endpoint_url)
access_key = os.environ.get("AWS_ACCESS_KEY_ID", "minioadmin")
secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY", "minioadmin")
bucket_name = os.environ.get("AWS_S3_BUCKET_NAME", "delphi")
Expand Down
2 changes: 1 addition & 1 deletion delphi/tests/test_pakistan_conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def test_pakistan_conversation_batch():
conn = connect_to_db()
if not conn:
logger.error(f"[{time.time() - start_time:.2f}s] Database connection failed")
pytest.skip("Could not connect to PostgreSQL database")
pytest.fail("Could not connect to PostgreSQL database")

try:
# Create a new conversation
Expand Down
36 changes: 27 additions & 9 deletions delphi/tests/test_postgres_real_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,28 @@ def write_to_dynamodb(dynamodb_client, conversation_id, conv):


def connect_to_db():
"""Connect to PostgreSQL database."""
"""Connect to PostgreSQL database using DATABASE_URL or DATABASE_* env vars."""
from dotenv import find_dotenv, load_dotenv

# Walk up directories to find .env (works both locally and in containers)
load_dotenv(find_dotenv())

database_url = os.environ.get('DATABASE_URL')
if not database_url:
# Fall back to individual DATABASE_* vars (same pattern as postgres.py)
host = os.environ.get('DATABASE_HOST')
port = os.environ.get('DATABASE_PORT', '5432')
name = os.environ.get('DATABASE_NAME')
user = os.environ.get('DATABASE_USER')
password = os.environ.get('DATABASE_PASSWORD', '')
if host and name and user:
database_url = f"postgres://{user}:{password}@{host}:{port}/{name}"
else:
print("Neither DATABASE_URL nor DATABASE_HOST/NAME/USER are set")
return None

try:
conn = psycopg2.connect(
database=os.environ.get('POSTGRES_DB', 'polismath'),
user=os.environ.get('POSTGRES_USER', 'postgres'),
password=os.environ.get('POSTGRES_PASSWORD', 'postgres'),
host=os.environ.get('POSTGRES_HOST', 'localhost'),
port=os.environ.get('POSTGRES_PORT', '5432')
)
conn = psycopg2.connect(database_url, connect_timeout=5)
print("Connected to database successfully")
return conn
except Exception as e:
Expand Down Expand Up @@ -516,6 +529,9 @@ def test_conversation_from_postgres():
"""
Test processing a conversation with data from PostgreSQL.
"""
from tests.conftest import require_dynamodb
require_dynamodb()

import time
start_time = time.time()

Expand All @@ -526,7 +542,7 @@ def test_conversation_from_postgres():
conn = connect_to_db()
if not conn:
print(f"[{time.time() - start_time:.2f}s] Database connection failed")
pytest.skip("Could not connect to PostgreSQL database")
pytest.fail("Could not connect to PostgreSQL database")

try:
# Get popular conversations
Expand Down Expand Up @@ -801,6 +817,8 @@ def test_dynamodb_direct():
Test writing directly to DynamoDB without PostgreSQL.
This is useful for directly testing the DynamoDB functionality.
"""
from tests.conftest import require_dynamodb
require_dynamodb()
print("\nTesting direct DynamoDB write functionality with new schema")

try:
Expand Down
12 changes: 9 additions & 3 deletions delphi/tests/test_umap_narrative_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
if umap_narrative_dir not in sys.path:
sys.path.insert(0, umap_narrative_dir)

# Now we can import the main function from the script we want to test
from run_pipeline import main as run_pipeline_main
# NOTE: Import run_pipeline inside tests, not at module level.
# Module-level import loads heavy dependencies (SentenceTransformer, UMAP, evoc)
# during test collection. With xdist, multiple workers collecting simultaneously
# *might* cause lock contention and hangs. A previous session reported indefinite
# hangs under xdist, but we couldn't reproduce it. Deferred import is defensive.

@pytest.fixture(autouse=True)
def setup_and_teardown(tmp_path, monkeypatch):
Expand All @@ -32,6 +35,9 @@ def test_pipeline_calls_correct_functions(tmp_path):
that they are called correctly, instead of asserting on file creation.
This avoids failures related to external library rendering issues.
"""
# Import inside test to avoid potential xdist collection-time hang (see module comment)
from run_pipeline import main as run_pipeline_main

zid = "12345"
test_args = [
"run_pipeline.py",
Expand Down Expand Up @@ -59,7 +65,7 @@ def process_comments_side_effect(*args, **kwargs):
mock.patch('run_pipeline.create_basic_layer_visualization') as mock_create_basic, \
mock.patch('run_pipeline.create_named_layer_visualization') as mock_create_named, \
mock.patch('run_pipeline.create_enhanced_multilayer_index') as mock_create_index:

# Ensure the mocked visualization function returns a mock file path
mock_create_named.return_value = "mock/path/to/file.html"

Expand Down
Loading