diff --git a/.circleci/config.yml b/.circleci/config.yml
index 62f8aa66a0a..5afe6f8b5a6 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -224,7 +224,7 @@ workflows:
- test
filters:
tags:
- only: /[0-9]+(\.[0-9]+)+(\.[0-9]+)*/
+ only: /[0-9]+(\.[0-9]+)+(\.[0-9]+)*(-lts\.[0-9]*)?/
branches:
ignore: /.*/
- build:
diff --git a/ci-requirements.txt b/ci-requirements.txt
index a1a2e6f74f7..7908a922505 100644
--- a/ci-requirements.txt
+++ b/ci-requirements.txt
@@ -1,3 +1,4 @@
isort ==7.0.0
black ==26.1.0
-pytest ==8.4.2
\ No newline at end of file
+pytest ==8.4.2
+uv
diff --git a/connectors-sdk/TDRs/2026-02-27-Deprecating_configuration_variables _with_BaseConnectorSettings.md b/connectors-sdk/TDRs/2026-02-27-Deprecating_configuration_variables _with_BaseConnectorSettings.md
index 93fa372e515..6782c7d65a8 100644
--- a/connectors-sdk/TDRs/2026-02-27-Deprecating_configuration_variables _with_BaseConnectorSettings.md
+++ b/connectors-sdk/TDRs/2026-02-27-Deprecating_configuration_variables _with_BaseConnectorSettings.md
@@ -78,7 +78,7 @@ This validator:
The SDK introduces `DeprecatedField`, a helper built on top of `pydantic.Field`, allowing deprecation metadata to be declared directly on fields.
Supported metadata includes:
-- `deprecated`: deprecation flag or message (already present on pydanctic.Field)
+- `deprecated`: deprecation flag or message (already present on pydantic.Field)
- `new_namespace`: destination namespace
- `new_namespaced_var`: destination variable name
- `new_value_factory`: optional value transformation function
@@ -108,9 +108,14 @@ Variable migration supports:
### Deprecated variable inside a configuration model
+Preferred: using `DeprecatedField`:
+
```python
+from pydantic import Field
+from connectors_sdk import BaseConfigModel, DeprecatedField
+
class MyConfig(BaseConfigModel):
- old_var: SkipValidation[int] = DeprecatedField(
+ old_var: int = DeprecatedField(
deprecated="Use new_var instead",
new_namespaced_var="new_var",
new_value_factory=lambda x: x * 60, # Optional transformation
@@ -119,11 +124,33 @@ class MyConfig(BaseConfigModel):
new_var: int = Field(description="New variable")
```
+Alternative: using `Deprecate` annotation metadata:
+
+```python
+from typing import Annotated
+from pydantic import Field
+from connectors_sdk import BaseConfigModel, Deprecate
+
+class MyConfig(BaseConfigModel):
+ old_var: Annotated[
+ int,
+ Deprecate(
+ new_namespaced_var="new_var",
+ new_value_factory=lambda x: x * 60, # Optional transformation
+ removal_date="2026-12-31", # Optional informative removal deadline
+ ),
+ ]
+ new_var: int = Field(description="New variable")
+```
+
### Deprecated namespace at connector settings level
```python
+from pydantic import Field
+from connectors_sdk import BaseConnectorSettings, DeprecatedField
+
class ConnectorSettings(BaseConnectorSettings):
- old_namespace: SkipValidation[MyConfig] = DeprecatedField(
+ old_namespace: MyConfig = DeprecatedField(
deprecated="Use new_namespace instead",
new_namespace="new_namespace",
removal_date="2026-12-31", # Optional informative removal deadline
@@ -166,3 +193,16 @@ If both old and new settings are present, new settings take precedence
This change introduces a declarative, centralized framework for configuration deprecation in connectors.
By moving migration logic into the SDK and driving it through field metadata, it eliminates duplicated code, enforces a single validated configuration schema, and provides a clear and explicit deprecation path for users.
+
+---
+
+## Update (2026-03-13)
+
+This TDR is amended to clarify the recommended public API for field deprecation declarations.
+
+- `DeprecatedField` is the preferred and documented way to mark configuration fields as deprecated.
+- `Deprecate` exists as a lower-level annotation and is available for direct `Annotated[...]` usage if preferred.
+- `DeprecatedField` attaches `Deprecate` metadata under the hood, which `BaseConnectorSettings` consumes during validation, so both declaration syntaxes behave the same.
+- `pydantic.SkipValidation` remains supported for backward compatibility, but it is no longer required when using `DeprecatedField`/`Deprecate`.
+
+This update is a documentation clarification only (code examples have been updated). It does not change migration behavior.
\ No newline at end of file
diff --git a/connectors-sdk/connectors_sdk/__init__.py b/connectors-sdk/connectors_sdk/__init__.py
index fdd56483404..ec2fd3ac909 100644
--- a/connectors-sdk/connectors_sdk/__init__.py
+++ b/connectors-sdk/connectors_sdk/__init__.py
@@ -16,6 +16,7 @@
BaseStreamConnectorConfig,
)
from connectors_sdk.settings.deprecations import (
+ Deprecate,
DeprecatedField,
)
from connectors_sdk.settings.exceptions import (
@@ -40,5 +41,6 @@
"DatetimeFromIsoString",
"ListFromString",
# Deprecations
+ "Deprecate",
"DeprecatedField",
]
diff --git a/connectors-sdk/connectors_sdk/models/__init__.py b/connectors-sdk/connectors_sdk/models/__init__.py
index 22957bbc276..1181f6ceef1 100644
--- a/connectors-sdk/connectors_sdk/models/__init__.py
+++ b/connectors-sdk/connectors_sdk/models/__init__.py
@@ -12,6 +12,7 @@
from connectors_sdk.models.city import City
from connectors_sdk.models.country import Country
from connectors_sdk.models.domain_name import DomainName
+from connectors_sdk.models.email_address import EmailAddress
from connectors_sdk.models.external_reference import ExternalReference
from connectors_sdk.models.file import File
from connectors_sdk.models.hostname import Hostname
@@ -35,6 +36,7 @@
from connectors_sdk.models.threat_actor_group import ThreatActorGroup
from connectors_sdk.models.tlp_marking import TLPMarking
from connectors_sdk.models.url import URL
+from connectors_sdk.models.user_account import UserAccount
from connectors_sdk.models.vulnerability import Vulnerability
from connectors_sdk.models.x509_certificate import X509Certificate
@@ -53,6 +55,7 @@
"City",
"Country",
"DomainName",
+ "EmailAddress",
"ExternalReference",
"File",
"Hostname",
@@ -76,6 +79,7 @@
"ThreatActorGroup",
"TLPMarking",
"URL",
+ "UserAccount",
"Vulnerability",
"X509Certificate",
]
diff --git a/connectors-sdk/connectors_sdk/models/email_address.py b/connectors-sdk/connectors_sdk/models/email_address.py
new file mode 100644
index 00000000000..82378fb5418
--- /dev/null
+++ b/connectors-sdk/connectors_sdk/models/email_address.py
@@ -0,0 +1,33 @@
+"""EmailAddress."""
+
+from connectors_sdk.models.base_observable_entity import BaseObservableEntity
+from connectors_sdk.models.reference import Reference
+from connectors_sdk.models.user_account import UserAccount
+from pydantic import Field
+from stix2.v21 import EmailAddress as Stix2EmailAddress
+
+
+class EmailAddress(BaseObservableEntity):
+ """Represent an email address observable on OpenCTI."""
+
+ value: str = Field(
+ description="The email address value.",
+ min_length=1,
+ )
+ display_name: str | None = Field(
+ description="The display name of the email address.",
+ default=None,
+ )
+ belongs_to: UserAccount | Reference | None = Field(
+ description="The user account associated with the email address.",
+ default=None,
+ )
+
+ def to_stix2_object(self) -> Stix2EmailAddress:
+ """Make stix object."""
+ return Stix2EmailAddress(
+ value=self.value,
+ display_name=self.display_name,
+ belongs_to_ref=self.belongs_to.id if self.belongs_to else None,
+ **self._common_stix2_properties(),
+ )
diff --git a/connectors-sdk/connectors_sdk/models/enums.py b/connectors-sdk/connectors_sdk/models/enums.py
index 074e911e8d6..eca60465858 100644
--- a/connectors-sdk/connectors_sdk/models/enums.py
+++ b/connectors-sdk/connectors_sdk/models/enums.py
@@ -6,6 +6,7 @@
from enum import StrEnum
__all__ = [
+ "AccountType",
"AttackMotivation",
"AttackResourceLevel",
"CvssSeverity",
@@ -47,6 +48,25 @@ def _missing_(cls: type[_PermissiveEnum], value: object) -> _PermissiveEnum:
return obj
+class AccountType(_PermissiveEnum):
+ """Account Type Open Vocabulary.
+
+ See https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_k2b7lkt45f0i
+ """
+
+ FACEBOOK = "facebook"
+ LDAP = "ldap"
+ NIS = "nis"
+ OPENID = "openid"
+ RADIUS = "radius"
+ SKYPE = "skype"
+ TACACS = "tacacs"
+ TWITTER = "twitter"
+ UNIX = "unix"
+ WINDOWS_LOCAL = "windows-local"
+ WINDOWS_DOMAIN = "windows-domain"
+
+
class AttackMotivation(_PermissiveEnum):
"""Attack Motivation Open Vocabulary.
diff --git a/connectors-sdk/connectors_sdk/models/user_account.py b/connectors-sdk/connectors_sdk/models/user_account.py
new file mode 100644
index 00000000000..897d4657088
--- /dev/null
+++ b/connectors-sdk/connectors_sdk/models/user_account.py
@@ -0,0 +1,88 @@
+"""UserAccount model."""
+
+from datetime import datetime
+
+from connectors_sdk.models.base_observable_entity import BaseObservableEntity
+from connectors_sdk.models.enums import AccountType
+from pydantic import Field
+from stix2.v21 import UserAccount as Stix2UserAccount
+
+
+class UserAccount(BaseObservableEntity):
+ """Represent a user account observable on OpenCTI."""
+
+ user_id: str | None = Field(
+ description="Identifier of the account in the system (for example UID, GUID, account name, or email address).",
+ default=None,
+ )
+ credential: str | None = Field(
+ description="Cleartext credential for the account (intended for malware-analysis metadata, not for sharing PII).",
+ default=None,
+ )
+ account_login: str | None = Field(
+ description="Account login used by the user to sign in when different from user_id.",
+ default=None,
+ )
+ account_type: AccountType | None = Field(
+ description="Type of account (for example unix, windows-local, windows-domain, twitter).",
+ default=None,
+ )
+ display_name: str | None = Field(
+ description="Display name of the account shown in user interfaces.",
+ default=None,
+ )
+ is_service_account: bool | None = Field(
+ description="Whether the account is associated with a service or system process rather than an individual.",
+ default=None,
+ )
+ is_privileged: bool | None = Field(
+ description="Whether the account has elevated privileges.",
+ default=None,
+ )
+ can_escalate_privs: bool | None = Field(
+ description="Whether the account can escalate privileges.",
+ default=None,
+ )
+ is_disabled: bool | None = Field(
+ description="Whether the account is disabled.",
+ default=None,
+ )
+ account_created: datetime | None = Field(
+ description="When the account was created.",
+ default=None,
+ )
+ account_expires: datetime | None = Field(
+ description="When the account expires.",
+ default=None,
+ )
+ credential_last_changed: datetime | None = Field(
+ description="When the account credential was last changed.",
+ default=None,
+ )
+ account_first_login: datetime | None = Field(
+ description="When the account was first accessed.",
+ default=None,
+ )
+ account_last_login: datetime | None = Field(
+ description="When the account was last accessed.",
+ default=None,
+ )
+
+ def to_stix2_object(self) -> Stix2UserAccount:
+ """Make stix object."""
+ return Stix2UserAccount(
+ user_id=self.user_id,
+ account_login=self.account_login,
+ account_type=self.account_type.value if self.account_type else None,
+ display_name=self.display_name,
+ is_service_account=self.is_service_account,
+ is_privileged=self.is_privileged,
+ can_escalate_privs=self.can_escalate_privs,
+ is_disabled=self.is_disabled,
+ account_created=self.account_created,
+ account_expires=self.account_expires,
+ credential_last_changed=self.credential_last_changed,
+ account_first_login=self.account_first_login,
+ account_last_login=self.account_last_login,
+ **self._common_stix2_properties(),
+ )
diff --git a/connectors-sdk/connectors_sdk/settings/base_settings.py b/connectors-sdk/connectors_sdk/settings/base_settings.py
index bad4b7b8d66..aa9e96dcd0c 100644
--- a/connectors-sdk/connectors_sdk/settings/base_settings.py
+++ b/connectors-sdk/connectors_sdk/settings/base_settings.py
@@ -11,17 +11,17 @@
from copy import deepcopy
from datetime import timedelta
from pathlib import Path
-from typing import Any, Literal, Self
+from typing import Any, ClassVar, Literal, Self
from connectors_sdk.settings.annotated_types import ListFromString
from connectors_sdk.settings.deprecations import (
+ Deprecate,
migrate_deprecated_namespace,
migrate_deprecated_variable,
)
from connectors_sdk.settings.exceptions import ConfigValidationError
from connectors_sdk.settings.json_schema_generator import (
ConnectorConfigJsonSchemaGenerator,
- SanitizedJsonSchemaGenerator,
)
from pydantic import (
BaseModel,
@@ -33,6 +33,7 @@
create_model,
model_validator,
)
+from pydantic.fields import FieldInfo
from pydantic_settings import (
BaseSettings,
DotEnvSettingsSource,
@@ -49,6 +50,41 @@ class BaseConfigModel(BaseModel, ABC):
model_config = ConfigDict(extra="allow", frozen=True, validate_default=True)
+ _model_deprecated_fields: ClassVar[dict[str, FieldInfo]] = {}
+
+ @classmethod
+ def __pydantic_init_subclass__(cls, **kwargs: Any) -> None:
+ """Initialize the `BaseConfigModel` subclass and rebuild model with deprecated fields."""
+ super().__pydantic_init_subclass__(**kwargs)
+
+ cls._model_deprecated_fields = {}
+
+ for name, field in cls.model_fields.items():
+ for meta in field.metadata:
+ if isinstance(meta, Deprecate):
+ # Change validation behavior
+ if not field.deprecated:
+ field.deprecated = True
+ field.default = None
+ field.default_factory = None
+ field.validate_default = False
+
+ # Add deprecation info to JSON schema
+ if not field.json_schema_extra:
+ field.json_schema_extra = {}
+ field.json_schema_extra.update( # type: ignore[union-attr]
+ {
+ "new_namespace": meta.new_namespace,
+ "new_namespaced_var": meta.new_namespaced_var,
+ "removal_date": meta.removal_date,
+ }
+ )
+
+ cls._model_deprecated_fields[name] = field
+
+ if cls._model_deprecated_fields:
+ cls.model_rebuild(force=True)
+
class _OpenCTIConfig(BaseConfigModel):
url: HttpUrl = Field(
@@ -269,19 +305,13 @@ def __init__(self) -> None:
except ValidationError as e:
raise ConfigValidationError("Error validating configuration.") from e
- @classmethod
- def model_json_schema(cls, **kwargs: Any) -> dict[str, Any]: # type: ignore[override]
- """Use a custom JSON schema generator to sanitize the schema and remove function references."""
- kwargs.setdefault("schema_generator", SanitizedJsonSchemaGenerator)
- return super().model_json_schema(**kwargs)
-
@classmethod
def config_json_schema(
cls,
*,
connector_name: str,
by_alias: bool = False,
- mode: str = "validation",
+ mode: Literal["validation", "serialization"] = "validation",
) -> dict[str, Any]:
"""Generate the connector-specific environment variable JSON schema used for metadata contracts."""
@@ -300,38 +330,27 @@ def make_schema_generator(
mode=mode,
)
- @model_validator(mode="wrap")
@classmethod
- def migrate_deprecation(
- cls,
- data: dict[str, Any],
- handler: ModelWrapValidatorHandler[Self],
- ) -> Self:
- """Migrate deprecated variables and namespaces in the configuration data.
+ def _migrate_deprecated_namespaces(cls, data: dict[str, Any]) -> dict[str, Any]:
+ """Migrate deprecated namespaces in the configuration data.
Args:
data: Raw configuration data.
- handler: Pydantic validation handler.
Returns:
- Validated and migrated configuration data.
+ Migrated configuration data.
"""
- for field_name, field in cls.model_fields.items():
- json_schema_extra = field.json_schema_extra
- if not isinstance(json_schema_extra, dict):
- json_schema_extra = {}
- deprecated = field.deprecated
- new_namespace = json_schema_extra.get("new_namespace")
- new_namespaced_var = json_schema_extra.get("new_namespaced_var")
- removal_date = (
- str(json_schema_extra.get("removal_date"))
- if json_schema_extra.get("removal_date")
- else None
- )
+ for field_name, field in cls._model_deprecated_fields.items():
annotation = field.annotation
is_namespace = isinstance(annotation, type) and issubclass(
annotation, BaseConfigModel
)
+ deprecate_metadata = next(
+ m for m in field.metadata if isinstance(m, Deprecate)
+ )
+ new_namespace = deprecate_metadata.new_namespace
+ new_namespaced_var = deprecate_metadata.new_namespaced_var
+ removal_date = deprecate_metadata.removal_date
if is_namespace and new_namespaced_var:
raise ValueError(
@@ -339,11 +358,12 @@ def migrate_deprecation(
"Use only `new_namespace`."
)
- if deprecated and new_namespace and is_namespace:
+ if is_namespace and new_namespace:
if not isinstance(new_namespace, str):
raise ValueError(
- f"`new_namespace` for namespace {field_name} must be a string."
+ f"`new_namespace` for field {field_name} must be a string."
)
+
migrate_deprecated_namespace(
data,
old_namespace=field_name,
@@ -351,32 +371,75 @@ def migrate_deprecation(
removal_date=removal_date,
)
+ return data
+
+ @classmethod
+ def _migrate_deprecated_variables(cls, data: dict[str, Any]) -> dict[str, Any]:
+ """Migrate deprecated variables in the configuration data.
+
+ Args:
+ data: Raw configuration data.
+
+ Returns:
+ Migrated configuration data.
+ """
+ for field_name, field in cls.model_fields.items():
+ annotation = field.annotation
+ is_namespace = isinstance(annotation, type) and issubclass(
+ annotation, BaseConfigModel
+ )
if is_namespace:
- for sub_field_name, sub_field in annotation.model_fields.items(): # type: ignore[union-attr]
- sub_json_schema_extra = sub_field.json_schema_extra
- if not isinstance(sub_json_schema_extra, dict):
- sub_json_schema_extra = {}
- sub_deprecated = sub_field.deprecated
- sub_new_namespace = sub_json_schema_extra.get("new_namespace")
- sub_new_namespaced_var = sub_json_schema_extra.get(
- "new_namespaced_var"
+ for (
+ sub_field_name,
+ sub_field,
+ ) in annotation._model_deprecated_fields.items(): # type: ignore[union-attr]
+ deprecate_metadata = next(
+ m for m in sub_field.metadata if isinstance(m, Deprecate)
)
- sub_new_value_factory = sub_json_schema_extra.get(
- "new_value_factory"
- )
- sub_removal_date = sub_json_schema_extra.get("removal_date")
+ new_namespace = deprecate_metadata.new_namespace
+ new_namespaced_var = deprecate_metadata.new_namespaced_var
+ new_value_factory = deprecate_metadata.new_value_factory
+ removal_date = deprecate_metadata.removal_date
+
+ if new_namespaced_var:
+ if not isinstance(new_namespaced_var, str):
+ raise ValueError(
+ f"`new_namespaced_var` for field {sub_field_name} must be a string."
+ )
- if sub_deprecated and sub_new_namespaced_var:
migrate_deprecated_variable(
data,
old_name=sub_field_name,
- new_name=sub_new_namespaced_var,
+ new_name=new_namespaced_var,
current_namespace=field_name,
- new_namespace=sub_new_namespace,
- new_value_factory=sub_new_value_factory,
- removal_date=sub_removal_date,
+ new_namespace=new_namespace,
+ new_value_factory=new_value_factory,
+ removal_date=removal_date,
)
+ return data
+
+ @model_validator(mode="wrap")
+ @classmethod
+ def _migrate_deprecation(
+ cls, data: dict[str, Any], handler: ModelWrapValidatorHandler[Self]
+ ) -> Self:
+ """Migrate deprecated namespaces and variables in the configuration data.
+
+ Args:
+ data: Raw configuration data.
+ handler: Pydantic validation handler.
+
+ Returns:
+ Validated and migrated configuration data.
+
+ Notes:
+ - This is the second validator to be executed at runtime, after `_load_config_dict`.
+ """
+ # First migrate deprecated namespaces, then deprecated variables to ensure all deprecations are handled.
+ data = cls._migrate_deprecated_namespaces(data)
+ data = cls._migrate_deprecated_variables(data)
+
return handler(data)
@model_validator(mode="wrap")
@@ -394,6 +457,7 @@ def _load_config_dict(
- This method is a `model_validator`, i.e. it's internally executed by pydantic during model validation
- The mode (`"wrap"`) guarantees that this validator is always executed _before_ the validators defined in child class
- See `_SettingsLoader.build_loader_from_model` for further details about env/config vars parsing implementation
+        - This is the first validator to be executed at runtime, before `_migrate_deprecation` (which runs `_migrate_deprecated_namespaces` and `_migrate_deprecated_variables`)
References:
https://github.com/pydantic/pydantic/issues/8277 [consulted on 2025-11-19]
@@ -410,7 +474,7 @@ def to_helper_config(self) -> dict[str, Any]:
return self.model_dump(
mode="json",
context={"mode": "pycti"},
- # Deprecated fields can be set to `None` despite their type (due to `SkipValidation` annotation).
+ # Deprecated fields can be set to `None` despite their type (due to `Deprecate` annotation).
# To avoid `PydanticSerializationError`, we exclude all fields set to `None` during serialization.
# OpenCTIConnectorHelper handles missing fields with default values or internal logic.
exclude_none=True,
diff --git a/connectors-sdk/connectors_sdk/settings/deprecations.py b/connectors-sdk/connectors_sdk/settings/deprecations.py
index 0d3906aa0a2..a31e64cba79 100644
--- a/connectors-sdk/connectors_sdk/settings/deprecations.py
+++ b/connectors-sdk/connectors_sdk/settings/deprecations.py
@@ -2,7 +2,7 @@
import warnings
from datetime import date
-from typing import Any, Callable
+from typing import Any, Callable, Literal
from pydantic import Field
from pydantic.fields import FieldInfo
@@ -121,48 +121,88 @@ def migrate_deprecated_variable(
data[destination_namespace] = new_config
-class DeprecatedField:
- """Define a deprecated field with migration information.
-
- The migration information is used in the BaseConnectorSettings to automatically
- migrate deprecated fields to their new names or namespaces.
+class Deprecate:
+ """A metadata class that indicates that a field is deprecated and may be migrated
+ to a new variable during `BaseConnectorSettings` validation.
Args:
- deprecated (str | bool): `True` to mark the field as deprecated, or a deprecation message to be displayed in warnings and JSON schemas.
new_namespace (str | None): The new namespace to migrate to.
new_namespaced_var (str | None): The new variable name when migrating a variable.
new_value_factory (Callable | None): A function to change the value when migrating.
removal_date (date | str | None): Date when the deprecated setting will be removed.
- Returns:
- FieldInfo: A Pydantic FieldInfo object with deprecation metadata.
+ Notes:
+ - If this is applied as an annotation (e.g., via `x: Annotated[int, Deprecate(removal_date="2027-01-01")]`),
+ the field will be marked as deprecated and its default value (always `None`) will not be validated.
+ However, user-provided values remain subject to normal validation.
+ - Because this sets the field as optional (i.e., sets its default to `None`),
+ subsequent annotation-applied transformations may be impacted.
+ Additionally, IDE and static type checkers may ignore that the field can be set to `None`,
+ which can lead to issues if the field is accessed without checking for `None` first.
"""
- def __new__( # type: ignore[misc]
- cls,
- *,
- deprecated: str | bool = True,
+ def __init__(
+ self,
new_namespace: str | None = None,
new_namespaced_var: str | None = None,
new_value_factory: Callable[[Any], Any] | None = None,
removal_date: date | str | None = None,
- ) -> FieldInfo:
- """Create a Pydantic Field with deprecation metadata."""
- if not deprecated:
- raise ValueError(
- "DeprecatedField must have a deprecation reason or be set to True."
- )
+ ):
+ """Instantiate a `Deprecate` metadata."""
+ self.new_namespace = new_namespace
+ self.new_namespaced_var = new_namespaced_var
+ self.new_value_factory = new_value_factory
if isinstance(removal_date, str):
removal_date = date.fromisoformat(removal_date)
- if removal_date:
- removal_date = removal_date.strftime("%Y-%m-%d")
- return Field(
- default=None,
- deprecated=deprecated,
- json_schema_extra={
- "new_namespace": new_namespace,
- "new_namespaced_var": new_namespaced_var,
- "new_value_factory": new_value_factory, # type: ignore[dict-item]
- "removal_date": removal_date, # type: ignore[dict-item]
- },
- ) # type: ignore[return-value]
+ self.removal_date = removal_date.strftime("%Y-%m-%d") if removal_date else None
+
+
+def DeprecatedField( # noqa: N802 (using pydantic.Field naming convention)
+ *,
+ deprecated: str | Literal[True] = True,
+ new_namespace: str | None = None,
+ new_namespaced_var: str | None = None,
+ new_value_factory: Callable[[Any], Any] | None = None,
+ removal_date: date | str | None = None,
+ **kwargs: Any,
+) -> Any:
+ """Define a deprecated field with migration information.
+
+ The migration information is used in the `BaseConnectorSettings` to automatically
+ migrate deprecated fields to their new names or namespaces.
+
+ Args:
+ deprecated (str | Literal[True]): `True` to mark the field as deprecated, or
+ a deprecation message to be displayed in warnings and JSON schemas.
+ new_namespace (str | None): The new namespace to migrate to.
+ new_namespaced_var (str | None): The new variable name when migrating a variable.
+ new_value_factory (Callable | None): A function to change the value when migrating.
+ removal_date (date | str | None): Date when the deprecated setting will be removed.
+ **kwargs: Additional keyword arguments to be passed to the underlying `Field` definition.
+
+ Returns:
+ FieldInfo: A Pydantic FieldInfo object with deprecation metadata.
+
+ Notes:
+ - The return annotation is `Any` so `DeprecatedField` can be used on any type-annotated
+ fields without causing a type error (same as `Field` from Pydantic).
+ - See `pydantic.Field` (https://docs.pydantic.dev/latest/api/fields/) for more information
+ on the available parameters to define a field, as they can be used in conjunction with the deprecation parameters.
+ """
+ if not deprecated:
+ raise ValueError(
+ "DeprecatedField must have a deprecation reason or be set to True."
+ )
+
+ field_info: FieldInfo = Field(deprecated=deprecated, **kwargs)
+ # Add Deprecate metadata so it can be used for migration in BaseConnectorSettings
+ field_info.metadata.append(
+ Deprecate(
+ new_namespace=new_namespace,
+ new_namespaced_var=new_namespaced_var,
+ new_value_factory=new_value_factory,
+ removal_date=removal_date,
+ )
+ )
+
+ return field_info
diff --git a/connectors-sdk/connectors_sdk/settings/json_schema_generator.py b/connectors-sdk/connectors_sdk/settings/json_schema_generator.py
index f35dc34dd37..296456ca5ee 100644
--- a/connectors-sdk/connectors_sdk/settings/json_schema_generator.py
+++ b/connectors-sdk/connectors_sdk/settings/json_schema_generator.py
@@ -3,43 +3,10 @@
from copy import deepcopy
from typing import Any
-from pydantic.json_schema import (
- GenerateJsonSchema,
- JsonSchemaValue,
-)
+from pydantic.json_schema import GenerateJsonSchema, JsonSchemaValue
-class SanitizedJsonSchemaGenerator(GenerateJsonSchema):
- """A JsonSchema generator that removes function references from schemas."""
-
- def generate_inner(self, schema: Any) -> JsonSchemaValue:
- """Generate inner schema, removing function references from metadata.
-
- Args:
- schema: The schema to process.
-
- Returns:
- The processed JSON schema value.
- """
- if (
- not isinstance(schema, dict)
- or not isinstance(meta := schema.get("metadata"), dict)
- or not isinstance(js_extra := meta.get("pydantic_js_extra"), dict)
- or "new_value_factory" not in js_extra
- ):
- return super().generate_inner(schema)
-
- schema = schema.copy()
- meta = meta.copy()
- js_extra = js_extra.copy()
- js_extra.pop("new_value_factory", None)
- meta["pydantic_js_extra"] = js_extra
- schema["metadata"] = meta
-
- return super().generate_inner(schema)
-
-
-class ConnectorConfigJsonSchemaGenerator(SanitizedJsonSchemaGenerator):
+class ConnectorConfigJsonSchemaGenerator(GenerateJsonSchema):
"""Generate JSON schemas for connector configurations with resolved references and deprecation handling."""
connector_name: str
diff --git a/connectors-sdk/tests/test_api.py b/connectors-sdk/tests/test_api.py
index 6c5d2f88c29..83b4025429a 100644
--- a/connectors-sdk/tests/test_api.py
+++ b/connectors-sdk/tests/test_api.py
@@ -20,6 +20,7 @@ def test_root_public_api_is_valid():
"ConfigValidationError",
"DatetimeFromIsoString",
"ListFromString",
+ "Deprecate",
"DeprecatedField",
}
missing = imports - set(root_api.__all__)
diff --git a/connectors-sdk/tests/test_models/test_api.py b/connectors-sdk/tests/test_models/test_api.py
index 5b58b7d69cb..085d4654842 100644
--- a/connectors-sdk/tests/test_models/test_api.py
+++ b/connectors-sdk/tests/test_models/test_api.py
@@ -68,6 +68,7 @@ def test_public_models_are_present():
"City",
"Country",
"DomainName",
+ "EmailAddress",
"ExternalReference",
"File",
"Hostname",
@@ -91,6 +92,7 @@ def test_public_models_are_present():
"ThreatActorGroup",
"TLPMarking",
"URL",
+ "UserAccount",
"Vulnerability",
"X509Certificate",
}
diff --git a/connectors-sdk/tests/test_models/test_email_address.py b/connectors-sdk/tests/test_models/test_email_address.py
new file mode 100644
index 00000000000..31e10510fa0
--- /dev/null
+++ b/connectors-sdk/tests/test_models/test_email_address.py
@@ -0,0 +1,76 @@
+import pytest
+from connectors_sdk.models.base_identified_entity import BaseIdentifiedEntity
+from connectors_sdk.models.email_address import EmailAddress
+from connectors_sdk.models.user_account import UserAccount
+from pydantic import ValidationError
+from stix2.v21 import EmailAddress as Stix2EmailAddress
+
+
+def test_email_address_is_a_base_identified_entity() -> None:
+ """Test that EmailAddress is a BaseIdentifiedEntity."""
+ assert issubclass(EmailAddress, BaseIdentifiedEntity)
+
+
+def test_email_address_class_should_not_accept_invalid_input():
+ """Test that EmailAddress class should not accept invalid input."""
+ input_data = {
+ "value": "user@example.com",
+ "invalid_key": "invalid_value",
+ }
+ with pytest.raises(ValidationError) as error:
+ EmailAddress.model_validate(input_data)
+ assert error.value.errors()[0]["loc"] == ("invalid_key",)
+
+
+def test_email_address_to_stix2_object_returns_valid_stix_object():
+ """Test that EmailAddress to_stix2_object method returns a valid STIX2.1 object."""
+ email_address = EmailAddress(value="user@example.com")
+ stix2_obj = email_address.to_stix2_object()
+ assert isinstance(stix2_obj, Stix2EmailAddress)
+
+
+def test_email_address_to_stix2_object(
+ fake_valid_organization_author,
+ fake_valid_tlp_markings,
+ fake_valid_external_references,
+ fake_valid_associated_files,
+) -> None:
+ user_account = UserAccount(
+ user_id="john.doe@example.com",
+ account_login="john.doe",
+ display_name="John Doe",
+ )
+
+ email_address = EmailAddress(
+ value="john.doe@example.com",
+ display_name="John Doe",
+ belongs_to=user_account,
+ score=80,
+ description="Suspicious phishing sender",
+ labels=["phishing", "email"],
+ associated_files=fake_valid_associated_files,
+ create_indicator=True,
+ author=fake_valid_organization_author,
+ markings=fake_valid_tlp_markings,
+ external_references=fake_valid_external_references,
+ ).to_stix2_object()
+
+ assert email_address == Stix2EmailAddress(
+ value="john.doe@example.com",
+ display_name="John Doe",
+ belongs_to_ref=user_account.id,
+ allow_custom=True,
+ object_marking_refs=[marking.id for marking in fake_valid_tlp_markings],
+ x_opencti_score=80,
+ x_opencti_description="Suspicious phishing sender",
+ x_opencti_labels=["phishing", "email"],
+ x_opencti_external_references=[
+ external_ref.to_stix2_object()
+ for external_ref in fake_valid_external_references
+ ],
+ x_opencti_created_by_ref=fake_valid_organization_author.id,
+ x_opencti_files=[
+ file.to_stix2_object() for file in fake_valid_associated_files
+ ],
+ x_opencti_create_indicator=True,
+ )
diff --git a/connectors-sdk/tests/test_models/test_enums.py b/connectors-sdk/tests/test_models/test_enums.py
index 8e6a3c470e8..35f59255425 100644
--- a/connectors-sdk/tests/test_models/test_enums.py
+++ b/connectors-sdk/tests/test_models/test_enums.py
@@ -26,6 +26,7 @@
}
ENUMS = OCTI_ENUMS | {
+ "AccountType",
"RelationshipType",
}
diff --git a/connectors-sdk/tests/test_models/test_user_account.py b/connectors-sdk/tests/test_models/test_user_account.py
new file mode 100644
index 00000000000..1d305676fc2
--- /dev/null
+++ b/connectors-sdk/tests/test_models/test_user_account.py
@@ -0,0 +1,99 @@
+from datetime import datetime, timezone
+
+import pytest
+from connectors_sdk.models.base_identified_entity import BaseIdentifiedEntity
+from connectors_sdk.models.enums import AccountType
+from connectors_sdk.models.user_account import UserAccount
+from pydantic import ValidationError
+from stix2.v21 import UserAccount as Stix2UserAccount
+
+
+def test_user_account_is_a_base_identified_entity() -> None:
+ """Test that UserAccount is a BaseIdentifiedEntity."""
+ assert issubclass(UserAccount, BaseIdentifiedEntity)
+
+
+def test_user_account_class_should_not_accept_invalid_input() -> None:
+ """Test that UserAccount class should not accept invalid input."""
+ input_data = {
+ "user_id": "john.doe",
+ "invalid_key": "invalid_value",
+ }
+ with pytest.raises(ValidationError) as error:
+ UserAccount.model_validate(input_data)
+ assert error.value.errors()[0]["loc"] == ("invalid_key",)
+
+
+def test_user_account_to_stix2_object_returns_valid_stix_object() -> None:
+ """Test that UserAccount to_stix2_object method returns a valid STIX2.1 object."""
+ user_account = UserAccount(user_id="john.doe")
+ stix2_obj = user_account.to_stix2_object()
+ assert isinstance(stix2_obj, Stix2UserAccount)
+
+
+def test_user_account_to_stix2_object(
+ fake_valid_organization_author,
+ fake_valid_tlp_markings,
+ fake_valid_external_references,
+ fake_valid_associated_files,
+) -> None:
+ """Test that UserAccount to_stix2_object method returns correct STIX2.1 object."""
+ account_created = datetime(2025, 1, 1, 8, 30, 0, tzinfo=timezone.utc)
+ account_expires = datetime(2026, 1, 1, 8, 30, 0, tzinfo=timezone.utc)
+ credential_last_changed = datetime(2025, 2, 1, 9, 0, 0, tzinfo=timezone.utc)
+ account_first_login = datetime(2025, 2, 2, 10, 0, 0, tzinfo=timezone.utc)
+ account_last_login = datetime(2025, 3, 1, 11, 0, 0, tzinfo=timezone.utc)
+
+ user_account = UserAccount(
+ user_id="john.doe@example.com",
+ account_login="john.doe",
+ account_type=AccountType.WINDOWS_DOMAIN,
+ display_name="John Doe",
+ is_service_account=False,
+ is_privileged=True,
+ can_escalate_privs=True,
+ is_disabled=False,
+ account_created=account_created,
+ account_expires=account_expires,
+ credential_last_changed=credential_last_changed,
+ account_first_login=account_first_login,
+ account_last_login=account_last_login,
+ score=90,
+ description="Compromised domain account",
+ labels=["credential-access", "active-directory"],
+ associated_files=fake_valid_associated_files,
+ create_indicator=True,
+ author=fake_valid_organization_author,
+ markings=fake_valid_tlp_markings,
+ external_references=fake_valid_external_references,
+ ).to_stix2_object()
+
+ assert user_account == Stix2UserAccount(
+ user_id="john.doe@example.com",
+ account_login="john.doe",
+ account_type="windows-domain",
+ display_name="John Doe",
+ is_service_account=False,
+ is_privileged=True,
+ can_escalate_privs=True,
+ is_disabled=False,
+ account_created=account_created,
+ account_expires=account_expires,
+ credential_last_changed=credential_last_changed,
+ account_first_login=account_first_login,
+ account_last_login=account_last_login,
+ allow_custom=True,
+ object_marking_refs=[marking.id for marking in fake_valid_tlp_markings],
+ x_opencti_score=90,
+ x_opencti_description="Compromised domain account",
+ x_opencti_labels=["credential-access", "active-directory"],
+ x_opencti_external_references=[
+ external_ref.to_stix2_object()
+ for external_ref in fake_valid_external_references
+ ],
+ x_opencti_created_by_ref=fake_valid_organization_author.id,
+ x_opencti_files=[
+ file.to_stix2_object() for file in fake_valid_associated_files
+ ],
+ x_opencti_create_indicator=True,
+ )
diff --git a/connectors-sdk/tests/test_settings/test_base_settings.py b/connectors-sdk/tests/test_settings/test_base_settings.py
index f16e366a568..2d8f3f9db9c 100644
--- a/connectors-sdk/tests/test_settings/test_base_settings.py
+++ b/connectors-sdk/tests/test_settings/test_base_settings.py
@@ -1,11 +1,104 @@
import sys
from pathlib import Path
+from typing import Annotated
from unittest.mock import patch
import pytest
-from connectors_sdk.settings.base_settings import BaseConnectorSettings, _SettingsLoader
+from connectors_sdk.settings.base_settings import (
+ BaseConfigModel,
+ BaseConnectorSettings,
+ _SettingsLoader,
+)
+from connectors_sdk.settings.deprecations import Deprecate, DeprecatedField
from connectors_sdk.settings.exceptions import ConfigValidationError
-from pydantic import HttpUrl
+from pydantic import Field, HttpUrl
+
+
+def test_base_config_model_should_retrieve_deprecated_fields():
+ """Test that `BaseConfigModel` subclasses can retrieve deprecated fields metadata."""
+
+ # Given: A BaseConfigModel subclass declaring one DeprecatedField
+ class TestConfig(BaseConfigModel):
+ test_field: str = Field(default="test")
+ old_field: str = DeprecatedField(removal_date="2026-12-31")
+
+ # When: The model is built
+ # Then: The deprecated field is tracked in model's deprecated fields
+ assert len(TestConfig.model_fields) == 2
+ assert "old_field" in TestConfig.model_fields
+ assert len(TestConfig._model_deprecated_fields) == 1
+ assert "old_field" in TestConfig._model_deprecated_fields
+
+
+def test_base_config_model_should_retrieve_fields_with_deprecate_annotation():
+ """Test that `BaseConfigModel` subclasses can retrieve fields with `Deprecate` metadata."""
+
+ # Given: A BaseConfigModel subclass with a field using Deprecate annotation
+ class TestConfig(BaseConfigModel):
+ test_field: str = Field(default="test")
+ old_field: Annotated[
+ str, Field(description="Test field"), Deprecate(removal_date="2026-12-31")
+ ]
+
+ # When: The model is built
+ # Then: The annotated field is tracked in model's deprecated fields
+ assert len(TestConfig.model_fields) == 2
+ assert "old_field" in TestConfig.model_fields
+ assert len(TestConfig._model_deprecated_fields) == 1
+ assert "old_field" in TestConfig._model_deprecated_fields
+
+
+def test_base_config_model_should_set_default_to_none_for_deprecated_fields():
+ """Test that `BaseConfigModel` subclasses set `default` to `None` for deprecated fields."""
+
+ # Given: A deprecated field explicitly defines a non-None default
+ class TestConfig(BaseConfigModel):
+ test_field: str = Field(default="test")
+ old_field: str = DeprecatedField(
+ default="deprecated default" # should be overwritten to None
+ )
+
+ # When: The model field definitions are built
+ # Then: Deprecated field defaults are normalized to None
+ assert TestConfig.model_fields["old_field"].default is None
+ assert TestConfig._model_deprecated_fields["old_field"].default is None
+
+
+def test_base_config_model_should_disable_validate_default_for_deprecated_fields():
+ """Test that `BaseConfigModel` subclasses set `validate_default` to `False` for deprecated fields."""
+
+ # Given: A deprecated field that would normally validate its default
+ class TestConfig(BaseConfigModel):
+ test_field: str = Field(default="test")
+ old_field: str = DeprecatedField(
+ default="deprecated default" # should be overwritten to None
+ )
+
+ # When: A model instance is created
+ config = TestConfig()
+
+ # Then: Deprecated defaults do not trigger validation and resolve to None
+ assert config.old_field is None
+
+
+def test_base_config_model_should_set_json_schema_extra_on_deprecated_fields():
+ """Test that `BaseConfigModel` subclasses set `json_schema_extra` with deprecation info for deprecated fields."""
+
+ # Given: A deprecated field with replacement metadata
+ class TestConfig(BaseConfigModel):
+ test_field: str = Field(default="test")
+ old_field: str = DeprecatedField(
+ new_namespaced_var="test_field",
+ removal_date="2026-12-31",
+ )
+
+ # When: The model is built
+ # Then: Deprecation information is exposed in field's json_schema_extra
+ assert TestConfig.model_fields["old_field"].json_schema_extra == {
+ "new_namespace": None,
+ "new_namespaced_var": "test_field",
+ "removal_date": "2026-12-31",
+ }
def test_settings_loader_should_get_connector_main_path(mock_main_path):
@@ -14,8 +107,11 @@ def test_settings_loader_should_get_connector_main_path(mock_main_path):
For testing purpose, a fake path is assigned to `sys.modules[__main__].__file__`.
"""
+ # Given: The connector main module path is available
+ # When: The main path resolver is executed
main_path = _SettingsLoader._get_connector_main_path()
+ # Then: The resolved main.py path matches the expected connector location
assert main_path == Path("/app/src/main.py").resolve()
@@ -27,8 +123,11 @@ def test_settings_loader_should_raise_when_main_module_misses_file_attribute(
For testing purpose, `sys.modules[__main__].__file__` is set to `None`.
"""
+ # Given: The __main__.__file__ attribute is missing
sys.modules["__main__"].__file__ = None
+ # When: The main path resolver is executed
+ # Then: A runtime error is raised to signal invalid execution context
with pytest.raises(RuntimeError):
_SettingsLoader._get_connector_main_path()
@@ -44,9 +143,12 @@ def test_settings_loader_should_get_legacy_config_yml_file_path(
def is_file(self: Path) -> bool:
return self.name == "config.yml"
+ # Given: Legacy config file (/src/config.yml) is present
with patch("pathlib.Path.is_file", is_file):
+ # When: The config.yml path resolver is executed
config_yml_file_path = _SettingsLoader._get_config_yml_file_path()
+ # Then: The legacy config.yml path is returned
assert config_yml_file_path == Path("/app/src/config.yml").resolve()
@@ -59,9 +161,12 @@ def test_settings_loader_should_get_config_yml_file_path(mock_main_path):
def is_file(self: Path) -> bool:
return self.name == "config.yml" and self.parent.name != "src"
+ # Given: Root config file (/config.yml) is present
with patch("pathlib.Path.is_file", is_file):
+ # When: The config.yml path resolver is executed
config_yml_file_path = _SettingsLoader._get_config_yml_file_path()
+ # Then: The new config.yml path is returned
assert config_yml_file_path == Path("/app/config.yml").resolve()
@@ -74,9 +179,12 @@ def test_settings_loader_should_get_dot_env_file_path(mock_main_path):
def is_file(self: Path) -> bool:
return self.name == ".env"
+ # Given: Root env file (/.env) is present
with patch("pathlib.Path.is_file", is_file):
+ # When: The .env path resolver is executed
dot_env_file_path = _SettingsLoader._get_dot_env_file_path()
+ # Then: The .env file path is returned
assert dot_env_file_path == Path("/app/.env").resolve()
@@ -85,9 +193,12 @@ def test_settings_loader_should_parse_config_yml_file(mock_config_yml_file_prese
Test that `_SettingsLoader()` parses config vars in `config.yml`.
For testing purpose, the path of `config.yml` file is `tests/test_settings/data/config.test.yml`.
"""
+ # Given: A valid config.yml
+ # When: The settings loader is instantiated and dumped
settings_loader = _SettingsLoader()
settings_dict = settings_loader.model_dump()
+ # Then: Parsed nested settings match expected config.yml values
assert settings_dict == {
"opencti": {
"url": "http://localhost:8080",
@@ -109,9 +220,12 @@ def test_settings_loader_should_parse_dot_env_file(mock_dot_env_file_presence):
For testing purpose, the path of `.env` file is `tests/test_settings/data/.env.test`.
"""
+ # Given: A valid .env file
+ # When: The settings loader is instantiated and dumped
settings_loader = _SettingsLoader()
settings_dict = settings_loader.model_dump()
+ # Then: Parsed flat settings match expected environment variables
assert settings_dict == {
"opencti_url": "http://localhost:8080",
"opencti_token": "changeme",
@@ -123,15 +237,18 @@ def test_settings_loader_should_parse_dot_env_file(mock_dot_env_file_presence):
}
-def test_settings_loader_should_parse_os_environ(mock_environment):
+def test_settings_loader_should_not_parse_os_environ(mock_environment):
"""
- Test that `_SettingsLoader()` parses env vars from `os.environ`.
+ Test that `_SettingsLoader()` does not parse env vars from `os.environ` (for security purposes).
For testing purpose, `os.environ` is patched.
"""
+ # Given: Valid environment variables
+ # When: The settings loader is instantiated and dumped
settings_loader = _SettingsLoader()
settings_dict = settings_loader.model_dump()
+ # Then: No implicit values are parsed
assert settings_dict == {}
@@ -144,9 +261,12 @@ def test_settings_loader_should_parse_config_yml_from_model(
For testing purpose, the path of `config.yml` file is `tests/test_settings/data/config.test.yml`.
"""
+ # Given: A model-aware loader is built for BaseConnectorSettings with config.yml fixture
+ # When: The loader instance parses and dumps values
settings_loader = _SettingsLoader.build_loader_from_model(BaseConnectorSettings)
settings_dict = settings_loader().model_dump()
+ # Then: Parsed nested settings expose expected OpenCTI and connector values
assert settings_dict["opencti"]["url"] == "http://localhost:8080"
assert settings_dict["opencti"]["token"] == "changeme"
assert settings_dict["connector"]["id"] == "connector-poc--uid"
@@ -162,9 +282,12 @@ def test_settings_loader_should_parse_dot_env_from_model(mock_dot_env_file_prese
For testing purpose, the path of `.env` file is `tests/test_settings/data/.env.test`.
"""
+ # Given: A model-aware loader is built for BaseConnectorSettings with .env fixture
+ # When: The loader instance parses and dumps values
settings_loader = _SettingsLoader.build_loader_from_model(BaseConnectorSettings)
settings_dict = settings_loader().model_dump()
+ # Then: Parsed nested settings expose expected OpenCTI and connector values
assert settings_dict["opencti"]["url"] == "http://localhost:8080"
assert settings_dict["opencti"]["token"] == "changeme"
assert settings_dict["connector"]["id"] == "connector-poc--uid"
@@ -180,9 +303,12 @@ def test_settings_loader_should_parse_os_environ_from_model(mock_environment):
For testing purpose, `os.environ` is patched.
"""
+ # Given: A model-aware loader is built for BaseConnectorSettings with patched os.environ
+ # When: The loader instance parses and dumps values
settings_loader = _SettingsLoader.build_loader_from_model(BaseConnectorSettings)
settings_dict = settings_loader().model_dump()
+ # Then: Parsed nested settings expose expected OpenCTI and connector values
assert settings_dict["opencti"]["url"] == "http://localhost:8080"
assert settings_dict["opencti"]["token"] == "changeme"
assert settings_dict["connector"]["id"] == "connector-poc--uid"
@@ -199,8 +325,11 @@ def test_base_connector_settings_should_validate_settings_from_config_yaml_file(
For testing purpose, the path of `config.yml` file is `tests/test_settings/data/config.test.yml`.
"""
+ # Given: Valid connector settings are provided through config.yml fixture
+ # When: BaseConnectorSettings is instantiated
settings = BaseConnectorSettings()
+ # Then: Values are validated and cast to expected runtime types
assert settings.opencti.url == HttpUrl("http://localhost:8080/")
assert settings.opencti.token == "changeme"
assert settings.connector.id == "connector-poc--uid"
@@ -217,8 +346,11 @@ def test_base_connector_settings_should_validate_settings_from_dot_env_file(
For testing purpose, the path of `.env` file is `tests/test_settings/data/.env.test`.
"""
+ # Given: Valid connector settings are provided through .env fixture
+ # When: BaseConnectorSettings is instantiated
settings = BaseConnectorSettings()
+ # Then: Values are validated and cast to expected runtime types
assert settings.opencti.url == HttpUrl("http://localhost:8080/")
assert settings.opencti.token == "changeme"
assert settings.connector.id == "connector-poc--uid"
@@ -235,8 +367,11 @@ def test_base_connector_settings_should_validate_settings_from_os_environ(
For testing purpose, `os.environ` is patched.
"""
+ # Given: Valid connector settings are provided through patched os.environ
+ # When: BaseConnectorSettings is instantiated
settings = BaseConnectorSettings()
+ # Then: Values are validated and cast to expected runtime types
assert settings.opencti.url == HttpUrl("http://localhost:8080/")
assert settings.opencti.token == "changeme"
assert settings.connector.id == "connector-poc--uid"
@@ -247,6 +382,9 @@ def test_base_connector_settings_should_validate_settings_from_os_environ(
def test_base_connector_settings_should_raise_when_missing_mandatory_env_vars():
"""Test that `BaseConnectorSettings` raises a `ValidationError` when no value is provided for required fields."""
+ # Given: Mandatory connector settings are absent from env vars
+ # When: BaseConnectorSettings is instantiated
+ # Then: A ConfigValidationError is raised
with pytest.raises(ConfigValidationError):
BaseConnectorSettings()
@@ -257,9 +395,12 @@ def test_base_connector_settings_should_provide_helper_config(mock_environment):
For testing purpose, `os.environ` is patched.
"""
+ # Given: A valid BaseConnectorSettings instance built from patched environment
+ # When: OpenCTIConnectorHelper config dict is generated
settings = BaseConnectorSettings()
opencti_dict = settings.to_helper_config()
+ # Then: The resulting helper config dict matches expected structure and values
assert opencti_dict == {
"connector": {
"duration_period": "PT5M",
@@ -275,31 +416,35 @@ def test_base_connector_settings_should_provide_helper_config(mock_environment):
}
-def test_base_connector_settings_model_json_schema_uses_sanitizing_schema():
- """Test that model_json_schema uses SanitizingJsonSchema by default."""
+def test_base_connector_settings_model_json_schema_generates_the_default_json_schema():
+ """Test that model_json_schema generates the default JSON schema."""
+ # Given: BaseConnectorSettings uses default JSON schema generation
+ # When: model_json_schema is called
schema = BaseConnectorSettings.model_json_schema()
- # Should generate valid schema
+ # Then: The resulting schema contains top-level connector and opencti properties
assert "$defs" in schema or "properties" in schema
assert "opencti" in schema["properties"]
assert "connector" in schema["properties"]
-def test_base_connector_settings_flattened_json_schema():
- """Test flattened_json_schema generation."""
+def test_base_connector_settings_config_json_schema():
+ """Test config_json_schema generation."""
+ # Given: A connector name for config JSON schema generation
+ # When: config_json_schema is called
schema = BaseConnectorSettings.config_json_schema(connector_name="test-connector")
- # Should have flattened structure
+ # Then: The config JSON schema contains standard schema metadata and properties
assert "$schema" in schema
assert "$id" in schema
assert "test-connector" in schema["$id"]
assert "properties" in schema
- # Should have uppercased environment variable names
+ # Then: Property names are uppercased
assert "OPENCTI_URL" in schema["properties"]
assert "OPENCTI_TOKEN" in schema["properties"]
assert "CONNECTOR_NAME" in schema["properties"]
- # CONNECTOR_ID should be filtered out
+ # Then: CONNECTOR_ID is intentionally excluded from generated properties
assert "CONNECTOR_ID" not in schema["properties"]
diff --git a/connectors-sdk/tests/test_settings/test_deprecation_migration.py b/connectors-sdk/tests/test_settings/test_deprecation_migration.py
index 4a9ccbd07e5..1a28a51149e 100644
--- a/connectors-sdk/tests/test_settings/test_deprecation_migration.py
+++ b/connectors-sdk/tests/test_settings/test_deprecation_migration.py
@@ -9,7 +9,7 @@
)
from connectors_sdk.settings.deprecations import DeprecatedField
from connectors_sdk.settings.exceptions import ConfigValidationError
-from pydantic import Field, SkipValidation
+from pydantic import Field
class TestMigrateDeprecation:
@@ -18,6 +18,7 @@ class TestMigrateDeprecation:
def test_migrate_deprecated_variable_in_settings(self, monkeypatch):
"""Test variable migration during settings initialization."""
+ # Given: A settings model with a deprecated field mapped to a new field
class CustomConnectorConfig(BaseExternalImportConnectorConfig):
old_field: str | None = DeprecatedField(
deprecated="Use new_field instead",
@@ -39,23 +40,56 @@ class TestSettings(BaseConnectorSettings):
monkeypatch.setenv("CONNECTOR_DURATION_PERIOD", "PT5M")
monkeypatch.setenv("CONNECTOR_OLD_FIELD", "old_value")
+ # When: TestSettings is instantiated
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
settings = TestSettings()
- # Should have warnings about variable migration
+ # Then: A warning indicates the deprecated field migration
warning_messages = [str(warning.message) for warning in w]
assert any("old_field" in msg.lower() for msg in warning_messages)
- # Should have migrated value
+ # And the deprecated value is copied into the replacement field
assert settings.connector.new_field == "old_value"
+ def test_migrate_deprecated_variable_with_non_string_value(self, monkeypatch):
+ """Test that variable migration raises ValueError for non-string new_namespaced_var."""
+
+        # Given: A deprecated field configured with a wrongly typed argument
+ class CustomConnectorConfig(BaseExternalImportConnectorConfig):
+ old_field: str | None = DeprecatedField(
+ deprecated="Use new_field instead",
+ new_namespaced_var=123, # type: ignore
+ )
+ new_field: str = Field(default="default")
+
+ class TestSettings(BaseConnectorSettings):
+ connector: CustomConnectorConfig = Field(
+ default_factory=CustomConnectorConfig
+ )
+
+ # Setup mandatory environment variables only
+ monkeypatch.setenv("OPENCTI_URL", "http://localhost:8080")
+ monkeypatch.setenv("OPENCTI_TOKEN", "test-token")
+ monkeypatch.setenv("CONNECTOR_ID", "test-id")
+ monkeypatch.setenv("CONNECTOR_NAME", "Test")
+ monkeypatch.setenv("CONNECTOR_SCOPE", "test")
+ monkeypatch.setenv("CONNECTOR_DURATION_PERIOD", "PT5M")
+
+ # When: Settings initialization evaluates migration metadata
+ # Then: A ConfigValidationError is raised for invalid migration metadata
+ with pytest.raises(
+ ConfigValidationError, match="Error validating configuration"
+ ):
+ TestSettings()
+
def test_migrate_deprecated_variable_with_value_transformation(self, monkeypatch):
"""Test variable migration with value transformation."""
def double_value(val):
return int(val) * 2
+ # Given: A TestSettings model that migrates and transforms old_value into new_value
class CustomConnectorConfig(BaseExternalImportConnectorConfig):
old_value: str | None = DeprecatedField(
deprecated="Use new_value instead",
@@ -69,7 +103,7 @@ class TestSettings(BaseConnectorSettings):
default_factory=CustomConnectorConfig
)
- # Setup environment
+ # Setup environment variables with a deprecated value
monkeypatch.setenv("OPENCTI_URL", "http://localhost:8080")
monkeypatch.setenv("OPENCTI_TOKEN", "test-token")
monkeypatch.setenv("CONNECTOR_ID", "test-id")
@@ -78,26 +112,29 @@ class TestSettings(BaseConnectorSettings):
monkeypatch.setenv("CONNECTOR_DURATION_PERIOD", "PT5M")
monkeypatch.setenv("CONNECTOR_OLD_VALUE", "5")
+ # When: TestSettings is instantiated
with warnings.catch_warnings(record=True):
warnings.simplefilter("always")
settings = TestSettings()
+ # Then: The migrated value is transformed by the factory
assert settings.connector.new_value == 10
def test_migrate_deprecated_namespace_with_non_string_value(self, monkeypatch):
"""Test that namespace migration raises ValueError for non-string new_namespace."""
+        # Given: A deprecated namespace configured with a wrongly typed argument
class CustomConnectorConfig(BaseExternalImportConnectorConfig):
pass
class TestSettings(BaseConnectorSettings):
- connector: CustomConnectorConfig = Field(
+ connector: CustomConnectorConfig = DeprecatedField(
default_factory=CustomConnectorConfig,
deprecated=True,
- json_schema_extra={"new_namespace": 123}, # Non-string value
+ new_namespace=123, # type: ignore
)
- # Setup environment
+ # Setup mandatory environment variables only
monkeypatch.setenv("OPENCTI_URL", "http://localhost:8080")
monkeypatch.setenv("OPENCTI_TOKEN", "test-token")
monkeypatch.setenv("CONNECTOR_ID", "test-id")
@@ -105,31 +142,31 @@ class TestSettings(BaseConnectorSettings):
monkeypatch.setenv("CONNECTOR_SCOPE", "test")
monkeypatch.setenv("CONNECTOR_DURATION_PERIOD", "PT5M")
- # Should raise a ConfigValidationError due to non-string new_namespace
+ # When: TestSettings initialization evaluates namespace migration metadata
+ # Then: A ConfigValidationError is raised for invalid migration metadata
with pytest.raises(
ConfigValidationError, match="Error validating configuration"
):
TestSettings()
- def test_error_when_namespace_has_new_namespaced_var_in_legacy_field(
+ def test_error_when_namespace_has_new_namespaced_var_in_deprecated_field(
self, monkeypatch
):
"""Test that ValueError is raised when a deprecated namespace has new_namespaced_var."""
+ # Given: A deprecated namespace incorrectly defining new_namespaced_var
class DeprecatedConfig(BaseExternalImportConnectorConfig):
pass
class TestSettings(BaseConnectorSettings):
- old_connector: DeprecatedConfig = Field(
+ old_connector: DeprecatedConfig = DeprecatedField(
default_factory=DeprecatedConfig,
deprecated="Use new_connector",
- json_schema_extra={
- "new_namespace": "new_connector",
- "new_namespaced_var": "renamed", # This should trigger ValueError
- },
+ new_namespace="new_connector",
+ new_namespaced_var="renamed", # This should trigger ValueError
)
- # Setup environment
+ # Setup environment variables for the deprecated namespace
monkeypatch.setenv("OPENCTI_URL", "http://localhost:8080")
monkeypatch.setenv("OPENCTI_TOKEN", "test-token")
monkeypatch.setenv("OLD_CONNECTOR_ID", "test-id")
@@ -137,7 +174,8 @@ class TestSettings(BaseConnectorSettings):
monkeypatch.setenv("OLD_CONNECTOR_SCOPE", "test")
monkeypatch.setenv("OLD_CONNECTOR_DURATION_PERIOD", "PT5M")
- # This should raise during initialization wrapped in ConfigValidationError
+ # When: TestSettings initialization validates deprecated namespace migration
+ # Then: A ConfigValidationError is raised for incompatible metadata combination
with pytest.raises(
ConfigValidationError, match="Error validating configuration"
):
@@ -146,6 +184,7 @@ class TestSettings(BaseConnectorSettings):
def test_migrate_with_nested_field_metadata(self, monkeypatch):
"""Test migration with nested field-level new_namespace metadata."""
+ # Given: A deprecated field configured to migrate into another namespace
class CustomConnectorConfig(BaseExternalImportConnectorConfig):
special_field: str | None = DeprecatedField(
deprecated="Moved to other_namespace",
@@ -159,7 +198,7 @@ class TestSettings(BaseConnectorSettings):
)
other_namespace: dict = Field(default_factory=dict)
- # Setup environment
+ # Setup environment variables with the deprecated field in connector namespace
monkeypatch.setenv("OPENCTI_URL", "http://localhost:8080")
monkeypatch.setenv("OPENCTI_TOKEN", "test-token")
monkeypatch.setenv("CONNECTOR_ID", "test-id")
@@ -168,17 +207,19 @@ class TestSettings(BaseConnectorSettings):
monkeypatch.setenv("CONNECTOR_DURATION_PERIOD", "PT5M")
monkeypatch.setenv("CONNECTOR_SPECIAL_FIELD", "special_value")
+ # When: TestSettings is instantiated
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
TestSettings()
- # Should have warnings about cross-namespace migration
+ # Then: A warning indicates cross-namespace field migration
warning_messages = [str(warning.message) for warning in w]
assert any("special_field" in msg.lower() for msg in warning_messages)
def test_sub_field_migration_within_namespace(self, monkeypatch):
"""Test that sub-field migration works correctly within a namespace."""
+ # Given: A connector config with a deprecated field
class CustomConnectorConfig(BaseExternalImportConnectorConfig):
old_field: str | None = DeprecatedField(
deprecated="Use new_field",
@@ -191,7 +232,7 @@ class TestSettings(BaseConnectorSettings):
default_factory=CustomConnectorConfig
)
- # Setup environment
+ # Setup environment variables with only the deprecated field
monkeypatch.setenv("OPENCTI_URL", "http://localhost:8080")
monkeypatch.setenv("OPENCTI_TOKEN", "test-token")
monkeypatch.setenv("CONNECTOR_ID", "test-id")
@@ -200,26 +241,28 @@ class TestSettings(BaseConnectorSettings):
monkeypatch.setenv("CONNECTOR_DURATION_PERIOD", "PT5M")
monkeypatch.setenv("CONNECTOR_OLD_FIELD", "migrated_value")
+ # When: TestSettings is instantiated with warning capture enabled
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
settings = TestSettings()
- # Should have warnings about field migration
+ # Then: A warning indicates in-namespace field migration
warning_messages = [str(warning.message) for warning in w]
assert any("old_field" in msg.lower() for msg in warning_messages)
- # Value should be migrated
+ # And the deprecated field value is assigned to the new field
assert settings.connector.new_field == "migrated_value"
def test_migrate_entire_namespace(self, monkeypatch):
"""Test that an entire namespace with all its fields is migrated."""
+ # Given: A deprecated namespace configured to migrate into connector namespace
class CustomConnectorConfig(BaseExternalImportConnectorConfig):
custom_field: str = Field(default="default")
api_key: str = Field(default="key")
class TestSettings(BaseConnectorSettings):
- old_connector: SkipValidation[CustomConnectorConfig] = DeprecatedField(
+ old_connector: CustomConnectorConfig = DeprecatedField(
deprecated="Use connector namespace instead",
new_namespace="connector",
)
@@ -227,7 +270,7 @@ class TestSettings(BaseConnectorSettings):
default_factory=CustomConnectorConfig
)
- # Setup environment with only old_connector variables
+ # Setup environment variables with both old namespace values and new namespace values
monkeypatch.setenv("OPENCTI_URL", "http://localhost:8080")
monkeypatch.setenv("OPENCTI_TOKEN", "test-token")
monkeypatch.setenv("OLD_CONNECTOR_ID", "old-id")
@@ -236,24 +279,66 @@ class TestSettings(BaseConnectorSettings):
monkeypatch.setenv("OLD_CONNECTOR_DURATION_PERIOD", "PT10M")
monkeypatch.setenv("OLD_CONNECTOR_CUSTOM_FIELD", "migrated_custom")
monkeypatch.setenv("OLD_CONNECTOR_API_KEY", "migrated_key")
- # Provide required but different fields for connector - old values should be used
monkeypatch.setenv("CONNECTOR_ID", "new-id")
monkeypatch.setenv("CONNECTOR_NAME", "New Name")
monkeypatch.setenv("CONNECTOR_SCOPE", "new-scope")
+ # When: TestSettings is instantiated
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
settings = TestSettings()
- # Should have warnings about namespace migration
+ # Then: A warning indicates namespace-level migration
warning_messages = [str(warning.message) for warning in w]
assert any("old_connector" in msg.lower() for msg in warning_messages)
- # Fields from new namespace take precedence when both exist
- # (migration warns but doesn't override)
+ # Then: Existing new-namespace values take precedence over migrated duplicates
assert settings.connector.id == "new-id"
assert settings.connector.name == "New Name"
assert settings.connector.scope == ["new-scope"] # scope is a list
- # But fields that don't exist in new namespace are migrated
+ # Then: Missing new-namespace fields are filled from migrated old namespace values
assert settings.connector.custom_field == "migrated_custom"
assert settings.connector.api_key == "migrated_key"
+
+ def test_migrate_both_namespace_and_field(self, monkeypatch):
+ """Test that both namespace and field-level migration can occur together."""
+
+ # Given: A config supporting both namespace-level and field-level deprecation migration
+ class CustomConnectorConfig(BaseExternalImportConnectorConfig):
+ new_field: str = Field(default="default")
+ old_field: str | None = DeprecatedField(
+ deprecated="Use new_field instead",
+ new_namespace="connector",
+ new_namespaced_var="new_field",
+ )
+
+ class TestSettings(BaseConnectorSettings):
+ connector: CustomConnectorConfig = Field(
+ default_factory=CustomConnectorConfig
+ )
+ old_connector: CustomConnectorConfig = DeprecatedField(
+ deprecated="Use connector namespace instead",
+ new_namespace="connector",
+ )
+
+ # Setup environment variables with only deprecated namespace and field values
+ monkeypatch.setenv("OPENCTI_URL", "http://localhost:8080")
+ monkeypatch.setenv("OPENCTI_TOKEN", "test-token")
+ monkeypatch.setenv("OLD_CONNECTOR_ID", "old-id")
+ monkeypatch.setenv("OLD_CONNECTOR_NAME", "Old Name")
+ monkeypatch.setenv("OLD_CONNECTOR_SCOPE", "old-scope")
+ monkeypatch.setenv("OLD_CONNECTOR_DURATION_PERIOD", "PT10M")
+ monkeypatch.setenv("OLD_CONNECTOR_OLD_FIELD", "migrated_value")
+
+ # When: TestSettings is instantiated
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter("always")
+ settings = TestSettings()
+
+ # Then: Warnings indicate both namespace and field-level migrations
+ warning_messages = [str(warning.message) for warning in w]
+ assert any("old_connector" in msg.lower() for msg in warning_messages)
+ assert any("old_field" in msg.lower() for msg in warning_messages)
+
+ # And the deprecated field value is migrated to connector.new_field
+ assert settings.connector.new_field == "migrated_value"
diff --git a/connectors-sdk/tests/test_settings/test_deprecations.py b/connectors-sdk/tests/test_settings/test_deprecations.py
index f47b347c285..b41c4a0d698 100644
--- a/connectors-sdk/tests/test_settings/test_deprecations.py
+++ b/connectors-sdk/tests/test_settings/test_deprecations.py
@@ -4,6 +4,7 @@
import pytest
from connectors_sdk.settings.deprecations import (
+ Deprecate,
DeprecatedField,
migrate_deprecated_namespace,
migrate_deprecated_variable,
@@ -16,106 +17,126 @@ class TestMigrateDeprecatedNamespace:
def test_migrate_with_empty_data(self):
"""Test migration with empty data dict."""
+ # Given: An empty settings payload
data: dict = {}
+
+ # When: A deprecated namespace migration is requested
migrate_deprecated_namespace(data, "old_ns", "new_ns")
+
+ # Then: The payload remains unchanged
assert data == {}
def test_migrate_with_none_data(self):
"""Test migration with None data."""
+ # Given: A missing settings payload
data = None
+
+ # When: A deprecated namespace migration is requested
migrate_deprecated_namespace(data, "old_ns", "new_ns") # type: ignore
+
+ # Then: The payload remains None
assert data is None
def test_migrate_basic_namespace(self):
"""Test basic namespace migration."""
+ # Given: Old namespace values and an empty target namespace
data = {
"old_namespace": {"key1": "value1", "key2": "value2"},
"new_namespace": {},
}
+ # When: The old namespace is migrated to the new namespace
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_namespace(data, "old_namespace", "new_namespace")
- # Check warnings
+ # Then: A warning is emitted for each migrated key
assert len(w) == 2
assert "Deprecated setting 'old_namespace.key1'" in str(w[0].message)
assert "Migrating to 'new_namespace.key1'" in str(w[0].message)
assert "Deprecated setting 'old_namespace.key2'" in str(w[1].message)
- # Check migration
+ # Then: Old namespace is removed and values are present in the new namespace
assert "old_namespace" not in data
assert data["new_namespace"] == {"key1": "value1", "key2": "value2"}
def test_migrate_with_existing_keys_in_new_namespace(self):
"""Test migration when keys already exist in new namespace."""
+ # Given: Both old and new namespaces define the same key
data = {
"old_namespace": {"key1": "old_value"},
"new_namespace": {"key1": "new_value"},
}
+ # When: Namespace migration is performed
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_namespace(data, "old_namespace", "new_namespace")
- # Should warn about using only new value
+ # Then: A warning indicates that the new namespace value is kept
assert len(w) == 1
assert "Using only 'new_namespace.key1'" in str(w[0].message)
- # New value should be preserved
+ # And existing new namespace values are preserved
assert data["new_namespace"]["key1"] == "new_value"
assert "old_namespace" not in data
def test_migrate_when_new_namespace_extends_old(self):
"""Test migration when new namespace extends old (e.g., 'settings' -> 'settings_good')."""
+ # Given: An old namespace containing one key that belongs to the new namespace prefix
data = {
"settings": {"good_api_key": "secret1", "other_key": "value1"},
"settings_good": {},
}
+ # When: Namespace migration is performed
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_namespace(data, "settings", "settings_good")
- # Should not migrate 'good_api_key' as it belongs to new namespace
+ # Then: Only eligible keys are warned and migrated
assert len(w) == 1
assert "other_key" in str(w[0].message)
- # Only other_key should be migrated
+ # And only non-overlapping keys are migrated into the new namespace
assert data["settings_good"] == {"other_key": "value1"}
assert "settings" not in data
def test_migrate_when_old_namespace_extends_new(self):
"""Test migration when old namespace extends new (e.g., 'settings_bad' -> 'settings')."""
+ # Given: Old namespace keys and new namespace keys with old prefix artifacts
data = {
"settings_bad": {"api_key": "secret1", "bad_other_key": "value1"},
"settings": {"bad_api_key": "secret2"},
}
+ # When: Namespace migration is performed
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_namespace(data, "settings_bad", "settings")
- # Should migrate and cleanup wrong prefixed keys
+ # Then: Migration emits at least one warning
assert len(w) >= 1
- # Check migration and cleanup
+ # And deprecated namespace is removed and bad prefixed keys are cleaned up
assert "settings_bad" not in data
assert data["settings"]["api_key"] == "secret1"
assert "bad_api_key" not in data["settings"]
def test_migrate_with_missing_old_namespace(self):
"""Test migration when old namespace doesn't exist."""
+ # Given: Data without the deprecated namespace
data = {"new_namespace": {"existing": "value"}}
+ # When: Namespace migration is requested
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_namespace(data, "old_namespace", "new_namespace")
- # Should not warn
+ # Then: No warning is emitted
assert len(w) == 0
- # Should not change data
+ # And data remains unchanged
assert data == {"new_namespace": {"existing": "value"}}
@@ -124,164 +145,216 @@ class TestMigrateDeprecatedVariable:
def test_migrate_with_empty_data(self):
"""Test migration with empty data dict."""
+ # Given: An empty settings payload
data: dict = {}
+
+ # When: A deprecated variable migration is requested
migrate_deprecated_variable(
data, "old_var", "new_var", "current_namespace", "new_namespace"
)
+
+ # Then: The payload remains unchanged
assert data == {}
def test_migrate_with_none_data(self):
"""Test migration with None data."""
+ # Given: A missing settings payload
data = None
+
+ # When: A deprecated variable migration is requested
migrate_deprecated_variable(
data, "old_var", "new_var", "current_namespace", "new_namespace" # type: ignore
)
+
+ # Then: The payload remains None
assert data is None
def test_migrate_basic_variable(self):
"""Test basic variable migration within same namespace."""
+ # Given: A namespace containing only a deprecated variable
data = {
"connector": {"old_var": "old_value"},
}
+ # When: The deprecated variable is migrated to its replacement
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_variable(data, "old_var", "new_var", "connector", None)
- # Check warnings
+ # Then: A migration warning is emitted
assert len(w) == 1
assert "Deprecated setting 'connector.old_var'" in str(w[0].message)
assert "Migrating to 'connector.new_var'" in str(w[0].message)
- # Check migration
+ # And the old variable is removed and the new variable receives the value
assert "old_var" not in data["connector"]
assert data["connector"]["new_var"] == "old_value"
def test_migrate_variable_to_different_namespace(self):
"""Test variable migration to different namespace."""
+ # Given: A deprecated variable in one namespace and an empty target namespace
data = {"old_namespace": {"old_var": "value"}, "new_namespace": {}}
+ # When: The variable is migrated across namespaces
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_variable(
data, "old_var", "new_var", "old_namespace", "new_namespace"
)
- # Check warnings
+ # Then: Warning message includes old and new fully-qualified variable names
assert len(w) == 1
assert "old_namespace.old_var" in str(w[0].message)
assert "new_namespace.new_var" in str(w[0].message)
- # Check migration
+ # And value is removed from old namespace and added to new namespace
assert "old_var" not in data["old_namespace"]
assert data["new_namespace"]["new_var"] == "value"
def test_migrate_with_existing_new_variable(self):
"""Test migration when new variable already exists."""
+ # Given: Both deprecated and replacement variables already exist
data = {
"connector": {"old_var": "old_value", "new_var": "new_value"},
}
+ # When: Variable migration is performed
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_variable(data, "old_var", "new_var", "connector", None)
- # Should warn about using only new value
+ # Then: Warning indicates only the replacement variable is used
assert len(w) == 1
assert "Using only 'connector.new_var'" in str(w[0].message)
- # New value should be preserved
+ # And existing replacement value is preserved and deprecated variable removed
assert data["connector"]["new_var"] == "new_value"
assert "old_var" not in data["connector"]
def test_migrate_with_value_transformation(self):
"""Test migration with value transformation function."""
+ # Given: A deprecated string variable and a transformation function
data = {"connector": {"old_var": "5"}}
def new_value_factory(val):
return int(val) * 2
+ # When: Migration is performed with new_value_factory
with warnings.catch_warnings(record=True):
warnings.simplefilter("always")
migrate_deprecated_variable(
data, "old_var", "new_var", "connector", None, new_value_factory
)
- # Check value transformation
+ # Then: The replacement variable contains the transformed value
assert data["connector"]["new_var"] == 10
def test_migrate_with_missing_old_variable(self):
"""Test migration when old variable doesn't exist."""
+ # Given: A namespace that does not contain the deprecated variable
data = {"connector": {"other_var": "value"}}
+ # When: Variable migration is requested
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_variable(data, "old_var", "new_var", "connector", None)
- # Should not warn
+ # Then: No warning is emitted
assert len(w) == 0
- # Should not change data
+ # And data remains unchanged
assert data == {"connector": {"other_var": "value"}}
def test_migrate_with_missing_namespace(self):
"""Test migration when namespace doesn't exist in data."""
+ # Given: An empty payload with no target namespace
data: dict = {}
+ # When: Variable migration is requested
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_variable(data, "old_var", "new_var", "connector", None)
- # Should not warn
+ # Then: No warning is emitted
assert len(w) == 0
- # Should not add namespace
+ # And no namespace is created
assert data == {}
class TestDeprecatedField:
"""Test DeprecatedField factory function."""
+ def get_field_deprecate_annotation(self, field: FieldInfo) -> Deprecate | None:
+ """Helper method to get Deprecate annotation from FieldInfo."""
+ return next(
+ (meta for meta in field.metadata if isinstance(meta, Deprecate)), None
+ )
+
def test_deprecated_field_with_deprecation(self):
"""Test DeprecatedField creates FieldInfo with deprecation."""
+ # Given: A deprecation message for a deprecated field
+ # When: The deprecated field is created with the deprecation message
field = DeprecatedField(deprecated="Use new_field instead")
+ # Then: Field metadata includes deprecation information
assert isinstance(field, FieldInfo)
assert field.deprecated == "Use new_field instead"
- assert field.default is None
+ # And a Deprecate annotation is present in the field metadata
+ assert self.get_field_deprecate_annotation(field) is not None
def test_deprecated_field_with_boolean_deprecation(self):
"""Test DeprecatedField with boolean deprecation flag."""
+        # Given: A deprecation boolean flag for a deprecated field
+ # When: The deprecated field is created with the boolean flag
field = DeprecatedField(deprecated=True)
+ # Then: Field metadata includes deprecation flag
assert isinstance(field, FieldInfo)
assert field.deprecated is True
+ # And a Deprecate annotation is present in the field metadata
+ assert self.get_field_deprecate_annotation(field) is not None
def test_deprecated_field_with_new_namespace(self):
"""Test DeprecatedField with new_namespace metadata."""
+ # Given: A new namespace for a deprecated field
+ # When: The deprecated field is created with the new_namespace argument
field = DeprecatedField(
deprecated="Moved to new_namespace", new_namespace="new_namespace"
)
- assert field.json_schema_extra["new_namespace"] == "new_namespace" # type: ignore
+ # Then: Deprecate annotation exposes new_namespace value
+ deprecate_annotation = self.get_field_deprecate_annotation(field)
+ assert deprecate_annotation is not None
+ assert deprecate_annotation.new_namespace == "new_namespace"
def test_deprecated_field_with_new_namespaced_var(self):
"""Test DeprecatedField with new_namespaced_var metadata."""
+ # Given: A new namespaced variable name for a deprecated field
+ # When: The deprecated field is created with the new_namespaced_var argument
field = DeprecatedField(
deprecated="Renamed to new_var", new_namespaced_var="new_var"
)
- assert field.json_schema_extra["new_namespaced_var"] == "new_var" # type: ignore
+ # Then: Deprecate annotation exposes new_namespaced_var value
+ deprecate_annotation = self.get_field_deprecate_annotation(field)
+ assert deprecate_annotation is not None
+ assert deprecate_annotation.new_namespaced_var == "new_var"
def test_deprecated_field_with_new_value_factory(self):
"""Test DeprecatedField with new_value_factory function."""
+ # Given: A function to transform the deprecated value for a deprecated field
def transformer(val):
return val * 2
+ # When: The deprecated field is created with the new_value_factory argument
field = DeprecatedField(deprecated=True, new_value_factory=transformer)
- assert field.json_schema_extra["new_value_factory"] == transformer # type: ignore
+ # Then: Deprecate annotation exposes the provided factory function
+ deprecate_annotation = self.get_field_deprecate_annotation(field)
+ assert deprecate_annotation is not None
+ assert deprecate_annotation.new_value_factory == transformer
def test_deprecated_field_with_all_parameters(self):
"""Test DeprecatedField with all parameters."""
@@ -289,6 +362,8 @@ def test_deprecated_field_with_all_parameters(self):
def transformer(val):
return val.upper()
+ # Given: All migration metadata for a deprecated field
+ # When: The deprecated field is created with all metadata arguments
field = DeprecatedField(
deprecated="Complete migration",
new_namespace="new_ns",
@@ -296,47 +371,54 @@ def transformer(val):
new_value_factory=transformer,
)
+ # Then: Field and Deprecate annotation contain all provided metadata
assert field.deprecated == "Complete migration"
- assert field.json_schema_extra["new_namespace"] == "new_ns" # type: ignore
- assert field.json_schema_extra["new_namespaced_var"] == "new_var" # type: ignore
- assert field.json_schema_extra["new_value_factory"] == transformer # type: ignore
-
- def test_deprecated_field_default_none(self):
- """Test DeprecatedField always sets default to None."""
- field = DeprecatedField(deprecated=True)
-
- assert field.default is None
+ deprecate_annotation = self.get_field_deprecate_annotation(field)
+ assert deprecate_annotation is not None
+ assert deprecate_annotation.new_namespace == "new_ns"
+ assert deprecate_annotation.new_namespaced_var == "new_var"
+ assert deprecate_annotation.new_value_factory == transformer
def test_deprecated_field_raises_when_deprecated_is_false(self):
"""Test DeprecatedField raises ValueError when deprecated is False."""
+ # Given: An invalid DeprecatedField declaration with deprecated=False
+ # When: DeprecatedField is created
+ # Then: A ValueError is raised
with pytest.raises(ValueError, match="DeprecatedField must have"):
- DeprecatedField(deprecated=False)
+ DeprecatedField(deprecated=False) # type: ignore
def test_deprecated_field_with_removal_date(self):
"""Test DeprecatedField with removal_date."""
+ # Given: A removal date for a deprecated field
+ # When: The deprecated field is created with the removal_date argument
field = DeprecatedField(
deprecated="This field is deprecated",
new_namespaced_var="new_field",
removal_date="2026-12-31",
)
- assert field.json_schema_extra["removal_date"] == "2026-12-31" # type: ignore
- assert field.json_schema_extra["new_namespaced_var"] == "new_field" # type: ignore
+ # Then: Deprecate annotation includes removal date metadata
+ deprecate_annotation = self.get_field_deprecate_annotation(field)
+ assert deprecate_annotation is not None
+ assert deprecate_annotation.removal_date == "2026-12-31"
+ assert deprecate_annotation.new_namespaced_var == "new_field"
def test_migrate_namespace_with_removal_date(self):
"""Test migrate_deprecated_namespace includes removal_date in warning."""
+ # Given: A settings payload with deprecated namespace
data = {
"old_settings": {"key1": "value1", "key2": "value2"},
"new_settings": {},
}
+ # When: Namespace migration is performed with removal_date argument
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_namespace(
data, "old_settings", "new_settings", removal_date="2026-12-31"
)
- # Should have warnings with removal date
+ # Then: Warning messages include the removal date and migrated keys
assert len(w) == 2
warning_messages = [str(warning.message) for warning in w]
assert any("2026-12-31" in msg for msg in warning_messages)
@@ -344,10 +426,12 @@ def test_migrate_namespace_with_removal_date(self):
def test_migrate_variable_with_removal_date(self):
"""Test migrate_deprecated_variable includes removal_date in warning."""
+ # Given: A settings payload with a deprecated namespaced variable
data = {
"settings": {"old_var": "value", "other": "data"},
}
+ # When: Variable migration is performed with removal_date argument
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
migrate_deprecated_variable(
@@ -358,7 +442,7 @@ def test_migrate_variable_with_removal_date(self):
removal_date="2026-06-30",
)
- # Should have warning with removal date
+ # Then: The warning message includes removal date and migration details
assert len(w) == 1
warning_msg = str(w[0].message)
assert "2026-06-30" in warning_msg
diff --git a/connectors-sdk/tests/test_settings/test_json_schema.py b/connectors-sdk/tests/test_settings/test_json_schema.py
index d44ac2e6d21..97829d14cd4 100644
--- a/connectors-sdk/tests/test_settings/test_json_schema.py
+++ b/connectors-sdk/tests/test_settings/test_json_schema.py
@@ -3,112 +3,10 @@
import pytest
from connectors_sdk.settings.json_schema_generator import (
ConnectorConfigJsonSchemaGenerator,
- SanitizedJsonSchemaGenerator,
)
from pydantic import BaseModel, Field
-class TestSanitizingJsonSchema:
- """Test SanitizingJsonSchema class."""
-
- def test_removes_new_value_factory_from_metadata(self):
- """Test that new_value_factory function is removed from schema metadata."""
-
- class TestModel(BaseModel):
- field: str = Field(
- default="test",
- json_schema_extra={"new_value_factory": lambda x: x.upper()},
- )
-
- schema = TestModel.model_json_schema(
- schema_generator=SanitizedJsonSchemaGenerator
- )
-
- # Check that new_value_factory is not in the generated schema
- properties = schema["properties"]
- assert "new_value_factory" not in str(properties)
-
- def test_preserves_other_metadata(self):
- """Test that other metadata is preserved."""
-
- class TestModel(BaseModel):
- field: str = Field(
- default="test",
- json_schema_extra={
- "new_namespace": "ns",
- "new_namespaced_var": "var",
- "new_value_factory": lambda x: x,
- },
- )
-
- schema = TestModel.model_json_schema(
- schema_generator=SanitizedJsonSchemaGenerator
- )
-
- # Other metadata should be preserved
- assert "new_namespace" in str(schema)
- assert "new_namespaced_var" in str(schema)
-
- def test_handles_schema_without_metadata(self):
- """Test handling schemas without metadata."""
-
- class TestModel(BaseModel):
- field: str = "test"
-
- schema = TestModel.model_json_schema(
- schema_generator=SanitizedJsonSchemaGenerator
- )
-
- assert "properties" in schema
- assert "field" in schema["properties"]
-
- def test_handles_nested_models(self):
- """Test handling nested models."""
-
- class NestedModel(BaseModel):
- nested_field: str = Field(
- default="test", json_schema_extra={"new_value_factory": lambda x: x}
- )
-
- class TestModel(BaseModel):
- nested: NestedModel = Field(default_factory=NestedModel)
-
- schema = TestModel.model_json_schema(
- schema_generator=SanitizedJsonSchemaGenerator
- )
-
- # Should not contain new_value_factory function
- assert "new_value_factory" not in str(schema)
-
- def test_generate_inner_with_function_in_metadata(self):
- """Test that generate_inner properly sanitizes metadata with functions."""
-
- class TestModel(BaseModel):
- field: str = Field(
- default="test",
- deprecated="Use new_field",
- json_schema_extra={
- "new_namespaced_var": "new_field",
- "new_value_factory": lambda x: x.upper(),
- },
- )
-
- schema = TestModel.model_json_schema(
- schema_generator=SanitizedJsonSchemaGenerator
- )
-
- # Check that schema is generated
- assert "properties" in schema
- assert "field" in schema["properties"]
-
- # new_value_factory should not appear in the string representation
- schema_str = str(schema)
- assert "new_value_factory" not in schema_str
-
- # But other metadata should be preserved
- assert "new_namespaced_var" in schema_str or "deprecated" in schema_str
-
-
class TestConnectorConfigJsonSchemaGenerator:
"""Test ConnectorConfigJsonSchemaGenerator class."""
diff --git a/external-import/checkfirst-import-connector/README.md b/external-import/checkfirst-import-connector/README.md
index 7cd652f79ce..7cd0e046767 100644
--- a/external-import/checkfirst-import-connector/README.md
+++ b/external-import/checkfirst-import-connector/README.md
@@ -1,16 +1,57 @@
# OpenCTI Connector: Checkfirst Import
-Ingest Checkfirst articles from the Checkfirst API into OpenCTI as STIX 2.1 bundles.
+Ingest Checkfirst articles from the Checkfirst API into OpenCTI as STIX 2.1 bundles, tracking the Portal-Kombat / Pravda Network Russian influence operation.
This is an `EXTERNAL_IMPORT` connector that:
+- On first run, sends a one-off bundle of known Pravda network infrastructure (36 `pravda-XX.com` domains, 60+ `news-pravda.com` subdomains, shared hosting IP `178.21.15.85`) attributed to the Portal-Kombat intrusion set per SGDSN/VIGINUM reports (Feb + Apr 2024)
- Fetches articles from a paginated REST API (`Api-Key` header auth)
-- Maps each article to OpenCTI STIX objects: `Channel`, `Media-Content`, `URL`, and relationships
-- Channels sourced from `https://t.me/` are typed as `Telegram`; all others as `website`
-- Sends bundles via `helper.send_stix2_bundle`
+- Maps each article to STIX 2.1 objects and sends them in batches
- Persists page-based progress in OpenCTI connector state so reruns resume where they left off
- Records a `last_run` unix timestamp in state for operational visibility
+## STIX object model
+
+### First-run infrastructure bundle
+
+Sent once when `start_page == 1` (first ever run, or `CHECKFIRST_FORCE_REPROCESS=true`):
+
+```
+IntrusionSet (Portal-Kombat)
+ ← attributed-to ← Campaign 2023
+
+Campaign 2023
+ → uses → Infrastructure (pravda-XX.com) [per domain, start_time = first_observed]
+ → consists-of → DomainName (pravda-XX.com)
+ → consists-of → IPv4Address (178.21.15.85, stop_time = 2024-12-31)
+ DomainName (news-pravda.com subdomain)
+ → related-to → Infrastructure (pravda-XX.com)
+```
+
+### Per-article ingestion
+
+For each article row fetched from the API:
+
+```
+Campaign YYYY (per-year, first_seen = YYYY-01-01, special-cased 2023-09-01)
+ ← attributed-to ← IntrusionSet (Portal-Kombat)
+ → uses → Infrastructure (article domain)
+ → consists-of → DomainName (article domain)
+ → uses → Channel/website (article domain) [start_time = publication date]
+ → related-to → Infrastructure (article domain)
+ → publishes → Media-Content (article)
+ → related-to → Channel/source (Telegram or website origin)
+ DomainName (article domain)
+ → related-to → Channel/website (article domain)
+ Media-Content (article)
+ → related-to → Channel/source
+ → related-to → URL (alternate URLs, if any)
+ Channel/website (article domain)
+ → related-to → DomainName (pravda-XX.com parent, if subdomain of news-pravda.com)
+```
+
+All STIX IDs are deterministic — reruns produce no duplicates.
+
## Requirements
- A running OpenCTI stack (platform + worker) at a version matching `pycti` in `src/requirements.txt`
@@ -42,7 +83,7 @@ All settings can be provided as environment variables or via `config.yml` (see `
| `CHECKFIRST_API_ENDPOINT` | Endpoint path | `/v1/articles` |
| `CHECKFIRST_SINCE` | Only ingest articles published on or after this date. Accepts ISO 8601 absolute dates (`2024-01-01T00:00:00Z`) or durations relative to now (`P365D`, `P1Y`, `P6M`, `P4W`) | `P365D` |
| `CHECKFIRST_TLP_LEVEL` | TLP marking applied to all created objects (`clear`, `green`, `amber`, `amber+strict`, `red`) | `clear` |
-| `CHECKFIRST_FORCE_REPROCESS` | Ignore saved state and restart from page 1 | `false` |
+| `CHECKFIRST_FORCE_REPROCESS` | Ignore saved state and restart from page 1 (also re-sends the infrastructure bundle) | `false` |
| `CHECKFIRST_MAX_ROW_BYTES` | Skip API rows exceeding this approximate byte size | unset |
See `.env.sample` for a ready-to-use local template.
@@ -83,15 +124,23 @@ See `.env.sample` for a ready-to-use local template.
- **Data > Connectors** — confirm the connector registers and shows as active
- **Data > Ingestion** — confirm a new work item is created and completes
-- Search for ingested objects:
- - `Media-Content` entities with `publication_date`
+- After first run, search for:
+ - 36 `Domain-Name` observables for `pravda-XX.com` domains
+ - 36 `Infrastructure` objects wrapping those domains
+ - 1 `IPv4-Addr` observable `178.21.15.85`
+ - 60+ `Domain-Name` observables for `news-pravda.com` subdomains
+ - `Campaign` objects per year (`Portal-Kombat 2023`, `Portal-Kombat 2024`, …)
+ - `IntrusionSet` — Portal-Kombat
+- Per article:
+ - `Media-Content` with `publication_date`
- `Channel` entities (type `Telegram` or `website`)
- - `URL` observables
- - Relationships: `publishes`, `related-to`, `attributed-to`
+ - `Infrastructure` wrapping the publishing domain
+ - Relationships: `uses`, `consists-of`, `publishes`, `related-to`
## Notes
- STIX IDs are deterministic — reruns do not create duplicate entities.
+- The infrastructure bundle is sent only when `start_page == 1`. Set `CHECKFIRST_FORCE_REPROCESS=true` to resend it.
- The connector saves the last successfully processed API page in OpenCTI state; on restart it resumes from the next page.
- The `since` filter is resolved to an absolute UTC datetime at connector startup; duration strings like `P365D` are supported for convenience.
-- API requests use a 300-second timeout per page. The CheckFirst infrastructure can be slow to respond on large result pages, so a generous timeout is used to avoid spurious network errors.
+- API requests use a 300-second timeout per page. The Checkfirst infrastructure can be slow to respond on large result pages.
diff --git a/external-import/checkfirst-import-connector/src/checkfirst_dataset/main_logic.py b/external-import/checkfirst-import-connector/src/checkfirst_dataset/main_logic.py
index 6115c76a0dc..d6fecf47cac 100644
--- a/external-import/checkfirst-import-connector/src/checkfirst_dataset/main_logic.py
+++ b/external-import/checkfirst-import-connector/src/checkfirst_dataset/main_logic.py
@@ -1,4 +1,12 @@
+"""Orchestration logic for a single Checkfirst ingestion pass.
+
+Coordinates API pagination, STIX object creation, bundle assembly, and
+state persistence. On the first run it also sends a one-off infrastructure
+bundle covering all known Pravda network domains and their shared hosting IP.
+"""
+
from datetime import datetime, timezone
+from urllib.parse import urlparse
from checkfirst_dataset.alternates import parse_alternates
from checkfirst_dataset.api_reader import iter_api_rows
@@ -6,7 +14,9 @@
from checkfirst_dataset.reporting import RunReport, SkipReason
from checkfirst_dataset.state import load_state_from_helper, save_state_to_helper
from connector.converter_to_stix import ConverterToStix
+from connector.pravda_network import SUBDOMAIN_TO_DOMAIN
from connector.settings import ConnectorSettings
+from pycti import OpenCTIConnectorHelper
BUNDLE_SIZE = 1000
@@ -15,16 +25,116 @@ class BundleSendError(Exception):
pass
+def _send_infrastructure_bundle(
+ helper: OpenCTIConnectorHelper, converter: ConverterToStix, work_id: str
+) -> None:
+ """Send a one-off bundle of known Pravda network infrastructure objects.
+
+ Called only when starting from page 1 (first run or force_reprocess).
+ Relationships per domain:
+ - Campaign 2023 → attributed-to → IntrusionSet
+ - Campaign 2023 → uses [first_observed] → Infrastructure
+ - Infrastructure → consists-of → DomainName
+ - Infrastructure → consists-of → IPv4Address (with stop_time)
+ - Subdomain [first_observed] → related-to → Infrastructure
+ """
+ from connector.pravda_network import PRAVDA_DOMAINS, PRAVDA_IP
+
+ objects: list = [
+ converter.infrastructure_campaign,
+ converter.infrastructure_campaign_attributed_to_ims,
+ ]
+
+ ip_obj = converter.create_ipv4_address(
+ value=PRAVDA_IP["IP"],
+ first_seen=PRAVDA_IP["first_seen"],
+ last_seen=PRAVDA_IP["last_seen"],
+ )
+ objects.append(ip_obj)
+
+ for entry in PRAVDA_DOMAINS:
+ first_observed = entry["first_observed"]
+
+ infra_obj = converter.create_infrastructure(
+ name=entry["domain"],
+ first_seen=first_observed,
+ )
+ objects.append(infra_obj)
+
+ domain_obj = converter.create_domain_name(
+ value=entry["domain"],
+ first_seen=first_observed,
+ )
+ objects.append(domain_obj)
+
+ # Campaign → uses → Infrastructure
+ objects.append(
+ converter.create_relationship(
+ source_id=converter.infrastructure_campaign.id,
+ relationship_type="uses",
+ target_id=infra_obj.id,
+ start_time=first_observed,
+ )
+ )
+
+ # Infrastructure → consists-of → DomainName
+ objects.append(
+ converter.create_relationship(
+ source_id=infra_obj.id,
+ relationship_type="consists-of",
+ target_id=domain_obj.id,
+ start_time=first_observed,
+ )
+ )
+
+ # Infrastructure → consists-of → IPv4Address (with temporal bounds)
+ objects.append(
+ converter.create_relationship(
+ source_id=infra_obj.id,
+ relationship_type="consists-of",
+ target_id=ip_obj.id,
+ start_time=first_observed,
+ stop_time=PRAVDA_IP["last_seen"],
+ )
+ )
+
+ # Subdomains → related-to → Infrastructure
+ for subdomain in entry.get("subdomains", []):
+ sub_obj = converter.create_domain_name(
+ value=subdomain,
+ first_seen=first_observed,
+ )
+ objects.append(sub_obj)
+ objects.append(
+ converter.create_relationship(
+ source_id=sub_obj.id,
+ relationship_type="related-to",
+ target_id=infra_obj.id,
+ start_time=first_observed,
+ )
+ )
+
+ _send_bundle(helper, converter, objects, work_id)
+
+
def _send_bundle(
- helper, converter: ConverterToStix, objects: list, work_id: str
+ helper: OpenCTIConnectorHelper,
+ converter: ConverterToStix,
+ objects: list,
+ work_id: str,
) -> None:
- """Assemble and send a STIX bundle via the helper."""
- stix_objects = list(objects) + [
+ """Assemble and send a STIX bundle via the helper, deduplicating by ID."""
+ seen_ids: set[str] = set()
+ unique: list = []
+ for obj in objects:
+ if obj.id not in seen_ids:
+ seen_ids.add(obj.id)
+ unique.append(obj)
+
+ stix_objects = unique + [
converter.tlp_marking,
converter.author,
converter.intrusion_set,
- converter.campaign,
- converter.campaign_attributed_to_ims,
]
bundle = helper.stix2_create_bundle(stix_objects)
helper.send_stix2_bundle(
@@ -34,10 +144,11 @@ def _send_bundle(
)
-def run_once(helper, settings: ConnectorSettings) -> None:
+def run_once(helper: OpenCTIConnectorHelper, settings: ConnectorSettings) -> None:
"""Run a single ingestion pass.
- - Fetches data from the API endpoint.
+ - On first run (page 1): sends the Pravda network infrastructure bundle.
+ - Fetches article data from the API endpoint.
- Builds STIX objects and sends them in bundles of BUNDLE_SIZE rows.
- Updates state after each successfully sent bundle.
"""
@@ -78,6 +189,12 @@ def run_once(helper, settings: ConnectorSettings) -> None:
run_name = f"{helper.connect_name} - {now.isoformat()}"
work_id = helper.api.work.initiate_work(helper.connect_id, run_name)
+ if start_page == 1:
+ helper.connector_logger.info(
+ "Sending Pravda network infrastructure bundle (first run)"
+ )
+ _send_infrastructure_bundle(helper, converter, work_id)
+
bundle_objects: list[object] = []
rows_in_bundle = 0
rows_yielded = 0
@@ -102,61 +219,145 @@ def run_once(helper, settings: ConnectorSettings) -> None:
try:
published_dt = parse_publication_date(row.publication_date)
+ year = published_dt.year
+
+ # --- Per-year campaign (deterministic, cached) ---
+ year_campaign, year_campaign_attributed = (
+ converter.get_campaign_for_year(year)
+ )
+
+ # --- Domain observable (extracted from article URL) ---
+ article_domain = urlparse(row.url).netloc
+ domain_obj = converter.create_domain_name(value=article_domain)
- channel = converter.create_channel(
+ # --- Infrastructure wrapping the publishing domain ---
+ infra_obj = converter.create_infrastructure(
+ name=article_domain,
+ first_seen=published_dt,
+ )
+
+ # --- Channel as website (the publishing domain/subdomain) ---
+ channel_website = converter.create_channel(
+ name=article_domain,
+ source_url=row.url,
+ )
+
+ # --- Source as Channel (Telegram or website origin) ---
+ source_channel = converter.create_channel(
name=row.source_title,
source_url=row.source_url,
)
- media_content = converter.create_media_content(
+
+ # --- Content (article) ---
+ content = converter.create_media_content(
title=row.og_title,
description=row.og_description,
url=row.url,
publication_date=published_dt,
)
- source_url_obj = converter.create_url(value=row.source_url)
+ # --- Relationships ---
+ # Campaign → uses → Infrastructure
+ campaign_uses_infra = converter.create_relationship(
+ source_id=year_campaign.id,
+ relationship_type="uses",
+ target_id=infra_obj.id,
+ )
+ # Campaign → uses → Channel as website
+ campaign_uses_channel = converter.create_relationship(
+ source_id=year_campaign.id,
+ relationship_type="uses",
+ target_id=channel_website.id,
+ start_time=published_dt,
+ )
+ # Infrastructure → consists-of → DomainName
+ infra_consists_of_domain = converter.create_relationship(
+ source_id=infra_obj.id,
+ relationship_type="consists-of",
+ target_id=domain_obj.id,
+ )
+ # Channel as website → related-to → Infrastructure
+ channel_related_to_infra = converter.create_relationship(
+ source_id=channel_website.id,
+ relationship_type="related-to",
+ target_id=infra_obj.id,
+ start_time=published_dt,
+ )
+ # DomainName → related-to → Channel as website
+ domain_related_to_channel = converter.create_relationship(
+ source_id=domain_obj.id,
+ relationship_type="related-to",
+ target_id=channel_website.id,
+ start_time=published_dt,
+ )
+ # Channel as website → publishes → Content
publishes = converter.create_relationship(
- source_id=channel.id,
+ source_id=channel_website.id,
relationship_type="publishes",
- target_id=media_content.id,
+ target_id=content.id,
start_time=published_dt,
)
- related_to_source = converter.create_relationship(
- source_id=channel.id,
+ # Channel as website → related-to → Source as Channel
+ channel_uses_source = converter.create_relationship(
+ source_id=channel_website.id,
relationship_type="related-to",
- target_id=source_url_obj.id,
- )
- ims_uses_channel = converter.create_relationship(
- source_id=converter.intrusion_set.id,
- relationship_type="uses",
- target_id=channel.id,
+ target_id=source_channel.id,
+ start_time=published_dt,
)
- campaign_uses_channel = converter.create_relationship(
- source_id=converter.campaign.id,
- relationship_type="uses",
- target_id=channel.id,
+ # Content → related-to → Source as Channel
+ content_related_to_source = converter.create_relationship(
+ source_id=content.id,
+ relationship_type="related-to",
+ target_id=source_channel.id,
+ start_time=published_dt,
)
-
bundle_objects.extend(
[
- channel,
- media_content,
- source_url_obj,
- publishes,
- related_to_source,
- ims_uses_channel,
+ year_campaign,
+ year_campaign_attributed,
+ domain_obj,
+ infra_obj,
+ channel_website,
+ source_channel,
+ content,
+ campaign_uses_infra,
campaign_uses_channel,
+ infra_consists_of_domain,
+ channel_related_to_infra,
+ domain_related_to_channel,
+ publishes,
+ channel_uses_source,
+ content_related_to_source,
]
)
+ # If the article domain is a known news-pravda.com subdomain,
+ # link the Channel as website to its parent pravda-XX.com domain.
+ parent_domain_str = SUBDOMAIN_TO_DOMAIN.get(article_domain)
+ if parent_domain_str:
+ parent_domain_obj = converter.create_domain_name(
+ value=parent_domain_str
+ )
+ bundle_objects.append(parent_domain_obj)
+ bundle_objects.append(
+ converter.create_relationship(
+ source_id=channel_website.id,
+ relationship_type="related-to",
+ target_id=parent_domain_obj.id,
+ start_time=published_dt,
+ )
+ )
+
+ # Content → related-to → alternate URLs
for alt in parse_alternates(row.alternates):
alt_url = converter.create_url(value=alt)
- rel = converter.create_relationship(
- source_id=media_content.id,
+ alt_rel = converter.create_relationship(
+ source_id=content.id,
relationship_type="related-to",
target_id=alt_url.id,
+ start_time=published_dt,
)
- bundle_objects.extend([alt_url, rel])
+ bundle_objects.extend([alt_url, alt_rel])
except DateParseError as exc:
report.skip(SkipReason.ROW_INVALID_PUBLICATION_DATE)
@@ -167,7 +368,7 @@ def run_once(helper, settings: ConnectorSettings) -> None:
continue
except Exception as exc: # noqa: BLE001
report.skip(SkipReason.ROW_MAPPING_ERROR)
- helper.connector_logger.error(
+ helper.connector_logger.warning(
"Skip row (mapping error)",
{"row": row.row_number, "error": str(exc)},
)
diff --git a/external-import/checkfirst-import-connector/src/checkfirst_dataset/types.py b/external-import/checkfirst-import-connector/src/checkfirst_dataset/types.py
index 5faedeeec87..20f5141c2d2 100644
--- a/external-import/checkfirst-import-connector/src/checkfirst_dataset/types.py
+++ b/external-import/checkfirst-import-connector/src/checkfirst_dataset/types.py
@@ -1,3 +1,5 @@
+"""Protocol definitions for duck-typed interfaces used by API helpers."""
+
from typing import Protocol
diff --git a/external-import/checkfirst-import-connector/src/connector/connector.py b/external-import/checkfirst-import-connector/src/connector/connector.py
index 100f4c4dd89..fe2c54fed47 100644
--- a/external-import/checkfirst-import-connector/src/connector/connector.py
+++ b/external-import/checkfirst-import-connector/src/connector/connector.py
@@ -1,3 +1,5 @@
+"""Entry point class for the Checkfirst external-import connector."""
+
import sys
from checkfirst_dataset.main_logic import run_once
@@ -10,7 +12,7 @@ class CheckfirstImportConnector:
This follows the standard external-import connector template:
- `process_message()` does one ingestion pass
- - `run()` schedules runs via `OpenCTIConnectorHelper.schedule_process()`
+ - `run()` schedules runs via `OpenCTIConnectorHelper.schedule_iso()`
The actual API ingestion + STIX mapping is implemented under
`checkfirst_dataset/` and reused here.
@@ -35,7 +37,7 @@ def process_message(self) -> None:
def run(self) -> None:
"""Start the connector using the standard scheduler."""
- self.helper.schedule_process(
+ self.helper.schedule_iso(
message_callback=self.process_message,
- duration_period=self.config.connector.duration_period.total_seconds(),
+ duration_period=self.config.connector.duration_period,
)
diff --git a/external-import/checkfirst-import-connector/src/connector/converter_to_stix.py b/external-import/checkfirst-import-connector/src/connector/converter_to_stix.py
index 4431ad6ac5b..5bb52fe9748 100644
--- a/external-import/checkfirst-import-connector/src/connector/converter_to_stix.py
+++ b/external-import/checkfirst-import-connector/src/connector/converter_to_stix.py
@@ -15,6 +15,7 @@
Channel,
CustomObjectChannel,
CustomObservableMediaContent,
+ Infrastructure,
IntrusionSet,
OpenCTIConnectorHelper,
StixCoreRelationship,
@@ -47,13 +48,15 @@ def __init__(
self.tlp_marking_id = self.tlp_marking.id
self.intrusion_set = self._create_intrusion_set()
- self.campaign = self._create_campaign()
- self.campaign_attributed_to_ims = self.create_relationship(
- source_id=self.campaign.id,
+ self.infrastructure_campaign = self._create_campaign(year=2023)
+ self.infrastructure_campaign_attributed_to_ims = self.create_relationship(
+ source_id=self.infrastructure_campaign.id,
relationship_type="attributed-to",
target_id=self.intrusion_set.id,
)
+ self._campaign_cache: dict[int, tuple[stix2.Campaign, stix2.Relationship]] = {}
+
def _create_intrusion_set(self) -> stix2.IntrusionSet:
return stix2.IntrusionSet(
id=IntrusionSet.generate_id(name="Pravda Network"),
@@ -63,6 +66,7 @@ def _create_intrusion_set(self) -> stix2.IntrusionSet:
"influence operations through a network of 190+ websites"
),
aliases=["Portal-Kombat", "Pravda Network IMS"],
+ first_seen="2023-06-24T00:00:00Z",
goals=[
"Undermine Western unity",
"Promote Russian narratives",
@@ -75,18 +79,20 @@ def _create_intrusion_set(self) -> stix2.IntrusionSet:
allow_custom=True,
)
- def _create_campaign(self) -> stix2.Campaign:
+ def _create_campaign(self, year: int) -> stix2.Campaign:
+ name = f"Pravda Network Campaigns {year}"
+ first_seen = (
+ "2023-09-01T00:00:00Z" if year == 2023 else f"{year}-01-01T00:00:00Z"
+ )
return stix2.Campaign(
- id=Campaign.generate_id(
- name="Pravda Network Campaigns",
- ),
- name="Pravda Network Campaigns",
+ id=Campaign.generate_id(name=name),
+ name=name,
description=(
"Coordinated FIMI campaign spreading pro-Russian narratives "
"across multiple countries and languages"
),
- aliases=["Portal-Kombat Campaign", "Pravda"],
- first_seen="2023-09-01T00:00:00Z",
+ aliases=[f"Portal-Kombat Campaign {year}", f"Pravda {year}"],
+ first_seen=first_seen,
objective=(
"Manipulate public opinion, undermine trust in Western "
"institutions, justify Russian actions"
@@ -96,6 +102,20 @@ def _create_campaign(self) -> stix2.Campaign:
allow_custom=True,
)
+ def get_campaign_for_year(
+ self, year: int
+ ) -> tuple[stix2.Campaign, stix2.Relationship]:
+ """Return (Campaign, attributed-to Relationship) for the given year, cached."""
+ if year not in self._campaign_cache:
+ campaign = self._create_campaign(year=year)
+ attributed_to = self.create_relationship(
+ source_id=campaign.id,
+ relationship_type="attributed-to",
+ target_id=self.intrusion_set.id,
+ )
+ self._campaign_cache[year] = (campaign, attributed_to)
+ return self._campaign_cache[year]
+
def create_channel(
self, *, name: str, source_url: str | None = None
) -> CustomObjectChannel:
@@ -109,7 +129,7 @@ def create_channel(
channel = CustomObjectChannel(
id=Channel.generate_id(name=name),
name=name,
- channel_types=["Telegram"] if is_telegram else ["website"],
+ channel_types=["channel"] if is_telegram else ["website"],
created_by_ref=self.author_id,
object_marking_refs=[self.tlp_marking_id],
external_references=external_refs,
@@ -137,6 +157,53 @@ def create_media_content(
)
return media
+ def create_domain_name(
+ self, *, value: str, first_seen: str | None = None
+ ) -> stix2.DomainName:
+ custom: dict = {"x_opencti_created_by_ref": self.author_id}
+ if first_seen:
+ custom["x_opencti_first_seen_at"] = first_seen
+ return stix2.DomainName(
+ value=value,
+ object_marking_refs=[self.tlp_marking_id],
+ custom_properties=custom,
+ )
+
+ def create_ipv4_address(
+ self,
+ *,
+ value: str,
+ first_seen: str | None = None,
+ last_seen: str | None = None,
+ ) -> stix2.IPv4Address:
+ custom: dict = {"x_opencti_created_by_ref": self.author_id}
+ if first_seen:
+ custom["x_opencti_first_seen_at"] = first_seen
+ if last_seen:
+ custom["x_opencti_last_seen_at"] = last_seen
+ return stix2.IPv4Address(
+ value=value,
+ object_marking_refs=[self.tlp_marking_id],
+ custom_properties=custom,
+ )
+
+ def create_infrastructure(
+ self, *, name: str, first_seen: datetime | str | None = None
+ ) -> stix2.Infrastructure:
+ custom: dict = {"x_opencti_created_by_ref": self.author_id}
+ if first_seen:
+ custom["x_opencti_first_seen_at"] = first_seen
+ return stix2.Infrastructure(
+ id=Infrastructure.generate_id(name=name),
+ name=name,
+ infrastructure_types=["hosting-infrastructure"],
+ first_seen=first_seen,
+ created_by_ref=self.author_id,
+ object_marking_refs=[self.tlp_marking_id],
+ custom_properties=custom,
+ allow_custom=True,
+ )
+
def create_url(self, *, value: str) -> stix2.URL:
return stix2.URL(
value=value,
@@ -153,6 +220,7 @@ def create_relationship(
relationship_type: str,
target_id: str,
start_time: datetime | None = None,
+ stop_time: datetime | None = None,
) -> stix2.Relationship:
rel = stix2.Relationship(
id=StixCoreRelationship.generate_id(
@@ -167,5 +235,6 @@ def create_relationship(
object_marking_refs=[self.tlp_marking_id],
allow_custom=True,
start_time=start_time,
+ stop_time=stop_time,
)
return rel
diff --git a/external-import/checkfirst-import-connector/src/connector/pravda_network.py b/external-import/checkfirst-import-connector/src/connector/pravda_network.py
new file mode 100644
index 00000000000..bce065a80df
--- /dev/null
+++ b/external-import/checkfirst-import-connector/src/connector/pravda_network.py
@@ -0,0 +1,224 @@
+"""Static Pravda network infrastructure data.
+
+All 36 domains share the same hosting IP (AS49352, Russia).
+"""
+
+PRAVDA_IP = {
+ "IP": "178.21.15.85",
+ "first_seen": "2023-09-01T00:00:00Z",
+ "last_seen": "2024-12-31T00:00:00Z",
+}
+
+PRAVDA_DOMAINS = [
+ {
+ "domain": "pravda-de.com",
+ "first_observed": "2023-06-24T00:00:00Z",
+ "subdomains": [
+ "deutsch.news-pravda.com",
+ "germany.news-pravda.com",
+ "austria.news-pravda.com",
+ "switzerland.news-pravda.com",
+ ],
+ },
+ {
+ "domain": "pravda-en.com",
+ "first_observed": "2023-06-24T00:00:00Z",
+ "subdomains": [
+ "news-pravda.com",
+ "uk.news-pravda.com",
+ "usa.news-pravda.com",
+ ],
+ },
+ {
+ "domain": "pravda-es.com",
+ "first_observed": "2023-06-24T00:00:00Z",
+ "subdomains": [
+ "spanish.news-pravda.com",
+ "spain.news-pravda.com",
+ ],
+ },
+ {
+ "domain": "pravda-fr.com",
+ "first_observed": "2023-08-01T00:00:00Z",
+ "subdomains": [
+ "francais.news-pravda.com",
+ "france.news-pravda.com",
+ ],
+ },
+ {
+ "domain": "pravda-pl.com",
+ "first_observed": "2023-06-24T00:00:00Z",
+ "subdomains": ["poland.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-nl.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": [
+ "dutch.news-pravda.com",
+ "netherlands.news-pravda.com",
+ "belgium.news-pravda.com",
+ ],
+ },
+ {
+ "domain": "pravda-dk.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["denmark.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-se.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["sweden.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-fi.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["finland.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-ee.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["estonia.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-lt.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["lt.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-lv.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["latvia.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-cz.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["czechia.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-sk.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["slovakia.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-si.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["slovenia.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-hr.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["croatia.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-hu.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["hungary.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-ro.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["romania.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-bg.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["bulgaria.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-gr.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["greece.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-cy.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["cyprus.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-it.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["italy.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-ie.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["ireland.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-pt.com",
+ "first_observed": "2024-03-20T00:00:00Z",
+ "subdomains": ["portuguese.news-pravda.com", "portugal.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-al.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["albania.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-ba.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["bosnia-herzegovina.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-mk.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["north-macedonia.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-md.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["md.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-rs.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["serbia.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-no.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["norway.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-cf.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["rca.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-bf.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["burkina-faso.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-ne.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["niger.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-jp.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["japan.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-tw.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": ["taiwan.news-pravda.com"],
+ },
+ {
+ "domain": "pravda-ko.com",
+ "first_observed": "2024-03-26T00:00:00Z",
+ "subdomains": [
+ "korea.news-pravda.com",
+ "south-korea.news-pravda.com",
+ "dprk.news-pravda.com",
+ ],
+ },
+]
+
+# Reverse lookup: news-pravda.com subdomain → parent pravda-XX.com domain
+SUBDOMAIN_TO_DOMAIN: dict[str, str] = {
+ sub: entry["domain"] for entry in PRAVDA_DOMAINS for sub in entry["subdomains"]
+}
+
+# Set of all known pravda-XX.com domain values
+PRAVDA_DOMAIN_VALUES: set[str] = {entry["domain"] for entry in PRAVDA_DOMAINS}
diff --git a/external-import/checkfirst-import-connector/src/connector/settings.py b/external-import/checkfirst-import-connector/src/connector/settings.py
index 4980de10f5a..48e3e922469 100644
--- a/external-import/checkfirst-import-connector/src/connector/settings.py
+++ b/external-import/checkfirst-import-connector/src/connector/settings.py
@@ -1,3 +1,5 @@
+"""Pydantic settings models for the Checkfirst connector."""
+
import re
from datetime import datetime, timedelta, timezone
from typing import Literal
@@ -37,6 +39,7 @@ class CheckfirstConfig(BaseConfigModel):
@field_validator("api_url")
@classmethod
def _strip_trailing_slash(cls, v: str) -> str:
+ """Strip trailing slashes from the API URL."""
return v.rstrip("/")
api_key: str = Field(
diff --git a/external-import/checkfirst-import-connector/src/main.py b/external-import/checkfirst-import-connector/src/main.py
index 2dbf3bd6f32..3fd4917252b 100644
--- a/external-import/checkfirst-import-connector/src/main.py
+++ b/external-import/checkfirst-import-connector/src/main.py
@@ -1,3 +1,5 @@
+"""Connector entry point: initialises settings, helper, and starts the run loop."""
+
import traceback
from connector.connector import CheckfirstImportConnector
diff --git a/external-import/doppel/README.md b/external-import/doppel/README.md
index aff3d45f1fa..d16447a1359 100644
--- a/external-import/doppel/README.md
+++ b/external-import/doppel/README.md
@@ -26,30 +26,15 @@
## Introduction
-**Doppel** is a modern Digital Risk Protection solution that detects phishing and brand cyberattacks across channels like social media, domains, ads, and the dark web. By identifying malicious content and threats early, Doppel helps organizations proactively remove digital risks.
-
-This connector integrates OpenCTI with the Doppel platform by fetching alerts from the Doppel API and importing them as STIX 2.1 Indicators. Each alert is converted into an Indicator object enriched with metadata such as severity, entity state, platform, audit logs, and more.
-
-More information: [https://www.doppel.com](https://www.doppel.com)
-
-The connector leverages the OpenCTI connector scheduler to import Doppel alerts periodically and create corresponding entities in OpenCTI.
-
-```mermaid
-flowchart LR
- A[Doppel API] -->|fetch alerts periodically| B(Connector)
- B --> C{Convert to STIX bundle}
- C -->|dispatch| D1[worker]
- C -->|dispatch| D2[worker]
- C -->|dispatch| D3[worker]
- D1 & D2 & D3 -->|ingest| E(OpenCTI)
-```
+This connector fetches alerts from the Doppel API and imports them into OpenCTI as Observables. Each alert is mapped to a
+STIX 2.1 Observable object, enriched with metadata such as severity, entity state, platform, audit logs, etc.
## Installation
### Requirements
-- OpenCTI Platform >= 6.5.1
-- Doppel API access (API Key required, User API Key optional)
+- OpenCTI Platform version >= 6.x
+- Doppel API access (URL + API Key + User API Key (optional) + Organization Code (optional))
## Configuration variables
@@ -78,19 +63,18 @@ Below are the parameters you'll need to set for running the connector properly:
### Connector extra parameters environment variables
-Below are the parameters you'll need to set for the Doppel connector:
-
-| Parameter | config.yml `doppel` | Docker environment variable | Default | Mandatory | Description |
-|-------------------------|--------------------------|----------------------------------|---------------------------|-----------|-----------------------------------------------------------------------------------|
-| API Base URL | `api_base_url` | `DOPPEL_API_BASE_URL` | https://api.doppel.com/v1 | No | The Doppel API base URL. |
-| API Key | `api_key` | `DOPPEL_API_KEY` | / | Yes | The Doppel API key for authentication. |
-| User API Key | `user_api_key` | `DOPPEL_USER_API_KEY` | / | No | The Doppel User API key (optional, for additional API access). |
-| Alerts Endpoint | `alerts_endpoint` | `DOPPEL_ALERTS_ENDPOINT` | /alerts | No | The API endpoint path for fetching alerts. |
-| Historical Polling Days | `historical_polling_days`| `DOPPEL_HISTORICAL_POLLING_DAYS` | 30 | No | Number of days to fetch data for on the first run. |
-| Max Retries | `max_retries` | `DOPPEL_MAX_RETRIES` | 3 | No | Maximum number of retry attempts on API errors. |
-| Retry Delay | `retry_delay` | `DOPPEL_RETRY_DELAY` | 30 | No | Delay in seconds between retry attempts. |
-| TLP Level | `tlp_level` | `DOPPEL_TLP_LEVEL` | clear | No | TLP marking for created STIX objects (`clear`, `white`, `green`, `amber`, `amber+strict`, `red`). |
-| Page Size | `page_size` | `DOPPEL_PAGE_SIZE` | 100 | No | Number of alerts to fetch per API request. |
+| Parameter | config.yml | Docker environment variable | Role | Default | Mandatory | Description |
+|-------------------------|--------------------------------|----------------------------------|---------|---------|-----------|---------------------------------------|
+| API base URL            | doppel.api_base_url            | `DOPPEL_API_BASE_URL`            | Connectivity: Defines the network entry point for all API requests. | https://api.doppel.com/v1 | No | Doppel API base URL |
+| API key | doppel.api_key | `DOPPEL_API_KEY` | Authentication: Provides the primary security credentials for service access. | | Yes | Doppel API key |
+| User API key | doppel.user_api_key | `DOPPEL_USER_API_KEY` | Authorization: Used for user-specific identity. | | No | Doppel User API key |
+| Organization Code | doppel.organization_code | `DOPPEL_ORGANIZATION_CODE` | Scope: Identifies the specific organizational workspace for multi-tenant keys. | | No | Organization Code for Doppel API Keys |
+| Alerts endpoint         | doppel.alerts_endpoint         | `DOPPEL_ALERTS_ENDPOINT`         | Routing: Specifies the API resource path for alert ingestion. | /alerts | No | API endpoint for fetching alerts |
+| Historical polling days | doppel.historical_polling_days | `DOPPEL_HISTORICAL_POLLING_DAYS` | Synchronization: Determines the time-window for initial data fetching. | 30 | No | Days of data to fetch on first run |
+| Max retries | doppel.max_retries | `DOPPEL_MAX_RETRIES` | Resilience: Configures automated error recovery from transient failures. | 3 | No | Retry attempts on API errors |
+| Retry delay (seconds) | doppel.retry_delay | `DOPPEL_RETRY_DELAY` | Rate Management: Controls the frequency of requests during error recovery. | 30 | No | Delay between retry attempts |
+| TLP Level | doppel.tlp_level | `DOPPEL_TLP_LEVEL` | Data Governance: Assigns sensitivity markings for downstream sharing. | clear | No | TLP marking for created STIX objects. |
+| Page size | doppel.page_size | `DOPPEL_PAGE_SIZE` | Performance: Optimizes request volume and memory usage per fetch. | 100 | No | Number of alerts to fetch per request |
## Deployment
@@ -102,8 +86,29 @@ Build a Docker Image using the provided `Dockerfile`.
Example:
-```shell
-docker build . -t opencti/connector-doppel:latest
+3. Register connector in the **main** OpenCTI `docker-compose.yml`:
+
+```yaml
+ connector-doppel:
+ image: opencti/connector-doppel:latest
+ environment:
+ - OPENCTI_URL=http://opencti:8080
+ - OPENCTI_TOKEN=changeme
+ - CONNECTOR_ID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ - CONNECTOR_NAME=Doppel Threat Intelligence
+ - CONNECTOR_SCOPE=Indicator
+ - CONNECTOR_LOG_LEVEL=info
+ - CONNECTOR_DURATION_PERIOD=PT1H
+ - DOPPEL_API_BASE_URL=https://api.doppel.com
+ - DOPPEL_API_KEY=changeme
+ - DOPPEL_USER_API_KEY=changeme
+ - DOPPEL_ORGANIZATION_CODE=changeme
+ - DOPPEL_ALERTS_ENDPOINT=/v1/alerts
+ - DOPPEL_HISTORICAL_POLLING_DAYS=30
+ - DOPPEL_MAX_RETRIES=3
+ - DOPPEL_RETRY_DELAY=30
+ - DOPPEL_TLP_LEVEL=clear
+ restart: always
```
Make sure to replace the environment variables in `docker-compose.yml` with the appropriate configurations for your environment. Then, start the docker container with the provided `docker-compose.yml`.
@@ -143,83 +148,11 @@ Find the "Doppel" connector, and click on the refresh button to reset the connec
## Behavior
-The connector fetches alerts from the Doppel API and converts them into STIX 2.1 Indicator objects.
-
-- **Initial population**: On the first run, the connector retrieves alerts from the last `DOPPEL_HISTORICAL_POLLING_DAYS` days (default: 30 days).
-- **Delta-based updates**: Subsequent runs fetch only new or updated alerts since the last run, based on `last_activity_timestamp`.
-- **Parallel fetching**: The connector uses multi-threaded pagination to efficiently fetch large volumes of alerts.
-
-### Mapping to OpenCTI entities
-
-The graph below describes all the different entities created by the connector in OpenCTI from Doppel alerts.
-
-```mermaid
-graph LR
- subgraph Doppel
- direction TB
- DoppelAlert[Alert]
- DoppelEntity[Entity
URL / IP / Email / Domain]
- DoppelMetadata[Metadata
severity, platform, state...]
- DoppelAuditLogs[Audit Logs]
- end
-
- subgraph OpenCTI
- direction TB
- subgraph "STIX Domain Objects"
- OpenCTIIdentity[Identity
Doppel Organization]
- OpenCTIIndicator[Indicator]
- end
- subgraph "STIX Meta Objects"
- OpenCTIMarking[TLP Marking]
- OpenCTIExternalRef[External Reference]
- end
- end
-
- %% Main transformation
- DoppelAlert ==> OpenCTIIndicator
- DoppelEntity -->|"pattern + name"| OpenCTIIndicator
- DoppelMetadata -->|"description + custom properties"| OpenCTIIndicator
- DoppelAuditLogs -->|"x_opencti_audit_logs"| OpenCTIIndicator
-
- %% References
- OpenCTIIdentity -.->|"created_by_ref"| OpenCTIIndicator
- OpenCTIMarking -.->|"object_marking_refs"| OpenCTIIndicator
- OpenCTIExternalRef -.->|"external_references"| OpenCTIIndicator
-```
-
-#### Doppel Alert to OpenCTI Indicator mapping
-
-| Doppel Alert Field | OpenCTI Indicator Field | Description |
-|---------------------------|-----------------------------------|------------------------------------------------------------|
-| `entity` | `name`, `pattern` | The entity value; pattern type detected automatically |
-| `created_at` | `created` | Timestamp when the alert was created |
-| `last_activity_timestamp` | `modified` | Timestamp of last activity on the alert |
-| `platform` | `description` | Platform where the threat was detected |
-| `entity_state` | `description` | Current state of the entity |
-| `queue_state` | `description` | Queue processing state |
-| `severity` | `description` | Alert severity level |
-| `entity_content` | `description` | JSON content of the entity (formatted) |
-| `score` | `x_opencti_score` | Risk score converted to 0-100 scale |
-| `brand` | `x_opencti_brand` | Associated brand being protected |
-| `product` | `x_mitre_platforms` | Platform/product value |
-| `source` | `x_opencti_source` | Source of the alert |
-| `notes` | `x_opencti_notes` | Additional notes on the alert |
-| `audit_logs` | `x_opencti_audit_logs` | Formatted audit log entries (timestamp: type - value) |
-| `doppel_link` | `external_references[].url` | Link to the alert in Doppel platform |
-| `id` | `external_references[].external_id` | Doppel alert ID |
-
-### Entity type detection
-
-The connector automatically detects the entity type from the alert value and creates the appropriate STIX pattern:
-
-| Entity Type | Detection Pattern | STIX Pattern Example | Observable Type |
-|---------------|--------------------------------------------|--------------------------------------------|-----------------|
-| URL | Starts with `http://` or `https://` | `[url:value = 'https://example.com']` | Url |
-| IPv4 Address | Matches `^(\d{1,3}\.){3}\d{1,3}$` | `[ipv4-addr:value = '192.168.1.1']` | IPv4-Addr |
-| Email Address | Matches `^[^@]+@[^@]+\.[^@]+$` | `[email-addr:value = 'user@example.com']` | Email-Addr |
-| Domain Name | Default (if no other pattern matches) | `[domain-name:value = 'example.com']` | Domain-Name |
-
-> **Note**: Domain names are the most common entity type for brand protection use cases and are used as the default when no other pattern matches.
+- Fetches alerts from Doppel API paginated by `last_activity_timestamp`
+- Converts each alert into a STIX 2.1 Observable object
+- Bundles and sends the STIX objects to OpenCTI
+- Includes platform, score, brand, audit logs, notes, etc. as `custom_properties`
+- On first run, fetches up to `HISTORICAL_POLLING_DAYS`; subsequent runs are delta-based
## Debugging
@@ -235,8 +168,7 @@ Set `CONNECTOR_LOG_LEVEL=debug` for verbose logging. Log output includes:
## Additional information
-- This connector creates only `Indicator` STIX Domain Objects (no Observables, Incidents, or other SDO types are created directly).
-- Each indicator includes the `x_opencti_main_observable_type` custom property to indicate the detected observable type.
-- The connector uses pattern-based deterministic ID generation (`PyCTIIndicator.generate_id(pattern=...)`) to ensure idempotent imports and avoid duplicates.
-- TLP marking is applied to all created indicators based on the `DOPPEL_TLP_LEVEL` configuration.
-- The connector creates a "Doppel" organization identity as the author of all imported indicators.
+- This connector strictly follows OpenCTI's standard STIX schema.
+- Custom properties like `x_opencti_brand`, `x_opencti_source` are preserved.
+- When `queue_state` is `actioned` or `taken_down`, Observables are converted to STIX 2.1 Indicators.
+- Supports safe reprocessing with unique `indicator_id` generation to avoid duplication.
diff --git a/external-import/doppel/docker-compose.yml b/external-import/doppel/docker-compose.yml
index 42ec34cc539..193890cc15b 100644
--- a/external-import/doppel/docker-compose.yml
+++ b/external-import/doppel/docker-compose.yml
@@ -18,6 +18,7 @@ services:
- DOPPEL_API_BASE_URL=https://api.doppel.com/v1
- DOPPEL_API_KEY=changeMe
- DOPPEL_USER_API_KEY=changeMe
+ - DOPPEL_ORGANIZATION_CODE=changeMe
- DOPPEL_TLP_LEVEL=clear
- DOPPEL_ALERTS_ENDPOINT=/alerts
- DOPPEL_HISTORICAL_POLLING_DAYS=30
diff --git a/external-import/doppel/src/config.yml.sample b/external-import/doppel/src/config.yml.sample
index 19808a4be9c..b5be92cf8d4 100644
--- a/external-import/doppel/src/config.yml.sample
+++ b/external-import/doppel/src/config.yml.sample
@@ -14,6 +14,7 @@ doppel:
api_base_url: 'https://api.doppel.com/v1'
api_key: 'changeMe'
user_api_key: 'changeMe'
+ organization_code: 'changeMe'
tlp_level: 'clear' # Options: clear, white, green, amber, amber+strict, red
alerts_endpoint: '/alerts'
historical_polling_days: 30
diff --git a/external-import/doppel/src/doppel/client_api.py b/external-import/doppel/src/doppel/client_api.py
index c3b3b77931f..0b09e2067a6 100644
--- a/external-import/doppel/src/doppel/client_api.py
+++ b/external-import/doppel/src/doppel/client_api.py
@@ -2,7 +2,14 @@
from typing import Any
import requests
-from tenacity import retry, stop_after_attempt, wait_fixed
+from doppel.constants import RETRYABLE_REQUEST_ERRORS
+from tenacity import (
+ retry,
+ retry_if_exception,
+ stop_after_attempt,
+ wait_exponential_jitter,
+ wait_fixed,
+)
class ConnectorClient:
@@ -18,10 +25,27 @@ def __init__(self, helper, config):
# Add user_api_key if provided
if self.config.user_api_key:
headers["x-user-api-key"] = self.config.user_api_key
+ if self.config.organization_code:
+ headers["x-organization-code"] = self.config.organization_code
self.session.headers.update(headers)
- @retry(wait=wait_fixed(5), stop=stop_after_attempt(3)) # Default fallback values
+ @staticmethod
+ def is_retryable_exception(exception):
+ if isinstance(exception, requests.HTTPError):
+ if exception.response.status_code in (429, 500, 502, 503, 504):
+ return True
+
+ if isinstance(exception, RETRYABLE_REQUEST_ERRORS):
+ return True
+ return False
+
+ @retry(
+ retry=retry_if_exception(is_retryable_exception),
+ wait=wait_exponential_jitter(initial=10, max=60, jitter=1),
+ stop=stop_after_attempt(5),
+ reraise=True,
+ )
def _request_data(self, api_url: str, params=None):
"""
Internal method to handle API requests
@@ -38,6 +62,12 @@ def _request_data(self, api_url: str, params=None):
{"url": api_url, "params": params},
)
raise
+ elif http_err.response.status_code == 429:
+ self.helper.connector_logger.warning(
+ "[API] Rate limited (429), retrying with backoff...",
+ {"url": api_url, "params": params},
+ )
+ raise
else:
try:
error_json = http_err.response.json()
diff --git a/external-import/doppel/src/doppel/config_loader.py b/external-import/doppel/src/doppel/config_loader.py
index ea3fecc124d..cc00c82b268 100644
--- a/external-import/doppel/src/doppel/config_loader.py
+++ b/external-import/doppel/src/doppel/config_loader.py
@@ -88,6 +88,12 @@ def _initialize_configurations(self) -> None:
"DOPPEL_USER_API_KEY", ["doppel", "user_api_key"], self.load
)
+ self.organization_code = get_config_variable(
+ "DOPPEL_ORGANIZATION_CODE",
+ ["doppel", "organization_code"],
+ self.load,
+ )
+
self.alerts_endpoint = get_config_variable(
"DOPPEL_ALERTS_ENDPOINT",
["doppel", "alerts_endpoint"],
diff --git a/external-import/doppel/src/doppel/constants.py b/external-import/doppel/src/doppel/constants.py
new file mode 100644
index 00000000000..60bd4485761
--- /dev/null
+++ b/external-import/doppel/src/doppel/constants.py
@@ -0,0 +1,8 @@
+import requests
+
+RETRYABLE_REQUEST_ERRORS = (
+ requests.Timeout,
+ requests.ConnectionError,
+)
+
+STIX_VERSION = "2.1"
diff --git a/external-import/doppel/src/doppel/converter_to_stix.py b/external-import/doppel/src/doppel/converter_to_stix.py
index b4143ddb5d4..f997b269587 100644
--- a/external-import/doppel/src/doppel/converter_to_stix.py
+++ b/external-import/doppel/src/doppel/converter_to_stix.py
@@ -1,12 +1,39 @@
-import json
-import re
+from datetime import datetime
+from uuid import NAMESPACE_URL, uuid5
+from doppel.constants import STIX_VERSION
+from doppel.stix_helpers import (
+ build_custom_properties,
+ build_description,
+ build_external_references,
+ build_labels,
+ calculate_priority,
+ is_reverted_state,
+ is_takedown_state,
+)
from doppel.utils import parse_iso_datetime
from pycti import Identity as PyCTIIdentity
-from pycti import Indicator as PyCTIIndicator
-from pycti import MarkingDefinition
-from stix2 import TLP_AMBER, TLP_GREEN, TLP_RED, TLP_WHITE, Identity, Indicator
+from pycti import Indicator as PyctiIndicator
+from pycti import MarkingDefinition as PyctiMarkingDefinition
+from pycti import Note as PyctiNote
+from pycti import StixCoreRelationship as PyctiStixCoreRelationship
+from pycti.utils.constants import CustomObservablePhoneNumber as PhoneNumber
+from stix2 import (
+ TLP_AMBER,
+ TLP_GREEN,
+ TLP_RED,
+ TLP_WHITE,
+ DomainName,
+ Grouping,
+ Identity,
+ Indicator,
+ IPv4Address,
+)
from stix2 import MarkingDefinition as Stix2MarkingDefinition
+from stix2 import (
+ Note,
+)
+from stix2 import Relationship as StixCoreRelationship
class ConverterToStix:
@@ -14,13 +41,6 @@ class ConverterToStix:
Provides methods for converting various types of input data into STIX 2.1 objects.
"""
- # URL regex pattern
- URL_PATTERN = re.compile(r"^https?://", re.IGNORECASE)
- # IPv4 regex pattern
- IPV4_PATTERN = re.compile(r"^(\d{1,3}\.){3}\d{1,3}$")
- # Email regex pattern
- EMAIL_PATTERN = re.compile(r"^[^@]+@[^@]+\.[^@]+$")
-
def __init__(self, helper, config):
self.helper = helper
self.config = config
@@ -52,7 +72,7 @@ def _create_tlp_marking(level: str) -> Stix2MarkingDefinition:
"green": TLP_GREEN,
"amber": TLP_AMBER,
"amber+strict": Stix2MarkingDefinition(
- id=MarkingDefinition.generate_id("TLP", "TLP:AMBER+STRICT"),
+ id=PyctiMarkingDefinition.generate_id("TLP", "TLP:AMBER+STRICT"),
definition_type="statement",
definition={"statement": "custom"},
custom_properties={
@@ -64,128 +84,624 @@ def _create_tlp_marking(level: str) -> Stix2MarkingDefinition:
}
return mapping[level]
- def _detect_entity_type(self, entity: str) -> tuple[str, str]:
+ def _find_indicators_by_alert_id(
+ self, alert_id, domain_name=None, ip_address=None
+ ) -> list:
"""
- Detect the type of entity and return the appropriate STIX pattern and observable type.
- :param entity: The entity value to analyze
- :return: Tuple of (stix_pattern, opencti_observable_type)
+ Find indicators by alert_id stored in external_id
+ :param alert_id: Doppel alert ID
+ :param domain_name: Optional domain name to search by pattern
+ :param ip_address: Optional IP address to search by pattern
+ :return: List of indicator objects
"""
- entity_stripped = entity.strip()
- # Check for URL
- if self.URL_PATTERN.match(entity_stripped):
- escaped_entity = entity_stripped.replace("'", "\\'")
- return f"[url:value = '{escaped_entity}']", "Url"
+ # First try searching by custom property (may not work if not indexed)
+ filters = {
+ "mode": "and",
+ "filters": [
+ {"key": "entity_type", "values": ["Indicator"]},
+ {"key": "x_opencti_workflow_id", "values": [alert_id]},
+ ],
+ "filterGroups": [],
+ }
- # Check for IPv4
- if self.IPV4_PATTERN.match(entity_stripped):
- return f"[ipv4-addr:value = '{entity_stripped}']", "IPv4-Addr"
+ indicators = self.helper.api.indicator.list(filters=filters)
- # Check for Email
- if self.EMAIL_PATTERN.match(entity_stripped):
- return f"[email-addr:value = '{entity_stripped}']", "Email-Addr"
+ # If not found and we have domain/IP, search by pattern
+ if (not indicators or len(indicators) == 0) and (domain_name or ip_address):
+ search_value = domain_name or ip_address
+ self.helper.connector_logger.info(
+ "[DoppelConverter] No indicators found by workflow_id, trying pattern search",
+ {"alert_id": alert_id, "search_value": search_value},
+ )
- # Default to domain-name (most common for brand protection)
- escaped_entity = entity_stripped.replace("'", "\\'").lower()
- return f"[domain-name:value = '{escaped_entity}']", "Domain-Name"
+ # Search by indicator name (which is the domain/IP)
+ filters = {
+ "mode": "and",
+ "filters": [
+ {"key": "entity_type", "values": ["Indicator"]},
+ {"key": "name", "values": [search_value]},
+ ],
+ "filterGroups": [],
+ }
- def convert_alerts_to_stix(self, alerts: list):
+ indicators = self.helper.api.indicator.list(filters=filters)
+
+ # Filter results to only include indicators with matching external_id
+ if indicators:
+ filtered_indicators = []
+ for ind in indicators:
+ ext_refs = ind.get("externalReferences", []) or []
+ for ext_ref in ext_refs:
+ if ext_ref.get("external_id") == alert_id:
+ filtered_indicators.append(ind)
+ break
+ indicators = filtered_indicators
+
+ self.helper.connector_logger.info(
+ "[DoppelConverter] Found indicators for alert_id",
+ {"alert_id": alert_id, "count": len(indicators) if indicators else 0},
+ )
+
+ return indicators or []
+
+ def _process_takedown(
+ self, alert, domain_observable_id, ip_observable_id, stix_objects
+ ):
"""
- Convert list of alerts to stix2 Indicator objects
- :return: stix2 bundle json
+ Process takedown workflow: Create Indicator (based-on Observable)
"""
- stix_objects = [self.author, self.tlp_marking]
- created_by_ref = self.author.id
+ alert_id = alert.get("id")
+ queue_state = alert.get("queue_state")
- for alert in alerts:
- try:
- alert_id = alert.get("id", "unknown")
+ self.helper.connector_logger.info(
+ "[DoppelConverter] Processing takedown workflow",
+ {"alert_id": alert_id, "queue_state": queue_state},
+ )
+
+ # Extract domain/IP
+ entity_content = alert.get("entity_content", {})
+ root_domain = entity_content.get("root_domain", {})
+ domain_name = root_domain.get("domain")
+ ip_address = root_domain.get("ip_address", "")
+ phone_value = alert.get("entity") if alert.get("product") == "telco" else None
+
+ # Parse timestamps once for indicator/note reuse
+ created_at = (
+ parse_iso_datetime(alert["created_at"]) if alert.get("created_at") else None
+ )
+ modified = (
+ parse_iso_datetime(alert.get("last_activity"))
+ if alert.get("last_activity")
+ else None
+ )
+ note_timestamp = modified or created_at or datetime.utcnow()
+ note_content = (
+ "Alert is Actioned"
+ if queue_state and queue_state.lower() == "actioned"
+ else "Moved to Takedown"
+ )
+ note_body = f"Alert {alert_id} has been {queue_state}"
+
+ # Find existing indicators for this alert
+ existing_indicators = self._find_indicators_by_alert_id(
+ alert_id, domain_name=domain_name, ip_address=ip_address
+ )
+
+ # Filter for active (non-revoked) indicators
+ active_indicators = [
+ ind for ind in existing_indicators if not ind.get("revoked", False)
+ ]
+
+ # Get primary observable ID for relationship
+ primary_observable_id = domain_observable_id or ip_observable_id
+
+ if active_indicators:
+ # Un-revoke if previously revoked
+ indicator = active_indicators[0]
+ indicator_id = indicator.get("standard_id") or indicator.get("id")
+
+ if indicator.get("revoked"):
self.helper.connector_logger.info(
- "Processing alert", {"alert_id": alert_id}
+ "[DoppelConverter] Un-revoking indicator after re-takedown",
+ {"alert_id": alert_id, "indicator_id": indicator_id},
)
- entity = alert.get("entity", "unknown")
- pattern, observable_type = self._detect_entity_type(entity)
- indicator_id = PyCTIIndicator.generate_id(pattern=pattern)
-
- created_at = (
- parse_iso_datetime(alert["created_at"])
- if alert.get("created_at", None)
- else None
+ # Update to revoked=false
+ self.helper.api.stix_domain_object.update_field(
+ id=indicator.get("id"), input={"key": "revoked", "value": False}
)
- modified = (
- parse_iso_datetime(alert["last_activity_timestamp"])
- if alert.get("last_activity_timestamp", None)
- else None
+ # Always record note when takedown/actioned occurs
+ note_refs = []
+ indicator_ref = indicator.get("standard_id") or indicator.get("id")
+ if indicator_ref:
+ note_refs.append(indicator_ref)
+ if primary_observable_id:
+ note_refs.append(primary_observable_id)
+
+ if note_refs:
+ note = self._create_note(
+ note_content, note_body, note_refs, note_timestamp
)
+ stix_objects.append(note)
+
+ return # Indicator already exists and is active
+
+ # Build pattern
+ if domain_name:
+ pattern = f"[domain-name:value = '{domain_name}']"
+ name = domain_name
+ elif ip_address:
+ pattern = f"[ipv4-addr:value = '{ip_address}']"
+ name = ip_address
+ elif phone_value:
+ pattern = f"[tracking-number:value = '{phone_value}']"
+ name = phone_value
+ else:
+ return
+
+ # Create Indicator
+ indicator = self._create_indicator(alert, pattern, name, created_at, modified)
+ stix_objects.append(indicator)
+
+ # Create based-on relationship to primary observable
+ if primary_observable_id:
+ based_on_rel = self._create_relationship(
+ source_id=indicator.id,
+ target_id=primary_observable_id,
+ relationship_type="based-on",
+ )
+ stix_objects.append(based_on_rel)
+
+ self.helper.connector_logger.info(
+ "[DoppelConverter] Created based-on relationship for new indicator",
+ {
+ "alert_id": alert_id,
+ "indicator_id": indicator.id,
+ "observable_id": primary_observable_id,
+ },
+ )
+ else:
+ self.helper.connector_logger.warning(
+ "[DoppelConverter] No observable ID available for relationship",
+ {"alert_id": alert_id},
+ )
+
+ # Add note referencing both indicator and observable when possible
+ note_refs = [indicator.id]
+ if primary_observable_id:
+ note_refs.append(primary_observable_id)
+
+ note = self._create_note(note_content, note_body, note_refs, note_timestamp)
+ stix_objects.append(note)
- audit_logs = alert.get("audit_logs", [])
- audit_log_text = "\n".join(
- [
- f"{log['timestamp']}: {log['type']} - {log['value']}"
- for log in audit_logs
- ]
+ self.helper.connector_logger.info(
+ "[DoppelConverter] Created indicator for takedown alert",
+ {"alert_id": alert_id, "pattern": pattern},
+ )
+
+ def _process_reversion(
+ self, alert, domain_observable_id, ip_observable_id, stix_objects
+ ):
+ """
+ Process reversion workflow: Revoke Indicator
+ """
+ alert_id = alert.get("id")
+ queue_state = alert.get("queue_state")
+
+ self.helper.connector_logger.info(
+ "[DoppelConverter] Processing reversion workflow",
+ {"alert_id": alert_id, "queue_state": queue_state},
+ )
+
+ # Extract domain/IP for search
+ entity_content = alert.get("entity_content", {})
+ root_domain = entity_content.get("root_domain", {})
+ domain_name = root_domain.get("domain")
+ ip_address = root_domain.get("ip_address", "")
+
+ # Find existing indicators for this alert
+ existing_indicators = self._find_indicators_by_alert_id(
+ alert_id, domain_name=domain_name, ip_address=ip_address
+ )
+
+ # Filter for active (non-revoked) indicators
+ active_indicators = [
+ ind for ind in existing_indicators if not ind.get("revoked", False)
+ ]
+
+ if not active_indicators:
+ self.helper.connector_logger.info(
+ "[DoppelConverter] No active indicators found to revoke",
+ {"alert_id": alert_id},
+ )
+ return
+
+ # Parse timestamps
+ modified = (
+ parse_iso_datetime(alert.get("last_activity"))
+ if alert.get("last_activity")
+ else datetime.utcnow()
+ )
+
+ revoked_indicator_refs = []
+ for existing_indicator in active_indicators:
+ indicator_id = existing_indicator.get("id")
+ self.helper.connector_logger.info(
+ "[DoppelConverter] Revoking indicator",
+ {"alert_id": alert_id, "indicator_id": indicator_id},
+ )
+ indicator_standard_id = (
+ existing_indicator.get("standard_id") or indicator_id
+ )
+ if indicator_standard_id:
+ revoked_indicator_refs.append(indicator_standard_id)
+
+ # Use OpenCTI API to revoke the indicator
+ try:
+ self.helper.api.stix_domain_object.update_field(
+ id=indicator_id, input={"key": "revoked", "value": True}
)
- entity_content = alert.get("entity_content", {})
- formatted_entity_content = json.dumps(entity_content, indent=2)
- platform = alert.get("platform", "unknown")
- platform_value = alert.get("product", "unknown")
-
- entity_state = alert.get("entity_state", "unknown")
- queue_state = alert.get("queue_state", "unknown")
- raw_severity = alert.get("severity", "unknown")
- severity = f"{raw_severity} severity"
-
- description = (
- f"**Platform**: {platform} \n"
- f"**Entity State**: {entity_state} \n"
- f"**Queue State**: {queue_state} \n"
- f"**Severity**: {severity} \n"
- f"**Entity Content**: \n"
- f"{formatted_entity_content}"
+ # Add revoked-false-positive label
+ label = self.helper.api.label.create(value="revoked-false-positive")
+ self.helper.api.stix_domain_object.add_label(
+ id=indicator_id, label_id=label["id"]
)
- raw_score = alert.get("score")
- try:
- score = int(float(raw_score) * 100) if raw_score is not None else 0
- except (ValueError, TypeError):
- score = 0
-
- indicator = Indicator(
- id=indicator_id,
- name=entity,
- pattern=pattern,
- pattern_type="stix",
- description=description,
- created=created_at,
- modified=modified,
- created_by_ref=created_by_ref,
- object_marking_refs=[self.tlp_marking["id"]],
- external_references=[
- {
- "source_name": self.author.name,
- "url": alert.get("doppel_link"),
- "external_id": alert.get("id"),
- }
- ],
- custom_properties={
- "x_opencti_score": score,
- "x_opencti_main_observable_type": observable_type,
- "x_opencti_brand": alert.get("brand", "unknown"),
- "x_mitre_platforms": platform_value,
- "x_opencti_source": alert.get("source", "unknown"),
- "x_opencti_notes": alert.get("notes", ""),
- "x_opencti_audit_logs": audit_log_text,
+ self.helper.connector_logger.info(
+ "[DoppelConverter] Successfully revoked indicator via API",
+ {"alert_id": alert_id, "indicator_id": indicator_id},
+ )
+ except Exception as e:
+ self.helper.connector_logger.error(
+ "[DoppelConverter] Error revoking indicator via API",
+ {
+ "alert_id": alert_id,
+ "indicator_id": indicator_id,
+ "error": str(e),
},
- allow_custom=True,
)
- stix_objects.append(indicator)
+
+ # Add reversion note to observable
+ primary_observable_id = domain_observable_id or ip_observable_id
+ note_refs = revoked_indicator_refs[:]
+ if primary_observable_id:
+ note_refs.append(primary_observable_id)
+
+ if note_refs:
+ reversion_note = Note(
+ id=PyctiNote.generate_id(
+ content=note_refs,
+ created=modified,
+ ),
+ abstract="Moved from taken down back to unresolved",
+ content=f"Alert {alert_id} has been reverted from takedown state to {queue_state}",
+ spec_version=STIX_VERSION,
+ created=modified,
+ modified=modified,
+ created_by_ref=self.author.id,
+ object_refs=note_refs,
+ object_marking_refs=[self.tlp_marking.id],
+ allow_custom=True,
+ )
+ stix_objects.append(reversion_note)
+
+ self.helper.connector_logger.info(
+ "[DoppelConverter] Revoked indicators",
+ {"alert_id": alert_id, "count": len(active_indicators)},
+ )
+
+ def _create_phone_number_observable(self, phone_number, alert) -> PhoneNumber:
+ """
+ Create PhoneNumber observable
+ """
+ # labels_flat = build_labels(alert)
+ # external_references = build_external_references(alert)
+ custom_properties = build_custom_properties(alert, self.author.id)
+
+ phone_number_observable = PhoneNumber(
+ value=phone_number,
+ spec_version=STIX_VERSION,
+ object_marking_refs=[self.tlp_marking.id],
+ custom_properties=custom_properties or None,
+ )
+ return phone_number_observable
+
+ def _create_domain_observable(self, domain_name, alert) -> DomainName:
+ """
+ Create DomainName observable
+ """
+ labels_flat = build_labels(alert)
+ external_references = build_external_references(alert)
+ custom_properties = build_custom_properties(alert, self.author.id)
+
+ domain_observable = DomainName(
+ value=domain_name,
+ spec_version=STIX_VERSION,
+ object_marking_refs=[self.tlp_marking.id],
+ labels=labels_flat or None,
+ external_references=external_references if external_references else None,
+ custom_properties=custom_properties,
+ allow_custom=True,
+ )
+ return domain_observable
+
+ def _create_ip_observable(self, ip_address, alert) -> IPv4Address:
+ """
+ Create IPv4Address observable
+ """
+ labels_flat = build_labels(alert)
+ external_references = build_external_references(alert)
+ custom_properties = build_custom_properties(alert, self.author.id)
+
+ ip_observable = IPv4Address(
+ value=ip_address,
+ spec_version=STIX_VERSION,
+ object_marking_refs=[self.tlp_marking.id],
+ labels=labels_flat or None,
+ external_references=external_references if external_references else None,
+ custom_properties=custom_properties,
+ allow_custom=True,
+ )
+ return ip_observable
+
+ def _create_grouping_case(self, alert, object_refs) -> Grouping:
+ """
+ Create Grouping case object
+ """
+ alert_id = alert.get("id")
+ score = alert.get("score")
+ priority = calculate_priority(score)
+
+ case_id = f"grouping--{uuid5(NAMESPACE_URL, f'doppel-case-{alert_id}')}"
+ case_labels = build_labels(alert)
+ case_labels.append(f"priority:{priority}")
+
+ case = Grouping(
+ id=case_id,
+ name=f"Case for Alert {alert_id}",
+ context="suspicious-activity",
+ object_refs=object_refs,
+ spec_version=STIX_VERSION,
+ created_by_ref=self.author.id,
+ external_references=(
+ build_external_references(alert)
+ if build_external_references(alert)
+ else None
+ ),
+ description=build_description(alert),
+ labels=case_labels or None,
+ object_marking_refs=[self.tlp_marking.id],
+ allow_custom=True,
+ )
+ return case
+
+ def _create_relationship(
+ self, source_id, target_id, relationship_type
+ ) -> StixCoreRelationship:
+ """
+ Create StixCoreRelationship object
+ """
+ relationship = StixCoreRelationship(
+ id=PyctiStixCoreRelationship.generate_id(
+ relationship_type=relationship_type,
+ source_ref=source_id,
+ target_ref=target_id,
+ ),
+ relationship_type=relationship_type,
+ source_ref=source_id,
+ target_ref=target_id,
+ spec_version=STIX_VERSION,
+ created_by_ref=self.author.id,
+ object_marking_refs=[self.tlp_marking.id],
+ allow_custom=True,
+ )
+ return relationship
+
+ def _create_note(self, note_content, note_body, note_refs, note_timestamp) -> Note:
+ """
+ Create Note object
+ """
+ return Note(
+ id=PyctiNote.generate_id(
+ content=note_body,
+ created=note_timestamp,
+ ),
+ abstract=note_content,
+ content=note_body,
+ spec_version=STIX_VERSION,
+ created=note_timestamp,
+ modified=note_timestamp,
+ created_by_ref=self.author.id,
+ object_refs=note_refs,
+ object_marking_refs=[self.tlp_marking.id],
+ allow_custom=True,
+ )
+
+ def _create_indicator(
+ self, alert, pattern, name, created_at, modified
+ ) -> Indicator:
+ """
+ Create Indicator
+ """
+ labels_flat = build_labels(alert)
+ external_references = build_external_references(alert)
+ custom_properties = build_custom_properties(alert, self.author.id)
+
+ indicator = Indicator(
+ id=PyctiIndicator.generate_id(pattern),
+ pattern=pattern,
+ pattern_type="stix",
+ spec_version=STIX_VERSION,
+ name=name,
+ description=build_description(alert),
+ created=created_at,
+ modified=modified,
+ created_by_ref=self.author.id,
+ object_marking_refs=[self.tlp_marking.id],
+ labels=labels_flat or None,
+ external_references=external_references if external_references else None,
+ valid_from=created_at,
+ custom_properties=custom_properties,
+ allow_custom=True,
+ )
+ return indicator
+
+ def _handle_state_transitions(
+ self,
+ current_queue_state,
+ previous_queue_state,
+ alert_id,
+ alert,
+ domain_observable_id,
+ ip_observable_id,
+ stix_objects,
+ domain_name,
+ ip_address,
+ ):
+ """
+ Handle state transitions based on queue_state
+ """
+ is_takedown_now = is_takedown_state(current_queue_state)
+ was_takedown = (
+ is_takedown_state(previous_queue_state) if previous_queue_state else False
+ )
+ is_reverted = is_reverted_state(current_queue_state)
+
+ # Transition: TO_TAKEDOWN
+ if is_takedown_now and not was_takedown:
+ self._process_takedown(
+ alert, domain_observable_id, ip_observable_id, stix_objects
+ )
+
+ # Transition: REVERSION
+ elif was_takedown and not is_takedown_now:
+ self._process_reversion(
+ alert, domain_observable_id, ip_observable_id, stix_objects
+ )
+
+ # Handle case where previous_state is null but we have an active indicator in reverted state
+ elif previous_queue_state is None and is_reverted and not is_takedown_now:
+ existing_indicators = self._find_indicators_by_alert_id(
+ alert_id, domain_name=domain_name, ip_address=ip_address
+ )
+ active_indicators = [
+ ind for ind in existing_indicators if not ind.get("revoked", False)
+ ]
+
+ if active_indicators:
+ self._process_reversion(
+ alert, domain_observable_id, ip_observable_id, stix_objects
+ )
+
+ def convert_alerts_to_stix(self, alerts: list):
+ """
+        Convert alerts to STIX objects: Observables, plus Indicators/Notes/Groupings driven by queue_state transitions
+ Uses helper.get_state() / helper.set_state() for persistent state tracking
+ """
+ stix_objects = [self.author, self.tlp_marking]
+
+ # Get persistent state
+ state = self.helper.get_state() or {}
+
+ for alert in alerts:
+ try:
+ alert_id = alert.get("id", "unknown")
+ current_queue_state = alert.get("queue_state")
+ previous_queue_state = state.get(alert_id, {}).get("queue_state")
+
+ # Extract required fields
+ entity_content = alert.get("entity_content", {})
+ product = alert.get("product")
+ root_domain = entity_content.get("root_domain", {})
+ domain_name = root_domain.get("domain")
+ ip_address = root_domain.get("ip_address")
+
+ domain_observable_id = None
+ ip_observable_id = None
+
+ # Create Phone Number Observable for product = telco.
+ if product == "telco":
+ phone_number_observable = self._create_phone_number_observable(
+ alert.get("entity"), alert
+ )
+ stix_objects.append(phone_number_observable)
+ domain_observable_id = (
+ phone_number_observable.id
+                    )  # reuse the domain slot so downstream takedown logic links to the phone observable
+
+ # Create or reference Domain Observable
+ if domain_name:
+ domain_observable = self._create_domain_observable(
+ domain_name, alert
+ )
+ stix_objects.append(domain_observable)
+ domain_observable_id = domain_observable.id
+
+ # Create or reference IP Observable
+ if ip_address:
+ ip_observable = self._create_ip_observable(ip_address, alert)
+ stix_objects.append(ip_observable)
+ ip_observable_id = ip_observable.id
+
+ # Create resolves-to relationship if domain also exists
+ if domain_observable_id:
+ relationship = self._create_relationship(
+ source_id=domain_observable_id,
+ target_id=ip_observable.id,
+ relationship_type="resolves-to",
+ )
+ stix_objects.append(relationship)
+
+ # DETECT STATE TRANSITIONS
+ self._handle_state_transitions(
+ current_queue_state,
+ previous_queue_state,
+ alert_id,
+ alert,
+ domain_observable_id,
+ ip_observable_id,
+ stix_objects,
+ domain_name,
+ ip_address,
+ )
+
+ # Case Creation
+ if domain_observable_id or ip_observable_id:
+ case_refs = []
+ if domain_observable_id:
+ case_refs.append(domain_observable_id)
+ if ip_observable_id:
+ case_refs.append(ip_observable_id)
+
+ case = self._create_grouping_case(alert, object_refs=case_refs)
+ stix_objects.append(case)
+
+ # Create related-to relationship from case to primary observable
+ related_to = self._create_relationship(
+ source_id=case.id,
+ target_id=domain_observable_id or ip_observable_id,
+ relationship_type="related-to",
+ )
+ stix_objects.append(related_to)
+
+ # Update state for this alert
+ state[alert_id] = {
+ "queue_state": current_queue_state,
+ "last_processed": datetime.utcnow().isoformat(),
+ }
+
except Exception as e:
- self.helper.connector_logger.warning(
- "Failed to process alert",
- {"alert": alert, "error": str(e)},
+ # Unexpected errors - log and raise
+ self.helper.connector_logger.error(
+ "[DoppelConverter] Failed to process alert",
+ {"alert_id": alert_id, "error": str(e)},
)
+ raise
+
+ # Persist updated state
+ self.helper.set_state(state)
return self.helper.stix2_create_bundle(stix_objects)
diff --git a/external-import/doppel/src/doppel/stix_helpers.py b/external-import/doppel/src/doppel/stix_helpers.py
new file mode 100644
index 00000000000..adac9739d14
--- /dev/null
+++ b/external-import/doppel/src/doppel/stix_helpers.py
@@ -0,0 +1,189 @@
+def calculate_priority(score) -> str:
+ """Calculate case priority based on score"""
+ if score is None:
+ return "P4"
+ try:
+ score_float = float(score)
+ if score_float > 0.8:
+ return "P1"
+ elif score_float >= 0.5:
+ return "P2"
+ elif score_float > 0:
+ return "P3"
+ else:
+ return "P4"
+ except (ValueError, TypeError):
+ return "P4"
+
+
+def is_takedown_state(queue_state) -> bool:
+ """Check if alert is in takedown state"""
+ return queue_state and queue_state.lower() in ["actioned", "taken_down"]
+
+
+def is_reverted_state(queue_state) -> bool:
+ """Check if alert is reverted from takedown"""
+ return queue_state and queue_state.lower() in [
+ "archived",
+ "needs_confirmation",
+ "doppel_review",
+ "monitoring",
+ ]
+
+
+def build_external_references(alert) -> list:
+ """
+ Build external references for observables/indicators
+ :param alert: Doppel alert
+ :return: List of external reference dicts
+ """
+ external_references = []
+ audit_logs = alert.get("audit_logs", [])
+ audit_log_text = (
+ "\n".join(
+ [
+ f"{log.get('timestamp', '')}: {log.get('type', '')} - {log.get('value', '')} (by {log.get('changed_by', '')})"
+ for log in audit_logs
+ ]
+ )
+ if audit_logs
+ else ""
+ )
+
+ if alert.get("doppel_link") or alert.get("id"):
+ external_ref = {
+ "source_name": alert.get("source", "Doppel"),
+ }
+ if alert.get("doppel_link"):
+ external_ref["url"] = alert.get("doppel_link")
+ if alert.get("id"):
+ external_ref["external_id"] = alert.get("id")
+ if audit_log_text:
+ external_ref["description"] = audit_log_text
+ external_references.append(external_ref)
+
+ return external_references
+
+
+def build_description(alert) -> str:
+ """
+ Build description field from alert data
+ :param alert: Doppel alert
+ :return: Description string
+ """
+
+ # Extract entity_content data
+ entity_content = alert.get("entity_content", {})
+ root_domain = entity_content.get("root_domain", {})
+
+ country_code = root_domain.get("country_code")
+ registrar = root_domain.get("registrar")
+ hosting_provider = root_domain.get("hosting_provider")
+ contact_email = root_domain.get("contact_email")
+ mx_records = root_domain.get("mx_records", [])
+ nameservers = root_domain.get("nameservers", [])
+
+ description_parts = []
+ if alert.get("brand"):
+ description_parts.append(f"**Brand**: {alert.get('brand')}\n")
+ if alert.get("product"):
+ description_parts.append(f"**Product**: {alert.get('product')}\n")
+ if alert.get("notes"):
+ description_parts.append(f"**Notes**: {alert.get('notes')}\n")
+ if alert.get("uploaded_by"):
+ description_parts.append(f"**Uploaded By**: {alert.get('uploaded_by')}\n")
+ if alert.get("screenshot_url"):
+ description_parts.append(f"**Screenshot URL**: {alert.get('screenshot_url')}\n")
+ if alert.get("message"):
+ description_parts.append(f"**Message**: {alert.get('message')}\n")
+ if alert.get("source"):
+ description_parts.append(f"**Source**: {alert.get('source')}\n")
+ if alert.get("assignee"):
+ description_parts.append(f"**Assignee**: {alert.get('assignee')}\n")
+ if country_code:
+ description_parts.append(f"**Country**: {country_code}\n")
+ if registrar:
+ description_parts.append(f"**Registrar**: {registrar}\n")
+ if hosting_provider:
+ description_parts.append(f"**Hosting Provider**: {hosting_provider}\n")
+ if contact_email:
+ description_parts.append(f"**Contact Email**: {contact_email}\n")
+ if mx_records:
+ formatted_mx = [
+ f"{mx.get('exchange')} (pref: {mx.get('preference')})" for mx in mx_records
+ ]
+ description_parts.append(f"**MX Records**: {', '.join(formatted_mx)}\n")
+ if nameservers:
+ ns_text = ", ".join(
+ [ns if isinstance(ns, str) else ns.get("host") for ns in nameservers]
+ )
+ description_parts.append(f"**Nameservers**: {ns_text}\n")
+
+ return "\n".join(description_parts) if description_parts else ""
+
+
+def build_custom_properties(alert, author_id) -> dict:
+ """
+ Build custom properties for observables/indicators
+ :param alert: Doppel alert
+ :return: Dict of custom properties
+ """
+ custom_properties = {}
+ raw_score = alert.get("score")
+ try:
+ score = int(float(raw_score)) if raw_score is not None else 0
+ except (ValueError, TypeError):
+ score = 0
+ custom_properties["x_opencti_created_by_ref"] = author_id
+ custom_properties["x_opencti_score"] = score
+ custom_properties["x_opencti_workflow_id"] = alert.get(
+ "id"
+ ) # Store alert_id for lookup
+
+ if alert.get("product") == "telco":
+ custom_properties["x_opencti_labels"] = build_labels(alert)
+ custom_properties["x_opencti_external_references"] = build_external_references(
+ alert
+ )
+
+ x_opencti_description = build_description(alert)
+ if x_opencti_description:
+ custom_properties["x_opencti_description"] = x_opencti_description
+
+ return custom_properties
+
+
+def build_labels(alert) -> list:
+ """
+ Build labels for observables/indicators with semantic prefixes
+ Returns dict with categorized labels and flat list
+ """
+ labels_dict = {
+ "queue_state": None,
+ "entity_state": None,
+ "severity": None,
+ "platform": None,
+ "brand": None,
+ "tags": [],
+ }
+
+ if alert.get("queue_state"):
+ labels_dict["queue_state"] = f"queue_state:{alert['queue_state']}"
+ if alert.get("entity_state"):
+ labels_dict["entity_state"] = f"entity_state:{alert['entity_state']}"
+ if alert.get("severity"):
+ labels_dict["severity"] = f"severity:{alert['severity']}"
+ if alert.get("platform"):
+ labels_dict["platform"] = f"platform:{alert['platform']}"
+ if alert.get("brand"):
+ labels_dict["brand"] = f"brand:{alert['brand']}"
+
+ tags = alert.get("tags", [])
+
+ if tags:
+ labels_dict["tags"] = [tag.get("name") for tag in tags if "name" in tag]
+
+ labels_flat = [v for v in labels_dict.values() if v and isinstance(v, str)]
+ labels_flat.extend(labels_dict["tags"])
+
+ return labels_flat
diff --git a/external-import/google-ti-feeds/connector/src/octi/configs/connector_config.py b/external-import/google-ti-feeds/connector/src/octi/configs/connector_config.py
index fcf92c35037..69706fc8edb 100644
--- a/external-import/google-ti-feeds/connector/src/octi/configs/connector_config.py
+++ b/external-import/google-ti-feeds/connector/src/octi/configs/connector_config.py
@@ -12,7 +12,7 @@ class ConnectorConfig(BaseConfig):
"""Configuration for the connector."""
yaml_section: ClassVar[str] = "connector"
- model_config = SettingsConfigDict(env_prefix="connector_")
+ model_config = SettingsConfigDict(env_prefix="connector_", extra="allow")
id: str = Field(
...,
diff --git a/external-import/misp/src/connector/connector.py b/external-import/misp/src/connector/connector.py
index e7b8efd196d..d3fba2426ca 100644
--- a/external-import/misp/src/connector/connector.py
+++ b/external-import/misp/src/connector/connector.py
@@ -1,4 +1,5 @@
from datetime import datetime, timedelta, timezone
+from enum import Enum, auto
from typing import TYPE_CHECKING
from api_client.client import MISPClient, MISPClientError
@@ -17,6 +18,13 @@
LOG_PREFIX = "[Connector]"
+class ProcessingOutcome(Enum):
+ """Outcome of processing a STIX bundle within a batch."""
+
+ COMPLETED = auto()
+ BUFFERING = auto()
+
+
class Misp:
def __init__(self, config: "ConnectorSettings", helper: "OpenCTIConnectorHelper"):
self.config = config
@@ -65,6 +73,8 @@ def __init__(self, config: "ConnectorSettings", helper: "OpenCTIConnectorHelper"
batch_size=self.config.misp.batch_count,
)
+ self._current_bundle = None
+
def _check_batch_size_and_flush(
self,
all_entities: "list[stix2.v21._STIXBase21]",
@@ -270,14 +280,20 @@ def _process_bundle_in_batch(
bundle_objects: "list[stix2.v21._STIXBase21]",
author: "stix2.Identity",
markings: "list[stix2.MarkingDefinition]",
- ) -> None:
+ ) -> ProcessingOutcome:
"""Process a bundle of STIX objects in a batch.
Args:
- event_id: ID of the event
+ event: The MISP event being processed
bundle_objects: list of STIX objects to process
author: Author of the event
markings: Markings of the event
+
+ Returns:
+ ProcessingOutcome.BUFFERING if the connector queue is full and
+ processing was interrupted mid-event (caller should stop the event
+ loop and resume on the next scheduler run).
+ ProcessingOutcome.COMPLETED when all chunks were sent successfully.
"""
bundle_size = len(bundle_objects)
current_state = self.work_manager.get_state()
@@ -291,6 +307,43 @@ def _process_bundle_in_batch(
bundle_size,
batch_chunk_size,
):
+ if (
+ not self.work_manager.check_connector_run_and_terminate()
+ and self.work_manager.check_connector_buffering()
+ ):
+ remaining_objects_count = max(
+ 0,
+ remaining_objects_count
+ # Chunk size - author - markings
+ + (
+ self.batch_processor.get_current_batch_size()
+ - (1 + len(markings))
+ ),
+ )
+ # Clear the current batch to avoid processing duplicated items
+ # during the next run.
+ self.batch_processor.clear_current_batch()
+ self.logger.info(
+ "Connector is buffering, this event will be processed in "
+ "the next scheduler process",
+ {
+ "prefix": LOG_PREFIX,
+ "event_id": event.Event.id,
+ "event_uuid": event.Event.uuid,
+ },
+ )
+ # Save the event date to restart from the current one in the
+ # next process.
+ new_state = {
+ "last_event_date": self._get_event_datetime(event).isoformat(),
+ "remaining_objects_count": remaining_objects_count,
+ }
+ if self.config.misp.datetime_attribute == "date":
+ new_state["current_event_id"] = event.Event.id
+ self.work_manager.update_state(state_update=new_state)
+
+ return ProcessingOutcome.BUFFERING
+
now = datetime.now(tz=timezone.utc)
self.batch_processor.work_name_template = (
f"MISP run @ {now.isoformat(timespec='seconds')}"
@@ -318,6 +371,8 @@ def _process_bundle_in_batch(
self._flush_batch_processor()
self.work_manager.update_state(state_update={"remaining_objects_count": 0})
+ return ProcessingOutcome.COMPLETED
+
def process_events(self) -> str | None:
"""Fetch, convert and send MISP events."""
@@ -379,33 +434,35 @@ def process_events(self) -> str | None:
curr_event_date = self._get_event_datetime(event).isoformat()
- if self.work_manager.check_connector_buffering():
+ if self._current_bundle is None:
self.logger.info(
- "Connector is buffering, this event will be processed in the next scheduler process",
- event_log_data,
+ "MISP event found - Processing...", event_log_data
)
- # Save the event date to restart from the current one in the next process.
- new_state = {"last_event_date": curr_event_date}
- self.work_manager.update_state(state_update=new_state)
- break
-
- self.logger.info("MISP event found - Processing...", event_log_data)
- try:
- author, markings, bundle_objects = self.converter.process(
- event=event,
- include_relationships=(
- len(event.Event.Attribute or [])
- + len(event.Event.Object or [])
+ try:
+ self._current_bundle = author, markings, bundle_objects = (
+ self.converter.process(
+ event=event,
+ include_relationships=(
+ len(event.Event.Attribute or [])
+ + len(event.Event.Object or [])
+ )
+ # TODO: Add a configuration for the maximum number of attributes and objects
+ < 10000,
+ )
)
- # TODO: Add a configuration for the maximum number of attributes and objects
- < 10000,
- )
- except ConverterError as err:
- self.logger.error(
- f"Error while converting MISP event, skipping it. {err}",
+ except ConverterError as err:
+ self.logger.error(
+ f"Error while converting MISP event, skipping it. {err}",
+ event_log_data,
+ )
+ self._current_bundle = None
+ continue
+ else:
+ self.logger.info(
+ "Resuming processing of MISP event...",
event_log_data,
)
- continue
+ author, markings, bundle_objects = self._current_bundle
self.logger.debug(
"Converted to STIX entities",
@@ -415,12 +472,16 @@ def process_events(self) -> str | None:
},
)
- self._process_bundle_in_batch(
+ outcome = self._process_bundle_in_batch(
event=event,
bundle_objects=bundle_objects,
author=author,
markings=markings,
)
+ if outcome is ProcessingOutcome.BUFFERING:
+ break
+
+ self._current_bundle = None
else:
# FOR-ELSE: The else block executes only if the loop is not
@@ -467,6 +528,13 @@ def process_events(self) -> str | None:
},
)
+ except Exception as e:
+ self.logger.error(
+ "Error while processing MISP events",
+ {"prefix": LOG_PREFIX, "error": str(e)},
+ )
+ self._current_bundle = None
+
finally:
self._flush_batch_processor()
diff --git a/external-import/misp/src/utils/batch_processor/batch_processor.py b/external-import/misp/src/utils/batch_processor/batch_processor.py
index 4e8d66b328a..28a62303c60 100644
--- a/external-import/misp/src/utils/batch_processor/batch_processor.py
+++ b/external-import/misp/src/utils/batch_processor/batch_processor.py
@@ -206,6 +206,14 @@ def get_statistics(self) -> dict[str, Any]:
"batch_size_limit": self.batch_size,
}
+ def clear_current_batch(self) -> None:
+ """Discard all items currently in the batch without sending them.
+
+ Use this when the connector is buffering and the queued items must be
+ re-processed on the next scheduler run instead of being sent now.
+ """
+ self._current_batch.clear()
+
def get_current_batch_size(self) -> int:
"""Get the number of items in the current batch.
diff --git a/external-import/misp/src/utils/work_manager/work_manager.py b/external-import/misp/src/utils/work_manager/work_manager.py
index 4d5fd2af28c..75418fdea06 100644
--- a/external-import/misp/src/utils/work_manager/work_manager.py
+++ b/external-import/misp/src/utils/work_manager/work_manager.py
@@ -54,6 +54,25 @@ def check_connector_buffering(self) -> bool:
"""
return self._helper.check_connector_buffering()
+ def check_connector_run_and_terminate(self) -> bool:
+ """Check whether the connector is in 'run and terminate' mode.
+
+ This helper indicates if the connector is configured to run once and then
+ terminate, either explicitly via ``connect_run_and_terminate`` or
+ implicitly via a ``duration_period`` of zero seconds.
+
+ Returns:
+ bool: True if run-and-terminate mode is active
+ (``helper.connect_run_and_terminate`` is truthy or the
+ configured ``duration_period`` is 0 seconds), False otherwise.
+
+ """
+ # Run-and-terminate mode is enabled either by the helper flag or by
+ # configuring a zero-second duration period.
+ return bool(self._helper.connect_run_and_terminate) or (
+ self._config.connector.duration_period.total_seconds() == 0
+ )
+
@staticmethod
def _is_valid_iso_format(date_string: str) -> bool:
"""Check if a string is a valid ISO format date.
diff --git a/external-import/misp/tests/tests_connector/test_connector.py b/external-import/misp/tests/tests_connector/test_connector.py
index 9d444f02146..55796725c70 100644
--- a/external-import/misp/tests/tests_connector/test_connector.py
+++ b/external-import/misp/tests/tests_connector/test_connector.py
@@ -6,6 +6,7 @@
from api_client.models import EventRestSearchListItem
from connector import ConnectorSettings, Misp
+from connector.connector import ProcessingOutcome
from freezegun import freeze_time
from pycti import OpenCTIConnectorHelper
@@ -578,10 +579,17 @@ def _make_publish_timestamp_event(event_id: str, ts: int) -> EventRestSearchList
)
-def _run_process_events(connector, events, buffering_sequence, initial_state=None):
+def _run_process_events(
+ connector, events, buffering_at_event_index=None, initial_state=None
+):
"""
Run `process_events` with all external dependencies mocked.
+ Args:
+ buffering_at_event_index: 0-based index of the event call at which
+ ``_process_bundle_in_batch`` should return
+ ``ProcessingOutcome.BUFFERING``. ``None`` means no buffering.
+
Returns:
(state dict, mock for _process_bundle_in_batch, process_events return value)
"""
@@ -591,6 +599,20 @@ def track_update_state(state_update=None, **kwargs):
if state_update:
state.update(state_update)
+ call_count = [0]
+
+ def process_bundle_side_effect(event, bundle_objects, author, markings):
+ idx = call_count[0]
+ call_count[0] += 1
+ if buffering_at_event_index is not None and idx == buffering_at_event_index:
+ # Simulate what the real _process_bundle_in_batch does when it
+ # detects buffering: persist the checkpoint state and signal the
+ # caller to stop the event loop.
+ state["last_event_date"] = connector._get_event_datetime(event).isoformat()
+ state["remaining_objects_count"] = len(bundle_objects)
+ return ProcessingOutcome.BUFFERING
+ return ProcessingOutcome.COMPLETED
+
with (
patch.object(connector, "helper") as mock_helper,
patch.object(connector, "work_manager") as mock_wm,
@@ -604,10 +626,8 @@ def track_update_state(state_update=None, **kwargs):
mock_wm.get_state.side_effect = lambda: dict(state)
mock_wm.update_state.side_effect = track_update_state
- if isinstance(buffering_sequence, list):
- mock_wm.check_connector_buffering.side_effect = buffering_sequence
- else:
- mock_wm.check_connector_buffering.return_value = buffering_sequence
+
+ mock_process.side_effect = process_bundle_side_effect
mock_api.search_events.return_value = iter(events)
mock_converter.process.return_value = (MagicMock(), [], [MagicMock()])
@@ -622,15 +642,18 @@ def test_process_events_state_set_to_buffered_event_date_on_buffering(
mock_opencti_connector_helper, mock_py_misp
):
"""
- Test that when buffering is detected, `last_event_date` is saved to the
- buffered event's timestamp so the next run restarts from that event.
+ Test that when buffering is detected inside _process_bundle_in_batch for
+ event B, ``last_event_date`` is saved to event B's timestamp so the next
+ run restarts from that event.
Scenario:
- Event A (earlier timestamp): processed normally.
- - Event B (later timestamp): buffering detected → loop breaks.
+ - Event B (later timestamp): _process_bundle_in_batch detects buffering
+ mid-chunk → returns ProcessingOutcome.BUFFERING → loop breaks.
- Expected: after the run, `last_event_date` equals event B's timestamp (not
- A's), so the next run re-processes event B.
+ Expected: after the run, ``last_event_date`` equals event B's timestamp
+ (not A's), so the next run re-processes event B from the saved chunk
+ offset.
"""
config_dict = deepcopy(minimal_config_dict)
config_dict["misp"]["datetime_attribute"] = "publish_timestamp"
@@ -642,9 +665,9 @@ def test_process_events_state_set_to_buffered_event_date_on_buffering(
event_a = _make_publish_timestamp_event("1", ts_a)
event_b = _make_publish_timestamp_event("2", ts_b)
- # No buffering for A, buffering triggers on B
+ # Buffering triggers on the second _process_bundle_in_batch call (event B)
state, _, result = _run_process_events(
- connector, [event_a, event_b], buffering_sequence=[False, True]
+ connector, [event_a, event_b], buffering_at_event_index=1
)
expected = datetime.fromtimestamp(ts_b, tz=timezone.utc).isoformat()
@@ -652,13 +675,19 @@ def test_process_events_state_set_to_buffered_event_date_on_buffering(
@freeze_time("2026-01-01 00:00:00")
-def test_process_events_buffered_event_not_processed(
+def test_process_events_buffering_breaks_event_loop(
mock_opencti_connector_helper, mock_py_misp
):
"""
- Test that when buffering is detected for event B, `_process_bundle_in_batch`
- is NOT called for it — even though the state has already been advanced to
- its timestamp.
+ Test that when ``_process_bundle_in_batch`` returns
+ ``ProcessingOutcome.BUFFERING`` for event B, the event loop is broken
+ immediately — event B itself IS passed to the method (buffering is
+ detected inside it), but any subsequent events are not processed at all.
+
+ Scenario:
+ - Event A: _process_bundle_in_batch returns COMPLETED.
+ - Event B: _process_bundle_in_batch returns BUFFERING → loop breaks.
+ - Event C: never reached.
"""
config_dict = deepcopy(minimal_config_dict)
config_dict["misp"]["datetime_attribute"] = "publish_timestamp"
@@ -666,17 +695,21 @@ def test_process_events_buffered_event_not_processed(
ts_a = int(time.time())
ts_b = int(time.time() + 1)
+ ts_c = int(time.time() + 2)
event_a = _make_publish_timestamp_event("1", ts_a)
event_b = _make_publish_timestamp_event("2", ts_b)
+ event_c = _make_publish_timestamp_event("3", ts_c)
state, mock_process, _ = _run_process_events(
- connector, [event_a, event_b], buffering_sequence=[False, True]
+ connector, [event_a, event_b, event_c], buffering_at_event_index=1
)
- # Only event A should have been processed
- assert mock_process.call_count == 1
- assert mock_process.call_args[1]["event"] == event_a
+ # Both event A and event B were passed to _process_bundle_in_batch;
+ # event C was never reached because the loop broke after event B.
+ assert mock_process.call_count == 2
+ assert mock_process.call_args_list[0][1]["event"] == event_a
+ assert mock_process.call_args_list[1][1]["event"] == event_b
def test_process_events_adds_one_second_after_loop_completion(
@@ -696,9 +729,7 @@ def test_process_events_adds_one_second_after_loop_completion(
event = _make_publish_timestamp_event("1", ts)
# No buffering — loop completes normally
- state, _, result = _run_process_events(
- connector, [event], buffering_sequence=False
- )
+ state, _, result = _run_process_events(connector, [event])
# ts == Now: process_events does not update last_event_date (handled
# by _process_bundle_in_batch, which is mocked here).
@@ -706,9 +737,7 @@ def test_process_events_adds_one_second_after_loop_completion(
assert result is None
frozen_time.move_to("2026-01-01 00:00:01")
- state, _, result = _run_process_events(
- connector, [event], buffering_sequence=False
- )
+ state, _, result = _run_process_events(connector, [event])
expected = (
datetime.fromtimestamp(ts, tz=timezone.utc) + timedelta(seconds=1)
).isoformat()
diff --git a/run_test.sh b/run_test.sh
index 406ae24eba5..203d916af94 100644
--- a/run_test.sh
+++ b/run_test.sh
@@ -49,7 +49,7 @@ do
continue
fi
- echo 'Running tests pipeline for project' "$project"
+ echo 'Running tests uv pipeline for project' "$project"
# Per-connector outputs
OUT_DIR="test_outputs/$(echo "$project" | tr '/ ' '__')"
@@ -64,24 +64,24 @@ do
fi
echo 'Installing requirements'
- python -m pip install -q -r "$requirements_file"
+ uv pip install -q -r "$requirements_file"
- python -m pip freeze | grep "connectors-sdk\|pycti" || true
+ uv pip freeze | grep "connectors-sdk\|pycti" || true
if [ -n "$project_has_sdk_dependency" ] ; then
echo 'Installing connectors-sdk local version'
- python -m pip uninstall -y connectors-sdk
- python -m pip install -q ./connectors-sdk
+ uv pip uninstall connectors-sdk
+ uv pip install -q ./connectors-sdk
fi
- python -m pip freeze | grep "connectors-sdk\|pycti" || true
+ uv pip freeze | grep "connectors-sdk\|pycti" || true
echo 'Installing latest version of pycti'
- python -m pip uninstall -y pycti
- python -m pip install -q git+https://github.com/OpenCTI-Platform/opencti.git@master#subdirectory=client-python
- python -m pip freeze | grep "connectors-sdk\|pycti" || true
+ uv pip uninstall pycti
+ uv pip install -q git+https://github.com/OpenCTI-Platform/opencti.git@master#subdirectory=client-python
+ uv pip freeze | grep "connectors-sdk\|pycti" || true
- python -m pip check || exit 1 # exit if dependencies are broken
+ uv pip check || exit 1 # exit if dependencies are broken
echo 'Running tests'
python -m pytest "$project" --junitxml="$OUT_DIR/junit.xml" -q -rA # exit non zero if no test run