diff --git a/encord/client.py b/encord/client.py index 48f43917d..2d1c261f9 100644 --- a/encord/client.py +++ b/encord/client.py @@ -12,7 +12,7 @@ client.get_project() Returns: - Project: A project record instance. See Project ORM for details. + Project: A project record instance. See Project ORM for details. """ @@ -192,9 +192,9 @@ def get_dataset(self) -> OrmDataset: OrmDataset: A dataset record instance. Raises: - AuthorisationError: If the dataset API key is invalid. - ResourceNotFoundError: If no dataset exists by the specified dataset EntityId. - UnknownError: If an error occurs while retrieving the dataset. + :class:`~encord.exceptions.AuthorisationError`: If the dataset API key is invalid. + :class:`~encord.exceptions.ResourceNotFoundError`: If no dataset exists by the specified dataset EntityId. + :class:`~encord.exceptions.UnknownError`: If an error occurs while retrieving the dataset. """ res = self._querier.basic_getter( OrmDataset, @@ -227,12 +227,12 @@ def list_data_rows( data_hashes: optional list of individual data unit hashes to include Returns: - List[DataRow]: A list of DataRows object that match the filter + List[:class:`~encord.orm.dataset.DataRow`]: A list of :class:`~encord.orm.dataset.DataRow` objects that match the filter Raises: - AuthorisationError: If the dataset API key is invalid. - ResourceNotFoundError: If no dataset exists by the specified dataset EntityId. - UnknownError: If an error occurs while retrieving the dataset. + :class:`~encord.exceptions.AuthorisationError`: If the dataset API key is invalid. + :class:`~encord.exceptions.ResourceNotFoundError`: If no dataset exists by the specified dataset EntityId. + :class:`~encord.exceptions.UnknownError`: If an error occurs while retrieving the dataset. 
""" created_before = optional_datetime_to_iso_str("created_before", created_before) created_after = optional_datetime_to_iso_str("created_after", created_after) @@ -803,9 +803,9 @@ def get_project(self, include_labels_metadata=True) -> OrmProject: OrmProject: A project record instance. Raises: - AuthorisationError: If the project API key is invalid. - ResourceNotFoundError: If no project exists by the specified project EntityId. - UnknownError: If an error occurs while retrieving the project. + :class:`~encord.exceptions.AuthorisationError`: If the project API key is invalid. + :class:`~encord.exceptions.ResourceNotFoundError`: If no project exists by the specified project EntityId. + :class:`~encord.exceptions.UnknownError`: If an error occurs while retrieving the project. """ return self._querier.basic_getter( OrmProject, payload={"include_labels_metadata": include_labels_metadata}, retryable=True @@ -1021,6 +1021,7 @@ def create_label_rows( uids: list of data uids where label_status is NOT_LABELLED. get_signed_url: bool whether to fetch the signed url for the internal label row branch_name: Optional[str] which branch name against which to create the label row + Returns: List[LabelRow]: A list of created label rows """ @@ -1157,6 +1158,7 @@ def get_label_logs( def __set_project_ontology(self, ontology: LegacyOntology) -> bool: """Save updated project ontology + Args: ontology: the updated project ontology diff --git a/encord/client_metadata_schema.py b/encord/client_metadata_schema.py index 9f940f63f..418ed3378 100644 --- a/encord/client_metadata_schema.py +++ b/encord/client_metadata_schema.py @@ -10,8 +10,8 @@ def set_client_metadata_schema_from_dict(api_client: ApiClient, json_dict: Dict[ """Set the client metadata schema from a dictionary. Args: - api_client (ApiClient): The API client to use for the request. - json_dict (Dict[str, orm.ClientMetadataSchemaTypes]): A dictionary containing the client metadata schema types. 
+ api_client: The API client to use for the request. + json_dict: A dictionary containing the client metadata schema types. Raises: NotImplementedError: If an unexpected data type is encountered in the schema. @@ -32,7 +32,7 @@ def get_client_metadata_schema(api_client: ApiClient) -> Optional[Dict[str, orm. """Retrieve the client metadata schema. Args: - api_client (ApiClient): The API client to use for the request. + api_client: The API client to use for the request. Returns: Optional[Dict[str, orm.ClientMetadataSchemaTypes]]: A dictionary containing the client metadata schema types diff --git a/encord/collection.py b/encord/collection.py index f8aeb07c3..88b2ace60 100644 --- a/encord/collection.py +++ b/encord/collection.py @@ -163,8 +163,8 @@ def update_collection(self, name: Optional[str] = None, description: Optional[st """Update the collection's name and/or description. Args: - name (Optional[str]): The new name for the collection. - description (Optional[str]): The new description for the collection. + name: The new name for the collection. + description: The new description for the collection. """ payload = UpdateCollectionPayload(name=name, description=description) self._client.patch( @@ -182,11 +182,11 @@ def list_items( """List storage items in the collection. Args: - include_client_metadata (Optional[bool]): Whether to include client metadata for each item. - page_size (Optional[int]): The number of items to fetch per page. + include_client_metadata: Whether to include client metadata for each item. + page_size: The number of items to fetch per page. Returns: - Iterator[StorageItem]: An iterator containing storage items in the collection. + Iterator[:class:`~encord.storage.StorageItem`]: An iterator containing storage items in the collection. 
""" params = GetCollectionItemsParams(includeClientMetadata=include_client_metadata, pageSize=page_size) paged_items = self._client.get_paged_iterator( @@ -203,11 +203,11 @@ def list_items_include_inaccessible( """List storage items in the collection, including those that are inaccessible. Args: - include_client_metadata (Optional[bool]): Whether to include client metadata for each item. - page_size (Optional[int]): The number of items to fetch per page. + include_client_metadata: Whether to include client metadata for each item. + page_size: The number of items to fetch per page. Returns: - Iterator[Union[StorageItem, StorageItemInaccessible]]: An iterator containing both accessible + Iterator[Union[:class:`~encord.storage.StorageItem`, :class:`~encord.storage.StorageItemInaccessible`]]: An iterator containing both accessible and inaccessible storage items in the collection. """ params = GetCollectionItemsParams(includeClientMetadata=include_client_metadata, pageSize=page_size) @@ -228,8 +228,8 @@ def add_items(self, storage_item_uuids: Sequence[Union[UUID, str]]) -> Collectio """Add storage items to the collection. Args: - storage_item_uuids (Sequence[Union[UUID, str]]): The list of storage item UUIDs to be added. - Either UUIDs or string representations of UUIDs are accepted. + storage_item_uuids: The list of storage item UUIDs to be added. + Either UUIDs or string representations of UUIDs are accepted. Returns: CollectionBulkItemResponse: The response after adding items to the collection. @@ -247,8 +247,8 @@ def remove_items(self, storage_item_uuids: Sequence[Union[UUID, str]]) -> Collec """Remove storage items from the collection. Args: - storage_item_uuids (Sequence[Union[UUID, str]]): The list of storage item UUIDs to be removed. - Either UUIDs or string representations of UUIDs are accepted. + storage_item_uuids: The list of storage item UUIDs to be removed. + Either UUIDs or string representations of UUIDs are accepted. 
Returns: CollectionBulkItemResponse: The response after removing items from the collection. @@ -266,7 +266,7 @@ def add_preset_items(self, filter_preset: Union[FilterPreset, UUID, str]) -> Non """Async operation to add storage items matching a filter preset to the collection. Args: - filter_preset (Union[FilterPreset, UUID, str]): The filter preset or its UUID/ID used to filter items. + filter_preset: The filter preset or its UUID/ID used to filter items. """ if isinstance(filter_preset, FilterPreset): preset_uuid = filter_preset.uuid @@ -289,7 +289,7 @@ def remove_preset_items(self, filter_preset: Union[FilterPreset, UUID, str]) -> """Async operation to remove storage items matching a filter preset from the collection. Args: - filter_preset (Union[FilterPreset, UUID, str]): The filter preset or its UUID/ID used to filter items. + filter_preset: The filter preset or its UUID/ID used to filter items. """ if isinstance(filter_preset, FilterPreset): preset_uuid = filter_preset.uuid @@ -467,8 +467,8 @@ def update_collection(self, name: Optional[str] = None, description: Optional[st """Update the collection's name and/or description. Args: - name (Optional[str]): The new name for the collection. - description (Optional[str]): The new description for the collection. + name: The new name for the collection. + description: The new description for the collection. """ payload = UpdateCollectionPayload(name=name, description=description) self._client.patch( @@ -485,10 +485,10 @@ def list_frames( """List frames in the collection. Args: - page_size (Optional[int]): The number of items to fetch per page. + page_size: The number of items to fetch per page. Returns: - Iterator[Tuple[LabelRowV2, List[ProjectDataCollectionInstance]]]: An list of tuples containing label + Iterator[Tuple[:class:`~encord.objects.LabelRowV2`, List[ProjectDataCollectionInstance]]]: An iterator of tuples containing label row and corresponding frame instances in the collection. 
""" params = GetCollectionItemsParams(pageSize=page_size) @@ -515,10 +515,10 @@ def list_annotations( """List annotations in the collection. Args: - page_size (Optional[int]): The number of items to fetch per page. + page_size: The number of items to fetch per page. Returns: - Iterator[Tuple[LabelRowV2, List[ProjectLabelCollectionInstance]]]: An list of tuples containing label + Iterator[Tuple[:class:`~encord.objects.LabelRowV2`, List[ProjectLabelCollectionInstance]]]: An iterator of tuples containing label row and corresponding label instances in the collection. """ params = GetCollectionItemsParams(pageSize=page_size) @@ -544,7 +544,7 @@ def add_items( """Add data items to the collection. Args: - items (Sequence[ProjectDataCollectionItemRequest | ProjectLabelCollectionItemRequest]): The list of data items to be added. + items: The list of data items to be added. Returns: ProjectCollectionBulkItemResponse: The response after adding items to the collection. @@ -563,7 +563,7 @@ def remove_items( """Remove data items from the collection. Args: - items (Sequence[ProjectDataCollectionItemRequest | ProjectLabelCollectionItemRequest]): The list of data items to be removed. + items: The list of data items to be removed. Returns: ProjectCollectionBulkItemResponse: The response after removing items from the collection. @@ -580,7 +580,7 @@ def add_preset_items(self, filter_preset: Union[FilterPreset, UUID, str]) -> Non """Async operation to add storage items matching a filter preset to the collection. Args: - filter_preset (Union[FilterPreset, UUID, str]): The filter preset or its UUID/ID used to filter items. + filter_preset: The filter preset or its UUID/ID used to filter items. """ if isinstance(filter_preset, FilterPreset): preset_uuid = filter_preset.uuid @@ -603,7 +603,7 @@ def remove_preset_items(self, filter_preset: Union[FilterPreset, UUID, str]) -> """Async operation to remove storage items matching a filter preset from the collection. 
Args: - filter_preset (Union[FilterPreset, UUID, str]): The filter preset or its UUID/ID used to filter items. + filter_preset: The filter preset or its UUID/ID used to filter items. """ if isinstance(filter_preset, FilterPreset): preset_uuid = filter_preset.uuid diff --git a/encord/common/utils.py b/encord/common/utils.py index 762b74642..315a22ff8 100644 --- a/encord/common/utils.py +++ b/encord/common/utils.py @@ -40,6 +40,7 @@ def validate_user_agent_suffix(user_agent_suffix: str) -> str: """ Validate a User-Agent string according to RFC 9110, excluding comments. Returns it whitespace trimmed + Args: user_agent_suffix: The User-Agent string to validate diff --git a/encord/configs.py b/encord/configs.py index 9a4ddf520..00b96a903 100644 --- a/encord/configs.py +++ b/encord/configs.py @@ -77,9 +77,9 @@ def define_headers(self, resource_id: Optional[str], resource_type: Optional[str """Define headers for a request. Args: - resource_id (Optional[str]): The resource ID. - resource_type (Optional[str]): The resource type. - data (str): The request data. + resource_id: The resource ID. + resource_type: The resource type. + data: The request data. Returns: Dict[str, Any]: A dictionary of headers. @@ -91,7 +91,7 @@ def define_headers_v2(self, request: PreparedRequest) -> PreparedRequest: """Define headers for a request (v2). Args: - request (PreparedRequest): The prepared request. + request: The prepared request. Returns: PreparedRequest: The prepared request with headers defined. @@ -126,9 +126,9 @@ def define_headers(self, resource_id: Optional[str], resource_type: Optional[str """Define headers for a user-specific request. Args: - resource_id (Optional[str]): The resource ID. - resource_type (Optional[str]): The resource type. - data (str): The request data. + resource_id: The resource ID. + resource_type: The resource type. + data: The request data. Returns: Dict[str, Any]: A dictionary of headers. 
@@ -139,7 +139,7 @@ def define_headers_v2(self, request: PreparedRequest) -> PreparedRequest: """Define headers for a user-specific request (v2). Args: - request (PreparedRequest): The prepared request. + request: The prepared request. Returns: PreparedRequest: The prepared request with headers defined. @@ -193,7 +193,7 @@ def get_env_ssh_key() -> str: str: The raw SSH key. Raises: - ResourceNotFoundError: If the SSH key file or key is not found or is empty. + :class:`~encord.exceptions.ResourceNotFoundError`: If the SSH key file or key is not found or is empty. """ ssh_file = os.environ.get(_ENCORD_SSH_KEY_FILE) if ssh_file: @@ -265,9 +265,9 @@ def define_headers(self, resource_id: Optional[str], resource_type: Optional[str """Define headers for an SSH key-based request. Args: - resource_id (Optional[str]): The resource ID. - resource_type (Optional[str]): The resource type. - data (str): The request data. + resource_id: The resource ID. + resource_type: The resource type. + data: The request data. Returns: Dict[str, Any]: A dictionary of headers. @@ -288,7 +288,7 @@ def define_headers_v2(self, request: PreparedRequest) -> PreparedRequest: """Define headers for an SSH key-based request (v2). Args: - request (PreparedRequest): The prepared request. + request: The prepared request. Returns: PreparedRequest: The prepared request with headers defined. @@ -356,9 +356,9 @@ def define_headers(self, resource_id: Optional[str], resource_type: Optional[str """Define headers for a bearer token-based request. Args: - resource_id (Optional[str]): The resource ID. - resource_type (Optional[str]): The resource type. - data (str): The request data. + resource_id: The resource ID. + resource_type: The resource type. + data: The request data. Returns: Dict[str, Any]: A dictionary of headers. @@ -378,7 +378,7 @@ def define_headers_v2(self, request: PreparedRequest) -> PreparedRequest: """Define headers for a bearer token-based request (v2). 
Args: - request (PreparedRequest): The prepared request. + request: The prepared request. Returns: PreparedRequest: The prepared request with headers defined. @@ -402,6 +402,7 @@ def from_bearer_token( token: The bearer token. requests_settings: The requests settings for all outgoing network requests. domain: Base domain for the endpoints + Returns: BearerConfig: The bearer token configuration. """ diff --git a/encord/dataset.py b/encord/dataset.py index 87e9013b8..738875292 100644 --- a/encord/dataset.py +++ b/encord/dataset.py @@ -104,7 +104,7 @@ def data_rows(self) -> List[DataRow]: ``` Returns: - List[DataRow]: A list of DataRow objects. + List[:class:`~encord.orm.dataset.DataRow`]: A list of :class:`~encord.orm.dataset.DataRow` objects. """ return self._dataset_instance.data_rows @@ -128,12 +128,12 @@ def list_data_rows( data_hashes: Optional list of individual data unit hashes to include. Returns: - List[DataRow]: A list of DataRow objects that match the filter. + List[:class:`~encord.orm.dataset.DataRow`]: A list of :class:`~encord.orm.dataset.DataRow` objects that match the filter. Raises: - AuthorisationError: If the dataset API key is invalid. - ResourceNotFoundError: If no dataset exists by the specified dataset EntityId. - UnknownError: If an error occurs while retrieving the dataset. + :class:`~encord.exceptions.AuthorisationError`: If the dataset API key is invalid. + :class:`~encord.exceptions.ResourceNotFoundError`: If no dataset exists by the specified dataset EntityId. + :class:`~encord.exceptions.UnknownError`: If an error occurs while retrieving the dataset. """ return self._client.list_data_rows(title_eq, title_like, created_before, created_after, data_types, data_hashes) @@ -169,7 +169,6 @@ def set_access_settings(self, dataset_access_settings: DatasetAccessSettings, *, def add_users(self, user_emails: List[str], user_role: DatasetUserRole) -> List[DatasetUser]: """Add users to the dataset. 
- If the user already exists in the Dataset, this operation succeeds but the `user_role` remains unchanged. The existing `user_role` is reflected in the `DatasetUser` instance returned. @@ -239,7 +238,7 @@ def upload_video( Video: An object describing the created video, see :class:`encord.orm.dataset.Video`. Raises: - UploadOperationNotSupportedError: If trying to upload to external datasets (e.g., S3/GPC/Azure). + :class:`~encord.exceptions.UploadOperationNotSupportedError`: If trying to upload to external datasets (e.g., S3/GPC/Azure). """ folder_uuid = folder.uuid if isinstance(folder, StorageFolder) else folder @@ -273,7 +272,7 @@ def create_image_group( List[ImageGroup]: A list containing the object(s) describing the created data unit(s). See :class:`encord.orm.dataset.ImageGroup`. Raises: - UploadOperationNotSupportedError: If trying to upload to external datasets (e.g., S3/GPC/Azure). + :class:`~encord.exceptions.UploadOperationNotSupportedError`: If trying to upload to external datasets (e.g., S3/GPC/Azure). InvalidArgumentError: If the folder is specified, but the dataset is a mirror dataset. """ return self._client.create_image_group( @@ -303,7 +302,7 @@ def create_dicom_series( A dictionary describing the created series. Raises: - UploadOperationNotSupportedError: If trying to upload to external datasets (e.g., S3/GPC/Azure). + :class:`~encord.exceptions.UploadOperationNotSupportedError`: If trying to upload to external datasets (e.g., S3/GPC/Azure). InvalidArgumentError: If the folder is specified, but the dataset is a mirror dataset. """ return self._client.create_dicom_series( @@ -348,8 +347,7 @@ def link_items( duplicates_behavior: The behavior to follow when encountering duplicates. Defaults to `SKIP`. See also :class:`encord.orm.dataset.DataLinkDuplicatesBehavior`. Returns: - List of DataRow objects representing linked items. - + List of :class:`~encord.orm.dataset.DataRow` objects representing linked items. 
""" return self._client.link_items(item_uuids, duplicates_behavior) @@ -445,8 +443,7 @@ def update_data_item(self, data_hash: str, new_title: str) -> bool: new_title: New title of the data item being updated. Returns: - Boolean indicating whether the update was successful. - + Boolean indicating whether the update was successful. """ return self._client.update_data_item(data_hash, new_title) diff --git a/encord/filter_preset.py b/encord/filter_preset.py index ee375d9f5..6aa2cad73 100644 --- a/encord/filter_preset.py +++ b/encord/filter_preset.py @@ -157,9 +157,9 @@ def update_preset( """Update the preset's definition. Args: - name (Optional[str]): The new name for the preset. - description (Optional[str]): The new description for the preset. - filter_preset_json (Optional[dict]): The new filters for the preset in their raw json format. + name: The new name for the preset. + description: The new description for the preset. + filter_preset_json: The new filters for the preset in their raw json format. """ filters_definition = None if isinstance(filter_preset_json, dict): diff --git a/encord/http/utils.py b/encord/http/utils.py index 645fde46f..840b798e2 100644 --- a/encord/http/utils.py +++ b/encord/http/utils.py @@ -89,8 +89,8 @@ def get_batches(iterable: List, n: int) -> List[List]: """Split an iterable into fixed-size batches. Args: - iterable (List): The input list to be split. - n (int): The maximum size of each batch. + iterable: The input list to be split. + n: The maximum size of each batch. Returns: List[List]: A list of lists where each sublist represents a batch. @@ -130,13 +130,13 @@ def upload_to_signed_url_list_for_single_file( """Attempt to upload a single file to a signed URL, appending failures if any occur. Args: - failures (List[UploadToSignedUrlFailure]): A list to append failures to. - file_path (Union[str, Path]): Path of the file to upload. - title (str): Title or identifier for the file. 
- signed_url (str): The signed URL to upload the file to. - upload_item_type (StorageItemType): The type of the file being uploaded. - max_retries (int): Maximum number of retries in case of failure. - backoff_factor (float): Backoff factor for retry delays. + failures: A list to append failures to. + file_path: Path of the file to upload. + title: Title or identifier for the file. + signed_url: The signed URL to upload the file to. + upload_item_type: The type of the file being uploaded. + max_retries: Maximum number of retries in case of failure. + backoff_factor: Backoff factor for retry delays. """ try: _upload_single_file( @@ -178,11 +178,11 @@ def upload_to_signed_url_list( """Upload multiple files to signed URLs and return upload results. Args: - file_paths (Iterable[Union[str, Path]]): Paths of files to upload. - config (BaseConfig): Configuration object with request settings. - api_client (ApiClient): API client used to fetch presigned URLs. - upload_item_type (StorageItemType): Type of items being uploaded. - cloud_upload_settings (CloudUploadSettings): Upload configuration. + file_paths: Paths of files to upload. + config: Configuration object with request settings. + api_client: API client used to fetch presigned URLs. + upload_item_type: Type of items being uploaded. + cloud_upload_settings: Upload configuration. Returns: List[Dict]: A list of dictionaries containing upload metadata: @@ -191,8 +191,8 @@ def upload_to_signed_url_list( - ``title`` (str): File name or title. Raises: - EncordException: If any file path does not exist. - CloudUploadError: If uploads fail and ``allow_failures`` is False. + :class:`~encord.exceptions.EncordException`: If any file path does not exist. + :class:`~encord.exceptions.CloudUploadError`: If uploads fail and ``allow_failures`` is False. 
""" for file_path in file_paths: if not os.path.exists(file_path): diff --git a/encord/issues/issue_client.py b/encord/issues/issue_client.py index 0b7c1956a..58a69ec7f 100644 --- a/encord/issues/issue_client.py +++ b/encord/issues/issue_client.py @@ -79,8 +79,8 @@ def add_file_issue(self, comment: str, issue_tags: List[str]) -> None: """Adds a file issue. Args: - comment (str): The comment for the issue. - issue_tags (List[str]): The issue tags for the issue. + comment: The comment for the issue. + issue_tags: The issue tags for the issue. """ self._issue_client.add_issue( project_uuid=self._project_uuid, @@ -95,9 +95,9 @@ def add_frame_issue(self, frame_index: int, comment: str, issue_tags: List[str]) """Adds a frame issue. Args: - frame_index (int): The index of the frame to add the issue to. - comment (str): The comment for the issue. - issue_tags (List[str]): The issue tags for the issue. + frame_index: The index of the frame to add the issue to. + comment: The comment for the issue. + issue_tags: The issue tags for the issue. """ self._issue_client.add_issue( project_uuid=self._project_uuid, @@ -107,14 +107,14 @@ def add_frame_issue(self, frame_index: int, comment: str, issue_tags: List[str]) ) def add_coordinate_issue(self, frame_index: int, x: float, y: float, comment: str, issue_tags: List[str]) -> None: - """Adds a issue pinned to a coordinate. + """Adds an issue pinned to a coordinate. Args: - frame_index (int): The index of the frame to add the issue to. - x (float): The x coordinate of the issue. - y (float): The y coordinate of the issue. - comment (str): The comment for the issue. - issue_tags (List[str]): The issue tags for the issue. + frame_index: The index of the frame to add the issue to. + x: The x coordinate of the issue. + y: The y coordinate of the issue. + comment: The comment for the issue. + issue_tags: The issue tags for the issue. 
""" self._issue_client.add_issue( project_uuid=self._project_uuid, diff --git a/encord/metadata_schema.py b/encord/metadata_schema.py index 3fa1ce110..23d9b14a2 100644 --- a/encord/metadata_schema.py +++ b/encord/metadata_schema.py @@ -231,14 +231,12 @@ def save(self) -> None: def add_embedding(self, k: str, *, size: int) -> None: """Adds a new embedding to the metadata schema. - **Parameters:** + Args: + k : str: The key under which the embedding will be stored in the schema. + size : int: The size of the embedding. - - k : str: The key under which the embedding will be stored in the schema. - - size : int: The size of the embedding. - - **Raises:** - - MetadataSchemaError: If the key `k` is already defined in the schema. + Raises: + :class:`~encord.metadata_schema.MetadataSchemaError`: If the key `k` is already defined in the schema. """ if k in self._schema: raise MetadataSchemaError(f"{k} is already defined") @@ -253,14 +251,12 @@ def add_embedding(self, k: str, *, size: int) -> None: def add_enum(self, k: str, *, values: Sequence[str]) -> None: """Adds a new enum to the metadata schema. - **Parameters:** - - - k : str: The key under which the embedding will be stored in the schema. - - values : Sequence[str]: The set of values for the enum (min 1, max 256). - - **Raises:** + Args: + k : str: The key under which the enum will be stored in the schema. + values : Sequence[str]: The set of values for the enum (min 1, max 256). - MetadataSchemaError: If the key `k` is already defined in the schema. + Raises: + :class:`~encord.metadata_schema.MetadataSchemaError`: If the key `k` is already defined in the schema. """ if k in self._schema: raise MetadataSchemaError(f"{k} is already defined") @@ -275,14 +271,12 @@ def add_enum(self, k: str, *, values: Sequence[str]) -> None: def add_enum_options(self, k: str, *, values: Sequence[str]) -> None: """Adds extra valid enum values to an existing enum schema. - **Parameters:** - - - k : str: The key referencing the enum. 
- - values : Sequence[str]: The set of new values to add to the enum (min 1, max 256). + Args: + k : str: The key referencing the enum. + values : Sequence[str]: The set of new values to add to the enum (min 1, max 256). - **Raises:** - - MetadataSchemaError: If the key `k` is not defined in the schema or is not an enum. + Raises: + :class:`~encord.metadata_schema.MetadataSchemaError`: If the key `k` is not defined in the schema or is not an enum. """ if k not in self._schema: raise MetadataSchemaError(f"{k} is already defined") @@ -309,18 +303,16 @@ def add_scalar( ) -> None: """Sets a simple metadata type for a given key in the schema. - **Parameters:** + Args: + k : str: The key for which the metadata type is being set. + data_type : Literal["boolean", "datetime", "number", "uuid", "varchar", "text", "string", "long_string"] + The type of metadata to be associated with the key. Must be a valid identifier. + "string" is an alias of "varchar" + "long_string" is an alias of "text" - - k : str: The key for which the metadata type is being set. - - data_type : Literal["boolean", "datetime", "number", "uuid", "varchar", "text", "string", "long_string"] - The type of metadata to be associated with the key. Must be a valid identifier. - "string" is an alias of "varchar" - "long_string" is an alias of "text" - - **Raises:** - - MetadataSchemaError: If the key `k` is already defined in the schema with a conflicting type. - ValueError: If `data_type` is not a valid type of metadata identifier. + Raises: + :class:`~encord.metadata_schema.MetadataSchemaError`: If the key `k` is already defined in the schema with a conflicting type. + ValueError: If `data_type` is not a valid type of metadata identifier. """ if isinstance(data_type, MetadataSchemaScalarType): data_type = data_type.to_simple_str() @@ -357,34 +349,29 @@ def set_scalar( Alias of add_scalar - **Parameters:** - - - k : str: The key for which the metadata type is being set. 
- - data_type : Literal["boolean", "datetime", "number", "uuid", "varchar", "text", "string", "long_string"] - The type of metadata to be associated with the key. Must be a valid identifier. - "string" is an alias of "varchar" - "long_string" is an alias of "text" - - **Raises:** + Args: + k : str: The key for which the metadata type is being set. + data_type : Literal["boolean", "datetime", "number", "uuid", "varchar", "text", "string", "long_string"] + The type of metadata to be associated with the key. Must be a valid identifier. + "string" is an alias of "varchar" + "long_string" is an alias of "text" - MetadataSchemaError: If the key `k` is already defined in the schema with a conflicting type. - ValueError: If `data_type` is not a valid type of metadata identifier. + Raises: + :class:`~encord.metadata_schema.MetadataSchemaError`: If the key `k` is already defined in the schema with a conflicting type. + ValueError: If `data_type` is not a valid type of metadata identifier. """ self.add_scalar(k, data_type=data_type) def delete_key(self, k: str, *, hard: bool = False) -> None: """Delete a metadata key from the schema. - **Parameters:** - - k : str: The key for which the metadata type is being deleted. - - hard: bool: If the deletion should prevent indexing of this key unconditionally. + Args: + k: The key for which the metadata type is being deleted. + hard: If the deletion should prevent indexing of this key unconditionally. Setting this to true prevents restoring the type definition in the future. 
- **Raises:** - - MetadataSchemaError: If the key `k` is already deleted or not present in the schema + Raises: + :class:`~encord.metadata_schema.MetadataSchemaError`: If the key `k` is already deleted or not present in the schema """ if k not in self._schema: raise MetadataSchemaError(f"{k} is not defined") @@ -402,13 +389,11 @@ def delete_key(self, k: str, *, hard: bool = False) -> None: def restore_key(self, k: str) -> None: """Restore a deleted metadata key to its original value. - **Parameters:** - - k : str: The key for which the metadata type is to be restored. - - **Raises:** + Args: + k: The key for which the metadata type is to be restored. - MetadataSchemaError: If the key `k` is not already deleted or not present in the schema + Raises: + :class:`~encord.metadata_schema.MetadataSchemaError`: If the key `k` is not already deleted or not present in the schema """ if k not in self._schema: raise MetadataSchemaError(f"{k} is not defined") @@ -424,27 +409,24 @@ def restore_key(self, k: str) -> None: def keys(self) -> Sequence[str]: """Returns a sequence of all keys defined in the metadata schema. - **Returns:** - - Sequence[str]: A list of keys present in the metadata schema. + Returns: + Sequence[str]: A list of keys present in the metadata schema. """ return list(self._schema.keys()) def has_key(self, k: str) -> bool: """Check if any definition exists for a key. - **Parameters:** - - k : str: The key for which the metadata type is to be retrieved. + Args: + k: The key for which the metadata type is to be retrieved. """ return k in self._schema def is_key_deleted(self, k: str) -> bool: """Check if the key is defined as deleted. (Tombstone type) - **Parameters:** - - k : str: The key for which the metadata type is to be retrieved. + Args: + k: The key for which the metadata type is to be retrieved. 
""" if k not in self._schema: return False @@ -457,16 +439,14 @@ def get_key_type( ) -> Union[Literal["boolean", "datetime", "uuid", "number", "varchar", "text", "embedding", "enum"], None]: """Retrieves the metadata type associated with a given key. - **Parameters:** - - k : str: The key for which the metadata type is to be retrieved. - - **Returns:** + Args: + k : str: The key for which the metadata type is to be retrieved. - Literal["boolean", "datetime", "uuid", "number", "varchar", "text", "embedding", "enum"]: The metadata type associated with the key `k`. + Returns: + Literal["boolean", "datetime", "uuid", "number", "varchar", "text", "embedding", "enum"]: The metadata type associated with the key `k`. Raises: - MetadataSchemaError: If the key `k` is not supported by the current SDK. + :class:`~encord.metadata_schema.MetadataSchemaError`: If the key `k` is not supported by the current SDK. """ if k not in self._schema: return None @@ -498,17 +478,14 @@ def get_key_type( def get_embedding_size(self, k: str) -> int: """Retrieves size associated with a given embedding. - **Parameters:** + Args: + k: The key for which the metadata type is to be retrieved. - k : str: The key for which the metadata type is to be retrieved. + Returns: + The size of the embedding - **Returns:** - - int: The size of the embedding - - **Raises:** - - MetadataSchemaError: If the key `k` is not defined in the schema or is not an embedding + Raises: + :class:`~encord.metadata_schema.MetadataSchemaError`: If the key `k` is not defined in the schema or is not an embedding """ if k not in self._schema: raise MetadataSchemaError(f"{k} is not defined") @@ -522,17 +499,14 @@ def get_embedding_size(self, k: str) -> int: def get_enum_options(self, k: str) -> Sequence[str]: """Retrieves all values associated with a given enum. - **Parameters:** + Args: + k: The key for which the metadata type is to be retrieved. - k : str: The key for which the metadata type is to be retrieved. 
+ Returns: + Sequence: The list of all values associated with an enum type. - **Returns:** - - Sequence[str]: The list of all values associated with an enum type. - - **Raises:** - - MetadataSchemaError: If the key `k` is not defined in the schema or is not an enum. + Raises: + :class:`~encord.metadata_schema.MetadataSchemaError`: If the key `k` is not defined in the schema or is not an enum. """ if k not in self._schema: raise MetadataSchemaError(f"{k} is not defined") diff --git a/encord/ml_models_client.py b/encord/ml_models_client.py index 5dad46d63..f1bc661dc 100644 --- a/encord/ml_models_client.py +++ b/encord/ml_models_client.py @@ -286,9 +286,9 @@ def get_training_status( ModelIteration: Information about the training iteration Raises: - EncordException: If training encountered an error + :class:`~encord.exceptions.EncordException`: If training encountered an error ValueError: If status response is invalid - RequestException: If there are network connectivity issues + :class:`~encord.exceptions.RequestException`: If there are network connectivity issues """ failed_requests_count = 0 polling_start_timestamp = time.perf_counter() diff --git a/encord/objects/answers.py b/encord/objects/answers.py index 23d5600b4..40ab8d9ca 100644 --- a/encord/objects/answers.py +++ b/encord/objects/answers.py @@ -36,7 +36,7 @@ class Answer(ABC, Generic[ValueType, AttributeType]): - """An internal helper class for the LabelRowV2 class. This class is not meant to be used directly by users.""" + """An internal helper class for the :class:`~encord.objects.LabelRowV2` class. 
This class is not meant to be used directly by users.""" _ontology_attribute: AttributeType _value: Optional[ValueType] diff --git a/encord/objects/attributes.py b/encord/objects/attributes.py index ea0b082fb..e806bd002 100644 --- a/encord/objects/attributes.py +++ b/encord/objects/attributes.py @@ -179,16 +179,17 @@ def add_option( local_uid: Optional[int] = None, feature_node_hash: Optional[str] = None, ) -> NestableOption: - """Args: + """ + Args: label: user-visible name of the option value: internal unique value; optional; normally mechanically constructed from the label local_uid: integer identifier of the option. Normally auto-generated; - omit this unless the aim is to create an exact clone of existing ontology + omit this unless the aim is to create an exact clone of existing ontology feature_node_hash: global identifier of the option. Normally auto-generated; - omit this unless the aim is to create an exact clone of existing ontology + omit this unless the aim is to create an exact clone of existing ontology Returns: - a `NestableOption` instance attached to the attribute. This can be further specified by adding nested attributes. + A `NestableOption` instance attached to the attribute. This can be further specified by adding nested attributes. """ return _add_option(self._options, NestableOption, label, self.uid, local_uid, feature_node_hash, value) @@ -240,15 +241,17 @@ def add_option( local_uid: Optional[int] = None, feature_node_hash: Optional[str] = None, ) -> FlatOption: - """Args: + """ + Args: label: user-visible name of the option value: internal unique value; optional; normally mechanically constructed from the label local_uid: integer identifier of the option. Normally auto-generated; - omit this unless the aim is to create an exact clone of existing ontology + omit this unless the aim is to create an exact clone of existing ontology feature_node_hash: global identifier of the option. 
Normally auto-generated; - omit this unless the aim is to create an exact clone of existing ontology + omit this unless the aim is to create an exact clone of existing ontology + Returns: - a `FlatOption` instance attached to the attribute. + A `FlatOption` instance attached to the attribute. """ return _add_option(self._options, FlatOption, label, self.uid, local_uid, feature_node_hash, value) diff --git a/encord/objects/classification.py b/encord/objects/classification.py index eb0ce9d1e..a98c36241 100644 --- a/encord/objects/classification.py +++ b/encord/objects/classification.py @@ -32,8 +32,8 @@ class OntologyClassificationLevel(StringEnum): Attributes: GLOBAL (OntologyClassificationLevel): Indicates a global, whole-image - classification that applies to the entire data unit rather than - to a specific object or region. + classification that applies to the entire data unit rather than + to a specific object or region. """ GLOBAL = "global" @@ -41,7 +41,7 @@ class OntologyClassificationLevel(StringEnum): @dataclass class Classification(OntologyElement): - """Represents a whole-image classification as part of an Ontology structure. + """Represents a whole-image classification as part of an :class:`~encord.ontology.Ontology` structure. This class encapsulates an image-level classification that describes the entire data unit rather than individual labeled objects. Each classification @@ -59,14 +59,14 @@ class Classification(OntologyElement): this classification (for example: `GLOBAL`). Optional: defaults to None. Example: - ``` - >>> classification = Classification( - ... uid=1, - ... feature_node_hash="abc123", - ... attributes=[Attribute(name="Scene Type", value="Indoor")], - ... level=OntologyClassificationLevel.GLOBAL, - ... ) - ``` + ``` + >>> classification = Classification( + ... uid=1, + ... feature_node_hash="abc123", + ... attributes=[Attribute(name="Scene Type", value="Indoor")], + ... level=OntologyClassificationLevel.GLOBAL, + ... 
) + ``` """ uid: int @@ -111,7 +111,7 @@ def is_global(self) -> bool: return self.level == OntologyClassificationLevel.GLOBAL def create_instance(self, range_only: bool = False) -> ClassificationInstance: - """Create a ClassificationInstance to be used with a label row. + """Create a :class:`~encord.objects.ClassificationInstance` to be used with a label row. Returns: ClassificationInstance: An instance of ClassificationInstance. @@ -123,7 +123,7 @@ def from_dict(cls, d: dict) -> Classification: """Create a Classification instance from a dictionary. Args: - d (dict): A dictionary containing classification information. + d: A dictionary containing classification information. Returns: Classification: An instance of Classification. @@ -172,13 +172,13 @@ def add_attribute( """Adds an attribute to the classification. Args: - cls (Type[AttributeType]): The attribute type, one of `RadioAttribute`, `ChecklistAttribute`, `TextAttribute`. - name (str): The user-visible name of the attribute. - local_uid (Optional[int]): Integer identifier of the attribute. Normally auto-generated; omit this unless - the aim is to create an exact clone of an existing ontology. - feature_node_hash (Optional[str]): Global identifier of the attribute. Normally auto-generated; omit this - unless the aim is to create an exact clone of an existing ontology. - required (bool): Whether the label editor would mark this attribute as 'required'. + cls: The attribute type, one of `RadioAttribute`, `ChecklistAttribute`, `TextAttribute`. + name: The user-visible name of the attribute. + local_uid: Integer identifier of the attribute. Normally auto-generated; omit this unless + the aim is to create an exact clone of an existing ontology. + feature_node_hash: Global identifier of the attribute. Normally auto-generated; omit this + unless the aim is to create an exact clone of an existing ontology. + required: Whether the label editor would mark this attribute as 'required'. 
Returns: AttributeType: The created attribute that can be further specified with Options, where appropriate. diff --git a/encord/objects/classification_instance.py b/encord/objects/classification_instance.py index 6bac00565..590af65ae 100644 --- a/encord/objects/classification_instance.py +++ b/encord/objects/classification_instance.py @@ -289,9 +289,10 @@ def set_frame_data(self, frame_data: FrameData, frames: Frames) -> None: self._frames_to_data[frame] = frame_data def get_annotation(self, frame: Union[int, str] = 0) -> Annotation: - """Args: - frame: Either the frame number or the image hash if the data type is an image or image group. - Defaults to the first frame. + """ + Args: + frame: Either the frame number or the image hash if the data type is an image or image group. + Defaults to the first frame. """ if self.is_global(): raise LabelRowError("Cannot get annotation for a global classification instance.") @@ -325,8 +326,9 @@ def remove_from_frames(self, frames: Frames) -> None: self._parent._remove_frames_from_classification(self, frames) def get_annotations(self) -> List[Annotation]: - """Returns: - A list of `ClassificationInstance.Annotation` in order of available frames. + """ + Returns: + A list of `ClassificationInstance.Annotation` in order of available frames. """ return [self.get_annotation(frame_num) for frame_num in sorted(self._frames_to_data.keys())] diff --git a/encord/objects/coordinates.py b/encord/objects/coordinates.py index fcb16e7c1..1c6daa38c 100644 --- a/encord/objects/coordinates.py +++ b/encord/objects/coordinates.py @@ -62,7 +62,7 @@ def from_dict(d: BoundingBoxFrameCoordinatesDict) -> BoundingBoxCoordinates: """Create a BoundingBoxCoordinates instance from a dictionary. Args: - d (dict): A dictionary containing bounding box information. + d: A dictionary containing bounding box information. Returns: BoundingBoxCoordinates: An instance of BoundingBoxCoordinates. 
@@ -113,7 +113,7 @@ def from_dict(d: RotatableBoundingBoxFrameCoordinatesDict) -> RotatableBoundingB """Create a RotatableBoundingBoxCoordinates instance from a dictionary. Args: - d (dict): A dictionary containing rotatable bounding box information. + d: A dictionary containing rotatable bounding box information. Returns: RotatableBoundingBoxCoordinates: An instance of RotatableBoundingBoxCoordinates. @@ -190,7 +190,7 @@ def from_dict(d: PointFrameCoordinatesDict) -> PointCoordinate: """Create a PointCoordinate instance from a dictionary. Args: - d (dict): A dictionary containing point coordinate information. + d: A dictionary containing point coordinate information. Returns: PointCoordinate: An instance of PointCoordinate. @@ -229,7 +229,7 @@ def from_dict(d: Point3DFrameCoordinatesDict) -> PointCoordinate3D: """Create a PointCoordinate3D instance from a dictionary. Args: - d (dict): A dictionary containing point coordinate information. + d: A dictionary containing point coordinate information. Returns: PointCoordinate3D: An instance of PointCoordinate3D. @@ -261,8 +261,8 @@ def __init__( ): """ Args: - values (List[PointCoordinate]): A list of PointCoordinate objects defining the polygon. - polygons (List[List[List[PointCoordinate]]]): A list of polygons, where each polygon is a list of contours, where each contour is a list of points. + values: A list of PointCoordinate objects defining the polygon. + polygons: A list of polygons, where each polygon is a list of contours, where each contour is a list of points. """ if not values and not polygons: raise LabelRowError("Either `values` or `polygons` must be provided") @@ -296,45 +296,45 @@ def from_dict(d: PolygonFrameCoordinatesDict) -> "PolygonCoordinates": Supports both legacy format (single polygon with one contour) and the new complex format (multiple polygons and contours). Args: - d (dict): Dictionary containing polygon coordinates information. + d: Dictionary containing polygon coordinates information. 
Returns: - PolygonCoordinates: A PolygonCoordinates instance. + PolygonCoordinates: A PolygonCoordinates instance. Examples: - Legacy format (mapping of index -> point): - ```json - { - "polygon": { - "0": {"x": 12.3, "y": 45.6}, - "1": {"x": 78.9, "y": 12.3} - } - } - ``` - - Legacy format (list of points): - ```json - { - "polygon": [ - {"x": 12.3, "y": 45.6}, - {"x": 78.9, "y": 12.3} - ] - } - ``` - - New format (polygons -> contours -> points): - ```json - { - "polygons": [ - [ - [ + Legacy format (mapping of index -> point): + ```json + { + "polygon": { + "0": {"x": 12.3, "y": 45.6}, + "1": {"x": 78.9, "y": 12.3} + } + } + ``` + + Legacy format (list of points): + ```json + { + "polygon": [ {"x": 12.3, "y": 45.6}, {"x": 78.9, "y": 12.3} ] - ] - ] - } - ``` + } + ``` + + New format (polygons -> contours -> points): + ```json + { + "polygons": [ + [ + [ + {"x": 12.3, "y": 45.6}, + {"x": 78.9, "y": 12.3} + ] + ] + ] + } + ``` """ polygon_dict = d.get("polygon") values: List[PointCoordinate] = [] @@ -416,7 +416,7 @@ def from_dict(d: PolylineFrameCoordinatesDict) -> PolylineCoordinates: """Create a PolylineCoordinates instance from a dictionary. Args: - d (dict): A dictionary containing polyline coordinates information. + d: A dictionary containing polyline coordinates information. Returns: PolylineCoordinates: An instance of PolylineCoordinates. diff --git a/encord/objects/frames.py b/encord/objects/frames.py index 2e2097b85..e62f3feda 100644 --- a/encord/objects/frames.py +++ b/encord/objects/frames.py @@ -53,7 +53,7 @@ def frame_to_range(frame: int) -> Range: """Convert a single frame to a Range. Args: - frame (int): The single frame to be converted. + frame: The single frame to be converted. Returns: Range: A Range object with both start and end set to the input frame. @@ -65,7 +65,7 @@ def frames_to_ranges(frames: Collection[int]) -> Ranges: """Create a sorted list (in ascending order) of non-overlapping run-length encoded ranges from a collection of frames. 
Args: - frames (Collection[int]): A collection of integers representing frames. + frames: A collection of integers representing frames. Returns: Ranges: A list of Range objects representing the non-overlapping ranges. @@ -96,7 +96,7 @@ def ranges_to_list(ranges: Ranges) -> List[List[int]]: """Convert a list of Range objects to a list of lists (run-length encoded) of integers. Args: - ranges (Ranges): A list of Range objects. + ranges: A list of Range objects. Returns: List[List[int]]: A list of lists where each inner list contains two integers, the start and end of a range. @@ -108,7 +108,7 @@ def range_to_ranges(range_: Range) -> Ranges: """Convert a single Range to a list of Ranges. Args: - range_ (Range): The single Range to be converted. + range_: The single Range to be converted. Returns: Ranges: A list containing the input Range as its only element. @@ -120,7 +120,7 @@ def range_to_frames(range_: Range) -> List[int]: """Convert a single Range (run-length encoded) to a list of integers. Args: - range_ (Range): The single Range to be converted. + range_: The single Range to be converted. Returns: List[int]: A list of integers representing the frames within the range. @@ -132,7 +132,7 @@ def ranges_to_frames(range_list: Ranges) -> List[int]: """Convert a list of Ranges (run-length encoded) to a list of integers. Args: - range_list (Ranges): A list of Range objects. + range_list: A list of Range objects. Returns: List[int]: A sorted list of integers representing all frames within the ranges. @@ -147,7 +147,7 @@ def ranges_list_to_ranges(range_list: List[List[int]]) -> Ranges: """Convert a list of lists (run-length encoded) of integers to a list of Range objects. Args: - range_list (List[List[int]]): A list of lists where each inner list contains two integers, the start and end of a range. + range_list: A list of lists where each inner list contains two integers, the start and end of a range. 
Returns: Ranges: A list of Range objects created from the input list of lists. @@ -159,7 +159,7 @@ def frames_class_to_frames_list(frames_class: Frames) -> List[int]: """Convert a Frames class (which can be an int, a list of ints, a Range, or a list of Ranges) to a list of integers. Args: - frames_class (Frames): A Frames class which can be a single int, a list of ints, a Range object, or a list of Range objects. + frames_class: A Frames class which can be a single int, a list of ints, a Range object, or a list of Range objects. Returns: List[int]: A sorted list of integers representing all frames within the input Frames class. diff --git a/encord/objects/ontology_element.py b/encord/objects/ontology_element.py index 6d93bbd04..7a69636bc 100644 --- a/encord/objects/ontology_element.py +++ b/encord/objects/ontology_element.py @@ -102,7 +102,7 @@ def get_child_by_title( The child node with the specified title and type. Raises: - OntologyError: If more than one or no matching child is found. + :class:`~encord.exceptions.OntologyError`: If more than one or no matching child is found. """ found_items = self.get_children_by_title(title, type_) _assert_singular_result_list(found_items, title, type_) diff --git a/encord/objects/ontology_labels_impl.py b/encord/objects/ontology_labels_impl.py index da681fcd7..a2bc80f9a 100644 --- a/encord/objects/ontology_labels_impl.py +++ b/encord/objects/ontology_labels_impl.py @@ -247,14 +247,15 @@ def client_metadata(self) -> Optional[dict]: def label_status(self) -> LabelStatus: """Returns the current labeling status for the label row. - **Note**: This method is not supported for workflow-based projects. Please see our - :ref:`workflow documentation ` for more details. + Note: + This method is not supported for workflow-based projects. Please see our + :ref:`workflow documentation ` for more details. Returns: LabelStatus: The current labeling status. Raises: - WrongProjectTypeError: If used with workflow-based projects. 
+ :class:`~encord.exceptions.WrongProjectTypeError`: If used with workflow-based projects. """ if self.__is_tms2_project: raise WrongProjectTypeError( @@ -268,14 +269,15 @@ def label_status(self) -> LabelStatus: def annotation_task_status(self) -> AnnotationTaskStatus: """Returns the current annotation task status for the label row. - **Note**: This method is not supported for workflow-based projects. Please see our - :ref:`workflow documentation ` for more details. + Note: + This method is not supported for workflow-based projects. Please see our + :ref:`workflow documentation ` for more details. Returns: AnnotationTaskStatus: The current annotation task status. Raises: - WrongProjectTypeError: If used with workflow-based projects. + :class:`~encord.exceptions.WrongProjectTypeError`: If used with workflow-based projects. """ if self.__is_tms2_project: raise WrongProjectTypeError( @@ -453,7 +455,7 @@ def task_uuid(self) -> Optional[UUID]: Optional[UUID]: The workflow task uuid or None if not applicable. Raises: - WrongProjectTypeError: If used with non-workflow-based projects. + :class:`~encord.exceptions.WrongProjectTypeError`: If used with non-workflow-based projects. """ if not self.__is_tms2_project: raise WrongProjectTypeError('"task_uuid" property only works with workflow-based projects.') @@ -472,7 +474,7 @@ def priority(self) -> Optional[float]: Optional[float]: The workflow priority or None if not applicable. Raises: - WrongProjectTypeError: If used with non-workflow-based projects. + :class:`~encord.exceptions.WrongProjectTypeError`: If used with non-workflow-based projects. """ if not self.__is_tms2_project: raise WrongProjectTypeError('"priority" property only works with workflow-based projects.') @@ -697,7 +699,7 @@ def get_image_hash(self, frame_number: int) -> Optional[str]: Optional[str]: The image hash if the frame number is within bounds, `None` otherwise. Raises: - LabelRowError: If this function is used for non-image data types. 
+ :class:`~encord.exceptions.LabelRowError`: If this function is used for non-image data types. """ self._check_labelling_is_initalised() @@ -716,7 +718,7 @@ def get_frame_number(self, image_hash: str) -> Optional[int]: Optional[int]: The frame number if the image hash is found, `None` otherwise. Raises: - LabelRowError: If this function is used for non-image data types. + :class:`~encord.exceptions.LabelRowError`: If this function is used for non-image data types. """ self._check_labelling_is_initalised() @@ -809,7 +811,7 @@ def get_frame_metadata(self, frame: Union[int, str] = 0) -> FrameViewMetadata: FrameViewMetadata: Metadata for the specified frame or image hash. Raises: - LabelRowError: If the specified frame or image hash is not found in the label row. + :class:`~encord.exceptions.LabelRowError`: If the specified frame or image hash is not found in the label row. """ self._method_not_supported_for_audio() @@ -912,7 +914,7 @@ def add_object_instance(self, object_instance: ObjectInstance, force: bool = Tru force: If `True`, overwrites the current objects; otherwise, it will replace the current object. Raises: - LabelRowError: If the object instance is already part of another LabelRowV2. + :class:`~encord.exceptions.LabelRowError`: If the object instance is already part of another LabelRowV2. """ self._method_not_supported_for_audio(range_only=object_instance.is_range_only()) @@ -962,7 +964,7 @@ def add_classification_instance(self, classification_instance: ClassificationIns force: If `True`, overwrites the current objects; otherwise, it will replace the current object. Raises: - LabelRowError: If the classification instance is already part of another LabelRowV2 or has overlapping frames. + :class:`~encord.exceptions.LabelRowError`: If the classification instance is already part of another LabelRowV2 or has overlapping frames. 
""" self._check_labelling_is_initalised() @@ -1223,7 +1225,7 @@ def workflow_complete(self, bundle: Optional[Bundle] = None) -> None: bundle: Optional parameter. If passed, the method will be executed in a deferred way as part of the bundle. Raises: - LabelRowError: If the label hash is None. + :class:`~encord.exceptions.LabelRowError`: If the label hash is None. """ if self.label_hash is None: raise LabelRowError( @@ -1245,7 +1247,7 @@ def set_priority(self, priority: float, bundle: Optional[Bundle] = None) -> None bundle: Optional parameter. If passed, the method will be executed in a deferred way as part of the bundle. Raises: - WrongProjectTypeError: If the project is not a workflow-based project. + :class:`~encord.exceptions.WrongProjectTypeError`: If the project is not a workflow-based project. """ if not self.__is_tms2_project: raise WrongProjectTypeError("Setting priority only possible for workflow-based projects") @@ -1317,7 +1319,7 @@ def image_hash(self) -> str: str: The image hash. Raises: - LabelRowError: If the data type is not IMAGE or IMG_GROUP. + :class:`~encord.exceptions.LabelRowError`: If the data type is not IMAGE or IMG_GROUP. """ if self._label_row.data_type not in [DataType.IMAGE, DataType.IMG_GROUP]: raise LabelRowError("Image hash can only be retrieved for DataType.IMAGE or DataType.IMG_GROUP") @@ -1331,7 +1333,7 @@ def image_title(self) -> str: str: The image title. Raises: - LabelRowError: If the data type is not IMAGE or IMG_GROUP. + :class:`~encord.exceptions.LabelRowError`: If the data type is not IMAGE or IMG_GROUP. """ if self._label_row.data_type not in [DataType.IMAGE, DataType.IMG_GROUP]: raise LabelRowError("Image title can only be retrieved for DataType.IMAGE or DataType.IMG_GROUP") @@ -1345,7 +1347,7 @@ def file_type(self) -> str: str: The file type. Raises: - LabelRowError: If the data type is not IMAGE or IMG_GROUP. + :class:`~encord.exceptions.LabelRowError`: If the data type is not IMAGE or IMG_GROUP. 
""" if self._label_row.data_type not in [DataType.IMAGE, DataType.IMG_GROUP]: raise LabelRowError("File type can only be retrieved for DataType.IMAGE or DataType.IMG_GROUP") @@ -1365,7 +1367,7 @@ def width(self) -> int: """Get the width of the frame. Raises: - LabelRowError: If the width is not set for the data type. + :class:`~encord.exceptions.LabelRowError`: If the width is not set for the data type. """ if self._label_row.data_type == DataType.IMG_GROUP: return self._frame_level_data().width @@ -1387,7 +1389,7 @@ def height(self) -> int: """Get the height of the frame. Raises: - LabelRowError: If the height is not set for the data type. + :class:`~encord.exceptions.LabelRowError`: If the height is not set for the data type. """ if self._label_row.data_type == DataType.IMG_GROUP: return self._frame_level_data().height @@ -1412,7 +1414,7 @@ def data_link(self) -> Optional[str]: Optional[str]: The data link, or `None` if not available. Raises: - LabelRowError: If the data type is not IMAGE or IMG_GROUP. + :class:`~encord.exceptions.LabelRowError`: If the data type is not IMAGE or IMG_GROUP. """ if self._label_row.data_type not in [DataType.IMAGE, DataType.IMG_GROUP]: raise LabelRowError("Data link can only be retrieved for DataType.IMAGE or DataType.IMG_GROUP") @@ -1457,7 +1459,7 @@ def add_object_instance( manual_annotation: Optional flag indicating manual annotation. Raises: - LabelRowError: If the object instance is already assigned to a different label row. + :class:`~encord.exceptions.LabelRowError`: If the object instance is already assigned to a different label row. """ label_row = object_instance.is_assigned_to_label_row() if label_row and self._label_row != label_row: @@ -1506,7 +1508,7 @@ def add_classification_instance( last_edited_by: Optional last editor identifier. Raises: - LabelRowError: If the classification instance is already assigned to a different label row. 
+ :class:`~encord.exceptions.LabelRowError`: If the classification instance is already assigned to a different label row. """ if created_at is None: created_at = datetime.now() diff --git a/encord/objects/ontology_object.py b/encord/objects/ontology_object.py index 58d524533..450d82ec9 100644 --- a/encord/objects/ontology_object.py +++ b/encord/objects/ontology_object.py @@ -121,9 +121,9 @@ def add_attribute( cls: The attribute type, one of `RadioAttribute`, `ChecklistAttribute`, `TextAttribute`. name: The user-visible name of the attribute. local_uid: Integer identifier of the attribute. Normally auto-generated; - omit this unless the aim is to create an exact clone of an existing ontology. + omit this unless the aim is to create an exact clone of an existing ontology. feature_node_hash: Global identifier of the attribute. Normally auto-generated; - omit this unless the aim is to create an exact clone of an existing ontology. + omit this unless the aim is to create an exact clone of an existing ontology. required: Whether the label editor would mark this attribute as 'required'. dynamic: Whether the attribute can have a different answer for the same object across different frames. diff --git a/encord/objects/ontology_object_instance.py b/encord/objects/ontology_object_instance.py index 2d18463c7..9ab091da3 100644 --- a/encord/objects/ontology_object_instance.py +++ b/encord/objects/ontology_object_instance.py @@ -433,7 +433,7 @@ def check_within_range(self, frame: int) -> None: frame: The frame number to check. Raises: - LabelRowError: If the frame is out of the acceptable range. + :class:`~encord.exceptions.LabelRowError`: If the frame is out of the acceptable range. """ if frame < 0 or frame >= self._last_frame: raise LabelRowError( @@ -547,7 +547,7 @@ def get_annotation(self, frame: Union[int, str] = 0) -> Annotation: Annotation: The annotation for the specified frame. Raises: - LabelRowError: If the frame is not present in the label row. 
+ :class:`~encord.exceptions.LabelRowError`: If the frame is not present in the label row. """ if self._non_geometric and frame != 0: raise LabelRowError( @@ -622,7 +622,7 @@ def is_valid(self) -> None: """Check if the ObjectInstance is valid. Raises: - LabelRowError: If the ObjectInstance is not on any frames. + :class:`~encord.exceptions.LabelRowError`: If the ObjectInstance is not on any frames. """ if len(self._frames_to_instance_data) == 0: raise LabelRowError("ObjectInstance is not on any frames. Please add it to at least one frame.") @@ -633,7 +633,7 @@ def are_dynamic_answers_valid(self) -> None: """Validate if there are any dynamic answers on frames that have no coordinates. Raises: - LabelRowError: If there are dynamic answers on frames without coordinates. + :class:`~encord.exceptions.LabelRowError`: If there are dynamic answers on frames without coordinates. """ dynamic_frames = set(self._dynamic_answer_manager.frames()) local_frames = self.get_annotation_frames() @@ -942,12 +942,12 @@ def check_coordinate_type(coordinates: Coordinates, ontology_object: Object, par """Check if the coordinate type matches the expected type for the ontology object. Args: - coordinates (Coordinates): The coordinates to check. - ontology_object (Object): The ontology object to check against. - parent (LabelRowV2): The parent label row (if any) of the ontology object. + coordinates: The coordinates to check. + ontology_object: The ontology object to check against. + parent: The parent label row (if any) of the ontology object. Raises: - LabelRowError: If the coordinate type does not match the expected type. + :class:`~encord.exceptions.LabelRowError`: If the coordinate type does not match the expected type. """ expected_coordinate_types = ACCEPTABLE_COORDINATES_FOR_ONTOLOGY_ITEMS[ontology_object.shape] if all( @@ -989,7 +989,7 @@ def is_valid_dynamic_attribute(self, attribute: Attribute) -> bool: """Check if the attribute is a valid dynamic attribute. 
Args: - attribute (Attribute): The attribute to check. + attribute: The attribute to check. Returns: bool: True if the attribute is valid, False otherwise. @@ -1008,9 +1008,9 @@ def delete_answer( """Delete the answer for a given attribute and frames. Args: - attribute (Attribute): The attribute to delete the answer for. - frames (Optional[Frames]): The frames to delete the answer for. - filter_answer (Union[str, Option, Iterable[Option], None]): The specific answer to delete. + attribute: The attribute to delete the answer for. + frames: The frames to delete the answer for. + filter_answer: The specific answer to delete. """ if frames is None: frames = [Range(i, i) for i in self._frames_to_answers.keys()] @@ -1045,9 +1045,9 @@ def set_answer( """Set the answer for a given attribute and frames. Args: - answer (Union[str, Option, Iterable[Option]]): The answer to set. - attribute (Attribute): The attribute to set the answer for. - frames (Optional[Frames]): The frames to set the answer for. + answer: The answer to set. + attribute: The attribute to set the answer for. + frames: The frames to set the answer for. """ if frames is None: for available_frame_view in self._object_instance.get_annotations(): @@ -1081,9 +1081,9 @@ def get_answer( """Get answers for a given attribute, filtered by the specified criteria. Args: - attribute (Attribute): The attribute to get the answers for. - filter_answer (Union[str, Option, Iterable[Option], None]): The specific answer to filter by. - filter_frames (Optional[Frames]): The specific frames to filter by. + attribute: The attribute to get the answers for. + filter_answer: The specific answer to filter by. + filter_frames: The specific frames to filter by. Returns: AnswersForFrames: A list of answers and their associated frames. 
diff --git a/encord/objects/ontology_structure.py b/encord/objects/ontology_structure.py index 81de02a4b..8f8a077a3 100644 --- a/encord/objects/ontology_structure.py +++ b/encord/objects/ontology_structure.py @@ -51,7 +51,7 @@ def get_child_by_hash( type_: The expected type of the item. If the found child does not match the type, an error will be thrown. Raises: - OntologyError: If the item with the specified feature_node_hash is not found or if the type does not match. + :class:`~encord.exceptions.OntologyError`: If the item with the specified feature_node_hash is not found or if the type does not match. """ for object_ in self.objects: if object_.feature_node_hash == feature_node_hash: @@ -84,7 +84,7 @@ def get_child_by_title( type_: The expected type of the child node. Only a node that matches this type will be returned. Raises: - OntologyError: If no child node with the specified title and type is found, or if multiple matches are found. + :class:`~encord.exceptions.OntologyError`: If no child node with the specified title and type is found, or if multiple matches are found. """ found_items = self.get_children_by_title(title, type_) _assert_singular_result_list(found_items, title, type_) @@ -128,13 +128,13 @@ def get_children_by_title( @classmethod def from_dict(cls, d: Dict[str, Any]) -> OntologyStructure: - """Create an OntologyStructure from a dictionary. + """Create an :class:`~encord.objects.OntologyStructure` from a dictionary. Args: d: A JSON blob of an "ontology structure" (e.g. from Encord web app) Returns: - OntologyStructure: The created OntologyStructure object. + :class:`~encord.objects.OntologyStructure`: The created :class:`~encord.objects.OntologyStructure` object. Raises: KeyError: If the dict is missing a required field. @@ -153,7 +153,7 @@ def from_dict(cls, d: Dict[str, Any]) -> OntologyStructure: ) def to_dict(self) -> Dict[str, List[Dict[str, Any]]]: - """Convert the OntologyStructure to a dictionary. 
+ """Convert the :class:`~encord.objects.OntologyStructure` to a dictionary. Returns: Dict[str, List[Dict[str, Any]]]: The dictionary representation of the ontology. diff --git a/encord/objects/options.py b/encord/objects/options.py index 660be02df..cfac6c5c2 100644 --- a/encord/objects/options.py +++ b/encord/objects/options.py @@ -152,9 +152,9 @@ def add_nested_attribute( cls: attribute type, one of `RadioAttribute`, `ChecklistAttribute`, `TextAttribute` name: the user-visible name of the attribute local_uid: integer identifier of the attribute. Normally auto-generated; - omit this unless the aim is to create an exact clone of existing ontology + omit this unless the aim is to create an exact clone of existing ontology feature_node_hash: global identifier of the object. Normally auto-generated; - omit this unless the aim is to create an exact clone of existing ontology + omit this unless the aim is to create an exact clone of existing ontology required: whether the label editor would mark this attribute as 'required' Returns: diff --git a/encord/objects/skeleton_template.py b/encord/objects/skeleton_template.py index a0e3e1cc1..cb7397ab5 100644 --- a/encord/objects/skeleton_template.py +++ b/encord/objects/skeleton_template.py @@ -33,7 +33,7 @@ def create_instance(self, provided_coordinates: List[SkeletonCoordinate]) -> Ske """Create an instance of SkeletonCoordinates with the provided coordinates. Args: - provided_coordinates (List[SkeletonCoordinate]): A list of SkeletonCoordinate objects to align. + provided_coordinates: A list of SkeletonCoordinate objects to align. Returns: SkeletonCoordinates: An instance of SkeletonCoordinates with aligned coordinates. diff --git a/encord/orm/collection.py b/encord/orm/collection.py index 51bf770d6..7e8542b49 100644 --- a/encord/orm/collection.py +++ b/encord/orm/collection.py @@ -326,7 +326,7 @@ class ProjectCollectionBulkItemResponse(BaseDTO): Attributes: failed_items: Items for which the bulk operation failed. 
Each entry contains - the original request that could not be processed successfully. + the original request that could not be processed successfully. """ failed_items: List[Union[ProjectDataCollectionItemRequest, ProjectLabelCollectionItemRequest]] diff --git a/encord/orm/dataset.py b/encord/orm/dataset.py index d95870766..773647a59 100644 --- a/encord/orm/dataset.py +++ b/encord/orm/dataset.py @@ -534,7 +534,8 @@ def from_dict_list(cls, json_list: List) -> List[DataRow]: def _compare_upload_payload(self, upload_res: dict, initial_payload: dict) -> None: """Compares the upload payload with the response from the server. - NOTE: this could also compare the new fields, field by field and update the current DataRow. + Note: + This could also compare the new fields, field by field and update the current :class:`~encord.orm.dataset.DataRow`. """ updated_fields = set(upload_res["updated_fields"]) fields_requested_for_update = set(initial_payload.keys()) @@ -1119,8 +1120,9 @@ class DataUnitError(BaseDTO): class DatasetDataLongPolling(BaseDTO): """Response of the upload job's long polling request. - **Note:** An upload job consists of job units, where job unit could be - either a video, image group, dicom series, or a single image. + Note: + An upload job consists of job units, where job unit could be + either a video, image group, dicom series, or a single image. """ status: LongPollingStatus @@ -1189,10 +1191,10 @@ class CreateDatasetResponseV2(BaseDTO): """Response returned when creating a dataset (current format). Args: - dataset_uuid: UUID of the newly created dataset. - backing_folder_uuid: Optional UUID of the backing folder created alongside the - dataset, if applicable. - A 'not None' indicates a legacy "mirror" dataset was created. + dataset_uuid: UUID of the newly created dataset. + backing_folder_uuid: Optional UUID of the backing folder created alongside the + dataset, if applicable. + A 'not None' indicates a legacy "mirror" dataset was created. 
""" dataset_uuid: UUID diff --git a/encord/orm/label_row.py b/encord/orm/label_row.py index b6374fe85..0ff678d83 100644 --- a/encord/orm/label_row.py +++ b/encord/orm/label_row.py @@ -380,8 +380,9 @@ def from_list(cls, json_list: list) -> List[LabelRowMetadata]: return ret def to_dict(self) -> Dict: - """Returns: - The dict equivalent of LabelRowMetadata. + """ + Returns: + The dict equivalent of LabelRowMetadata. """ def transform(value: Any): @@ -484,19 +485,17 @@ class LabelRowMetadataDTO(BaseDTO): def label_row_metadata_dto_to_label_row_metadata(label_row_metadata_dto: LabelRowMetadataDTO) -> LabelRowMetadata: """Convert a :class:`LabelRowMetadataDTO` instance to a - :class:`LabelRowMetadata` dataclass. + :class:LabelRowMetadata dataclass. - This helper is useful when working with DTOs returned by the API + This helper is useful when working with DTOs returned by the API, but you prefer the richer, formatter-enabled - :class:`LabelRowMetadata` representation in your application code. + :class:LabelRowMetadata representation in your application code. Args: - - label_row_metadata_dto: Metadata DTO returned from the API. + label_row_metadata_dto: Metadata DTO returned from the API. Returns: - - LabelRowMetadata: Equivalent dataclass instance with the same fields populated. + :class:LabelRowMetadata: Equivalent dataclass instance with the same fields populated. 
""" return LabelRowMetadata( label_hash=label_row_metadata_dto.label_hash, diff --git a/encord/orm/project.py b/encord/orm/project.py index bafa86080..8a10f44bf 100644 --- a/encord/orm/project.py +++ b/encord/orm/project.py @@ -7,6 +7,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union from uuid import UUID +from encord.common.deprecated import deprecated from encord.exceptions import WrongProjectTypeError from encord.orm import base_orm from encord.orm.analytics import CamelStrEnum @@ -15,6 +16,7 @@ from encord.utilities.project_user import ProjectUserRole +@deprecated("0.1.180", "get_dataset()") class Project(base_orm.BaseORM): """DEPRECATED - prefer using the `encord.project.Project` class instead. @@ -554,7 +556,7 @@ class GetProjectUsersPayload(BaseDTO): class ProjectUserResponse(BaseDTO): """ - This one should be merged with ProjectUser class + This one should be merged with :class:`~encord.utilities.project_user.ProjectUser` class But at the moment server doesn't return "project_hash" field (which is correct I suppose), that ProjectUser expects TODO: deprecate project_hash field from ProjectUsers and merge this two together """ diff --git a/encord/orm/storage.py b/encord/orm/storage.py index 0d4956a96..b65200441 100644 --- a/encord/orm/storage.py +++ b/encord/orm/storage.py @@ -118,7 +118,7 @@ class StorageFolder(BaseDTO): user_role: Role of the current user in this folder. synced_dataset_hash: UUID of the dataset synced with this folder, if any. path_to_root: Path from this folder to the root, represented as a list - of :class:`PathElement` instances. + of :class:`~encord.orm.PathElement` instances. """ uuid: UUID @@ -148,7 +148,7 @@ class StorageItem(BaseDTO): created_at: Timestamp when the item was created. last_edited_at: Timestamp when the item was last modified. is_tombstone: This item has been deleted but the link is retained for consistency reasons. 
- Mostly for items in the 'cloud linked folders' that are referenced but aren't present after a re-sync. + Mostly for items in the 'cloud linked folders' that are referenced but aren't present after a re-sync. is_placeholder: This item has been added to the folder but isn't fully processed yet. backed_data_units_count: Number of data units backed by this storage item. storage_location: Storage backend where the item resides. diff --git a/encord/orm/workflow.py b/encord/orm/workflow.py index 7536758c3..f052a0a2b 100644 --- a/encord/orm/workflow.py +++ b/encord/orm/workflow.py @@ -134,7 +134,7 @@ class WorkflowDTO(BaseDTO): Args: stages: List of workflow stages in the project workflow graph. - This can include both standard workflow nodes and agent nodes. + This can include both standard workflow nodes and agent nodes. """ stages: List[Union[WorkflowAgentNode, WorkflowNode]] diff --git a/encord/project.py b/encord/project.py index 53926c01a..b95bb1142 100644 --- a/encord/project.py +++ b/encord/project.py @@ -279,19 +279,19 @@ def add_users(self, user_emails: List[str], user_role: ProjectUserRole) -> List[ """Add users to the project. If the user already exists in the Project, this operation succeeds but the `user_role` remains unchanged. The - existing `user_role` is reflected in the `ProjectUser` instance returned. + existing `user_role` is reflected in the :class:`~encord.utilities.project_user.ProjectUser` instance returned. Args: user_emails: List of user emails to be added. user_role: The user role to assign to all users. Returns: - List[ProjectUser]: A list of ProjectUser objects representing the added users. + List[:class:`~encord.utilities.project_user.ProjectUser`]: A list of :class:`~encord.utilities.project_user.ProjectUser` objects representing the added users. Raises: - AuthorisationError: If the project API key is invalid. - ResourceNotFoundError: If no project exists by the specified project EntityId.
- UnknownError: If an error occurs while adding the users to the project. + :class:`~encord.exceptions.AuthorisationError`: If the project API key is invalid. + :class:`~encord.exceptions.ResourceNotFoundError`: If no project exists by the specified project EntityId. + :class:`~encord.exceptions.UnknownError`: If an error occurs while adding the users to the project. """ return self._client.add_users(user_emails, user_role) @@ -299,7 +299,7 @@ def list_users(self) -> Iterable[ProjectUser]: """List all users that have access to the project. Returns: - Iterable[ProjectUser]: An iterable of ProjectUser objects. + Iterable[:class:`~encord.utilities.project_user.ProjectUser`]: An iterable of :class:`~encord.utilities.project_user.ProjectUser` objects. """ yield from self._client.list_users(UUID(self.project_hash)) @@ -357,7 +357,7 @@ def copy_project( Args: copy_datasets: If True, the datasets of the existing project are copied over, and new tasks are created from those datasets. copy_collaborators: If True, all users of the existing project are copied over with their current roles. - If label and/or annotator reviewer mapping is set, this will also be copied over. + If label and/or annotator reviewer mapping is set, this will also be copied over. copy_models: If True, all models with their training information will be copied into the new project. copy_labels: Options for copying labels, defined in `CopyLabelsOptions`. new_title: When provided, will be used as the title for the new project. @@ -367,9 +367,9 @@ def copy_project( str: The EntityId of the newly created project. Raises: - AuthorisationError: If the project API key is invalid. - ResourceNotFoundError: If no project exists by the specified project EntityId. - UnknownError: If an error occurs while copying the project. + :class:`~encord.exceptions.AuthorisationError`: If the project API key is invalid. + :class:`~encord.exceptions.ResourceNotFoundError`: If no project exists by the specified project EntityId. 
+ :class:`~encord.exceptions.UnknownError`: If an error occurs while copying the project. """ return self._client.copy_project( new_title=new_title, @@ -390,11 +390,11 @@ def add_datasets(self, dataset_hashes: List[str]) -> bool: bool: True if the datasets were successfully added, False otherwise. Raises: - AuthenticationError: If the project API key is invalid. - AuthorisationError: If access to the specified resource is restricted. - ResourceNotFoundError: If one or more datasets don't exist by the specified dataset_hashes. - UnknownError: If an error occurs while adding the datasets to the project. - OperationNotAllowed: If the write operation is not allowed by the API key. + :class:`~encord.exceptions.AuthenticationError`: If the project API key is invalid. + :class:`~encord.exceptions.AuthorisationError`: If access to the specified resource is restricted. + :class:`~encord.exceptions.ResourceNotFoundError`: If one or more datasets don't exist by the specified dataset_hashes. + :class:`~encord.exceptions.UnknownError`: If an error occurs while adding the datasets to the project. + :class:`~encord.exceptions.OperationNotAllowed`: If the write operation is not allowed by the API key. """ res = self._client.add_datasets(dataset_hashes) self.refetch_data() @@ -410,11 +410,11 @@ def remove_datasets(self, dataset_hashes: List[str]) -> bool: bool: True if the datasets were successfully removed, False otherwise. Raises: - AuthenticationError: If the project API key is invalid. - AuthorisationError: If access to the specified resource is restricted. - ResourceNotFoundError: If no dataset exists by the specified dataset_hash (uid). - UnknownError: If an error occurs while removing the datasets from the project. - OperationNotAllowed: If the operation is not allowed by the API key. + :class:`~encord.exceptions.AuthenticationError`: If the project API key is invalid. + :class:`~encord.exceptions.AuthorisationError`: If access to the specified resource is restricted. 
+ :class:`~encord.exceptions.ResourceNotFoundError`: If no dataset exists by the specified dataset_hash (uid). + :class:`~encord.exceptions.UnknownError`: If an error occurs while removing the datasets from the project. + :class:`~encord.exceptions.OperationNotAllowed`: If the operation is not allowed by the API key. """ res = self._client.remove_datasets(dataset_hashes) self.refetch_data() @@ -431,7 +431,7 @@ def get_project_ontology(self) -> LegacyOntology: @deprecated("0.1.102", alternative="encord.ontology.Ontology class") def add_object(self, name: str, shape: ObjectShape) -> bool: - """DEPRECATED: Prefer using :class:`Ontology [encord.ontology.Ontology]` to manipulate ontology. + """DEPRECATED: Prefer using :class:`~encord.ontology.Ontology` to manipulate ontology. Add an object to an ontology. @@ -445,10 +445,10 @@ def add_object(self, name: str, shape: ObjectShape) -> bool: bool: True if the object was added successfully, False otherwise. Raises: - AuthenticationError: If the project API key is invalid. - AuthorisationError: If access to the specified resource is restricted. - UnknownError: If an error occurs while adding the object to the project ontology. - OperationNotAllowed: If the operation is not allowed by the API key. + :class:`~encord.exceptions.AuthenticationError`: If the project API key is invalid. + :class:`~encord.exceptions.AuthorisationError`: If access to the specified resource is restricted. + :class:`~encord.exceptions.UnknownError`: If an error occurs while adding the object to the project ontology. + :class:`~encord.exceptions.OperationNotAllowed`: If the operation is not allowed by the API key. ValueError: If invalid arguments are supplied in the function call. """ res = self._client.add_object(name, shape) @@ -463,7 +463,7 @@ def add_classification( required: bool, options: Optional[Iterable[str]] = None, ): - """DEPRECATED: Prefer using :class:`Ontology encord.ontology.Ontology` to manipulate ontology. 
+ """DEPRECATED: Prefer using :class:`~encord.ontology.Ontology` to manipulate ontology. Add a classification to an ontology. @@ -476,10 +476,10 @@ def add_classification( options: The list of options for the classification (to be set to None for texts). Raises: - AuthenticationError: If the project API key is invalid. - AuthorisationError: If access to the specified resource is restricted. - UnknownError: If an error occurs while adding the classification to the project ontology. - OperationNotAllowed: If the operation is not allowed by the API key. + :class:`~encord.exceptions.AuthenticationError`: If the project API key is invalid. + :class:`~encord.exceptions.AuthorisationError`: If access to the specified resource is restricted. + :class:`~encord.exceptions.UnknownError`: If an error occurs while adding the classification to the project ontology. + :class:`~encord.exceptions.OperationNotAllowed`: If the operation is not allowed by the API key. ValueError: If invalid arguments are supplied in the function call. """ res = self._client.add_classification(name, classification_type, required, options) @@ -524,9 +524,9 @@ def object_interpolation( dict: Full set of filled frames including interpolated objects. Raises: - AuthenticationError: If the project API key is invalid. - AuthorisationError: If access to the specified resource is restricted. - UnknownError: If an error occurs while running interpolation. + :class:`~encord.exceptions.AuthenticationError`: If the project API key is invalid. + :class:`~encord.exceptions.AuthorisationError`: If access to the specified resource is restricted. + :class:`~encord.exceptions.UnknownError`: If an error occurs while running interpolation. """ return self._client.object_interpolation(key_frames, objects_to_interpolate) @@ -541,9 +541,9 @@ def get_data(self, data_hash: str, get_signed_url: bool = False) -> Tuple[Option A tuple consisting of the video (if it exists) and a list of individual images (if they exist). 
Raises: - AuthenticationError: If the project API key is invalid. - AuthorisationError: If access to the specified resource is restricted. - UnknownError: If an error occurs while retrieving the object. + :class:`~encord.exceptions.AuthenticationError`: If the project API key is invalid. + :class:`~encord.exceptions.AuthorisationError`: If access to the specified resource is restricted. + :class:`~encord.exceptions.UnknownError`: If an error occurs while retrieving the object. """ return self._client.get_data(data_hash, get_signed_url) @@ -597,15 +597,15 @@ def get_editor_logs( The start_time and end_time parameters are required. The maximum time range is 30 days. Args: - action: Filter the editor logs by action. - actor_user_email: Filter the editor logs by the user email. - data_unit_id: Filter the editor logs by the data id (data_hash). - workflow_stage_id: Filter the editor logs by the workflow stage id. - end_time: Filter the editor logs to only include logs before the specified time. - start_time: Filter the editor logs to only include logs after the specified time. + action: Filter the editor logs by action. + actor_user_email: Filter the editor logs by the user email. + data_unit_id: Filter the editor logs by the data id (data_hash). + workflow_stage_id: Filter the editor logs by the workflow stage id. + end_time: Filter the editor logs to only include logs before the specified time. + start_time: Filter the editor logs to only include logs after the specified time. Returns: - An iterator on the editor logs. + An iterator on the editor logs. """ # we don't put the limit in the parameters anymore because it works as a batch size in the iterator. @@ -700,11 +700,11 @@ def get_label_row( LabelRow: A label row instance. Raises: - AuthenticationError: If the project API key is invalid. - AuthorisationError: If access to the specified resource is restricted. - ResourceNotFoundError: If no label exists by the specified label_hash (uid). 
- UnknownError: If an error occurs while retrieving the label. - OperationNotAllowed: If the read operation is not allowed by the API key. + :class:`~encord.exceptions.AuthenticationError`: If the project API key is invalid. + :class:`~encord.exceptions.AuthorisationError`: If access to the specified resource is restricted. + :class:`~encord.exceptions.ResourceNotFoundError`: If no label exists by the specified label_hash (uid). + :class:`~encord.exceptions.UnknownError`: If an error occurs while retrieving the label. + :class:`~encord.exceptions.OperationNotAllowed`: If the read operation is not allowed by the API key. """ return self._client.get_label_row( uid, @@ -739,12 +739,12 @@ def get_label_rows( List of LabelRow instances. Raises: - MultiLabelLimitError: If too many labels were requested. Check the error's maximum_labels_allowed field to read the most up to date error limit. - AuthenticationError: If the project API key is invalid. - AuthorisationError: If access to the specified resource is restricted. - ResourceNotFoundError: If no label exists by the specified label_hash (uid). - UnknownError: If an error occurs while retrieving the label. - OperationNotAllowed: If the read operation is not allowed by the API key. + :class:`~encord.exceptions.MultiLabelLimitError`: If too many labels were requested. Check the error's maximum_labels_allowed field to read the most up to date error limit. + :class:`~encord.exceptions.AuthenticationError`: If the project API key is invalid. + :class:`~encord.exceptions.AuthorisationError`: If access to the specified resource is restricted. + :class:`~encord.exceptions.ResourceNotFoundError`: If no label exists by the specified label_hash (uid). + :class:`~encord.exceptions.UnknownError`: If an error occurs while retrieving the label. + :class:`~encord.exceptions.OperationNotAllowed`: If the read operation is not allowed by the API key. 
""" return self._client.get_label_rows( uids, @@ -773,13 +773,13 @@ def save_label_row(self, uid, label, validate_before_saving: bool = False): bool: True if the label row is successfully saved, False otherwise. Raises: - AuthenticationError: If the project API key is invalid. - AuthorisationError: If access to the specified resource is restricted. - ResourceNotFoundError: If no label exists by the specified label_hash (uid). - UnknownError: If an error occurs while saving the label. - OperationNotAllowed: If the write operation is not allowed by the API key. - AnswerDictionaryError: If an object or classification instance is missing in answer dictionaries. - CorruptedLabelError: If a blurb is corrupted (e.g., if the frame labels have more frames than the video). + :class:`~encord.exceptions.AuthenticationError`: If the project API key is invalid. + :class:`~encord.exceptions.AuthorisationError`: If access to the specified resource is restricted. + :class:`~encord.exceptions.ResourceNotFoundError`: If no label exists by the specified label_hash (uid). + :class:`~encord.exceptions.UnknownError`: If an error occurs while saving the label. + :class:`~encord.exceptions.OperationNotAllowed`: If the write operation is not allowed by the API key. + :class:`~encord.exceptions.AnswerDictionaryError`: If an object or classification instance is missing in answer dictionaries. + :class:`~encord.exceptions.CorruptedLabelError`: If a blurb is corrupted (e.g., if the frame labels have more frames than the video). """ return self._client.save_label_row(uid, label, validate_before_saving) @@ -798,13 +798,13 @@ def create_label_row(self, uid: str): LabelRow: A label row instance. Raises: - AuthenticationError: If the project API key is invalid. - AuthorisationError: If access to the specified resource is restricted. - UnknownError: If an error occurs while saving the label. - OperationNotAllowed: If the write operation is not allowed by the API key. 
- AnswerDictionaryError: If an object or classification instance is missing in answer dictionaries. - CorruptedLabelError: If a blurb is corrupted (e.g., if the frame labels have more frames than the video). - ResourceExistsError: If a label row already exists for this project data. Avoids overriding existing work. + :class:`~encord.exceptions.AuthenticationError`: If the project API key is invalid. + :class:`~encord.exceptions.AuthorisationError`: If access to the specified resource is restricted. + :class:`~encord.exceptions.UnknownError`: If an error occurs while saving the label. + :class:`~encord.exceptions.OperationNotAllowed`: If the write operation is not allowed by the API key. + :class:`~encord.exceptions.AnswerDictionaryError`: If an object or classification instance is missing in answer dictionaries. + :class:`~encord.exceptions.CorruptedLabelError`: If a blurb is corrupted (e.g., if the frame labels have more frames than the video). + :class:`~encord.exceptions.ResourceExistsError`: If a label row already exists for this project data. Avoids overriding existing work. """ return self._client.create_label_row(uid) @@ -839,7 +839,7 @@ def list_collaborator_timers( after: The beginning of the period of interest. before: The end of the period of interest. group_by_data_unit: If True, time spent by a collaborator for each data unit is provided separately. - If False, all time spent in the scope of the project is aggregated together. + If False, all time spent in the scope of the project is aggregated together. Yields: CollaboratorTimer: Information about the time spent by each collaborator. @@ -902,11 +902,11 @@ def import_coco_labels( """Import labels in COCO format to an Encord Project. Args: - labels_dict (Dict[str, Any]): A dictionary in COCO annotation format. - category_id_to_feature_hash (Dict[CategoryID, str]): A mapping of category IDs from the COCO data to their corresponding feature hashes in the Project's Ontology. 
- image_id_to_frame_index (Dict[ImageID, FrameIndex]): A mapping of image IDs to FrameIndex(data_hash, frame_offset), used to locate the corresponding frames in the Encord Project. - branch_name (Optional[str]): Optionally specify a branch name. Defaults to the `main` branch. - confidence_field_name (Optional[str]): Optionally specify the name of the confidence field in the COCO annotations. Defaults to assigning `1.0` as confidence value to all annotations. + labels_dict: A dictionary in COCO annotation format. + category_id_to_feature_hash: A mapping of category IDs from the COCO data to their corresponding feature hashes in the Project's Ontology. + image_id_to_frame_index: A mapping of image IDs to FrameIndex(data_hash, frame_offset), used to locate the corresponding frames in the Encord Project. + branch_name: Optionally specify a branch name. Defaults to the `main` branch. + confidence_field_name: Optionally specify the name of the confidence field in the COCO annotations. Defaults to assigning `1.0` as confidence value to all annotations. """ from encord.utilities.coco.datastructure import CocoRootModel from encord.utilities.coco.importer import import_coco_labels @@ -935,16 +935,16 @@ def export_coco_labels( Args: label_hashes: List of label hashes to include. If not provided, all label rows will be included. include_object_feature_hashes: If `None`, all objects will be included. - Otherwise, only objects with the specified feature hashes will be included. + Otherwise, only objects with the specified feature hashes will be included. include_classification_feature_hashes: If `None`, all classifications will be included. Otherwise, only classifications with the specified feature hashes will be included. branch_name: Optionally specify a branch name. Defaults to the `main` branch. Returns: Dict[str, Any]: A dictionary in the COCO format containing the exported labels, - including annotations and metadata conforming to COCO standards. 
- The dictionary also includes additional fields specific to Encord, - providing supplementary information not defined in the COCO standard. + including annotations and metadata conforming to COCO standards. + The dictionary also includes additional fields specific to Encord, + providing supplementary information not defined in the COCO standard. Raises: ImportError: If the 'coco' extra dependencies are not installed. @@ -980,14 +980,14 @@ def list_collections( Args: collection_uuids: The unique identifiers (UUIDs) of the collections to retrieve. - page_size (int): Number of items to return per page. Default if not specified is 100. Maximum value is 1000. + page_size: Number of items to return per page. Default if not specified is 100. Maximum value is 1000. Returns: The list of collections which match the given criteria. Raises: ValueError: If any of the collection uuids is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the user does not have access to it. + :class:`~encord.exceptions.AuthorisationError` : If the user does not have access to it. """ collections = ( [UUID(collection) if isinstance(collection, str) else collection for collection in collection_uuids] @@ -1010,9 +1010,10 @@ def delete_collection(self, collection_uuid: Union[str, UUID]) -> None: Returns: None + Raises: ValueError: If `collection_uuid` is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the user does not have access to it. + :class:`~encord.exceptions.AuthorisationError` : If the user does not have access to it. """ if isinstance(collection_uuid, str): collection_uuid = UUID(collection_uuid) @@ -1034,7 +1035,7 @@ def create_collection( ProjectCollection: Newly created collection. Raises: - :class:`encord.exceptions.AuthorizationError` : If the user does not have access to the folder. + :class:`~encord.exceptions.AuthorisationError` : If the user does not have access to the folder. 
""" new_uuid = ProjectCollection._create_collection( self._client._api_client, self._project_instance.project_hash, name, description, collection_type @@ -1047,9 +1048,11 @@ def active_sync(self) -> None: def active_import(self, project_mode: ActiveProjectMode, *, video_sampling_rate: Optional[float] = None) -> None: """Import the associated Active project. Progress in the app + Args: project_mode: Active projects can be imported up to a certain stage. Use the ActiveProjectModeEnum to select the stage video_sampling_rate: Optional[float]: For videos, what's the sampling rate of frames for analysis + Returns: None """ @@ -1064,14 +1067,14 @@ def list_filter_presets( Args: filter_preset_uuids: The unique identifiers (UUIDs) of the filter presets to retrieve. - page_size (int): Number of items to return per page. Default if not specified is 100. Maximum value is 1000. + page_size: Number of items to return per page. Default if not specified is 100. Maximum value is 1000. Returns: The list of filter presets which match the given criteria. Raises: ValueError: If any of the filter preset uuids is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the user does not have access to it. + :class:`~encord.exceptions.AuthorisationError` : If the user does not have access to it. """ filter_presets = ( [ diff --git a/encord/project_ontology/ontology.py b/encord/project_ontology/ontology.py index 09a93b46f..b7e4c27f4 100644 --- a/encord/project_ontology/ontology.py +++ b/encord/project_ontology/ontology.py @@ -160,11 +160,10 @@ def add_classification( """Add a classification to the ontology. Args: - name: A descriptive name of the classification. - classification_type: The type of the classification. - required: Indicate whether annotating this classification is required. - options: Nested classification options. - + name: A descriptive name of the classification. + classification_type: The type of the classification. 
+ required: Indicate whether annotating this classification is required. + options: Nested classification options. """ ontology_classification = OntologyClassification( str(self.__current_classification_id()), diff --git a/encord/storage.py b/encord/storage.py index 2511cadad..c0279cdd7 100644 --- a/encord/storage.py +++ b/encord/storage.py @@ -4,7 +4,7 @@ hidden: false metadata: title: "Storage" - description: "Encord SDK StorageFolder and StorageItem classes" + description: "Encord SDK StorageFolder and StorageItem classes" category: "64e481b57b6027003f20aaa0" --- """ @@ -151,18 +151,18 @@ def list_items( """Lists items in the folder. Args: - search (Optional[str]): Search string to filter items by name. - is_in_dataset (Optional[bool]): Filter items by whether they are linked to any dataset. - `True` and `False` select only linked and only unlinked items, respectively. - `None` includes all items regardless of their dataset links. - item_types (Optional[List[StorageItemType]]): Filter items by type. - order (FoldersSortBy): Sort order. Defaults to FoldersSortBy.NAME. - get_signed_urls (bool): Whether to get signed URLs for the items. Defaults to False. - desc (bool): Sort in descending order. Defaults to False. - page_size (int): Number of items to return per page. Default if not specified is 100. Maximum value is 1000. + search: Search string to filter items by name. + is_in_dataset: Filter items by whether they are linked to any dataset. + `True` and `False` select only linked and only unlinked items, respectively. + `None` includes all items regardless of their dataset links. + item_types: Filter items by type. + order: Sort order. Defaults to FoldersSortBy.NAME. + get_signed_urls: Whether to get signed URLs for the items. Defaults to False. + desc: Sort in descending order. Defaults to False. + page_size: Number of items to return per page. Default if not specified is 100. Maximum value is 1000.
Returns: - Iterable[StorageItem]: Iterable of items in the folder. + Iterable[:class:`~encord.storage.StorageItem`]: Iterable of items in the folder. """ params = ListItemsParams( search=search, @@ -198,19 +198,19 @@ def upload_image( """Uploads an image to a folder in Encord storage. Args: - file_path (Union[Path, str]): Path to the image file (e.g., '/home/user/data/image.png'). - title (Optional[str]): The image title. If unspecified, this will be the file name. - client_metadata (Optional[Dict[str, Any]]): Optional arbitrary metadata to be associated with the image. - Should be a dictionary that is JSON-serializable. - cloud_upload_settings (CloudUploadSettings): Settings for uploading data into the cloud. Change this object - to overwrite the default values. + file_path: Path to the image file (e.g., '/home/user/data/image.png'). + title: The image title. If unspecified, this will be the file name. + client_metadata: Optional arbitrary metadata to be associated with the image. + Should be a dictionary that is JSON-serializable. + cloud_upload_settings: Settings for uploading data into the cloud. Change this object + to overwrite the default values. Returns: - UUID: The UUID of the newly created image item. + The UUID of the newly created image item. Raises: - AuthorizationError: If the user is not authorized to access the folder. - EncordException: If the image could not be uploaded, e.g., due to being in an unsupported format. + :class:`~encord.exceptions.AuthorisationError`: If the user is not authorized to access the folder. + :class:`~encord.exceptions.EncordException`: If the image could not be uploaded, e.g., due to being in an unsupported format. """ upload_url_info = self._get_upload_signed_urls( item_type=StorageItemType.IMAGE, count=1, frames_subfolder_name=None @@ -257,24 +257,24 @@ def upload_video( """Uploads a video to a folder in Encord storage. Args: - file_path (Union[Path, str]): Path to the video file (e.g., '/home/user/data/video.mp4'). 
- title (Optional[str]): The video title. If unspecified, this will be the file name. This title should include an extension. - For example, "encord_video.mp4". - client_metadata (Optional[Dict[str, Any]]): Optional arbitrary metadata to be associated with the video. - Should be a dictionary that is JSON-serializable. - video_metadata (Optional[CustomerProvidedVideoMetadata]): Optional media metadata for a video file; if provided, - Encord service will skip frame synchronization checks and - will use the values specified here to render the video - in the label editor. - cloud_upload_settings (CloudUploadSettings): Settings for uploading data into the cloud. Change this object to - overwrite the default values. + file_path: Path to the video file (e.g., '/home/user/data/video.mp4'). + title: The video title. If unspecified, this will be the file name. This title should include an extension. + For example, "encord_video.mp4". + client_metadata: Optional arbitrary metadata to be associated with the video. + Should be a dictionary that is JSON-serializable. + video_metadata: Optional media metadata for a video file; if provided, + Encord service will skip frame synchronization checks and + will use the values specified here to render the video + in the label editor. + cloud_upload_settings: Settings for uploading data into the cloud. Change this object to + overwrite the default values. Returns: UUID: The UUID of the newly created video item. Raises: - AuthorizationError: If the user is not authorized to access the folder. - EncordException: If the video could not be uploaded, e.g., due to being in an unsupported format. + :class:`~encord.exceptions.AuthorisationError`: If the user is not authorized to access the folder. + :class:`~encord.exceptions.EncordException`: If the video could not be uploaded, e.g., due to being in an unsupported format. 
""" upload_url_info = self._get_upload_signed_urls( item_type=StorageItemType.VIDEO, count=1, frames_subfolder_name=None @@ -319,12 +319,12 @@ def re_encode_videos(self, storage_items: List[UUID], process_title: str, force_ """Re-encodes the specified video items. Args: - storage_items (List[UUID]): List of UUIDs representing the video items to be re-encoded. - process_title (str): Title for the re-encoding process. - force_full_reencoding (bool): Flag to force full re-encoding. + storage_items: List of UUIDs representing the video items to be re-encoded. + process_title: Title for the re-encoding process. + force_full_reencoding: Flag to force full re-encoding. Returns: - UUID: The UUID of the re-encoding process. + The UUID of the re-encoding process. """ return self._api_client.post( "/storage/items/reencode", @@ -342,10 +342,10 @@ def get_re_encoding_status(self, process_hash: UUID) -> ReencodeVideoItemsRespon """Retrieves the status of a re-encoding process. Args: - process_hash (UUID): The UUID of the re-encoding process. + process_hash: The UUID of the re-encoding process. Returns: - ReencodeVideoItemsResponse: Response object containing the status of the re-encoding process. + Response object containing the status of the re-encoding process. """ return self._api_client.get( f"/storage/items/reencode/{process_hash}", params=None, result_type=ReencodeVideoItemsResponse @@ -361,21 +361,21 @@ def create_dicom_series( """Uploads a DICOM series to a folder in Encord storage. Args: - file_paths (Sequence[Union[str, Path]]): A list of paths to DICOM files, e.g., + file_paths: A list of paths to DICOM files, e.g., ['/home/user/data/DICOM_1.dcm', '/home/user/data/DICOM_2.dcm']. - title (Optional[str]): The title of the DICOM series. If unspecified, this will be randomly generated. + title: The title of the DICOM series. If unspecified, this will be randomly generated. This title should NOT include an extension. For example, "encord_image_group". 
- client_metadata (Optional[Dict[str, Any]]): Optional arbitrary metadata to be associated with the DICOM series. + client_metadata: Optional arbitrary metadata to be associated with the DICOM series. Should be a dictionary that is JSON-serializable. - cloud_upload_settings (CloudUploadSettings): Settings for uploading data into the cloud. Change this object + cloud_upload_settings: Settings for uploading data into the cloud. Change this object to overwrite the default values. Returns: - UUID: The UUID of the newly created DICOM series item. + The UUID of the newly created DICOM series item. Raises: - AuthorizationError: If the user is not authorized to access the folder. - EncordException: If the series could not be uploaded, e.g., due to being in an unsupported format. + :class:`~encord.exceptions.AuthorisationError`: If the user is not authorized to access the folder. + :class:`~encord.exceptions.EncordException`: If the series could not be uploaded, e.g., due to being in an unsupported format. """ upload_url_info = self._get_upload_signed_urls( item_type=StorageItemType.DICOM_FILE, count=len(file_paths), frames_subfolder_name=None @@ -434,21 +434,21 @@ def create_image_group( and :meth:`encord.storage.StorageFolder.upload_image`. Args: - file_paths (Collection[Union[Path, str]]): A list of paths to images, e.g., + file_paths: A list of paths to images, e.g., ['/home/user/data/img1.png', '/home/user/data/img2.png']. - title (Optional[str]): The title of the image group. If unspecified, this will be randomly generated. + title: The title of the image group. If unspecified, this will be randomly generated. This title should NOT include an extension. For example, "encord_image_group". - client_metadata (Optional[Dict[str, Any]]): Optional arbitrary metadata to be associated with the image group. + client_metadata: Optional arbitrary metadata to be associated with the image group. Should be a dictionary that is JSON-serializable. 
- cloud_upload_settings (CloudUploadSettings): Settings for uploading data into the cloud. Change this object + cloud_upload_settings: Settings for uploading data into the cloud. Change this object to overwrite the default values. Returns: - UUID: The UUID of the newly created image group item. + The UUID of the newly created image group item. Raises: - AuthorizationError: If the user is not authorized to access the folder. - EncordException: If the images could not be uploaded, e.g., due to being in an unsupported format. + :class:`~encord.exceptions.AuthorisationError`: If the user is not authorized to access the folder. + :class:`~encord.exceptions.EncordException`: If the images could not be uploaded, e.g., due to being in an unsupported format. """ return self._create_image_group_or_sequence( file_paths, @@ -471,21 +471,21 @@ def create_image_sequence( See also :meth:`encord.storage.StorageFolder.create_image_group` and :meth:`encord.storage.StorageFolder.upload_image`. Args: - file_paths (Collection[Union[Path, str]]): A list of paths to images, e.g., + file_paths: A list of paths to images, e.g., ['/home/user/data/img1.png', '/home/user/data/img2.png']. - title (Optional[str]): The title of the image sequence. If unspecified, this will be randomly generated. + title: The title of the image sequence. If unspecified, this will be randomly generated. This title should NOT include an extension. For example, "front camera 2024-04-01". - client_metadata (Optional[Dict[str, Any]]): Optional arbitrary metadata to be associated with the image sequence. + client_metadata: Optional arbitrary metadata to be associated with the image sequence. Should be a dictionary that is JSON-serializable. - cloud_upload_settings (CloudUploadSettings): Settings for uploading data into the cloud. Change this object + cloud_upload_settings: Settings for uploading data into the cloud. Change this object to overwrite the default values. 
Returns: - UUID: The UUID of the newly created image sequence item. + The UUID of the newly created image sequence item. Raises: - AuthorizationError: If the user is not authorized to access the folder. - EncordException: If the images could not be uploaded, e.g., due to being in an unsupported format. + :class:`~encord.exceptions.AuthorisationError`: If the user is not authorized to access the folder. + :class:`~encord.exceptions.EncordException`: If the images could not be uploaded, e.g., due to being in an unsupported format. """ return self._create_image_group_or_sequence( file_paths, @@ -560,25 +560,18 @@ def upload_nifti( Uploads a NIfTI file to an Encord Storage Folder. Args: - file_path (Union[Path, str]): - Path to the local NIfTI file (e.g., '/home/user/data/brain_scan.nii.gz'). - - title (Optional[str]): - Title of the NIfTI item in Encord. If not provided, the filename is used. + file_path: Path to the local NIfTI file (e.g., '/home/user/data/brain_scan.nii.gz'). + title: Title of the NIfTI item in Encord. If not provided, the filename is used. Must include the '.nii' or '.nii.gz' extension. - - client_metadata (Optional[Dict[str, Any]]): - Custom metadata to associate with the NIfTI file. Must be JSON-serializable. - - cloud_upload_settings (CloudUploadSettings): - Configuration for how the file is uploaded to the cloud. Use this to override default behavior. + client_metadata: Custom metadata to associate with the NIfTI file. Must be JSON-serializable. + cloud_upload_settings: Configuration for how the file is uploaded to the cloud. Use this to override default behavior. Returns: - UUID: UUID of the uploaded NIfTI item. + UUID of the uploaded NIfTI item. Raises: - AuthorizationError: If the user lacks permission to access the target folder. - EncordException: If the file cannot be uploaded (for example due to format or metadata issues). + :class:`~encord.exceptions.AuthorisationError`: If the user lacks permission to access the target folder. 
+ :class:`~encord.exceptions.EncordException`: If the file cannot be uploaded (for example due to format or metadata issues). """ upload_url_info = self._get_upload_signed_urls( @@ -629,28 +622,19 @@ def upload_audio( """Uploads an audio file to an Encord Storage Folder. Args: - file_path (Union[Path, str]): - Path to the local audio file (e.g., '/home/user/data/audio.mp3'). - - title (Optional[str]): - Title of the audio file in Encord. If not provided, the filename is used. + file_path: Path to the local audio file (e.g., '/home/user/data/audio.mp3'). + title: Title of the audio file in Encord. If not provided, the filename is used. The title must include a valid audio file extension (e.g., "encord_audio.mp3"). - - client_metadata (Optional[Dict[str, Any]]): - Custom metadata to associate with the audio. Must be JSON-serializable. - - audio_metadata (Optional[CustomerProvidedAudioMetadata]): - Optional media metadata describing the audio file. If provided, the Encord skips scanning the file to extract media metadata. - - cloud_upload_settings (CloudUploadSettings): - Configuration for cloud upload behavior. Override default settings using this parameter. + client_metadata: Custom metadata to associate with the audio. Must be JSON-serializable. + audio_metadata: Optional media metadata describing the audio file. If provided, the Encord skips scanning the file to extract media metadata. + cloud_upload_settings: Configuration for cloud upload behavior. Override default settings using this parameter. Returns: - UUID: UUID of the uploaded audio item. + UUID of the uploaded audio item. Raises: - AuthorizationError: If the user is not authorized to access the destination folder. - EncordException: If the upload fails (for example due to an unsupported audio format). + :class:`~encord.exceptions.AuthorisationError`: If the user is not authorized to access the destination folder. 
+ :class:`~encord.exceptions.EncordException`: If the upload fails (for example due to an unsupported audio format). #### audio_metadata @@ -724,8 +708,8 @@ def upload_text( UUID of the newly created text item. Raises: - AuthorizationError: If the user is not authorized to access the folder. - EncordException: If the audio could not be uploaded. For example, due to being in an unsupported format. + :class:`~encord.exceptions.AuthorisationError`: If the user is not authorized to access the folder. + :class:`~encord.exceptions.EncordException`: If the text file could not be uploaded. For example, due to being in an unsupported format. #### text_metadata @@ -795,8 +779,8 @@ def upload_pdf( UUID of the newly created PDF item. Raises: - AuthorizationError: If the user is not authorized to access the folder. - EncordException: If the document could not be uploaded. For example, due to being in an unsupported format. + :class:`~encord.exceptions.AuthorisationError`: If the user is not authorized to access the folder. + :class:`~encord.exceptions.EncordException`: If the document could not be uploaded. For example, due to being in an unsupported format. #### pdf_metadata @@ -853,14 +837,13 @@ def add_private_data_to_folder_start( """Starts the process of adding private data to a folder in Encord storage. Args: - integration_id (str): The integration ID for the folder. - private_files (Union[str, Dict, Path, TextIO, DataUploadItems]): - The specification of private files to be added. Can be either a JSON in Encord upload format + integration_id: The integration ID for the folder. + private_files: The specification of private files to be added. Can be either a JSON in Encord upload format (see the relevant documentation), or an :class:`encord.orm.storage.DataUploadItems` object. - ignore_errors (bool): If True, errors will be ignored during the upload process. + ignore_errors: If True, errors will be ignored during the upload process. Returns: - UUID: The UUID of the upload job.
+ The UUID of the upload job. """ return self._add_data_to_folder_start(integration_id, private_files, ignore_errors) @@ -881,11 +864,11 @@ def sync_private_data_with_cloud_synced_folder_start(self) -> UUID: Returns: UUID: The unique identifier for the sync job that can be used with - `sync_private_data_with_cloud_synced_folder_get_result` to poll for results. + `sync_private_data_with_cloud_synced_folder_get_result` to poll for results. Raises: - InvalidArgumentsError: If the folder is not a cloud-synced folder, or if there are - permission issues with the cloud storage bucket. + :class:`~encord.exceptions.InvalidArgumentsError`: If the folder is not a cloud-synced folder, or if there are + permission issues with the cloud storage bucket. Note: This method can only be used with folders created with cloud_synced_folder_params. @@ -920,8 +903,8 @@ def sync_private_data_with_cloud_synced_folder_get_result( the job completes or the timeout is reached. Args: - sync_job_uuid (UUID): The UUID of the synchronization job to poll for results. - timeout_seconds (int): Maximum time in seconds to wait for the job to complete. + sync_job_uuid: The UUID of the synchronization job to poll for results. + timeout_seconds: Maximum time in seconds to wait for the job to complete. Default is 7 days (604800 seconds). Returns: @@ -929,7 +912,7 @@ def sync_private_data_with_cloud_synced_folder_get_result( the synchronization job. Raises: - InvalidArgumentsError: If the synchronization job UUID does not exist or is not associated + :class:`~encord.exceptions.InvalidArgumentsError`: If the synchronization job UUID does not exist or is not associated with this folder. Note: @@ -989,8 +972,8 @@ def add_private_data_to_folder_get_result( """Retrieves the result of adding private data to a folder in Encord storage. Args: - upload_job_id (UUID): The UUID of the upload job. - timeout_seconds (int): The timeout in seconds for the upload job. + upload_job_id: The UUID of the upload job. 
+ timeout_seconds: The timeout in seconds for the upload job. Returns: UploadLongPollingState: The state of the upload job. @@ -1009,12 +992,12 @@ def list_subfolders( """Lists subfolders of the current folder. Args: - search (Optional[str]): Search string to filter folders by name. - dataset_synced (Optional[bool]): Include or exclude folders that are mirrored by a dataset. If `None`, + search: Search string to filter folders by name. + dataset_synced: Include or exclude folders that are mirrored by a dataset. If `None`, no filtering is applied. - order (FoldersSortBy): Sort order for the folders. See :class:`encord.storage.FoldersSortBy` for available options. - desc (bool): If True, sort in descending order. - page_size (int): Number of folders to return per page. Default if not specified is 100. Maximum value is 1000. + order: Sort order for the folders. See :class:`encord.storage.FoldersSortBy` for available options. + desc: If True, sort in descending order. + page_size: Number of folders to return per page. Default if not specified is 100. Maximum value is 1000. Returns: Iterable[StorageFolder]: An iterable of :class:`encord.StorageFolder` objects. @@ -1042,12 +1025,12 @@ def find_subfolders( """Recursively searches for storage folders, starting from this folder. Args: - search (Optional[str]): Search string to filter folders by name. - dataset_synced (Optional[bool]): Include or exclude folders that are mirrored by a dataset. If `None`, + search: Search string to filter folders by name. + dataset_synced: Include or exclude folders that are mirrored by a dataset. If `None`, no filtering is applied. - order (FoldersSortBy): Sort order for the folders. See :class:`encord.storage.FoldersSortBy` for available options. - desc (bool): If True, sort in descending order. - page_size (int): Number of folders to return per page. Default if not specified is 100. Maximum value is 1000. + order: Sort order for the folders. 
See :class:`encord.storage.FoldersSortBy` for available options. + desc: If True, sort in descending order. + page_size: Number of folders to return per page. Default if not specified is 100. Maximum value is 1000. Returns: Iterable[StorageFolder]: An iterable of :class:`encord.StorageFolder` objects. @@ -1106,18 +1089,18 @@ def find_items( Warning: This method is slow. We recommend using `storage_folder.list_items` instead. Args: - search (Optional[str]): Search string to filter items by name. - is_in_dataset (Optional[bool]): Filter items by whether they are linked to any dataset. `True` and `False` select + search: Search string to filter items by name. + is_in_dataset: Filter items by whether they are linked to any dataset. `True` and `False` select only linked and only unlinked items, respectively. `None` includes all items regardless of their dataset links. - item_types (Optional[List[StorageItemType]]): Filter items by type. - order (FoldersSortBy): Sort order. - desc (bool): Sort in descending order. - get_signed_urls (bool): If True, return signed URLs for the items. - page_size (int): Number of items to return per page. Default if not specified is 100. Maximum value is 1000. + item_types: Filter items by type. + order: Sort order. + desc: Sort in descending order. + get_signed_urls: If True, return signed URLs for the items. + page_size: Number of items to return per page. Default if not specified is 100. Maximum value is 1000. Returns: - Iterable[StorageItem]: An iterable of items in the folder and its subfolders. + Iterable[:class:`~encord.storage.StorageItem`]: An iterable of items in the folder and its subfolders. Raises: ValueError: If neither `search` nor `item_types` are provided. @@ -1161,10 +1144,10 @@ def update( """Update the folder's modifiable properties. Any parameters that are not provided will not be updated. Args: - name (Optional[str]): New folder name. - description (Optional[str]): New folder description. 
- client_metadata (Optional[Dict[str, Any]]): New client metadata. - bundle (Optional[Bundle]): Optional :class:`encord.http.Bundle` to use for the operation. If provided, the operation + name: New folder name. + description: New folder description. + client_metadata: New client metadata. + bundle: Optional :class:`encord.http.Bundle` to use for the operation. If provided, the operation will be bundled into a single server call with other item updates using the same bundle. Returns: @@ -1211,7 +1194,7 @@ def move_to_folder(self, target_folder: Optional[Union["StorageFolder", UUID]]) """Move the folder to another folder (specify folder object or UUID), or to the root level if `target_folder` is None. Args: - target_folder (Optional[Union[StorageFolder, UUID]]): The target folder to move to, or `None` to move to the root level. + target_folder: The target folder to move to, or `None` to move to the root level. Returns: None @@ -1238,9 +1221,9 @@ def create_data_group( """Creates a data group storage item in this folder. Args: - params (Union[DataGroupInput, List[UUID]]): Parameters for the data group. When a list of UUIDs is provided, + params: Parameters for the data group. When a list of UUIDs is provided, the group will be created with a grid layout. For custom layouts, use DataGroupGrid, DataGroupList or DataGroupCustom. - client_metadata (Optional[Dict[str, Any]]): Optional custom metadata to be associated with the data group. + client_metadata: Optional custom metadata to be associated with the data group. Should be a dictionary that is JSON-serializable. Returns: @@ -1264,13 +1247,13 @@ def move_items_to_folder( items_to_move: Sequence[Union[UUID, "StorageItem"]], allow_mirror_dataset_changes: bool = False, ) -> None: - """Move items (list of `StorageItem` objects or UUIDs) to another folder (specify folder object or UUID). + """Move items (list of :class:`~encord.storage.StorageItem` objects or UUIDs) to another folder (specify folder object or UUID).
Args: - target_folder (Union[StorageFolder, UUID]): Target folder to move items to. - items_to_move (Sequence[Union[UUID, StorageItem]]): List of items to move. All the items should be immediate children + target_folder: Target folder to move items to. + items_to_move: List of items to move. All the items should be immediate children of the current folder. - allow_mirror_dataset_changes (bool): If `True`, allow moving items that are linked to a mirror dataset. By default, + allow_mirror_dataset_changes: If `True`, allow moving items that are linked to a mirror dataset. By default, moving such items is prohibited, as it would result in data units being removed from a dataset, potentially deleting related annotations and other data. @@ -1295,9 +1278,9 @@ def delete_storage_items(self, item_uuids: List[UUID], remove_unused_frames: boo """Delete storage items by their UUIDs. Args: - item_uuids (List[UUID]): List of UUIDs of items to delete. All the items should be immediate children + item_uuids: List of UUIDs of items to delete. All the items should be immediate children of the current folder. - remove_unused_frames (bool): If `True` (default), remove individual images or DICOM files from image groups or + remove_unused_frames: If `True` (default), remove individual images or DICOM files from image groups or DICOM series that are not used in any other item. Returns: @@ -1576,7 +1559,7 @@ def add_data_to_folder_job_cancel( """Cancels a data upload in progress job, associated with this folder. Args: - upload_job_id (UUID): The unique identifier for the upload job. + upload_job_id: The unique identifier for the upload job. Returns: AddDataToFolderJobCancelResponse: A response indicating the result of the cancelled job. 
@@ -1703,7 +1686,7 @@ def parent_folder(self) -> StorageFolder: @property def item_type(self) -> StorageItemType: - """StorageItemType: The type of the storage item.""" + """StorageItemType: The type of the storage item.""" return self._orm_item.item_type @property @@ -1851,7 +1834,7 @@ def get_summary(self) -> StorageItemSummary: """Retrieve a summary of the item, including linked datasets and other information. Returns: - StorageItemSummary: Object containing summary information about the item. + StorageItemSummary: Object containing summary information about the item. """ return self._api_client.get( f"storage/folders/{self.parent_folder_uuid}/items/{self.uuid}/summary", @@ -1866,7 +1849,7 @@ def get_child_items(self, get_signed_urls: bool = False) -> Iterable["StorageIte get_signed_urls: If `True`, include signed URLs for child items. Returns: - Iterable[StorageItem]: List of child items accessible to the user. + Iterable[:class:`~encord.storage.StorageItem`]: List of child items accessible to the user. """ if self.item_type not in { StorageItemType.IMAGE_GROUP, diff --git a/encord/user_client.py b/encord/user_client.py index d59a1b16c..d03729938 100644 --- a/encord/user_client.py +++ b/encord/user_client.py @@ -126,7 +126,7 @@ class EncordUserClient: High–level entrypoint to the Encord SDK for an authenticated user. `EncordUserClient`` is the primary interface for interacting with - Encord resources such as Projects, Datasets, Collections, Workflows, and + Encord resources such as Projects, Datasets, Collections, Workflows, and Storage Items. It manages authentication, request signing, and low-level communication on behalf of the user. @@ -141,16 +141,15 @@ class EncordUserClient: - Interacting with collections and Index Args: - config : UserConfig The user configuration containing authentication credentials - and client options.
- querier : Querier Internal HTTP/query executor used to communicate with the Encord API. - Users do not normally construct this directly. - - **Notes** - - Instances of this class should generally be created using the provided - ``create_*`` methods rather than by calling the constructor - directly. + config : UserConfig The user configuration containing authentication credentials + and client options. + querier : Querier Internal HTTP/query executor used to communicate with the Encord API. + Users do not normally construct this directly. + + Notes: + Instances of this class should generally be created using the provided + ``create_*`` methods rather than by calling the constructor + directly. """ def __init__(self, config: UserConfig, querier: Querier): @@ -184,7 +183,7 @@ def get_dataset( dataset_hash: Union[str, UUID], dataset_access_settings: DatasetAccessSettings = DEFAULT_DATASET_ACCESS_SETTINGS, ) -> Dataset: - """Get the Dataset class to access dataset fields and manipulate a dataset. + """Get the :class:`~encord.dataset.Dataset` class to access dataset fields and manipulate a dataset. You only have access to this project if you are one of the following @@ -213,7 +212,7 @@ def get_dataset( return Dataset(client, orm_dataset) def get_project(self, project_hash: Union[str, UUID]) -> Project: - """Get the Project class to access project fields and manipulate a project. + """Get the :class:`~encord.project.Project` class to access project fields and manipulate a project. You will only have access to this project if you are one of the following @@ -297,16 +296,15 @@ def create_dataset( ) -> CreateDatasetResponse: """ Args: - dataset_title (str): Title of the dataset. - dataset_type (StorageLocation): Type of storage location where the data will be stored. - dataset_description (Optional[str]): Optional description of the dataset. - create_backing_folder (bool): Whether to create a mirrored backing Folder. 
If True (default), + dataset_title: Title of the dataset. + dataset_type: Type of storage location where the data will be stored. + dataset_description: Optional description of the dataset. + create_backing_folder: Whether to create a mirrored backing Folder. If True (default), the Folder and Dataset are synced. Recommended to set False for complex or large-scale projects. Returns: - CreateDatasetResponse: - + :class:`~encord.orm.dataset.CreateDatasetResponse` """ return self.__create_dataset( title=dataset_title, @@ -340,7 +338,7 @@ def get_datasets( edited_before: optional last modification date filter, 'less' edited_after: optional last modification date filter, 'greater' include_org_access: if set to true and the calling user is the organization admin, the - method returns all datasets in the organization. + method returns all datasets in the organization. Returns: list of datasets matching filter conditions, with the roles that the current user has on them. Each item @@ -491,7 +489,7 @@ def list_projects( edited_before: optional last modification date filter, 'less' edited_after: optional last modification date filter, 'greater' include_org_access: if set to true and the calling user is the organization admin, the - method will return all projects in the organization. + method will return all projects in the organization. tags_anyof: optional tag names filter; matches projects having at least one of the tag names. 
Returns: @@ -522,18 +520,18 @@ def create_project( workflow_settings: ProjectWorkflowSettings = ManualReviewWorkflowSettings(), workflow_template_hash: Optional[str] = None, ) -> str: - """Creates a new Project and returns its uid ('project_hash') + """Creates a new :class:`~encord.project.Project` and returns its uid ('project_hash') Args: project_title: the title of the Project dataset_hashes: a list of the Dataset uids that the project will use project_description: the optional description of the project - ontology_hash: the uid of an Ontology to be used. If omitted, a new empty Ontology will be created + ontology_hash: the uid of an :class:`~encord.ontology.Ontology` to be used. If omitted, a new empty :class:`~encord.ontology.Ontology` will be created workflow_settings: selects and configures the type of the quality control Workflow to use, See :class:`encord.orm.project.ProjectWorkflowSettings` for details. If omitted, :class:`~encord.orm.project.ManualReviewWorkflowSettings` is used. workflow_template_hash: Project is created using a Workflow based on the template provided. If omitted, the project will be created using the default standard workflow. Returns: - the uid of the Project. + The UUID of the Project. """ project = { "title": project_title, @@ -883,10 +881,11 @@ def get_cloud_integrations( filter_integration_uuids: optional list of integration UUIDs to include. filter_integration_titles: optional list of integration titles to include (exact match). include_org_access: if set to true and the calling user is the organization admin, the - method will return all cloud integrations in the organization. + method will return all cloud integrations in the organization. - If `filter_integration_uuids` and `filter_integration_titles` are both provided, the method will return - the integrations that match both of the filters. 
+ Notes: + If `filter_integration_uuids` and `filter_integration_titles` are both provided, the method will return + the integrations that match both of the filters. """ if filter_integration_uuids is not None: filter_integration_uuids = [UUID(x) if isinstance(x, str) else x for x in filter_integration_uuids] @@ -930,7 +929,7 @@ def get_ontologies( edited_before: optional last modification date filter, 'less' edited_after: optional last modification date filter, 'greater' include_org_access: if set to true and the calling user is the organization admin, the - method will return all ontologies in the organization. + method will return all ontologies in the organization. Returns: list of ontologies matching filter conditions, with the roles that the current user has on them. Each item @@ -961,15 +960,15 @@ def create_ontology( """Creates a new ontology with the given title, description, and structure. Args: - title (str): The title of the ontology. - description (str, optional): A brief description of the ontology. Defaults to an empty string. - structure (Optional[OntologyStructure], optional): The structural definition of the ontology. If not provided, a default structure is used. + title: The title of the ontology. + description: A brief description of the ontology. Defaults to an empty string. + structure: The structural definition of the ontology. If not provided, a default structure is used. Returns: - Ontology: The newly created ontology object. + :class:`~encord.ontology.Ontology`: The newly created ontology object. Raises: - ValueError: If the provided structure contains a classification without any attributes. + ValueError: If the provided structure contains a classification without any attributes. """ try: structure_dict = structure.to_dict() if structure else OntologyStructure().to_dict() @@ -1258,7 +1257,7 @@ def get_storage_folder(self, folder_uuid: Union[UUID, str]) -> StorageFolder: Raises: ValueError: If `folder_uuid` is a badly formed UUID. 
- :class:`encord.exceptions.AuthorizationError` : If the folder with the given UUID does not exist or + :class:`~encord.exceptions.AuthorisationError` : If the folder with the given UUID does not exist or the user does not have access to it. """ if isinstance(folder_uuid, str): @@ -1277,7 +1276,7 @@ def get_storage_item(self, item_uuid: Union[UUID, str], sign_url: bool = False) Raises: ValueError: If `item_uuid` is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the item with the given UUID does not exist or + :class:`~encord.exceptions.AuthorisationError` : If the item with the given UUID does not exist or the user does not have access to it. """ if isinstance(item_uuid, str): @@ -1301,7 +1300,7 @@ def get_storage_items( Raises: ValueError: If any of the item uuids is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If some of the items with the given UUIDs do not exist or + :class:`~encord.exceptions.AuthorisationError` : If some of the items with the given UUIDs do not exist or the user does not have access to them. """ internal_item_uuids: List[UUID] = [UUID(item) if isinstance(item, str) else item for item in item_uuids] @@ -1325,7 +1324,7 @@ def list_storage_folders( no filtering is applied. org_access: If `True`, and if the caller is `ADMIN` of their organization, the results contain the folders belonging to the organization, instead of those accessible to the user. If enabled - but the user is not an organization admin, the `AuthorisationError` is raised. Default value is `False`. + but the user is not an organization admin, the :class:`~encord.exceptions.AuthorisationError` is raised. Default value is `False`. order: Sort order for the folders. See :class:`encord.storage.FoldersSortBy` for available options. desc: If True, sort in descending order. page_size: Number of folders to return per page. Default if not specified is 100. Maximum value is 1000. 
@@ -1364,7 +1363,7 @@ def find_storage_folders( no filtering is applied. org_access: If `True`, and if the caller is `ADMIN` of their organization, the results contain the folders belonging to the organization, instead of those accessible to the user. If enabled - but the user is not an organization admin, the `AuthorisationError` is raised. Default value is `False`. + but the user is not an organization admin, the :class:`~encord.exceptions.AuthorisationError` is raised. Default value is `False`. order: Sort order for the folders. See :class:`encord.storage.FoldersSortBy` for available options. desc: If True, sort in descending order. page_size: Number of folders to return per page. Default if not specified is 100. Maximum value is 1000. @@ -1400,7 +1399,8 @@ def find_storage_items( ) -> Iterable[StorageItem]: """Recursively search for storage items, starting from the root level. - **Warning:** This method is slow. We recommend using `storage_folder.list_items` instead. + Warning: + This method is slow. We recommend using `storage_folder.list_items` instead. Args: search: Search string to filter items by name. @@ -1409,14 +1409,15 @@ def find_storage_items( dataset links. item_types: Filter items by type. org_access: If `True`, and if the caller is `ADMIN` of their organization, the results contain the - items belonging to the organization, instead of those accessible to the user. If enabled - but the user is not an organization admin, the `AuthorisationError` is raised. Default value is `False`. + items belonging to the organization, instead of those accessible to the user. If enabled + but the user is not an organization admin, the :class:`~encord.exceptions.AuthorisationError` is raised. Default value is `False`. order: Sort order. desc: Sort in descending order. get_signed_urls: If True, return signed URLs for the items. page_size: Number of items to return per page. Default if not specified is 100. Maximum value is 1000. 
- At least one of `search` or `item_types` must be provided. + Notes: + At least one of `search` or `item_types` must be provided. Returns: Iterable of items in the folder. @@ -1457,7 +1458,7 @@ def get_collection(self, collection_uuid: Union[str, UUID]) -> Collection: Raises: ValueError: If `collection_uuid` is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the item with the given UUID does not exist or + :class:`~encord.exceptions.AuthorisationError` : If the item with the given UUID does not exist or the user does not have access to it. """ if isinstance(collection_uuid, str): @@ -1478,14 +1479,14 @@ def list_collections( Args: top_level_folder_uuid: The unique identifier of the top level folder. collection_uuids: The unique identifiers (UUIDs) of the collections to retrieve. - page_size (int): Number of items to return per page. Default if not specified is 100. Maximum value is 1000. + page_size: Number of items to return per page. Default if not specified is 100. Maximum value is 1000. Returns: The list of collections which match the given criteria. Raises: ValueError: If `top_level_folder_uuid` or any of the collection uuids is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the user does not have access to it. + :class:`~encord.exceptions.AuthorisationError` : If the user does not have access to it. """ if isinstance(top_level_folder_uuid, str): top_level_folder_uuid = UUID(top_level_folder_uuid) @@ -1512,7 +1513,7 @@ def delete_collection(self, collection_uuid: Union[str, UUID]) -> None: Raises: ValueError: If `collection_uuid` is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the user does not have access to it. + :class:`~encord.exceptions.AuthorisationError` : If the user does not have access to it. """ if isinstance(collection_uuid, str): collection_uuid = UUID(collection_uuid) @@ -1529,11 +1530,11 @@ def create_collection( description: The description of the collection. 
Returns: - Collection: Newly created collection. + :class:`~encord.collection.Collection`: Newly created collection. Raises: ValueError: If `top_level_folder_uuid` is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the user does not have access to the folder. + :class:`~encord.exceptions.AuthorisationError` : If the user does not have access to the folder. """ if isinstance(top_level_folder_uuid, str): top_level_folder_uuid = UUID(top_level_folder_uuid) @@ -1551,7 +1552,7 @@ def get_filter_preset(self, preset_uuid: Union[str, UUID]) -> FilterPreset: Raises: ValueError: If `preset_uuid` is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the item with the given UUID does not exist or + :class:`~encord.exceptions.AuthorisationError` : If the item with the given UUID does not exist or the user does not have access to it. """ if isinstance(preset_uuid, str): @@ -1565,14 +1566,14 @@ def get_filter_presets( Args: preset_uuids: The list of unique identifiers (UUIDs) to be retrieved. - page_size (int): Number of items to return per page. Default if not specified is 100. Maximum value is 1000. + page_size: Number of items to return per page. Default if not specified is 100. Maximum value is 1000. Returns: The list of presets which match the given criteria. Raises: ValueError: If any of the preset uuids is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the user does not have access to it. + :class:`~encord.exceptions.AuthorisationError` : If the user does not have access to it. """ internal_preset_uuids: List[UUID] = [ UUID(collection) if isinstance(collection, str) else collection for collection in preset_uuids @@ -1586,14 +1587,14 @@ def list_presets( Args: top_level_folder_uuid: The unique identifier of the top level folder. - page_size (int): Number of items to return per page. Default if not specified is 100. Maximum value is 1000. + page_size: Number of items to return per page. 
Default if not specified is 100. Maximum value is 1000. Returns: The list of presets which match the given criteria. Raises: ValueError: If `top_level_folder_uuid` is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the user does not have access to it. + :class:`~encord.exceptions.AuthorisationError` : If the user does not have access to it. """ if isinstance(top_level_folder_uuid, str): top_level_folder_uuid = UUID(top_level_folder_uuid) @@ -1626,7 +1627,7 @@ def delete_preset(self, preset_uuid: Union[str, UUID]) -> None: Raises: ValueError: If `preset_uuid` is a badly formed UUID. - :class:`encord.exceptions.AuthorizationError` : If the user does not have access to it. + :class:`~encord.exceptions.AuthorisationError` : If the user does not have access to it. """ if isinstance(preset_uuid, str): preset_uuid = UUID(preset_uuid) diff --git a/encord/utilities/storage/cloud_data_migration.py b/encord/utilities/storage/cloud_data_migration.py index e93573833..2fb08ee5f 100644 --- a/encord/utilities/storage/cloud_data_migration.py +++ b/encord/utilities/storage/cloud_data_migration.py @@ -30,17 +30,18 @@ def update_storage_item_cloud_info( configuration changes (and thus the data is accessible through a different cloud integration), or if the data has been moved to a different location (and thus the URL has changed). - :param user_client: The user client to use. - :param item: The item to update. Can be either a StorageItem instance, the item's UUID, or the URL of the item. - :param new_url: The new URL to set. URL will be left unchanged if `None` is passed. - :param new_cloud_integration: The new cloud integration to set. Cloud integration will be left unchanged if `None` - is passed. - :param from_cloud_integration: The cloud integration to update from. Acts as a check: no update will be performed if - cloud integration of the item does not match this value. If `None`, the check will be skipped. 
- :param verify_access: Whether to verify access to the item. - :param skip_missing: if true, no error will be raised if the item is not found. Otherwise, the operation (and - the operations in the same bundle) will be cancelled and an error will be raised. - :param bundle: The optional :class:`encord.http.bundle.Bundle` instance used to group updates into bulk calls. + Args: + user_client: The user client to use. + item: The item to update. Can be either a :class:`~encord.storage.StorageItem` instance, the item's UUID, or the URL of the item. + new_url: The new URL to set. URL will be left unchanged if `None` is passed. + new_cloud_integration: The new cloud integration to set. Cloud integration will be left unchanged if `None` + is passed. + from_cloud_integration: The cloud integration to update from. Acts as a check: no update will be performed if + cloud integration of the item does not match this value. If `None`, the check will be skipped. + verify_access: Whether to verify access to the item. + skip_missing: if true, no error will be raised if the item is not found. Otherwise, the operation (and + the operations in the same bundle) will be cancelled and an error will be raised. + bundle: The optional :class:`encord.http.bundle.Bundle` instance used to group updates into bulk calls. """ if not bundle: with Bundle() as bundle: diff --git a/encord/workflow/stages/agent.py b/encord/workflow/stages/agent.py index 85683be8e..3c7cec06c 100644 --- a/encord/workflow/stages/agent.py +++ b/encord/workflow/stages/agent.py @@ -63,12 +63,12 @@ def proceed( Exactly one of `pathway_name` or `pathway_uuid` must be provided to specify the pathway to take. - **Parameters** - pathway_name (Optional[str]): Name of the pathway to follow. - pathway_uuid (Optional[Union[UUID, str]]): Unique identifier of the pathway to follow. - bundle (Optional[Bundle]): Optional bundle to associate with the action. + Args: + pathway_name: Name of the pathway to follow. 
+ pathway_uuid: Unique identifier of the pathway to follow. + bundle: Optional bundle to associate with the action. - **Raises** + Raises: ValueError: If neither `pathway_name` nor `pathway_uuid` is provided. """ if not pathway_name and not pathway_uuid: @@ -88,13 +88,12 @@ def proceed( def move(self, *, destination_stage_uuid: UUID, bundle: Optional[Bundle] = None) -> None: """Moves the task from its current stage to another stage. - **Parameters** - destination_stage_uuid (UUID): Unique identifier of the stage to move the task to. - bundle (Optional[Bundle]): Optional bundle to associate with the move action. + Args: + destination_stage_uuid: Unique identifier of the stage to move the task to. + bundle: Optional bundle to associate with the move action. - **Returns** - - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.move( @@ -140,17 +139,15 @@ def get_tasks( ) -> Iterable[AgentTask]: """Retrieves tasks for the AgentStage. - **Parameters** - - - `assignee` (Union[List[str], str, None]): A list of user emails or a single user email to filter tasks by assignee. - - `data_hash` (Union[List[UUID], UUID, List[str], str, None]): A list of data unit UUIDs or a single data unit UUID to filter tasks by data hash. - - `dataset_hash` (Union[List[UUID], UUID, List[str], str, None]): A list of dataset UUIDs or a single dataset UUID to filter tasks by dataset hash. - - `data_title` (Optional[str]): A string to filter tasks by data title. - - `status` (Optional[AnnotationTaskStatus | List[AnnotationTaskStatus]]): A status or a list of statuses to filter tasks by their status. - - **Returns** + Args: + assignee: A list of user emails or a single user email to filter tasks by assignee. + data_hash: A list of data unit UUIDs or a single data unit UUID to filter tasks by data hash. + dataset_hash: A list of dataset UUIDs or a single dataset UUID to filter tasks by dataset hash. + data_title: A string to filter tasks by data title. 
+ status: A status or a list of statuses to filter tasks by their status. - An iterable of `AnnotationTask` instances from both non-Consensus and Consensus Projects. + Returns: + An iterable of :class:`~encord.workflow.stages.agent.AgentTask` instances from both non-Consensus and Consensus Projects. """ params = _AgentTasksQueryParams( user_emails=ensure_list(assignee), diff --git a/encord/workflow/stages/annotation.py b/encord/workflow/stages/annotation.py index 66ee39c51..3d7a64868 100644 --- a/encord/workflow/stages/annotation.py +++ b/encord/workflow/stages/annotation.py @@ -64,17 +64,15 @@ def get_tasks( ) -> Iterable[AnnotationTask]: """Retrieves tasks for the AnnotationStage. - **Parameters** - - - `assignee` (Union[List[str], str, None]): A list of user emails or a single user email to filter tasks by assignee. - - `data_hash` (Union[List[UUID], UUID, List[str], str, None]): A list of data unit UUIDs or a single data unit UUID to filter tasks by data hash. - - `dataset_hash` (Union[List[UUID], UUID, List[str], str, None]): A list of dataset UUIDs or a single dataset UUID to filter tasks by dataset hash. - - `data_title` (Optional[str]): A string to filter tasks by data title. - - `status` (Optional[AnnotationTaskStatus | List[AnnotationTaskStatus]]): A status or a list of statuses to filter tasks by their status. - - **Returns** - - An iterable of `AnnotationTask` instances from both non-Consensus and Consensus Projects. + Args: + assignee: A list of user emails or a single user email to filter tasks by assignee. + data_hash: A list of data unit UUIDs or a single data unit UUID to filter tasks by data hash. + dataset_hash: A list of dataset UUIDs or a single dataset UUID to filter tasks by dataset hash. + data_title: A string to filter tasks by data title. + status: A status or a list of statuses to filter tasks by their status.
+ + Returns: + An iterable of :class:`~encord.workflow.stages.AnnotationTask` instances from both non-Consensus and Consensus Projects. """ params = _AnnotationTasksQueryParams( user_emails=ensure_list(assignee), @@ -145,11 +143,10 @@ def submit( ) -> None: """Submits the task for review. - **Parameters** - - - `assignee` (Optional[str]): User email to be assigned to the task whilst submitting the task. - - `retain_assignee` (bool): Retains the current assignee of the task. This is ignored if `assignee` is provided. An Error will occur if the task does not already have an assignee and `retain_assignee` is True. - - `bundle` (Optional[Bundle]): Optional bundle to be included with the submission. + Args: + assignee: User email to be assigned to the task whilst submitting the task. + retain_assignee: Retains the current assignee of the task. This is ignored if `assignee` is provided. An Error will occur if the task does not already have an assignee and `retain_assignee` is True. + bundle: Optional bundle to be included with the submission. """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -161,10 +158,9 @@ def submit( def assign(self, assignee: str, *, bundle: Optional[Bundle] = None) -> None: """Assigns the task to a user. - **Parameters** - - - `assignee` (str): The email of the user to assign the task to. - - `bundle` (Optional[Bundle]): Optional bundle to be included with the assignment. + Args: + assignee: The email of the user to assign the task to. + bundle: Optional bundle to be included with the assignment. """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -176,9 +172,8 @@ def assign(self, assignee: str, *, bundle: Optional[Bundle] = None) -> None: def release(self, *, bundle: Optional[Bundle] = None) -> None: """Releases the task from the current user. - **Parameters** - - - `bundle` (Optional[Bundle]): Optional bundle to be included with the release. 
+ Args: + bundle: Optional bundle to be included with the release. """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -190,14 +185,12 @@ def release(self, *, bundle: Optional[Bundle] = None) -> None: def move(self, *, destination_stage_uuid: UUID, bundle: Optional[Bundle] = None) -> None: """Moves the task from its current stage to another stage. - **Parameters** - - - `destination_stage_uuid` (UUID): Unique identifier of the stage to move the task to. - - `bundle` (Optional[Bundle]): Optional bundle to be included with the move. - - **Returns** + Args: + destination_stage_uuid: Unique identifier of the stage to move the task to. + bundle: Optional bundle to be included with the move. - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.move( diff --git a/encord/workflow/stages/consensus_annotation.py b/encord/workflow/stages/consensus_annotation.py index 2875b97b2..c3eb4a43d 100644 --- a/encord/workflow/stages/consensus_annotation.py +++ b/encord/workflow/stages/consensus_annotation.py @@ -46,22 +46,20 @@ def get_tasks( ) -> Iterable[ConsensusAnnotationTask]: """Retrieves tasks for the ConsensusAnnotationStage. - **Parameters** - - - `assignee` (Union[List[str], str, None]): A list of user emails or a single user email to filter tasks by assignee. - - `data_hash` (Union[List[UUID], UUID, List[str], str, None]): A list of data unit UUIDs or a single data unit UUID to filter tasks by data hash. - - `dataset_hash` (Union[List[UUID], UUID, List[str], str, None]): A list of dataset UUIDs or a single dataset UUID to filter tasks by dataset hash. - - `data_title` (Optional[str]): A string to filter tasks by data title. - - **Returns** - - An iterable of `ConsensusAnnotationTask` instances with the following information: - - `uuid`: Unique identifier for the task. - - `created_at`: Time and date the task was created. - - `updated_at`: Time and date the task was last edited. 
- - `data_hash`: Unique identifier for the data unit. - - `data_title`: Name/title of the data unit. - - `subtasks`: A list of subtasks that follow the task format for `AnnotationTask`. + Args: + assignee: A list of user emails or a single user email to filter tasks by assignee. + data_hash: A list of data unit UUIDs or a single data unit UUID to filter tasks by data hash. + dataset_hash: A list of dataset UUIDs or a single dataset UUID to filter tasks by dataset hash. + data_title: A string to filter tasks by data title. + + Returns: + An iterable of `ConsensusAnnotationTask` instances with the following information: + - `uuid`: Unique identifier for the task. + - `created_at`: Time and date the task was created. + - `updated_at`: Time and date the task was last edited. + - `data_hash`: Unique identifier for the data unit. + - `data_title`: Name/title of the data unit. + - `subtasks`: A list of subtasks that follow the task format for `AnnotationTask`. """ params = _AnnotationTasksQueryParams( user_emails=ensure_list(assignee), @@ -101,14 +99,12 @@ class ConsensusAnnotationTask(WorkflowTask): def move(self, *, destination_stage_uuid: UUID, bundle: Optional[Bundle] = None) -> None: """Moves the consensus annotation task from its current stage to another stage. - **Parameters** + Args: + destination_stage_uuid: Unique identifier of the stage to move the task to. + bundle: Optional bundle to be included with the move. - - `destination_stage_uuid` (UUID): Unique identifier of the stage to move the task to. - - `bundle` (Optional[Bundle]): Optional bundle to be included with the move. 
- - **Returns** - - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.move( diff --git a/encord/workflow/stages/consensus_review.py b/encord/workflow/stages/consensus_review.py index 8ce889ced..ed82a8f45 100644 --- a/encord/workflow/stages/consensus_review.py +++ b/encord/workflow/stages/consensus_review.py @@ -56,24 +56,22 @@ def get_tasks( ) -> Iterable[ConsensusReviewTask]: """Retrieves tasks for the ConsensusReviewStage. - **Parameters** - - - `assignee` (Union[List[str], str, None]): A list of user emails or a single user email to filter tasks by assignee. - - `data_hash` (Union[List[UUID], UUID, List[str], str, None]): A list of data unit UUIDs or a single data unit UUID to filter tasks by data hash. - - `dataset_hash` (Union[List[UUID], UUID, List[str], str, None]): A list of dataset UUIDs or a single dataset UUID to filter tasks by dataset hash. - - `data_title` (Optional[str]): A string to filter tasks by data title. - - `status` (Union[ConsensusReviewTaskStatus, List[ConsensusReviewTaskStatus], None]): A list of task statuses or a single task status to filter tasks by their status. - - **Returns** - - An iterable of `ConsensusReviewTask` instances with the following information: - - `uuid`: Unique identifier for the task. - - `created_at`: Time and date the task was created. - - `updated_at`: Time and date the task was last edited. - - `assignee`: The user currently assigned to the task. The value is None if no one is assigned to the task. - - `data_hash`: Unique identifier for the data unit. - - `data_title`: Name/title of the data unit. - - `options`: List of ConsensusReviewOptions. ConsensusReviewOptions are the labels available for each subtask, including information such as annotator, label_branch_name, and label_hash. + Args: + assignee: A list of user emails or a single user email to filter tasks by assignee. 
+ data_hash: A list of data unit UUIDs or a single data unit UUID to filter tasks by data hash. + dataset_hash: A list of dataset UUIDs or a single dataset UUID to filter tasks by dataset hash. + data_title: A string to filter tasks by data title. + status: A list of task statuses or a single task status to filter tasks by their status. + + Returns: + An iterable of `ConsensusReviewTask` instances with the following information: + - `uuid`: Unique identifier for the task. + - `created_at`: Time and date the task was created. + - `updated_at`: Time and date the task was last edited. + - `assignee`: The user currently assigned to the task. The value is None if no one is assigned to the task. + - `data_hash`: Unique identifier for the data unit. + - `data_title`: Name/title of the data unit. + - `options`: List of ConsensusReviewOptions. ConsensusReviewOptions are the labels available for each subtask, including information such as annotator, label_branch_name, and label_hash. """ params = _ReviewTasksQueryParams( user_emails=ensure_list(assignee), @@ -156,15 +154,13 @@ def approve( ) -> None: """Approve the current task. - **Parameters** + Args: + assignee: User email to be assigned to the review task whilst approving the task. + retain_assignee: Retains the current assignee whilst approving the task. This is ignored if `assignee` is provided. An error will occur if the task does not already have an assignee and `retain_assignee` is True. + bundle: Optional bundle of actions to execute with the approval. - - `assignee` (Optional[str]): User email to be assigned to the review task whilst approving the task. - - `retain_assignee` (bool): Retains the current assignee whilst approving the task. This is ignored if `assignee` is provided. An error will occur if the task does not already have an assignee and `retain_assignee` is True. - - `bundle` (Optional[Bundle]): Optional bundle of actions to execute with the approval. 
- - **Returns** - - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -182,15 +178,13 @@ def reject( ) -> None: """Reject the current task. - **Parameters** - - - `assignee` (Optional[str]): User email to be assigned to the review task whilst rejecting the task. - - `retain_assignee` (bool): Retains the current assignee whilst rejecting the task. This is ignored if `assignee` is provided. An error will occur if the task does not already have an assignee and `retain_assignee` is True. - - `bundle` (Optional[Bundle]): Optional bundle of actions to execute with the rejection. + Args: + assignee: User email to be assigned to the review task whilst rejecting the task. + retain_assignee: Retains the current assignee whilst rejecting the task. This is ignored if `assignee` is provided. An error will occur if the task does not already have an assignee and `retain_assignee` is True. + bundle: Optional bundle of actions to execute with the rejection. - **Returns** - - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -202,14 +196,12 @@ def reject( def assign(self, assignee: str, *, bundle: Optional[Bundle] = None) -> None: """Assign the current task to a user. - **Parameters** - - - `assignee` (str): The user to assign the task to. - - `bundle` (Optional[Bundle]): Optional bundle of actions to execute with the assignment. + Args: + assignee: The user to assign the task to. + bundle: Optional bundle of actions to execute with the assignment. - **Returns** - - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -221,13 +213,11 @@ def assign(self, assignee: str, *, bundle: Optional[Bundle] = None) -> None: def release(self, *, bundle: Optional[Bundle] = None) -> None: """Release the current task from the current user. 
- **Parameters** - - - `bundle` (Optional[Bundle]): Optional bundle of actions to execute with the release. - - **Returns** + Args: + bundle: Optional bundle of actions to execute with the release. - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -239,14 +229,12 @@ def release(self, *, bundle: Optional[Bundle] = None) -> None: def move(self, *, destination_stage_uuid: UUID, bundle: Optional[Bundle] = None) -> None: """Moves the review task from its current stage to another stage. - **Parameters** - - - `destination_stage_uuid` (UUID): Unique identifier of the stage to move the task to. - - `bundle` (Optional[Bundle]): Optional bundle of actions to execute with the move. - - **Returns** + Args: + destination_stage_uuid: Unique identifier of the stage to move the task to. + bundle: Optional bundle of actions to execute with the move. - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.move( diff --git a/encord/workflow/stages/final.py b/encord/workflow/stages/final.py index 29da4475e..49170ec31 100644 --- a/encord/workflow/stages/final.py +++ b/encord/workflow/stages/final.py @@ -41,20 +41,18 @@ def get_tasks( ) -> Iterable[FinalStageTask]: """Retrieves tasks for the FinalStage. - **Parameters** - - - `data_hash` (Union[List[UUID], UUID, List[str], str, None]): Unique ID(s) for the data unit(s). - - `dataset_hash` (Union[List[UUID], UUID, List[str], str, None]): Unique ID(s) for the dataset(s) that the data unit(s) belongs to. - - `data_title` (Optional[str]): A string to filter tasks by the data unit's name. - - **Returns** - - An iterable of `FinalStageTask` instances with the following information: - - `uuid`: Unique identifier for the task. - - `created_at`: Time and date the task was created. - - `updated_at`: Time and date the task was last edited. - - `data_hash`: Unique identifier for the data unit. - - `data_title`: Name/title of the data unit. 
+ Args: + data_hash: Unique ID(s) for the data unit(s). + dataset_hash: Unique ID(s) for the dataset(s) that the data unit(s) belongs to. + data_title: A string to filter tasks by the data unit's name. + + Returns: + An iterable of `FinalStageTask` instances with the following information: + - `uuid`: Unique identifier for the task. + - `created_at`: Time and date the task was created. + - `updated_at`: Time and date the task was last edited. + - `data_hash`: Unique identifier for the data unit. + - `data_title`: Name/title of the data unit. """ params = _FinalTasksQueryParams( data_hashes=ensure_uuid_list(data_hash), @@ -88,14 +86,12 @@ class FinalStageTask(WorkflowTask): def move(self, *, destination_stage_uuid: UUID, bundle: Optional[Bundle] = None) -> None: """Moves the final stage task from its current stage to another stage. - **Parameters** - - - `destination_stage_uuid` (UUID): Unique identifier of the stage to move the task to. - - `bundle` (Optional[Bundle]): Optional bundle of actions to execute with the move. - - **Returns** + Args: + destination_stage_uuid: Unique identifier of the stage to move the task to. + bundle: Optional bundle of actions to execute with the move. - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.move( diff --git a/encord/workflow/stages/review.py b/encord/workflow/stages/review.py index 2b80ace03..607ab5054 100644 --- a/encord/workflow/stages/review.py +++ b/encord/workflow/stages/review.py @@ -72,9 +72,8 @@ def _get_client_data(self) -> Tuple[WorkflowClient, UUID, UUID]: def approve(self, *, bundle: Optional[Bundle] = None): """Approves the review. - **Parameters** - - - `bundle` (Optional[Bundle]): Optional bundle parameter. + Args: + bundle: Optional bundle parameter. """ workflow_client, stage_uuid, task_uuid = self._get_client_data() workflow_client.label_review_action( @@ -90,9 +89,10 @@ def reject( ): """Rejects the review. 
- **Parameters** - - - `bundle` (Optional[Bundle]): Optional bundle parameter. + Args: + comment: Optional comment for the review. + issue_tags: Optional list of tags for the issue. + bundle: Optional bundle parameter. """ workflow_client, stage_uuid, task_uuid = self._get_client_data() workflow_client.label_review_action( @@ -105,9 +105,8 @@ def reject( def reopen(self, *, bundle: Optional[Bundle] = None): """Reopens the review. - **Parameters** - - - `bundle` (Optional[Bundle]): Optional bundle parameter. + Args: + bundle: Optional bundle parameter. """ workflow_client, stage_uuid, task_uuid = self._get_client_data() workflow_client.label_review_action( @@ -150,23 +149,21 @@ def get_tasks( ) -> Iterable[ReviewTask]: """Retrieves tasks for the ReviewStage. - **Parameters** - - - `assignee` (Union[List[str], str, None]): User assigned to a task. - - `data_hash` (Union[List[UUID], UUID, List[str], str, None]): Unique ID for the data unit. - - `dataset_hash` (Union[List[UUID], UUID, List[str], str, None]): Unique ID for the dataset that the data unit belongs to. - - `data_title` (Optional[str]): Name of the data unit. - - `status` (Union[ReviewTaskStatus, List[ReviewTaskStatus], None]): Status of the task. - - **Returns** - - An iterable of `ReviewTask` instances with the following information: - - `uuid`: Unique identifier for the task. - - `created_at`: Time and date the task was created. - - `updated_at`: Time and date the task was last edited. - - `assignee`: The user currently assigned to the task. The value is None if no one is assigned to the task. - - `data_hash`: Unique identifier for the data unit. - - `data_title`: Name/title of the data unit. + Args: + assignee: User assigned to a task. + data_hash: Unique ID for the data unit. + dataset_hash: Unique ID for the dataset that the data unit belongs to. + data_title: Name of the data unit. + status: Status of the task. 
+ + Returns: + An iterable of `ReviewTask` instances with the following information: + - `uuid`: Unique identifier for the task. + - `created_at`: Time and date the task was created. + - `updated_at`: Time and date the task was last edited. + - `assignee`: The user currently assigned to the task. The value is None if no one is assigned to the task. + - `data_hash`: Unique identifier for the data unit. + - `data_title`: Name/title of the data unit. """ params = _ReviewTasksQueryParams( user_emails=ensure_list(assignee), @@ -243,15 +240,13 @@ def approve( ) -> None: """Approves the task. - **Parameters** - - - `assignee` (Optional[str]): User email to be assigned to the task whilst approving the task. - - `retain_assignee` (bool): Retains the current assignee whilst approving the task. This is ignored if `assignee` is provided. An error will occur if the task does not already have an assignee and `retain_assignee` is True. - - `bundle` (Optional[Bundle]): Optional bundle parameter. + Args: + assignee: User email to be assigned to the task whilst approving the task. + retain_assignee: Retains the current assignee whilst approving the task. This is ignored if `assignee` is provided. An error will occur if the task does not already have an assignee and `retain_assignee` is True. + bundle: Optional bundle parameter. - **Returns** - - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -269,15 +264,13 @@ def reject( ) -> None: """Rejects the task. - **Parameters** - - - `assignee` (Optional[str]): User email to be assigned to the task whilst rejecting the task. - - `retain_assignee` (bool): Retains the current assignee whilst rejecting the task. This is ignored if `assignee` is provided. An error will occur if the task does not already have an assignee and `retain_assignee` is True. - - `bundle` (Optional[Bundle]): Optional bundle parameter. 
+ Args: + assignee: User email to be assigned to the task whilst rejecting the task. + retain_assignee: Retains the current assignee whilst rejecting the task. This is ignored if `assignee` is provided. An error will occur if the task does not already have an assignee and `retain_assignee` is True. + bundle: Optional bundle parameter. - **Returns** - - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -289,14 +282,12 @@ def reject( def assign(self, assignee: str, *, bundle: Optional[Bundle] = None) -> None: """Assigns the task to a user. - **Parameters** - - - `assignee` (str): The user to assign the task to. - - `bundle` (Optional[Bundle]): Optional bundle parameter. + Args: + assignee: The user to assign the task to. + bundle: Optional bundle parameter. - **Returns** - - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -308,13 +299,11 @@ def assign(self, assignee: str, *, bundle: Optional[Bundle] = None) -> None: def release(self, *, bundle: Optional[Bundle] = None) -> None: """Releases the task from the current user. - **Parameters** - - - `bundle` (Optional[Bundle]): Optional bundle parameter. + Args: + bundle: Optional bundle parameter. - **Returns** - - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.action( @@ -326,14 +315,12 @@ def release(self, *, bundle: Optional[Bundle] = None) -> None: def move(self, *, destination_stage_uuid: UUID, bundle: Optional[Bundle] = None) -> None: """Moves the review task from its current stage to another stage. - **Parameters** - - - `destination_stage_uuid` (UUID): Unique identifier of the stage to move the task to. - - `bundle` (Optional[Bundle]): Optional bundle parameter. + Args: + destination_stage_uuid: Unique identifier of the stage to move the task to. + bundle: Optional bundle parameter. 
- **Returns** - - None + Returns: + None """ workflow_client, stage_uuid = self._get_client_data() workflow_client.move( @@ -348,16 +335,15 @@ def get_label_reviews( ) -> Iterable[LabelReview]: """Retrieves label reviews for the Review task. - **Parameters** - - `status` (Union[ReviewTaskStatus, List[ReviewTaskStatus], None]): Status of the task. - - **Returns** + Args: + status: Status of the task. - An iterable of `ReviewTask` instances with the following information: - - `uuid`: Unique identifier for label review. - - `status`: Current status of the label review. - - `label_type`: Type of the label. Can be either Object or Classification. - - `label_id`: Unique identifier of the label. + Returns: + An iterable of `LabelReview` instances with the following information: + - `uuid`: Unique identifier for label review. + - `status`: Current status of the label review. + - `label_type`: Type of the label. Can be either Object or Classification. + - `label_id`: Unique identifier of the label. """ workflow_client, stage_uuid = self._get_client_data() for r in workflow_client.get_label_reviews(stage_uuid, self.uuid, type_=LabelReview): diff --git a/encord/workflow/workflow.py b/encord/workflow/workflow.py index 2c64d0150..c1b621186 100644 --- a/encord/workflow/workflow.py +++ b/encord/workflow/workflow.py @@ -97,15 +97,14 @@ def get_stage( uuid: Optional[Union[UUID, str]] = None, type_: Optional[Type[WorkflowStageT]] = None, ) -> WorkflowStageT: - """**Params** - - - name: Name of the stage. - - uuid: Unique identifier for the stage. - - type_: The type of stage. - - **Returns** + """ + Args: + name: Name of the stage. + uuid: Unique identifier for the stage. + type_: The type of stage. - Returns a Workflow stage (`type_`) from non-Consensus and Consensus Projects. + Returns: + A Workflow stage (`type_`) from non-Consensus and Consensus Projects.
""" for stage in self.stages: if (uuid is not None and stage.uuid == _ensure_uuid(uuid)) or (name is not None and stage.title == name): diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 000000000..43cd7f8ae --- /dev/null +++ b/tools/README.md @@ -0,0 +1,233 @@ +# Docstring Quality Linter + +Checks Python docstrings for proper Sphinx role usage, consistent formatting, and cross-references. + +## What It Checks + +- ✅ Sphinx roles (`:class:`, `:meth:`, `:func:`, `:attr:`) in docstrings +- ✅ Section header formatting (`Args:` not `**Args**`) +- ✅ Spelling consistency (`AuthorisationError` not `AuthorizationError`) +- ✅ Exception cross-references in Raises sections +- ✅ Class cross-references throughout docstrings + +## Quick Start + +### Run Manually + +Check a single file: +```bash +python tools/docstring_linter.py --check --config tools/docstring_linter_config.json --files encord/user_client.py +``` + +Check a single file with detailed output (shows all issues with file locations): +```bash +python tools/docstring_linter.py --check --config tools/docstring_linter_config.json --files encord/user_client.py --verbose +``` + +Check all SDK files: +```bash +python tools/docstring_linter.py --check --config tools/docstring_linter_config.json --sdk-path encord/ +``` + +Check all SDK files with detailed output: +```bash +python tools/docstring_linter.py --check --config tools/docstring_linter_config.json --sdk-path encord/ --verbose +``` + +Auto-fix safe issues (section headers, spelling): +```bash +python tools/docstring_linter.py --fix --config tools/docstring_linter_config.json --sdk-path encord/ +``` + +### Pre-commit Hook + +The linter is integrated into the pre-commit hooks. It will automatically check your docstrings before commit.
+ +**Current mode:** Check only (warnings will block commits) + +To run pre-commit hooks manually: +```bash +pre-commit run --all-files +``` + +To run just the docstring linter: +```bash +pre-commit run docstring-linter --all-files +``` + +## Configuration Options + +### Option 1: Strict Mode (Current - Blocks on Warnings) + +Blocks commits if any warnings are found. Good for enforcing high quality. + +```yaml +# In .pre-commit-config.yaml +- id: docstring-linter + entry: python tools/docstring_linter.py --check --config tools/docstring_linter_config.json --files +``` + +### Option 2: Relaxed Mode (Warn Only) + +Only blocks commits on errors, not warnings. Good for gradual adoption. + +To enable, edit `.pre-commit-config.yaml`: + +```yaml +# In .pre-commit-config.yaml +- id: docstring-linter + entry: python tools/docstring_linter.py --check --config tools/docstring_linter_config.json --files + # Add this to make warnings not block: + verbose: true + # Or skip for now and only run manually +``` + +Or temporarily disable: +```yaml +- id: docstring-linter + # Comment out to disable: + # entry: python tools/docstring_linter.py ... +``` + +### Option 3: Auto-fix Mode + +Automatically fixes safe issues on commit. **Use with caution!** + +```yaml +- id: docstring-linter + entry: python tools/docstring_linter.py --fix --config tools/docstring_linter_config.json --files +``` + +## Issue Severity Levels + +- **Error** (exit 1): Critical issues like parse errors - must fix +- **Warning** (exit 1): Important issues - should fix + - Unlinked exceptions + - Invalid section headers + - Spelling inconsistencies +- **Info** (exit 0): Nice to have improvements + - Unlinked class references + - Missing cross-references + +## Recommended Workflow + +### For New Code + +Write docstrings with proper Sphinx roles from the start: + +```python +def get_dataset(self, dataset_hash: str) -> Dataset: + """Get a dataset by hash. + + Args: + dataset_hash: The unique identifier for the dataset. 
+ + Returns: + :class:`~encord.dataset.Dataset`: The dataset instance. + + Raises: + :class:`~encord.exceptions.AuthorisationError`: If the API key is invalid. + :class:`~encord.exceptions.ResourceNotFoundError`: If the dataset is not found. + """ +``` + +### For Existing Code + +When modifying a file with warnings, fix the docstrings in that file: + +1. Run linter with verbose output: `python tools/docstring_linter.py --check --files your_file.py --verbose` +2. Review issues grouped by type with file locations and suggested fixes +3. Apply fixes manually or use `--fix` for auto-fixable issues +4. Verify: `python tools/docstring_linter.py --check --files your_file.py` + +**What does `--verbose` show?** +- Issues grouped by type (same grouping as the summary) +- File paths and line numbers in clickable format (`file.py:123`) +- Suggested fixes for each issue +- Function/method names where issues occur + +Example verbose output: +``` +unlinked_exception (15): +---------------------------------------------------------------------- +encord/client.py:123: ⚠ [get_dataset] Exception "AuthenticationError" should use :class: role + "AuthenticationError" → ":class:`~encord.exceptions.AuthenticationError`" +encord/project.py:456: ⚠ [create_project] Exception "ResourceNotFoundError" should use :class: role + ... + +unlinked_class (8): +---------------------------------------------------------------------- +encord/user_client.py:182: ℹ [get_dataset] Class "Dataset" could use :class: role + "Dataset" → ":class:`~encord.dataset.Dataset`" + ... +``` + +## Skip Hook for Specific Commit + +If you need to commit despite warnings: + +```bash +git commit --no-verify -m "Your message" +``` + +**Use sparingly!** Better to fix the issues.
+ +## Configuring the Linter + +Edit `tools/docstring_linter_config.json` to: + +- Add new SDK classes to detect +- Add new exception types +- Change auto-fix rules +- Modify ignore patterns + +Example: +```json +{ + "sdk_package_name": "encord", + "sdk_classes": [ + "LabelRowV2", + "Dataset", + "YourNewClass" // Add here + ], + "exceptions": [ + "AuthorizationError", + "YourNewException" // Add here + ] +} +``` + +## Generating Reports + +Generate a JSON report of all issues: + +```bash +python tools/docstring_linter.py \ + --check \ + --config tools/docstring_linter_config.json \ + --sdk-path encord/ \ + --report issues.json +``` + +## Command Line Options + +Run with `--help` for full options: +```bash +python tools/docstring_linter.py --help +``` + +Common options: +- `--check` - Check only, don't modify files (dry run) +- `--fix` - Automatically fix issues where possible +- `--verbose` or `-v` - Show detailed output with all issues grouped by type +- `--files FILE [FILE ...]` - Check specific files +- `--sdk-path PATH` - Check all Python files in a directory +- `--report FILE.json` - Save results as JSON report +- `--config FILE.json` - Use custom configuration file + +## Questions? + +See the full documentation in the `encord-docs-mint` repository: +- Comprehensive style guide +- Before/after examples +- Complete setup instructions diff --git a/tools/docstring_linter.py b/tools/docstring_linter.py new file mode 100644 index 000000000..9bd8b2723 --- /dev/null +++ b/tools/docstring_linter.py @@ -0,0 +1,1907 @@ +#!/usr/bin/env python3 +""" +Docstring Linter for Encord SDK + +Checks and fixes docstring formatting to ensure optimal documentation generation. +Enforces Sphinx role usage, consistent section headers, and proper cross-references. 
+ +Usage: + # Check only (dry run, no modifications will be made): + python docstring_linter.py --sdk-path /path/to/encord-client-python/encord --check + + # Check with detailed output (shows all issues grouped by type): + python docstring_linter.py --sdk-path /path/to/encord-client-python/encord --check --verbose + + # Fix issues automatically: + python docstring_linter.py --sdk-path /path/to/encord-client-python/encord --fix + + # Check specific files: + python docstring_linter.py --files user_client.py project.py --check + + # Check specific files with detailed output: + python docstring_linter.py --files user_client.py project.py --check --verbose + + # Generate detailed report: + python docstring_linter.py --sdk-path /path/to/sdk --check --report report.json +""" + +import argparse +import ast +import json +import re +import sys +from dataclasses import dataclass, field +from pathlib import Path +from typing import Dict, List, Optional, Tuple, Union + + +@dataclass +class DocstringIssue: + """Represents a single docstring issue.""" + + file_path: str + line_number: int + function_name: str + issue_type: str + severity: str # 'error', 'warning', 'info' + message: str + original: Optional[str] = None + suggested_fix: Optional[str] = None + + +@dataclass +class LintResult: + """Results from linting operation.""" + + issues: List[DocstringIssue] = field(default_factory=list) + files_checked: int = 0 + files_modified: int = 0 + + def add_issue(self, issue: DocstringIssue): + self.issues.append(issue) + + def get_stats(self) -> Dict[str, int]: + """Get statistics about issues found.""" + stats = { + "total": len(self.issues), + "errors": sum(1 for i in self.issues if i.severity == "error"), + "warnings": sum(1 for i in self.issues if i.severity == "warning"), + "info": sum(1 for i in self.issues if i.severity == "info"), + } + # Count by type + for issue in self.issues: + stats[issue.issue_type] = stats.get(issue.issue_type, 0) + 1 + return stats + + +class 
DocstringLinter: + """Lints Python docstrings for documentation generation.""" + + # Patterns for SDK classes that should be cross-referenced + SDK_CLASS_PATTERNS = [ + r"\bLabelRowV2\b", + r"\bDataset\b", + r"\bProject\b", + r"\bOntologyStructure\b", + r"\bWorkflow\b", + r"\bStorageItem\b", + r"\bStorageFolder\b", + r"\bCollection\b", + r"\bOntology\b", + r"\bProjectUser\b", + r"\bDataRow\b", + r"\bObjectInstance\b", + r"\bClassificationInstance\b", + ] + + # Mapping of class names to their actual import paths + CLASS_TO_MODULE = { + "LabelRowV2": "encord.objects.LabelRowV2", + "Dataset": "encord.dataset.Dataset", + "Project": "encord.project.Project", + "OntologyStructure": "encord.objects.OntologyStructure", + "Workflow": "encord.workflow.Workflow", + "StorageItem": "encord.storage.StorageItem", + "StorageFolder": "encord.storage.StorageFolder", + "Collection": "encord.collection.Collection", + "Ontology": "encord.ontology.Ontology", + "ProjectUser": "encord.utilities.project_user.ProjectUser", + "DataRow": "encord.orm.dataset.DataRow", + "ObjectInstance": "encord.objects.ObjectInstance", + "ClassificationInstance": "encord.objects.ClassificationInstance", + } + + # Known exception names - automatically extracted from exceptions.py + EXCEPTION_CLASSES = { + "EncordException", + "InitialisationError", + "AuthenticationError", + "AuthorisationError", + "ResourceNotFoundError", + "TimeOutError", + "RequestException", + "RateLimitExceededError", + "PayloadTooLargeError", + "UnknownException", + "InvalidDateFormatError", + "MethodNotAllowedError", + "OperationNotAllowed", + "AnswerDictionaryError", + "CorruptedLabelError", + "FileTypeNotSupportedError", + "FileSizeNotSupportedError", + "FeatureDoesNotExistError", + "ModelWeightsInconsistentError", + "ModelFeaturesInconsistentError", + "UploadOperationNotSupportedError", + "DetectionRangeInvalidError", + "InvalidAlgorithmError", + "ResourceExistsError", + "DuplicateSshKeyError", + "SshKeyNotFound", + 
"InvalidArgumentsError", + "GenericServerError", + "CloudUploadError", + "MultiLabelLimitError", + "LabelRowError", + "OntologyError", + "WrongProjectTypeError", + "BundledMoveWorkflowTasksPayloadError", + # Exceptions from other modules + "MetadataSchemaError", + } + + # Build regex patterns for all exceptions + EXCEPTION_PATTERNS = [rf"\b{exc}\b" for exc in EXCEPTION_CLASSES] + + # Normalize exception names to match actual class names + # (Some docstrings use American spelling but the class uses British spelling) + EXCEPTION_NAME_MAPPING = { + "AuthorizationError": "AuthorisationError", # Normalize to British spelling + "AuthorisationError": "AuthorisationError", # Already correct + } + # Default: map each exception to itself + for exc in EXCEPTION_CLASSES: + if exc not in EXCEPTION_NAME_MAPPING: + EXCEPTION_NAME_MAPPING[exc] = exc + + # Exceptions not in encord.exceptions need special path mapping + EXCEPTION_MODULE_MAPPING = { + "MetadataSchemaError": "encord.metadata_schema.MetadataSchemaError", + } + + # Valid Google-style section headers + VALID_SECTIONS = { + "Args:", + "Arguments:", # Removed 'Parameters:' and 'Params:' - use Args: instead + "Returns:", + "Return:", + "Yields:", + "Yield:", + "Raises:", + "Raise:", + "Throws:", + "Note:", + "Notes:", + "Warning:", + "Warnings:", + "Caution:", + "Example:", + "Examples:", + "See Also:", + "See also:", + "Attributes:", + "Deprecated:", + } + + # Invalid section headers (markdown bold or non-standard) + INVALID_SECTION_PATTERNS = [ + r"\*\*Args\*\*:?", # Matches **Args** or **Args:** + r"\*\*Args:\*\*", # Matches **Args:** (bold with colon inside) + r"Args::", # Matches Args:: (double colon) + r"\*\*Returns\*\*:?", # Matches **Returns** or **Returns:** + r"\*\*Returns:\*\*", # Matches **Returns:** (bold with colon inside) + r"Returns::", # Matches Returns:: (double colon) + r"\*\*Raises\*\*:?", # Matches **Raises** or **Raises:** + r"\*\*Raises:\*\*", # Matches **Raises:** (bold with colon inside) + 
r"Raises::", # Matches Raises:: (double colon) + r"\*\*Note\*\*:?", # Matches **Note** or **Note:** + r"\*\*Note:\*\*", # Matches **Note:** (bold with colon inside) + r"Note::", # Matches Note:: (double colon) + r"\*\*Notes\*\*:?", # Matches **Notes** or **Notes:** + r"\*\*Notes:\*\*", # Matches **Notes:** (bold with colon inside) + r"Notes::", # Matches Notes:: (double colon) + r"\*\*Params\*\*:?", # Matches **Params** or **Params:** + r"\*\*Params:\*\*", # Matches **Params:** (bold with colon inside) + r"Params::", # Matches Params:: (double colon) + r"\*\*Parameters\*\*:?", # Matches **Parameters** or **Parameters:** + r"\*\*Parameters:\*\*", # Matches **Parameters:** (bold with colon inside) + r"Parameters::", # Matches Parameters:: (double colon) + r"\*\*Warning\*\*:?", # Matches **Warning** or **Warning:** + r"\*\*Warning:\*\*", # Matches **Warning:** (bold with colon inside) + r"Warning::", # Matches Warning:: (double colon) + r"\*\*Warnings\*\*:?", # Matches **Warnings** or **Warnings:** + r"\*\*Warnings:\*\*", # Matches **Warnings:** (bold with colon inside) + r"Warnings::", # Matches Warnings:: (double colon) + r"\*\*Example\*\*:?", # Matches **Example** or **Example:** + r"\*\*Example:\*\*", # Matches **Example:** (bold with colon inside) + r"Example::", # Matches Example:: (double colon) + r"\*\*Examples\*\*:?", # Matches **Examples** or **Examples:** + r"\*\*Examples:\*\*", # Matches **Examples:** (bold with colon inside) + r"Examples::", # Matches Examples:: (double colon) + r"\*\*Attributes\*\*:?", # Matches **Attributes** or **Attributes:** + r"\*\*Attributes:\*\*", # Matches **Attributes:** (bold with colon inside) + r"Attributes::", # Matches Attributes:: (double colon) + r"\*\*Yields\*\*:?", # Matches **Yields** or **Yields:** + r"\*\*Yields:\*\*", # Matches **Yields:** (bold with colon inside) + r"Yields::", # Matches Yields:: (double colon) + ] + + # Non-standard section headers that should be replaced + DEPRECATED_SECTIONS = { + "Parameters:": 
def lint_file(self, file_path: Path, fix: bool = False) -> LintResult:
    """Lint a single Python file and optionally rewrite it with automatic fixes.

    The file is parsed once and every docstring found in it is checked. When
    ``fix`` is True, fixes are applied in up to five passes; the file is
    re-linted after each pass because one fix can expose (or unblock) another.

    Args:
        file_path: Path to the Python file.
        fix: If True, apply fixes to the file.

    Returns:
        LintResult with issues found. After fixing, the issues are the ones
        still present in the rewritten file, and ``files_modified`` is 1 when
        any pass changed the file.
    """
    result = LintResult()
    result.files_checked = 1

    try:
        content = file_path.read_text(encoding="utf-8")
        tree = ast.parse(content, filename=str(file_path))
    except Exception as e:
        # Unreadable or unparsable files are reported as an issue, not raised,
        # so one bad file does not abort a whole lint run.
        result.add_issue(
            DocstringIssue(
                file_path=str(file_path),
                line_number=0,
                function_name="",
                issue_type="parse_error",
                severity="error",
                message=f"Failed to parse file: {e}",
            )
        )
        return result

    # Check every docstring, then look for classes that have none at all.
    for node, docstring in self._extract_docstrings(tree, file_path):
        result.issues.extend(self._check_docstring(node, docstring, file_path))
    result.issues.extend(self._check_missing_docstrings(tree, file_path))

    if not fix:
        return result

    # Issue types the fixer is expected to handle; built once, not per pass.
    fixable_types = (
        "invalid_section_header",
        "spelling_inconsistency",
        "deprecated_section_header",
        "markdown_list_formatting",
        "inconsistent_indentation",
        "section_spacing",
        "redundant_type_annotation",
        "malformed_class_reference",
        "malformed_method_reference",
        "returns_missing_crossref",
        "incomplete_class_reference",
    )
    max_iterations = 5  # Prevent infinite loops if a fix keeps re-triggering
    file_was_modified = False

    for _ in range(max_iterations):
        has_fixable = any(ftype in issue.issue_type for issue in result.issues for ftype in fixable_types)
        if not has_fixable:
            break

        if not self._apply_fixes(file_path, content, result.issues):
            # No changes made, nothing more to gain from another pass
            break

        file_was_modified = True
        # Re-read and re-check so the next pass works on the rewritten file.
        content = file_path.read_text(encoding="utf-8")
        result = self.lint_file(file_path, fix=False)

    # The re-lint above returns a fresh result, so the modification flag has to
    # be recorded on the object that is actually returned.
    if file_was_modified:
        result.files_modified = 1

    return result
+ + Args: + node: AST node containing the docstring. + docstring: The docstring text. + file_path: Path to the file. + + Returns: + List of issues found. + """ + issues: list[DocstringIssue] = [] + + # Get function/class name and line number + if isinstance(node, ast.Module): + name = "" + line_number = 1 + elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): + name = node.name + line_number = node.lineno + else: + # Shouldn't happen, but satisfy type checker + name = "" + line_number = 1 + + # Check for issues + issues.extend(self._check_unlinked_exceptions(docstring, file_path, line_number, name)) + issues.extend(self._check_unlinked_classes(docstring, file_path, line_number, name)) + issues.extend(self._check_malformed_class_references(docstring, file_path, line_number, name)) + issues.extend(self._check_malformed_method_references(docstring, file_path, line_number, name)) + issues.extend(self._check_incomplete_class_references(docstring, file_path, line_number, name)) + issues.extend(self._check_invalid_section_headers(docstring, file_path, line_number, name)) + issues.extend(self._check_deprecated_sections(docstring, file_path, line_number, name)) + issues.extend(self._check_markdown_list_formatting(docstring, file_path, line_number, name)) + issues.extend(self._check_indentation_consistency(docstring, file_path, line_number, name)) + issues.extend(self._check_section_spacing(docstring, file_path, line_number, name)) + issues.extend(self._check_spelling_inconsistencies(docstring, file_path, line_number, name)) + issues.extend(self._check_returns_section(docstring, file_path, line_number, name)) + + # Check for redundant type annotations (only for functions) + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + issues.extend(self._check_redundant_type_annotations(docstring, file_path, line_number, name, node)) + + return issues + + def _check_unlinked_exceptions( + self, docstring: str, file_path: Path, line_number: int, 
name: str + ) -> List[DocstringIssue]: + """Check for exceptions in Raises section without Sphinx roles.""" + issues: list[DocstringIssue] = [] + + # Find Raises section (match lines with or without trailing newline) + raises_match = re.search(r"Raises?:\s*\n((?:[ \t]+.+(?:\n|$))+)", docstring, re.MULTILINE) + if not raises_match: + return issues + + raises_section = raises_match.group(1) + + # Look for exception names without :class: role + for pattern in self.EXCEPTION_PATTERNS: + for match in re.finditer(pattern, raises_section): + exception_name = match.group(0) + + # Check if it's already in a Sphinx role + start = match.start() + # Look back far enough to catch the full path + prefix = raises_section[max(0, start - 100) : start] + + # Check if we're inside a Sphinx role by finding the last backtick before our position + last_backtick = prefix.rfind("`") + if last_backtick != -1: + # Check if there's a role opener before that backtick + text_before_backtick = prefix[:last_backtick] + if ":class:" in text_before_backtick or ":exc:" in text_before_backtick: + continue + + # Normalize exception name (e.g., AuthorizationError -> AuthorisationError) + normalized_name = self.EXCEPTION_NAME_MAPPING.get(exception_name, exception_name) + + # Get the full path for the exception + if normalized_name in self.EXCEPTION_MODULE_MAPPING: + exception_path = self.EXCEPTION_MODULE_MAPPING[normalized_name] + else: + exception_path = f"{self.sdk_package_name}.exceptions.{normalized_name}" + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="unlinked_exception", + severity="warning", + message=f'Exception "{exception_name}" should use :class: role', + original=exception_name, + suggested_fix=f":class:`~{exception_path}`", + ) + ) + + return issues + + def _check_unlinked_classes( + self, docstring: str, file_path: Path, line_number: int, name: str + ) -> List[DocstringIssue]: + """Check for SDK class 
references without Sphinx roles.""" + issues = [] + + # Skip YAML front matter (between --- markers) if it exists + front_matter_end = 0 + if docstring.startswith("---"): + second_marker = docstring.find("---", 3) + if second_marker != -1: + front_matter_end = second_marker + 3 + + for pattern in self.SDK_CLASS_PATTERNS: + for match in re.finditer(pattern, docstring): + class_name = match.group(0) + start = match.start() + + # Skip if in front matter section + if start < front_matter_end: + continue + + # Check if it's already in a Sphinx role (:class:, :meth:, etc.) + # Look back far enough to catch the full path (e.g., :class:`~encord.project.XXX`) + prefix = docstring[max(0, start - 100) : start] + + # Check if we're inside a Sphinx role by finding the last backtick before our position + last_backtick = prefix.rfind("`") + if last_backtick != -1: + # Check if there's a role opener before that backtick + text_before_backtick = prefix[:last_backtick] + if ( + ":class:" in text_before_backtick + or ":meth:" in text_before_backtick + or ":func:" in text_before_backtick + or ":exc:" in text_before_backtick + or ":attr:" in text_before_backtick + or ":mod:" in text_before_backtick + ): + continue + + # Skip if in code block (indented or triple backticks) + line_start = docstring.rfind("\n", 0, start) + 1 + line = docstring[line_start : start + len(class_name)] + if line.startswith(" ") or line.startswith("```"): + continue + + # Skip if it's the function/class name itself being defined + if class_name == name: + continue + + # Use the mapping to get the correct module path, or fall back to naive approach + if class_name in self.CLASS_TO_MODULE: + suggested_path = self.CLASS_TO_MODULE[class_name] + else: + # Fallback: assume module name is class name in lowercase + suggested_path = f"{self.sdk_package_name}.{class_name.lower()}.{class_name}" + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + 
issue_type="unlinked_class", + severity="info", + message=f'Class "{class_name}" could use :class: role for cross-reference', + original=class_name, + suggested_fix=f":class:`~{suggested_path}`", + ) + ) + + return issues + + def _check_malformed_class_references( + self, docstring: str, file_path: Path, line_number: int, name: str + ) -> List[DocstringIssue]: + """Check for malformed :class: role syntax.""" + issues = [] + + # Pattern to find malformed :class: references + # Valid: :class:`path.to.Class` or :class:`~path.to.Class` + # Invalid: :class:`Class [path]` or :class:`Class path` + + # Find :class: with brackets like :class:`Name [path]` + bracket_pattern = r":class:`([^`\[]+)\s*\[([^\]]+)\]`" + for match in re.finditer(bracket_pattern, docstring): + _display_name = match.group(1).strip() # Extracted but not used, path is authoritative + path = match.group(2).strip() + original = match.group(0) + + # Suggest proper format + suggested_fix = f":class:`~{path}`" + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="malformed_class_reference", + severity="error", + message=f"Malformed :class: syntax. Use :class:`~{path}` instead of brackets.", + original=original, + suggested_fix=suggested_fix, + ) + ) + + # Find :class: with space-separated name and path like :class:`Name path.to.Name` + space_pattern = r":class:`([A-Z]\w+)\s+([a-z_][\w.]+[A-Z]\w+)`" + for match in re.finditer(space_pattern, docstring): + _display_name = match.group(1) # Extracted but not used, path is authoritative + path = match.group(2) + original = match.group(0) + + # Suggest proper format + suggested_fix = f":class:`~{path}`" + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="malformed_class_reference", + severity="error", + message=f"Malformed :class: syntax. 
Use :class:`~{path}` instead of space separation.", + original=original, + suggested_fix=suggested_fix, + ) + ) + + return issues + + def _check_malformed_method_references( + self, docstring: str, file_path: Path, line_number: int, name: str + ) -> List[DocstringIssue]: + """Check for malformed :meth: role syntax.""" + issues = [] + + # Pattern to find malformed :meth: references + # Valid: :meth:`path.to.method` or :meth:`~path.to.method` + # Invalid: :meth:`Name [path]` or :meth:`Name path` + + # Find :meth: with brackets like :meth:`method_name [path.to.method]` + bracket_pattern = r":meth:`([^`\[]+)\s*\[([^\]]+)\]`" + for match in re.finditer(bracket_pattern, docstring): + _display_name = match.group(1).strip() # Extracted but not used, path is authoritative + path = match.group(2).strip() + original = match.group(0) + + # Suggest proper format + suggested_fix = f":meth:`~{path}`" + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="malformed_method_reference", + severity="error", + message=f"Malformed :meth: syntax. Use :meth:`~{path}` instead of brackets.", + original=original, + suggested_fix=suggested_fix, + ) + ) + + # Find :meth: with space-separated name and path like :meth:`method_name path.to.method` + space_pattern = r":meth:`([a-z_]\w+)\s+([a-z_][\w.]+\.[a-z_]\w+)`" + for match in re.finditer(space_pattern, docstring): + _display_name = match.group(1) # Extracted but not used, path is authoritative + path = match.group(2) + original = match.group(0) + + # Suggest proper format + suggested_fix = f":meth:`~{path}`" + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="malformed_method_reference", + severity="error", + message=f"Malformed :meth: syntax. 
Use :meth:`~{path}` instead of space separation.", + original=original, + suggested_fix=suggested_fix, + ) + ) + + return issues + + def _check_incomplete_class_references( + self, docstring: str, file_path: Path, line_number: int, name: str + ) -> List[DocstringIssue]: + """Check for :class: references that are missing the tilde and full path. + + For example: :class:`EncordException` should be :class:`~encord.exceptions.EncordException` + """ + issues = [] + + # Pattern 1: Find :class:`ClassName` without tilde and full path + # Match :class:`SomeClass` but not :class:`~path.to.SomeClass` or :class:`path.to.SomeClass` + pattern_with_backticks = r":class:`([A-Z]\w+)`" + + for match in re.finditer(pattern_with_backticks, docstring): + class_name = match.group(1) + original = match.group(0) + + # Check if it's an exception class + if class_name in self.EXCEPTION_CLASSES: + # It's an exception - get the correct path + normalized_name = self.EXCEPTION_NAME_MAPPING.get(class_name, class_name) + if normalized_name in self.EXCEPTION_MODULE_MAPPING: + exception_path = self.EXCEPTION_MODULE_MAPPING[normalized_name] + else: + exception_path = f"{self.sdk_package_name}.exceptions.{normalized_name}" + suggested_fix = f":class:`~{exception_path}`" + elif class_name in self.CLASS_TO_MODULE: + # It's a known SDK class - use the mapping + suggested_path = self.CLASS_TO_MODULE[class_name] + suggested_fix = f":class:`~{suggested_path}`" + else: + # Unknown class - skip it + continue + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="incomplete_class_reference", + severity="warning", + message=f':class: reference for "{class_name}" is missing tilde and full path', + original=original, + suggested_fix=suggested_fix, + ) + ) + + # Pattern 2: Find :class:ClassName without backticks at all + # This is malformed - should be :class:`~path.to.ClassName` + pattern_no_backticks = r":class:([A-Z]\w+)\b" + + for match 
in re.finditer(pattern_no_backticks, docstring): + class_name = match.group(1) + original = match.group(0) + + # Check if it's an exception class + if class_name in self.EXCEPTION_CLASSES: + # It's an exception - get the correct path + normalized_name = self.EXCEPTION_NAME_MAPPING.get(class_name, class_name) + if normalized_name in self.EXCEPTION_MODULE_MAPPING: + exception_path = self.EXCEPTION_MODULE_MAPPING[normalized_name] + else: + exception_path = f"{self.sdk_package_name}.exceptions.{normalized_name}" + suggested_fix = f":class:`~{exception_path}`" + elif class_name in self.CLASS_TO_MODULE: + # It's a known SDK class - use the mapping + suggested_path = self.CLASS_TO_MODULE[class_name] + suggested_fix = f":class:`~{suggested_path}`" + else: + # Unknown class - skip it + continue + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="incomplete_class_reference", + severity="error", + message=f':class: reference for "{class_name}" is missing backticks, tilde and full path', + original=original, + suggested_fix=suggested_fix, + ) + ) + + return issues + + def _check_invalid_section_headers( + self, docstring: str, file_path: Path, line_number: int, name: str + ) -> List[DocstringIssue]: + """Check for invalid section headers (e.g., **Args** instead of Args:).""" + issues = [] + + for pattern in self.INVALID_SECTION_PATTERNS: + for match in re.finditer(pattern, docstring): + invalid_header = match.group(0) + + # Skip if this section has markdown list formatting (will be handled by markdown_list_formatting check) + # Check if the section is followed by blank line(s) and then markdown list items + pos = match.end() + remaining = docstring[pos : pos + 200] # Check next 200 chars + has_markdown_list = re.search(r"^\s*\n(\s*\n)?\s*-\s+", remaining) + if has_markdown_list: + continue # Skip - will be handled by markdown_list_formatting + + # Extract the section name (strip both asterisks and 
colons) + # Strip colons first, then asterisks (order matters for **Note**: format) + section_name = invalid_header.rstrip(":").strip("*") + valid_header = f"{section_name}:" + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="invalid_section_header", + severity="warning", + message=f'Use "{valid_header}" instead of "{invalid_header}"', + original=invalid_header, + suggested_fix=valid_header, + ) + ) + + return issues + + def _check_deprecated_sections( + self, docstring: str, file_path: Path, line_number: int, name: str + ) -> List[DocstringIssue]: + """Check for deprecated section headers like Parameters: that should be Args:.""" + issues = [] + + for deprecated, replacement in self.DEPRECATED_SECTIONS.items(): + # Look for the deprecated section header at the start of a line (with indentation) + pattern = rf"^\s*{re.escape(deprecated)}" + for match in re.finditer(pattern, docstring, re.MULTILINE): + # Skip if this section has markdown list formatting (will be handled by that check) + # Check if there's a markdown list after this header + pos = match.end() + remaining = docstring[pos : pos + 500] # Check next 500 chars + if re.search(r"^\s*\n(\s*\n)?\s*-\s+", remaining): + continue # Skip - will be handled by markdown_list_formatting + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="deprecated_section_header", + severity="warning", + message=f'Use "{replacement}" instead of "{deprecated}" (Google style)', + original=deprecated, + suggested_fix=replacement, + ) + ) + + return issues + + def _check_markdown_list_formatting( + self, docstring: str, file_path: Path, line_number: int, name: str + ) -> List[DocstringIssue]: + """Check for markdown list formatting in Args/Returns sections (should be Google Style).""" + issues = [] + + # Pattern to find Args/Parameters/Returns followed by markdown lists + # Matches 
"Args:", "**Args**", or "**Args:**" formats + # Looks for section header followed by optional blank line, then lines starting with "- " + # The (?:\n|$) handles both lines with newlines and the last line without trailing newline + section_pattern = r"(\*\*(Args|Parameters|Params|Returns|Yields|Raises)\*\*:?|(Args|Parameters|Params|Returns|Yields|Raises):)\s*\n(\s*\n)?((?:\s*-\s+.*(?:\n|$))+)" + + for match in re.finditer(section_pattern, docstring, re.MULTILINE): + # Extract section name from either **Name** or Name: format + if match.group(2): # **Name** format + section_name_raw = match.group(2) + else: # Name: format + section_name_raw = match.group(3) + + _blank_line = match.group(4) or "" # Captured for regex structure, not used + markdown_list = match.group(5) + + # Normalize section name to Google Style + section_name = section_name_raw + if section_name in ("Parameters", "Params"): + section_name = "Args" + + # Check if this is actually a markdown list (has "- " pattern) + if re.search(r"^\s*-\s+", markdown_list, re.MULTILINE): + # Convert markdown list to Google Style + converted_lines = [] + for line in markdown_list.splitlines(): + line = line.strip() + if line.startswith("- "): + # Remove leading "- " + line = line[2:] + + # Remove backticks around parameter name + # Pattern: `param_name` (Type): description + line = re.sub(r"`([^`]+)`\s*\([^)]+\):", r"\1:", line) + + # Or just: `param_name`: description + line = re.sub(r"`([^`]+)`:", r"\1:", line) + + # Add proper indentation (4 spaces) + converted_lines.append(f" {line}") + + suggested_section = f"{section_name}:\n" + "\n".join(converted_lines) + "\n" + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="markdown_list_formatting", + severity="warning", + message=f'Section uses markdown list format, should use Google Style "{section_name}:"', + original=match.group(0), # Full matched section + suggested_fix=suggested_section, + ) 
+ ) + + return issues + + def _check_indentation_consistency( + self, docstring: str, file_path: Path, line_number: int, name: str + ) -> List[DocstringIssue]: + """Check for inconsistent indentation in Args/Returns/Raises sections. + + Parameter definition lines should have 4 spaces of indentation. + Continuation lines (that don't start with param_name:) should have 8 spaces. + """ + issues = [] + + # Pattern to find common docstring sections in Google Style (no markdown lists) + section_pattern = r"(Args|Returns|Yields|Raises|Note|Notes|Warning|Warnings|See Also|Attributes|Examples?):[ \t]*\n((?:(?!(?:Args|Returns|Yields|Raises|Note|Notes|Warning|Warnings|See Also|Attributes|Examples?):)(?:(?![ \t]*-\s+).*\n|\n))*(?:(?!(?:Args|Returns|Yields|Raises|Note|Notes|Warning|Warnings|See Also|Attributes|Examples?):)(?![ \t]*-\s+).+)?)" + + for match in re.finditer(section_pattern, docstring, re.MULTILINE): + section_name = match.group(1) + section_content = match.group(2) + + # Only check indentation for lines before the first blank line + # Special case: if there's a blank line immediately after the section header, + # check the lines after it until the next blank line + lines = section_content.splitlines() + + # Find the first blank line + first_blank_idx = None + for i, line in enumerate(lines): + if not line.strip(): + first_blank_idx = i + break + + # Determine which lines to check + if first_blank_idx == 0: + # Blank line immediately after section header - check lines after it + # Find the next blank line + second_blank_idx = None + for i in range(1, len(lines)): + if not lines[i].strip(): + second_blank_idx = i + break + + if second_blank_idx is not None: + lines_to_check = lines[1:second_blank_idx] + else: + lines_to_check = lines[1:] + elif first_blank_idx is not None: + # Blank line found later - check lines before it + lines_to_check = lines[:first_blank_idx] + else: + # No blank line - check all lines + lines_to_check = lines + + if len(lines_to_check) == 0: 
+ continue + + # Analyze line types: parameter definitions vs continuation lines + # A parameter line typically has format: "name: description" or "name (type): description" + # For Raises sections, also match Sphinx roles: ":class:`ExceptionName`: description" + # Also match incomplete Sphinx roles: ":class:ExceptionName: description" + param_line_pattern = r"^\s*(\w+(\s*\([^)]+\))?|:[a-z_:]+:`[^`]+`|:[a-z_:]+:\w+)\s*:" + + # Section headers that should not be treated as parameter lines + section_headers = { + "Args:", + "Returns:", + "Yields:", + "Raises:", + "Attributes:", + "Note:", + "Notes:", + "Example:", + "Examples:", + "See Also:", + "Parameters:", + "Warning:", + "Warnings:", + } + + # For Args/Raises sections: distinguish param lines (4 spaces) from continuation lines (8 spaces) + # For Returns/Yields sections: all content lines should have 4 spaces (no param lines) + is_param_section = section_name in ["Args", "Raises"] + + has_incorrect_indent = False + lines_with_endings = section_content.splitlines(keepends=True) + fixed_lines = [] + + # Determine the range of lines to modify based on blank line position + if first_blank_idx == 0: + # Blank line immediately after header - modify lines after first blank until second blank + second_blank_idx = None + for i in range(1, len(lines)): + if not lines[i].strip(): + second_blank_idx = i + break + modify_start = 1 + modify_end = second_blank_idx if second_blank_idx is not None else len(lines) + elif first_blank_idx is not None: + # Blank line found later - modify lines before it + modify_start = 0 + modify_end = first_blank_idx + else: + # No blank line - modify all lines + modify_start = 0 + modify_end = len(lines) + + for i, original_line in enumerate(lines_with_endings): + if i < modify_start or i >= modify_end: + # Outside the range to modify - keep as-is + fixed_lines.append(original_line) + elif not original_line.strip(): + # Keep blank lines as-is + fixed_lines.append(original_line) + else: + # 
Non-empty line before first blank + stripped = original_line.lstrip() + current_indent = len(original_line) - len(stripped) + + # Check if this is a section header (should not be treated as parameter line) + is_section_header = stripped in section_headers + + if is_section_header: + # Section headers should be left as-is (don't modify indentation) + fixed_lines.append(original_line) + elif is_param_section: + # Args/Raises sections: check if this is a parameter definition line or continuation + is_param_line = re.match(param_line_pattern, original_line) + + if is_param_line: + # Parameter definition: should have 4 spaces + expected_indent = 4 + else: + # Continuation line: should have 8 spaces + expected_indent = 8 + + if current_indent != expected_indent: + has_incorrect_indent = True + fixed_line = " " * expected_indent + stripped + fixed_lines.append(fixed_line) + else: + fixed_lines.append(original_line) + else: + # Returns/Yields sections: all description lines should have 4 spaces + expected_indent = 4 + + if current_indent != expected_indent: + has_incorrect_indent = True + fixed_line = " " * expected_indent + stripped + fixed_lines.append(fixed_line) + else: + fixed_lines.append(original_line) + + if has_incorrect_indent: + fixed_section = "".join(fixed_lines) + + # Include section header to make the replacement unique + # This prevents replacing the wrong occurrence when multiple functions have similar content + section_header = section_name + ":\n" # e.g., "Returns:\n" + original_with_header = section_header + section_content + fixed_with_header = section_header + fixed_section + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="inconsistent_indentation", + severity="warning", + message=f"Inconsistent indentation in {section_name} section", + original=original_with_header, + suggested_fix=fixed_with_header, + ) + ) + + return issues + + def _check_section_spacing( + self, 
docstring: str, file_path: Path, line_number: int, name: str + ) -> List[DocstringIssue]: + """Check that there's exactly 1 blank line between sections. + + Google Style requires exactly 1 blank line between sections like Args, Returns, Raises, etc. + """ + issues = [] + + # Pattern to find section headers in Google Style + section_pattern = ( + r"(Args|Returns|Yields|Raises|Note|Notes|Example|Examples|See Also|Attributes|Parameters):[ \t]*\n" + ) + + # Find all section headers + sections = list(re.finditer(section_pattern, docstring, re.MULTILINE)) + + # Check spacing before the first section (if there's content before it) + if sections: + first_section = sections[0] + before_first = docstring[: first_section.start()] + + # Only check if there's actual content before the first section + # (not just the opening """ or blank lines) + if before_first.strip(): + # Count trailing blank lines before the section + lines = before_first.split("\n") + trailing_empty = 0 + for line in reversed(lines): + if line.strip(): + break + trailing_empty += 1 + + num_blank_lines = trailing_empty - 1 + + # Google Style requires exactly 1 blank line after summary and before first section + if num_blank_lines != 1: + # Find the last non-blank line + last_nonblank_idx = len(lines) - trailing_empty - 1 + + if last_nonblank_idx >= 0: + # Reconstruct: content lines + exactly 1 blank line + fixed_lines = lines[: last_nonblank_idx + 1] + ["", ""] + + original = "\n".join(lines) + fixed = "\n".join(fixed_lines) + + if original != fixed: + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="section_spacing", + severity="warning", + message=f"Expected exactly 1 blank line before {first_section.group(1)} section (found {num_blank_lines})", + original=original, + suggested_fix=fixed, + ) + ) + + # Check spacing between consecutive sections + for i in range(len(sections) - 1): + current_section = sections[i] + next_section = 
sections[i + 1] + + # Get the text between the end of current section header and start of next section header + between_text = docstring[current_section.end() : next_section.start()] + + # Count blank lines at the end (between sections) + # Split by newline - when we have "content\n\n", split gives ['content', '', ''] + # The number of trailing empty strings minus 1 is the number of blank lines + lines = between_text.split("\n") + + # Count empty strings from the end + trailing_empty = 0 + for line in reversed(lines): + if line.strip(): + break + trailing_empty += 1 + + # Number of blank lines = trailing_empty - 1 + # (the last empty string is just after the final \n of the last content line) + num_blank_lines = trailing_empty - 1 + + # Google Style requires exactly 1 blank line between sections + if num_blank_lines != 1: + # Find the last non-blank line + last_nonblank_idx = len(lines) - trailing_empty - 1 + + if last_nonblank_idx >= 0: + # Reconstruct: content lines + exactly 1 blank line before next section + # The blank line is represented by adding an empty string, then the final empty string + # This gives us: content\n\n (one blank line) + fixed_lines = lines[: last_nonblank_idx + 1] + ["", ""] + + # Include section headers to make the replacement unique + # This prevents replacing the wrong occurrence when multiple functions have similar content + current_header = current_section.group(0) # e.g., "Returns:\n" + next_header = next_section.group(0) # e.g., "Raises:\n" + + original_with_headers = current_header + "\n".join(lines) + next_header + fixed_with_headers = current_header + "\n".join(fixed_lines) + next_header + + # Only report if there's an actual difference + if original_with_headers != fixed_with_headers: + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="section_spacing", + severity="warning", + message=f"Expected exactly 1 blank line between {current_section.group(1)} and 
{next_section.group(1)} sections (found {num_blank_lines})", + original=original_with_headers, + suggested_fix=fixed_with_headers, + ) + ) + + return issues + + def _check_redundant_type_annotations( + self, + docstring: str, + file_path: Path, + line_number: int, + name: str, + node: Union[ast.FunctionDef, ast.AsyncFunctionDef], + ) -> List[DocstringIssue]: + """Check for redundant type annotations in docstrings when function has type hints.""" + issues: list[DocstringIssue] = [] + + # Get function arguments with type annotations + typed_params = set() + for arg in node.args.args: + if arg.annotation is not None: + typed_params.add(arg.arg) + # Also check kwonly args + for arg in node.args.kwonlyargs: + if arg.annotation is not None: + typed_params.add(arg.arg) + + if not typed_params: + # No type annotations in signature, nothing to check + return issues + + # Find Args sections + # Pattern matches Args/Parameters section and captures all content until next section or end + # Uses (?:\n|$) to match lines with or without trailing newlines (last line might not have \n) + section_pattern = r"(Args|Parameters):[ \t]*\n((?:(?!(?:Args|Returns|Yields|Raises|Example|Note):).*(?:\n|$))*)" + + for match in re.finditer(section_pattern, docstring, re.MULTILINE): + section_name = match.group(1) + section_content = match.group(2) + + # Pattern to match parameter lines with type annotations + # Format: " param_name (type): description" + # The type can contain nested brackets, commas, spaces, etc. 
+ param_with_type_pattern = r"^(\s*)(\w+)\s*\(([^)]+(?:\([^)]*\))*[^)]*)\)\s*:\s*(.*)$" + + lines = section_content.splitlines(keepends=True) + fixed_lines = [] + has_changes = False + + for line in lines: + match_param = re.match(param_with_type_pattern, line) + if match_param: + _indent = match_param.group(1) # Original indent, will be normalized + param_name = match_param.group(2) + _type_annotation = match_param.group(3) # Extracted for detection, will be removed + description = match_param.group(4) + + # If this parameter has a type annotation in the function signature, + # remove it from the docstring + if param_name in typed_params: + has_changes = True + # Reconstruct line without type annotation + # Always use 4 spaces for parameter lines in Args sections + correct_indent = " " # 4 spaces + fixed_line = ( + f"{correct_indent}{param_name}: {description}\n" + if line.endswith("\n") + else f"{correct_indent}{param_name}: {description}" + ) + fixed_lines.append(fixed_line) + else: + # Keep the type annotation if parameter doesn't have one in signature + fixed_lines.append(line) + else: + # Not a parameter line (could be continuation or blank line) + fixed_lines.append(line) + + if has_changes: + fixed_section = "".join(fixed_lines) + + issues.append( + DocstringIssue( + file_path=str(file_path), + line_number=line_number, + function_name=name, + issue_type="redundant_type_annotation", + severity="warning", + message=f"Type annotations in {section_name} section are redundant when function has type hints", + original=section_content, + suggested_fix=fixed_section, + ) + ) + + return issues + + def _check_spelling_inconsistencies( + self, docstring: str, file_path: Path, line_number: int, name: str + ) -> List[DocstringIssue]: + """Check for spelling inconsistencies.""" + issues: list[DocstringIssue] = [] + + # Note: AuthorisationError uses British spelling intentionally + # (that's the actual class name in encord.exceptions) + + return issues + + def 
def _check_returns_section(
    self, docstring: str, file_path: Path, line_number: int, name: str
) -> List[DocstringIssue]:
    """Check the Returns section for SDK classes lacking a ``:class:`` role.

    Args:
        docstring: Docstring text to inspect.
        file_path: File containing the function; recorded on each issue.
        line_number: Line number recorded on each issue.
        name: Function name recorded on each issue.

    Returns:
        One "returns_missing_crossref" issue per bare SDK class mention found
        in the Returns section; empty when there is no such section.
    """
    issues: list[DocstringIssue] = []

    # (?:\n|$) lets the last line match without a trailing newline, so a
    # Returns section that ends the docstring is still inspected.
    returns_match = re.search(r"Returns?:\s*\n((?:[ \t]+.+(?:\n|$))+)", docstring, re.MULTILINE)
    if not returns_match:
        return issues

    returns_section = returns_match.group(1)

    for pattern in self.SDK_CLASS_PATTERNS:
        for match in re.finditer(pattern, returns_section):
            class_name = match.group(0)

            # Skip only names that sit inside a still-open role. A role that
            # was already closed earlier on the line must not hide a later
            # bare mention, so use the shared helper that checks closure.
            if self._is_within_sphinx_role(returns_section, match.start()):
                continue

            if class_name in self.CLASS_TO_MODULE:
                suggested_path = self.CLASS_TO_MODULE[class_name]
            else:
                # Fallback: assume the module is the lower-cased class name.
                suggested_path = f"{self.sdk_package_name}.{class_name.lower()}.{class_name}"

            issues.append(
                DocstringIssue(
                    file_path=str(file_path),
                    line_number=line_number,
                    function_name=name,
                    issue_type="returns_missing_crossref",
                    severity="info",
                    message=f'Class "{class_name}" in Returns section should use :class: role',
                    original=class_name,
                    suggested_fix=f":class:`~{suggested_path}`",
                )
            )

    return issues
def _apply_fixes(self, file_path: Path, content: str, issues: List[DocstringIssue]) -> bool:
    """Apply automatic fixes to a file.

    Args:
        file_path: Path to the file.
        content: Current file content.
        issues: List of issues to fix.

    Returns:
        True if file was modified, False otherwise.
    """
    # Issue types grouped by replacement strategy. Merged issues carry
    # "+"-joined type names, so membership is tested by substring.
    line_based_types = (
        "markdown_list_formatting",
        "inconsistent_indentation",
        "section_spacing",
        "redundant_type_annotation",
    )
    docstring_only_types = (
        "malformed_class_reference",
        "malformed_method_reference",
        "returns_missing_crossref",
        "incomplete_class_reference",
    )
    simple_types = (
        "invalid_section_header",
        "spelling_inconsistency",
        "deprecated_section_header",
    )

    fixable_issues = [
        i for i in issues if i.original and i.suggested_fix and i.severity in ("error", "warning", "info")
    ]
    if not fixable_issues:
        return False

    # markdown_list_formatting rewrites the header as part of its own fix, so
    # a separate invalid_section_header fix for the same function is dropped.
    markdown_formatting_functions = {
        i.function_name for i in fixable_issues if i.issue_type == "markdown_list_formatting"
    }
    fixable_issues = [
        i
        for i in fixable_issues
        if not (i.issue_type == "invalid_section_header" and i.function_name in markdown_formatting_functions)
    ]

    # Merge fixes that target the same original text in the same function;
    # after the first is applied the text is gone and the others could not match.
    merged_issues = []
    processed = set()
    for i, issue in enumerate(fixable_issues):
        if i in processed:
            continue
        processed.add(i)

        conflicts = [
            (j, other)
            for j, other in enumerate(fixable_issues[i + 1 :], start=i + 1)
            if other.original == issue.original and other.function_name == issue.function_name
        ]
        if not conflicts:
            merged_issues.append(issue)
            continue

        combined_fix = issue.suggested_fix or ""
        for j, conflict in conflicts:
            if conflict.issue_type == "section_spacing" and conflict.suggested_fix is not None:
                # Carry over the trailing-newline count of the spacing fix.
                fix_newlines = len(conflict.suggested_fix) - len(conflict.suggested_fix.rstrip("\n"))
                combined_fix = combined_fix.rstrip("\n") + "\n" * fix_newlines
            processed.add(j)

        merged_issues.append(
            DocstringIssue(
                file_path=issue.file_path,
                line_number=issue.line_number,
                function_name=issue.function_name,
                issue_type=f"{issue.issue_type}+{'+'.join(c.issue_type for _, c in conflicts)}",
                severity=issue.severity,
                message=f"{issue.message} (combined with {len(conflicts)} other fixes)",
                original=issue.original,
                suggested_fix=combined_fix,
            )
        )

    fixable_issues = merged_issues

    # Apply markdown_list_formatting first so other fixes cannot change the
    # headers it needs to match. Substring test keeps merged types such as
    # "markdown_list_formatting+section_spacing" at the front as well.
    fixable_issues.sort(key=lambda item: 0 if "markdown_list_formatting" in item.issue_type else 1)

    applied = 0
    for issue in fixable_issues:
        if issue.original is None or issue.suggested_fix is None:
            continue

        kind = issue.issue_type
        if any(ftype in kind for ftype in line_based_types):
            # Line-based formatting: the original is dedented, the file is not.
            updated = self._replace_with_indentation(content, issue.original, issue.suggested_fix)
        elif any(ftype in kind for ftype in docstring_only_types):
            # Inline text: only replace inside docstrings.
            updated = self._replace_in_docstrings(content, issue.original, issue.suggested_fix)
        elif any(ftype in kind for ftype in simple_types):
            # NOTE(review): plain replace also touches matching text outside
            # docstrings; confirm that is acceptable for these issue types.
            updated = content.replace(issue.original, issue.suggested_fix)
        else:
            continue

        if updated != content:
            content = updated
            applied += 1

    if applied:
        file_path.write_text(content, encoding="utf-8")
        # Report fixes that actually changed the file, not mere candidates.
        print(f" ✓ Fixed {applied} issues in {file_path.name}")

    return applied > 0
def _is_within_docstring(self, content: str, pos: int) -> bool:
    """Check if a position in content is within a triple-quoted string.

    Delimiters before ``pos`` are scanned in order and only the delimiter
    that opened the current string can close it, so a ``'''`` inside a
    ``\"\"\"`` docstring (or vice versa) does not flip the state.

    Args:
        content: Text to scan.
        pos: Offset into ``content``.

    Returns:
        True if a triple-quoted string is still open at ``pos``.
    """
    open_delim = None
    for token in re.finditer(r'"""|\'\'\'', content[:pos]):
        delim = token.group()
        if open_delim is None:
            open_delim = delim
        elif delim == open_delim:
            open_delim = None
    return open_delim is not None


def _is_within_sphinx_role(self, content: str, pos: int) -> bool:
    """Check if a position is within a Sphinx role like :class:`...`.

    Args:
        content: Text to inspect.
        pos: Offset into ``content``.

    Returns:
        True if, within the 200 characters before ``pos``, a role opener is
        followed by an odd number of backticks (i.e. the role is still open).
    """
    prefix = content[max(0, pos - 200) : pos]

    last_backtick = prefix.rfind("`")
    if last_backtick == -1:
        return False

    text_before_backtick = prefix[:last_backtick]
    for role in (":class:", ":meth:", ":func:", ":exc:", ":attr:", ":mod:", ":data:", ":const:"):
        role_start = text_before_backtick.rfind(role)
        # An odd backtick count after the role opener means it was not closed.
        if role_start != -1 and prefix[role_start:].count("`") % 2 == 1:
            return True
    return False


def _replace_in_docstrings(self, content: str, original: str, replacement: str) -> str:
    """Replace text only within docstrings and not within Sphinx roles.

    This is for inline replacements like class names that appear in the
    middle of lines. When ``original`` starts or ends with an identifier
    character, a match is only replaced if it is not part of a longer
    identifier: ``DataRow`` is left alone inside ``DataRows``, because
    ``:class:`...DataRow`s`` would no longer be valid inline markup.

    Args:
        content: File content with docstrings.
        original: Text to find and replace.
        replacement: Replacement text.

    Returns:
        Modified content with replacements only in docstrings and not in
        Sphinx roles. ``content`` is returned unchanged if ``original`` is empty.
    """
    if not original:
        # str.find("") matches at every offset and would never advance.
        return content

    def is_word_char(ch: str) -> bool:
        return ch.isalnum() or ch == "_"

    guard_left = is_word_char(original[0])
    guard_right = is_word_char(original[-1])
    span = len(original)

    result = []
    pos = 0
    while True:
        found_pos = content.find(original, pos)
        if found_pos == -1:
            result.append(content[pos:])
            break

        end = found_pos + span
        part_of_longer_word = (guard_left and found_pos > 0 and is_word_char(content[found_pos - 1])) or (
            guard_right and end < len(content) and is_word_char(content[end])
        )

        result.append(content[pos:found_pos])
        if (
            not part_of_longer_word
            and self._is_within_docstring(content, found_pos)
            and not self._is_within_sphinx_role(content, found_pos)
        ):
            result.append(replacement)
        else:
            result.append(original)
        pos = end

    return "".join(result)
def _replace_with_indentation(self, content: str, original: str, replacement: str) -> str:
    """Replace a dedented snippet inside an indented docstring.

    ``original`` and ``replacement`` are dedented text, while ``content``
    holds the docstring at its real indentation. The snippet is searched for
    at 0-16 spaces of indentation (step 4), first with every non-blank line
    indented, then with the first line left unindented (text that starts
    right after the opening quotes). Only the first match that lies inside a
    docstring is replaced.

    Args:
        content: File content with indented docstrings.
        original: Dedented text to find.
        replacement: Dedented replacement text.

    Returns:
        Content with the first matching occurrence replaced, or ``content``
        unchanged when nothing matches.
    """
    if not original.splitlines(keepends=True):
        return content

    # Also try the snippet with the opposite trailing-newline state, because
    # the file may or may not have a newline where the dedented text ends.
    alternate = original.rstrip("\n") if original.endswith("\n") else original + "\n"

    def shift(text: str, pad: str, skip_first: bool) -> str:
        # Indent non-blank lines by ``pad``; optionally leave line 0 alone.
        shifted = []
        for row_no, row in enumerate(text.splitlines(keepends=True)):
            if (skip_first and row_no == 0) or not row.strip():
                shifted.append(row)
            else:
                shifted.append(pad + row)
        return "".join(shifted)

    def first_docstring_hit(needle: str) -> int:
        # Offset of the first occurrence inside a docstring, or -1.
        at = content.find(needle)
        while at != -1:
            if self._is_within_docstring(content, at):
                return at
            at = content.find(needle, at + 1)
        return -1

    # Uniform indentation first, then the "first line unindented" layout.
    layouts = [(" " * width, False) for width in range(0, 17, 4)]
    layouts += [(" " * width, True) for width in range(4, 17, 4)]

    for candidate in (original, alternate):
        for pad, skip_first in layouts:
            needle = shift(candidate, pad, skip_first)
            at = first_docstring_hit(needle)
            if at == -1:
                continue

            patch = shift(replacement, pad, skip_first)
            # Mirror the trailing-newline state of the text that was matched.
            if not candidate.endswith("\n") and patch.endswith("\n"):
                patch = patch.rstrip("\n")
            return content[:at] + patch + content[at + len(needle) :]

    # Fallback: raw, non-indented match, still restricted to docstrings.
    at = content.find(original)
    if at != -1 and self._is_within_docstring(content, at):
        return content[:at] + replacement + content[at + len(original) :]

    return content
def lint_directory(self, directory: Path, fix: bool = False) -> LintResult:
    """Lint every ``*.py`` file below a directory.

    Args:
        directory: Root directory, searched recursively.
        fix: If True, apply fixes to files.

    Returns:
        A LintResult holding the summed counters and all issues of the
        linted files.
    """
    skipped_markers = ("__pycache__", ".pytest_cache")
    totals = LintResult()

    candidates = list(directory.rglob("*.py"))
    print(f"Scanning {len(candidates)} Python files in {directory}...\n")

    for candidate in candidates:
        location = str(candidate)
        # Cache directories are skipped by substring match on the path.
        if any(marker in location for marker in skipped_markers):
            continue

        outcome = self.lint_file(candidate, fix=fix)
        totals.files_checked += outcome.files_checked
        totals.files_modified += outcome.files_modified
        totals.issues.extend(outcome.issues)

    return totals
def print_report(result: LintResult, verbose: bool = False):
    """Print a human-readable summary of linting results to stdout.

    Args:
        result: LintResult to report on.
        verbose: If True, also list every issue grouped by type and file.
    """
    stats = result.get_stats()
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    print("\n" + heavy_rule)
    print("DOCSTRING LINTING REPORT")
    print(heavy_rule)

    print(f"\nFiles checked: {result.files_checked}")
    if result.files_modified > 0:
        print(f"Files modified: {result.files_modified}")

    print(f"\nTotal issues: {stats['total']}")
    print(f" Errors: {stats['errors']}")
    print(f" Warnings: {stats['warnings']}")
    print(f" Info: {stats['info']}")

    if stats["total"] == 0:
        print("\n✓ No issues found!")
        return

    # Everything in stats besides the four summary keys is a per-type count.
    summary_keys = ("total", "errors", "warnings", "info")
    per_type = {key: value for key, value in stats.items() if key not in summary_keys}
    print("\nIssues by type:")
    for type_name, count in sorted(per_type.items(), key=lambda pair: -pair[1]):
        print(f" {type_name}: {count}")

    if not verbose:
        return

    print("\n" + light_rule)
    print("DETAILED ISSUES")
    print(light_rule)

    grouped: dict[str, list[DocstringIssue]] = {}
    for entry in result.issues:
        grouped.setdefault(entry.issue_type, []).append(entry)

    markers = {"error": "✗", "warning": "⚠", "info": "ℹ"}

    # Most frequent issue type first.
    for type_name in sorted(grouped.keys(), key=lambda t: len(grouped[t]), reverse=True):
        members = grouped[type_name]
        print(f"\n{type_name} ({len(members)}):")
        print(light_rule)

        per_file: dict[str, list[DocstringIssue]] = {}
        for entry in members:
            per_file.setdefault(entry.file_path, []).append(entry)

        for file_path, file_issues in sorted(per_file.items()):
            for issue in sorted(file_issues, key=lambda x: x.line_number):
                severity_marker = markers.get(issue.severity, "•")

                # Clickable format: file:line: message
                print(f"{file_path}:{issue.line_number}: {severity_marker} [{issue.function_name}] {issue.message}")
                if issue.suggested_fix and issue.original:
                    print(f' "{issue.original}" → "{issue.suggested_fix}"')
def save_json_report(result: LintResult, output_path: Path):
    """Save linting results as JSON.

    Args:
        result: LintResult to save.
        output_path: Path to output JSON file.
    """
    data = {
        "files_checked": result.files_checked,
        "files_modified": result.files_modified,
        "statistics": result.get_stats(),
        "issues": [
            {
                "file": issue.file_path,
                "line": issue.line_number,
                "function": issue.function_name,
                "type": issue.issue_type,
                "severity": issue.severity,
                "message": issue.message,
                "original": issue.original,
                "suggested_fix": issue.suggested_fix,
            }
            for issue in result.issues
        ],
    }

    output_path.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")
    print(f"\nJSON report saved to: {output_path}")


def main():
    """Command-line entry point: parse arguments, lint, report, exit.

    Exit status is 1 when any error-severity issue is found, when warnings
    are found without ``--fix``, or when a path given via ``--files`` or
    ``--sdk-path`` does not exist; otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description="Lint Python docstrings for Encord SDK documentation generation.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--sdk-path", type=Path, help="Path to SDK source directory")
    group.add_argument("--files", nargs="+", type=Path, help="Specific files to check")

    parser.add_argument("--check", action="store_true", help="Check only (no modifications)")
    parser.add_argument("--fix", action="store_true", help="Automatically fix issues where possible")
    parser.add_argument("--report", type=Path, help="Save JSON report to file")
    parser.add_argument("--verbose", "-v", action="store_true", help="Show detailed issues")
    parser.add_argument("--config", type=Path, help="Config file (JSON)")

    args = parser.parse_args()

    # Load config if provided; a missing file falls back to defaults but is
    # reported so a mistyped path does not go unnoticed.
    config = {}
    if args.config:
        if args.config.exists():
            config = json.loads(args.config.read_text(encoding="utf-8"))
        else:
            print(f"Warning: Config file not found, using defaults: {args.config}", file=sys.stderr)

    # Default to check mode if neither specified
    if not args.check and not args.fix:
        args.check = True

    linter = DocstringLinter(config)

    missing_inputs = False
    if args.sdk_path:
        if not args.sdk_path.exists():
            print(f"Error: SDK path not found: {args.sdk_path}", file=sys.stderr)
            sys.exit(1)
        result = linter.lint_directory(args.sdk_path, fix=args.fix)
    else:
        result = LintResult()
        for file_path in args.files:
            if not file_path.exists():
                print(f"Error: File not found: {file_path}", file=sys.stderr)
                # Keep linting the remaining files, but remember the failure
                # so the run cannot exit 0 with an input that was never checked.
                missing_inputs = True
                continue
            file_result = linter.lint_file(file_path, fix=args.fix)
            result.files_checked += file_result.files_checked
            result.files_modified += file_result.files_modified
            result.issues.extend(file_result.issues)

    print_report(result, verbose=args.verbose)

    if args.report:
        save_json_report(result, args.report)

    stats = result.get_stats()
    if missing_inputs or stats["errors"] > 0:
        sys.exit(1)
    if stats["warnings"] > 0:
        # Warnings only fail the run in check mode.
        sys.exit(0 if args.fix else 1)
    sys.exit(0)
+ if args.sdk_path: + if not args.sdk_path.exists(): + print(f"Error: SDK path not found: {args.sdk_path}") + sys.exit(1) + result = linter.lint_directory(args.sdk_path, fix=args.fix) + else: + result = LintResult() + for file_path in args.files: + if not file_path.exists(): + print(f"Error: File not found: {file_path}") + continue + file_result = linter.lint_file(file_path, fix=args.fix) + result.files_checked += file_result.files_checked + result.files_modified += file_result.files_modified + result.issues.extend(file_result.issues) + + # Print report + print_report(result, verbose=args.verbose) + + # Save JSON report if requested + if args.report: + save_json_report(result, args.report) + + # Exit with error code if issues found + stats = result.get_stats() + if stats["errors"] > 0: + sys.exit(1) + elif stats["warnings"] > 0: + sys.exit(0 if args.fix else 1) + else: + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/tools/docstring_linter_config.json b/tools/docstring_linter_config.json new file mode 100644 index 000000000..92115b30c --- /dev/null +++ b/tools/docstring_linter_config.json @@ -0,0 +1,51 @@ +{ + "sdk_package_name": "encord", + "description": "Configuration for Encord SDK docstring linter", + + "sdk_classes": [ + "LabelRowV2", + "Dataset", + "Project", + "OntologyStructure", + "Workflow", + "StorageItem", + "StorageFolder", + "Collection", + "Ontology", + "ProjectUser", + "DataRow", + "ObjectInstance", + "ClassificationInstance", + "EncordUserClient", + "StorageItemType", + "AnnotationTaskStatus", + "LabelStatus", + "ProjectType", + "WorkflowGraphNode", + "Bundle" + ], + + "exceptions": [ + "AuthorisationError", + "AuthenticationError", + "ResourceNotFoundError", + "UnknownError", + "OperationNotAllowed", + "LabelRowError", + "WrongProjectTypeError" + ], + + "auto_fix_rules": { + "invalid_section_headers": true, + "spelling_inconsistencies": true, + "unlinked_exceptions": false, + "unlinked_classes": false + }, + 
"ignore_patterns": [ + "*/tests/*", + "*/test_*.py", + "*/__pycache__/*", + "*/.*" + ] +}