Patch summary (text before the first file header is ignored by patch tools):

* graph_rag_config.py: call `strip()` instead of comparing the bound method
  `strip` to "", so a whitespace-only LanceDB `db_uri` falls back to the
  configured default before being resolved to an absolute path.
* indexer_adapters.py: `_filter_under_community_level` now also keeps rows
  whose `level` is NaN, and logs a warning when more than 10% of the rows
  have no `level`.
  NOTE(review): the new code uses a module-level `logger`; its definition is
  not visible in this patch - confirm it exists in indexer_adapters.py.

diff --git a/packages/graphrag/graphrag/config/models/graph_rag_config.py b/packages/graphrag/graphrag/config/models/graph_rag_config.py
index 1b753d58fc..eb32b3e5a7 100644
--- a/packages/graphrag/graphrag/config/models/graph_rag_config.py
+++ b/packages/graphrag/graphrag/config/models/graph_rag_config.py
@@ -270,7 +270,7 @@ def _validate_vector_store_db_uri(self) -> None:
         """Validate the vector store configuration."""
         store = self.vector_store
         if store.type == VectorStoreType.LanceDB:
-            if not store.db_uri or store.db_uri.strip == "":
+            if not store.db_uri or store.db_uri.strip() == "":
                 store.db_uri = graphrag_config_defaults.vector_store.db_uri
             store.db_uri = str(Path(store.db_uri).resolve())
 
diff --git a/packages/graphrag/graphrag/query/indexer_adapters.py b/packages/graphrag/graphrag/query/indexer_adapters.py
index 7119ad842c..4db0de2e78 100644
--- a/packages/graphrag/graphrag/query/indexer_adapters.py
+++ b/packages/graphrag/graphrag/query/indexer_adapters.py
@@ -219,7 +219,16 @@ def embed_community_reports(
 def _filter_under_community_level(
     df: pd.DataFrame, community_level: int
 ) -> pd.DataFrame:
+    nan_count = df.level.isna().sum()
+    if nan_count > 0:
+        orphan_pct = nan_count / len(df) * 100
+        if orphan_pct > 10:
+            logger.warning(
+                "%.0f%% of entities have no community assignment. "
+                "Consider checking your community detection settings.",
+                orphan_pct,
+            )
     return cast(
         "pd.DataFrame",
-        df[df.level <= community_level],
+        df[(df.level <= community_level) | df.level.isna()],
     )