Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions graphrag_sdk/src/graphrag_sdk/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
)
from graphrag_sdk.core.providers import Embedder, LLMInterface
from graphrag_sdk.ingestion.chunking_strategies.base import ChunkingStrategy
from graphrag_sdk.ingestion.chunking_strategies.fixed_size import FixedSizeChunking
from graphrag_sdk.ingestion.chunking_strategies.sentence_token_cap import (
SentenceTokenCapChunking,
)
from graphrag_sdk.ingestion.extraction_strategies.base import ExtractionStrategy
from graphrag_sdk.ingestion.extraction_strategies.graph_extraction import GraphExtraction
from graphrag_sdk.ingestion.loaders.base import LoaderStrategy
Expand Down Expand Up @@ -320,7 +322,10 @@ async def ingest(

Uses sensible defaults for any unspecified strategy:
- Loader: auto-detected from file extension (PDF or text)
- Chunker: FixedSizeChunking(chunk_size=1000)
- Chunker: SentenceTokenCapChunking(max_tokens=512, overlap_sentences=2)
— sentence-aware, never splits entity names at chunk boundaries.
Override with ``chunker=FixedSizeChunking(...)`` if you need
character-window chunking.
Comment on lines +326 to +328
- Extractor: GraphExtraction with configured LLM
- Resolver: ExactMatchResolution

Expand Down Expand Up @@ -529,7 +534,7 @@ async def _ingest_single(

pipeline = IngestionPipeline(
loader=loader or TextLoader(),
chunker=chunker or FixedSizeChunking(),
chunker=chunker or SentenceTokenCapChunking(),
extractor=extractor or self._default_extractor(),
Comment on lines 535 to 538
resolver=resolver or ExactMatchResolution(),
graph_store=self._graph_store,
Expand Down Expand Up @@ -1010,7 +1015,7 @@ async def update(

pipeline = IngestionPipeline(
loader=loader or TextLoader(), # unused (text is provided below)
chunker=chunker or FixedSizeChunking(),
chunker=chunker or SentenceTokenCapChunking(),
extractor=extractor or self._default_extractor(),
resolver=resolver or ExactMatchResolution(),
graph_store=self._graph_store,
Expand Down Expand Up @@ -1271,7 +1276,7 @@ async def apply_changes(
to ``ingest()`` and ``update()``). Defaults to per-extension
auto-selection. ``deleted`` ignores this.
chunker: Override the chunking strategy for ``added``/``modified``.
Defaults to ``FixedSizeChunking``. ``deleted`` ignores this.
Defaults to ``SentenceTokenCapChunking``. ``deleted`` ignores this.
extractor: Override the entity-extraction strategy for
``added``/``modified``. ``deleted`` ignores this.
resolver: Override the resolution strategy for ``added``/
Expand Down
Loading