diff --git a/.env.rag.example b/.env.rag.example index bb28594..a037320 100644 --- a/.env.rag.example +++ b/.env.rag.example @@ -66,3 +66,8 @@ S3_ACCOUNT1_SCHEDULES= #SERPAPI_KEY=your-serpapi-api-key #SERPAPI_QUERIES="OpenAI news, Bitcoin price, Tesla updates" #SERPAPI_SCHEDULES=60 + +# DROPBOX CONNECTORS (optional): + +#DROPBOX1_ACCESS_TOKEN=sl.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +#DROPBOX1_SCHEDULES=3600 diff --git a/README.md b/README.md index f297b0e..b0d86c0 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ interact with your knowledge with ease! * S3 (any AWS compatible Object Storage including AWS, Contabo, B2, Cloudflare R2, OVH, etc) * MediaWiki (all versions supported, both private and public wiki) * SerpAPI +* Dropbox (files and folders with flexible path, extension, and directory filters) ### 🌐 Extra connectors @@ -52,7 +53,6 @@ Over 100 extra connectors are available at request, including the most popular o * Notion * Microsoft Teams * Microsoft Office 365 -* Dropbox * Trello * YouTube * FTP @@ -228,6 +228,40 @@ SERPAPI1_QUERIES=aaa SERPAPI1_SCHEDULES=3600 ```` +### Dropbox Connector + +The Dropbox connector ingests files from Dropbox using the official Dropbox Python SDK. +Supports ingesting from specific paths or the entire account root, with optional extension and directory name filters. +Requires a [Dropbox access token](https://www.dropbox.com/developers/apps) with `files.content.read` scope. + +```yaml +# config.yaml + +sources: + - type: "dropbox" + name: "dropbox1" + config: + access_token: "${DROPBOX1_ACCESS_TOKEN}" + # Paths to ingest (optional). If omitted, ingests everything from root recursively. 
+ paths: + - "/Documents/Engineering" + - "/Shared/Wiki" + # Extension filters (mutually exclusive, optional): + #include_extensions: "md,docx,pdf" # only these extensions + #exclude_extensions: "png,jpg,gif" # all except these + # Directory name filters (mutually exclusive, optional): + #include_directories: "source,docs" # only these folder names + #exclude_directories: "archive,tmp" # all except these folder names + schedules: "${DROPBOX1_SCHEDULES}" +``` + +```dotenv +# .env.rag + +DROPBOX1_ACCESS_TOKEN=sl.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +DROPBOX1_SCHEDULES=3600 +``` + ## Embeddings and Inference ### Embeddings support diff --git a/config.yaml.example b/config.yaml.example index cec4fac..a83da37 100644 --- a/config.yaml.example +++ b/config.yaml.example @@ -40,6 +40,24 @@ sources: # queries: "${SERPAPI_QUERIES}" # schedules: "${SERPAPI_SCHEDULES}" + # DROPBOX CONNECTORS (optional): + + #- type: "dropbox" + # name: "dropbox1" + # config: + # access_token: "${DROPBOX1_ACCESS_TOKEN}" + # # Paths to ingest (optional). If omitted, ingests everything from root recursively. + # #paths: + # # - "/Documents/Engineering" + # # - "/Shared/Wiki" + # # Extension filters (mutually exclusive, optional): + # #include_extensions: "md,docx,pdf" # only these extensions + # #exclude_extensions: "png,jpg,gif" # all except these + # # Directory name filters (mutually exclusive, optional): + # #include_directories: "source,docs" # only these folder names + # #exclude_directories: "archive,tmp" # all except these folder names + # schedules: "${DROPBOX1_SCHEDULES}" + embedding: # can be `local` or `openrouter`/`openai` provider: local