diff --git a/README.md b/README.md index f297b0e..8672cbd 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,8 @@ interact with your knowledge with ease! * S3 (any AWS compatible Object Storage including AWS, Contabo, B2, Cloudflare R2, OVH, etc) * MediaWiki (all versions supported, both private and public wiki) * SerpAPI +* GitHub (repository files and issues, PAT or GitHub App auth) +* GitLab (repository files and issues, supports GitLab.com and self-hosted) ### 🌐 Extra connectors @@ -47,8 +49,6 @@ Over 100 extra connectors are available at request, including the most popular o * Google Drive * Jira * Slack -* GitHub -* Gitlab * Notion * Microsoft Teams * Microsoft Office 365 @@ -228,6 +228,92 @@ SERPAPI1_QUERIES=aaa SERPAPI1_SCHEDULES=3600 ```` +### GitHub Connector + +The GitHub connector ingests repository files and optionally issues from a GitHub repository. +Supports PAT and GitHub App authentication, branch or commit targeting, file extension/directory +filters, and issue label filters. 
+ +```yaml +# config.yaml + +sources: + - type: "github" + name: "github1" + config: + # Auth — use one of: personal_token OR github_app_* credentials + personal_token: "${GITHUB1_PERSONAL_TOKEN}" + owner: "${GITHUB1_OWNER}" # repository owner / org + repo: "${GITHUB1_REPO}" # repository name + branch: "main" # default "main" (mutually exclusive with commit_sha) + include_extensions: "md,py" # optional, comma-separated + include_issues: false # set true to also ingest issues + concurrent_requests: 5 # optional, default 5 + schedules: "${GITHUB1_SCHEDULES}" +``` + +```dotenv +# .env.rag + +GITHUB1_PERSONAL_TOKEN=ghp_xxxxxxxxxxxx +GITHUB1_OWNER=your-org-or-username +GITHUB1_REPO=your-repo-name +GITHUB1_SCHEDULES=3600 +``` + +For GitHub App authentication, replace `personal_token` with: + +```yaml + github_app_id: "${GITHUB1_APP_ID}" + github_app_installation_id: "${GITHUB1_APP_INSTALLATION_ID}" + github_app_private_key: "${GITHUB1_APP_PRIVATE_KEY}" +``` + +### GitLab Connector + +The GitLab connector ingests repository files and optionally issues from a GitLab project or group. +Supports GitLab.com and self-hosted instances via a Personal Access Token with `read_api` scope. 
+ +```yaml +# config.yaml + +sources: + - type: "gitlab" + name: "gitlab1" + config: + gitlab_url: "${GITLAB1_URL}" + personal_token: "${GITLAB1_TOKEN}" + project_id: 12345678 # integer project ID (required unless only group_id is set) + #group_id: 999 # optional, for group-level issue queries + ref: "main" # optional, branch/tag/commit, default "main" + #path: "docs" # optional, limit to sub-directory + #file_path: "README.md" # optional, single file only + recursive: true # optional, default true + include_issues: false # set true to also ingest issues + #issues_state: "opened" # optional: opened/closed/all, default "opened" + #issues_labels: "bug,docs" # optional, comma-separated + #issues_assignee: "username" # optional + #issues_author: "username" # optional + #issues_milestone: "v1.0" # optional + #issues_search: "keyword" # optional + #issues_get_all: false # optional, fetch all pages, default false + #issues_scope: "created_by_me" # optional: created_by_me/assigned_to_me/all + #issues_type: "issue" # optional: issue/incident/test_case/task + #issues_confidential: false # optional + #issues_iids: [1, 2, 3] # optional, filter by specific issue IIDs (per-project issue numbers) + #issues_created_after: "2024-01-01T00:00:00Z" # optional, ISO-8601 + #issues_created_before: "2024-12-31T23:59:59Z" # optional, ISO-8601 + schedules: "${GITLAB1_SCHEDULES}" +``` + +```dotenv +# .env.rag + +GITLAB1_URL=https://gitlab.com +GITLAB1_TOKEN=glpat-xxxxxxxxxxxxxxxxxxxx +GITLAB1_SCHEDULES=3600 +``` + ## Embeddings and Inference ### Embeddings support diff --git a/config.yaml.example b/config.yaml.example index cec4fac..2c5af3f 100644 --- a/config.yaml.example +++ b/config.yaml.example @@ -40,6 +40,62 @@ sources: # queries: "${SERPAPI_QUERIES}" # schedules: "${SERPAPI_SCHEDULES}" + # GITHUB CONNECTORS (optional): + + #- type: "github" + # name: "github1" + # config: + # # Auth — use one of: personal_token OR github_app_id + github_app_installation_id + github_app_private_key + # personal_token: "${GITHUB1_PERSONAL_TOKEN}" + # #github_app_id: 
"${GITHUB1_APP_ID}" + # #github_app_installation_id: "${GITHUB1_APP_INSTALLATION_ID}" + # #github_app_private_key: "${GITHUB1_APP_PRIVATE_KEY}" + # owner: "${GITHUB1_OWNER}" + # repo: "${GITHUB1_REPO}" + # branch: "main" # optional, default "main" (mutually exclusive with commit_sha) + # #commit_sha: "" # optional (mutually exclusive with branch) + # include_extensions: "md,py" # optional, comma-separated (mutually exclusive with exclude_extensions) + # #exclude_extensions: "" # optional (mutually exclusive with include_extensions) + # #include_directories: "" # optional, comma-separated (mutually exclusive with exclude_directories) + # #exclude_directories: "" # optional (mutually exclusive with include_directories) + # include_issues: false # optional, default false + # #include_issues_labels: "" # optional, comma-separated (mutually exclusive with exclude_issues_labels) + # #exclude_issues_labels: "" # optional (mutually exclusive with include_issues_labels) + # concurrent_requests: 5 # optional, default 5 + # schedules: "${GITHUB1_SCHEDULES}" + + # GITLAB CONNECTORS (optional): + + #- type: "gitlab" + # name: "gitlab1" + # config: + # gitlab_url: "${GITLAB1_URL}" # e.g. 
https://gitlab.com + # personal_token: "${GITLAB1_TOKEN}" + # project_id: 12345678 # integer project ID (required unless only group_id is set) + # #group_id: 999 # optional, for group-level issue queries + # ref: "main" # optional, branch/tag/commit, default "main" + # #path: "docs" # optional, limit to sub-directory + # #file_path: "README.md" # optional, single file only + # recursive: true # optional, default true + # include_issues: false # optional, default false + # #issues_state: "opened" # optional: opened/closed/all, default "opened" + # #issues_labels: "bug,docs" # optional, comma-separated + # #issues_assignee: "username" # optional + # #issues_author: "username" # optional + # #issues_milestone: "v1.0" # optional + # #issues_search: "keyword" # optional + # #issues_get_all: false # optional, fetch all pages, default false + # #issues_scope: "created_by_me" # optional: created_by_me/assigned_to_me/all + # #issues_type: "issue" # optional: issue/incident/test_case/task + # #issues_confidential: false # optional + # #issues_non_archived: true # optional + # #issues_iids: [1, 2, 3] # optional, filter by specific issue IIDs (per-project issue numbers) + # #issues_created_after: "2024-01-01T00:00:00Z" # optional, ISO-8601 + # #issues_created_before: "2024-12-31T23:59:59Z" # optional, ISO-8601 + # #issues_updated_after: "2024-01-01T00:00:00Z" # optional, ISO-8601 + # #issues_updated_before: "2024-12-31T23:59:59Z" # optional, ISO-8601 + # schedules: "${GITLAB1_SCHEDULES}" + embedding: # can be `local` or `openrouter`/`openai` provider: local