From 0197002d811c75e04fd2c3b3b17390790245d1b2 Mon Sep 17 00:00:00 2001 From: Ty Hob Date: Mon, 20 Oct 2025 14:27:09 -0400 Subject: [PATCH 01/13] feat!: Upgrade Vector to 0.50.0, fix migrations Previously, when installing a clean Aspects with Vector set as the xAPI database, migrations would fail due to ASPECTS_XAPI_DATABASE not being the Ralph database. This upgrade fixes the migrations by adding an explicit Ralph database variable, allowing both databases to be created independently as designed. --- .../patches/local-docker-compose-services | 3 +- tutoraspects/plugin.py | 15 ++++--- .../alembic/versions/0002_raw_xapi_table.py | 15 ++++--- .../alembic/versions/0029_drop_json_column.py | 40 +++++++++++-------- ...0032_partition_tables_by_year_and_month.py | 30 +++++++++----- .../templates/aspects/apps/vector/local.toml | 2 +- .../apps/vector/partials/common-post.toml | 3 +- .../jobs/init/clickhouse/init-clickhouse.sh | 10 ++--- 8 files changed, 70 insertions(+), 48 deletions(-) diff --git a/tutoraspects/patches/local-docker-compose-services b/tutoraspects/patches/local-docker-compose-services index 54b4f27d2..371c0abe2 100644 --- a/tutoraspects/patches/local-docker-compose-services +++ b/tutoraspects/patches/local-docker-compose-services @@ -88,12 +88,13 @@ superset-worker-beat: {% if RUN_VECTOR %} vector: image: {{ DOCKER_IMAGE_VECTOR }} + command: -c /etc/vector/vector.toml volumes: - ../../data/vector:/var/lib/vector - ../plugins/aspects/apps/vector/local.toml:/etc/vector/vector.toml:ro {% if ASPECTS_DOCKER_HOST_SOCK_PATH %}- {{ ASPECTS_DOCKER_HOST_SOCK_PATH }}:/var/run/docker.sock:ro{% endif %} environment: - - DOCKER_HOST=/var/run/docker.sock + - DOCKER_HOST=unix:///var/run/docker.sock - VECTOR_LOG=warn restart: unless-stopped {% endif %} diff --git a/tutoraspects/plugin.py b/tutoraspects/plugin.py index 6a2df6c90..c2ad666ac 100644 --- a/tutoraspects/plugin.py +++ b/tutoraspects/plugin.py @@ -31,7 +31,7 @@ # Each new setting is a pair: (setting_name, default_value). 
# Prefix your setting names with 'ASPECTS_'. ("ASPECTS_VERSION", __version__), - # For out default deployment we currently use Celery -> Ralph for transport, + # For our default deployment we currently use Celery -> Ralph for transport, # so Vector is off by default. ("RUN_VECTOR", False), ("RUN_CLICKHOUSE", True), @@ -41,7 +41,7 @@ ("DOCKER_IMAGE_CLICKHOUSE", "clickhouse/clickhouse-server:25.8"), ("DOCKER_IMAGE_RALPH", "fundocker/ralph:4.1.0"), ("DOCKER_IMAGE_SUPERSET", "edunext/aspects-superset:{{ ASPECTS_VERSION }}"), - ("DOCKER_IMAGE_VECTOR", "timberio/vector:0.30.0-alpine"), + ("DOCKER_IMAGE_VECTOR", "timberio/vector:0.50.0-alpine"), ( "EVENT_SINK_MODELS_ENABLED", ["course_overviews", "tag", "taxonomy", "object_tag", "course_enrollment"], @@ -160,7 +160,11 @@ }, ), # ClickHouse xAPI settings - ("ASPECTS_XAPI_DATABASE", "xapi"), + ("ASPECTS_XAPI_SOURCE", "ralph"), + ( + "ASPECTS_XAPI_DATABASE", + "{% if ASPECTS_XAPI_SOURCE == 'vector' %}{{ ASPECTS_VECTOR_DATABASE }}{% else %}{{ RALPH_DATABASE }}{% endif %}", + ), ("ASPECTS_RAW_XAPI_TABLE", "xapi_events_all"), # ClickHouse event sink settings ("ASPECTS_EVENT_SINK_DATABASE", "event_sink"), @@ -275,6 +279,7 @@ # Change to https:// if the public interface to it is secure ("RALPH_HOST", "ralph"), ("RALPH_PORT", "8100"), + ("RALPH_DATABASE", "xapi"), ("RALPH_ENABLE_PUBLIC_URL", False), ("RALPH_RUN_HTTPS", False), ("RALPH_EXTRA_SETTINGS", {}), @@ -530,9 +535,7 @@ def _mount_superset_compose( # run it as part of the `init` job. 
try: for service, template_path, priority in MY_INIT_TASKS: - hooks.Filters.COMMANDS_INIT.add_item( - (service, template_path) - ) # pylint: disable=no-member + hooks.Filters.COMMANDS_INIT.add_item((service, template_path)) # pylint: disable=no-member except AttributeError: for service, template_path, priority in MY_INIT_TASKS: full_path = os.path.join( diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0002_raw_xapi_table.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0002_raw_xapi_table.py index 49cc31ec1..11c28bbc6 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0002_raw_xapi_table.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0002_raw_xapi_table.py @@ -1,11 +1,14 @@ from alembic import op -import sqlalchemy as sa revision = "0002" down_revision = "0001" branch_labels = None depends_on = None -on_cluster = " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' " if "{{CLICKHOUSE_CLUSTER_NAME}}" else "" +on_cluster = ( + " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' " + if "{{CLICKHOUSE_CLUSTER_NAME}}" + else "" +) engine = "ReplicatedMergeTree" if "{{CLICKHOUSE_CLUSTER_NAME}}" else "MergeTree" @@ -13,9 +16,9 @@ def upgrade(): op.execute( f""" -- Raw table that Ralph writes to - CREATE TABLE IF NOT EXISTS {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} - {on_cluster} - ( + CREATE TABLE IF NOT EXISTS {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + {on_cluster} + ( event_id UUID NOT NULL, emission_time DateTime64(6) NOT NULL, event String NOT NULL, @@ -29,6 +32,6 @@ def upgrade(): def downgrade(): op.execute( - "DROP TABLE IF EXISTS {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }}" + "DROP TABLE IF EXISTS {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }}" f"{on_cluster};" ) diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0029_drop_json_column.py 
b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0029_drop_json_column.py index 708322256..0159a0fb1 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0029_drop_json_column.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0029_drop_json_column.py @@ -2,30 +2,38 @@ As part of the Ralph 4.0 upgrade, we drop the unused JSON column "event" and rename "event_json" to "event". """ -from alembic import op +from alembic import op revision = "0029" down_revision = "0028" branch_labels = None depends_on = None -on_cluster = " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' " if "{{CLICKHOUSE_CLUSTER_NAME}}" else "" -engine = "ReplicatedReplacingMergeTree" if "{{CLICKHOUSE_CLUSTER_NAME}}" else "ReplacingMergeTree" +on_cluster = ( + " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' " + if "{{CLICKHOUSE_CLUSTER_NAME}}" + else "" +) +engine = ( + "ReplicatedReplacingMergeTree" + if "{{CLICKHOUSE_CLUSTER_NAME}}" + else "ReplacingMergeTree" +) # Only used in downgrade, where we have to move data -upgraded_table_name = "{{ASPECTS_XAPI_DATABASE}}.old_{{ASPECTS_RAW_XAPI_TABLE}}" +upgraded_table_name = "{{ RALPH_DATABASE }}.old_{{ ASPECTS_RAW_XAPI_TABLE }}" def upgrade(): op.execute( - f""" - DROP VIEW IF EXISTS {{ ASPECTS_XAPI_DATABASE }}.xapi_events_all_parsed_mv; + """ + DROP VIEW IF EXISTS {{ RALPH_DATABASE }}.xapi_events_all_parsed_mv; """ ) op.execute( f""" - ALTER TABLE {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} - {on_cluster} + ALTER TABLE {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + {on_cluster} DROP COLUMN event, RENAME COLUMN event_str to event; """ ) @@ -34,14 +42,14 @@ def upgrade(): def downgrade(): # 0. Remove the MV that may be pointing at the table we're changing op.execute( - f""" - DROP VIEW IF EXISTS {{ ASPECTS_XAPI_DATABASE }}.xapi_events_all_parsed_mv; + """ + DROP VIEW IF EXISTS {{ RALPH_DATABASE }}.xapi_events_all_parsed_mv; """ ) # 1. 
Rename updated table op.execute( f""" - RENAME TABLE {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + RENAME TABLE {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} TO {upgraded_table_name} {on_cluster} """ @@ -52,9 +60,9 @@ def downgrade(): op.execute( f""" -- Raw table that Ralph writes to - CREATE TABLE IF NOT EXISTS {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} - {on_cluster} - ( + CREATE TABLE IF NOT EXISTS {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + {on_cluster} + ( event_id UUID NOT NULL, emission_time DateTime64(6) NOT NULL, event String NOT NULL, @@ -67,9 +75,9 @@ def downgrade(): # 3. Insert data from the old table into the new one op.execute( f""" - INSERT INTO {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + INSERT INTO {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} (event_id, emission_time, event, event_str) - SELECT event_id, emission_time, event, event as event_str + SELECT event_id, emission_time, event, event as event_str FROM {upgraded_table_name} """ ) diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0032_partition_tables_by_year_and_month.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0032_partition_tables_by_year_and_month.py index 5ad90cb7d..82a14ed3b 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0032_partition_tables_by_year_and_month.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0032_partition_tables_by_year_and_month.py @@ -1,24 +1,33 @@ """ Partition the xapi table by year and month """ -from alembic import op +from alembic import op revision = "0032" down_revision = "0031" branch_labels = None depends_on = None -on_cluster = " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' " if "{{CLICKHOUSE_CLUSTER_NAME}}" else "" -engine = "ReplicatedReplacingMergeTree" if "{{CLICKHOUSE_CLUSTER_NAME}}" else "ReplacingMergeTree" +on_cluster = ( + " ON CLUSTER 
'{{CLICKHOUSE_CLUSTER_NAME}}' " + if "{{CLICKHOUSE_CLUSTER_NAME}}" + else "" +) +engine = ( + "ReplicatedReplacingMergeTree" + if "{{CLICKHOUSE_CLUSTER_NAME}}" + else "ReplacingMergeTree" +) + +old_xapi_table = "{{ RALPH_DATABASE }}.old_{{ ASPECTS_RAW_XAPI_TABLE }}" -old_xapi_table = "{{ASPECTS_XAPI_DATABASE}}.old_{{ASPECTS_RAW_XAPI_TABLE}}" def upgrade(): # Partition event_sink.user_profile table # 1. Rename old table op.execute( f""" - RENAME TABLE {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + RENAME TABLE {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} TO {old_xapi_table} {on_cluster} """ @@ -26,7 +35,7 @@ def upgrade(): # 2. Create partitioned table from old data op.execute( f""" - CREATE TABLE IF NOT EXISTS {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + CREATE TABLE IF NOT EXISTS {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} {on_cluster} ( event_id UUID NOT NULL, @@ -41,7 +50,7 @@ def upgrade(): # 3. Insert data from the old table into the new one op.execute( f""" - INSERT INTO {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + INSERT INTO {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} SELECT event_id, emission_time, event FROM {old_xapi_table} """ ) @@ -59,7 +68,7 @@ def downgrade(): # 1a. Rename old table op.execute( f""" - RENAME TABLE {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + RENAME TABLE {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} TO {old_xapi_table} {on_cluster} """ @@ -68,7 +77,7 @@ def downgrade(): # 2. Create un-partitioned table from old data op.execute( f""" - CREATE OR REPLACE TABLE {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + CREATE OR REPLACE TABLE {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} {on_cluster} ( event_id UUID NOT NULL, @@ -82,10 +91,9 @@ def downgrade(): # 3. 
Insert into new table from old one op.execute( f""" - INSERT INTO {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} + INSERT INTO {{ RALPH_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} SELECT * FROM {old_xapi_table} """ - ) # 4. Drop the old table op.execute( diff --git a/tutoraspects/templates/aspects/apps/vector/local.toml b/tutoraspects/templates/aspects/apps/vector/local.toml index 1154f9042..b0131b1f3 100644 --- a/tutoraspects/templates/aspects/apps/vector/local.toml +++ b/tutoraspects/templates/aspects/apps/vector/local.toml @@ -8,6 +8,6 @@ type = "docker_logs" [transforms.openedx_containers] type = "filter" inputs = ["docker_logs"] -condition = 'includes(["lms", "cms", "lms-job", "cms-job"], .label."com.docker.compose.service")' +condition = 'includes(["lms", "cms", "lms-worker", "cms-worker", "lms-job", "cms-job"], .label."com.docker.compose.service")' {% include "aspects/apps/vector/partials/common-post.toml" %} diff --git a/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml b/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml index f76053ab4..6bce1cb0b 100644 --- a/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml +++ b/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml @@ -76,7 +76,6 @@ parsed, err_regex = parse_regex(.message, r'^.* \[xapi_tracking\] [^{}]* (?P Date: Mon, 20 Oct 2025 14:27:50 -0400 Subject: [PATCH 02/13] build: Update tests to also test the Vector configurations --- .github/workflows/integration-test.yml | 292 +++++++++++++++++++++++++ 1 file changed, 292 insertions(+) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 142bc81f4..e87c9b7c4 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -299,6 +299,298 @@ jobs: echo "------------------------" } + # Loop through each job-related pod and print logs + for job_name in "${job_list[@]}"; do + # Get the pods related to 
the current job + job_pod_list=($(kubectl get pods | grep -E "${job_name}-.*" | awk '{print $1}')) + + for pod_name in "${job_pod_list[@]}"; do + print_pod_logs "$pod_name" + done + done + local-vector: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v5 + - name: setup python + uses: actions/setup-python@v6 + with: + python-version: 3.12 + - name: Install python reqs + run: pip install -r requirements/dev.txt + - name: Install aspects + run: pip install -e . + - name: Tutor config save + run: tutor config save --set ASPECTS_XAPI_SOURCE=vector + - name: Confirm DB + # This should be "openedx" + run: tutor config printvalue ASPECTS_XAPI_DATABASE + - name: Setup Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: false + swap-storage: true + - name: Tutor build openedx + run: tutor images build openedx aspects aspects-superset + - name: Tutor start + run: tutor local start -d + - name: Tutor init + run: tutor local do init + - name: Test alembic + run: | + tutor local do alembic -c "downgrade base" + tutor local do alembic -c "upgrade head" + - name: Init clickhouse + run: tutor local do init-clickhouse + # This should: + # 1. Run all models, since alembic test removed our state + # 2. Find no models on the first run, state should now be up to date now + # 3. Force run all models + # 4. 
Successfully run tests + - name: Test dbt + run: | + tutor local do dbt -c "run" + tutor local do dbt -c "run" + tutor local do dbt --only_changed False -c "run" + tutor local do dbt --only_changed False -c "test" + - name: Load test + run: tutor local do load-xapi-test-data + - name: Import demo course + run: tutor local do importdemocourse + - name: Test commands + run: | + tutor local do dump-data-to-clickhouse --options "--object course_overviews --force" + make extract_translations + tutor local do import-assets + tutor local do collect-dbt-lineage + tutor local run lms python manage.py lms transform_tracking_logs --source_provider LOCAL --source_config '{"key": "/openedx/data/", "prefix": "tracking.log", "container": "logs"}' --destination_provider LRS --transformer_type xapi + - name: Performance metrics + run: tutor local do performance-metrics --fail_on_error + - name: Tutor stop + run: tutor local stop + + dev-vector: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v5 + - name: setup python + uses: actions/setup-python@v6 + with: + python-version: 3.12 + - name: Install python reqs + run: pip install -r requirements/dev.txt + - name: Install aspects + run: pip install -e . 
+ - name: Tutor config save + run: tutor config save --set ASPECTS_XAPI_SOURCE=vector + - name: Confirm DB + # This should be "openedx" + run: tutor config printvalue ASPECTS_XAPI_DATABASE + - name: Setup Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: false + swap-storage: true + - name: Tutor build openedx + run: tutor images build openedx-dev aspects aspects-superset + - name: Tutor start + run: tutor dev start -d + - name: Tutor init + run: tutor dev do init + - name: Test alembic + run: | + tutor dev do alembic -c "downgrade base" + tutor dev do alembic -c "upgrade head" + - name: Init clickhouse + run: tutor dev do init-clickhouse + # This should: + # 1. Run all models, since alembic test removed our state + # 2. Find no models on the first run, state should now be up to date now + # 3. Force run all models + # 4. 
Successfully run tests + - name: Test dbt + run: | + tutor dev do dbt -c "run" + tutor dev do dbt -c "run" + tutor dev do dbt --only_changed False -c "run" + tutor dev do dbt --only_changed False -c "test" + - name: Load test + run: tutor dev do load-xapi-test-data + - name: Import demo course + run: tutor dev do importdemocourse + - name: Test commands + run: | + tutor dev do dump-data-to-clickhouse --options "--object course_overviews --force" + make extract_translations + tutor dev do import-assets + - name: Performance metrics + run: tutor dev do performance-metrics --fail_on_error + - name: Tutor stop + run: tutor dev stop + + k8s-vector: + runs-on: ubuntu-latest + steps: + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: true + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: false + swap-storage: true + - name: Checkout + uses: actions/checkout@v5 + - name: setup python + uses: actions/setup-python@v6 + with: + python-version: 3.12 + - name: Generate env + run: | + pip install -r requirements/dev.txt + pip install -e . 
+ tutor config save --set ASPECTS_XAPI_SOURCE=vector + tutor config printvalue ASPECTS_XAPI_DATABASE + - name: Run Kubernetes tools + uses: stefanprodan/kube-tools@v1 + with: + kubectl: 1.23.0 + kustomize: 4.4.1 + helmv3: 3.7.2 + kubeconform: 0.4.13 + command: | + kustomize build $TUTOR_ROOT/env | kubeconform -strict -ignore-missing-schemas -kubernetes-version 1.22.0 + - name: Setup Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Tutor build openedx + run: tutor images build openedx aspects aspects-superset + - name: Create k8s Kind Cluster + uses: helm/kind-action@v1.12.0 + - name: Mount docker image + run: | + kind get clusters + kind load docker-image $(tutor images printtag openedx) --name chart-testing + kind load docker-image $(tutor images printtag aspects) --name chart-testing + kind load docker-image $(tutor images printtag aspects-superset) --name chart-testing + - name: Setup namespace + run: | + kubectl config set-context --current --namespace=openedx + kubectl get pods + - name: Init k8s environment + run: | + tutor k8s start + tutor k8s do init + - name: Test alembic + run: | + tutor k8s do alembic -c "downgrade base" + tutor k8s do alembic -c "upgrade head" + - name: Init clickhouse + run: tutor k8s do init-clickhouse + # This should: + # 1. Run all models, since alembic test removed our state + # 2. Find no models on the first run, state should now be up to date now + # 3. Force run all models + # 4. 
Successfully run tests + - name: Test dbt + run: | + tutor k8s do dbt -c "run" + tutor k8s do dbt -c "run" + tutor k8s do dbt --only_changed False -c "run" + tutor k8s do dbt --only_changed False -c "test" + - name: Load test + run: tutor k8s do load-xapi-test-data + - name: Import demo course + run: tutor k8s do importdemocourse + - name: Test commands + run: | + tutor k8s do dump-data-to-clickhouse --options "--object course_overviews --force" + make extract_translations + tutor k8s do import-assets + - name: Performance metrics + run: tutor k8s do performance-metrics --fail_on_error + - name: Check failure logs + if: failure() + run: | + # Use an array to store pod names + pod_list=($(kubectl get pods -o jsonpath='{.items[*].metadata.name}')) + + failed_jobs="" + + # Loop through each pod and check for failure + for pod_name in "${pod_list[@]}"; do + # Get the pod phase/status + pod_phase=$(kubectl get pod "$pod_name" -o jsonpath='{.status.phase}') + + if [[ "$pod_phase" != "Running" && "$pod_phase" != "Succeeded" ]]; then + # Job is not in Running or Succeeded state, consider it as failed + failed_jobs="$failed_jobs $pod_name" + # Print the logs for the failing pod + echo "Failure logs for pod: $pod_name" + kubectl logs "$pod_name" + fi + done + + # Check if any jobs failed + if [ -n "$failed_jobs" ]; then + echo "The following jobs failed: $failed_jobs" + else + echo "All jobs succeeded." 
+ fi + - name: Check service logs + if: failure() + run: | + # Use an array to store service names + service_list=($(kubectl get services -o jsonpath='{.items[*].metadata.name}')) + + # Loop through each service and print logs + for service_name in "${service_list[@]}"; do + echo "Logs for service: $service_name" + kubectl logs "svc/$service_name" + echo "------------------------" + done + - name: Check jobs logs + if: "!cancelled()" + run: | + job_list=($(kubectl get jobs | grep job | awk '{print $1}')) + + # Function to print logs for a pod + print_pod_logs() { + local pod_name="$1" + echo "Logs for pod: $pod_name" + kubectl logs "$pod_name" + echo "------------------------" + } + # Loop through each job-related pod and print logs for job_name in "${job_list[@]}"; do # Get the pods related to the current job From 1f5371d69706558aceaff6f28101b8d8ff64d7db Mon Sep 17 00:00:00 2001 From: Ty Hob Date: Tue, 21 Oct 2025 09:18:51 -0400 Subject: [PATCH 03/13] style: Fix line length error --- .github/workflows/integration-test.yml | 2 +- tutoraspects/plugin.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index e87c9b7c4..9510d5960 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -480,7 +480,7 @@ jobs: pip install -r requirements/dev.txt pip install -e . 
tutor config save --set ASPECTS_XAPI_SOURCE=vector - tutor config printvalue ASPECTS_XAPI_DATABASE + tutor config printvalue ASPECTS_XAPI_DATABASE | grep openedx - name: Run Kubernetes tools uses: stefanprodan/kube-tools@v1 with: diff --git a/tutoraspects/plugin.py b/tutoraspects/plugin.py index c2ad666ac..00801407e 100644 --- a/tutoraspects/plugin.py +++ b/tutoraspects/plugin.py @@ -163,7 +163,13 @@ ("ASPECTS_XAPI_SOURCE", "ralph"), ( "ASPECTS_XAPI_DATABASE", - "{% if ASPECTS_XAPI_SOURCE == 'vector' %}{{ ASPECTS_VECTOR_DATABASE }}{% else %}{{ RALPH_DATABASE }}{% endif %}", + """ + {%- if ASPECTS_XAPI_SOURCE == 'vector' -%} + {{ ASPECTS_VECTOR_DATABASE }} + {%- else -%} + {{ RALPH_DATABASE }} + {%- endif -%} + """, ), ("ASPECTS_RAW_XAPI_TABLE", "xapi_events_all"), # ClickHouse event sink settings From 11606bfd4d0671a5d8bc72497cc6732c673846fa Mon Sep 17 00:00:00 2001 From: Ty Hob Date: Tue, 21 Oct 2025 09:22:16 -0400 Subject: [PATCH 04/13] style: Fix formatting --- tutoraspects/plugin.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tutoraspects/plugin.py b/tutoraspects/plugin.py index 00801407e..e122c8bc9 100644 --- a/tutoraspects/plugin.py +++ b/tutoraspects/plugin.py @@ -541,7 +541,9 @@ def _mount_superset_compose( # run it as part of the `init` job. 
try: for service, template_path, priority in MY_INIT_TASKS: - hooks.Filters.COMMANDS_INIT.add_item((service, template_path)) # pylint: disable=no-member + hooks.Filters.COMMANDS_INIT.add_item( + (service, template_path) + ) # pylint: disable=no-member except AttributeError: for service, template_path, priority in MY_INIT_TASKS: full_path = os.path.join( From 291209402a1ab43fa64785e44eaf67888712f117 Mon Sep 17 00:00:00 2001 From: Ty Hob Date: Thu, 23 Oct 2025 09:22:41 -0400 Subject: [PATCH 05/13] refactor!: Add new config for Alembic database, simplify CH grants Previously Alembic state was stored in ASPECTS_XAPI_DATABASE, which can change when switching between Ralph and Vector pipelines and cause Alembic to lose state and try to re-run all migrations. This is now explicit. Also makes sure Ralph uses the RALPH_DATABASE, simplifies and re-organizes the ClickHouse init script and makes sure the Vector user can access databases needed for inserting into downstream MVs. --- tutoraspects/plugin.py | 3 +- .../apps/aspects/migrations/alembic/env.py | 2 +- ...0011_vector_replacingmergetree_xapi_raw.py | 27 ++++++++++-------- .../templates/aspects/apps/ralph/config/env | 2 +- .../jobs/init/clickhouse/init-clickhouse.sh | 28 +++++++++++++------ 5 files changed, 38 insertions(+), 24 deletions(-) diff --git a/tutoraspects/plugin.py b/tutoraspects/plugin.py index e122c8bc9..934953633 100644 --- a/tutoraspects/plugin.py +++ b/tutoraspects/plugin.py @@ -183,6 +183,7 @@ ("ASPECTS_VECTOR_RAW_TRACKING_LOGS_TABLE", "_tracking"), ("ASPECTS_VECTOR_RAW_XAPI_TABLE", "xapi_events_all"), ("ASPECTS_DATA_TTL_EXPRESSION", "toDateTime(emission_time) + INTERVAL 1 YEAR"), + ("ASPECTS_ALEMBIC_MIGRATIONS_DATABASE", "{{RALPH_DATABASE}}"), # Make sure LMS / CMS have event-routing-backends installed ###################### # ClickHouse Settings @@ -277,7 +278,7 @@ "{% else %}" "{{CLICKHOUSE_HOST}}" "{% endif %}" - ":{{CLICKHOUSE_INTERNAL_NATIVE_PORT}}/{{ASPECTS_XAPI_DATABASE}}" + 
":{{CLICKHOUSE_INTERNAL_NATIVE_PORT}}/{{ASPECTS_ALEMBIC_MIGRATIONS_DATABASE}}" "{% if CLICKHOUSE_SECURE_CONNECTION %}?secure=True{% endif %}", ), ###################### diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/env.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/env.py index 94f8a9418..16e31ddc3 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/env.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/env.py @@ -32,7 +32,7 @@ def make_replicated_zk_path(cluster, table_name): """ Allows CH cluster functionality in Alembic """ - database = "{{ ASPECTS_XAPI_DATABASE }}" + database = "{{ ASPECTS_ALEMBIC_MIGRATIONS_DATABASE }}" return f'/clickhouse/tables/{cluster}/{database}/{table_name}' diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0011_vector_replacingmergetree_xapi_raw.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0011_vector_replacingmergetree_xapi_raw.py index 1c971d9d4..d78d71a85 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0011_vector_replacingmergetree_xapi_raw.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0011_vector_replacingmergetree_xapi_raw.py @@ -1,6 +1,5 @@ from alembic import op - revision = "0011" down_revision = "0010" branch_labels = None @@ -9,9 +8,17 @@ DESTINATION_TABLE = "{{ ASPECTS_VECTOR_DATABASE }}.{{ ASPECTS_VECTOR_RAW_XAPI_TABLE }}" TMP_TABLE_NEW = f"{DESTINATION_TABLE}_tmp_{revision}" TMP_TABLE_ORIG = f"{DESTINATION_TABLE}_tmp_mergetree_{revision}" -on_cluster = " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' " if "{{CLICKHOUSE_CLUSTER_NAME}}" else "" +on_cluster = ( + " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' " + if "{{CLICKHOUSE_CLUSTER_NAME}}" + else "" +) old_engine = "ReplicatedMergeTree" if "{{CLICKHOUSE_CLUSTER_NAME}}" else "MergeTree" -engine = "ReplicatedReplacingMergeTree" if 
"{{CLICKHOUSE_CLUSTER_NAME}}" else "ReplacingMergeTree" +engine = ( + "ReplicatedReplacingMergeTree" + if "{{CLICKHOUSE_CLUSTER_NAME}}" + else "ReplacingMergeTree" +) def upgrade(): @@ -37,18 +44,14 @@ def upgrade(): f""" ALTER TABLE {TMP_TABLE_NEW} {on_cluster} - ATTACH PARTITION tuple() FROM + ATTACH PARTITION tuple() FROM {DESTINATION_TABLE}; """ ) # 3. Swap both tables. We can't do this in a single statement because CH Cloud # uses replicated tables and will error. - op.execute( - f"RENAME TABLE {DESTINATION_TABLE} TO {TMP_TABLE_ORIG} {on_cluster}" - ) - op.execute( - f"RENAME TABLE {TMP_TABLE_NEW} TO {DESTINATION_TABLE} {on_cluster}" - ) + op.execute(f"RENAME TABLE {DESTINATION_TABLE} TO {TMP_TABLE_ORIG} {on_cluster}") + op.execute(f"RENAME TABLE {TMP_TABLE_NEW} TO {DESTINATION_TABLE} {on_cluster}") # 4. Force deduplication of the existing data and may take a very long time # on a larger dataset, but since Aspects isn't in production anywhere yet this # seems like a reasonable thing to do. If you're looking at this as fodder for @@ -89,7 +92,7 @@ def downgrade(): f""" ALTER TABLE {TMP_TABLE_ORIG} {on_cluster} - ATTACH PARTITION tuple() FROM + ATTACH PARTITION tuple() FROM {DESTINATION_TABLE}; """ ) @@ -104,7 +107,7 @@ def downgrade(): # a future migration, make sure to understand the potential issues here. 
op.execute( f""" - OPTIMIZE TABLE {DESTINATION_TABLE} + OPTIMIZE TABLE {DESTINATION_TABLE} {on_cluster} FINAL DEDUPLICATE; """ diff --git a/tutoraspects/templates/aspects/apps/ralph/config/env b/tutoraspects/templates/aspects/apps/ralph/config/env index 0a25e7309..e1293708e 100644 --- a/tutoraspects/templates/aspects/apps/ralph/config/env +++ b/tutoraspects/templates/aspects/apps/ralph/config/env @@ -5,7 +5,7 @@ RALPH_APP_DIR=/app/.ralph # ClickHouse database backend RALPH_BACKENDS__LRS__CLICKHOUSE__HOST={{CLICKHOUSE_HOST}} RALPH_BACKENDS__LRS__CLICKHOUSE__PORT={{CLICKHOUSE_INTERNAL_HTTP_PORT}} -RALPH_BACKENDS__LRS__CLICKHOUSE__DATABASE={{ASPECTS_XAPI_DATABASE}} +RALPH_BACKENDS__LRS__CLICKHOUSE__DATABASE={{RALPH_DATABASE}} RALPH_BACKENDS__LRS__CLICKHOUSE__EVENT_TABLE_NAME={{ASPECTS_RAW_XAPI_TABLE}} RALPH_BACKENDS__LRS__CLICKHOUSE__TEST_HOST=clickhouse RALPH_BACKENDS__LRS__CLICKHOUSE__USERNAME={{CLICKHOUSE_ADMIN_USER}} diff --git a/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh b/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh index 6132090ad..6410bc1da 100644 --- a/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh +++ b/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh @@ -35,26 +35,32 @@ CREATE USER IF NOT EXISTS {{ ASPECTS_CLICKHOUSE_VECTOR_USER }} {{ ON_CLUSTER }} CREATE USER IF NOT EXISTS {{ ASPECTS_CLICKHOUSE_REPORT_USER }} {{ ON_CLUSTER }} IDENTIFIED WITH sha256_password BY '{{ ASPECTS_CLICKHOUSE_REPORT_PASSWORD }}'; CREATE USER IF NOT EXISTS {{ ASPECTS_CLICKHOUSE_CMS_USER }} {{ ON_CLUSTER }} IDENTIFIED WITH sha256_password BY '{{ ASPECTS_CLICKHOUSE_CMS_PASSWORD }}'; --- Update user passwords if they do exist +-- Update user passwords to match configuration if they do exist ALTER USER {{ ASPECTS_CLICKHOUSE_LRS_USER }} {{ ON_CLUSTER }} IDENTIFIED WITH sha256_password BY '{{ ASPECTS_CLICKHOUSE_LRS_PASSWORD }}'; ALTER USER {{ ASPECTS_CLICKHOUSE_VECTOR_USER }} {{ ON_CLUSTER 
}} IDENTIFIED WITH sha256_password BY '{{ ASPECTS_CLICKHOUSE_VECTOR_PASSWORD }}'; ALTER USER {{ ASPECTS_CLICKHOUSE_REPORT_USER }} {{ ON_CLUSTER }} IDENTIFIED WITH sha256_password BY '{{ ASPECTS_CLICKHOUSE_REPORT_PASSWORD }}'; ALTER USER {{ ASPECTS_CLICKHOUSE_CMS_USER }} {{ ON_CLUSTER }} IDENTIFIED WITH sha256_password BY '{{ ASPECTS_CLICKHOUSE_CMS_PASSWORD }}'; --- Grant permissions to the users +-- Grant permissions to the Ralph user GRANT {{ ON_CLUSTER }} INSERT, SELECT ON {{ RALPH_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_LRS_USER }}'; -GRANT {{ ON_CLUSTER }} SELECT ON {{ RALPH_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; +GRANT {{ ON_CLUSTER }} SELECT ON {{ DBT_PROFILE_TARGET_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_LRS_USER }}'; +GRANT {{ ON_CLUSTER }} SELECT ON {{ ASPECTS_EVENT_SINK_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_LRS_USER }}'; -GRANT {{ ON_CLUSTER }} INSERT, SELECT ON {{ ASPECTS_EVENT_SINK_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_CMS_USER }}'; -GRANT {{ ON_CLUSTER }} SELECT ON {{ ASPECTS_EVENT_SINK_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; +-- Grant permissions to the Vector user +GRANT {{ ON_CLUSTER }} INSERT, SELECT ON {{ ASPECTS_VECTOR_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_VECTOR_USER }}'; +GRANT {{ ON_CLUSTER }} SELECT ON {{ DBT_PROFILE_TARGET_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_VECTOR_USER }}'; +GRANT {{ ON_CLUSTER }} SELECT ON {{ ASPECTS_EVENT_SINK_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_VECTOR_USER }}'; -GRANT {{ ON_CLUSTER }} CREATE TABLE, DROP TABLE, CREATE VIEW, DROP VIEW, SELECT, INSERT, UPDATE, DELETE, dictGet ON {{ DBT_PROFILE_TARGET_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; +-- Grant permissions to the event sink user +GRANT {{ ON_CLUSTER }} INSERT, SELECT, DELETE ON {{ ASPECTS_EVENT_SINK_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_CMS_USER }}'; +-- Grant permissions to the dbt / Superset user + -- Source databases GRANT {{ ON_CLUSTER }} SELECT, DROP TABLE, DROP VIEW ON {{ RALPH_DATABASE }}.* TO 
'{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; -GRANT {{ ON_CLUSTER }} INSERT, SELECT, DELETE ON {{ ASPECTS_EVENT_SINK_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_CMS_USER }}'; -GRANT {{ ON_CLUSTER }} SELECT, dictGet ON {{ ASPECTS_EVENT_SINK_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; -GRANT {{ ON_CLUSTER }} INSERT, SELECT ON {{ ASPECTS_VECTOR_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_VECTOR_USER }}'; GRANT {{ ON_CLUSTER }} SELECT, DROP TABLE, DROP VIEW ON {{ ASPECTS_VECTOR_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; +GRANT {{ ON_CLUSTER }} SELECT, dictGet ON {{ ASPECTS_EVENT_SINK_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; + + -- System database GRANT {{ ON_CLUSTER }} SELECT ON system.asynchronous_metrics TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; GRANT {{ ON_CLUSTER }} SELECT ON system.disks TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; GRANT {{ ON_CLUSTER }} SELECT ON system.events TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; @@ -62,6 +68,10 @@ GRANT {{ ON_CLUSTER }} SELECT ON system.metrics TO '{{ ASPECTS_CLICKHOUSE_REPORT GRANT {{ ON_CLUSTER }} SELECT ON system.replication_queue TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; GRANT {{ ON_CLUSTER }} SELECT ON system.query_log TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; + -- Target database +GRANT {{ ON_CLUSTER }} CREATE TABLE, DROP TABLE, CREATE VIEW, DROP VIEW, SELECT, INSERT, UPDATE, DELETE, dictGet ON {{ DBT_PROFILE_TARGET_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; + + -- Patch from clickhouse-extra-sql follows... 
{{ patch("clickhouse-extra-sql") }} From 10d194f0bcf704b02de5daf45d1fc501ebff74e5 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Fri, 6 Mar 2026 16:38:49 -0500 Subject: [PATCH 06/13] feat: set vector as default data pipeline --- tutoraspects/commands_v1.py | 2 - .../local-docker-compose-jobs-services | 3 ++ tutoraspects/plugin.py | 14 +++-- .../alembic/versions/0008_vector.py | 4 +- ...0011_vector_replacingmergetree_xapi_raw.py | 2 +- .../aspects/apps/aspects/scripts/bootstrap.sh | 51 ++++++++++++------- .../apps/vector/partials/common-post.toml | 2 +- .../aspects/build/aspects/Dockerfile | 11 ++-- 8 files changed, 54 insertions(+), 35 deletions(-) diff --git a/tutoraspects/commands_v1.py b/tutoraspects/commands_v1.py index 9eefcc907..1d34265e5 100644 --- a/tutoraspects/commands_v1.py +++ b/tutoraspects/commands_v1.py @@ -74,7 +74,6 @@ def dbt(only_changed: bool, command: string) -> list[tuple[str, str]]: return [ ( "aspects", - "echo 'Making dbt script executable...' && " f"echo 'Running dbt, only_changed: {only_changed} command: {command}' && " f"bash /app/aspects/scripts/dbt.sh {only_changed} {command} && " "echo 'Done!';", @@ -108,7 +107,6 @@ def alembic(command: string) -> list[tuple[str, str]]: return [ ( "aspects", - "echo 'Making dbt script executable...' 
&& " f"bash /app/aspects/scripts/alembic.sh {command} && " "echo 'Done!';", ), diff --git a/tutoraspects/patches/local-docker-compose-jobs-services b/tutoraspects/patches/local-docker-compose-jobs-services index e2d24601d..faac08606 100644 --- a/tutoraspects/patches/local-docker-compose-jobs-services +++ b/tutoraspects/patches/local-docker-compose-jobs-services @@ -16,6 +16,9 @@ aspects-job: volumes: - ../../env/plugins/aspects/apps/aspects:/app/aspects - ../../env/plugins/aspects/apps/aspects/scripts/:/app/aspects/scripts:ro + {%- for mount in iter_mounts(MOUNTS, "aspects-job") %} + - {{ mount }} + {%- endfor %} {% if RUN_CLICKHOUSE %} depends_on: - clickhouse diff --git a/tutoraspects/plugin.py b/tutoraspects/plugin.py index 934953633..894bdfad2 100644 --- a/tutoraspects/plugin.py +++ b/tutoraspects/plugin.py @@ -33,9 +33,9 @@ ("ASPECTS_VERSION", __version__), # For our default deployment we currently use Celery -> Ralph for transport, # so Vector is off by default. - ("RUN_VECTOR", False), + ("RUN_VECTOR", True), ("RUN_CLICKHOUSE", True), - ("RUN_RALPH", True), + ("RUN_RALPH", False), ("RUN_SUPERSET", True), ("DOCKER_IMAGE_ASPECTS", "edunext/aspects:{{ ASPECTS_VERSION }}"), ("DOCKER_IMAGE_CLICKHOUSE", "clickhouse/clickhouse-server:25.8"), @@ -160,7 +160,7 @@ }, ), # ClickHouse xAPI settings - ("ASPECTS_XAPI_SOURCE", "ralph"), + ("ASPECTS_XAPI_SOURCE", "vector"), ( "ASPECTS_XAPI_DATABASE", """ @@ -181,7 +181,6 @@ ("ASPECTS_VECTOR_STORE_XAPI", True), ("ASPECTS_VECTOR_DATABASE", "openedx"), ("ASPECTS_VECTOR_RAW_TRACKING_LOGS_TABLE", "_tracking"), - ("ASPECTS_VECTOR_RAW_XAPI_TABLE", "xapi_events_all"), ("ASPECTS_DATA_TTL_EXPRESSION", "toDateTime(emission_time) + INTERVAL 1 YEAR"), ("ASPECTS_ALEMBIC_MIGRATIONS_DATABASE", "{{RALPH_DATABASE}}"), # Make sure LMS / CMS have event-routing-backends installed @@ -263,7 +262,7 @@ ( "CLICKHOUSE_REPORT_URL", "{{ASPECTS_CLICKHOUSE_REPORT_USER}}:{{ASPECTS_CLICKHOUSE_REPORT_PASSWORD}}" - 
"@{{CLICKHOUSE_URL}}/{{ASPECTS_XAPI_DATABASE}}", + "@{{CLICKHOUSE_URL}}/{{ASPECTS_XAPI_SOURCE}}", ), ( "CLICKHOUSE_REPORT_SQLALCHEMY_URI", @@ -517,6 +516,11 @@ def _mount_superset_compose( """ if name == "superset": volumes += [("superset", "/app")] + elif name == "aspects-dbt": + volumes += [ + ("aspects-job", "/app/aspects-dbt"), + ("aspects-docs", "/app/aspects-dbt"), + ] return volumes diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0008_vector.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0008_vector.py index 0cf0cedce..cd8b9d65f 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0008_vector.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0008_vector.py @@ -24,7 +24,7 @@ def upgrade(): ) op.execute( f""" - CREATE TABLE IF NOT EXISTS {{ ASPECTS_VECTOR_DATABASE }}.{{ ASPECTS_VECTOR_RAW_XAPI_TABLE }} + CREATE TABLE IF NOT EXISTS {{ ASPECTS_VECTOR_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} {on_cluster} ( event_id UUID, @@ -40,7 +40,7 @@ def upgrade(): def downgrade(): op.execute( - "DROP TABLE IF EXISTS {{ ASPECTS_VECTOR_DATABASE }}.{{ ASPECTS_VECTOR_RAW_XAPI_TABLE }}" + "DROP TABLE IF EXISTS {{ ASPECTS_VECTOR_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }}" f"{on_cluster}" ) op.execute( diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0011_vector_replacingmergetree_xapi_raw.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0011_vector_replacingmergetree_xapi_raw.py index d78d71a85..8ef68ddd9 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0011_vector_replacingmergetree_xapi_raw.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0011_vector_replacingmergetree_xapi_raw.py @@ -5,7 +5,7 @@ branch_labels = None depends_on = None -DESTINATION_TABLE = "{{ ASPECTS_VECTOR_DATABASE }}.{{ ASPECTS_VECTOR_RAW_XAPI_TABLE }}" 
+DESTINATION_TABLE = "{{ ASPECTS_VECTOR_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }}" TMP_TABLE_NEW = f"{DESTINATION_TABLE}_tmp_{revision}" TMP_TABLE_ORIG = f"{DESTINATION_TABLE}_tmp_mergetree_{revision}" on_cluster = ( diff --git a/tutoraspects/templates/aspects/apps/aspects/scripts/bootstrap.sh b/tutoraspects/templates/aspects/apps/aspects/scripts/bootstrap.sh index 34d38a1cc..783e4ea8c 100644 --- a/tutoraspects/templates/aspects/apps/aspects/scripts/bootstrap.sh +++ b/tutoraspects/templates/aspects/apps/aspects/scripts/bootstrap.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash - set -eo pipefail if [ -z "${DBT_SSH_KEY+x}" ] @@ -14,29 +13,45 @@ then ssh-add /root/.ssh/id_rsa fi -export branch=$(git -C aspects-dbt/ branch --show-current) -export repo=$(git -C aspects-dbt/ config --get remote.origin.url) -if [ "$DBT_BRANCH" != "$branch" ] || [ "$DBT_REPOSITORY" != "$repo" ]; -then - rm -rf aspects-dbt +git config --global --add safe.directory '*' - echo "Installing aspects-dbt" - echo "git clone -b ${DBT_BRANCH} ${DBT_REPOSITORY}" - git clone -b ${DBT_BRANCH} ${DBT_REPOSITORY} aspects-dbt +MOUNTED=false +if [ -d "aspects-dbt/.git" ]; then + current_branch=$(git -C aspects-dbt/ branch --show-current) + current_repo=$(git -C aspects-dbt/ config --get remote.origin.url) + if [ ! -f "aspects-dbt/.git/shallow" ]; then + MOUNTED=true + fi +fi - cd aspects-dbt +if [ "$MOUNTED" = true ]; then + echo "Using mounted repo (branch: ${current_branch})" +else + # Only re-clone if branch/repo differ or directory is missing + if [ ! 
-d "aspects-dbt/.git" ] \ + || [ "${DBT_BRANCH}" != "${current_branch}" ] \ + || [ "${DBT_REPOSITORY}" != "${current_repo}" ]; then - if [ -e "./requirements.txt" ] - then - echo "Installing dbt python requirements" - pip install -r ./requirements.txt + rm -rf aspects-dbt + + echo "Installing aspects-dbt" + echo "git clone -b ${DBT_BRANCH} ${DBT_REPOSITORY}" + git clone -b "${DBT_BRANCH}" "${DBT_REPOSITORY}" aspects-dbt else - echo "No requirements.txt file found; skipping" + echo "Using existing cloned repo (branch: ${current_branch})" fi +fi - echo "Installing dbt dependencies" - dbt deps +cd aspects-dbt +if [ -e "./requirements.txt" ]; then + echo "Installing dbt python requirements" + uv pip install -r ./requirements.txt --system +else + echo "No requirements.txt file found; skipping" fi -mkdir -p $DBT_STATE +echo "Installing dbt dependencies" +dbt deps + +mkdir -p "${DBT_STATE}" \ No newline at end of file diff --git a/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml b/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml index 6bce1cb0b..dc2d91216 100644 --- a/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml +++ b/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml @@ -125,7 +125,7 @@ date_time_best_effort = true inputs = ["xapi"] endpoint = "{% if CLICKHOUSE_SECURE_CONNECTION %}https{% else %}http{% endif %}://{{ CLICKHOUSE_HOST }}:{{ CLICKHOUSE_INTERNAL_HTTP_PORT }}" database = "{{ ASPECTS_VECTOR_DATABASE }}" -table = "{{ ASPECTS_VECTOR_RAW_XAPI_TABLE }}" +table = "{{ ASPECTS_RAW_XAPI_TABLE }}" healthcheck = false {% endif %} diff --git a/tutoraspects/templates/aspects/build/aspects/Dockerfile b/tutoraspects/templates/aspects/build/aspects/Dockerfile index eb8885f30..ccfe6b24b 100644 --- a/tutoraspects/templates/aspects/build/aspects/Dockerfile +++ b/tutoraspects/templates/aspects/build/aspects/Dockerfile @@ -2,16 +2,15 @@ FROM python:3.12 WORKDIR /app +COPY 
--from=ghcr.io/astral-sh/uv:0.10.8 /uv /uvx /bin/ +ENV PATH="/root/.local/bin/:$PATH" COPY ./requirements.txt /app/requirements.txt -RUN pip install -r /app/requirements.txt - -RUN git clone -b {{ DBT_BRANCH }} {{ DBT_REPOSITORY }} aspects-dbt +RUN uv pip install -r /app/requirements.txt --system +RUN git clone --depth 1 -b {{ DBT_BRANCH }} {{ DBT_REPOSITORY }} aspects-dbt WORKDIR /app/aspects-dbt - -RUN pip install -r requirements.txt - +RUN uv pip install -r requirements.txt --system RUN dbt deps WORKDIR /app From 87912edc16b90a14b73ef3e2c6585c014054d1e4 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Fri, 6 Mar 2026 16:51:12 -0500 Subject: [PATCH 07/13] chore: format files --- tutoraspects/commands_v1.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tutoraspects/commands_v1.py b/tutoraspects/commands_v1.py index 1d34265e5..6973e9f58 100644 --- a/tutoraspects/commands_v1.py +++ b/tutoraspects/commands_v1.py @@ -107,8 +107,7 @@ def alembic(command: string) -> list[tuple[str, str]]: return [ ( "aspects", - f"bash /app/aspects/scripts/alembic.sh {command} && " - "echo 'Done!';", + f"bash /app/aspects/scripts/alembic.sh {command} && " "echo 'Done!';", ), ] From 09758329f521c4a6c3cb27fcac141d019233c518 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Tue, 10 Mar 2026 10:25:23 -0500 Subject: [PATCH 08/13] fix: set xapi_database on EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG --- tutoraspects/patches/openedx-common-settings | 1 + .../templates/aspects/jobs/init/clickhouse/init-clickhouse.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/tutoraspects/patches/openedx-common-settings b/tutoraspects/patches/openedx-common-settings index d39ae7d21..fda4d445f 100644 --- a/tutoraspects/patches/openedx-common-settings +++ b/tutoraspects/patches/openedx-common-settings @@ -8,6 +8,7 @@ EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG = { "url": "{% if CLICKHOUSE_SECURE_CONNECTION %}https{% else %}http{% endif %}://{{ CLICKHOUSE_HOST }}:{{ 
CLICKHOUSE_INTERNAL_HTTP_PORT }}", "username": "{{ ASPECTS_CLICKHOUSE_CMS_USER }}", "password": "{{ ASPECTS_CLICKHOUSE_CMS_PASSWORD }}", + "xapi_database": "{{ ASPECTS_XAPI_DATABASE }}", "database": "{{ ASPECTS_EVENT_SINK_DATABASE }}", "timeout_secs": {{ ASPECTS_EVENT_SINK_CLICKHOUSE_TIMEOUT_SECS }} } diff --git a/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh b/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh index 6410bc1da..2f76821d0 100644 --- a/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh +++ b/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh @@ -53,6 +53,7 @@ GRANT {{ ON_CLUSTER }} SELECT ON {{ ASPECTS_EVENT_SINK_DATABASE }}.* TO '{{ ASPE -- Grant permissions to the event sink user GRANT {{ ON_CLUSTER }} INSERT, SELECT, DELETE ON {{ ASPECTS_EVENT_SINK_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_CMS_USER }}'; +GRANT {{ ON_CLUSTER }} INSERT, SELECT, DELETE ON {{ ASPECTS_XAPI_DATABASE }}.* TO '{{ ASPECTS_CLICKHOUSE_CMS_USER }}'; -- Grant permissions to the dbt / Superset user -- Source databases From c4733f48087cffc27eefc39895ce6a533fe8edf5 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Tue, 10 Mar 2026 10:28:57 -0500 Subject: [PATCH 09/13] fix: fix clickhouse report url --- tutoraspects/plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutoraspects/plugin.py b/tutoraspects/plugin.py index 894bdfad2..cd058f668 100644 --- a/tutoraspects/plugin.py +++ b/tutoraspects/plugin.py @@ -262,7 +262,7 @@ ( "CLICKHOUSE_REPORT_URL", "{{ASPECTS_CLICKHOUSE_REPORT_USER}}:{{ASPECTS_CLICKHOUSE_REPORT_PASSWORD}}" - "@{{CLICKHOUSE_URL}}/{{ASPECTS_XAPI_SOURCE}}", + "@{{CLICKHOUSE_URL}}/{{ASPECTS_XAPI_DATABASE}}", ), ( "CLICKHOUSE_REPORT_SQLALCHEMY_URI", From 5ed691646f994be5cddfe48dce6733f5e8926ae0 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Mon, 16 Mar 2026 10:17:19 -0500 Subject: [PATCH 10/13] fix: update default load test config to support vector 
--- .github/workflows/integration-test.yml | 6 +++--- tutoraspects/patches/xapi-db-load-config-yaml | 9 +++++---- tutoraspects/templates/aspects/apps/vector/local.toml | 2 +- .../templates/aspects/build/aspects/requirements.txt | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 9510d5960..dd9c3de65 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -28,7 +28,7 @@ jobs: - name: Install aspects run: pip install -e . - name: Tutor config save - run: tutor config save + run: tutor config save --set ASPECTS_XAPI_SOURCE=ralph - name: Setup Docker Buildx uses: docker/setup-buildx-action@v3 - name: Free Disk Space (Ubuntu) @@ -101,7 +101,7 @@ jobs: - name: Install aspects run: pip install -e . - name: Tutor config save - run: tutor config save + run: tutor config save --set ASPECTS_XAPI_SOURCE=ralph - name: Setup Docker Buildx uses: docker/setup-buildx-action@v3 - name: Free Disk Space (Ubuntu) @@ -185,7 +185,7 @@ jobs: run: | pip install -r requirements/dev.txt pip install -e . 
- tutor config save + tutor config save --set ASPECTS_XAPI_SOURCE=ralph - name: Run Kubernetes tools uses: stefanprodan/kube-tools@v1 with: diff --git a/tutoraspects/patches/xapi-db-load-config-yaml b/tutoraspects/patches/xapi-db-load-config-yaml index 7c347801d..bccd12e72 100644 --- a/tutoraspects/patches/xapi-db-load-config-yaml +++ b/tutoraspects/patches/xapi-db-load-config-yaml @@ -1,6 +1,6 @@ # Ralph / ClickHouse backend configuration # ######################################## -backend: ralph_clickhouse +backend: vector db_host: {{ CLICKHOUSE_HOST }} db_port: {{ CLICKHOUSE_INTERNAL_HTTP_PORT }} db_name: {{ ASPECTS_XAPI_DATABASE }} @@ -11,9 +11,10 @@ lrs_username: "{{RALPH_LMS_USERNAME}}" lrs_password: "{{RALPH_LMS_PASSWORD}}" # Run options -log_dir: -num_batches: 3 -batch_size: 100 +log_dir: logs +num_xapi_batches: 10 +batch_size: 100000 +num_workers: 4 # Overall start and end date for the entire run start_date: 2014-01-01 diff --git a/tutoraspects/templates/aspects/apps/vector/local.toml b/tutoraspects/templates/aspects/apps/vector/local.toml index b0131b1f3..00e8ecbd8 100644 --- a/tutoraspects/templates/aspects/apps/vector/local.toml +++ b/tutoraspects/templates/aspects/apps/vector/local.toml @@ -8,6 +8,6 @@ type = "docker_logs" [transforms.openedx_containers] type = "filter" inputs = ["docker_logs"] -condition = 'includes(["lms", "cms", "lms-worker", "cms-worker", "lms-job", "cms-job"], .label."com.docker.compose.service")' +condition = 'includes(["lms", "cms", "lms-worker", "cms-worker", "lms-job", "cms-job", "aspects-job"], .label."com.docker.compose.service")' {% include "aspects/apps/vector/partials/common-post.toml" %} diff --git a/tutoraspects/templates/aspects/build/aspects/requirements.txt b/tutoraspects/templates/aspects/build/aspects/requirements.txt index bbbf5e7c1..9d2e003ac 100644 --- a/tutoraspects/templates/aspects/build/aspects/requirements.txt +++ b/tutoraspects/templates/aspects/build/aspects/requirements.txt @@ -1,5 +1,5 @@ # alembic 
packages alembic==1.14.1 clickhouse-sqlalchemy==0.3.2 -git+https://github.com/openedx/xapi-db-load@1.5.0 +git+https://github.com/Ian2012/xapi-db-load@tmp-v2 pyyaml From 1a93e3804ef60dcb5ccae9bb50c52fdf3d45f5b3 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Wed, 18 Mar 2026 08:44:23 -0500 Subject: [PATCH 11/13] chore: disable batching by default --- tutoraspects/plugin.py | 2 +- tutoraspects/templates/aspects/apps/vector/k8s.toml | 2 +- tutoraspects/templates/aspects/apps/vector/local.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tutoraspects/plugin.py b/tutoraspects/plugin.py index cd058f668..5ccc050c6 100644 --- a/tutoraspects/plugin.py +++ b/tutoraspects/plugin.py @@ -61,7 +61,7 @@ # reasons. # Turn on event batching by default, performance is severely impacted by # turning this off. - ("EVENT_ROUTING_BACKEND_BATCHING_ENABLED", True), + ("EVENT_ROUTING_BACKEND_BATCHING_ENABLED", False), # Events are sent when they hit either the batch size or the batch interval # time limit (defaults here are 100 events or 5 seconds). 
# https://event-routing-backends.readthedocs.io/en/latest/getting_started.html#batching-configuration diff --git a/tutoraspects/templates/aspects/apps/vector/k8s.toml b/tutoraspects/templates/aspects/apps/vector/k8s.toml index 025aa357a..8af523232 100644 --- a/tutoraspects/templates/aspects/apps/vector/k8s.toml +++ b/tutoraspects/templates/aspects/apps/vector/k8s.toml @@ -8,6 +8,6 @@ extra_namespace_label_selector = "kubernetes.io/metadata.name={{ K8S_NAMESPACE } [transforms.openedx_containers] type = "filter" inputs = ["kubernetes_logs"] -condition = '.kubernetes.pod_namespace == "{{ K8S_NAMESPACE }}" && includes(["lms", "cms", "lms-job", "cms-job", "lms-worker", "cms-worker"], .kubernetes.container_name)' +condition = '.kubernetes.pod_namespace == "{{ K8S_NAMESPACE }}" && includes(["lms", "cms", "lms-worker", "cms-worker", "lms-job", "cms-job", "aspects-job", "aspects-consumer"], .kubernetes.container_name)' {% include "aspects/apps/vector/partials/common-post.toml" %} diff --git a/tutoraspects/templates/aspects/apps/vector/local.toml b/tutoraspects/templates/aspects/apps/vector/local.toml index 00e8ecbd8..ccbbdc816 100644 --- a/tutoraspects/templates/aspects/apps/vector/local.toml +++ b/tutoraspects/templates/aspects/apps/vector/local.toml @@ -8,6 +8,6 @@ type = "docker_logs" [transforms.openedx_containers] type = "filter" inputs = ["docker_logs"] -condition = 'includes(["lms", "cms", "lms-worker", "cms-worker", "lms-job", "cms-job", "aspects-job"], .label."com.docker.compose.service")' +condition = 'includes(["lms", "cms", "lms-worker", "cms-worker", "lms-job", "cms-job", "aspects-job", "aspects-consumer"], .label."com.docker.compose.service")' {% include "aspects/apps/vector/partials/common-post.toml" %} From 6995b2b40f80143240db6ca13d44469707d3e609 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Thu, 19 Mar 2026 10:44:45 -0500 Subject: [PATCH 12/13] chore: restore defaults for load test --- .github/workflows/integration-test.yml | 291 ------------------ 
README.rst | 26 ++ tutoraspects/patches/xapi-db-load-config-yaml | 4 +- tutoraspects/plugin.py | 2 +- .../aspects/build/aspects/requirements.txt | 2 +- 5 files changed, 30 insertions(+), 295 deletions(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index ff34b830b..b948dcf9f 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -305,298 +305,7 @@ jobs: print_pod_logs "$pod_name" done done - local-vector: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v5 - - name: setup python - uses: actions/setup-python@v6 - with: - python-version: 3.12 - - name: Install python reqs - run: pip install -r requirements/dev.txt - - name: Install aspects - run: pip install -e . - - name: Tutor config save - run: tutor config save --set ASPECTS_XAPI_SOURCE=vector - - name: Confirm DB - # This should be "openedx" - run: tutor config printvalue ASPECTS_XAPI_DATABASE - - name: Setup Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: false - swap-storage: true - - name: Tutor build openedx - run: tutor images build openedx aspects aspects-superset - - name: Tutor start - run: tutor local start -d - - name: Tutor init - run: tutor local do init - - name: Test alembic - run: | - tutor local do alembic -c "downgrade base" - tutor local do alembic -c "upgrade head" - - name: Init clickhouse - run: tutor local do init-clickhouse - # This should: - # 1. Run all models, since alembic test removed our state - # 2. 
Find no models on the first run, state should now be up to date now - # 3. Force run all models - # 4. Successfully run tests - - name: Test dbt - run: | - tutor local do dbt -c "run" - tutor local do dbt -c "run" - tutor local do dbt --only_changed False -c "run" - tutor local do dbt --only_changed False -c "test" - - name: Load test - run: tutor local do load-xapi-test-data - - name: Import demo course - run: tutor local do importdemocourse - - name: Test commands - run: | - tutor local do dump-data-to-clickhouse --options "--object course_overviews --force" - make extract_translations - tutor local do import-assets - tutor local do collect-dbt-lineage - tutor local run lms python manage.py lms transform_tracking_logs --source_provider LOCAL --source_config '{"key": "/openedx/data/", "prefix": "tracking.log", "container": "logs"}' --destination_provider LRS --transformer_type xapi - - name: Performance metrics - run: tutor local do performance-metrics --fail_on_error - - name: Tutor stop - run: tutor local stop - - dev-vector: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v5 - - name: setup python - uses: actions/setup-python@v6 - with: - python-version: 3.12 - - name: Install python reqs - run: pip install -r requirements/dev.txt - - name: Install aspects - run: pip install -e . 
- - name: Tutor config save - run: tutor config save --set ASPECTS_XAPI_SOURCE=vector - - name: Confirm DB - # This should be "openedx" - run: tutor config printvalue ASPECTS_XAPI_DATABASE - - name: Setup Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: false - swap-storage: true - - name: Tutor build openedx - run: tutor images build openedx-dev aspects aspects-superset - - name: Tutor start - run: tutor dev start -d - - name: Tutor init - run: tutor dev do init - - name: Test alembic - run: | - tutor dev do alembic -c "downgrade base" - tutor dev do alembic -c "upgrade head" - - name: Init clickhouse - run: tutor dev do init-clickhouse - # This should: - # 1. Run all models, since alembic test removed our state - # 2. Find no models on the first run, state should now be up to date now - # 3. Force run all models - # 4. 
Successfully run tests - - name: Test dbt - run: | - tutor dev do dbt -c "run" - tutor dev do dbt -c "run" - tutor dev do dbt --only_changed False -c "run" - tutor dev do dbt --only_changed False -c "test" - - name: Load test - run: tutor dev do load-xapi-test-data - - name: Import demo course - run: tutor dev do importdemocourse - - name: Test commands - run: | - tutor dev do dump-data-to-clickhouse --options "--object course_overviews --force" - make extract_translations - tutor dev do import-assets - - name: Performance metrics - run: tutor dev do performance-metrics --fail_on_error - - name: Tutor stop - run: tutor dev stop - - k8s-vector: - runs-on: ubuntu-latest - steps: - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: true - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: false - swap-storage: true - - name: Checkout - uses: actions/checkout@v5 - - name: setup python - uses: actions/setup-python@v6 - with: - python-version: 3.12 - - name: Generate env - run: | - pip install -r requirements/dev.txt - pip install -e . 
- tutor config save --set ASPECTS_XAPI_SOURCE=vector - tutor config printvalue ASPECTS_XAPI_DATABASE | grep openedx - - name: Run Kubernetes tools - uses: stefanprodan/kube-tools@v1 - with: - kubectl: 1.23.0 - kustomize: 4.4.1 - helmv3: 3.7.2 - kubeconform: 0.4.13 - command: | - kustomize build $TUTOR_ROOT/env | kubeconform -strict -ignore-missing-schemas -kubernetes-version 1.22.0 - - name: Setup Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Tutor build openedx - run: tutor images build openedx aspects aspects-superset - - name: Create k8s Kind Cluster - uses: helm/kind-action@v1.12.0 - - name: Mount docker image - run: | - kind get clusters - kind load docker-image $(tutor images printtag openedx) --name chart-testing - kind load docker-image $(tutor images printtag aspects) --name chart-testing - kind load docker-image $(tutor images printtag aspects-superset) --name chart-testing - - name: Setup namespace - run: | - kubectl config set-context --current --namespace=openedx - kubectl get pods - - name: Init k8s environment - run: | - tutor k8s start - tutor k8s do init - - name: Test alembic - run: | - tutor k8s do alembic -c "downgrade base" - tutor k8s do alembic -c "upgrade head" - - name: Init clickhouse - run: tutor k8s do init-clickhouse - # This should: - # 1. Run all models, since alembic test removed our state - # 2. Find no models on the first run, state should now be up to date now - # 3. Force run all models - # 4. 
Successfully run tests - - name: Test dbt - run: | - tutor k8s do dbt -c "run" - tutor k8s do dbt -c "run" - tutor k8s do dbt --only_changed False -c "run" - tutor k8s do dbt --only_changed False -c "test" - - name: Load test - run: tutor k8s do load-xapi-test-data - - name: Import demo course - run: tutor k8s do importdemocourse - - name: Test commands - run: | - tutor k8s do dump-data-to-clickhouse --options "--object course_overviews --force" - make extract_translations - tutor k8s do import-assets - - name: Performance metrics - run: tutor k8s do performance-metrics --fail_on_error - - name: Check failure logs - if: failure() - run: | - # Use an array to store pod names - pod_list=($(kubectl get pods -o jsonpath='{.items[*].metadata.name}')) - - failed_jobs="" - - # Loop through each pod and check for failure - for pod_name in "${pod_list[@]}"; do - # Get the pod phase/status - pod_phase=$(kubectl get pod "$pod_name" -o jsonpath='{.status.phase}') - - if [[ "$pod_phase" != "Running" && "$pod_phase" != "Succeeded" ]]; then - # Job is not in Running or Succeeded state, consider it as failed - failed_jobs="$failed_jobs $pod_name" - # Print the logs for the failing pod - echo "Failure logs for pod: $pod_name" - kubectl logs "$pod_name" - fi - done - - # Check if any jobs failed - if [ -n "$failed_jobs" ]; then - echo "The following jobs failed: $failed_jobs" - else - echo "All jobs succeeded." 
- fi - - name: Check service logs - if: failure() - run: | - # Use an array to store service names - service_list=($(kubectl get services -o jsonpath='{.items[*].metadata.name}')) - - # Loop through each service and print logs - for service_name in "${service_list[@]}"; do - echo "Logs for service: $service_name" - kubectl logs "svc/$service_name" - echo "------------------------" - done - - name: Check jobs logs - if: "!cancelled()" - run: | - job_list=($(kubectl get jobs | grep job | awk '{print $1}')) - - # Function to print logs for a pod - print_pod_logs() { - local pod_name="$1" - echo "Logs for pod: $pod_name" - kubectl logs "$pod_name" - echo "------------------------" - } - - # Loop through each job-related pod and print logs - for job_name in "${job_list[@]}"; do - # Get the pods related to the current job - job_pod_list=($(kubectl get pods | grep -E "${job_name}-.*" | awk '{print $1}')) - - for pod_name in "${job_pod_list[@]}"; do - print_pod_logs "$pod_name" - done - done local-vector: runs-on: ubuntu-latest steps: diff --git a/README.rst b/README.rst index 94a08b5c6..529c92561 100644 --- a/README.rst +++ b/README.rst @@ -28,6 +28,32 @@ Compatibility Current versions of the plugin are compatible with Tutor 19.0.0 and later and support Open edX releases from Sumac onward. Older releases can support Open edX versions as far back as Nutmeg. Details are available in the `Aspects Documentation `_. +Breaking Changes +================ + +The default data pipeline has changed from Ralph to Vector, improving performance and simplifying the architecture. + +Key changes: + +- Vector is now the default for xAPI event ingestion +- The ``ASPECTS_VECTOR_RAW_XAPI_TABLE`` setting has been replaced with ``ASPECTS_RAW_XAPI_TABLE`` +- The default database has changed from ``xapi`` (Ralph) to ``openedx`` (Vector) + +To keep using Ralph as your data pipeline: + +..
code-block:: bash + + tutor config save --set ASPECTS_XAPI_SOURCE=ralph + tutor config save --set RUN_RALPH=True + tutor config save --set RUN_VECTOR=False + +This will configure Aspects to use Ralph with the ``xapi`` database, preserving your existing data. + +If you have customized ``ASPECTS_VECTOR_RAW_XAPI_TABLE`` in your configuration, update it to use ``ASPECTS_RAW_XAPI_TABLE`` instead. + +For new installations or users switching to Vector, your data will be stored in the ``openedx`` database. You can migrate existing data from the ``xapi`` database to ``openedx`` if needed. + + Installation ============ diff --git a/tutoraspects/patches/xapi-db-load-config-yaml b/tutoraspects/patches/xapi-db-load-config-yaml index bccd12e72..fb1b5ab8f 100644 --- a/tutoraspects/patches/xapi-db-load-config-yaml +++ b/tutoraspects/patches/xapi-db-load-config-yaml @@ -12,8 +12,8 @@ lrs_password: "{{RALPH_LMS_PASSWORD}}" # Run options log_dir: logs -num_xapi_batches: 10 -batch_size: 100000 +num_xapi_batches: 3 +batch_size: 100 num_workers: 4 # Overall start and end date for the entire run diff --git a/tutoraspects/plugin.py b/tutoraspects/plugin.py index 448356817..0aa4d82d5 100644 --- a/tutoraspects/plugin.py +++ b/tutoraspects/plugin.py @@ -389,7 +389,7 @@ # For now we are pulling this from github, which should allow maximum # flexibility for forking, running branches, specific versions, etc. 
("DBT_REPOSITORY", "https://github.com/openedx/aspects-dbt"), - ("DBT_BRANCH", "v6.1.1"), + ("DBT_BRANCH", "v7.0.0"), ("DBT_SSH_KEY", ""), ("DBT_STATE_DIR", "/app/aspects-dbt/state"), ("DBT_PROFILES_DIR", "/app/aspects/dbt/"), diff --git a/tutoraspects/templates/aspects/build/aspects/requirements.txt b/tutoraspects/templates/aspects/build/aspects/requirements.txt index 9d2e003ac..b018e615d 100644 --- a/tutoraspects/templates/aspects/build/aspects/requirements.txt +++ b/tutoraspects/templates/aspects/build/aspects/requirements.txt @@ -1,5 +1,5 @@ # alembic packages alembic==1.14.1 clickhouse-sqlalchemy==0.3.2 -git+https://github.com/Ian2012/xapi-db-load@tmp-v2 +git+https://github.com/openedx/xapi-db-load@3.1.0 pyyaml From 4292093c3a3009647285116308e4f1d8190258bf Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Tue, 19 May 2026 07:30:31 -0500 Subject: [PATCH 13/13] chore: cleanup bootstrap.sh script --- .../aspects/apps/aspects/scripts/bootstrap.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tutoraspects/templates/aspects/apps/aspects/scripts/bootstrap.sh b/tutoraspects/templates/aspects/apps/aspects/scripts/bootstrap.sh index be4deb0b7..12275e506 100644 --- a/tutoraspects/templates/aspects/apps/aspects/scripts/bootstrap.sh +++ b/tutoraspects/templates/aspects/apps/aspects/scripts/bootstrap.sh @@ -17,15 +17,16 @@ git config --global --add safe.directory '*' MOUNTED=false if [ -d "aspects-dbt/.git" ]; then - current_branch=$(git -C aspects-dbt/ describe --tags --exact-match 2>/dev/null || \ - git -C aspects-dbt/ branch --show-current 2>/dev/null || \ - git -C aspects-dbt/ rev-parse --short HEAD) - current_repo=$(git -C aspects-dbt/ config --get remote.origin.url) if [ ! 
-f "aspects-dbt/.git/shallow" ]; then MOUNTED=true fi fi +current_branch=$(git -C aspects-dbt/ describe --tags --exact-match 2>/dev/null || \ + git -C aspects-dbt/ branch --show-current 2>/dev/null || \ + git -C aspects-dbt/ rev-parse --short HEAD) +current_repo=$(git -C aspects-dbt/ config --get remote.origin.url) + if [ "$MOUNTED" = true ]; then echo "Using mounted repo (branch: ${current_branch})" else @@ -56,4 +57,4 @@ fi echo "Installing dbt dependencies" dbt deps -mkdir -p "${DBT_STATE}" \ No newline at end of file +mkdir -p "${DBT_STATE}"