# Release Canary #753
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Canary workflow: exercises the published install paths on several
# platforms and checks that `openshell status` succeeds on each.
name: Release Canary

# Runs on demand, and every time the "Release Dev" workflow completes.
# Each job filters on the triggering run's conclusion itself.
on:
  workflow_dispatch:
  workflow_run:
    workflows:
      - Release Dev
    types:
      - completed

# Read-only token: `actions: read` and `contents: read` are the only scopes
# granted to the jobs in this workflow.
permissions:
  actions: read
  contents: read

# Every `run:` step uses bash unless a step overrides it.
defaults:
  run:
    shell: bash
jobs:
  # macOS canary: selects the VM driver, runs install.sh from the commit under
  # test, then checks `openshell status`.
  macos:
    name: macOS Homebrew
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    runs-on: macos-latest-xlarge
    timeout-minutes: 20
    steps:
      - name: Ensure VM driver
        run: |
          set -euo pipefail
          launchctl setenv OPENSHELL_DRIVERS vm
      - name: Install and check status
        env:
          # Resolved by the runner and handed to the script as data, so the
          # expression is never spliced into the shell text itself.
          INSTALL_SH_URL: https://raw.githubusercontent.com/NVIDIA/OpenShell/${{ github.event.workflow_run.head_sha || github.sha }}/install.sh
        run: |
          set -euo pipefail
          curl -LsSf "${INSTALL_SH_URL}" | sh
          openshell status

  # Ubuntu canary: makes sure Docker is running, selects the docker driver via
  # gateway.env, runs install.sh, then checks `openshell status`.
  ubuntu:
    name: Ubuntu Docker
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - name: Ensure Docker
        run: |
          set -euo pipefail
          if ! command -v docker >/dev/null 2>&1; then
            sudo apt-get update
            sudo apt-get install -y docker.io
          fi
          sudo systemctl start docker || sudo service docker start
          mkdir -p "${HOME}/.config/openshell"
          printf 'OPENSHELL_DRIVERS=docker\n' > "${HOME}/.config/openshell/gateway.env"
          docker info
      - name: Install and check status
        env:
          # Resolved by the runner and handed to the script as data, so the
          # expression is never spliced into the shell text itself.
          INSTALL_SH_URL: https://raw.githubusercontent.com/NVIDIA/OpenShell/${{ github.event.workflow_run.head_sha || github.sha }}/install.sh
        run: |
          set -euo pipefail
          curl -LsSf "${INSTALL_SH_URL}" | sh
          openshell status

  # Fedora canary: boots a privileged fedora:latest container with systemd as
  # PID 1, starts root's user manager, then runs install.sh with the podman
  # driver inside that container.
  fedora:
    name: Fedora RPM
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    runs-on: linux-amd64-cpu8
    timeout-minutes: 20
    env:
      # Unique per run and per attempt so re-runs never collide on the name.
      FEDORA_CANARY_CONTAINER: openshell-fedora-canary-${{ github.run_id }}-${{ github.run_attempt }}
    steps:
      - name: Start Fedora systemd container and root user manager
        run: |
          set -euo pipefail
          docker run --detach \
            --name "${FEDORA_CANARY_CONTAINER}" \
            --privileged \
            --cgroupns=host \
            --tmpfs /run \
            --tmpfs /tmp \
            --volume /sys/fs/cgroup:/sys/fs/cgroup:rw \
            fedora:latest \
            bash -lc 'dnf install -y curl dbus-daemon podman systemd && exec /usr/sbin/init'
          # Poll until systemd answers inside the container; give up early if
          # the container itself has stopped.
          for _ in $(seq 1 120); do
            if docker exec "${FEDORA_CANARY_CONTAINER}" systemctl list-units --no-pager >/dev/null 2>&1; then
              break
            fi
            if [ "$(docker inspect -f '{{.State.Running}}' "${FEDORA_CANARY_CONTAINER}")" != "true" ]; then
              echo "::error::Fedora systemd container exited before systemd became reachable"
              docker logs "${FEDORA_CANARY_CONTAINER}" >&2 || true
              exit 1
            fi
            sleep 1
          done
          # The loop also falls through after the last attempt, so re-check
          # once to tell "reachable" apart from "timed out".
          if ! docker exec "${FEDORA_CANARY_CONTAINER}" systemctl list-units --no-pager >/dev/null 2>&1; then
            echo "::error::Fedora systemd container did not become reachable within 120s"
            docker logs "${FEDORA_CANARY_CONTAINER}" >&2 || true
            exit 1
          fi
          docker exec --interactive "${FEDORA_CANARY_CONTAINER}" env \
            HOME=/root \
            XDG_RUNTIME_DIR=/run/user/0 \
            DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/0/bus \
            bash -s <<'EOF'
          set -euo pipefail
          # install.sh manages the RPM gateway as a systemd user unit. This
          # container is booted with systemd as PID 1, but it still has no
          # login session. Start root's user manager explicitly so the
          # installer can test service restart and gateway registration
          # instead of its "restart later" fallback.
          mkdir -p "${XDG_RUNTIME_DIR}"
          chmod 700 "${XDG_RUNTIME_DIR}"
          systemctl start user-runtime-dir@0.service || true
          systemctl start user@0.service
          for _ in $(seq 1 30); do
            if systemctl --user daemon-reload; then
              break
            fi
            sleep 1
          done
          if ! systemctl --user daemon-reload; then
            systemctl status user@0.service --no-pager >&2 || true
            journalctl -u user@0.service --no-pager -n 80 >&2 || true
            systemctl --user status --no-pager >&2 || true
            exit 1
          fi
          EOF
      - name: Install and check status
        env:
          # Resolved by the runner and handed to the script as data; the
          # script forwards it into the container through `env` below.
          INSTALL_SH_URL: https://raw.githubusercontent.com/NVIDIA/OpenShell/${{ github.event.workflow_run.head_sha || github.sha }}/install.sh
        run: |
          set -euo pipefail
          docker exec --interactive "${FEDORA_CANARY_CONTAINER}" env \
            HOME=/root \
            XDG_RUNTIME_DIR=/run/user/0 \
            DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/0/bus \
            INSTALL_SH_URL="${INSTALL_SH_URL}" \
            bash -s <<'EOF'
          set -euo pipefail
          mkdir -p "${HOME}/.config/openshell"
          printf 'OPENSHELL_DRIVERS=podman\n' > "${HOME}/.config/openshell/gateway.env"
          podman info
          curl -LsSf "${INSTALL_SH_URL}" | sh
          openshell status
          EOF
      - name: Stop Fedora systemd container
        # Best-effort cleanup that runs whether or not earlier steps passed.
        if: always()
        run: |
          docker rm -f "${FEDORA_CANARY_CONTAINER}" >/dev/null 2>&1 || true

  # Snap canary: installs the snap built by the triggering run and registers
  # its gateway. Unlike the other jobs this one has no workflow_dispatch path;
  # presumably because the download step needs the triggering run's id, which
  # only exists for workflow_run events.
  ubuntu-snap:
    name: Ubuntu Snap
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - name: Install snapd
        run: |
          set -euo pipefail
          sudo apt-get update
          sudo apt-get install -y snapd
          sudo systemctl enable --now snapd.socket
          sudo systemctl start snapd
          sudo snap wait system seed.loaded
      - name: Install Docker snap
        run: |
          set -euo pipefail
          sudo snap install docker
      - name: Download snap from release-dev artifacts
        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
        with:
          github-token: ${{ github.token }}
          run-id: ${{ github.event.workflow_run.id }}
          pattern: snap-linux-amd64
          path: release/
          merge-multiple: true
      - name: Install snap (dangerous — from release, not store)
        run: |
          set -euo pipefail
          sudo snap install ./release/*.snap --dangerous
      - name: Connect interfaces
        run: |
          set -euo pipefail
          sudo snap connect openshell:docker docker:docker-daemon
          sudo snap connect openshell:log-observe
          sudo snap connect openshell:system-observe
          sudo snap connect openshell:ssh-keys
      - name: Register snap gateway and check status
        run: |
          set -euo pipefail
          openshell --version
          sudo snap services openshell
          openshell gateway add http://127.0.0.1:17670 --local --name snap-docker
          openshell gateway select snap-docker
          openshell status

  # Kubernetes canary: installs the Helm chart into a throwaway kind cluster,
  # port-forwards the gateway service to localhost, then points the CLI at it.
  kubernetes:
    name: Kubernetes Helm (kind)
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    timeout-minutes: 20
    env:
      KIND_CLUSTER_NAME: release-canary-${{ github.run_id }}
      RELEASE_NAME: openshell
      RELEASE_NAMESPACE: openshell
      KIND_GATEWAY_NAME: kind
    steps:
      - name: Install Helm
        uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2  # v5.0.0
      - name: Create kind cluster
        uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc  # v1.14.0
        with:
          cluster_name: ${{ env.KIND_CLUSTER_NAME }}
          wait: 120s
      - name: Install OpenShell Helm chart from GHCR OCI
        run: |
          set -euo pipefail
          helm install "$RELEASE_NAME" oci://ghcr.io/nvidia/openshell/helm-chart \
            --version 0.0.0-dev \
            --namespace "$RELEASE_NAMESPACE" --create-namespace \
            --set server.disableTls=true \
            --wait --timeout 5m
      - name: Verify gateway pod is Ready
        run: |
          set -euo pipefail
          kubectl wait --namespace "$RELEASE_NAMESPACE" \
            --for=condition=Ready pod \
            --selector="app.kubernetes.io/name=openshell,app.kubernetes.io/instance=${RELEASE_NAME}" \
            --timeout=300s
      - name: Port-forward gateway service
        run: |
          set -euo pipefail
          # Runs in the background so later steps can reach the gateway on
          # 127.0.0.1:8080; output is kept for the failure diagnostics step.
          nohup kubectl port-forward --namespace "$RELEASE_NAMESPACE" \
            "svc/${RELEASE_NAME}" 8080:8080 \
            > port-forward.log 2>&1 &
          pf_pid=$!
          echo "${pf_pid}" > port-forward.pid
          for _ in $(seq 1 30); do
            if (echo > /dev/tcp/127.0.0.1/8080) >/dev/null 2>&1; then
              echo "port-forward is reachable"
              exit 0
            fi
            # Stop waiting as soon as kubectl itself has died; the port can
            # never open after that, so show its log right away.
            if ! kill -0 "${pf_pid}" 2>/dev/null; then
              echo "port-forward exited before becoming reachable" >&2
              cat port-forward.log >&2
              exit 1
            fi
            sleep 1
          done
          echo "port-forward did not become reachable" >&2
          cat port-forward.log >&2
          exit 1
      - name: Install OpenShell CLI
        env:
          # Resolved by the runner and handed to the script as data, so the
          # expression is never spliced into the shell text itself.
          INSTALL_SH_URL: https://raw.githubusercontent.com/NVIDIA/OpenShell/${{ github.event.workflow_run.head_sha || github.sha }}/install.sh
        run: |
          set -euo pipefail
          mkdir -p "${HOME}/.config/openshell"
          printf 'OPENSHELL_DRIVERS=docker\n' > "${HOME}/.config/openshell/gateway.env"
          curl -LsSf "${INSTALL_SH_URL}" | sh
      - name: Register kind gateway and check status
        run: |
          set -euo pipefail
          openshell gateway add http://127.0.0.1:8080 --local --name "$KIND_GATEWAY_NAME"
          openshell status
      - name: Diagnostics on failure
        if: failure()
        run: |
          # Keep going past individual command failures so every section of
          # the dump is attempted.
          set +e
          echo "--- helm status ---"
          helm status "$RELEASE_NAME" --namespace "$RELEASE_NAMESPACE"
          echo "--- helm get manifest ---"
          helm get manifest "$RELEASE_NAME" --namespace "$RELEASE_NAMESPACE"
          echo "--- get all ---"
          kubectl get all --namespace "$RELEASE_NAMESPACE"
          echo "--- describe pods ---"
          kubectl describe pods --namespace "$RELEASE_NAMESPACE"
          echo "--- pod logs ---"
          kubectl logs --namespace "$RELEASE_NAMESPACE" \
            --selector="app.kubernetes.io/name=openshell,app.kubernetes.io/instance=${RELEASE_NAME}" \
            --tail=200 --all-containers --prefix
          echo "--- port-forward log ---"
          cat port-forward.log 2>/dev/null
          echo "--- openshell gateway list ---"
          openshell gateway list 2>/dev/null
          echo "--- openshell version ---"
          openshell --version 2>/dev/null