diff --git a/evals/claude-code/eval.yaml b/evals/claude-code/eval.yaml index e743603bb..3fff7de1c 100644 --- a/evals/claude-code/eval.yaml +++ b/evals/claude-code/eval.yaml @@ -53,6 +53,46 @@ config: toolPattern: ".*" minToolCalls: 1 maxToolCalls: 20 + - glob: ../tasks/kubevirt/*/*.yaml + labelSelector: + expected-tool: vm_create + assertions: + toolsUsed: + - server: kubernetes + tool: vm_create + - glob: ../tasks/kubevirt/*/*.yaml + labelSelector: + expected-tool: vm_clone + assertions: + toolsUsed: + - server: kubernetes + tool: vm_clone + - glob: ../tasks/kubevirt/*/*.yaml + labelSelector: + expected-tool: vm_lifecycle + assertions: + toolsUsed: + - server: kubernetes + tool: vm_lifecycle + - glob: ../tasks/kubevirt/*/*.yaml + labelSelector: + expected-tool: resources_delete + assertions: + toolsUsed: + - server: kubernetes + tool: resources_delete + - glob: ../tasks/kubevirt/*/*.yaml + labelSelector: + expected-tool: resources_create_or_update + assertions: + toolsUsed: + - server: kubernetes + tool: resources_create_or_update + - path: ../tasks/kubevirt/troubleshoot-vm/task.yaml + assertions: + promptsUsed: + - server: kubernetes + prompt: vm-troubleshoot # Kiali tasks - glob: ../tasks/*/*/*.yaml labelSelector: diff --git a/evals/openai-agent/eval.yaml b/evals/openai-agent/eval.yaml index 13f5b6dd0..a1208f57d 100644 --- a/evals/openai-agent/eval.yaml +++ b/evals/openai-agent/eval.yaml @@ -53,6 +53,46 @@ config: toolPattern: ".*" minToolCalls: 1 maxToolCalls: 20 + - glob: ../tasks/kubevirt/*/*.yaml + labelSelector: + expected-tool: vm_create + assertions: + toolsUsed: + - server: kubernetes + tool: vm_create + - glob: ../tasks/kubevirt/*/*.yaml + labelSelector: + expected-tool: vm_clone + assertions: + toolsUsed: + - server: kubernetes + tool: vm_clone + - glob: ../tasks/kubevirt/*/*.yaml + labelSelector: + expected-tool: vm_lifecycle + assertions: + toolsUsed: + - server: kubernetes + tool: vm_lifecycle + - glob: ../tasks/kubevirt/*/*.yaml + labelSelector: + expected-tool: resources_delete + assertions: + toolsUsed: + - server: kubernetes + tool: resources_delete + - glob: ../tasks/kubevirt/*/*.yaml + labelSelector: + expected-tool: resources_create_or_update + assertions: + toolsUsed: + - server: kubernetes + tool: resources_create_or_update + - path: ../tasks/kubevirt/troubleshoot-vm/task.yaml + assertions: + promptsUsed: + - server: kubernetes + prompt: vm-troubleshoot # Kiali tasks - glob: ../tasks/*/*/*.yaml labelSelector: diff --git a/evals/tasks/kubevirt/claude-code/eval.yaml b/evals/tasks/kubevirt/claude-code/eval.yaml deleted file mode 100644 index e35e53f6a..000000000 --- a/evals/tasks/kubevirt/claude-code/eval.yaml +++ /dev/null @@ -1,18 +0,0 @@ -kind: Eval -metadata: - name: "kubevirt-basic-operations" -config: - agent: - type: "builtin.claude-code" - mcpConfigFile: ../../../mcp-config.yaml - extensions: - kubernetes: - package: https://github.com/mcpchecker/kubernetes-extension@v0.0.4 - taskSets: - - glob: ../*/task.yaml - assertions: - toolsUsed: - - server: kubernetes - toolPattern: ".*" - minToolCalls: 1 - maxToolCalls: 20 diff --git a/evals/tasks/kubevirt/clone-vm/task.yaml b/evals/tasks/kubevirt/clone-vm/task.yaml index ec61ede16..c4424e043 100644 --- a/evals/tasks/kubevirt/clone-vm/task.yaml +++ b/evals/tasks/kubevirt/clone-vm/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: vm_clone name: "clone-vm" difficulty: medium spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-clone ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-clone - k8s.create: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: source-vm - namespace: vm-test + namespace: kvt-clone labels: app: source spec: @@ -48,7 +49,7 @@ spec: inline: |- #!/usr/bin/env bash set -e - NS="vm-test" + NS="kvt-clone" SOURCE_VM="source-vm" TARGET_VM="cloned-vm" @@ -83,27 +84,27 @@ spec: - script: inline: |- #!/usr/bin/env bash - kubectl delete virtualmachineclone --all -n vm-test --ignore-not-found=true 2>/dev/null || true + kubectl delete virtualmachineclone --all -n kvt-clone --ignore-not-found=true 2>/dev/null || true - k8s.delete: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: cloned-vm - namespace: vm-test + namespace: kvt-clone ignoreNotFound: true - k8s.delete: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: source-vm - namespace: vm-test + namespace: kvt-clone ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-clone ignoreNotFound: true prompt: inline: | - Clone the virtual machine named source-vm in the vm-test namespace to a new virtual machine named cloned-vm. + Clone the virtual machine named source-vm in the kvt-clone namespace to a new virtual machine named cloned-vm. diff --git a/evals/tasks/kubevirt/create-vm-basic/task.yaml b/evals/tasks/kubevirt/create-vm-basic/task.yaml index 64a552346..b6ea4602b 100644 --- a/evals/tasks/kubevirt/create-vm-basic/task.yaml +++ b/evals/tasks/kubevirt/create-vm-basic/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: vm_create name: "create-basic-vm" difficulty: easy spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-basic ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-basic verify: - script: inline: |- #!/usr/bin/env bash source ../helpers/verify-vm.sh - NS="vm-test" + NS="kvt-create-basic" verify_vm_exists "test-vm" "$NS" || exit 1 verify_container_disk "test-vm" "$NS" "fedora" || exit 1 @@ -42,13 +43,13 @@ spec: kind: VirtualMachine metadata: name: test-vm - namespace: vm-test + namespace: kvt-create-basic ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-basic ignoreNotFound: true prompt: - inline: Create a Fedora virtual machine named test-vm in the vm-test namespace. + inline: Create a Fedora virtual machine named test-vm in the kvt-create-basic namespace. diff --git a/evals/tasks/kubevirt/create-vm-ubuntu/task.yaml b/evals/tasks/kubevirt/create-vm-ubuntu/task.yaml index f9d86f255..f939d85a6 100644 --- a/evals/tasks/kubevirt/create-vm-ubuntu/task.yaml +++ b/evals/tasks/kubevirt/create-vm-ubuntu/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: vm_create name: "create-ubuntu-vm" difficulty: easy spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-ubuntu ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-ubuntu verify: - script: inline: |- #!/usr/bin/env bash source ../helpers/verify-vm.sh - NS="vm-test" + NS="kvt-create-ubuntu" verify_vm_exists "ubuntu-vm" "$NS" || exit 1 verify_container_disk "ubuntu-vm" "$NS" "ubuntu" || exit 1 @@ -42,13 +43,13 @@ spec: kind: VirtualMachine metadata: name: ubuntu-vm - namespace: vm-test + namespace: kvt-create-ubuntu ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-ubuntu ignoreNotFound: true prompt: - inline: Create an Ubuntu virtual machine named ubuntu-vm in the vm-test namespace. + inline: Create an Ubuntu virtual machine named ubuntu-vm in the kvt-create-ubuntu namespace. diff --git a/evals/tasks/kubevirt/create-vm-with-instancetype/task.yaml b/evals/tasks/kubevirt/create-vm-with-instancetype/task.yaml index ded879c20..26afd6154 100644 --- a/evals/tasks/kubevirt/create-vm-with-instancetype/task.yaml +++ b/evals/tasks/kubevirt/create-vm-with-instancetype/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: vm_create name: "create-vm-with-instancetype" difficulty: easy spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-instancetype ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-instancetype verify: - script: inline: |- #!/usr/bin/env bash source ../helpers/verify-vm.sh - NS="vm-test" + NS="kvt-create-instancetype" verify_vm_exists "test-vm-instancetype" "$NS" || exit 1 verify_instancetype "test-vm-instancetype" "$NS" "u1.medium" || exit 1 @@ -44,13 +45,13 @@ spec: kind: VirtualMachine metadata: name: test-vm-instancetype - namespace: vm-test + namespace: kvt-create-instancetype ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-instancetype ignoreNotFound: true prompt: - inline: Create a Fedora virtual machine named test-vm-instancetype in the vm-test namespace with instancetype 'u1.medium'. + inline: Create a Fedora virtual machine named test-vm-instancetype in the kvt-create-instancetype namespace with instancetype 'u1.medium'. diff --git a/evals/tasks/kubevirt/create-vm-with-size/task.yaml b/evals/tasks/kubevirt/create-vm-with-size/task.yaml index d261225ea..2268db2f5 100644 --- a/evals/tasks/kubevirt/create-vm-with-size/task.yaml +++ b/evals/tasks/kubevirt/create-vm-with-size/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: vm_create name: "create-vm-with-size" difficulty: easy spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-size ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-size verify: - script: inline: |- #!/usr/bin/env bash source ../helpers/verify-vm.sh - NS="vm-test" + NS="kvt-create-size" verify_vm_exists "test-vm-size" "$NS" || exit 1 verify_has_resources_or_instancetype "test-vm-size" "$NS" || exit 1 @@ -44,13 +45,13 @@ spec: kind: VirtualMachine metadata: name: test-vm-size - namespace: vm-test + namespace: kvt-create-size ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-size ignoreNotFound: true prompt: - inline: Create a Fedora virtual machine named test-vm-size in the vm-test namespace with size 'large' + inline: Create a Fedora virtual machine named test-vm-size in the kvt-create-size namespace with size 'large' diff --git a/evals/tasks/kubevirt/create-vm-with-vlan/task.yaml b/evals/tasks/kubevirt/create-vm-with-vlan/task.yaml index 9f4b203cd..5fc735b05 100644 --- a/evals/tasks/kubevirt/create-vm-with-vlan/task.yaml +++ b/evals/tasks/kubevirt/create-vm-with-vlan/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: vm_create name: "create-vm-with-vlan" difficulty: hard spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-vlan ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-vlan - k8s.create: apiVersion: k8s.cni.cncf.io/v1 kind: NetworkAttachmentDefinition metadata: name: vlan-network - namespace: vm-test + namespace: kvt-create-vlan spec: config: '{"bridge":"br10","cniVersion":"0.3.1","ipam":null,"macspoofchk":false,"mtu":1400,"name":"linux-bridge-net-vlan100","type":"bridge","vlan":100}' verify: @@ -35,7 +36,7 @@ spec: inline: |- #!/usr/bin/env bash source ../helpers/verify-vm.sh - NS="vm-test" + NS="kvt-create-vlan" verify_vm_exists "test-vm" "$NS" || exit 1 verify_container_disk "test-vm" "$NS" "fedora" || exit 1 @@ -51,20 +52,20 @@ spec: kind: VirtualMachine metadata: name: test-vm - namespace: vm-test + namespace: kvt-create-vlan ignoreNotFound: true - k8s.delete: apiVersion: k8s.cni.cncf.io/v1 kind: NetworkAttachmentDefinition metadata: name: vlan-network - namespace: vm-test + namespace: kvt-create-vlan ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-create-vlan ignoreNotFound: true prompt: - inline: Please create a Fedora virtual machine named test-vm in the vm-test namespace with a secondary network interface connected to the vlan-network multus network. + inline: Please create a Fedora virtual machine named test-vm in the kvt-create-vlan namespace with a secondary network interface connected to the vlan-network multus network. diff --git a/evals/tasks/kubevirt/delete-vm/task.yaml b/evals/tasks/kubevirt/delete-vm/task.yaml index 0114f974e..4bb9d2e02 100644 --- a/evals/tasks/kubevirt/delete-vm/task.yaml +++ b/evals/tasks/kubevirt/delete-vm/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: resources_delete name: "delete-vm" difficulty: medium spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-delete ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-delete - k8s.create: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: deleted-vm - namespace: vm-test + namespace: kvt-delete spec: runStrategy: Always template: @@ -51,7 +52,7 @@ spec: kind: VirtualMachine metadata: name: deleted-vm - namespace: vm-test + namespace: kvt-delete condition: Ready timeout: 30s verify: @@ -60,14 +61,14 @@ spec: - script: inline: |- #!/usr/bin/env bash - kubectl wait --for=delete vm/deleted-vm -n vm-test --timeout 30s || exit 1 + kubectl wait --for=delete vm/deleted-vm -n kvt-delete --timeout 30s || exit 1 cleanup: - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-delete ignoreNotFound: true prompt: inline: | - Please delete the virtual machine named deleted-vm in the vm-test namespace. + Please delete the virtual machine named deleted-vm in the kvt-delete namespace. diff --git a/evals/tasks/kubevirt/openai-agent/eval.yaml b/evals/tasks/kubevirt/openai-agent/eval.yaml deleted file mode 100644 index 75784a467..000000000 --- a/evals/tasks/kubevirt/openai-agent/eval.yaml +++ /dev/null @@ -1,19 +0,0 @@ -kind: Eval -metadata: - name: "kubevirt-basic-operations" -config: - agent: - type: "builtin.openai-agent" - model: "gemini-2.0-flash" - mcpConfigFile: ../../../mcp-config.yaml - extensions: - kubernetes: - package: https://github.com/mcpchecker/kubernetes-extension@v0.0.3 - taskSets: - - glob: ../*/task.yaml - assertions: - toolsUsed: - - server: kubernetes - toolPattern: ".*" - minToolCalls: 1 - maxToolCalls: 20 diff --git a/evals/tasks/kubevirt/pause-vm/task.yaml b/evals/tasks/kubevirt/pause-vm/task.yaml index 2f1d57162..1722ad5b9 100644 --- a/evals/tasks/kubevirt/pause-vm/task.yaml +++ b/evals/tasks/kubevirt/pause-vm/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: vm_lifecycle name: "pause-vm" difficulty: medium spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-pause ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-pause - k8s.create: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: paused-vm - namespace: vm-test + namespace: kvt-pause spec: runStrategy: Always template: @@ -51,7 +52,7 @@ spec: kind: VirtualMachine metadata: name: paused-vm - namespace: vm-test + namespace: kvt-pause condition: Ready timeout: 30s verify: @@ -60,7 +61,7 @@ spec: kind: VirtualMachine metadata: name: paused-vm - namespace: vm-test + namespace: kvt-pause condition: Paused timeout: 5s cleanup: @@ -69,14 +70,14 @@ spec: kind: VirtualMachine metadata: name: paused-vm - namespace: vm-test + namespace: kvt-pause ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-pause ignoreNotFound: true prompt: inline: | - Please pause the virtual machine named paused-vm in the vm-test namespace. + Please pause the virtual machine named paused-vm in the kvt-pause namespace. diff --git a/evals/tasks/kubevirt/restore-vm/task.yaml b/evals/tasks/kubevirt/restore-vm/task.yaml index 593b3c6c6..8ff0c89db 100644 --- a/evals/tasks/kubevirt/restore-vm/task.yaml +++ b/evals/tasks/kubevirt/restore-vm/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: resources_create_or_update name: "restore-vm" difficulty: medium spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-restore ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-restore - k8s.create: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: restore-test-vm - namespace: vm-test + namespace: kvt-restore spec: runStrategy: Always template: @@ -43,7 +44,7 @@ spec: kind: VirtualMachineSnapshot metadata: name: restore-snapshot - namespace: vm-test + namespace: kvt-restore spec: source: apiGroup: kubevirt.io @@ -54,7 +55,7 @@ spec: kind: VirtualMachineSnapshot metadata: name: restore-snapshot - namespace: vm-test + namespace: kvt-restore condition: Ready timeout: 30s verify: @@ -62,7 +63,7 @@ spec: inline: |- #!/usr/bin/env bash set -e - NS="vm-test" + NS="kvt-restore" RESTORE_NAME="test-restore" # Verify restore was created @@ -93,35 +94,35 @@ spec: kind: VirtualMachineRestore metadata: name: test-restore - namespace: vm-test + namespace: kvt-restore ignoreNotFound: true - k8s.delete: apiVersion: snapshot.kubevirt.io/v1beta1 kind: VirtualMachineSnapshot metadata: name: restore-snapshot - namespace: vm-test + namespace: kvt-restore ignoreNotFound: true - k8s.delete: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: restore-test-vm - namespace: vm-test + namespace: kvt-restore ignoreNotFound: true - k8s.delete: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: restored-vm - namespace: vm-test + namespace: kvt-restore ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-restore ignoreNotFound: true prompt: inline: | - Restore the snapshot named restore-snapshot in the vm-test namespace to a new virtual machine named restored-vm. Use the restore name test-restore. + Restore the snapshot named restore-snapshot in the kvt-restore namespace to a new virtual machine named restored-vm. Use the restore name test-restore. diff --git a/evals/tasks/kubevirt/snapshot-vm/task.yaml b/evals/tasks/kubevirt/snapshot-vm/task.yaml index 714b37f34..eead6b3e2 100644 --- a/evals/tasks/kubevirt/snapshot-vm/task.yaml +++ b/evals/tasks/kubevirt/snapshot-vm/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: resources_create_or_update name: "snapshot-vm" difficulty: medium spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-snapshot ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-snapshot - k8s.create: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: snapshot-test-vm - namespace: vm-test + namespace: kvt-snapshot spec: runStrategy: Always template: @@ -43,7 +44,7 @@ spec: inline: |- #!/usr/bin/env bash set -e - NS="vm-test" + NS="kvt-snapshot" SNAPSHOT_NAME="test-snapshot" # Verify snapshot was created @@ -67,21 +68,21 @@ spec: kind: VirtualMachineSnapshot metadata: name: test-snapshot - namespace: vm-test + namespace: kvt-snapshot ignoreNotFound: true - k8s.delete: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: snapshot-test-vm - namespace: vm-test + namespace: kvt-snapshot ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-snapshot ignoreNotFound: true prompt: inline: | - Create a snapshot named test-snapshot of the virtual machine snapshot-test-vm in the vm-test namespace. + Create a snapshot named test-snapshot of the virtual machine snapshot-test-vm in the kvt-snapshot namespace. diff --git a/evals/tasks/kubevirt/troubleshoot-vm/task.yaml b/evals/tasks/kubevirt/troubleshoot-vm/task.yaml index d1e257de7..a1329f931 100644 --- a/evals/tasks/kubevirt/troubleshoot-vm/task.yaml +++ b/evals/tasks/kubevirt/troubleshoot-vm/task.yaml @@ -16,13 +16,13 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-troubleshoot ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-troubleshoot # Create a VM that references a missing Secret for cloud-init. # The agent should identify the missing Secret and create it to fix the VM. - k8s.create: @@ -30,7 +30,7 @@ spec: kind: VirtualMachine metadata: name: broken-vm - namespace: vm-test + namespace: kvt-troubleshoot labels: app: broken-vm spec: @@ -68,7 +68,7 @@ spec: - script: inline: |- #!/usr/bin/env bash - NS="vm-test" + NS="kvt-troubleshoot" echo "=== Verification: Checking if agent fixed the VM ===" @@ -89,40 +89,12 @@ spec: # Wait for VM to become ready after the fix (with timeout) echo "Waiting for VM to become ready after fix..." - READY=false - for i in {1..30}; do - VM_READY=$(kubectl get virtualmachine broken-vm -n "$NS" -o jsonpath='{.status.ready}' 2>/dev/null || echo "false") - if [[ "$VM_READY" == "true" ]]; then - READY=true - break - fi - sleep 5 - done - - if [[ "$READY" == "true" ]]; then - echo "✓ VM is now ready - fix was successful!" - else + if ! kubectl wait --for=condition=Ready virtualmachine/broken-vm -n "$NS" --timeout=150s 2>/dev/null; then VM_STATUS=$(kubectl get virtualmachine broken-vm -n "$NS" -o jsonpath='{.status.printableStatus}' 2>/dev/null || echo "Unknown") - echo "⚠ VM is not ready yet (status: $VM_STATUS) - fix may need more time or was incomplete" - fi - - # Check if virt-launcher pod exists and is running - LAUNCHER_POD=$(kubectl get pods -n "$NS" -l kubevirt.io=virt-launcher,vm.kubevirt.io/name=broken-vm -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true) - if [[ -n "$LAUNCHER_POD" ]]; then - POD_PHASE=$(kubectl get pod "$LAUNCHER_POD" -n "$NS" -o jsonpath='{.status.phase}' 2>/dev/null || true) - echo "✓ virt-launcher pod exists (phase: $POD_PHASE)" - else - echo "ℹ No virt-launcher pod found yet" + echo "✗ VM is not ready (status: $VM_STATUS) - fix was unsuccessful" + exit 1 fi - - echo "" - echo "=== Troubleshooting and Fix Eval Complete ===" - echo "The agent should have:" - echo " 1. Used the vm-troubleshoot prompt with namespace=$NS and name=broken-vm" - echo " 2. Identified the root cause (missing Secret vm-cloud-init)" - echo " 3. Created the missing Secret to fix the VM" - echo " 4. Reported the action taken and result" - echo "" + echo "✓ VM is now ready - fix was successful!" exit 0 cleanup: @@ -131,24 +103,24 @@ spec: kind: VirtualMachine metadata: name: broken-vm - namespace: vm-test + namespace: kvt-troubleshoot ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Secret metadata: name: vm-cloud-init - namespace: vm-test + namespace: kvt-troubleshoot ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-troubleshoot ignoreNotFound: true prompt: inline: |- - There is a VirtualMachine named "broken-vm" in the vm-test namespace that is not working correctly. + There is a VirtualMachine named "broken-vm" in the kvt-troubleshoot namespace that is not working correctly. Please use the vm-troubleshoot prompt to diagnose the issue with this VirtualMachine. Follow the troubleshooting guide to identify the problem, fix it, and report your findings including: diff --git a/evals/tasks/kubevirt/update-vm-resources/task.yaml b/evals/tasks/kubevirt/update-vm-resources/task.yaml index 4cca3ef2c..7e3605a8f 100644 --- a/evals/tasks/kubevirt/update-vm-resources/task.yaml +++ b/evals/tasks/kubevirt/update-vm-resources/task.yaml @@ -4,6 +4,7 @@ metadata: labels: suite: kubevirt requires: kubevirt + expected-tool: resources_create_or_update name: "update-vm-resources" difficulty: hard spec: @@ -15,19 +16,19 @@ spec: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-update ignoreNotFound: true - k8s.create: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-update - k8s.create: apiVersion: kubevirt.io/v1 kind: VirtualMachine metadata: name: test-vm-update - namespace: vm-test + namespace: kvt-update spec: runStrategy: Halted template: @@ -51,7 +52,7 @@ spec: inline: |- #!/usr/bin/env bash source ../helpers/verify-vm.sh - NS="vm-test" + NS="kvt-update" verify_vm_exists "test-vm-update" "$NS" || exit 1 verify_cpu_cores "test-vm-update" "$NS" 2 || exit 1 @@ -65,17 +66,17 @@ spec: kind: VirtualMachine metadata: name: test-vm-update - namespace: vm-test + namespace: kvt-update ignoreNotFound: true - k8s.delete: apiVersion: v1 kind: Namespace metadata: - name: vm-test + name: kvt-update ignoreNotFound: true prompt: inline: |- - A VirtualMachine named test-vm-update exists in the vm-test namespace. + A VirtualMachine named test-vm-update exists in the kvt-update namespace. It currently has 1 vCPU and 2Gi of memory.