diff --git a/README.md b/README.md index 955fc981..d745fc97 100644 --- a/README.md +++ b/README.md @@ -158,13 +158,15 @@ A TD image based on Ubuntu 25.04 can be generated with the following commands: ```bash cd tdx/guest-tools/image/ -sudo ./create-td-image.sh -v 25.04 +sudo ./create-td-image.sh -v 25.04 -k "" ``` You can pass `24.04` or `25.04` to the `-v` to generate a TD image based on Ubuntu 24.04 or 25.04. The resulting image will be based on an ([`Ubuntu cloud image`](https://cloud-images.ubuntu.com/)), -the default root password is `123456`, and other default settings are used. +with other default settings. +Login is via the SSH private key matching the public key you passed with `-k` on the command line; +password login is disabled for both root and regular users. Please note the most important options described after the commands and take a look at the `create-td-image.sh` script for more available options. Important options for TD image creation: diff --git a/gpu-cc/h100/setup-gpus.sh b/gpu-cc/h100/setup-gpus.sh index e5cf1567..bdd87da8 100755 --- a/gpu-cc/h100/setup-gpus.sh +++ b/gpu-cc/h100/setup-gpus.sh @@ -49,10 +49,12 @@ if [ "$EUID" -ne 0 ] exit fi -if [ ! -d "nvtrust" ]; then - rm -rf nvtrust - git clone -b 2025.4.11.001 --recursive https://github.com/NVIDIA/nvtrust.git -fi +get_nvtrust() { + if [ ! -d "nvtrust" ]; then + rm -rf nvtrust + git clone -b 2025.4.11.001 --recursive https://github.com/NVIDIA/nvtrust.git + fi +} nvidia_h100_bdfs() { while read -r line @@ -73,12 +75,14 @@ nvidia_nvlink_bdfs() { } enable_cc_mode() { + if [ ! -f ./nvtrust/host_tools/python/nvidia_gpu_tools.py ]; then get_nvtrust; fi GPU_BDF=$1 ./nvtrust/host_tools/python/nvidia_gpu_tools.py --set-ppcie-mode=off --reset-after-ppcie-mode-switch --gpu-bdf=${GPU_BDF} ./nvtrust/host_tools/python/nvidia_gpu_tools.py --set-cc-mode=on --reset-after-cc-mode-switch --gpu-bdf=${GPU_BDF} } enable_ppcie_mode() { + if [ ! 
-f ./nvtrust/host_tools/python/nvidia_gpu_tools.py ]; then get_nvtrust; fi GPU_BDF=$1 ./nvtrust/host_tools/python/nvidia_gpu_tools.py --set-cc-mode=off --reset-after-cc-mode-switch --gpu-bdf=${GPU_BDF} ./nvtrust/host_tools/python/nvidia_gpu_tools.py --set-ppcie-mode=on --reset-after-ppcie-mode-switch --gpu-bdf=${GPU_BDF} @@ -97,6 +101,8 @@ gpus_bdfs() { GPUS=$(gpus_bdfs) NB_GPUS=$(echo ${GPUS} | wc -w) +SB_STATE_OUTPUT=$(mokutil --sb-state 2>&1) +SB_STATE= if [ ! -z "$1" ]; then if [ "$1" != "*" ]; then @@ -104,7 +110,7 @@ if [ ! -z "$1" ]; then fi # Setup NVSwitches (if nb of GPUs equal to 8) - if [ ${NB_GPUS} -eq 8 ]; then + if [ ${NB_GPUS} -eq 8 ] && [ "$(mokutil --sb-state)" != "SecureBoot enabled" ]; then NVSWITCHES=$(nvidia_nvlink_bdfs) for nvswitch_bdf in ${NVSWITCHES} do @@ -115,13 +121,15 @@ if [ ! -z "$1" ]; then for gpu_bdf in ${GPUS} do - if [ ${NB_GPUS} -eq 8 ]; then - echo "======= Prepare ${gpu_bdf} for PPCIe" - enable_ppcie_mode ${gpu_bdf} - else - echo "======= Prepare ${gpu_bdf} for CC" - enable_cc_mode ${gpu_bdf} - fi + if [ "$(mokutil --sb-state)" != "SecureBoot enabled" ]; then + if [ ${NB_GPUS} -eq 8 ]; then + echo "======= Prepare ${gpu_bdf} for PPCIe" + enable_ppcie_mode ${gpu_bdf} + else + echo "======= Prepare ${gpu_bdf} for CC" + enable_cc_mode ${gpu_bdf} + fi + fi # virsh expect input format : pci_0000_b8_00_0 virsh_gpu_bdf=$(echo "${gpu_bdf}" | tr :. 
_) diff --git a/guest-tools/image/cloud-init-data/.gitignore b/guest-tools/image/cloud-init-data/.gitignore new file mode 100644 index 00000000..31dffaf5 --- /dev/null +++ b/guest-tools/image/cloud-init-data/.gitignore @@ -0,0 +1 @@ +user-data diff --git a/guest-tools/image/cloud-init-data/user-data b/guest-tools/image/cloud-init-data/user-data deleted file mode 100644 index fc6b0d18..00000000 --- a/guest-tools/image/cloud-init-data/user-data +++ /dev/null @@ -1,47 +0,0 @@ -#cloud-config - -write_files: -- content: | - network: - version: 2 - renderer: networkd - ethernets: - en: - match: - name: "en*" - dhcp4: true - dhcp-identifier: mac - path: /etc/netplan/netplan.yaml -- content: | - =========================================================================== - Welcome to Ubuntu - Created by Kobuk team - =========================================================================== - path: /etc/motd -- content: | - SUBSYSTEM=="misc",KERNEL=="tdx-guest",MODE="0666" - path: /etc/udev/rules.d/90-tdx.rules - -packages: - - python3-pip - - golang-go - - golang - - golang-doc - - ntp - -# HACK way to set root password -# https://github.com/vmware/photon/issues/931 -# set root password to 123456 -bootcmd: - - /bin/sed -E -i 's/^root:([^:]+):.*$/root:\$1\$root\$j0bp.KLPyr.u9kgQ428D10:17764:0:99999:7:::/' /etc/shadow - -power_state: - delay: now - mode: poweroff - message: Bye Bye - timeout: 1 - condition: True - -user: tdx -password: 123456 -chpasswd: { expire: False } diff --git a/guest-tools/image/cloud-init-data/user-data.template b/guest-tools/image/cloud-init-data/user-data.template index 6718b908..ff0f5349 100644 --- a/guest-tools/image/cloud-init-data/user-data.template +++ b/guest-tools/image/cloud-init-data/user-data.template @@ -29,11 +29,8 @@ packages: - golang-doc - ntp -# HACK way to set root password +# do not set the root pw. 
Use sudo # https://github.com/vmware/photon/issues/931 -# set root password to 123456 -bootcmd: - - /bin/sed -E -i 's/^root:([^:]+):.*$/root:\$1\$root\$j0bp.KLPyr.u9kgQ428D10:17764:0:99999:7:::/' /etc/shadow power_state: delay: now diff --git a/guest-tools/image/create-td-image.sh b/guest-tools/image/create-td-image.sh index 7be44af7..a6e106b7 100755 --- a/guest-tools/image/create-td-image.sh +++ b/guest-tools/image/create-td-image.sh @@ -39,7 +39,7 @@ # UBUNTU_VERSION: the ubuntu version (24.04, 24.10, ...) # GUEST_USER: the username in the image -# GUEST_PASSWORD: the user password in the image +# GUEST_SSH_KEY: the SSH key to put in $GUEST_USER/.ssh/authorized_keys # GUEST_HOSTNAME: the guest hostname SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) @@ -54,7 +54,6 @@ FORCE_RECREATE=false TMP_GUEST_IMG_PATH="/tmp/tdx-guest-tmp.qcow2" SIZE=100 GUEST_USER=${GUEST_USER:-"tdx"} -GUEST_PASSWORD=${GUEST_PASSWORD:-"123456"} GUEST_HOSTNAME=${GUEST_HOSTNAME:-"tdx-guest"} ok() { @@ -100,7 +99,7 @@ Usage: $(basename "$0") [OPTION]... -f Force to recreate the output image -n Guest host name, default is "tdx-guest" -u Guest user name, default is "tdx" - -p Guest password, default is "123456" + -k Guest SSH authorized key entry (should be quoted) -s Specify the size of guest image -v Ubuntu version (24.04, 25.04) -o Specify the output file, default is tdx-guest-ubuntu-.qcow2. 
@@ -110,13 +109,13 @@ EOM } process_args() { - while getopts "v:o:s:n:u:p:r:fch" option; do + while getopts "v:o:s:n:u:k:r:fch" option; do case "$option" in o) GUEST_IMG_PATH=$(realpath "$OPTARG") ;; s) SIZE=${OPTARG} ;; n) GUEST_HOSTNAME=${OPTARG} ;; u) GUEST_USER=${OPTARG} ;; - p) GUEST_PASSWORD=${OPTARG} ;; + k) GUEST_SSH_KEY=${OPTARG} ;; f) FORCE_RECREATE=true ;; v) UBUNTU_VERSION=${OPTARG} ;; h) @@ -135,6 +134,10 @@ process_args() { error "Please specify the ubuntu release by setting UBUNTU_VERSION or passing it via -v" fi + if [[ -z "${GUEST_SSH_KEY}" ]]; then + error "Please specify the SSH authorized key to be configured for the user ${GUEST_USER}" + fi + # generate variables CLOUD_IMG="ubuntu-${UBUNTU_VERSION}-server-cloudimg-amd64.img" CLOUD_IMG_PATH=$(realpath "${SCRIPT_DIR}/${CLOUD_IMG}") @@ -276,9 +279,15 @@ config_cloud_init() { # configure the user-data cat <> user-data -user: $GUEST_USER -password: $GUEST_PASSWORD -chpasswd: { expire: False } +users: +- name: $GUEST_USER + gecos: TDX admin user + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + groups: users, sudo, adm + shell: /bin/bash + lock_passwd: true + ssh_authorized_keys: + - $GUEST_SSH_KEY EOT # configure the meta-dta diff --git a/guest-tools/image/setup.sh b/guest-tools/image/setup.sh index b481ba22..91202926 100755 --- a/guest-tools/image/setup.sh +++ b/guest-tools/image/setup.sh @@ -24,15 +24,6 @@ apt update # linux-tools-common for perf, please make sure that linux-tools is also installed apt install -y cpuid linux-tools-common msr-tools python3 python3-pip -# setup ssh -# allow password auth + root login -sed -i 's|[#]*PasswordAuthentication .*|PasswordAuthentication yes|g' /etc/ssh/sshd_config -sed -i 's|[#]*PermitRootLogin .*|PermitRootLogin yes|g' /etc/ssh/sshd_config -sed -i 's|[#]*KbdInteractiveAuthentication .*|KbdInteractiveAuthentication yes|g' /etc/ssh/sshd_config -# livecd-rootfs adds 60-cloudimg-settings.conf file to set PasswordAuthentication to no -# if the file exists, remove it -rm -f 
/etc/ssh/sshd_config.d/60-cloudimg-settings.conf - # Enable TDX /tmp/tdx/setup-tdx-guest.sh diff --git a/guest-tools/run_td b/guest-tools/run_td index 44f021b6..1a4bc02c 100755 --- a/guest-tools/run_td +++ b/guest-tools/run_td @@ -44,15 +44,13 @@ def do_print(): with open(pidfile) as pid_file: pid=int(pid_file.read()) print(f'TD started by QEMU with PID: {pid}.') - print(f'To log in with the non-root user (default: tdx / password: 123456), as specified in setup-tdx-config, use:') + print(f'To log in with the user specified in image/cloud-init-data/user-data, use:') print(f' $ ssh -p {ssh_port} @localhost') - print('To log in as root (default password: 123456), use:') - print(f' $ ssh -p {ssh_port} root@localhost') except: pass def do_clean(): - print('Clean VM') + print('Clean VM..') with open(pidfile) as pid_file: pid=int(pid_file.read()) os.kill(pid, signal.SIGTERM) @@ -81,17 +79,39 @@ def add_gpus(cmd, gpus): cmd.extend(gpu_cmd) index = index + 1 -def pci_devices(vendor, product) -> list[str]: - output = subprocess.check_output(["lspci", "-d", f"{vendor}:{product}"], stderr=subprocess.STDOUT) +def pci_devices(vendor, device, subsystem) -> list[str]: + output = subprocess.check_output(["lspci", "-D", "-n", "-m", "-d", f"{vendor}:{device}"], stderr=subprocess.STDOUT) devices = [] for line in output.splitlines(): - address = line.decode().strip().split(" ")[0] - devices.append(address) + l = line.decode().strip().split(" ") + # assume the last argument is always subsystem, + # which may not be reliable, see man lspci.. 
+ if l[-1].strip('"') == subsystem: + address = l[0] + devices.append(address) return devices def setup_ppcie(cmd): - gpus = pci_devices("10de", "2335") - nvswitches = pci_devices("10de", "22a3") + h200_gpus = pci_devices("10de", "2335", "18be") + h200_switches = pci_devices("10de", "22a3", "1796") + + b200_gpus = pci_devices("10de", "2901", "199b") + mlxswitches = pci_devices("15b3", "1021", "0087") + + if h200_gpus and b200_gpus: + print("Error, no known system with both H200 and B200") + exit(1) + elif h200_gpus: + gpus = h200_gpus + nvswitches = h200_switches + elif b200_gpus: + gpus = b200_gpus + nvswitches = mlxswitches + else: + print("Error, did not find H200 or B200 gpus") + + if not nvswitches: + print("Error, did not find nvswitches required for 8 gpu ppcie/cc config") if len(gpus) != 8: print("PPCIe mode requires 8 GPUs") @@ -100,7 +120,8 @@ def setup_ppcie(cmd): print("PPCIe mode requires 4 NVSwitches") exit(1) - # prepare_gpus(gpus) + all_devices = gpus + nvswitches + prepare_gpus(all_devices) cmd.extend([ "-object", "iommufd,id=iommufd0", @@ -138,13 +159,13 @@ def setup_ppcie(cmd): "-fw_cfg", "name=opt/ovmf/X-PciMmio64Mb8,string=262144", "-device", "pcie-root-port,port=24,chassis=9,id=pci.9,bus=pcie.0,multifunction=on,addr=0x4", - "-device", f"vfio-pci,host=0000:{nvswitches[0]},bus=pci.9,addr=0x0,iommufd=iommufd0", + "-device", f"vfio-pci,host={nvswitches[0]},bus=pci.9,addr=0x0,iommufd=iommufd0", "-device", "pcie-root-port,port=25,chassis=10,id=pci.10,bus=pcie.0,addr=0x4.0x1", - "-device", f"vfio-pci,host=0000:{nvswitches[1]},bus=pci.10,addr=0x0,iommufd=iommufd0", + "-device", f"vfio-pci,host={nvswitches[1]},bus=pci.10,addr=0x0,iommufd=iommufd0", "-device", "pcie-root-port,port=26,chassis=11,id=pci.11,bus=pcie.0,addr=0x4.0x2", - "-device", f"vfio-pci,host=0000:{nvswitches[2]},bus=pci.11,addr=0x0,iommufd=iommufd0", + "-device", f"vfio-pci,host={nvswitches[2]},bus=pci.11,addr=0x0,iommufd=iommufd0", "-device", 
"pcie-root-port,port=27,chassis=12,id=pci.12,bus=pcie.0,addr=0x4.0x3", - "-device", f"vfio-pci,host=0000:{nvswitches[3]},bus=pci.12,addr=0x0,iommufd=iommufd0", + "-device", f"vfio-pci,host={nvswitches[3]},bus=pci.12,addr=0x0,iommufd=iommufd0", ]) def do_run(img_path, vcpus, mem, gpus): @@ -187,7 +208,7 @@ def do_run(img_path, vcpus, mem, gpus): add_vsock(qemu_cmds) - if args.gpus and len(args.gpus) == 8: + if gpus == "all": # special case overload setup_ppcie(qemu_cmds) else: add_gpus(qemu_cmds, gpus) @@ -200,6 +221,7 @@ def run_td(args): try: do_clean() except: + print("do_clean exception") pass if args.clean: return @@ -210,14 +232,21 @@ def run_td(args): if args.vcpus: td_vcpus=args.vcpus - do_run(td_img, td_vcpus, td_mem, args.gpus.split(',') if args.gpus else []) + if args.gpus == "all": + gpus = args.gpus + elif args.gpus: + gpus = args.gpus.split(',') + else: + gpus = [] + + do_run(td_img, td_vcpus, td_mem, gpus) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--image", type=str, help="Guest image") parser.add_argument("--vcpus", type=str, help="Number of VCPUs. 32 by default.", default='32') parser.add_argument("--mem", type=str, help="Memory. 100G by default", default='100G') - parser.add_argument("--gpus", type=str, help="GPUs to pass-through") + parser.add_argument("--gpus", type=str, help="GPUs to pass-through in lspci -D format or 'all'") parser.add_argument("--clean", action='store_true', help="Clean the current VM") parser.add_argument("--foreground", action='store_true', help="Run in foreground") args = parser.parse_args()