Spyre vLLM Setup Container
Source: examples/online_inference/spyre_vllm_setup_container.sh
#!/bin/bash -e
# This script sets up the runtime environment for launching a vLLM API
# server using Spyre AIU cards.
# Used for local development and testing in tandem with a podman run command.
# Not for use on an OpenShift cluster.
# 1. Validates TORCH_SENDNN cache settings.
# 2. Detects and configures available AIU devices.
# 3. Activates the Python virtual environment if not already active.
# 4. Launches the vLLM server with the computed arguments.
# --- Argument parsing ---
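# --interactive skips the vLLM server launch (useful when entering the container
# for debugging); all other arguments are forwarded to the vLLM API server.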
INTERACTIVE=false
server_args=()
while [[ $# -gt 0 ]]; do
    case "$1" in
        --interactive)
            INTERACTIVE=true
            shift
            ;;
        *)
            server_args+=("$1")
            shift
            ;;
    esac
done
# --- Validate TORCH_SENDNN cache settings ---
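# Example (hypothetical values): to use the torch_sendnn cache, export
#   TORCH_SENDNN_CACHE_ENABLE=1
#   TORCH_SENDNN_CACHE_DIR=/tmp/sendnn_cache   # must already exist with chmod 777
# before invoking this script.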
if [[ "${TORCH_SENDNN_CACHE_ENABLE:-0}" = "1" ]]; then
if [[ -z "${TORCH_SENDNN_CACHE_DIR:-}" ]]; then
echo "Error: TORCH_SENDNN_CACHE_DIR is not set."
exit 1
fi
if [[ ! -d "${TORCH_SENDNN_CACHE_DIR}" ]]; then
echo "Error: Cache directory ${TORCH_SENDNN_CACHE_DIR} does not exist."
exit 1
fi
perms=$(stat -c "%a" "${TORCH_SENDNN_CACHE_DIR}")
if [[ "${perms}" != "777" ]]; then
echo "Error: Cache directory ${TORCH_SENDNN_CACHE_DIR} does not have 777 permissions. Current: ${perms}"
exit 1
fi
fi
# --- Detect AIU cards ---
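# If VLLM_AIU_PCIE_IDS is not provided by the caller, default to every IBM AIU
# found on the PCIe bus (PCI vendor 1014 = IBM, device 06a7).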
if [[ -z "${VLLM_AIU_PCIE_IDS:-}" ]]; then
export VLLM_AIU_PCIE_IDS=$(lspci -n -d 1014:06a7 | cut -d ' ' -f 1)
fi
# Create a senlib_config.json that uses only the specified AIU IDs.
tmpfile=$(mktemp -t senlib_config_XXXXXXX.json)
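# The pipeline below converts the whitespace-separated PCIe IDs into a JSON
# array of strings (jq -R / jq -s) and writes it into .GENERAL.sen_bus_id.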
cat <<EOF | jq --argjson newValues "$(for i in ${VLLM_AIU_PCIE_IDS}; do echo "$i"; done | jq -R . | jq -s .)" '.GENERAL.sen_bus_id = $newValues' > "$tmpfile"
{
  "GENERAL": {
    "target": "SOC",
    "sen_bus_id": [
    ]
  },
  "METRICS": {
    "general": {
      "enable": false
    }
  }
}
EOF
sudo mv "$tmpfile" /etc/aiu/senlib_config.json
# --- Reconfigure AIUs and environment ---
. /etc/bashrc-sentient-env.sh
setup_multi_aiu_env
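# Assumption: setup_multi_aiu_env (provided by bashrc-sentient-env.sh in the
# Spyre base image) exports the per-device environment, including the
# AIU_WORLD_SIZE value used for tensor parallelism below.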
# --- Activate the vLLM virtualenv (if not already active) ---
if [[ -z "${VIRTUAL_ENV:-}" ]]; then
    source /opt/vllm/bin/activate
fi
# --- If interactive, skip server launch ---
if [[ "$INTERACTIVE" == "true" ]]; then
echo "Interactive mode: skipping vLLM server launch."
else
# --- Ensure model path is set ---
if [[ -z "${VLLM_MODEL_PATH:-}" ]]; then
echo "Error: VLLM_MODEL_PATH is not set."
exit 1
fi
# --- Launch the server ---
DEFAULT_ARGS=(--model "${VLLM_MODEL_PATH}" -tp "${AIU_WORLD_SIZE}")
exec python -m vllm.entrypoints.openai.api_server "${DEFAULT_ARGS[@]}" "${server_args[@]}"
fi
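For local testing, the script is meant to run as the container command of a podman run invocation. The sketch below is hypothetical: the image name, volume mount, model path, and device options are placeholders and must be adapted to the host; --port is simply forwarded to the vLLM API server.

# Hypothetical example; --privileged is a blunt stand-in for whatever device
# flags your host actually needs to expose the AIU cards to the container.
sudo podman run -it --rm --privileged \
    -p 8000:8000 \
    -v /path/to/models:/models:Z \
    -e VLLM_MODEL_PATH=/models/my-model \
    my-spyre-vllm-image:latest \
    bash examples/online_inference/spyre_vllm_setup_container.sh --port 8000

Once the server reports it is ready, the OpenAI-compatible endpoints can be smoke-tested from the host (the model name must match the path passed via VLLM_MODEL_PATH):

curl http://localhost:8000/v1/models
curl http://localhost:8000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "/models/my-model", "prompt": "Hello", "max_tokens": 16}'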