当前位置：首页 > news >正文

Ubuntu 单机多卡部署脚本： vLLM + DeepSeek 70B

news 2025/7/15 21:29:49

# 部署脚本：Ubuntu + vLLM + DeepSeek 70B，程序分两部分（init.sh：系统初始化；test.sh：重启后安装环境、下载模型并启动服务）

# 执行前请确保：1. 系统为 Ubuntu 20.04/22.04 2. 拥有NVIDIA显卡(显存≥24G)

# init.sh
#!/bin/bash
# Stage 1 (init.sh): update the system and install the base dependencies.

# Refresh the package index, then upgrade every installed package.
sudo apt update \
  && sudo apt upgrade -y

# Build toolchain, pip, and the download / version-control utilities.
base_packages=(build-essential python3-pip curl git wget)
sudo apt install -y "${base_packages[@]}"

# NVIDIA driver installation (requires a reboot). Disabled by default;
# uncomment the three commands below to install the driver from the PPA.
#sudo add-apt-repository ppa:graphics-drivers/ppa -y
#sudo apt update
#sudo apt install -y nvidia-driver-550-server

# Ask the operator to reboot manually and show the command to use.
printf '%s\n' "请手动重启系统后再次执行本脚本！" "sudo reboot "

# The first stage stops here.
exit 0

#test.sh

# ---- 以下为重启后执行的第二部分 ----
#!/bin/bash

# Point pip at a domestic (Aliyun) PyPI mirror via the per-user config file.
mkdir -p "$HOME/.pip"

# The redirection must sit on the `cat` line: a heredoc terminator is only
# recognised when it is alone on its line, so `EOF >file` never ends the
# heredoc and the file is never written.
# `index-url` has to live under a [global] section for pip to honour it.
# The delimiter is quoted so nothing inside the body is expanded by the shell.
cat >"$HOME/.pip/pip.conf" <<'EOF'
[global]
index-url = http://mirrors.aliyun.com/pypi/simple/

[install]
trusted-host=mirrors.aliyun.com
EOF

# Install Miniconda into $HOME/miniconda and create the "vllm" environment.

# Upstream installer location, kept for reference:
# wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
# The Tsinghua (TUNA) mirror is used instead for faster domestic downloads.
MINICONDA_DIR="$HOME/miniconda"
MINICONDA_INSTALLER="Miniconda3-latest-Linux-x86_64.sh"

# The batch installer (-b -p) aborts when the target prefix already exists,
# so skip download + install on a re-run where conda is already in place.
if [[ ! -x "$MINICONDA_DIR/bin/conda" ]]; then
  # -c resumes a partially downloaded installer instead of starting over.
  wget -c "https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/$MINICONDA_INSTALLER" || {
    echo "Miniconda download failed" >&2
    exit 1
  }
  bash "$MINICONDA_INSTALLER" -b -p "$MINICONDA_DIR" || {
    echo "Miniconda installation failed" >&2
    exit 1
  }
fi

# Load conda into the current shell so `conda activate` works below.
source "$MINICONDA_DIR/bin/activate"

# Create the virtual environment.
conda create -n vllm python=3.11 -y

conda activate vllm

# Install the core components: vLLM plus the two model-hub client libraries.
pip install vllm huggingface_hub modelscope

# Example model download (replace with the actual model ID).
MODEL_ID="deepseek-ai/DeepSeek-R1-Distill-Llama-70B"

# Run an inline Python script read from stdin. The heredoc delimiter is
# unquoted, so the shell substitutes $MODEL_ID before Python sees the code.
python - <<EOF
from modelscope import snapshot_download

model_id = '$MODEL_ID'
snapshot_download(model_id, cache_dir='./models')
EOF

# Start the server (adjust --tensor-parallel-size to the number of GPUs).
# Single node, multiple GPUs: 4 is used here for four cards; use 8 for eight.
# `vllm serve` expects the model as a positional argument rather than via
# --model, so the local model directory is passed first.
vllm serve "./models/$MODEL_ID" \
  --tensor-parallel-size 4 \
  --gpu-memory-utilization 0.95 \
  --port 6006 \
  --max-model-len 8192 &
# Record the background server's PID so it can be polled afterwards.
SERVER_PID=$!

# 检查 PID 和端口的函数

#######################################
# Poll the background server once and smoke-test it when it is listening.
# Globals:
#   SERVER_PID  (read) PID of the background server process
#   MODEL_ID    (read) model ID; the request targets ./models/$MODEL_ID
#   SERVER_PORT (read, optional) port to probe, defaults to 6006
# Outputs:
#   Status messages and the HTTP response of the test request on stdout.
# Returns:
#   0 if the port is listening (the test request has been sent),
#   1 if the port is not listening yet.
#   Exits the shell with status 1 if the server process no longer exists.
#######################################
check_status() {
  local port="${SERVER_PORT:-6006}"

  # Signal 0 delivers nothing; it only reports whether the PID still exists.
  if ! kill -0 "$SERVER_PID" >/dev/null 2>&1; then
    echo "程序异常退出"
    exit 1
  fi

  # Probe the port the server was started on. `ss` is used rather than
  # `netstat`, which is not among the packages installed by stage 1, and
  # [[:space:]] replaces the GNU-only `\s`.
  if ss -tln | grep -q ":${port}[[:space:]]"; then
    echo "准备就绪,测试一下"
    sleep 3
    # The model name must match the path given to `vllm serve`, and the
    # sampling keys must be lower-case to be recognised by the API.
    curl -i -k -X POST "http://localhost:${port}/v1/chat/completions" \
      -H "Content-Type: application/json" \
      --data "{ \"model\": \"./models/${MODEL_ID}\", \"max_tokens\": 1024, \"temperature\": 0, \"messages\": [ { \"role\": \"user\", \"content\": \"中国的首都在哪里?\" } ] }"
    return 0
  fi

  return 1
}

# 主循环

while true; do

if check_status; then

# 当 PID 存在且端口 8000 在使用时

sleep 1 # 避免过于频繁的检查

else

# 当 PID 不存在时

echo "程序异常退出"