1160 字
6 分钟
Qwen2.5-VL的vllm部署方案(图像分析)

环境搭建工作#

1 使用 Conda 创建环境#

Terminal window
conda create -n qwen python=3.11 -y
conda activate qwen
sudo apt update && sudo apt upgrade

2 模型下载#

1.首先安装所需依赖

Terminal window
pip install "vllm>0.7.2" "transformers>=4.49.0" accelerate qwen-vl-utils modelscope

注释
vllm 用于高效推理大模型。
transformers 和 accelerate 是 Hugging Face 生态的核心库。
qwen-vl-utils 直接关联 Qwen 的视觉 - 语言模型。
modelscope 用于下载或管理预训练模型。

因为网络问题,可以走国内代理

Terminal window
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple "vllm>0.7.2" "transformers>=4.49.0" accelerate qwen-vl-utils modelscope

2.这里我使用的是 Qwen2.5-VL-7B-Instruct-AWQ(量化过的模型)

我的电脑配置是 (4090,24g 显存,实测此模型基本占满显存)

Terminal window
modelscope download --model Qwen/Qwen2.5-VL-7B-Instruct-AWQ --local_dir /home/kairos/qwen

注意 local_dir 后面换成自己的目录 (/home/kairos/qwen)

3.使用 vllm 框架启动模型服务
可以通过 nvidia-smi,查看是不是成功开启服务

Terminal window
vllm serve /home/kairos/qwen --port 8001 --host 0.0.0.0 --dtype bfloat16 --limit-mm-per-prompt image=5,video=5 --max-model-len 8001

注意端口不要与你现有的冲突

模型正式测试与调用#

1 使用 OpenAI Python SDK 调用#

1.可以使用本地 usb 摄像头配合 opencv 手动截图单独存放在 captures 文件夹下,供后续大模型调用分析

#!/usr/bin/env python3
"""Capture snapshots from a local USB camera into OUTPUT_FOLDER.

Press PRESS_KEY to save a frame manually; set AUTO_SAVE_SEC > 0 to also
save a frame automatically every AUTO_SAVE_SEC seconds. ESC exits.
"""
import cv2
import os
import time
from datetime import datetime

# --- configuration ---
CAMERA_DEVICE = 0           # index 0 -> /dev/video0 (original comment said
                            # /dev/video1 — set the index to match your device)
OUTPUT_FOLDER = "captures"  # folder where snapshots are written
AUTO_SAVE_SEC = 0           # auto-capture interval in seconds; 0 = manual mode
PRESS_KEY = 's'             # key that triggers a manual save

# make sure the output folder exists
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# open the camera
cap = cv2.VideoCapture(CAMERA_DEVICE)
if not cap.isOpened():
    print("无法打开摄像头!")
    exit()

print(f"摄像头已启动,按 '{PRESS_KEY}' 保存截图,ESC键退出")
last_save = 0
while True:
    ret, frame = cap.read()
    if not ret:
        print("无法获取画面")
        break
    # show the live preview
    cv2.imshow('Camera', frame)
    # auto-save once the configured interval has elapsed
    if AUTO_SAVE_SEC > 0 and (time.time() - last_save) > AUTO_SAVE_SEC:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{OUTPUT_FOLDER}/auto_{timestamp}.jpg"
        cv2.imwrite(filename, frame)
        # fix: the saved path placeholder was missing from the original message
        print(f"自动保存: {filename}")
        last_save = time.time()
    # keyboard handling
    key = cv2.waitKey(1) & 0xFF
    if key == 27:  # ESC quits
        break
    elif chr(key) == PRESS_KEY:  # manual save
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{OUTPUT_FOLDER}/manual_{timestamp}.jpg"
        cv2.imwrite(filename, frame)
        # fix: the saved path placeholder was missing from the original message
        print(f"手动保存: {filename}")
# release resources
cap.release()
cv2.destroyAllWindows()

2.让大模型逐一读取 captures 内所有图像

from openai import OpenAI
import base64
import os
import shutil
from pathlib import Path
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import logging
import cv2
import numpy as np
import json
# Logging: timestamp followed by the message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# vLLM's OpenAI-compatible server ignores the API key, but the SDK
# requires a non-empty value, hence the "EMPTY" placeholder.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8001/v1"
client = OpenAI(api_key=openai_api_key, base_url=openai_api_base)

# Folder where confirmed fire/smoke images are archived.
FIRE_IMAGES_DIR = "/home/kairos/really_fire"
os.makedirs(FIRE_IMAGES_DIR, exist_ok=True)
def encode_image_to_base64(image_path):
    """Read a local image file and return its bytes as a base64 string."""
    raw = Path(image_path).read_bytes()
    return base64.b64encode(raw).decode('utf-8')
def draw_boxes(image_path, boxes, output_path):
    """Draw a red rectangle around each (x1, y1, x2, y2) box and save the result."""
    canvas = cv2.imread(image_path)
    for x1, y1, x2, y2 in boxes:
        # BGR (0, 0, 255) = red, line width 2
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 0, 255), 2)
    cv2.imwrite(output_path, canvas)
def process_image(image_path):
    """Ask the vision model whether *image_path* contains fire or smoke.

    The image is base64-encoded and sent to the local vLLM server. When the
    model reports fire, the image is archived in FIRE_IMAGES_DIR — annotated
    with the reported bounding boxes when coordinates were returned,
    otherwise as an unmodified copy. All failures are logged, never raised.
    """
    try:
        base64_image = encode_image_to_base64(image_path)
        chat_response = client.chat.completions.create(
            # must match the path that `vllm serve` was started with
            model="/home/kairos/qwen",
            messages=[
                {
                    "role": "system",
                    "content": """你是一个专门检测火焰的图像识别助手。你的任务是:
1. 判断图片中是否存在火焰、火灾、烟雾等相关内容
2. 如果存在,请提供每个火焰/烟雾区域的坐标位置(使用像素坐标)
3. 按以下格式回复:
{
"has_fire": true/false,
"description": "发现火焰/烟雾:[描述]" 或 "未发现火焰相关内容",
"boxes": [[x1,y1,x2,y2], ...] // 每个区域的左上角和右下角坐标
}"""
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            },
                        },
                        {"type":"text","text":"请检测图片中的火焰、火灾或烟雾,并提供准确的位置坐标。"},
                    ],
                },
            ],
        )
        response_text = chat_response.choices[0].message.content
        logging.info(f"检测图片 {image_path}:")
        logging.info(f"检测结果: {response_text}")
        try:
            # Robustness fix: chat models frequently wrap JSON replies in a
            # markdown code fence (```json ... ```), which made json.loads
            # fail and silently drop real detections. Strip the fence first.
            cleaned = response_text.strip()
            if cleaned.startswith("```"):
                cleaned = cleaned.split("\n", 1)[-1]
                if cleaned.endswith("```"):
                    cleaned = cleaned[:-3]
            result = json.loads(cleaned)
            if result.get("has_fire", False):
                original_filename = os.path.basename(image_path)
                name, ext = os.path.splitext(original_filename)
                # timestamped name avoids collisions in the archive folder
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                new_filename = f"{timestamp}_{name}_marked{ext}"
                target_path = os.path.join(FIRE_IMAGES_DIR, new_filename)
                if result.get("boxes"):
                    # draw the reported regions onto the archived copy
                    draw_boxes(image_path, result["boxes"], target_path)
                    logging.info(f"已标记火焰/烟雾区域并保存至: {target_path}")
                else:
                    # no coordinates reported: archive the original image untouched
                    shutil.copy2(image_path, target_path)
                    logging.info(f"发现火焰/烟雾,但无法确定具体位置,已保存原图至: {target_path}")
        except json.JSONDecodeError:
            logging.error("无法解析模型返回的JSON格式")
        except Exception as e:
            logging.error(f"处理检测结果时出错: {str(e)}")
    except Exception as e:
        logging.error(f"处理图片 {image_path} 时出错: {str(e)}")
# Filesystem watcher: routes new/updated JPEGs to process_image.
class ImageHandler(FileSystemEventHandler):
    """Watchdog handler that feeds JPEG file events to process_image."""

    @staticmethod
    def _is_jpeg(path):
        # only .jpg / .jpeg files are of interest (case-insensitive)
        return path.lower().endswith(('.jpg', '.jpeg'))

    def on_created(self, event):
        if event.is_directory:
            return
        if self._is_jpeg(event.src_path):
            logging.info(f"检测到新图片: {event.src_path}")
            process_image(event.src_path)

    def on_modified(self, event):
        if event.is_directory:
            return
        if self._is_jpeg(event.src_path):
            logging.info(f"检测到图片修改: {event.src_path}")
            process_image(event.src_path)
def main():
    """Process existing captures, then watch the folder for new images."""
    watch_dir = "/home/kairos/captures"

    # First pass: run detection on whatever is already in the folder.
    logging.info("开始检测现有图片...")
    for existing in Path(watch_dir).glob("*.jp*g"):
        process_image(str(existing))

    # Then watch the folder (non-recursively) for new/modified JPEGs.
    observer = Observer()
    observer.schedule(ImageHandler(), watch_dir, recursive=False)
    observer.start()
    logging.info(f"开始监控文件夹: {watch_dir}")
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
        logging.info("停止监控")
    observer.join()


if __name__ == "__main__":
    main()
Qwen2.5-VL的vllm部署方案(图像分析)
https://blog.fuxieyi.top/posts/qwen25-vl的vllm部署方案图像分析/
作者
谢懿Shine
发布于
2025-06-15
许可协议
CC BY-NC-SA 4.0