
你是否经历过这些场景?
根本问题在于:UI 自动化脚本是“脆弱的硬编码”,而页面是“动态演进的”。
现在,我们有了新解法:让 AI 理解页面语义,并通过 MCP 安全调用 Playwright 能力,实现**“感知 → 决策 → 执行 → 验证”闭环**,真正走向**无人值守**。
[用户自然语言指令]
        ↓
[AI Agent(如 GPT-4o)]
        ↓ (调用 Tool)
[MCP Server] ←→ [Playwright 浏览器池]
        ↑
[审计/限流/权限控制]
关键设计原则:
`click_button`, `assert_text_visible`;
# tools/playwright_tools.py
import base64

from playwright.async_api import async_playwright, Browser
from pydantic import BaseModel, Field

from mcp_server import get_browser_pool  # browser connection pool

class ClickButtonInput(BaseModel):
    """Arguments accepted by the ``click_button`` tool."""

    # Natural-language description of the button to click. The description
    # string below is runtime schema metadata and is kept verbatim.
    description: str = Field(..., description="按钮语义描述,如'登录按钮'、'提交订单'")
    # Timeout forwarded to the click call; validated to 1000..30000,
    # default 5000 (Playwright timeouts are in milliseconds).
    timeout: int = Field(5000, ge=1000, le=30000)

class ClickButtonOutput(BaseModel):
    """Result returned by the ``click_button`` tool."""

    # Whether the click was carried out.
    success: bool
    # Base64-encoded screenshot, kept for auditing.
    screenshot_after: str

@mcp.tool()
async def click_button(input: ClickButtonInput) -> ClickButtonOutput:
    """Click the button that best matches a natural-language description.

    Acquires a browser from the pool, opens a page, resolves the target
    element with ``ai_assisted_locator``, clicks it, and captures a
    screenshot of the resulting state.

    Args:
        input: Button description and click timeout.

    Returns:
        ``ClickButtonOutput`` with ``success=True`` and the Base64-encoded
        screenshot taken after the click.

    Raises:
        ValueError: If no element matches ``input.description``.
    """
    browser: Browser = await get_browser_pool().acquire()
    try:
        page = await browser.new_page()
        # Placeholder URL: a real tool should take the target from context.
        await page.goto("https://your-app.com")

        # Key step: resolve the element by meaning, not by a fixed selector.
        locator = await ai_assisted_locator(page, input.description)
        if not locator:
            raise ValueError(f"未找到符合描述的按钮: {input.description}")

        await locator.click(timeout=input.timeout)

        # Screenshot of the post-click state.
        img_bytes = await page.screenshot()
        screenshot_b64 = base64.b64encode(img_bytes).decode()

        return ClickButtonOutput(success=True, screenshot_after=screenshot_b64)
    finally:
        # NOTE(review): this closes the acquired browser outright. If the pool
        # expects instances to be handed back, release it to the pool here
        # instead — confirm against the pool's API.
        await browser.close()
_EMBEDDING_MODEL_NAME = 'BAAI/bge-small-zh-v1.5'
_embedding_model = None  # loaded on first use, then reused across calls


def _get_embedding_model():
    """Return the shared sentence-embedding model, loading it on first call."""
    global _embedding_model
    if _embedding_model is None:
        # Imported lazily so this module loads without the heavy dependency.
        from sentence_transformers import SentenceTransformer

        _embedding_model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
    return _embedding_model


def _cosine_similarities(query_vec, candidate_vecs):
    """Return the cosine similarity of one query vector to each candidate row."""
    import numpy as np

    query = np.asarray(query_vec, dtype=float).reshape(-1)
    cands = np.asarray(candidate_vecs, dtype=float)
    norms = np.linalg.norm(cands, axis=1) * np.linalg.norm(query)
    # A zero-length vector has no direction: report similarity 0, not NaN.
    safe_norms = np.where(norms == 0.0, 1.0, norms)
    return np.where(norms == 0.0, 0.0, (cands @ query) / safe_norms)


async def _element_label(element) -> str:
    """Return the text an element exposes for matching, or "" if it has none."""
    text = await element.text_content() or ""
    aria_label = await element.get_attribute("aria-label") or ""
    title = await element.get_attribute("title") or ""
    label = f"{text} {aria_label} {title}".strip()
    if not label:
        # <input type="submit"> carries its caption in the value attribute,
        # so without this fallback such inputs would never be candidates.
        label = (await element.get_attribute("value") or "").strip()
    return label


async def ai_assisted_locator(page, description: str, *, threshold: float = 0.6):
    """Find the clickable element whose label best matches *description*.

    Collects buttons, ``role='button'`` elements and submit inputs, embeds
    their labels together with the description, and picks the candidate
    with the highest cosine similarity.

    Args:
        page: Page to search; must provide ``query_selector_all``.
        description: Natural-language description of the wanted element.
        threshold: Similarity the best candidate must exceed to be accepted.

    Returns:
        The best-matching element handle, or ``None`` when the page has no
        labelled candidates or none scores above *threshold*.
    """
    # Gather every clickable element on the page.
    elements = await page.query_selector_all("button, [role='button'], input[type='submit']")

    candidates = []
    for element in elements:
        label = await _element_label(element)
        if label:
            candidates.append((element, label))

    if not candidates:
        return None

    # Semantic similarity from a small locally deployed embedding model.
    # NOTE(review): encode() is a synchronous call made from a coroutine;
    # consider running it in an executor if latency matters.
    model = _get_embedding_model()
    desc_vec = model.encode([description])
    cand_vecs = model.encode([label for _, label in candidates])

    similarities = _cosine_similarities(desc_vec, cand_vecs)
    best_idx = int(similarities.argmax())

    if similarities[best_idx] > threshold:
        return candidates[best_idx][0]
    return None
✅ 优势:即使 class 改了,只要文案/语义不变,仍能定位。
传统写法:
# Brittle: pinned to one selector and one exact string.
assert page.text_content("#order-status") == "已支付"
智能写法:
@mcp.tool()
async def assert_visual_match(input: VisualAssertInput) -> VisualAssertOutput:
    """Compare a full-page screenshot against the scenario's stored baseline.

    Args:
        input: Carries ``scenario_name`` (selects the baseline and names the
            diff artifact) and ``threshold`` (largest accepted difference).

    Returns:
        ``VisualAssertOutput(match=True)`` when the difference does not exceed
        the threshold; otherwise ``match=False`` with ``diff_url`` set to the
        value returned by ``upload_to_s3``.
    """
    from datetime import datetime, timezone

    # NOTE(review): `page` is not defined in this snippet; presumably the
    # active page comes from the tool/session context — confirm.
    current_img = await page.screenshot(full_page=True)
    baseline = load_baseline_image(input.scenario_name)

    diff = calculate_image_diff(baseline, current_img)
    if diff > input.threshold:
        # Timestamp the artifact name so repeated failures of the same
        # scenario do not overwrite each other.
        ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        # Upload the diff image to object storage.
        diff_img = generate_diff_image(baseline, current_img)
        url = upload_to_s3(diff_img, f"diff_{input.scenario_name}_{ts}.png")
        return VisualAssertOutput(match=False, diff_url=url)
    return VisualAssertOutput(match=True)
📌 适用场景:首页布局、报表样式、多语言文案校验。
# patrol_tasks.yaml
# Each task has a name and an ordered list of steps; a step names the tool
# to call and, optionally, its args.
- name: "首页可用性巡检"
  steps:
    - tool: navigate_to
      args: {url: "https://prod.com"}
    - tool: assert_visual_match
      args: {scenario_name: "homepage", threshold: 0.05}
    - tool: click_button
      args: {description: "搜索框旁边的查询按钮"}

- name: "订单列表加载"
  steps:
    - tool: login_as_test_user
    - tool: navigate_to
      args: {url: "https://prod.com/orders"}
    - tool: assert_element_count
      args: {selector: ".order-item", min_count: 1}
# patrol_executor.py
def _result_field(result, name, default=None):
    """Read *name* from a tool result that may be a mapping or an object."""
    if isinstance(result, dict):
        return result.get(name, default)
    return getattr(result, name, default)


async def run_patrol_task(task_config: dict):
    """Run one patrol task step by step, then build a report and alert.

    Args:
        task_config: One task entry with ``name`` and ``steps``. Each step has
            a ``tool`` name, optional ``args``, and optional ``allow_failure``.

    Returns:
        The report produced by ``generate_patrol_report``.
    """
    results = []
    for step in task_config["steps"]:
        tool_name = step["tool"]
        # Steps that take no arguments simply omit `args`.
        args = step.get("args") or {}

        # Dispatch to the matching MCP tool.
        result = await mcp_router.call(tool_name, args)
        results.append(result)

        # Stop at the first failure unless the step opts in to failing.
        # A result that carries no `success` field counts as successful.
        if not _result_field(result, "success", True) and not step.get("allow_failure"):
            break

    report = generate_patrol_report(task_config["name"], results)
    # Alert only when a result explicitly reports a visual mismatch.
    if any(_result_field(r, "match") is False for r in results):
        send_alert(report)  # e.g. WeCom / DingTalk
    return report
🕒 收益:每天凌晨 2 点自动跑,早上只收到**真实业务异常**报警,而非“定位失败”。
# config.py
import os

# Execution mode read from the environment: "mock" (default) or "real".
RUN_MODE = os.getenv("PLAYWRIGHT_RUN_MODE", "mock")


@mcp.tool()
async def submit_payment(input: SubmitPaymentInput):
    """Submit a payment, stubbing the payment API unless running for real.

    Returns:
        ``{"status": "mock_success"}`` in mock mode. The non-mock branch
        currently returns ``None`` for whitelisted users (see the note below).

    Raises:
        PermissionError: Outside mock mode, when ``current_user`` is not in
            ``WHITELIST_USERS``.
    """
    # NOTE(review): `page` and `current_user` are not defined in this snippet;
    # presumably they come from the tool/session context — confirm.
    if RUN_MODE == "mock":
        # Intercept the payment request and answer it with a canned success.
        await page.route("**/api/pay", lambda route: route.fulfill(json={"status": "success"}))
        await page.click("#pay-button")
        return {"status": "mock_success"}
    else:
        # Production mode: whitelisted users only, plus manual approval.
        if current_user not in WHITELIST_USERS:
            raise PermissionError("禁止在生产环境自动支付")
        # Placeholder: trigger an approval ticket and pause execution here.
        # NOTE(review): until that exists, this path falls through and
        # returns None without clicking anything.
在 MCP 初始化时加载禁区规则:
# Selectors for elements that automation must never operate on.
DANGER_SELECTORS = [
    "[data-testid='delete-account']",
    "button:has-text('永久删除')",
    "#confirm-payment",
]

# Keep a reference to the plain semantic lookup defined earlier so the
# guarded version below can wrap it under the same public name.
_locate_unguarded = ai_assisted_locator


async def _is_danger_element(page, element) -> bool:
    """Return True if *element* is matched by any entry in DANGER_SELECTORS."""
    for danger in DANGER_SELECTORS:
        for handle in await page.query_selector_all(danger):
            # Compare node identity inside the page. A substring test of the
            # selector against outerHTML can never match, since selector
            # syntax does not appear in the markup.
            if await element.evaluate("(el, other) => el === other", handle):
                return True
    return False


async def ai_assisted_locator(page, description: str, **kwargs):
    """Locate an element as before, refusing any listed in DANGER_SELECTORS.

    Args:
        page: Page to search.
        description: Natural-language description of the wanted element.
        **kwargs: Forwarded unchanged to the underlying lookup.

    Returns:
        The located element, or ``None`` if the underlying lookup found none.

    Raises:
        PermissionError: If the located element matches a danger selector.
    """
    locator = await _locate_unguarded(page, description, **kwargs)
    if locator and await _is_danger_element(page, locator):
        raise PermissionError("尝试操作高危元素")
    return locator
| 指标 | 引入前 | 引入后(3 个月) | 提升 |
|---|---|---|---|
| UI 用例维护成本 | 15 人日/月 | 4 人日/月 | ↓ 73% |
| 夜间回归误报率 | 68% | 12% | ↓ 82% |
| 新页面覆盖速度 | 3 天/页 | 0.5 天/页 | ↑ 6x |
| 生产巡检覆盖率 | 0% | 40+ 核心路径 | — |
💡 关键:不是完全替代人工,而是把人力从“修定位”释放到“设计场景”。
pytest-playwright + Docker 启动多个 Chrome 实例;
当你做到:
你就拥有了 无人值守的 UI 自动化系统。