GLM 调用指南

最近更新时间:2026-05-28 14:57:30

我的收藏

概述

智谱 GLM 系列模型已接入 TokenHub 平台。您可以通过 OpenAI 兼容协议调用 GLM 系列模型,实现通用对话、深度推理、工具调用和多模态理解等能力。
通用调用方式(BaseURL、API Key、messages 拼接、Function Calling、流式输出等)请参见 语言模型调用概览。本文仅介绍 GLM 系列的特有能力和使用方式。

前提条件

已注册腾讯云账号并开通 TokenHub 服务。
已在 TokenHub 控制台 获取 API Key。
已根据所用语言安装对应 SDK 或具备 HTTP 请求能力。

快速开始

以下示例展示如何调用 glm-5.1 模型完成一次基础对话。请将 YOUR_API_KEY 替换为您创建的 API Key。
cURL
Python
Node.js
Java
Go
# Basic chat completion against glm-5.1; a single trailing backslash continues the command.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5.1",
    "messages": [{"role": "user", "content": "你好"}],
    "max_tokens": 1024
  }'
from openai import OpenAI

client = OpenAI(
api_key="YOUR_API_KEY",
base_url="https://tokenhub.tencentmaas.com/v1",
)

response = client.chat.completions.create(
model="glm-5.1",
messages=[{"role": "user", "content": "你好"}],
max_tokens=1024,
)
print(response.choices[0].message.content)
import OpenAI from 'openai';

const client = new OpenAI({
apiKey: 'YOUR_API_KEY',
baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

const response = await client.chat.completions.create({
model: 'glm-5.1',
messages: [{ role: 'user', content: '你好' }],
max_tokens: 1024,
});
console.log(response.choices[0].message.content);
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

/**
 * Quick-start sample: sends one chat-completion request to the glm-5.1 model
 * and prints the raw JSON response body to standard output.
 */
public class GlmQuickStart {
    private static final String ENDPOINT = "https://tokenhub.tencentmaas.com/v1/chat/completions";

    public static void main(String[] args) throws Exception {
        // A single user turn is the whole conversation.
        Map<String, String> userTurn = Map.of("role", "user", "content", "你好");

        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "glm-5.1");
        payload.put("messages", List.of(userTurn));
        payload.put("max_tokens", 1024);

        // Serialize the payload and wrap it as a JSON request body.
        String json = new Gson().toJson(payload);
        RequestBody jsonBody = RequestBody.create(json, MediaType.parse("application/json"));

        Request httpRequest = new Request.Builder()
            .url(ENDPOINT)
            .header("Authorization", "Bearer YOUR_API_KEY")
            .post(jsonBody)
            .build();

        OkHttpClient http = new OkHttpClient();
        // try-with-resources closes the response (and its body) after printing.
        try (Response httpResponse = http.newCall(httpRequest).execute()) {
            System.out.println(httpResponse.body().string());
        }
    }
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// main sends one chat-completion request to glm-5.1 and prints the raw JSON
// response. Every step's error is checked so a failed request reports the
// cause instead of panicking on a nil response.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model":      "glm-5.1",
		"messages":   []map[string]string{{"role": "user", "content": "你好"}},
		"max_tokens": 1024,
	})
	if err != nil {
		fmt.Println("构造请求体失败:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("创建请求失败:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil here, so it must not be dereferenced or closed.
		fmt.Println("请求失败:", err)
		return
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("读取响应失败:", err)
		return
	}
	fmt.Println(string(data))
}
说明:
GLM 系列模型默认开启思考模式,响应中会包含 reasoning_content 字段。如不需要思考过程,请参见下文 思考模式 章节关闭。

支持的模型

model 参数值
定位
多模态
推荐场景
glm-5.1
旗舰
不支持
通用对话、创作、知识问答、复杂推理
glm-5
上一代旗舰
不支持
稳定性优先的场景
glm-5-turbo
Agent 优化
不支持
工具调用、长链路 Agent 任务
glm-5v-turbo
多模态
支持图片、视频、文件
图像理解、视频分析、文档解析
所有模型的上下文窗口为 200K tokens,最大输出为 128K tokens。

思考模式

GLM 系列将对话能力与推理能力合为一体。您可以通过 thinking 参数控制是否启用思考能力,无需切换 model 参数。

开启或关闭思考

通过 thinking 字段控制思考行为。该字段为对象格式,包含一个 type 属性:
字段
类型
取值
默认值
说明
type
String
enabled / disabled
enabled
控制当前请求是否启用思考能力
以下示例展示如何在请求中关闭思考:
cURL
Python
Node.js
Java
Go
# Same request as the quick start, with thinking explicitly turned off.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5.1",
    "messages": [{"role": "user", "content": "你好"}],
    "thinking": {"type": "disabled"}
  }'
response = client.chat.completions.create(
model="glm-5.1",
messages=[{"role": "user", "content": "你好"}],
extra_body={"thinking": {"type": "disabled"}},
)
const response = await client.chat.completions.create({
model: 'glm-5.1',
messages: [{ role: 'user', content: '你好' }],
// @ts-ignore - thinking 为 GLM 扩展字段
thinking: { type: 'disabled' },
});
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

/**
 * Sample showing how to turn thinking off: the request carries a top-level
 * {@code thinking} object whose {@code type} is {@code disabled}. The raw JSON
 * response body is printed to standard output.
 */
public class GlmThinkingDisabled {
    private static final String ENDPOINT = "https://tokenhub.tencentmaas.com/v1/chat/completions";

    public static void main(String[] args) throws Exception {
        Map<String, String> userTurn = Map.of("role", "user", "content", "你好");
        // GLM extension field: placed at the top level of the request body.
        Map<String, String> thinkingOff = Map.of("type", "disabled");

        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "glm-5.1");
        payload.put("messages", List.of(userTurn));
        payload.put("thinking", thinkingOff);

        String json = new Gson().toJson(payload);
        RequestBody jsonBody = RequestBody.create(json, MediaType.parse("application/json"));

        Request httpRequest = new Request.Builder()
            .url(ENDPOINT)
            .header("Authorization", "Bearer YOUR_API_KEY")
            .post(jsonBody)
            .build();

        OkHttpClient http = new OkHttpClient();
        // try-with-resources closes the response (and its body) after printing.
        try (Response httpResponse = http.newCall(httpRequest).execute()) {
            System.out.println(httpResponse.body().string());
        }
    }
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// main sends a chat-completion request with thinking disabled and prints the
// raw JSON response. Errors are checked at each step so a failed request
// reports the cause instead of panicking on a nil response.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model":    "glm-5.1",
		"messages": []map[string]string{{"role": "user", "content": "你好"}},
		// GLM extension field: placed at the top level of the request body.
		"thinking": map[string]string{"type": "disabled"},
	})
	if err != nil {
		fmt.Println("构造请求体失败:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("创建请求失败:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil here, so it must not be dereferenced or closed.
		fmt.Println("请求失败:", err)
		return
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("读取响应失败:", err)
		return
	}
	fmt.Println(string(data))
}
说明:
thinking 不是 OpenAI 标准字段。使用 OpenAI SDK 时需通过 SDK 提供的额外字段机制(Python 用 extra_body、Node.js 直接传字段);HTTP 直接调用时放在请求体顶层。

获取思考内容

开启思考后,响应中会新增 reasoning_content 字段,与 content 同级:
{
"choices": [{
"message": {
"role": "assistant",
"reasoning_content": "让我分析一下这个问题...",
"content": "最终答案是..."
}
}]
}
由于 reasoning_content 不是 OpenAI 标准字段,使用各语言 SDK 时需通过判空或反射方式访问;HTTP 直接调用时直接读取响应 JSON 即可。
Python
Node.js
Java
Go
message = response.choices[0].message
# reasoning_content is a GLM extension field, so read it defensively:
# getattr returns None when the attribute is not present on the message.
reasoning = getattr(message, "reasoning_content", None)
if reasoning:
    print("思考过程:", reasoning)
print("回答:", message.content)
const message = response.choices[0].message;
// @ts-ignore - reasoning_content 为 GLM 扩展字段
if (message.reasoning_content) {
console.log('思考过程:', message.reasoning_content);
}
console.log('回答:', message.content);
// HTTP 调用拿到响应字符串后,用 Gson 解析 reasoning_content 和 content 字段
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;

String respBody = response.body().string();
JsonObject json = JsonParser.parseString(respBody).getAsJsonObject();
JsonObject message = json.getAsJsonArray("choices")
.get(0).getAsJsonObject()
.getAsJsonObject("message");

if (message.has("reasoning_content") && !message.get("reasoning_content").isJsonNull()) {
System.out.println("思考过程:" + message.get("reasoning_content").getAsString());
}
System.out.println("回答:" + message.get("content").getAsString());
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
ReasoningContent string `json:"reasoning_content,omitempty"`
}

type Choice struct {
Index int `json:"index"`
Message Message `json:"message"`
}

type ChatResponse struct {
Choices []Choice `json:"choices"`
}

var result ChatResponse
json.Unmarshal(data, &result)

if result.Choices[0].Message.ReasoningContent != "" {
fmt.Println("思考过程:", result.Choices[0].Message.ReasoningContent)
}
fmt.Println("回答:", result.Choices[0].Message.Content)

多轮对话处理

构建后续轮次的 messages 时,无需回写 reasoning_content。仅将 content 字段作为 assistant 消息传入即可。

流式调用

启用思考模式时建议使用流式调用(stream=True)。思考内容可能较长,非流式调用容易触发网关超时。
流式模式下,reasoning_content 会在 content 之前完整输出。客户端处理逻辑:累积 delta.reasoning_content 输出思考过程,再累积 delta.content 输出最终回答。
cURL
Python
Node.js
Java
Go
# Streaming request with thinking enabled; -N disables curl's output buffering
# so chunks are shown as they arrive.
curl -N -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5.1",
    "messages": [{"role": "user", "content": "解释量子纠缠"}],
    "stream": true,
    "stream_options": {"include_usage": true},
    "thinking": {"type": "enabled"}
  }'
stream = client.chat.completions.create(
model="glm-5.1",
messages=[{"role": "user", "content": "解释量子纠缠"}],
stream=True,
stream_options={"include_usage": True},
extra_body={"thinking": {"type": "enabled"}},
)

# Print reasoning deltas as they arrive, then a one-time separator before the
# first answer delta.
is_answering = False
for chunk in stream:
    # Skip chunks without choices (presumably the usage-only chunk that
    # stream_options.include_usage adds -- confirm against the API reference).
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    # reasoning_content is a GLM extension field, so read it defensively.
    reasoning = getattr(delta, "reasoning_content", None)
    if reasoning:
        print(reasoning, end="", flush=True)
    answer = getattr(delta, "content", None)
    if answer:
        if not is_answering:
            print("\n--- 回答 ---\n")
            is_answering = True
        print(answer, end="", flush=True)
const stream = await client.chat.completions.create({
model: 'glm-5.1',
messages: [{ role: 'user', content: '解释量子纠缠' }],
stream: true,
stream_options: { include_usage: true },
// @ts-ignore - thinking 为 GLM 扩展字段
thinking: { type: 'enabled' },
});

// Print reasoning deltas as they arrive, then a one-time separator before the
// first answer delta.
let isAnswering = false;
for await (const chunk of stream) {
  // Skip chunks that carry no choices.
  if (!chunk.choices?.length) continue;
  const delta = chunk.choices[0].delta;
  // @ts-ignore - reasoning_content is a GLM extension field
  if (delta.reasoning_content) {
    process.stdout.write(delta.reasoning_content);
  }
  if (delta.content) {
    if (!isAnswering) {
      process.stdout.write('\n--- 回答 ---\n');
      isAnswering = true;
    }
    process.stdout.write(delta.content);
  }
}
import okhttp3.*;
import com.google.gson.*;
import java.util.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;

/**
 * Streams a chat completion with thinking enabled and prints it incrementally:
 * reasoning deltas first, then a one-time separator followed by the answer deltas.
 */
public class GlmStream {
    public static void main(String[] args) throws Exception {
        Map<String, Object> body = new HashMap<>();
        body.put("model", "glm-5.1");
        body.put("messages", List.of(Map.of("role", "user", "content", "解释量子纠缠")));
        body.put("stream", true);
        body.put("stream_options", Map.of("include_usage", true));
        body.put("thinking", Map.of("type", "enabled"));

        Request request = new Request.Builder()
            .url("https://tokenhub.tencentmaas.com/v1/chat/completions")
            .header("Authorization", "Bearer YOUR_API_KEY")
            .post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json")))
            .build();

        try (Response response = new OkHttpClient().newCall(request).execute()) {
            // A non-2xx response is presumably not an SSE stream; print it rather than
            // silently skipping every line in the loop below.
            if (!response.isSuccessful()) {
                System.err.println("请求失败:HTTP " + response.code() + " " + response.body().string());
                return;
            }

            // Decode explicitly as UTF-8: without a charset InputStreamReader falls back to
            // the platform default, which can garble Chinese text.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                    response.body().byteStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                String line;
                boolean isAnswering = false;
                while ((line = reader.readLine()) != null) {
                    // Only "data: " lines carry a payload; "[DONE]" ends the stream.
                    if (!line.startsWith("data: ")) continue;
                    String data = line.substring(6);
                    if (data.equals("[DONE]")) break;

                    JsonObject chunk = JsonParser.parseString(data).getAsJsonObject();
                    JsonArray choices = chunk.getAsJsonArray("choices");
                    if (choices == null || choices.size() == 0) continue;
                    JsonObject delta = choices.get(0).getAsJsonObject().getAsJsonObject("delta");
                    if (delta == null) continue;

                    if (delta.has("reasoning_content") && !delta.get("reasoning_content").isJsonNull()) {
                        System.out.print(delta.get("reasoning_content").getAsString());
                    }
                    if (delta.has("content") && !delta.get("content").isJsonNull()) {
                        if (!isAnswering) {
                            // Printed once, when the first answer delta arrives.
                            System.out.println("\n--- 回答 ---");
                            isAnswering = true;
                        }
                        System.out.print(delta.get("content").getAsString());
                    }
                }
            }
        }
    }
}
package main

import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"net/http"
"strings"
)

type StreamDelta struct {
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
}

type StreamChoice struct {
Delta StreamDelta `json:"delta"`
}

type StreamChunk struct {
Choices []StreamChoice `json:"choices"`
}

// main streams a chat completion with thinking enabled. Reasoning deltas are
// printed first; a one-time separator is printed before the first answer delta.
// Errors are checked at each step so a failed request does not panic on a nil
// response or end silently.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model":          "glm-5.1",
		"messages":       []map[string]string{{"role": "user", "content": "解释量子纠缠"}},
		"stream":         true,
		"stream_options": map[string]bool{"include_usage": true},
		"thinking":       map[string]string{"type": "enabled"},
	})
	if err != nil {
		fmt.Println("构造请求体失败:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("创建请求失败:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil here, so it must not be dereferenced or closed.
		fmt.Println("请求失败:", err)
		return
	}
	defer resp.Body.Close()
	// A non-200 response is presumably not an SSE stream; report it instead of
	// silently skipping every line below.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("请求失败:HTTP", resp.Status)
		return
	}

	isAnswering := false
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		// Only "data: " lines carry a payload; "[DONE]" ends the stream.
		if !strings.HasPrefix(line, "data: ") {
			continue
		}
		data := strings.TrimPrefix(line, "data: ")
		if data == "[DONE]" {
			break
		}
		var chunk StreamChunk
		if err := json.Unmarshal([]byte(data), &chunk); err != nil {
			continue
		}
		if len(chunk.Choices) == 0 {
			continue
		}
		delta := chunk.Choices[0].Delta
		if delta.ReasoningContent != "" {
			fmt.Print(delta.ReasoningContent)
		}
		if delta.Content != "" {
			if !isAnswering {
				fmt.Println("\n--- 回答 ---")
				isAnswering = true
			}
			fmt.Print(delta.Content)
		}
	}
	// Scan returns false on both EOF and read errors; report the latter.
	if err := scanner.Err(); err != nil {
		fmt.Println("\n读取流失败:", err)
	}
}

工具调用

GLM 系列的 Function Calling 遵循 OpenAI 标准协议(tools / tool_choice)。通用用法请参见 语言模型调用概览。

流式场景下的参数拼接

GLM 模型在流式调用时,工具调用参数(delta.tool_calls[].function.arguments)会分多个 chunk 增量返回,客户端需要按 tool_calls[].index 分别累积拼接:
Python
Node.js
Java
Go
completion = client.chat.completions.create(
model="glm-5.1",
messages=[{"role": "user", "content": "查询深圳天气"}],
tools=tools,
stream=True,
extra_body={"thinking": {"type": "disabled"}},
)

# Accumulate every tool call separately, keyed by its index: a single shared
# buffer would concatenate the arguments of parallel tool calls into invalid JSON.
tool_calls = {}  # index -> {"name": str, "arguments": str}
for chunk in completion:
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    for tc in getattr(delta, "tool_calls", None) or []:
        entry = tool_calls.setdefault(tc.index, {"name": "", "arguments": ""})
        if tc.function and tc.function.name:
            entry["name"] = tc.function.name
        if tc.function and tc.function.arguments:
            # Argument fragments arrive in order; append them as-is.
            entry["arguments"] += tc.function.arguments

for index in sorted(tool_calls):
    call = tool_calls[index]
    print(f"调用工具:{call['name']}")
    print(f"完整参数:{call['arguments']}")  # e.g. {"city": "深圳"}
const completion = await client.chat.completions.create({
model: 'glm-5.1',
messages: [{ role: 'user', content: '查询深圳天气' }],
tools: tools,
stream: true,
// @ts-ignore - thinking 为 GLM 扩展字段
thinking: { type: 'disabled' },
});

// Accumulate every tool call separately, keyed by its index: a single shared
// buffer would concatenate the arguments of parallel tool calls into invalid JSON.
const toolCalls = new Map(); // index -> { name, args }
for await (const chunk of completion) {
  if (!chunk.choices?.length) continue;
  const delta = chunk.choices[0].delta;
  for (const tc of delta.tool_calls ?? []) {
    const entry = toolCalls.get(tc.index) ?? { name: '', args: '' };
    if (tc.function?.name) entry.name = tc.function.name;
    // Argument fragments arrive in order; append them as-is.
    if (tc.function?.arguments) entry.args += tc.function.arguments;
    toolCalls.set(tc.index, entry);
  }
}

// Print the calls in index order.
for (const [, call] of [...toolCalls].sort(([a], [b]) => a - b)) {
  console.log(`调用工具:${call.name}`);
  console.log(`完整参数:${call.args}`); // e.g. {"city": "深圳"}
}
import okhttp3.*;
import com.google.gson.*;
import java.util.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;

// tools 定义请参见调用概览的 Function Calling 章节
Map<String, Object> body = new HashMap<>();
body.put("model", "glm-5.1");
body.put("messages", List.of(Map.of("role", "user", "content", "查询深圳天气")));
body.put("tools", tools);
body.put("stream", true);
body.put("thinking", Map.of("type", "disabled"));

Request request = new Request.Builder()
.url("https://tokenhub.tencentmaas.com/v1/chat/completions")
.header("Authorization", "Bearer YOUR_API_KEY")
.post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json")))
.build();

// Accumulate every tool call separately, keyed by its index: a single shared
// buffer would concatenate the arguments of parallel tool calls into invalid JSON.
// TreeMap keeps the calls sorted by index for printing.
Map<Integer, String> toolNames = new TreeMap<>();
Map<Integer, StringBuilder> argBuffers = new TreeMap<>();

// Decode explicitly as UTF-8: without a charset InputStreamReader falls back to
// the platform default, which can garble Chinese text.
try (Response response = new OkHttpClient().newCall(request).execute();
     BufferedReader reader = new BufferedReader(new InputStreamReader(
         response.body().byteStream(), java.nio.charset.StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
        // Only "data: " lines carry a payload; "[DONE]" ends the stream.
        if (!line.startsWith("data: ")) continue;
        String data = line.substring(6);
        if (data.equals("[DONE]")) break;

        JsonObject chunk = JsonParser.parseString(data).getAsJsonObject();
        JsonArray choices = chunk.getAsJsonArray("choices");
        if (choices == null || choices.size() == 0) continue;
        JsonObject delta = choices.get(0).getAsJsonObject().getAsJsonObject("delta");
        if (delta == null) continue;

        if (delta.has("tool_calls") && delta.get("tool_calls").isJsonArray()) {
            for (JsonElement tcEl : delta.getAsJsonArray("tool_calls")) {
                JsonObject tc = tcEl.getAsJsonObject();
                // Fall back to index 0 when the fragment carries no index.
                int index = tc.has("index") && !tc.get("index").isJsonNull()
                    ? tc.get("index").getAsInt()
                    : 0;
                toolNames.putIfAbsent(index, "");
                StringBuilder args = argBuffers.computeIfAbsent(index, k -> new StringBuilder());

                JsonObject fn = tc.getAsJsonObject("function");
                if (fn == null) continue;
                if (fn.has("name") && !fn.get("name").isJsonNull()) {
                    toolNames.put(index, fn.get("name").getAsString());
                }
                if (fn.has("arguments") && !fn.get("arguments").isJsonNull()) {
                    // Argument fragments arrive in order; append them as-is.
                    args.append(fn.get("arguments").getAsString());
                }
            }
        }
    }
}
for (Map.Entry<Integer, String> call : toolNames.entrySet()) {
    System.out.println("调用工具:" + call.getValue());
    System.out.println("完整参数:" + argBuffers.get(call.getKey()));
}
package main

import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"net/http"
"strings"
)

type ToolCallFunction struct {
Name string `json:"name,omitempty"`
Arguments string `json:"arguments,omitempty"`
}

type ToolCall struct {
Index int `json:"index"`
Function ToolCallFunction `json:"function"`
}

type ToolStreamDelta struct {
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
}

type ToolStreamChoice struct {
Delta ToolStreamDelta `json:"delta"`
}

type ToolStreamChunk struct {
Choices []ToolStreamChoice `json:"choices"`
}

// main streams a tool-calling request and reassembles each tool call from its
// fragments. Calls are accumulated per ToolCall.Index so that parallel tool
// calls are not merged into one argument string.
func main() {
	// For the tools definition, see the Function Calling section of the call overview.
	body, err := json.Marshal(map[string]interface{}{
		"model":    "glm-5.1",
		"messages": []map[string]string{{"role": "user", "content": "查询深圳天气"}},
		"tools":    tools,
		"stream":   true,
		"thinking": map[string]string{"type": "disabled"},
	})
	if err != nil {
		fmt.Println("构造请求体失败:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("创建请求失败:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil here, so it must not be dereferenced or closed.
		fmt.Println("请求失败:", err)
		return
	}
	defer resp.Body.Close()
	// A non-200 response is presumably not an SSE stream; report it instead of
	// silently skipping every line below.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("请求失败:HTTP", resp.Status)
		return
	}

	// pendingCall collects the name and argument fragments of one tool call.
	type pendingCall struct {
		name string
		args strings.Builder
	}
	calls := map[int]*pendingCall{}
	var order []int // tool-call indexes in first-seen order

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		// Only "data: " lines carry a payload; "[DONE]" ends the stream.
		if !strings.HasPrefix(line, "data: ") {
			continue
		}
		data := strings.TrimPrefix(line, "data: ")
		if data == "[DONE]" {
			break
		}
		var chunk ToolStreamChunk
		if err := json.Unmarshal([]byte(data), &chunk); err != nil {
			continue
		}
		if len(chunk.Choices) == 0 {
			continue
		}
		for _, tc := range chunk.Choices[0].Delta.ToolCalls {
			call, seen := calls[tc.Index]
			if !seen {
				call = &pendingCall{}
				calls[tc.Index] = call
				order = append(order, tc.Index)
			}
			if tc.Function.Name != "" {
				call.name = tc.Function.Name
			}
			if tc.Function.Arguments != "" {
				// Argument fragments arrive in order; append them as-is.
				call.args.WriteString(tc.Function.Arguments)
			}
		}
	}
	// Scan returns false on both EOF and read errors; report the latter.
	if err := scanner.Err(); err != nil {
		fmt.Println("读取流失败:", err)
	}

	for _, idx := range order {
		call := calls[idx]
		fmt.Printf("调用工具:%s\n完整参数:%s\n", call.name, call.args.String())
	}
}

tool_stream 参数

GLM 系列支持 tool_stream 参数(Boolean 类型),用于控制工具调用参数的流式分片粒度:
extra_body={"tool_stream": True, "thinking": {"type": "disabled"}}

多模态调用

glm-5v-turbo 是 GLM 系列中唯一支持多模态输入的模型,支持图像、视频和文件输入,输出为文本。

使用限制

图像、视频、文件不可在同一请求中混合传入。
文件输入仅支持 URL,不支持 Base64。
图像输入支持 URL 和 Base64 两种方式。

图像输入

已验证支持的格式:PNG、JPG、JPEG、WebP。其他格式如需使用,请先进行小样本测试确认。
cURL
Python
Node.js
Java
Go
# Image understanding with glm-5v-turbo: one text part plus one image_url part.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5v-turbo",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "请描述这张图片"},
        {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}}
      ]
    }],
    "max_tokens": 1024,
    "thinking": {"type": "disabled"}
  }'
response = client.chat.completions.create(
model="glm-5v-turbo",
messages=[{
"role": "user",
"content": [
{"type": "text", "text": "请描述这张图片"},
{"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
],
}],
max_tokens=1024,
extra_body={"thinking": {"type": "disabled"}},
)
print(response.choices[0].message.content)
const response = await client.chat.completions.create({
model: 'glm-5v-turbo',
messages: [{
role: 'user',
content: [
{ type: 'text', text: '请描述这张图片' },
{ type: 'image_url', image_url: { url: 'https://example.com/photo.png' } },
],
}],
max_tokens: 1024,
// @ts-ignore - thinking 为 GLM 扩展字段
thinking: { type: 'disabled' },
});
console.log(response.choices[0].message.content);
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

/**
 * Image-input sample for glm-5v-turbo: the user message carries a text part and
 * an {@code image_url} part, thinking is disabled, and the raw JSON response
 * body is printed to standard output.
 */
public class GlmImageInput {
    private static final String ENDPOINT = "https://tokenhub.tencentmaas.com/v1/chat/completions";

    public static void main(String[] args) throws Exception {
        // Multi-part message content: the prompt text followed by the image reference.
        Map<String, Object> textPart = Map.of("type", "text", "text", "请描述这张图片");
        Map<String, Object> imagePart = Map.of(
            "type", "image_url",
            "image_url", Map.of("url", "https://example.com/photo.png"));
        List<Map<String, Object>> content = List.of(textPart, imagePart);

        Map<String, Object> userTurn = Map.of("role", "user", "content", content);

        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "glm-5v-turbo");
        payload.put("messages", List.of(userTurn));
        payload.put("max_tokens", 1024);
        payload.put("thinking", Map.of("type", "disabled"));

        String json = new Gson().toJson(payload);
        RequestBody jsonBody = RequestBody.create(json, MediaType.parse("application/json"));

        Request httpRequest = new Request.Builder()
            .url(ENDPOINT)
            .header("Authorization", "Bearer YOUR_API_KEY")
            .post(jsonBody)
            .build();

        OkHttpClient http = new OkHttpClient();
        // try-with-resources closes the response (and its body) after printing.
        try (Response httpResponse = http.newCall(httpRequest).execute()) {
            System.out.println(httpResponse.body().string());
        }
    }
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// main sends an image-understanding request to glm-5v-turbo (a text part plus
// an image_url part, thinking disabled) and prints the raw JSON response.
// Errors are checked at each step so a failed request reports the cause
// instead of panicking on a nil response.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model": "glm-5v-turbo",
		"messages": []map[string]interface{}{
			{
				"role": "user",
				"content": []map[string]interface{}{
					{"type": "text", "text": "请描述这张图片"},
					{"type": "image_url", "image_url": map[string]string{
						"url": "https://example.com/photo.png",
					}},
				},
			},
		},
		"max_tokens": 1024,
		"thinking":   map[string]string{"type": "disabled"},
	})
	if err != nil {
		fmt.Println("构造请求体失败:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("创建请求失败:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil here, so it must not be dereferenced or closed.
		fmt.Println("请求失败:", err)
		return
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("读取响应失败:", err)
		return
	}
	fmt.Println(string(data))
}
Base64 方式(Python 示例):
将本地图片读取为 Base64 后,通过 Data URI 形式传入 image_url.url。
import base64

# Read the local image in binary mode and encode it as Base64 text, ready to be
# embedded in a data URI.
with open("local.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()

response = client.chat.completions.create(
model="glm-5v-turbo",
messages=[{
"role": "user",
"content": [
{"type": "text", "text": "图中有什么?"},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
],
}],
max_tokens=1024,
extra_body={"thinking": {"type": "disabled"}},
)

视频输入

已验证支持的格式:MP4、MPEG、MOV、AVI、WebM、WMV、3GPP。其他格式如需使用,请先进行小样本测试确认。
将上文图像示例中内容项的 type 取值和 image_url 字段名均替换为 video_url,即可传入视频文件 URL:
cURL
Python
Node.js
Java
Go
# Video understanding with glm-5v-turbo: one text part plus one video_url part.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5v-turbo",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "请总结这段视频的内容"},
        {"type": "video_url", "video_url": {"url": "https://example.com/demo.mp4"}}
      ]
    }],
    "max_tokens": 2048,
    "thinking": {"type": "disabled"}
  }'
response = client.chat.completions.create(
model="glm-5v-turbo",
messages=[{
"role": "user",
"content": [
{"type": "text", "text": "请总结这段视频的内容"},
{"type": "video_url", "video_url": {"url": "https://example.com/demo.mp4"}},
],
}],
max_tokens=2048,
extra_body={"thinking": {"type": "disabled"}},
)
print(response.choices[0].message.content)
const response = await client.chat.completions.create({
model: 'glm-5v-turbo',
messages: [{
role: 'user',
content: [
{ type: 'text', text: '请总结这段视频的内容' },
{ type: 'video_url', video_url: { url: 'https://example.com/demo.mp4' } },
],
}],
max_tokens: 2048,
// @ts-ignore - thinking 为 GLM 扩展字段
thinking: { type: 'disabled' },
});
console.log(response.choices[0].message.content);
List<Map<String, Object>> content = List.of(
Map.of("type", "text", "text", "请总结这段视频的内容"),
Map.of("type", "video_url", "video_url",
Map.of("url", "https://example.com/demo.mp4"))
);

Map<String, Object> body = new HashMap<>();
body.put("model", "glm-5v-turbo");
body.put("messages", List.of(Map.of("role", "user", "content", content)));
body.put("max_tokens", 2048);
body.put("thinking", Map.of("type", "disabled"));
// 其余 HTTP 请求逻辑同图像示例
body, _ := json.Marshal(map[string]interface{}{
"model": "glm-5v-turbo",
"messages": []map[string]interface{}{
{
"role": "user",
"content": []map[string]interface{}{
{"type": "text", "text": "请总结这段视频的内容"},
{"type": "video_url", "video_url": map[string]string{
"url": "https://example.com/demo.mp4",
}},
},
},
},
"max_tokens": 2048,
"thinking": map[string]string{"type": "disabled"},
})
// 其余 HTTP 请求逻辑同图像示例

文件输入

已验证支持的格式:PDF、TXT、DOC。其他格式如需使用,请先进行小样本测试确认。
文件输入仅支持通过 URL 传入,不支持 Base64 编码。如本地有文件需要解析,请先上传至对象存储服务(如腾讯云 COS),再使用生成的 URL。
cURL
Python
Node.js
Java
Go
# Document parsing with glm-5v-turbo: one text part plus one file_url part.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5v-turbo",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "请提取这份文档的核心要点"},
        {"type": "file_url", "file_url": {"url": "https://example.com/report.pdf"}}
      ]
    }],
    "max_tokens": 4096,
    "thinking": {"type": "disabled"}
  }'
response = client.chat.completions.create(
model="glm-5v-turbo",
messages=[{
"role": "user",
"content": [
{"type": "text", "text": "请提取这份文档的核心要点"},
{"type": "file_url", "file_url": {"url": "https://example.com/report.pdf"}},
],
}],
max_tokens=4096,
extra_body={"thinking": {"type": "disabled"}},
)
print(response.choices[0].message.content)
const response = await client.chat.completions.create({
model: 'glm-5v-turbo',
messages: [{
role: 'user',
content: [
{ type: 'text', text: '请提取这份文档的核心要点' },
{ type: 'file_url', file_url: { url: 'https://example.com/report.pdf' } },
],
}],
max_tokens: 4096,
// @ts-ignore - thinking 为 GLM 扩展字段
thinking: { type: 'disabled' },
});
console.log(response.choices[0].message.content);
List<Map<String, Object>> content = List.of(
Map.of("type", "text", "text", "请提取这份文档的核心要点"),
Map.of("type", "file_url", "file_url",
Map.of("url", "https://example.com/report.pdf"))
);

Map<String, Object> body = new HashMap<>();
body.put("model", "glm-5v-turbo");
body.put("messages", List.of(Map.of("role", "user", "content", content)));
body.put("max_tokens", 4096);
body.put("thinking", Map.of("type", "disabled"));
// 其余 HTTP 请求逻辑同图像示例
body, _ := json.Marshal(map[string]interface{}{
"model": "glm-5v-turbo",
"messages": []map[string]interface{}{
{
"role": "user",
"content": []map[string]interface{}{
{"type": "text", "text": "请提取这份文档的核心要点"},
{"type": "file_url", "file_url": map[string]string{
"url": "https://example.com/report.pdf",
}},
},
},
},
"max_tokens": 4096,
"thinking": map[string]string{"type": "disabled"},
})
// 其余 HTTP 请求逻辑同图像示例

使用限制

限制项
说明
思考模式默认开启
不传 thinking 参数时默认启用,响应中会包含 reasoning_content 字段。不需要时请显式关闭。
非流式调用超时风险
思考模式下输出较长,建议使用 stream=True
多模态仅限 glm-5v-turbo
其他三个模型不支持图像、视频、文件输入。
多模态输入不可混合
图像、视频、文件在同一请求中只能传入一类。
文件输入仅支持 URL
file_url 不支持 Base64 或 Data URI。
工具参数增量返回
流式调用时 tool_call.arguments 分多个 chunk 返回,需客户端拼接。
请求体积上限
单次请求 body 不超过 100 MB。

相关文档

语言模型调用概览:TokenHub 语言模型通用调用文档,包含 BaseURL、API Key、多轮对话、Function Calling、Anthropic 协议等通用说明。
TokenHub 控制台:API Key 创建与管理入口。