概述
智谱 GLM 系列模型已接入 TokenHub 平台。您可以通过 OpenAI 兼容协议调用 GLM 系列模型,实现通用对话、深度推理、工具调用和多模态理解等能力。
前提条件
已注册腾讯云账号并开通 TokenHub 服务。
已在 TokenHub 控制台 获取 API Key。
已根据所用语言安装对应 SDK 或具备 HTTP 请求能力。
快速开始
以下示例展示如何调用 glm-5.1 模型完成一次基础对话。请将 YOUR_API_KEY 替换为您创建的 API Key。
# Basic chat completion; each trailing backslash continues the command on the next line.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5.1",
    "messages": [{"role": "user", "content": "你好"}],
    "max_tokens": 1024
  }'
from openai import OpenAI

# Point the OpenAI SDK at the TokenHub endpoint.
client = OpenAI(
    base_url="https://tokenhub.tencentmaas.com/v1",
    api_key="YOUR_API_KEY",
)

# One user turn; the reply is capped at 1024 tokens.
conversation = [{"role": "user", "content": "你好"}]
response = client.chat.completions.create(
    model="glm-5.1",
    messages=conversation,
    max_tokens=1024,
)

reply = response.choices[0].message
print(reply.content)
import OpenAI from 'openai';

// Point the OpenAI SDK at the TokenHub endpoint.
const client = new OpenAI({
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
  apiKey: 'YOUR_API_KEY',
});

// One user turn; the reply is capped at 1024 tokens.
const response = await client.chat.completions.create({
  model: 'glm-5.1',
  max_tokens: 1024,
  messages: [{ role: 'user', content: '你好' }],
});

const [firstChoice] = response.choices;
console.log(firstChoice.message.content);
import com.google.gson.Gson;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;

/** Sends one non-streaming chat request to glm-5.1 and prints the raw JSON response. */
public class GlmQuickStart {
    private static final String ENDPOINT = "https://tokenhub.tencentmaas.com/v1/chat/completions";

    public static void main(String[] args) throws Exception {
        // Request payload: model, a single user message, and an output cap.
        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "glm-5.1");
        payload.put("messages", List.of(Map.of("role", "user", "content", "你好")));
        payload.put("max_tokens", 1024);

        String json = new Gson().toJson(payload);
        RequestBody requestBody = RequestBody.create(json, MediaType.parse("application/json"));
        Request request = new Request.Builder()
                .url(ENDPOINT)
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(requestBody)
                .build();

        OkHttpClient http = new OkHttpClient();
        try (Response response = http.newCall(request).execute()) {
            // Print the response body exactly as received.
            System.out.println(response.body().string());
        }
    }
}
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
)

// main sends one non-streaming chat request to glm-5.1 and prints the raw
// JSON response body.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model":      "glm-5.1",
		"messages":   []map[string]string{{"role": "user", "content": "你好"}},
		"max_tokens": 1024,
	})
	if err != nil {
		log.Fatalf("encode request: %v", err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		log.Fatalf("build request: %v", err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Check the error before touching resp: when the request fails resp is
	// nil and resp.Body.Close() would panic.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatalf("send request: %v", err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatalf("read response: %v", err)
	}
	fmt.Println(string(data))
}
说明:
支持的模型
model 参数值 | 定位 | 多模态 | 推荐场景 |
glm-5.1 | 旗舰 | 不支持 | 通用对话、创作、知识问答、复杂推理 |
glm-5 | 上一代旗舰 | 不支持 | 稳定性优先的场景 |
glm-5-turbo | Agent 优化 | 不支持 | 工具调用、长链路 Agent 任务 |
glm-5v-turbo | 多模态 | 支持图片、视频、文件 | 图像理解、视频分析、文档解析 |
所有模型的上下文窗口为 200K tokens,最大输出为 128K tokens。
思考模式
GLM 系列将对话能力与推理能力合为一体。您可以通过 thinking 参数控制是否启用思考能力,无需切换 model 参数。
开启或关闭思考
通过 thinking 字段控制思考行为。该字段为对象格式,包含一个 type 属性:
字段 | 类型 | 取值 | 默认值 | 说明 |
type | String | enabled / disabled | enabled | 控制当前请求是否启用思考能力 |
以下示例展示如何在请求中关闭思考:
# Disable thinking for this request through the top-level "thinking" object.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5.1",
    "messages": [{"role": "user", "content": "你好"}],
    "thinking": {"type": "disabled"}
  }'
# thinking is not a standard OpenAI field, so it is passed through extra_body.
thinking_off = {"thinking": {"type": "disabled"}}
response = client.chat.completions.create(
    model="glm-5.1",
    messages=[{"role": "user", "content": "你好"}],
    extra_body=thinking_off,
)
// Same request as the quick start, with thinking switched off.
const response = await client.chat.completions.create({
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'disabled' },
  model: 'glm-5.1',
  messages: [{ role: 'user', content: '你好' }],
});
import com.google.gson.Gson;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;

/** Sends one chat request to glm-5.1 with thinking disabled and prints the raw JSON response. */
public class GlmThinkingDisabled {
    private static final String ENDPOINT = "https://tokenhub.tencentmaas.com/v1/chat/completions";

    public static void main(String[] args) throws Exception {
        // thinking sits at the top level of the request body.
        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "glm-5.1");
        payload.put("messages", List.of(Map.of("role", "user", "content", "你好")));
        payload.put("thinking", Map.of("type", "disabled"));

        String json = new Gson().toJson(payload);
        RequestBody requestBody = RequestBody.create(json, MediaType.parse("application/json"));
        Request request = new Request.Builder()
                .url(ENDPOINT)
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(requestBody)
                .build();

        OkHttpClient http = new OkHttpClient();
        try (Response response = http.newCall(request).execute()) {
            System.out.println(response.body().string());
        }
    }
}
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
)

// main sends one chat request to glm-5.1 with thinking disabled and prints
// the raw JSON response body.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model":    "glm-5.1",
		"messages": []map[string]string{{"role": "user", "content": "你好"}},
		"thinking": map[string]string{"type": "disabled"},
	})
	if err != nil {
		log.Fatalf("encode request: %v", err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		log.Fatalf("build request: %v", err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Check the error before touching resp: when the request fails resp is
	// nil and resp.Body.Close() would panic.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatalf("send request: %v", err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatalf("read response: %v", err)
	}
	fmt.Println(string(data))
}
说明:
thinking 不是 OpenAI 标准字段。使用 OpenAI SDK 时需通过 SDK 提供的额外字段机制(Python 用 extra_body、Node.js 直接传字段);HTTP 直接调用时放在请求体顶层。
获取思考内容
开启思考后,响应中会新增 reasoning_content 字段,与 content 同级:
{
  "choices": [
    {
      "message": {
        "role": "assistant",
        "reasoning_content": "让我分析一下这个问题...",
        "content": "最终答案是..."
      }
    }
  ]
}
由于
reasoning_content 不是 OpenAI 标准字段,使用各语言 SDK 时需通过判空或反射方式访问;HTTP 直接调用时直接读取响应 JSON 即可。message = response.choices[0].messageif hasattr(message, "reasoning_content") and message.reasoning_content:print("思考过程:", message.reasoning_content)print("回答:", message.content)
const { message } = response.choices[0];
// @ts-ignore - reasoning_content is a GLM extension field
const reasoning = message.reasoning_content;
// Print the thinking text only when the response actually carries it.
if (reasoning) {
  console.log('思考过程:', reasoning);
}
console.log('回答:', message.content);
// HTTP 调用拿到响应字符串后,用 Gson 解析 reasoning_content 和 content 字段import com.google.gson.JsonObject;import com.google.gson.JsonParser;String respBody = response.body().string();JsonObject json = JsonParser.parseString(respBody).getAsJsonObject();JsonObject message = json.getAsJsonArray("choices").get(0).getAsJsonObject().getAsJsonObject("message");if (message.has("reasoning_content") && !message.get("reasoning_content").isJsonNull()) {System.out.println("思考过程:" + message.get("reasoning_content").getAsString());}System.out.println("回答:" + message.get("content").getAsString());
type Message struct {Role string `json:"role"`Content string `json:"content"`ReasoningContent string `json:"reasoning_content,omitempty"`}type Choice struct {Index int `json:"index"`Message Message `json:"message"`}type ChatResponse struct {Choices []Choice `json:"choices"`}var result ChatResponsejson.Unmarshal(data, &result)if result.Choices[0].Message.ReasoningContent != "" {fmt.Println("思考过程:", result.Choices[0].Message.ReasoningContent)}fmt.Println("回答:", result.Choices[0].Message.Content)
多轮对话处理
构建后续轮次的 messages 时,无需回写 reasoning_content。仅将 content 字段作为 assistant 消息传入即可。
流式调用
启用思考模式时建议使用流式调用(
stream=True)。思考内容可能较长,非流式调用容易触发网关超时。流式模式下,
reasoning_content 会在 content 之前完整输出。客户端处理逻辑:累积 delta.reasoning_content 输出思考过程,再累积 delta.content 输出最终回答。curl -N -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \\-H 'Authorization: Bearer YOUR_API_KEY' \\-H 'Content-Type: application/json' \\-d '{"model": "glm-5.1","messages": [{"role": "user", "content": "解释量子纠缠"}],"stream": true,"stream_options": {"include_usage": true},"thinking": {"type": "enabled"}}'
stream = client.chat.completions.create(
    model="glm-5.1",
    messages=[{"role": "user", "content": "解释量子纠缠"}],
    stream=True,
    stream_options={"include_usage": True},
    extra_body={"thinking": {"type": "enabled"}},
)

# Print reasoning deltas as they arrive, then a separator once, then answer deltas.
is_answering = False
for chunk in stream:
    # Some chunks carry no choices; skip them.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    reasoning = getattr(delta, "reasoning_content", None)
    if reasoning:
        print(reasoning, end="", flush=True)
    answer = getattr(delta, "content", None)
    if answer:
        if not is_answering:
            # Real newlines around the separator, not the two characters "\" and "n".
            print("\n--- 回答 ---\n")
            is_answering = True
        print(answer, end="", flush=True)
const stream = await client.chat.completions.create({
  model: 'glm-5.1',
  messages: [{ role: 'user', content: '解释量子纠缠' }],
  stream: true,
  stream_options: { include_usage: true },
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'enabled' },
});

// Print reasoning deltas as they arrive, then a separator once, then answer deltas.
let isAnswering = false;
for await (const chunk of stream) {
  // Some chunks carry no choices; skip them.
  if (!chunk.choices?.length) continue;
  const delta = chunk.choices[0].delta;
  // @ts-ignore - reasoning_content is a GLM extension field
  const reasoning = delta.reasoning_content;
  if (reasoning) {
    process.stdout.write(reasoning);
  }
  if (delta.content) {
    if (!isAnswering) {
      // Real newlines around the separator, not a literal backslash followed by "n".
      process.stdout.write('\n--- 回答 ---\n');
      isAnswering = true;
    }
    process.stdout.write(delta.content);
  }
}
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;

/**
 * Streams a glm-5.1 chat completion with thinking enabled and prints the
 * reasoning text followed by the final answer as the chunks arrive.
 */
public class GlmStream {
    private static final String DATA_PREFIX = "data: ";

    public static void main(String[] args) throws Exception {
        Map<String, Object> body = new HashMap<>();
        body.put("model", "glm-5.1");
        body.put("messages", List.of(Map.of("role", "user", "content", "解释量子纠缠")));
        body.put("stream", true);
        body.put("stream_options", Map.of("include_usage", true));
        body.put("thinking", Map.of("type", "enabled"));

        Request request = new Request.Builder()
                .url("https://tokenhub.tencentmaas.com/v1/chat/completions")
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json")))
                .build();

        try (Response response = new OkHttpClient().newCall(request).execute()) {
            // Fail loudly on a non-2xx status: the loop below only recognises
            // "data: " lines and would otherwise print nothing at all.
            if (!response.isSuccessful()) {
                throw new IOException("HTTP " + response.code() + ": " + response.body().string());
            }
            // Decode explicitly as UTF-8; without a charset the reader uses the
            // platform default, which can garble non-ASCII text.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(response.body().byteStream(), StandardCharsets.UTF_8))) {
                printStream(reader);
            }
        }
    }

    /**
     * Reads event-stream lines until {@code [DONE]} or end of stream, printing
     * reasoning deltas first and answer deltas after a one-time separator.
     *
     * @param reader line reader over the response body
     * @throws IOException if reading the stream fails
     */
    private static void printStream(BufferedReader reader) throws IOException {
        boolean isAnswering = false;
        String line;
        while ((line = reader.readLine()) != null) {
            if (!line.startsWith(DATA_PREFIX)) {
                continue;
            }
            String data = line.substring(DATA_PREFIX.length());
            if (data.equals("[DONE]")) {
                break;
            }
            JsonObject chunk = JsonParser.parseString(data).getAsJsonObject();
            JsonArray choices = chunk.getAsJsonArray("choices");
            if (choices == null || choices.size() == 0) {
                continue;
            }
            JsonObject delta = choices.get(0).getAsJsonObject().getAsJsonObject("delta");
            if (delta == null) {
                continue;
            }
            String reasoning = textOf(delta, "reasoning_content");
            if (!reasoning.isEmpty()) {
                System.out.print(reasoning);
            }
            // Skip empty content so the separator is printed only when answer
            // text actually starts, matching the other language samples.
            String content = textOf(delta, "content");
            if (!content.isEmpty()) {
                if (!isAnswering) {
                    System.out.println("\n--- 回答 ---");
                    isAnswering = true;
                }
                System.out.print(content);
            }
        }
    }

    /** Returns the string value of {@code key}, or "" when the key is absent or JSON null. */
    private static String textOf(JsonObject obj, String key) {
        if (!obj.has(key) || obj.get(key).isJsonNull()) {
            return "";
        }
        return obj.get(key).getAsString();
    }
}
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"strings"
)

// StreamDelta is the incremental payload of one streamed chunk.
type StreamDelta struct {
	Content          string `json:"content,omitempty"`
	ReasoningContent string `json:"reasoning_content,omitempty"`
}

// StreamChoice wraps the delta of one choice.
type StreamChoice struct {
	Delta StreamDelta `json:"delta"`
}

// StreamChunk models only the chunk fields this example reads.
type StreamChunk struct {
	Choices []StreamChoice `json:"choices"`
}

// main streams a glm-5.1 completion with thinking enabled, printing the
// reasoning text first and the final answer after a one-time separator.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model":          "glm-5.1",
		"messages":       []map[string]string{{"role": "user", "content": "解释量子纠缠"}},
		"stream":         true,
		"stream_options": map[string]bool{"include_usage": true},
		"thinking":       map[string]string{"type": "enabled"},
	})
	if err != nil {
		log.Fatalf("encode request: %v", err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		log.Fatalf("build request: %v", err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Check the error before touching resp: when the request fails resp is
	// nil and resp.Body.Close() would panic.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatalf("send request: %v", err)
	}
	defer resp.Body.Close()

	isAnswering := false
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "data: ") {
			continue
		}
		data := strings.TrimPrefix(line, "data: ")
		if data == "[DONE]" {
			break
		}
		var chunk StreamChunk
		if err := json.Unmarshal([]byte(data), &chunk); err != nil {
			continue // skip lines that are not valid chunk JSON
		}
		if len(chunk.Choices) == 0 {
			continue
		}
		delta := chunk.Choices[0].Delta
		if delta.ReasoningContent != "" {
			fmt.Print(delta.ReasoningContent)
		}
		if delta.Content != "" {
			if !isAnswering {
				// A real newline before the separator, not a literal backslash-n.
				fmt.Println("\n--- 回答 ---")
				isAnswering = true
			}
			fmt.Print(delta.Content)
		}
	}
	// Scan returns false on read errors too; report them instead of
	// ending the output silently.
	if err := scanner.Err(); err != nil {
		log.Fatalf("read stream: %v", err)
	}
}
工具调用
流式场景下的参数拼接
GLM 模型在流式调用时,
tool_call.arguments 会分多个 chunk 增量返回,客户端需要按 tool_call.index 累积拼接:completion = client.chat.completions.create(model="glm-5.1",messages=[{"role": "user", "content": "查询深圳天气"}],tools=tools,stream=True,extra_body={"thinking": {"type": "disabled"}},)arg_buffer = ""tool_name = ""for chunk in completion:if not chunk.choices:continuedelta = chunk.choices[0].deltaif hasattr(delta, "tool_calls") and delta.tool_calls:for tc in delta.tool_calls:if tc.function and tc.function.name:tool_name = tc.function.nameif tc.function and tc.function.arguments:arg_buffer += tc.function.argumentsprint(f"调用工具:{tool_name}")print(f"完整参数:{arg_buffer}") # 输出:{"city": "深圳"}
// For the tools definition, see the Function Calling section of the
// language model invocation overview.
const completion = await client.chat.completions.create({
  model: 'glm-5.1',
  messages: [{ role: 'user', content: '查询深圳天气' }],
  tools: tools,
  stream: true,
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'disabled' },
});

// Keyed by tool_call.index so that argument fragments belonging to different
// tool calls are never concatenated into one buffer.
const toolCalls = new Map();
for await (const chunk of completion) {
  if (!chunk.choices?.length) continue;
  const delta = chunk.choices[0].delta;
  for (const tc of delta.tool_calls ?? []) {
    let call = toolCalls.get(tc.index);
    if (!call) {
      call = { name: '', args: '' };
      toolCalls.set(tc.index, call);
    }
    if (tc.function?.name) call.name = tc.function.name;
    if (tc.function?.arguments) call.args += tc.function.arguments;
  }
}

// Report the calls in index order.
const ordered = [...toolCalls.entries()].sort(([a], [b]) => a - b);
for (const [, call] of ordered) {
  console.log(`调用工具:${call.name}`);
  console.log(`完整参数:${call.args}`); // e.g. {"city": "深圳"}
}
import okhttp3.*;import com.google.gson.*;import java.util.*;import java.io.BufferedReader;import java.io.InputStreamReader;// tools 定义请参见调用概览的 Function Calling 章节Map<String, Object> body = new HashMap<>();body.put("model", "glm-5.1");body.put("messages", List.of(Map.of("role", "user", "content", "查询深圳天气")));body.put("tools", tools);body.put("stream", true);body.put("thinking", Map.of("type", "disabled"));Request request = new Request.Builder().url("https://tokenhub.tencentmaas.com/v1/chat/completions").header("Authorization", "Bearer YOUR_API_KEY").post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json"))).build();StringBuilder argBuffer = new StringBuilder();String toolName = "";try (Response response = new OkHttpClient().newCall(request).execute();BufferedReader reader = new BufferedReader(new InputStreamReader(response.body().byteStream()))) {String line;while ((line = reader.readLine()) != null) {if (!line.startsWith("data: ")) continue;String data = line.substring(6);if (data.equals("[DONE]")) break;JsonObject chunk = JsonParser.parseString(data).getAsJsonObject();JsonArray choices = chunk.getAsJsonArray("choices");if (choices == null || choices.size() == 0) continue;JsonObject delta = choices.get(0).getAsJsonObject().getAsJsonObject("delta");if (delta.has("tool_calls") && delta.get("tool_calls").isJsonArray()) {for (JsonElement tcEl : delta.getAsJsonArray("tool_calls")) {JsonObject fn = tcEl.getAsJsonObject().getAsJsonObject("function");if (fn != null) {if (fn.has("name") && !fn.get("name").isJsonNull()) {toolName = fn.get("name").getAsString();}if (fn.has("arguments") && !fn.get("arguments").isJsonNull()) {argBuffer.append(fn.get("arguments").getAsString());}}}}}}System.out.println("调用工具:" + toolName);System.out.println("完整参数:" + argBuffer);
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"sort"
	"strings"
)

// ToolCallFunction is the function part of a streamed tool call fragment.
type ToolCallFunction struct {
	Name      string `json:"name,omitempty"`
	Arguments string `json:"arguments,omitempty"`
}

// ToolCall is one tool call fragment; Index identifies which call it belongs to.
type ToolCall struct {
	Index    int              `json:"index"`
	Function ToolCallFunction `json:"function"`
}

type ToolStreamDelta struct {
	ToolCalls []ToolCall `json:"tool_calls,omitempty"`
}

type ToolStreamChoice struct {
	Delta ToolStreamDelta `json:"delta"`
}

// ToolStreamChunk models only the chunk fields this example reads.
type ToolStreamChunk struct {
	Choices []ToolStreamChoice `json:"choices"`
}

// pendingCall accumulates the name and argument fragments of one tool call.
type pendingCall struct {
	name string
	args strings.Builder
}

// main streams a tool-calling request and reassembles each tool call's
// arguments from the fragments spread across chunks.
func main() {
	// For the tools definition, see the Function Calling section of the
	// language model invocation overview.
	body, err := json.Marshal(map[string]interface{}{
		"model":    "glm-5.1",
		"messages": []map[string]string{{"role": "user", "content": "查询深圳天气"}},
		"tools":    tools,
		"stream":   true,
		"thinking": map[string]string{"type": "disabled"},
	})
	if err != nil {
		log.Fatalf("encode request: %v", err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		log.Fatalf("build request: %v", err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Check the error before touching resp: when the request fails resp is
	// nil and resp.Body.Close() would panic.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatalf("send request: %v", err)
	}
	defer resp.Body.Close()

	// Keyed by tool_call.index so that fragments belonging to different
	// tool calls are never concatenated into one buffer.
	calls := map[int]*pendingCall{}
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "data: ") {
			continue
		}
		data := strings.TrimPrefix(line, "data: ")
		if data == "[DONE]" {
			break
		}
		var chunk ToolStreamChunk
		if err := json.Unmarshal([]byte(data), &chunk); err != nil {
			continue // skip lines that are not valid chunk JSON
		}
		if len(chunk.Choices) == 0 {
			continue
		}
		for _, tc := range chunk.Choices[0].Delta.ToolCalls {
			call, ok := calls[tc.Index]
			if !ok {
				call = &pendingCall{}
				calls[tc.Index] = call
			}
			if tc.Function.Name != "" {
				call.name = tc.Function.Name
			}
			call.args.WriteString(tc.Function.Arguments)
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("read stream: %v", err)
	}

	// Report the calls in index order.
	indexes := make([]int, 0, len(calls))
	for index := range calls {
		indexes = append(indexes, index)
	}
	sort.Ints(indexes)
	for _, index := range indexes {
		call := calls[index]
		// Real newlines in the format string, not literal backslash-n.
		fmt.Printf("调用工具:%s\n完整参数:%s\n", call.name, call.args.String())
	}
}
tool_stream 参数
GLM 系列支持 tool_stream 参数(Boolean 类型),用于控制工具调用参数的流式分片粒度:
extra_body={"tool_stream": True, "thinking": {"type": "disabled"}}
多模态调用
glm-5v-turbo 是 GLM 系列中唯一支持多模态输入的模型,支持图像、视频和文件输入,输出为文本。
使用限制
图像、视频、文件不可在同一请求中混合传入。
文件输入仅支持 URL,不支持 Base64。
图像输入支持 URL 和 Base64 两种方式。
图像输入
已验证支持的格式:PNG、JPG、JPEG、WebP。其他格式如需使用,请先进行小样本测试确认。
# Image understanding: a text part plus an image_url part in one user message.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5v-turbo",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "请描述这张图片"},
        {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}}
      ]
    }],
    "max_tokens": 1024,
    "thinking": {"type": "disabled"}
  }'
# The user message content is a list of typed parts: the text prompt, then the image.
content_parts = [
    {"type": "text", "text": "请描述这张图片"},
    {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
]
response = client.chat.completions.create(
    model="glm-5v-turbo",
    messages=[{"role": "user", "content": content_parts}],
    max_tokens=1024,
    extra_body={"thinking": {"type": "disabled"}},
)
answer = response.choices[0].message.content
print(answer)
// The user message content is a list of typed parts: the text prompt, then the image.
const response = await client.chat.completions.create({
  model: 'glm-5v-turbo',
  max_tokens: 1024,
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: '请描述这张图片' },
        { type: 'image_url', image_url: { url: 'https://example.com/photo.png' } },
      ],
    },
  ],
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'disabled' },
});

const [firstChoice] = response.choices;
console.log(firstChoice.message.content);
import com.google.gson.Gson;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;

/** Sends an image plus a text prompt to glm-5v-turbo and prints the raw JSON response. */
public class GlmImageInput {
    private static final String ENDPOINT = "https://tokenhub.tencentmaas.com/v1/chat/completions";

    public static void main(String[] args) throws Exception {
        // Content parts: the text prompt followed by the image reference.
        Map<String, Object> textPart = Map.of("type", "text", "text", "请描述这张图片");
        Map<String, Object> imagePart = Map.of(
                "type", "image_url",
                "image_url", Map.of("url", "https://example.com/photo.png"));
        List<Map<String, Object>> content = List.of(textPart, imagePart);

        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "glm-5v-turbo");
        payload.put("messages", List.of(Map.of("role", "user", "content", content)));
        payload.put("max_tokens", 1024);
        payload.put("thinking", Map.of("type", "disabled"));

        String json = new Gson().toJson(payload);
        RequestBody requestBody = RequestBody.create(json, MediaType.parse("application/json"));
        Request request = new Request.Builder()
                .url(ENDPOINT)
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(requestBody)
                .build();

        OkHttpClient http = new OkHttpClient();
        try (Response response = http.newCall(request).execute()) {
            System.out.println(response.body().string());
        }
    }
}
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
)

// main sends an image plus a text prompt to glm-5v-turbo and prints the raw
// JSON response body.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model": "glm-5v-turbo",
		"messages": []map[string]interface{}{
			{
				"role": "user",
				"content": []map[string]interface{}{
					{"type": "text", "text": "请描述这张图片"},
					{"type": "image_url", "image_url": map[string]string{
						"url": "https://example.com/photo.png",
					}},
				},
			},
		},
		"max_tokens": 1024,
		"thinking":   map[string]string{"type": "disabled"},
	})
	if err != nil {
		log.Fatalf("encode request: %v", err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		log.Fatalf("build request: %v", err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Check the error before touching resp: when the request fails resp is
	// nil and resp.Body.Close() would panic.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatalf("send request: %v", err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatalf("read response: %v", err)
	}
	fmt.Println(string(data))
}
Base64 方式(Python 示例):
将本地图片读取为 Base64 后,通过 Data URI 形式传入
image_url.url:import base64with open("local.jpg", "rb") as f:b64 = base64.b64encode(f.read()).decode()response = client.chat.completions.create(model="glm-5v-turbo",messages=[{"role": "user","content": [{"type": "text", "text": "图中有什么?"},{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},],}],max_tokens=1024,extra_body={"thinking": {"type": "disabled"}},)
视频输入
已验证支持的格式:MP4、MPEG、MOV、AVI、WebM、WMV、3GPP。其他格式如需使用,请先进行小样本测试确认。
将上文图像示例中的 image_url 字段替换为 video_url,即可传入视频文件 URL:
# Video understanding: a text part plus a video_url part in one user message.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5v-turbo",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "请总结这段视频的内容"},
        {"type": "video_url", "video_url": {"url": "https://example.com/demo.mp4"}}
      ]
    }],
    "max_tokens": 2048,
    "thinking": {"type": "disabled"}
  }'
# The user message content is a list of typed parts: the text prompt, then the video.
content_parts = [
    {"type": "text", "text": "请总结这段视频的内容"},
    {"type": "video_url", "video_url": {"url": "https://example.com/demo.mp4"}},
]
response = client.chat.completions.create(
    model="glm-5v-turbo",
    messages=[{"role": "user", "content": content_parts}],
    max_tokens=2048,
    extra_body={"thinking": {"type": "disabled"}},
)
answer = response.choices[0].message.content
print(answer)
// The user message content is a list of typed parts: the text prompt, then the video.
const response = await client.chat.completions.create({
  model: 'glm-5v-turbo',
  max_tokens: 2048,
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: '请总结这段视频的内容' },
        { type: 'video_url', video_url: { url: 'https://example.com/demo.mp4' } },
      ],
    },
  ],
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'disabled' },
});

const [firstChoice] = response.choices;
console.log(firstChoice.message.content);
// Content parts: the text prompt followed by the video reference.
Map<String, Object> textPart = Map.of("type", "text", "text", "请总结这段视频的内容");
Map<String, Object> videoPart = Map.of(
        "type", "video_url",
        "video_url", Map.of("url", "https://example.com/demo.mp4"));
List<Map<String, Object>> content = List.of(textPart, videoPart);

Map<String, Object> userMessage = Map.of("role", "user", "content", content);

Map<String, Object> body = new HashMap<>();
body.put("model", "glm-5v-turbo");
body.put("messages", List.of(userMessage));
body.put("max_tokens", 2048);
body.put("thinking", Map.of("type", "disabled"));
// The rest of the HTTP request logic is the same as in the image example.
body, _ := json.Marshal(map[string]interface{}{"model": "glm-5v-turbo","messages": []map[string]interface{}{{"role": "user","content": []map[string]interface{}{{"type": "text", "text": "请总结这段视频的内容"},{"type": "video_url", "video_url": map[string]string{"url": "https://example.com/demo.mp4",}},},},},"max_tokens": 2048,"thinking": map[string]string{"type": "disabled"},})// 其余 HTTP 请求逻辑同图像示例
文件输入
已验证支持的格式:PDF、TXT、DOC。其他格式如需使用,请先进行小样本测试确认。
文件输入仅支持通过 URL 传入,不支持 Base64 编码。如本地有文件需要解析,请先上传至对象存储服务(如腾讯云 COS),再使用生成的 URL。
# Document parsing: a text part plus a file_url part; files are passed by URL.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5v-turbo",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "请提取这份文档的核心要点"},
        {"type": "file_url", "file_url": {"url": "https://example.com/report.pdf"}}
      ]
    }],
    "max_tokens": 4096,
    "thinking": {"type": "disabled"}
  }'
# The user message content is a list of typed parts: the text prompt, then the file.
content_parts = [
    {"type": "text", "text": "请提取这份文档的核心要点"},
    {"type": "file_url", "file_url": {"url": "https://example.com/report.pdf"}},
]
response = client.chat.completions.create(
    model="glm-5v-turbo",
    messages=[{"role": "user", "content": content_parts}],
    max_tokens=4096,
    extra_body={"thinking": {"type": "disabled"}},
)
answer = response.choices[0].message.content
print(answer)
// The user message content is a list of typed parts: the text prompt, then the file.
const response = await client.chat.completions.create({
  model: 'glm-5v-turbo',
  max_tokens: 4096,
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: '请提取这份文档的核心要点' },
        { type: 'file_url', file_url: { url: 'https://example.com/report.pdf' } },
      ],
    },
  ],
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'disabled' },
});

const [firstChoice] = response.choices;
console.log(firstChoice.message.content);
// Content parts: the text prompt followed by the file reference.
Map<String, Object> textPart = Map.of("type", "text", "text", "请提取这份文档的核心要点");
Map<String, Object> filePart = Map.of(
        "type", "file_url",
        "file_url", Map.of("url", "https://example.com/report.pdf"));
List<Map<String, Object>> content = List.of(textPart, filePart);

Map<String, Object> userMessage = Map.of("role", "user", "content", content);

Map<String, Object> body = new HashMap<>();
body.put("model", "glm-5v-turbo");
body.put("messages", List.of(userMessage));
body.put("max_tokens", 4096);
body.put("thinking", Map.of("type", "disabled"));
// The rest of the HTTP request logic is the same as in the image example.
body, _ := json.Marshal(map[string]interface{}{"model": "glm-5v-turbo","messages": []map[string]interface{}{{"role": "user","content": []map[string]interface{}{{"type": "text", "text": "请提取这份文档的核心要点"},{"type": "file_url", "file_url": map[string]string{"url": "https://example.com/report.pdf",}},},},},"max_tokens": 4096,"thinking": map[string]string{"type": "disabled"},})// 其余 HTTP 请求逻辑同图像示例
使用限制
限制项 | 说明 |
思考模式默认开启 | 不传 thinking 参数时默认启用,响应中会包含 reasoning_content 字段。不需要时请显式关闭。 |
非流式调用超时风险 | 思考模式下输出较长,建议使用 stream=True。 |
多模态仅限 glm-5v-turbo | 其他三个模型不支持图像、视频、文件输入。 |
多模态输入不可混合 | 图像、视频、文件在同一请求中只能传入一类。 |
文件输入仅支持 URL | file_url 不支持 Base64 或 Data URI。 |
工具参数增量返回 | 流式调用时 tool_call.arguments 分多个 chunk 返回,需客户端拼接。 |
请求体积上限 | 单次请求 body 不超过 100 MB。 |
相关文档
语言模型调用概览:TokenHub 语言模型通用调用文档,包含 BaseURL、API Key、多轮对话、Function Calling、Anthropic 协议等通用说明。
TokenHub 控制台:API Key 创建与管理入口。