Kimi 调用指南

最近更新时间:2026-05-28 14:57:30

我的收藏
Kimi 系列模型兼容 OpenAI / Anthropic 协议,可直接通过 OpenAI SDK 或标准 HTTP 接口调用。本文给出常用场景下 5 种语言的调用示例(cURL / Python / Node.js / Java / Go),并说明 Kimi 在思考模式、多模态等方面的特殊用法。
注意:
本文以 TokenHub 网关为例,所有调用均通过 https://tokenhub.tencentmaas.com/v1 接入,使用 TokenHub 自有 API Key 完成鉴权。

支持的模型

TokenHub 当前支持以下 Kimi 模型(具体以 模型列表 为准):
模型 ID
类型
思考能力
视觉能力
视频能力
kimi-k2.6
通用对话模型
可开关(默认开启)
支持
支持
kimi-k2.5
通用对话模型
可开关(默认开启)
支持
不支持
说明:
Kimi K2.6 / K2.5 同时是对话模型和思考模型,无需像其他厂商那样在普通模型和思考模型之间切换 model ID,只需通过 thinking 参数控制是否启用思考能力即可。

与其他模型的关键差异

维度
Kimi K2.6 / K2.5
OpenAI / Claude / GLM 等
思考能力开关
通过 thinking.type 参数显式控制
通常通过切换 model 或单独的 reasoning 参数控制
推理过程字段
响应中独立返回 reasoning_content
多数模型不暴露推理过程
OpenAI SDK 访问推理字段
必须用 hasattr / getattr
-
多轮对话推理保留
thinking.keep 控制是否透传历史 reasoning_content
-
temperature
固定为 0.6,传入其他值会返回 400 错误
默认可在 0~2 自由调节
max_tokens 推荐值
≥ 16000(推理 + 回答共享额度)
通常 1024~4096 即可
多模态图片输入
支持 Base64 编码与公网 URL 直链 两种方式
普遍支持 URL 直链
视频输入
仅 K2.6 支持
多数模型不支持
多轮对话 messages 回写
启用思考时必须同时回写 content 和 reasoning_content
通常只需回写 content

通用调用示例

以下是 Kimi 模型的常用调用场景,均使用 OpenAI 兼容协议。除非特别说明,示例不启用思考,目的是让您先跑通最简调用;启用思考的写法请参见下文“Kimi 思考模式”。

基础对话

请将 YOUR_API_KEY 替换为您创建的 API Key。
cURL
Python
Node.js
Java
Go
# Minimal non-streaming chat completion request.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "kimi-k2.6",
    "messages": [
      {"role": "user", "content": "你好,请介绍一下你自己"}
    ]
  }'
from openai import OpenAI

# Point the OpenAI SDK at the TokenHub gateway.
client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub.tencentmaas.com/v1",
)

# Single-turn conversation: one user message, no streaming.
conversation = [{"role": "user", "content": "你好,请介绍一下你自己"}]
completion = client.chat.completions.create(model="kimi-k2.6", messages=conversation)

reply = completion.choices[0].message
print(reply.content)
import OpenAI from 'openai';

// OpenAI SDK client pointed at the TokenHub gateway.
const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

// Single-turn, non-streaming request.
const messages = [{ role: 'user', content: '你好,请介绍一下你自己' }];
const completion = await client.chat.completions.create({ model: 'kimi-k2.6', messages });

const reply = completion.choices[0].message;
console.log(reply.content);
// 基于 OpenAI 兼容协议,使用 OkHttp 直接调用 HTTP 接口
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

public class BasicChat {
    /** Sends one user message to kimi-k2.6 and prints the raw JSON response body. */
    public static void main(String[] args) throws Exception {
        // Single-turn conversation: one user message.
        List<Map<String, String>> messages = Arrays.asList(
            Map.of("role", "user", "content", "你好,请介绍一下你自己")
        );

        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "kimi-k2.6");
        payload.put("messages", messages);
        String json = new Gson().toJson(payload);

        MediaType jsonType = MediaType.parse("application/json");
        Request request = new Request.Builder()
            .url("https://tokenhub.tencentmaas.com/v1/chat/completions")
            .header("Authorization", "Bearer YOUR_API_KEY")
            .post(RequestBody.create(json, jsonType))
            .build();

        OkHttpClient http = new OkHttpClient();
        try (Response response = http.newCall(request).execute()) {
            System.out.println(response.body().string());
        }
    }
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// main sends a single-turn, non-streaming chat completion request to the
// TokenHub gateway and prints the raw JSON response body.
func main() {
	body := map[string]interface{}{
		"model": "kimi-k2.6",
		"messages": []map[string]string{
			{"role": "user", "content": "你好,请介绍一下你自己"},
		},
	}
	payload, err := json.Marshal(body)
	if err != nil {
		fmt.Println("encode request:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(payload))
	if err != nil {
		fmt.Println("build request:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Do returns a nil response on error, so check before touching resp.Body.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("send request:", err)
		return
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("read response:", err)
		return
	}
	fmt.Println(string(data))
}

流式输出

启用 stream: true 即可流式获取响应,便于打字机效果展示,并能避免长响应触发网关超时。
cURL
Python
Node.js
Java
Go
# Streaming chat completion: the response arrives as an SSE stream.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "kimi-k2.6",
    "stream": true,
    "messages": [
      {"role": "user", "content": "请用一句话介绍一下你自己"}
    ]
  }'
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub.tencentmaas.com/v1",
)

# stream=True makes the SDK return an iterator of incremental chunks.
stream = client.chat.completions.create(
    model="kimi-k2.6",
    messages=[{"role": "user", "content": "请用一句话介绍一下你自己"}],
    stream=True,
)
for chunk in stream:
    # Skip chunks that carry no choices or no text in the delta.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
import OpenAI from 'openai';

// OpenAI SDK client pointed at the TokenHub gateway.
const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

const request = {
  model: 'kimi-k2.6',
  messages: [{ role: 'user', content: '请用一句话介绍一下你自己' }],
  stream: true,
};
const stream = await client.chat.completions.create(request);

// Write every non-empty text fragment as soon as it arrives.
for await (const part of stream) {
  const text = part.choices?.[0]?.delta?.content;
  if (!text) continue;
  process.stdout.write(text);
}
import okhttp3.*;
import okhttp3.sse.*;
import com.google.gson.Gson;
import java.util.*;

public class StreamChat {
    /**
     * Sends a streaming chat completion request and prints the payload of
     * every SSE event as it arrives.
     */
    public static void main(String[] args) {
        Map<String, Object> body = new HashMap<>();
        body.put("model", "kimi-k2.6");
        body.put("stream", true);
        body.put("messages", List.of(
            Map.of("role", "user", "content", "请用一句话介绍一下你自己")
        ));

        Request request = new Request.Builder()
            .url("https://tokenhub.tencentmaas.com/v1/chat/completions")
            .header("Authorization", "Bearer YOUR_API_KEY")
            .header("Content-Type", "application/json")
            .post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json")))
            .build();

        EventSources.createFactory(new OkHttpClient()).newEventSource(request,
            new EventSourceListener() {
                @Override public void onEvent(EventSource es, String id, String type, String data) {
                    System.out.println(data);
                }

                // Without this override a rejected request (bad key, HTTP error,
                // network failure) would end with no output at all.
                @Override public void onFailure(EventSource es, Throwable t, Response response) {
                    if (response != null) {
                        System.err.println("Request failed: HTTP " + response.code());
                    }
                    if (t != null) {
                        System.err.println("Request failed: " + t);
                    }
                }
            });
    }
}
package main

import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"net/http"
"strings"
)

// main sends a streaming chat completion request and prints the payload of
// every SSE "data: " line as it arrives.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model":  "kimi-k2.6",
		"stream": true,
		"messages": []map[string]string{
			{"role": "user", "content": "请用一句话介绍一下你自己"},
		},
	})
	if err != nil {
		fmt.Println("encode request:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("build request:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Do returns a nil response on error, so check before touching resp.Body.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("send request:", err)
		return
	}
	defer resp.Body.Close()

	// An error response is not an SSE stream; report it instead of printing nothing.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("unexpected status:", resp.Status)
		return
	}

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "data: ") {
			fmt.Println(strings.TrimPrefix(line, "data: "))
		}
	}
	if err := scanner.Err(); err != nil {
		fmt.Println("read stream:", err)
	}
}

System Prompt

通过 system 角色的消息可以为模型设定全局指令、人设、回复风格等。
cURL
Python
Node.js
Java
Go
# Chat completion with a system message that sets the persona.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "kimi-k2.6",
    "messages": [
      {"role": "system", "content": "你是 Kimi,一名严谨的物理学助教,回答需配合简单类比。"},
      {"role": "user", "content": "请解释什么是量子纠缠"}
    ]
  }'
from openai import OpenAI

# Point the OpenAI SDK at the TokenHub gateway.
client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub.tencentmaas.com/v1",
)

# The system message sets the persona; the user message carries the question.
system_msg = {"role": "system", "content": "你是 Kimi,一名严谨的物理学助教,回答需配合简单类比。"}
user_msg = {"role": "user", "content": "请解释什么是量子纠缠"}

completion = client.chat.completions.create(
    model="kimi-k2.6",
    messages=[system_msg, user_msg],
)
print(completion.choices[0].message.content)
import OpenAI from 'openai';

// OpenAI SDK client pointed at the TokenHub gateway.
const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

// The system message sets the persona; the user message carries the question.
const systemMsg = { role: 'system', content: '你是 Kimi,一名严谨的物理学助教,回答需配合简单类比。' };
const userMsg = { role: 'user', content: '请解释什么是量子纠缠' };

const completion = await client.chat.completions.create({
  model: 'kimi-k2.6',
  messages: [systemMsg, userMsg],
});
const reply = completion.choices[0].message;
console.log(reply.content);
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

public class SystemPromptChat {
    /** Sends a system + user message pair and prints the raw JSON response body. */
    public static void main(String[] args) throws Exception {
        // The system message sets the persona; the user message carries the question.
        Map<String, String> systemMsg =
            Map.of("role", "system", "content", "你是 Kimi,一名严谨的物理学助教,回答需配合简单类比。");
        Map<String, String> userMsg =
            Map.of("role", "user", "content", "请解释什么是量子纠缠");

        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "kimi-k2.6");
        payload.put("messages", List.of(systemMsg, userMsg));
        String json = new Gson().toJson(payload);

        Request.Builder builder = new Request.Builder();
        builder.url("https://tokenhub.tencentmaas.com/v1/chat/completions");
        builder.header("Authorization", "Bearer YOUR_API_KEY");
        builder.post(RequestBody.create(json, MediaType.parse("application/json")));
        Request request = builder.build();

        OkHttpClient http = new OkHttpClient();
        try (Response response = http.newCall(request).execute()) {
            System.out.println(response.body().string());
        }
    }
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// main sends a system + user message pair to the chat completions endpoint
// and prints the raw JSON response body.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model": "kimi-k2.6",
		"messages": []map[string]string{
			{"role": "system", "content": "你是 Kimi,一名严谨的物理学助教,回答需配合简单类比。"},
			{"role": "user", "content": "请解释什么是量子纠缠"},
		},
	})
	if err != nil {
		fmt.Println("encode request:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("build request:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Do returns a nil response on error, so check before touching resp.Body.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("send request:", err)
		return
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("read response:", err)
		return
	}
	fmt.Println(string(data))
}

多轮对话(基础版)

Kimi API 与其他主流模型一样是无状态的,需要在每次请求中传入完整的 messages 历史。下述示例不开启思考,所以只回写 content 即可;如果启用了思考,必须连同 reasoning_content 一起回写,详情请参见下文“多轮对话回写 reasoning_content”。
cURL
Python
Node.js
Java
Go
# Multi-turn chat: the full message history is resent on every request.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "kimi-k2.6",
    "messages": [
      {"role": "system", "content": "你是 Kimi。"},
      {"role": "user", "content": "推荐一本科普读物"},
      {"role": "assistant", "content": "推荐《时间简史》。"},
      {"role": "user", "content": "再说一本进阶的"}
    ]
  }'
from openai import OpenAI

# Point the OpenAI SDK at the TokenHub gateway.
client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub.tencentmaas.com/v1",
)

# The full history is resent on every call, ending with the newest user turn.
history = [
    {"role": "system", "content": "你是 Kimi。"},
    {"role": "user", "content": "推荐一本科普读物"},
    {"role": "assistant", "content": "推荐《时间简史》。"},
    {"role": "user", "content": "再说一本进阶的"},
]
completion = client.chat.completions.create(
    model="kimi-k2.6",
    messages=history,
)
print(completion.choices[0].message.content)
import OpenAI from 'openai';

// OpenAI SDK client pointed at the TokenHub gateway.
const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

// The full history is resent on every call, ending with the newest user turn.
const history = [
  { role: 'system', content: '你是 Kimi。' },
  { role: 'user', content: '推荐一本科普读物' },
  { role: 'assistant', content: '推荐《时间简史》。' },
  { role: 'user', content: '再说一本进阶的' },
];

const completion = await client.chat.completions.create({
  model: 'kimi-k2.6',
  messages: history,
});
const reply = completion.choices[0].message;
console.log(reply.content);
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

public class MultiTurnChat {
    /** Sends a four-message history (no thinking) and prints the raw JSON response body. */
    public static void main(String[] args) throws Exception {
        // The full history is resent on every call, ending with the newest user turn.
        List<Map<String, String>> history = List.of(
            Map.of("role", "system", "content", "你是 Kimi。"),
            Map.of("role", "user", "content", "推荐一本科普读物"),
            Map.of("role", "assistant", "content", "推荐《时间简史》。"),
            Map.of("role", "user", "content", "再说一本进阶的")
        );

        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "kimi-k2.6");
        payload.put("messages", history);
        String json = new Gson().toJson(payload);

        RequestBody requestBody = RequestBody.create(json, MediaType.parse("application/json"));
        Request request = new Request.Builder()
            .url("https://tokenhub.tencentmaas.com/v1/chat/completions")
            .header("Authorization", "Bearer YOUR_API_KEY")
            .post(requestBody)
            .build();

        OkHttpClient http = new OkHttpClient();
        try (Response response = http.newCall(request).execute()) {
            System.out.println(response.body().string());
        }
    }
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// main sends a four-message conversation history (no thinking) and prints
// the raw JSON response body.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model": "kimi-k2.6",
		"messages": []map[string]string{
			{"role": "system", "content": "你是 Kimi。"},
			{"role": "user", "content": "推荐一本科普读物"},
			{"role": "assistant", "content": "推荐《时间简史》。"},
			{"role": "user", "content": "再说一本进阶的"},
		},
	})
	if err != nil {
		fmt.Println("encode request:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("build request:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Do returns a nil response on error, so check before touching resp.Body.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("send request:", err)
		return
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("read response:", err)
		return
	}
	fmt.Println(string(data))
}

Function Calling(工具调用)

Kimi 支持标准的 OpenAI Function Calling 协议。下例演示如何注册一个查天气的工具,并在模型决定调用工具后把工具结果回填给模型。
cURL
Python
Node.js
Java
Go
# First round of a function-calling exchange: registers the get_weather tool
# and lets the model decide whether to call it.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "kimi-k2.6",
    "messages": [
      {"role": "user", "content": "今天北京天气怎么样?"}
    ],
    "tools": [{
      "type": "function",
      "function": {
        "name": "get_weather",
        "description": "查询指定城市的天气",
        "parameters": {
          "type": "object",
          "properties": {
            "city": {"type": "string", "description": "城市名称"}
          },
          "required": ["city"]
        }
      }
    }],
    "tool_choice": "auto"
  }'
import json
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub.tencentmaas.com/v1",
)

# Tool schema advertised to the model (OpenAI function-calling format).
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "查询指定城市的天气",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string", "description": "城市名称"}},
            "required": ["city"],
        },
    },
}]

messages = [{"role": "user", "content": "今天北京天气怎么样?"}]

# Round 1: the model decides whether to call a tool.
resp = client.chat.completions.create(
    model="kimi-k2.6", messages=messages, tools=tools, tool_choice="auto",
)
msg = resp.choices[0].message
# Append the assistant message (including any tool_calls) to the history.
messages.append(msg.model_dump(exclude_none=True))

# If the model chose to call tools, run them and feed the results back.
if msg.tool_calls:
    for call in msg.tool_calls:
        args = json.loads(call.function.arguments)
        # Replace this with real business logic.
        result = {"city": args["city"], "temperature": "22°C", "weather": "晴"}
        messages.append({
            "role": "tool",
            "tool_call_id": call.id,
            "content": json.dumps(result, ensure_ascii=False),
        })

    # Round 2: send the tool results back to the model for the final reply.
    final = client.chat.completions.create(model="kimi-k2.6", messages=messages, tools=tools)
    print(final.choices[0].message.content)
else:
    print(msg.content)
import OpenAI from 'openai';

// OpenAI SDK client pointed at the TokenHub gateway.
const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

// Schema of the single tool exposed to the model.
const weatherTool = {
  type: 'function',
  function: {
    name: 'get_weather',
    description: '查询指定城市的天气',
    parameters: {
      type: 'object',
      properties: { city: { type: 'string', description: '城市名称' } },
      required: ['city'],
    },
  },
};
const tools = [weatherTool];

const messages = [{ role: 'user', content: '今天北京天气怎么样?' }];

// Round 1: the model decides whether a tool call is needed.
const firstReply = await client.chat.completions.create({
  model: 'kimi-k2.6',
  messages,
  tools,
  tool_choice: 'auto',
});
const assistantMsg = firstReply.choices[0].message;
messages.push(assistantMsg);

if (!assistantMsg.tool_calls) {
  // No tool requested: print the assistant message content directly.
  console.log(assistantMsg.content);
} else {
  for (const { id, function: fn } of assistantMsg.tool_calls) {
    const { city } = JSON.parse(fn.arguments);
    // Replace this with real business logic.
    const toolOutput = { city, temperature: '22°C', weather: '晴' };
    messages.push({
      role: 'tool',
      tool_call_id: id,
      content: JSON.stringify(toolOutput),
    });
  }

  // Round 2: send the tool results back to the model for the final reply.
  const secondReply = await client.chat.completions.create({
    model: 'kimi-k2.6',
    messages,
    tools,
  });
  console.log(secondReply.choices[0].message.content);
}
import okhttp3.*;
import com.google.gson.*;
import java.util.*;

public class FunctionCallingDemo {
    static final String URL = "https://tokenhub.tencentmaas.com/v1/chat/completions";
    static final String API_KEY = "YOUR_API_KEY";
    static final OkHttpClient HTTP = new OkHttpClient();
    static final Gson GSON = new Gson();

    /**
     * Posts one chat completion request with the given history and tool list.
     *
     * @param messages conversation history sent as the "messages" field
     * @param tools    tool definitions sent as the "tools" field
     * @return the raw JSON response body
     */
    static String chat(List<Map<String, Object>> messages, List<Map<String, Object>> tools) throws Exception {
        Map<String, Object> body = new HashMap<>();
        body.put("model", "kimi-k2.6");
        body.put("messages", messages);
        body.put("tools", tools);
        body.put("tool_choice", "auto");

        Request req = new Request.Builder()
            .url(URL)
            .header("Authorization", "Bearer " + API_KEY)
            .post(RequestBody.create(GSON.toJson(body), MediaType.parse("application/json")))
            .build();
        try (Response resp = HTTP.newCall(req).execute()) {
            return resp.body().string();
        }
    }

    /** Runs the two-round tool-calling exchange for a weather question. */
    public static void main(String[] args) throws Exception {
        List<Map<String, Object>> tools = List.of(Map.of(
            "type", "function",
            "function", Map.of(
                "name", "get_weather",
                "description", "查询指定城市的天气",
                "parameters", Map.of(
                    "type", "object",
                    "properties", Map.of("city", Map.of("type", "string", "description", "城市名称")),
                    "required", List.of("city")
                )
            )
        ));

        List<Map<String, Object>> messages = new ArrayList<>();
        messages.add(Map.of("role", "user", "content", "今天北京天气怎么样?"));

        // Round 1: the model decides whether to call a tool.
        String r1 = chat(messages, tools);
        JsonObject msg = JsonParser.parseString(r1).getAsJsonObject()
            .getAsJsonArray("choices").get(0).getAsJsonObject()
            .getAsJsonObject("message");
        messages.add(GSON.fromJson(msg, Map.class));

        // The field may be absent or an explicit JSON null; only an array means
        // the model requested tool calls.
        JsonElement toolCalls = msg.get("tool_calls");
        if (toolCalls != null && toolCalls.isJsonArray()) {
            for (JsonElement el : toolCalls.getAsJsonArray()) {
                JsonObject call = el.getAsJsonObject();
                JsonObject argsObj = JsonParser.parseString(
                    call.getAsJsonObject("function").get("arguments").getAsString()
                ).getAsJsonObject();
                // Replace this with real business logic.
                Map<String, String> result = Map.of(
                    "city", argsObj.get("city").getAsString(),
                    "temperature", "22°C",
                    "weather", "晴"
                );
                messages.add(Map.of(
                    "role", "tool",
                    "tool_call_id", call.get("id").getAsString(),
                    "content", GSON.toJson(result)
                ));
            }
            // Round 2: send the tool results back to the model.
            System.out.println(chat(messages, tools));
        } else {
            // content can also be missing or null; print an empty line then.
            JsonElement content = msg.get("content");
            System.out.println(content == null || content.isJsonNull() ? "" : content.getAsString());
        }
    }
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// Endpoint and credential shared by every request in this sample.
const (
	// URL is the chat completions endpoint.
	URL = "https://tokenhub.tencentmaas.com/v1/chat/completions"
	// APIKEY is the placeholder key sent in the Authorization header.
	APIKEY = "YOUR_API_KEY"
)

// chat posts one chat completion request carrying messages and tools
// (with tool_choice "auto") and returns the decoded JSON response object.
// It returns a non-nil error when the request cannot be built or sent, or
// when the response body cannot be read or decoded as a JSON object.
func chat(messages []map[string]interface{}, tools []map[string]interface{}) (map[string]interface{}, error) {
	body, err := json.Marshal(map[string]interface{}{
		"model":       "kimi-k2.6",
		"messages":    messages,
		"tools":       tools,
		"tool_choice": "auto",
	})
	if err != nil {
		return nil, fmt.Errorf("encode request: %w", err)
	}
	req, err := http.NewRequest("POST", URL, bytes.NewBuffer(body))
	if err != nil {
		return nil, fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+APIKEY)
	req.Header.Set("Content-Type", "application/json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read response: %w", err)
	}
	var out map[string]interface{}
	if err := json.Unmarshal(data, &out); err != nil {
		return nil, fmt.Errorf("decode response: %w", err)
	}
	return out, nil
}

// main runs the two-round tool-calling exchange: it asks a weather question,
// answers any get_weather tool calls with a canned result, and prints the
// second response (or the first reply's content when no tool was requested).
func main() {
	tools := []map[string]interface{}{{
		"type": "function",
		"function": map[string]interface{}{
			"name":        "get_weather",
			"description": "查询指定城市的天气",
			"parameters": map[string]interface{}{
				"type": "object",
				"properties": map[string]interface{}{
					"city": map[string]string{"type": "string", "description": "城市名称"},
				},
				"required": []string{"city"},
			},
		},
	}}

	messages := []map[string]interface{}{
		{"role": "user", "content": "今天北京天气怎么样?"},
	}

	// Round 1: the model decides whether to call a tool.
	r1, err := chat(messages, tools)
	if err != nil {
		fmt.Println("round 1 failed:", err)
		return
	}
	// Checked assertions: an error payload has no "choices" and must not panic.
	choices, _ := r1["choices"].([]interface{})
	if len(choices) == 0 {
		fmt.Printf("unexpected response: %+v\n", r1)
		return
	}
	first, _ := choices[0].(map[string]interface{})
	msg, ok := first["message"].(map[string]interface{})
	if !ok {
		fmt.Printf("unexpected response: %+v\n", r1)
		return
	}
	messages = append(messages, msg)

	calls, ok := msg["tool_calls"].([]interface{})
	if !ok {
		// No tool requested: print the assistant content directly.
		fmt.Println(msg["content"])
		return
	}

	for _, c := range calls {
		// Reads from a nil map yield zero values, so malformed entries
		// fall through to the Unmarshal error below instead of panicking.
		call, _ := c.(map[string]interface{})
		fn, _ := call["function"].(map[string]interface{})
		argsStr, _ := fn["arguments"].(string)
		var args map[string]string
		if err := json.Unmarshal([]byte(argsStr), &args); err != nil {
			fmt.Println("decode tool arguments:", err)
			return
		}
		// Replace this with real business logic.
		result, err := json.Marshal(map[string]string{
			"city":        args["city"],
			"temperature": "22°C",
			"weather":     "晴",
		})
		if err != nil {
			fmt.Println("encode tool result:", err)
			return
		}
		messages = append(messages, map[string]interface{}{
			"role":         "tool",
			"tool_call_id": call["id"],
			"content":      string(result),
		})
	}

	// Round 2: send the tool results back to the model.
	r2, err := chat(messages, tools)
	if err != nil {
		fmt.Println("round 2 failed:", err)
		return
	}
	fmt.Printf("%+v\n", r2)
}

Kimi 思考模式

Kimi K2.6 / K2.5 同时是对话模型和思考模型,通过 thinking 字段开关思考能力,这是它与 OpenAI / GLM 等模型最大的不同。

1. thinking 参数

thinking 字段位于请求体顶层,结构如下:
"thinking": {
  "type": "enabled",
  "keep": "all"
}
字段
类型
默认值
取值
说明
type
string
"enabled"
"enabled" / "disabled"
当前请求是否启用思考能力
keep
string | null
null
"all" / 不传
多轮对话中是否透传历史 reasoning_content
注意:
thinking 不是 OpenAI 标准字段,使用官方 SDK 时需通过 extra_body(Python)或直接展开到顶层(Node.js)透传;HTTP 直接调用时放在请求体顶层即可。

2. reasoning_content 字段

启用思考后,响应消息中会新增一个与 content 同级的 reasoning_content 字段,承载模型的推理过程:
{
  "choices": [{
    "message": {
      "role": "assistant",
      "reasoning_content": "首先我们需要分析...",
      "content": "最终答案是 ..."
    }
  }]
}

OpenAI SDK 访问限制(重要)

OpenAI 官方 SDK 的 ChatCompletionMessage / ChoiceDelta 类型不直接声明 reasoning_content 属性,因此不能用 obj.reasoning_content 直接访问,必须用以下方式:
# ❌ Wrong
content = message.reasoning_content

# ✅ Correct
if hasattr(message, "reasoning_content"):
    content = getattr(message, "reasoning_content")
如果您是通过 HTTP 直接调用、或使用 requests / httpx 等通用框架解析 JSON,则无此限制,可直接读取同级字段。

3. 流式思考输出

启用思考时强烈建议使用流式调用(stream: true):
避免超时:思考内容可能较长,整体响应时间较久,非流式容易触发网关超时。
顺序明确:流式模式下 reasoning_content 一定在 content 之前完整输出,便于 UI 区分“思考中”和“正在回答”两种状态。
cURL
Python
Node.js
Java
Go
# Streaming request with thinking enabled.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "kimi-k2.6",
    "max_tokens": 32768,
    "stream": true,
    "thinking": {"type": "enabled"},
    "messages": [
      {"role": "user", "content": "请用一句话解释傅里叶变换。"}
    ]
  }'
# The response is an SSE stream: each "data:" line carries one chunk, and
# delta.reasoning_content always appears before delta.content.
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub.tencentmaas.com/v1",
)

# thinking is not a standard OpenAI field, so it is passed through extra_body.
stream = client.chat.completions.create(
    model="kimi-k2.6",
    messages=[{"role": "user", "content": "请用一句话解释傅里叶变换。"}],
    max_tokens=32768,
    stream=True,
    extra_body={"thinking": {"type": "enabled"}},
)

# Tracks whether we are currently inside the reasoning part of the stream.
thinking = False
for chunk in stream:
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    # Thinking phase
    if hasattr(delta, "reasoning_content") and getattr(delta, "reasoning_content"):
        if not thinking:
            print("=== 开始思考 ===")
            thinking = True
        print(getattr(delta, "reasoning_content"), end="", flush=True)
    # Answer phase
    if delta.content:
        if thinking:
            print("\n=== 思考结束 ===")
            thinking = False
        print(delta.content, end="", flush=True)
import OpenAI from 'openai';

const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

// The Node.js SDK has no native thinking / extra_body support yet, so the
// field is spread directly into the top-level request object.
// Note: in TypeScript, append `as any` to the request object to bypass type checking.
const stream = await client.chat.completions.create({
  model: 'kimi-k2.6',
  messages: [{ role: 'user', content: '请用一句话解释傅里叶变换。' }],
  max_tokens: 32768,
  stream: true,
  thinking: { type: 'enabled' },
});

// Tracks whether we are currently inside the reasoning part of the stream.
let thinking = false;
for await (const chunk of stream) {
  const delta = chunk.choices?.[0]?.delta;
  if (!delta) continue;
  // Thinking phase
  if (delta.reasoning_content) {
    if (!thinking) {
      console.log('=== 开始思考 ===');
      thinking = true;
    }
    process.stdout.write(delta.reasoning_content);
  }
  // Answer phase
  if (delta.content) {
    if (thinking) {
      console.log('\n=== 思考结束 ===');
      thinking = false;
    }
    process.stdout.write(delta.content);
  }
}
import okhttp3.*;
import okhttp3.sse.*;
import com.google.gson.*;
import java.util.*;

public class ThinkingStream {
    /** Prints the named field of a delta when it is present and not JSON null. */
    private static void printField(JsonObject delta, String name) {
        JsonElement value = delta.get(name);
        if (value != null && !value.isJsonNull()) {
            System.out.print(value.getAsString());
        }
    }

    /**
     * Sends a streaming request with thinking enabled and prints
     * reasoning_content and content fragments as they arrive.
     */
    public static void main(String[] args) {
        Map<String, Object> body = new HashMap<>();
        body.put("model", "kimi-k2.6");
        body.put("max_tokens", 32768);
        body.put("stream", true);
        body.put("thinking", Map.of("type", "enabled"));
        body.put("messages", List.of(
            Map.of("role", "user", "content", "请用一句话解释傅里叶变换。")
        ));

        Request request = new Request.Builder()
            .url("https://tokenhub.tencentmaas.com/v1/chat/completions")
            .header("Authorization", "Bearer YOUR_API_KEY")
            .header("Content-Type", "application/json")
            .post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json")))
            .build();

        EventSources.createFactory(new OkHttpClient()).newEventSource(request,
            new EventSourceListener() {
                @Override public void onEvent(EventSource es, String id, String type, String data) {
                    if ("[DONE]".equals(data)) return;
                    JsonObject chunk = JsonParser.parseString(data).getAsJsonObject();

                    // Skip chunks whose choices array is missing or empty.
                    JsonElement choices = chunk.get("choices");
                    if (choices == null || !choices.isJsonArray()
                            || choices.getAsJsonArray().size() == 0) {
                        return;
                    }
                    JsonElement delta = choices.getAsJsonArray().get(0).getAsJsonObject().get("delta");
                    if (delta == null || !delta.isJsonObject()) return;

                    // Both fields get the same null guard; either may be JSON null.
                    printField(delta.getAsJsonObject(), "reasoning_content");
                    printField(delta.getAsJsonObject(), "content");
                }

                // Without this override a rejected request (bad key, HTTP error,
                // network failure) would end with no output at all.
                @Override public void onFailure(EventSource es, Throwable t, Response response) {
                    if (response != null) {
                        System.err.println("Request failed: HTTP " + response.code());
                    }
                    if (t != null) {
                        System.err.println("Request failed: " + t);
                    }
                }
            });
    }
}
package main

import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"net/http"
"strings"
)

// main sends a streaming request with thinking enabled and prints the
// reasoning_content fragments followed by the content fragments, with
// marker lines around the thinking phase.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model":      "kimi-k2.6",
		"max_tokens": 32768,
		"stream":     true,
		"thinking":   map[string]string{"type": "enabled"},
		"messages": []map[string]string{
			{"role": "user", "content": "请用一句话解释傅里叶变换。"},
		},
	})
	if err != nil {
		fmt.Println("encode request:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("build request:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Do returns a nil response on error, so check before touching resp.Body.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("send request:", err)
		return
	}
	defer resp.Body.Close()

	// An error response is not an SSE stream; report it instead of printing nothing.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("unexpected status:", resp.Status)
		return
	}

	scanner := bufio.NewScanner(resp.Body)
	// thinking tracks whether we are inside the reasoning part of the stream.
	thinking := false
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "data: ") {
			continue
		}
		data := strings.TrimPrefix(line, "data: ")
		if data == "[DONE]" {
			break
		}
		var chunk map[string]interface{}
		if err := json.Unmarshal([]byte(data), &chunk); err != nil {
			continue
		}
		choices, _ := chunk["choices"].([]interface{})
		if len(choices) == 0 {
			continue
		}
		// Checked assertions: reading from a nil map yields zero values,
		// so an unexpected chunk shape is skipped rather than panicking.
		choice, _ := choices[0].(map[string]interface{})
		delta, _ := choice["delta"].(map[string]interface{})
		if rc, ok := delta["reasoning_content"].(string); ok && rc != "" {
			if !thinking {
				fmt.Println("=== 开始思考 ===")
				thinking = true
			}
			fmt.Print(rc)
		}
		if c, ok := delta["content"].(string); ok && c != "" {
			if thinking {
				fmt.Println("\n=== 思考结束 ===")
				thinking = false
			}
			fmt.Print(c)
		}
	}
	if err := scanner.Err(); err != nil {
		fmt.Println("read stream:", err)
	}
}

4. Preserved Thinking(多轮保留思考)

thinking.keep 控制历史轮次的 reasoning_content 是否参与下一轮推理:
取值
含义
适用场景
不传 / null(默认)
历史轮次的推理内容不透传,上下文更短,成本更低
普通多轮对话
"all"
完整保留历史轮次的推理过程,让模型延续之前的思考脉络
复杂多步推理、Agent 工具调用、长程代码任务
注意:
keep 只影响历史思考是否传给模型,不影响当前轮次是否产生思考;推荐在需要连续推理的场景下与 type: "enabled" 搭配使用。

5. 多轮对话回写 reasoning_content

启用思考时,必须把上一轮 API 返回的 reasoning_content 和 content 一并回写到 messages,否则模型在后续轮次中会丢失推理脉络。
cURL
Python
Node.js
Java
Go
# Multi-turn request with thinking enabled: the previous assistant turn is
# written back with both reasoning_content and content.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "kimi-k2.6",
    "stream": true,
    "thinking": {"type": "enabled", "keep": "all"},
    "messages": [
      {"role": "system", "content": "你是 Kimi。"},
      {"role": "user", "content": "第一个问题..."},
      {
        "role": "assistant",
        "reasoning_content": "<上一轮 API 返回的 reasoning_content>",
        "content": "<上一轮 API 返回的 content>"
      },
      {"role": "user", "content": "请基于之前的分析继续推导。"}
    ]
  }'
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub.tencentmaas.com/v1",
)

# The previous assistant turn is written back with both reasoning_content
# and content.
messages = [
    {"role": "system", "content": "你是 Kimi。"},
    {"role": "user", "content": "第一个问题..."},
    {
        "role": "assistant",
        "reasoning_content": "<上一轮 API 返回的 reasoning_content>",
        "content": "<上一轮 API 返回的 content>",
    },
    {"role": "user", "content": "请基于之前的分析继续推导。"},
]

response = client.chat.completions.create(
    model="kimi-k2.6",
    messages=messages,
    stream=True,
    extra_body={"thinking": {"type": "enabled", "keep": "all"}},
)

for chunk in response:
    # Skip chunks that carry no choices.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    # reasoning_content is not declared on the SDK type; read it via getattr.
    reasoning = getattr(delta, "reasoning_content", None)
    if reasoning:
        print(reasoning, end="", flush=True)
    if delta.content:
        print(delta.content, end="", flush=True)
print()
import OpenAI from 'openai';

// OpenAI SDK client pointed at the TokenHub gateway.
const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

// The previous assistant turn is written back with both reasoning_content
// and content.
const previousAssistantTurn = {
  role: 'assistant',
  reasoning_content: '<上一轮 API 返回的 reasoning_content>',
  content: '<上一轮 API 返回的 content>',
};
const messages = [
  { role: 'system', content: '你是 Kimi。' },
  { role: 'user', content: '第一个问题...' },
  previousAssistantTurn,
  { role: 'user', content: '请基于之前的分析继续推导。' },
];

const stream = await client.chat.completions.create({
  model: 'kimi-k2.6',
  messages,
  stream: true,
  thinking: { type: 'enabled', keep: 'all' },
});

// Print reasoning fragments and answer fragments in arrival order.
for await (const part of stream) {
  const delta = part.choices?.[0]?.delta;
  if (!delta) continue;
  for (const piece of [delta.reasoning_content, delta.content]) {
    if (piece) process.stdout.write(piece);
  }
}
console.log();
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

public class MultiTurnWithThinking {
    /**
     * Sends a multi-turn streaming request with thinking enabled, writing the
     * previous assistant turn back with both reasoning_content and content,
     * and prints the payload of every SSE "data: " line.
     */
    // execute() and readLine() throw IOException, so main must declare it.
    public static void main(String[] args) throws Exception {
        Map<String, Object> body = new HashMap<>();
        body.put("model", "kimi-k2.6");
        body.put("stream", true);
        body.put("thinking", Map.of("type", "enabled", "keep", "all"));
        body.put("messages", List.of(
            Map.of("role", "system", "content", "你是 Kimi。"),
            Map.of("role", "user", "content", "第一个问题..."),
            Map.of(
                "role", "assistant",
                "reasoning_content", "<上一轮 API 返回的 reasoning_content>",
                "content", "<上一轮 API 返回的 content>"
            ),
            Map.of("role", "user", "content", "请基于之前的分析继续推导。")
        ));

        Request request = new Request.Builder()
            .url("https://tokenhub.tencentmaas.com/v1/chat/completions")
            .header("Authorization", "Bearer YOUR_API_KEY")
            .post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json")))
            .build();

        // Decode as UTF-8 explicitly so the output does not depend on the
        // platform default charset.
        try (Response response = new OkHttpClient().newCall(request).execute();
             java.io.BufferedReader r = new java.io.BufferedReader(
                 new java.io.InputStreamReader(
                     response.body().byteStream(),
                     java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = r.readLine()) != null) {
                if (line.startsWith("data: ")) System.out.println(line.substring(6));
            }
        }
    }
}
package main

import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"net/http"
"strings"
)

// main sends a multi-turn streaming request with thinking enabled, writing
// the previous assistant turn back with both reasoning_content and content,
// and prints the payload of every SSE "data: " line.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model":    "kimi-k2.6",
		"stream":   true,
		"thinking": map[string]string{"type": "enabled", "keep": "all"},
		"messages": []map[string]interface{}{
			{"role": "system", "content": "你是 Kimi。"},
			{"role": "user", "content": "第一个问题..."},
			{
				"role":              "assistant",
				"reasoning_content": "<上一轮 API 返回的 reasoning_content>",
				"content":           "<上一轮 API 返回的 content>",
			},
			{"role": "user", "content": "请基于之前的分析继续推导。"},
		},
	})
	if err != nil {
		fmt.Println("encode request:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("build request:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	// Do returns a nil response on error, so check before touching resp.Body.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("send request:", err)
		return
	}
	defer resp.Body.Close()

	// An error response is not an SSE stream; report it instead of printing nothing.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("unexpected status:", resp.Status)
		return
	}

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "data: ") {
			fmt.Println(strings.TrimPrefix(line, "data: "))
		}
	}
	if err := scanner.Err(); err != nil {
		fmt.Println("read stream:", err)
	}
}

多模态调用

1. 图片输入:Base64 编码

适合本地文件 / 内网图片场景:
cURL
Python
Node.js
Java
Go
# Read the image as base64 and strip newlines so it fits inside a JSON string.
IMAGE_B64=$(base64 -i image.jpg | tr -d '\n')

# Pass the body through a temp file so a large base64 string does not
# trigger "Argument list too long".
cat > /tmp/req.json <<EOF
{
  "model": "kimi-k2.6",
  "messages": [{
    "role": "user",
    "content": [
      {"type": "text", "text": "请描述这张图片"},
      {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,${IMAGE_B64}"}}
    ]
  }]
}
EOF

curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d @/tmp/req.json
import base64
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub.tencentmaas.com/v1",
)

# Read the local image and encode it as a base64 string.
with open("image.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode()

# The image is embedded in the request as a data: URL next to the text prompt.
response = client.chat.completions.create(
    model="kimi-k2.6",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "请描述这张图片"},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
        ],
    }],
)
print(response.choices[0].message.content)
import fs from 'node:fs';
import OpenAI from 'openai';

// OpenAI SDK client pointed at the TokenHub gateway.
const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

// Read the local image and embed it as a base64 data: URL.
const imageBytes = fs.readFileSync('image.jpg');
const dataUrl = `data:image/jpeg;base64,${imageBytes.toString('base64')}`;

const userMessage = {
  role: 'user',
  content: [
    { type: 'text', text: '请描述这张图片' },
    { type: 'image_url', image_url: { url: dataUrl } },
  ],
};

const completion = await client.chat.completions.create({
  model: 'kimi-k2.6',
  messages: [userMessage],
});
console.log(completion.choices[0].message.content);
import okhttp3.*;
import com.google.gson.Gson;
import java.nio.file.*;
import java.util.*;

public class ImageBase64Chat {
    /** Sends a local image as a base64 data: URL with a text prompt and prints the raw JSON response. */
    public static void main(String[] args) throws Exception {
        // Read the local image and build the data: URL.
        String imageB64 = Base64.getEncoder().encodeToString(
            Files.readAllBytes(Paths.get("image.jpg")));
        String dataUrl = "data:image/jpeg;base64," + imageB64;

        // One user message whose content has a text part and an image part.
        Map<String, Object> textPart = Map.of("type", "text", "text", "请描述这张图片");
        Map<String, Object> imagePart = Map.of(
            "type", "image_url",
            "image_url", Map.of("url", dataUrl)
        );
        Map<String, Object> userMessage = Map.of(
            "role", "user",
            "content", List.of(textPart, imagePart)
        );

        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "kimi-k2.6");
        payload.put("messages", List.of(userMessage));
        String json = new Gson().toJson(payload);

        Request request = new Request.Builder()
            .url("https://tokenhub.tencentmaas.com/v1/chat/completions")
            .header("Authorization", "Bearer YOUR_API_KEY")
            .post(RequestBody.create(json, MediaType.parse("application/json")))
            .build();

        OkHttpClient http = new OkHttpClient();
        try (Response response = http.newCall(request).execute()) {
            System.out.println(response.body().string());
        }
    }
}
package main

import (
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
)

// main reads image.jpg, embeds it as a Base64 data URL in a chat completion
// request for kimi-k2.6, and prints the raw response body. Each failure is
// reported and stops the program instead of being ignored.
func main() {
	img, err := os.ReadFile("image.jpg")
	if err != nil {
		fmt.Println("read image:", err)
		return
	}
	imageB64 := base64.StdEncoding.EncodeToString(img)

	body, err := json.Marshal(map[string]interface{}{
		"model": "kimi-k2.6",
		"messages": []map[string]interface{}{
			{
				"role": "user",
				"content": []map[string]interface{}{
					{"type": "text", "text": "请描述这张图片"},
					{
						"type": "image_url",
						"image_url": map[string]string{
							"url": "data:image/jpeg;base64," + imageB64,
						},
					},
				},
			},
		},
	})
	if err != nil {
		fmt.Println("encode request:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("build request:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil here, so the deferred Close below must not be reached.
		fmt.Println("send request:", err)
		return
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("read response:", err)
		return
	}
	// Flag HTTP-level failures; the body is still printed because it carries
	// the error details.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("unexpected status:", resp.Status)
	}
	fmt.Println(string(data))
}

2. 图片输入:公网 URL 直链

适合已托管在 CDN / 对象存储的图片,可显著减小请求体积:
cURL
Python
Node.js
Java
Go
# Ask kimi-k2.6 to describe an image referenced by a public URL.
curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "kimi-k2.6",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "请描述这张图片"},
        {"type": "image_url", "image_url": {"url": "https://www.gstatic.com/webp/gallery/1.jpg"}}
      ]
    }]
  }'
# Describe an image that is referenced by a public URL.
from openai import OpenAI

client = OpenAI(api_key="YOUR_API_KEY", base_url="https://tokenhub.tencentmaas.com/v1")

# A single user turn carrying a text part followed by the image_url part.
user_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "请描述这张图片"},
        {"type": "image_url", "image_url": {"url": "https://www.gstatic.com/webp/gallery/1.jpg"}},
    ],
}

completion = client.chat.completions.create(
    model="kimi-k2.6",
    messages=[user_message],
)
answer = completion.choices[0].message.content
print(answer)
// Describe an image that is referenced by a public URL.
import OpenAI from 'openai';

const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

// A single user turn carrying a text part followed by the image_url part.
const userMessage = {
  role: 'user',
  content: [
    { type: 'text', text: '请描述这张图片' },
    { type: 'image_url', image_url: { url: 'https://www.gstatic.com/webp/gallery/1.jpg' } },
  ],
};

const completion = await client.chat.completions.create({
  model: 'kimi-k2.6',
  messages: [userMessage],
});

const answer = completion.choices[0].message.content;
console.log(answer);
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

public class ImageUrlChat {
public static void main(String[] args) throws Exception {
Map<String, Object> body = new HashMap<>();
body.put("model", "kimi-k2.6");
body.put("messages", List.of(Map.of(
"role", "user",
"content", List.of(
Map.of("type", "text", "text", "请描述这张图片"),
Map.of("type", "image_url", "image_url", Map.of("url", "https://www.gstatic.com/webp/gallery/1.jpg"))
)
)));

Request request = new Request.Builder()
.url("https://tokenhub.tencentmaas.com/v1/chat/completions")
.header("Authorization", "Bearer YOUR_API_KEY")
.post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json")))
.build();

try (Response response = new OkHttpClient().newCall(request).execute()) {
System.out.println(response.body().string());
}
}
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// main asks kimi-k2.6 to describe an image referenced by a public URL and
// prints the raw response body. Each failure is reported and stops the
// program instead of being ignored.
func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model": "kimi-k2.6",
		"messages": []map[string]interface{}{
			{
				"role": "user",
				"content": []map[string]interface{}{
					{"type": "text", "text": "请描述这张图片"},
					{
						"type":      "image_url",
						"image_url": map[string]string{"url": "https://www.gstatic.com/webp/gallery/1.jpg"},
					},
				},
			},
		},
	})
	if err != nil {
		fmt.Println("encode request:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("build request:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil here, so the deferred Close below must not be reached.
		fmt.Println("send request:", err)
		return
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("read response:", err)
		return
	}
	// Flag HTTP-level failures; the body is still printed because it carries
	// the error details.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("unexpected status:", resp.Status)
	}
	fmt.Println(string(data))
}
注意:
使用 URL 直链时,请确保该 URL 公网可访问且无需鉴权;私网 / 内网图片或带签名校验的 URL 仍需先下载并以 Base64 方式上传。

3. 视频输入(仅 K2.6 支持)

视频输入与图片类似:将内容类型设为 video_url,把视频做 Base64 编码后,以 data:video/<format>;base64,... 格式的数据 URL 传入即可:
cURL
Python
Node.js
Java
Go
# Read the video as Base64 and strip the line breaks the encoder inserts.
VIDEO_B64=$(base64 -i demo.mp4 | tr -d '\n')

# Pass the body through a temporary file so a very large Base64 string
# does not trigger "Argument list too long".
cat > /tmp/req.json <<EOF
{
  "model": "kimi-k2.6",
  "messages": [{
    "role": "user",
    "content": [
      {"type": "text", "text": "请总结视频内容"},
      {"type": "video_url", "video_url": {"url": "data:video/mp4;base64,${VIDEO_B64}"}}
    ]
  }]
}
EOF

curl -X POST 'https://tokenhub.tencentmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d @/tmp/req.json
# Summarize a local video by sending it inline as a Base64 data URL.
import base64
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub.tencentmaas.com/v1",
)

# Read the raw bytes and Base64-encode them; the body of the `with` block
# must be indented or Python rejects the script.
with open("demo.mp4", "rb") as f:
    video_b64 = base64.b64encode(f.read()).decode()

response = client.chat.completions.create(
    model="kimi-k2.6",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "请总结视频内容"},
            {"type": "video_url", "video_url": {"url": f"data:video/mp4;base64,{video_b64}"}},
        ],
    }],
)
print(response.choices[0].message.content)
// Summarize a local video by sending it inline as a Base64 data URL.
import fs from 'node:fs';
import OpenAI from 'openai';

const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub.tencentmaas.com/v1',
});

// Encode the raw file bytes, then wrap them in a data URL for the video_url part.
const encodedVideo = fs.readFileSync('demo.mp4').toString('base64');
const videoDataUrl = `data:video/mp4;base64,${encodedVideo}`;

// A single user turn carrying a text part followed by the video part.
const userMessage = {
  role: 'user',
  content: [
    { type: 'text', text: '请总结视频内容' },
    { type: 'video_url', video_url: { url: videoDataUrl } },
  ],
};

const completion = await client.chat.completions.create({
  model: 'kimi-k2.6',
  messages: [userMessage],
});

const answer = completion.choices[0].message.content;
console.log(answer);
import okhttp3.*;
import com.google.gson.Gson;
import java.nio.file.*;
import java.util.*;

public class VideoChat {
public static void main(String[] args) throws Exception {
byte[] bytes = Files.readAllBytes(Paths.get("demo.mp4"));
String videoB64 = Base64.getEncoder().encodeToString(bytes);

Map<String, Object> body = new HashMap<>();
body.put("model", "kimi-k2.6");
body.put("messages", List.of(Map.of(
"role", "user",
"content", List.of(
Map.of("type", "text", "text", "请总结视频内容"),
Map.of("type", "video_url", "video_url", Map.of(
"url", "data:video/mp4;base64," + videoB64
))
)
)));

Request request = new Request.Builder()
.url("https://tokenhub.tencentmaas.com/v1/chat/completions")
.header("Authorization", "Bearer YOUR_API_KEY")
.post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json")))
.build();

try (Response response = new OkHttpClient().newCall(request).execute()) {
System.out.println(response.body().string());
}
}
}
package main

import (
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
)

// main reads demo.mp4, embeds it as a Base64 data URL in a chat completion
// request for kimi-k2.6, and prints the raw response body. Each failure is
// reported and stops the program instead of being ignored.
func main() {
	video, err := os.ReadFile("demo.mp4")
	if err != nil {
		fmt.Println("read video:", err)
		return
	}
	videoB64 := base64.StdEncoding.EncodeToString(video)

	body, err := json.Marshal(map[string]interface{}{
		"model": "kimi-k2.6",
		"messages": []map[string]interface{}{
			{
				"role": "user",
				"content": []map[string]interface{}{
					{"type": "text", "text": "请总结视频内容"},
					{
						"type": "video_url",
						"video_url": map[string]string{
							"url": "data:video/mp4;base64," + videoB64,
						},
					},
				},
			},
		},
	})
	if err != nil {
		fmt.Println("encode request:", err)
		return
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub.tencentmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("build request:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil here, so the deferred Close below must not be reached.
		fmt.Println("send request:", err)
		return
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("read response:", err)
		return
	}
	// Flag HTTP-level failures; the body is still printed because it carries
	// the error details.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("unexpected status:", resp.Status)
	}
	fmt.Println(string(data))
}
说明:
单次请求 body 大小受网关限制(通常 100MB 以内),过大的视频请先压缩或截取关键片段后再上传。

推荐参数与最佳实践

参数 / 实践
建议
说明
max_tokens
≥ 16000(建议 32768)
推理 + 回答共享 max_tokens 额度,过小易被截断。
temperature
不显式设置
K2.6 / K2.5 系列固定使用 0.6,传入其他值会返回 400 错误(invalid temperature: only 0.6 is allowed),建议直接不传。
stream
建议开启
思考模式下输出较长,流式可改善体验并避免网关超时。
多模态优先级
URL 直链 > Base64
公网图片优先用 URL,本地文件再使用 Base64,单次请求 body 不超过 100MB。
多轮对话回写
启用思考时整体回写 message
reasoning_content 与 content 必须一起写回 messages,不要丢字段。
OpenAI SDK 访问推理
hasattr / getattr
不要直接访问 .reasoning_content,否则字段不存在时属性访问会报错。