文本对话模型
语言大模型具备文本理解和文字对话的能力。如当您传入文本信息时,大模型可以理解信息,并结合这些信息进行回复。通过这篇教程,您可以了解如何使用模型服务 API,来调用模型理解文本,生成文本内容,并可以基于此 API 构建或者扩展自己的应用或者自动化任务。
1.模型系列
DeepSeek系列
DeepSeek系列 | 上下文长度 | RPM | TPM | Tokens计费 |
---|---|---|---|---|
DeepSeek-R1-Distill-Qwen-7B | 32K | 3w | 500w | 限时免费 |
DeepSeek-R1 | 64K | - | - | 输入:4元/M Tokens 输出:16元/M Tokens |
DeepSeek-V3 | 64K | - | - | 输入:2元/M Tokens 输出:8元/M Tokens |
DeepSeek-R1-Distill-Qwen-32B | 32K | - | - | 输入:1.5元/M Tokens 输出:6元/M Tokens |
DeepSeek-R1-Distill-Qwen-14B | 32K | - | - | 输入:1元/M Tokens 输出:3元/M Tokens |
DeepSeek-R1-Distill-Llama-8B | 32K | - | - | 输入:0.6元/M Tokens 输出:2.4元/M Tokens |
DeepSeek-R1-Distill-Llama-70B | 32K | - | - | 输入:2元/M Tokens 输出:8元/M Tokens |
Qwen系列
Qwen系列 | 上下文长度 | QPM | TPM | Tokens计费 |
---|---|---|---|---|
QwQ-32B | 32K | 600 | 100w | 输入:¥1.5/ M Tokens 输出:¥6/ M Tokens |
Qwen3-235B-A22B | 128K | 600 | 100w | 输入:¥4/ M Tokens 输出:¥40/ M Tokens |
Qwen3-32B | 128K | 600 | 100w | 输入:¥2/ M Tokens 输出:¥20/ M Tokens |
MiniMax系列
MiniMax系列 | 上下文长度 | RPM | TPM | Tokens计费 |
---|---|---|---|---|
MiniMax-M1-80K | 1M | 120 | 72w | 输入:¥4/ M Tokens 输出:¥16/ M Tokens |
MiniMax-Text-01 | 1M | 120 | 72w | 输入:¥1/ M Tokens 输出:¥8/ M Tokens |
GLM-Z1系列
GLM-Z1系列 | 上下文长度 | 并发数 | Tokens计费 |
---|---|---|---|
GLM-Z1-Flash | 32K | 40 | 免费 |
GLM-Z1-Air | 32K | 40 | 输入:¥0.5/ M Tokens 输出:¥0.5/M Tokens |
GLM-Z1-AirX | 32K | 40 | 输入:¥5/M Tokens 输出:¥5/M Tokens |
GLM-4系列
GLM-4系列 | 上下文长度 | 并发数 | Tokens计费 |
---|---|---|---|
GLM-4-Flash | 128K | 1000 | 免费 |
GLM-4-9B | 128K | 10 | 输入:¥2/M Tokens 输出:¥2/M Tokens |
GLM-4-Plus | 128K | 100 | 输入:¥5/M Tokens 输出:¥5/M Tokens |
GLM-4-Air | 128K | 30 | 输入:¥0.5/M Tokens 输出:¥0.5/M Tokens |
GLM-4-Long | 1M | 30 | 输入:¥1/M Tokens 输出:¥1/M Tokens |
GLM-4-AirX | 8K | 30 | 输入:¥10/M Tokens 输出:¥10/M Tokens |
GLM-4-FlashX | 128K | 100 | 输入:¥0.1/M Tokens 输出:¥0.1/M Tokens |
ERNIE 4.5 Turbo系列
ERNIE 4.5 Turbo系列 | 上下文长度 | RPM | TPM | Tokens计费 |
---|---|---|---|---|
ERNIE-4.5-Turbo-32K | 32K | 5k | 40w | 输入:¥1.14/ M Tokens 输出:¥4.57/M Tokens |
ERNIE-4.5-Turbo-128K | 128K | 5k | 40w | 输入:¥1.14/ M Tokens 输出:¥4.57/M Tokens |
注:所有模型总赠送体验额度为40元。
2.使用前提
您已创建大模型平台API_Key,用于模型调用。
- 若您还未申请,请前往 AI 智算云平台-大模型平台-模型广场
3.API接入方式
文本模型支持2种使用方式:本地客户端接入、代码接入
3.1 本地客户端接入
支持三种主流工具:Chatbox、Cherry Studio、AnythingLLM
3.2 代码接入
DeepSeek/GLM-Z1/GLM-4/MiniMax-M1/ERNIE4.5 调用方式
支持 curl、python、golang、java、nodejs 等
- 流式对话请求示例
# Streaming chat-completion request (Qwen3): stream must be true, and
# enable_thinking toggles the reasoning trace in the response.
curl --location 'https://$BASE_URL/chat/completions' \
--header 'Authorization: Bearer 申请到的key' \
--header 'Content-Type: application/json' \
--data '{
"stream": true,
"model": "Qwen3-235B-A22B",
"messages": [
{
"role": "user",
"content": "飞机怎么会飞?"
}
],
"enable_thinking": true
}'
- 推理
# Non-streaming chat-completion (inference) request; substitute your
# model ID and API key before running.
curl --request POST \
--url https://$BASE_URL/v1/chat/completions \
--header 'authorization: Bearer 申请到的key' \
--header 'content-type: application/json' \
--data '{
"model": "模型ID",
"messages": [
{
"role": "user",
"content": "Hello World"
}
]
}'
"""Minimal OpenAI-compatible chat-completion call (non-streaming)."""
from openai import OpenAI

# Point the SDK at the platform's OpenAI-compatible endpoint.
client = OpenAI(
    api_key="申请到的key",
    base_url="https://$BASE_URL/v1/",
)

chat_messages = [{"role": "user", "content": "Hello World"}]

response = client.chat.completions.create(
    model="模型ID",  # model to send to the proxy
    messages=chat_messages,
)
print(response)
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

// main sends a single non-streaming chat-completion request and prints
// the raw JSON response body.
func main() {
	url := "https://$BASE_URL/v1/chat/completions"
	// Define and marshal the payload. Errors were previously discarded
	// with `_`; check them so failures are reported instead of sending
	// an empty request.
	payload, err := json.Marshal(map[string]interface{}{
		"model": "模型ID",
		"messages": []map[string]string{{
			"role":    "user",
			"content": "Hello World",
		}},
		"stream":      false,
		"max_tokens":  512,
		"temperature": 0.6,
	})
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	// Create and send the request.
	req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload))
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	req.Header.Set("Authorization", "Bearer 申请的api-key")
	req.Header.Set("Content-Type", "application/json")
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	defer res.Body.Close()
	// Read and print the response. io.ReadAll replaces the deprecated
	// ioutil.ReadAll, matching the streaming example which uses "io".
	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	fmt.Println("Response:", string(body))
}
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;
import org.json.JSONObject;
import org.json.JSONArray;
public class Test {
public static void main(String[] args) throws Exception{
JSONObject body = new JSONObject()
.put("model", "模型ID")
.put("messages", new JSONArray().put(new JSONObject().put("role", "user").put("content", "Hello World")))
.put("stream", false)
.put("max_tokens", 512)
.put("temperature", 0.6);
HttpResponse<String> response = Unirest.post("https://$BASE_URL/v1/chat/completions")
.header("Authorization", "Bearer 申请的api-key")
.header("Content-Type", "application/json")
.body(body.())
.asString();
System.out.println(response.getBody());
}
}
// Non-streaming chat-completion example using axios.
const axios = require('axios');

const url = 'https://$BASE_URL/v1/chat/completions';

const payload = {
  model: '模型ID',
  messages: [{ role: 'user', content: 'Hello world' }],
  stream: false,
  max_tokens: 512,
  temperature: 0.6,
};

const headers = {
  Authorization: 'Bearer 申请的api-key',
  'Content-Type': 'application/json',
};

// Send the request and print the status plus the first choice's text.
(async () => {
  try {
    const response = await axios.post(url, payload, { headers });
    console.log('Response Status:', response.status);
    console.log('Response Body:', response.data.choices[0].message.content);
  } catch (err) {
    console.error('Error:', err);
  }
})();
Qwen3 调用方式
仅支持流式输出,通过 stream=true 来打开;同时通过 enable_thinking 参数来控制是否有推理过程:true 为有,false 为没有。
支持curl、python、golang、java、nodejs等
# Qwen3 supports streaming output only: stream must be true, and
# enable_thinking controls whether a reasoning trace is returned.
curl --location 'https://$BASE_URL/chat/completions' \
--header 'Authorization: Bearer 申请到的key' \
--header 'Content-Type: application/json' \
--data '{
"stream": true,
"model": "Qwen3-235B-A22B",
"messages": [
{
"role": "user",
"content": "飞机怎么会飞?"
}
],
"enable_thinking": true
}'
"""Stream a Qwen3 chat completion and print the reasoning trace, then the answer.

The original sample lost its indentation during extraction and was not
valid Python as printed; the logic is unchanged here.
"""
from openai import OpenAI

# Initialize the OpenAI-compatible client.
client = OpenAI(
    api_key="申请到的key",
    base_url="https://$BASE_URL",
)

messages = [{"role": "user", "content": "你是谁"}]

completion = client.chat.completions.create(
    model="Qwen3-235B-A22B",
    messages=messages,
    # enable_thinking turns the reasoning trace on; it has no effect on QwQ models.
    extra_body={"enable_thinking": True},
    stream=True,
)

reasoning_content = ""  # full reasoning trace
answer_content = ""     # full reply
is_answering = False    # whether the answer phase has started

print("\n" + "=" * 20 + "思考过程" + "=" * 20 + "\n")

for chunk in completion:
    # A chunk without choices carries the final usage statistics.
    if not chunk.choices:
        print("\nUsage:")
        print(chunk.usage)
        continue
    delta = chunk.choices[0].delta
    # Collect reasoning tokens (printed only before the answer starts).
    if hasattr(delta, "reasoning_content") and delta.reasoning_content is not None:
        if not is_answering:
            print(delta.reasoning_content, end="", flush=True)
        reasoning_content += delta.reasoning_content
    # The first content token marks the switch to the answer phase.
    if hasattr(delta, "content") and delta.content:
        if not is_answering:
            print("\n" + "=" * 20 + "完整回复" + "=" * 20 + "\n")
            is_answering = True
        print(delta.content, end="", flush=True)
        answer_content += delta.content
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"
	"sync/atomic"
)

// Fix: the original imported "os" without using it, which is a compile
// error in Go; the unused import is removed and gofmt layout restored.

const API_URL = "https://$BASE_URL/chat/completions"

// Message is a single chat turn.
type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// CompletionRequest is the request body for /chat/completions.
type CompletionRequest struct {
	Model     string    `json:"model"`
	Messages  []Message `json:"messages"`
	Stream    bool      `json:"stream"`
	ExtraBody ExtraBody `json:"extra_body"`
}

// ExtraBody carries vendor-specific options; EnableThinking toggles the
// reasoning trace.
type ExtraBody struct {
	EnableThinking bool `json:"enable_thinking"`
}

// CompletionChunk is one SSE payload of the streaming response.
type CompletionChunk struct {
	Choices []Choice `json:"choices"`
	Usage   Usage    `json:"usage"`
}

type Choice struct {
	Delta Delta `json:"delta"`
}

// Delta holds incremental reasoning/answer text.
type Delta struct {
	ReasoningContent string `json:"reasoning_content"`
	Content          string `json:"content"`
}

// Usage is the token accounting sent in the final chunk.
type Usage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

// main streams a Qwen3 chat completion and prints the reasoning trace
// followed by the answer.
func main() {
	apiKey := "申请到的key"
	if apiKey == "" {
		log.Fatal("apiKey not set")
	}
	messages := []Message{{Role: "user", Content: "你是谁"}}
	request := CompletionRequest{
		Model:    "Qwen3-235B-A22B",
		Messages: messages,
		Stream:   true,
		ExtraBody: ExtraBody{
			EnableThinking: true,
		},
	}
	requestBody, err := json.Marshal(request)
	if err != nil {
		log.Fatalf("Error creating request body: %v", err)
	}
	fmt.Println("\n" + strings.Repeat("=", 20) + "思考过程" + strings.Repeat("=", 20) + "\n")
	req, err := http.NewRequest("POST", API_URL, bytes.NewBuffer(requestBody))
	if err != nil {
		log.Fatalf("Error creating request: %v", err)
	}
	req.Header.Set("Authorization", "Bearer "+apiKey)
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Accept", "text/event-stream")
	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		log.Fatalf("Error making request: %v", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		log.Fatalf("Request failed with status %d: %s", resp.StatusCode, string(body))
	}
	var isAnswering atomic.Bool
	var reasoningContent strings.Builder
	var answerContent strings.Builder
	// Read the SSE response line by line with bufio.Scanner.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.TrimSpace(line) == "" {
			continue
		}
		// Handle SSE-formatted lines (prefixed with "data: ").
		if strings.HasPrefix(line, "data: ") {
			data := strings.TrimPrefix(line, "data: ")
			if data == "[DONE]" {
				break
			}
			var chunk CompletionChunk
			if err := json.Unmarshal([]byte(data), &chunk); err != nil {
				log.Printf("Error decoding JSON: %v, data: %s", err, data)
				continue
			}
			// A chunk without choices carries the final usage statistics.
			if len(chunk.Choices) == 0 {
				if chunk.Usage.TotalTokens > 0 {
					fmt.Println("\nUsage:")
					fmt.Printf("Prompt Tokens: %d, Completion Tokens: %d, Total Tokens: %d\n",
						chunk.Usage.PromptTokens,
						chunk.Usage.CompletionTokens,
						chunk.Usage.TotalTokens)
				}
				continue
			}
			delta := chunk.Choices[0].Delta
			// Process reasoning content (printed only before the answer starts).
			if delta.ReasoningContent != "" {
				if !isAnswering.Load() {
					fmt.Print(delta.ReasoningContent)
				}
				reasoningContent.WriteString(delta.ReasoningContent)
			}
			// Process answer content; the first token switches phases.
			if delta.Content != "" {
				if !isAnswering.Load() {
					fmt.Println("\n" + strings.Repeat("=", 20) + "完整回复" + strings.Repeat("=", 20) + "\n")
					isAnswering.Store(true)
				}
				fmt.Print(delta.Content)
				answerContent.WriteString(delta.Content)
			}
		}
	}
	if err := scanner.Err(); err != nil {
		log.Printf("Error reading response: %v", err)
	}
}
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.json.JsonMapper;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicBoolean;
// Streams a Qwen3 chat completion and prints the reasoning trace followed
// by the answer.
// NOTE(review): BodyHandlers.ofString buffers the entire response before the
// SSE lines are parsed, so nothing is printed until the stream finishes —
// confirm whether truly incremental output is required here.
public class OpenAIClient {
private static final String API_URL = "https://$BASE_URL/chat/completions";
// Shared Jackson mapper; unknown JSON fields are ignored so new server
// fields do not break deserialization.
private static final ObjectMapper objectMapper = JsonMapper.builder().disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES).build();
// A single chat turn (role + content).
public static class Message {
private String role;
private String content;
public Message(String role, String content) {
this.role = role;
this.content = content;
}
// Getters and setters
public String getRole() { return role; }
public String getContent() { return content; }
public void setRole(String role) { this.role = role; }
public void setContent(String content) { this.content = content; }
}
// Request body for the /chat/completions endpoint.
public static class CompletionRequest {
private String model;
private List<Message> messages;
private boolean stream = true;
@JsonProperty("extra_body")
private ExtraBody extraBody;
// Constructor
public CompletionRequest(String model, List<Message> messages, boolean enableThinking) {
this.model = model;
this.messages = messages;
this.extraBody = new ExtraBody(enableThinking);
}
// Getters and setters
public String getModel() { return model; }
public List<Message> getMessages() { return messages; }
public boolean isStream() { return stream; }
public ExtraBody getExtraBody() { return extraBody; }
}
// Vendor-specific options; enable_thinking toggles the reasoning trace.
public static class ExtraBody {
@JsonProperty("enable_thinking")
private boolean enableThinking;
public ExtraBody(boolean enableThinking) {
this.enableThinking = enableThinking;
}
public boolean isEnableThinking() { return enableThinking; }
}
// One SSE payload of the streaming response.
public static class CompletionChunk {
private List<Choice> choices;
private Usage usage;
// Getters and setters
public List<Choice> getChoices() { return choices; }
public Usage getUsage() { return usage; }
}
public static class Choice {
private Delta delta;
public Delta getDelta() { return delta; }
}
// Incremental reasoning/answer text carried by each chunk.
public static class Delta {
@JsonProperty("reasoning_content")
private String reasoningContent;
private String content;
public String getReasoningContent() { return reasoningContent; }
public String getContent() { return content; }
}
// Token accounting delivered in the final chunk.
public static class Usage {
@JsonProperty("prompt_tokens")
private int promptTokens;
@JsonProperty("completion_tokens")
private int completionTokens;
@JsonProperty("total_tokens")
private int totalTokens;
// Getters
public int getPromptTokens() { return promptTokens; }
public int getCompletionTokens() { return completionTokens; }
public int getTotalTokens() { return totalTokens; }
}
public static void main(String[] args) {
String apiKey = "申请到的key";
if (apiKey == null || apiKey.isEmpty()) {
System.err.println("apiKey not set");
System.exit(1);
}
List<Message> messages = List.of(new Message("user", "你是谁"));
CompletionRequest request = new CompletionRequest("Qwen3-235B-A22B", messages, true);
try {
String requestBody = objectMapper.writeValueAsString(request);
System.out.println("\n" + "=".repeat(20) + "思考过程" + "=".repeat(20) + "\n");
HttpClient client = HttpClient.newHttpClient();
HttpRequest httpRequest = HttpRequest.newBuilder()
.uri(URI.create(API_URL))
.header("Authorization", "Bearer " + apiKey)
.header("Content-Type", "application/json")
.header("Accept", "text/event-stream")
.POST(HttpRequest.BodyPublishers.ofString(requestBody))
.build();
AtomicBoolean isAnswering = new AtomicBoolean(false);
StringBuilder reasoningContent = new StringBuilder();
StringBuilder answerContent = new StringBuilder();
CompletableFuture<HttpResponse<String>> future = client.sendAsync(
httpRequest,
HttpResponse.BodyHandlers.ofString(StandardCharsets.UTF_8)
);
future.thenAccept(response -> {
if (response.statusCode() != 200) {
System.err.println("Request failed with status: " + response.statusCode());
System.err.println(response.body());
return;
}
// Process stream line by line
String[] lines = response.body().split("\n");
for (String line : lines) {
if (line.trim().isEmpty()) continue;
try {
// Remove "data: " prefix if present
String jsonStr = line.startsWith("data: ") ? line.substring(6) : line;
if (jsonStr.equals("[DONE]")) continue;
CompletionChunk chunk = objectMapper.readValue(jsonStr, CompletionChunk.class);
// A chunk without choices carries the final usage statistics.
if (chunk.getChoices() == null || chunk.getChoices().isEmpty()) {
if (chunk.getUsage() != null) {
System.out.println("\nUsage:");
System.out.printf("Prompt Tokens: %d, Completion Tokens: %d, Total Tokens: %d%n",
chunk.getUsage().getPromptTokens(),
chunk.getUsage().getCompletionTokens(),
chunk.getUsage().getTotalTokens());
}
continue;
}
Delta delta = chunk.getChoices().get(0).getDelta();
// Process reasoning content
if (delta.getReasoningContent() != null) {
if (!isAnswering.get()) {
System.out.print(delta.getReasoningContent());
}
reasoningContent.append(delta.getReasoningContent());
}
// Process answer content
if (delta.getContent() != null) {
if (!isAnswering.get()) {
System.out.println("\n" + "=".repeat(20) + "完整回复" + "=".repeat(20) + "\n");
isAnswering.set(true);
}
System.out.print(delta.getContent());
answerContent.append(delta.getContent());
}
} catch (JsonProcessingException e) {
System.err.println("Error parsing JSON: " + e.getMessage());
}
}
}).join();
} catch (JsonProcessingException e) {
System.err.println("Error creating request body: " + e.getMessage());
}
}
}
// Streams a Qwen3 chat completion and prints the reasoning trace followed
// by the answer.
// Fixes: removed require('dotenv') — it was an undeclared third-party
// dependency and unused, since the API key is hardcoded below; SSE lines
// split across TCP chunks are now buffered instead of failing JSON.parse.
const axios = require('axios');

async function main() {
  const apiKey = "申请到的key";
  if (!apiKey) {
    throw new Error('apiKey not set');
  }

  const client = axios.create({
    baseURL: 'https://$BASE_URL',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream'
    },
    responseType: 'stream'
  });

  const messages = [
    { role: 'user', content: '你是谁' }
  ];

  const requestData = {
    model: 'Qwen3-235B-A22B',
    messages,
    stream: true,
    extra_body: {
      enable_thinking: true
    }
  };

  let reasoningContent = ''; // full reasoning trace
  let answerContent = '';    // full reply
  let isAnswering = false;   // whether the answer phase has started
  let pending = '';          // buffer for an SSE line split across chunks

  console.log('\n' + '='.repeat(20) + '思考过程' + '='.repeat(20) + '\n');

  try {
    const response = await client.post('/chat/completions', requestData);
    response.data.on('data', (chunk) => {
      try {
        // An SSE event may arrive split across chunks; keep the trailing
        // partial line in `pending` until the rest arrives.
        pending += chunk.toString();
        const rawLines = pending.split('\n');
        pending = rawLines.pop() ?? '';
        for (const line of rawLines) {
          if (line.trim() === '') continue;
          // Strip the SSE "data: " prefix if present.
          const cleanLine = line.replace(/^data: /, '').trim();
          if (cleanLine === '[DONE]') continue;
          const data = JSON.parse(cleanLine);
          // A payload without choices carries the final usage statistics.
          if (!data.choices || data.choices.length === 0) {
            if (data.usage) {
              console.log('\nUsage:');
              console.log(`Prompt Tokens: ${data.usage.prompt_tokens}, Completion Tokens: ${data.usage.completion_tokens}, Total Tokens: ${data.usage.total_tokens}`);
            }
            continue;
          }
          const delta = data.choices[0].delta;
          // Reasoning tokens (printed only before the answer starts).
          if (delta.reasoning_content) {
            if (!isAnswering) {
              process.stdout.write(delta.reasoning_content);
            }
            reasoningContent += delta.reasoning_content;
          }
          // The first content token marks the switch to the answer phase.
          if (delta.content) {
            if (!isAnswering) {
              console.log('\n' + '='.repeat(20) + '完整回复' + '='.repeat(20) + '\n');
              isAnswering = true;
            }
            process.stdout.write(delta.content);
            answerContent += delta.content;
          }
        }
      } catch (err) {
        console.error('Error processing chunk:', err);
      }
    });
    response.data.on('end', () => {
      console.log('\n\nStream ended');
      // The full reasoningContent and answerContent are available here.
    });
  } catch (error) {
    console.error('Error making request:', error.response?.data || error.message);
  }
}

main();