Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f5cbab77cf | |||
| d4706d6b8e | |||
| 6c8016e5f8 | |||
| 7160012fe2 | |||
| c62276fcc4 | |||
| 15a3b44689 | |||
| 8f3c7280cf | |||
| e5e73a33f0 | |||
| 2d15f63eaa | |||
| 6f3072895a | |||
| 1763145fea | |||
| 66831a1bde | |||
| fd44ac7c0c | |||
| f5bf67c636 | |||
| 7becf62a7a | |||
| 40c0333eaa |
+2
-1
@@ -63,7 +63,8 @@
|
||||
- Add suffix `-high` to set high reasoning effort (e.g., `o3-mini-high`)
|
||||
- Add suffix `-medium` to set medium reasoning effort
|
||||
- Add suffix `-low` to set low reasoning effort
|
||||
17. 🔄 Thinking to content option `thinking_to_content` in `Channel->Edit->Channel Extra Settings`, default is `false`, when `true`, the `reasoning_conetnt` of the thinking content will be converted to `<think>` tags and concatenated to the content returned.
|
||||
17. 🔄 Thinking to content option `thinking_to_content` in `Channel->Edit->Channel Extra Settings`, default is `false`, when `true`, the `reasoning_content` of the thinking content will be converted to `<think>` tags and concatenated to the content returned.
|
||||
18. 🔄 Model rate limit, support setting total request limit and successful request limit in `System Settings->Rate Limit Settings`
|
||||
|
||||
## Model Support
|
||||
This version additionally supports:
|
||||
|
||||
@@ -69,7 +69,8 @@
|
||||
- 添加后缀 `-high` 设置为 high reasoning effort (例如: `o3-mini-high`)
|
||||
- 添加后缀 `-medium` 设置为 medium reasoning effort (例如: `o3-mini-medium`)
|
||||
- 添加后缀 `-low` 设置为 low reasoning effort (例如: `o3-mini-low`)
|
||||
18. 🔄 思考转内容,支持在 `渠道-编辑-渠道额外设置` 中设置 `thinking_to_content` 选项,默认`false`,开启后会将思考内容`reasoning_conetnt`转换为`<think>`标签拼接到内容中返回。
|
||||
18. 🔄 思考转内容,支持在 `渠道-编辑-渠道额外设置` 中设置 `thinking_to_content` 选项,默认`false`,开启后会将思考内容`reasoning_content`转换为`<think>`标签拼接到内容中返回。
|
||||
19. 🔄 模型限流,支持在 `系统设置-速率限制设置` 中设置模型限流,支持设置总请求数限制和成功请求数限制
|
||||
|
||||
## 模型支持
|
||||
此版本额外支持以下模型:
|
||||
|
||||
+1
-1
@@ -276,7 +276,7 @@ var ChannelBaseURLs = []string{
|
||||
"https://api.cohere.ai", //34
|
||||
"https://api.minimax.chat", //35
|
||||
"", //36
|
||||
"", //37
|
||||
"https://api.dify.ai", //37
|
||||
"https://api.jina.ai", //38
|
||||
"https://api.cloudflare.com", //39
|
||||
"https://api.siliconflow.cn", //40
|
||||
|
||||
+88
-86
@@ -83,92 +83,94 @@ var defaultModelRatio = map[string]float64{
|
||||
"text-curie-001": 1,
|
||||
//"text-davinci-002": 10,
|
||||
//"text-davinci-003": 10,
|
||||
"text-davinci-edit-001": 10,
|
||||
"code-davinci-edit-001": 10,
|
||||
"whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
|
||||
"tts-1": 7.5, // 1k characters -> $0.015
|
||||
"tts-1-1106": 7.5, // 1k characters -> $0.015
|
||||
"tts-1-hd": 15, // 1k characters -> $0.03
|
||||
"tts-1-hd-1106": 15, // 1k characters -> $0.03
|
||||
"davinci": 10,
|
||||
"curie": 10,
|
||||
"babbage": 10,
|
||||
"ada": 10,
|
||||
"text-embedding-3-small": 0.01,
|
||||
"text-embedding-3-large": 0.065,
|
||||
"text-embedding-ada-002": 0.05,
|
||||
"text-search-ada-doc-001": 10,
|
||||
"text-moderation-stable": 0.1,
|
||||
"text-moderation-latest": 0.1,
|
||||
"claude-instant-1": 0.4, // $0.8 / 1M tokens
|
||||
"claude-2.0": 4, // $8 / 1M tokens
|
||||
"claude-2.1": 4, // $8 / 1M tokens
|
||||
"claude-3-haiku-20240307": 0.125, // $0.25 / 1M tokens
|
||||
"claude-3-5-haiku-20241022": 0.5, // $1 / 1M tokens
|
||||
"claude-3-sonnet-20240229": 1.5, // $3 / 1M tokens
|
||||
"claude-3-5-sonnet-20240620": 1.5,
|
||||
"claude-3-5-sonnet-20241022": 1.5,
|
||||
"claude-3-opus-20240229": 7.5, // $15 / 1M tokens
|
||||
"ERNIE-4.0-8K": 0.120 * RMB,
|
||||
"ERNIE-3.5-8K": 0.012 * RMB,
|
||||
"ERNIE-3.5-8K-0205": 0.024 * RMB,
|
||||
"ERNIE-3.5-8K-1222": 0.012 * RMB,
|
||||
"ERNIE-Bot-8K": 0.024 * RMB,
|
||||
"ERNIE-3.5-4K-0205": 0.012 * RMB,
|
||||
"ERNIE-Speed-8K": 0.004 * RMB,
|
||||
"ERNIE-Speed-128K": 0.004 * RMB,
|
||||
"ERNIE-Lite-8K-0922": 0.008 * RMB,
|
||||
"ERNIE-Lite-8K-0308": 0.003 * RMB,
|
||||
"ERNIE-Tiny-8K": 0.001 * RMB,
|
||||
"BLOOMZ-7B": 0.004 * RMB,
|
||||
"Embedding-V1": 0.002 * RMB,
|
||||
"bge-large-zh": 0.002 * RMB,
|
||||
"bge-large-en": 0.002 * RMB,
|
||||
"tao-8k": 0.002 * RMB,
|
||||
"PaLM-2": 1,
|
||||
"gemini-pro": 1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
|
||||
"gemini-pro-vision": 1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
|
||||
"gemini-1.0-pro-vision-001": 1,
|
||||
"gemini-1.0-pro-001": 1,
|
||||
"gemini-1.5-pro-latest": 1.75, // $3.5 / 1M tokens
|
||||
"gemini-1.5-pro-exp-0827": 1.75, // $3.5 / 1M tokens
|
||||
"gemini-1.5-flash-latest": 1,
|
||||
"gemini-1.5-flash-exp-0827": 1,
|
||||
"gemini-1.0-pro-latest": 1,
|
||||
"gemini-1.0-pro-vision-latest": 1,
|
||||
"gemini-ultra": 1,
|
||||
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
|
||||
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
|
||||
"chatglm_std": 0.3572, // ¥0.005 / 1k tokens
|
||||
"chatglm_lite": 0.1429, // ¥0.002 / 1k tokens
|
||||
"glm-4": 7.143, // ¥0.1 / 1k tokens
|
||||
"glm-4v": 0.05 * RMB, // ¥0.05 / 1k tokens
|
||||
"glm-4-alltools": 0.1 * RMB, // ¥0.1 / 1k tokens
|
||||
"glm-3-turbo": 0.3572,
|
||||
"glm-4-plus": 0.05 * RMB,
|
||||
"glm-4-0520": 0.1 * RMB,
|
||||
"glm-4-air": 0.001 * RMB,
|
||||
"glm-4-airx": 0.01 * RMB,
|
||||
"glm-4-long": 0.001 * RMB,
|
||||
"glm-4-flash": 0,
|
||||
"glm-4v-plus": 0.01 * RMB,
|
||||
"qwen-turbo": 0.8572, // ¥0.012 / 1k tokens
|
||||
"qwen-plus": 10, // ¥0.14 / 1k tokens
|
||||
"text-embedding-v1": 0.05, // ¥0.0007 / 1k tokens
|
||||
"SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v4.0": 1.2858,
|
||||
"360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens
|
||||
"360gpt-turbo": 0.0858, // ¥0.0012 / 1k tokens
|
||||
"360gpt-turbo-responsibility-8k": 0.8572, // ¥0.012 / 1k tokens
|
||||
"360gpt-pro": 0.8572, // ¥0.012 / 1k tokens
|
||||
"360gpt2-pro": 0.8572, // ¥0.012 / 1k tokens
|
||||
"embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"hunyuan": 7.143, // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
|
||||
"text-davinci-edit-001": 10,
|
||||
"code-davinci-edit-001": 10,
|
||||
"whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
|
||||
"tts-1": 7.5, // 1k characters -> $0.015
|
||||
"tts-1-1106": 7.5, // 1k characters -> $0.015
|
||||
"tts-1-hd": 15, // 1k characters -> $0.03
|
||||
"tts-1-hd-1106": 15, // 1k characters -> $0.03
|
||||
"davinci": 10,
|
||||
"curie": 10,
|
||||
"babbage": 10,
|
||||
"ada": 10,
|
||||
"text-embedding-3-small": 0.01,
|
||||
"text-embedding-3-large": 0.065,
|
||||
"text-embedding-ada-002": 0.05,
|
||||
"text-search-ada-doc-001": 10,
|
||||
"text-moderation-stable": 0.1,
|
||||
"text-moderation-latest": 0.1,
|
||||
"claude-instant-1": 0.4, // $0.8 / 1M tokens
|
||||
"claude-2.0": 4, // $8 / 1M tokens
|
||||
"claude-2.1": 4, // $8 / 1M tokens
|
||||
"claude-3-haiku-20240307": 0.125, // $0.25 / 1M tokens
|
||||
"claude-3-5-haiku-20241022": 0.5, // $1 / 1M tokens
|
||||
"claude-3-sonnet-20240229": 1.5, // $3 / 1M tokens
|
||||
"claude-3-5-sonnet-20240620": 1.5,
|
||||
"claude-3-5-sonnet-20241022": 1.5,
|
||||
"claude-3-7-sonnet-20250219": 1.5,
|
||||
"claude-3-7-sonnet-20250219-thinking": 1.5,
|
||||
"claude-3-opus-20240229": 7.5, // $15 / 1M tokens
|
||||
"ERNIE-4.0-8K": 0.120 * RMB,
|
||||
"ERNIE-3.5-8K": 0.012 * RMB,
|
||||
"ERNIE-3.5-8K-0205": 0.024 * RMB,
|
||||
"ERNIE-3.5-8K-1222": 0.012 * RMB,
|
||||
"ERNIE-Bot-8K": 0.024 * RMB,
|
||||
"ERNIE-3.5-4K-0205": 0.012 * RMB,
|
||||
"ERNIE-Speed-8K": 0.004 * RMB,
|
||||
"ERNIE-Speed-128K": 0.004 * RMB,
|
||||
"ERNIE-Lite-8K-0922": 0.008 * RMB,
|
||||
"ERNIE-Lite-8K-0308": 0.003 * RMB,
|
||||
"ERNIE-Tiny-8K": 0.001 * RMB,
|
||||
"BLOOMZ-7B": 0.004 * RMB,
|
||||
"Embedding-V1": 0.002 * RMB,
|
||||
"bge-large-zh": 0.002 * RMB,
|
||||
"bge-large-en": 0.002 * RMB,
|
||||
"tao-8k": 0.002 * RMB,
|
||||
"PaLM-2": 1,
|
||||
"gemini-pro": 1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
|
||||
"gemini-pro-vision": 1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
|
||||
"gemini-1.0-pro-vision-001": 1,
|
||||
"gemini-1.0-pro-001": 1,
|
||||
"gemini-1.5-pro-latest": 1.75, // $3.5 / 1M tokens
|
||||
"gemini-1.5-pro-exp-0827": 1.75, // $3.5 / 1M tokens
|
||||
"gemini-1.5-flash-latest": 1,
|
||||
"gemini-1.5-flash-exp-0827": 1,
|
||||
"gemini-1.0-pro-latest": 1,
|
||||
"gemini-1.0-pro-vision-latest": 1,
|
||||
"gemini-ultra": 1,
|
||||
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
|
||||
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
|
||||
"chatglm_std": 0.3572, // ¥0.005 / 1k tokens
|
||||
"chatglm_lite": 0.1429, // ¥0.002 / 1k tokens
|
||||
"glm-4": 7.143, // ¥0.1 / 1k tokens
|
||||
"glm-4v": 0.05 * RMB, // ¥0.05 / 1k tokens
|
||||
"glm-4-alltools": 0.1 * RMB, // ¥0.1 / 1k tokens
|
||||
"glm-3-turbo": 0.3572,
|
||||
"glm-4-plus": 0.05 * RMB,
|
||||
"glm-4-0520": 0.1 * RMB,
|
||||
"glm-4-air": 0.001 * RMB,
|
||||
"glm-4-airx": 0.01 * RMB,
|
||||
"glm-4-long": 0.001 * RMB,
|
||||
"glm-4-flash": 0,
|
||||
"glm-4v-plus": 0.01 * RMB,
|
||||
"qwen-turbo": 0.8572, // ¥0.012 / 1k tokens
|
||||
"qwen-plus": 10, // ¥0.14 / 1k tokens
|
||||
"text-embedding-v1": 0.05, // ¥0.0007 / 1k tokens
|
||||
"SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v4.0": 1.2858,
|
||||
"360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens
|
||||
"360gpt-turbo": 0.0858, // ¥0.0012 / 1k tokens
|
||||
"360gpt-turbo-responsibility-8k": 0.8572, // ¥0.012 / 1k tokens
|
||||
"360gpt-pro": 0.8572, // ¥0.012 / 1k tokens
|
||||
"360gpt2-pro": 0.8572, // ¥0.012 / 1k tokens
|
||||
"embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"hunyuan": 7.143, // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
|
||||
// https://platform.lingyiwanwu.com/docs#-计费单元
|
||||
// 已经按照 7.2 来换算美元价格
|
||||
"yi-34b-chat-0205": 0.18,
|
||||
|
||||
@@ -85,6 +85,7 @@ func Relay(c *gin.Context) {
|
||||
|
||||
if openaiErr != nil {
|
||||
if openaiErr.StatusCode == http.StatusTooManyRequests {
|
||||
common.LogError(c, fmt.Sprintf("origin 429 error: %s", openaiErr.Error.Message))
|
||||
openaiErr.Error.Message = "当前分组上游负载已饱和,请稍后再试"
|
||||
}
|
||||
openaiErr.Error.Message = common.MessageWithRequestId(openaiErr.Error.Message, requestId)
|
||||
|
||||
+19
-2
@@ -1,6 +1,9 @@
|
||||
package dto
|
||||
|
||||
import "encoding/json"
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type ResponseFormat struct {
|
||||
Type string `json:"type,omitempty"`
|
||||
@@ -47,6 +50,7 @@ type GeneralOpenAIRequest struct {
|
||||
Dimensions int `json:"dimensions,omitempty"`
|
||||
Modalities any `json:"modalities,omitempty"`
|
||||
Audio any `json:"audio,omitempty"`
|
||||
ExtraBody any `json:"extra_body,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAITools struct {
|
||||
@@ -153,11 +157,24 @@ func (m *Message) StringContent() string {
|
||||
if m.parsedStringContent != nil {
|
||||
return *m.parsedStringContent
|
||||
}
|
||||
|
||||
var stringContent string
|
||||
if err := json.Unmarshal(m.Content, &stringContent); err == nil {
|
||||
m.parsedStringContent = &stringContent
|
||||
return stringContent
|
||||
}
|
||||
return string(m.Content)
|
||||
|
||||
contentStr := new(strings.Builder)
|
||||
arrayContent := m.ParseContent()
|
||||
for _, content := range arrayContent {
|
||||
if content.Type == ContentTypeText {
|
||||
contentStr.WriteString(content.Text)
|
||||
}
|
||||
}
|
||||
stringContent = contentStr.String()
|
||||
m.parsedStringContent = &stringContent
|
||||
|
||||
return stringContent
|
||||
}
|
||||
|
||||
func (m *Message) SetStringContent(content string) {
|
||||
|
||||
@@ -0,0 +1,172 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"one-api/common"
|
||||
"one-api/setting"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/go-redis/redis/v8"
|
||||
)
|
||||
|
||||
const (
|
||||
ModelRequestRateLimitCountMark = "MRRL"
|
||||
ModelRequestRateLimitSuccessCountMark = "MRRLS"
|
||||
)
|
||||
|
||||
// 检查Redis中的请求限制
|
||||
func checkRedisRateLimit(ctx context.Context, rdb *redis.Client, key string, maxCount int, duration int64) (bool, error) {
|
||||
// 如果maxCount为0,表示不限制
|
||||
if maxCount == 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// 获取当前计数
|
||||
length, err := rdb.LLen(ctx, key).Result()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
// 如果未达到限制,允许请求
|
||||
if length < int64(maxCount) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// 检查时间窗口
|
||||
oldTimeStr, _ := rdb.LIndex(ctx, key, -1).Result()
|
||||
oldTime, err := time.Parse(timeFormat, oldTimeStr)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
nowTimeStr := time.Now().Format(timeFormat)
|
||||
nowTime, err := time.Parse(timeFormat, nowTimeStr)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
// 如果在时间窗口内已达到限制,拒绝请求
|
||||
subTime := nowTime.Sub(oldTime).Seconds()
|
||||
if int64(subTime) < duration {
|
||||
rdb.Expire(ctx, key, common.RateLimitKeyExpirationDuration)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// 记录Redis请求
|
||||
func recordRedisRequest(ctx context.Context, rdb *redis.Client, key string, maxCount int) {
|
||||
// 如果maxCount为0,不记录请求
|
||||
if maxCount == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now().Format(timeFormat)
|
||||
rdb.LPush(ctx, key, now)
|
||||
rdb.LTrim(ctx, key, 0, int64(maxCount-1))
|
||||
rdb.Expire(ctx, key, common.RateLimitKeyExpirationDuration)
|
||||
}
|
||||
|
||||
// Redis限流处理器
|
||||
func redisRateLimitHandler(duration int64, totalMaxCount, successMaxCount int) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
userId := strconv.Itoa(c.GetInt("id"))
|
||||
ctx := context.Background()
|
||||
rdb := common.RDB
|
||||
|
||||
// 1. 检查总请求数限制(当totalMaxCount为0时会自动跳过)
|
||||
totalKey := fmt.Sprintf("rateLimit:%s:%s", ModelRequestRateLimitCountMark, userId)
|
||||
allowed, err := checkRedisRateLimit(ctx, rdb, totalKey, totalMaxCount, duration)
|
||||
if err != nil {
|
||||
fmt.Println("检查总请求数限制失败:", err.Error())
|
||||
abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
|
||||
return
|
||||
}
|
||||
if !allowed {
|
||||
abortWithOpenAiMessage(c, http.StatusTooManyRequests, fmt.Sprintf("您已达到总请求数限制:%d分钟内最多请求%d次,包括失败次数,请检查您的请求是否正确", setting.ModelRequestRateLimitDurationMinutes, totalMaxCount))
|
||||
}
|
||||
|
||||
// 2. 检查成功请求数限制
|
||||
successKey := fmt.Sprintf("rateLimit:%s:%s", ModelRequestRateLimitSuccessCountMark, userId)
|
||||
allowed, err = checkRedisRateLimit(ctx, rdb, successKey, successMaxCount, duration)
|
||||
if err != nil {
|
||||
fmt.Println("检查成功请求数限制失败:", err.Error())
|
||||
abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
|
||||
return
|
||||
}
|
||||
if !allowed {
|
||||
abortWithOpenAiMessage(c, http.StatusTooManyRequests, fmt.Sprintf("您已达到请求数限制:%d分钟内最多请求%d次", setting.ModelRequestRateLimitDurationMinutes, successMaxCount))
|
||||
return
|
||||
}
|
||||
|
||||
// 3. 记录总请求(当totalMaxCount为0时会自动跳过)
|
||||
recordRedisRequest(ctx, rdb, totalKey, totalMaxCount)
|
||||
|
||||
// 4. 处理请求
|
||||
c.Next()
|
||||
|
||||
// 5. 如果请求成功,记录成功请求
|
||||
if c.Writer.Status() < 400 {
|
||||
recordRedisRequest(ctx, rdb, successKey, successMaxCount)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 内存限流处理器
|
||||
func memoryRateLimitHandler(duration int64, totalMaxCount, successMaxCount int) gin.HandlerFunc {
|
||||
inMemoryRateLimiter.Init(common.RateLimitKeyExpirationDuration)
|
||||
|
||||
return func(c *gin.Context) {
|
||||
userId := strconv.Itoa(c.GetInt("id"))
|
||||
totalKey := ModelRequestRateLimitCountMark + userId
|
||||
successKey := ModelRequestRateLimitSuccessCountMark + userId
|
||||
|
||||
// 1. 检查总请求数限制(当totalMaxCount为0时跳过)
|
||||
if totalMaxCount > 0 && !inMemoryRateLimiter.Request(totalKey, totalMaxCount, duration) {
|
||||
c.Status(http.StatusTooManyRequests)
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
|
||||
// 2. 检查成功请求数限制
|
||||
// 使用一个临时key来检查限制,这样可以避免实际记录
|
||||
checkKey := successKey + "_check"
|
||||
if !inMemoryRateLimiter.Request(checkKey, successMaxCount, duration) {
|
||||
c.Status(http.StatusTooManyRequests)
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
|
||||
// 3. 处理请求
|
||||
c.Next()
|
||||
|
||||
// 4. 如果请求成功,记录到实际的成功请求计数中
|
||||
if c.Writer.Status() < 400 {
|
||||
inMemoryRateLimiter.Request(successKey, successMaxCount, duration)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ModelRequestRateLimit 模型请求限流中间件
|
||||
func ModelRequestRateLimit() func(c *gin.Context) {
|
||||
// 如果未启用限流,直接放行
|
||||
if !setting.ModelRequestRateLimitEnabled {
|
||||
return defNext
|
||||
}
|
||||
|
||||
// 计算限流参数
|
||||
duration := int64(setting.ModelRequestRateLimitDurationMinutes * 60)
|
||||
totalMaxCount := setting.ModelRequestRateLimitCount
|
||||
successMaxCount := setting.ModelRequestRateLimitSuccessCount
|
||||
|
||||
// 根据存储类型选择限流处理器
|
||||
if common.RedisEnabled {
|
||||
return redisRateLimitHandler(duration, totalMaxCount, successMaxCount)
|
||||
} else {
|
||||
return memoryRateLimitHandler(duration, totalMaxCount, successMaxCount)
|
||||
}
|
||||
}
|
||||
@@ -85,6 +85,9 @@ func InitOptionMap() {
|
||||
common.OptionMap["QuotaForInvitee"] = strconv.Itoa(common.QuotaForInvitee)
|
||||
common.OptionMap["QuotaRemindThreshold"] = strconv.Itoa(common.QuotaRemindThreshold)
|
||||
common.OptionMap["ShouldPreConsumedQuota"] = strconv.Itoa(common.PreConsumedQuota)
|
||||
common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount)
|
||||
common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes)
|
||||
common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount)
|
||||
common.OptionMap["ModelRatio"] = common.ModelRatio2JSONString()
|
||||
common.OptionMap["ModelPrice"] = common.ModelPrice2JSONString()
|
||||
common.OptionMap["GroupRatio"] = setting.GroupRatio2JSONString()
|
||||
@@ -105,6 +108,7 @@ func InitOptionMap() {
|
||||
common.OptionMap["MjActionCheckSuccessEnabled"] = strconv.FormatBool(setting.MjActionCheckSuccessEnabled)
|
||||
common.OptionMap["CheckSensitiveEnabled"] = strconv.FormatBool(setting.CheckSensitiveEnabled)
|
||||
common.OptionMap["DemoSiteEnabled"] = strconv.FormatBool(setting.DemoSiteEnabled)
|
||||
common.OptionMap["ModelRequestRateLimitEnabled"] = strconv.FormatBool(setting.ModelRequestRateLimitEnabled)
|
||||
common.OptionMap["CheckSensitiveOnPromptEnabled"] = strconv.FormatBool(setting.CheckSensitiveOnPromptEnabled)
|
||||
//common.OptionMap["CheckSensitiveOnCompletionEnabled"] = strconv.FormatBool(constant.CheckSensitiveOnCompletionEnabled)
|
||||
common.OptionMap["StopOnSensitiveEnabled"] = strconv.FormatBool(setting.StopOnSensitiveEnabled)
|
||||
@@ -226,6 +230,9 @@ func updateOptionMap(key string, value string) (err error) {
|
||||
setting.DemoSiteEnabled = boolValue
|
||||
case "CheckSensitiveOnPromptEnabled":
|
||||
setting.CheckSensitiveOnPromptEnabled = boolValue
|
||||
case "ModelRequestRateLimitEnabled":
|
||||
setting.ModelRequestRateLimitEnabled = boolValue
|
||||
|
||||
//case "CheckSensitiveOnCompletionEnabled":
|
||||
// constant.CheckSensitiveOnCompletionEnabled = boolValue
|
||||
case "StopOnSensitiveEnabled":
|
||||
@@ -308,6 +315,12 @@ func updateOptionMap(key string, value string) (err error) {
|
||||
common.QuotaRemindThreshold, _ = strconv.Atoi(value)
|
||||
case "ShouldPreConsumedQuota":
|
||||
common.PreConsumedQuota, _ = strconv.Atoi(value)
|
||||
case "ModelRequestRateLimitCount":
|
||||
setting.ModelRequestRateLimitCount, _ = strconv.Atoi(value)
|
||||
case "ModelRequestRateLimitDurationMinutes":
|
||||
setting.ModelRequestRateLimitDurationMinutes, _ = strconv.Atoi(value)
|
||||
case "ModelRequestRateLimitSuccessCount":
|
||||
setting.ModelRequestRateLimitSuccessCount, _ = strconv.Atoi(value)
|
||||
case "RetryTimes":
|
||||
common.RetryTimes, _ = strconv.Atoi(value)
|
||||
case "DataExportInterval":
|
||||
|
||||
@@ -9,7 +9,8 @@ var awsModelIDMap = map[string]string{
|
||||
"claude-3-haiku-20240307": "anthropic.claude-3-haiku-20240307-v1:0",
|
||||
"claude-3-5-sonnet-20240620": "anthropic.claude-3-5-sonnet-20240620-v1:0",
|
||||
"claude-3-5-sonnet-20241022": "anthropic.claude-3-5-sonnet-20241022-v2:0",
|
||||
"claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0",
|
||||
"claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0",
|
||||
"claude-3-7-sonnet-20250219": "anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
}
|
||||
|
||||
var ChannelName = "aws"
|
||||
|
||||
@@ -16,6 +16,7 @@ type AwsClaudeRequest struct {
|
||||
StopSequences []string `json:"stop_sequences,omitempty"`
|
||||
Tools []claude.Tool `json:"tools,omitempty"`
|
||||
ToolChoice any `json:"tool_choice,omitempty"`
|
||||
Thinking *claude.Thinking `json:"thinking,omitempty"`
|
||||
}
|
||||
|
||||
func copyRequest(req *claude.ClaudeRequest) *AwsClaudeRequest {
|
||||
@@ -30,5 +31,6 @@ func copyRequest(req *claude.ClaudeRequest) *AwsClaudeRequest {
|
||||
StopSequences: req.StopSequences,
|
||||
Tools: req.Tools,
|
||||
ToolChoice: req.ToolChoice,
|
||||
Thinking: req.Thinking,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@ var ModelList = []string{
|
||||
"claude-3-5-haiku-20241022",
|
||||
"claude-3-5-sonnet-20240620",
|
||||
"claude-3-5-sonnet-20241022",
|
||||
"claude-3-7-sonnet-20250219",
|
||||
"claude-3-7-sonnet-20250219-thinking",
|
||||
}
|
||||
|
||||
var ChannelName = "claude"
|
||||
|
||||
@@ -11,6 +11,9 @@ type ClaudeMediaMessage struct {
|
||||
Usage *ClaudeUsage `json:"usage,omitempty"`
|
||||
StopReason *string `json:"stop_reason,omitempty"`
|
||||
PartialJson string `json:"partial_json,omitempty"`
|
||||
Thinking string `json:"thinking,omitempty"`
|
||||
Signature string `json:"signature,omitempty"`
|
||||
Delta string `json:"delta,omitempty"`
|
||||
// tool_calls
|
||||
Id string `json:"id,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
@@ -54,9 +57,15 @@ type ClaudeRequest struct {
|
||||
TopP float64 `json:"top_p,omitempty"`
|
||||
TopK int `json:"top_k,omitempty"`
|
||||
//ClaudeMetadata `json:"metadata,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
Tools []Tool `json:"tools,omitempty"`
|
||||
ToolChoice any `json:"tool_choice,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
Tools []Tool `json:"tools,omitempty"`
|
||||
ToolChoice any `json:"tool_choice,omitempty"`
|
||||
Thinking *Thinking `json:"thinking,omitempty"`
|
||||
}
|
||||
|
||||
type Thinking struct {
|
||||
Type string `json:"type"`
|
||||
BudgetTokens int `json:"budget_tokens"`
|
||||
}
|
||||
|
||||
type ClaudeError struct {
|
||||
|
||||
@@ -92,6 +92,25 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*ClaudeR
|
||||
Stream: textRequest.Stream,
|
||||
Tools: claudeTools,
|
||||
}
|
||||
|
||||
if strings.HasSuffix(textRequest.Model, "-thinking") {
|
||||
if claudeRequest.MaxTokens == 0 {
|
||||
claudeRequest.MaxTokens = 8192
|
||||
}
|
||||
|
||||
// 因为BudgetTokens 必须大于1024
|
||||
if claudeRequest.MaxTokens < 1280 {
|
||||
claudeRequest.MaxTokens = 1280
|
||||
}
|
||||
|
||||
// BudgetTokens 为 max_tokens 的 80%
|
||||
claudeRequest.Thinking = &Thinking{
|
||||
Type: "enabled",
|
||||
BudgetTokens: int(float64(claudeRequest.MaxTokens) * 0.8),
|
||||
}
|
||||
claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking")
|
||||
}
|
||||
|
||||
if claudeRequest.MaxTokens == 0 {
|
||||
claudeRequest.MaxTokens = 4096
|
||||
}
|
||||
@@ -308,12 +327,20 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) (*
|
||||
if claudeResponse.Delta != nil {
|
||||
choice.Index = claudeResponse.Index
|
||||
choice.Delta.SetContentString(claudeResponse.Delta.Text)
|
||||
if claudeResponse.Delta.Type == "input_json_delta" {
|
||||
switch claudeResponse.Delta.Type {
|
||||
case "input_json_delta":
|
||||
tools = append(tools, dto.ToolCall{
|
||||
Function: dto.FunctionCall{
|
||||
Arguments: claudeResponse.Delta.PartialJson,
|
||||
},
|
||||
})
|
||||
case "signature_delta":
|
||||
// 加密的不处理
|
||||
signatureContent := "\n"
|
||||
choice.Delta.ReasoningContent = &signatureContent
|
||||
case "thinking_delta":
|
||||
thinkingContent := claudeResponse.Delta.Thinking
|
||||
choice.Delta.ReasoningContent = &thinkingContent
|
||||
}
|
||||
}
|
||||
} else if claudeResponse.Type == "message_delta" {
|
||||
@@ -352,6 +379,8 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
|
||||
responseText = claudeResponse.Content[0].Text
|
||||
}
|
||||
tools := make([]dto.ToolCall, 0)
|
||||
thinkingContent := ""
|
||||
|
||||
if reqMode == RequestModeCompletion {
|
||||
content, _ := json.Marshal(strings.TrimPrefix(claudeResponse.Completion, " "))
|
||||
choice := dto.OpenAITextResponseChoice{
|
||||
@@ -367,7 +396,8 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
|
||||
} else {
|
||||
fullTextResponse.Id = claudeResponse.Id
|
||||
for _, message := range claudeResponse.Content {
|
||||
if message.Type == "tool_use" {
|
||||
switch message.Type {
|
||||
case "tool_use":
|
||||
args, _ := json.Marshal(message.Input)
|
||||
tools = append(tools, dto.ToolCall{
|
||||
ID: message.Id,
|
||||
@@ -377,6 +407,11 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
|
||||
Arguments: string(args),
|
||||
},
|
||||
})
|
||||
case "thinking":
|
||||
// 加密的不管, 只输出明文的推理过程
|
||||
thinkingContent = message.Thinking
|
||||
case "text":
|
||||
responseText = message.Text
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -391,6 +426,7 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
|
||||
if len(tools) > 0 {
|
||||
choice.Message.SetToolCalls(tools)
|
||||
}
|
||||
choice.Message.ReasoningContent = thinkingContent
|
||||
fullTextResponse.Model = claudeResponse.Model
|
||||
choices = append(choices, choice)
|
||||
fullTextResponse.Choices = choices
|
||||
|
||||
@@ -9,9 +9,18 @@ import (
|
||||
"one-api/dto"
|
||||
"one-api/relay/channel"
|
||||
relaycommon "one-api/relay/common"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
BotTypeChatFlow = 1 // chatflow default
|
||||
BotTypeAgent = 2
|
||||
BotTypeWorkFlow = 3
|
||||
BotTypeCompletion = 4
|
||||
)
|
||||
|
||||
type Adaptor struct {
|
||||
BotType int
|
||||
}
|
||||
|
||||
func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
|
||||
@@ -25,10 +34,28 @@ func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInf
|
||||
}
|
||||
|
||||
func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
|
||||
if strings.HasPrefix(info.UpstreamModelName, "agent") {
|
||||
a.BotType = BotTypeAgent
|
||||
} else if strings.HasPrefix(info.UpstreamModelName, "workflow") {
|
||||
a.BotType = BotTypeWorkFlow
|
||||
} else if strings.HasPrefix(info.UpstreamModelName, "chat") {
|
||||
a.BotType = BotTypeCompletion
|
||||
} else {
|
||||
a.BotType = BotTypeChatFlow
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
||||
return fmt.Sprintf("%s/v1/chat-messages", info.BaseUrl), nil
|
||||
switch a.BotType {
|
||||
case BotTypeWorkFlow:
|
||||
return fmt.Sprintf("%s/v1/workflows/run", info.BaseUrl), nil
|
||||
case BotTypeCompletion:
|
||||
return fmt.Sprintf("%s/v1/completion-messages", info.BaseUrl), nil
|
||||
case BotTypeAgent:
|
||||
fallthrough
|
||||
default:
|
||||
return fmt.Sprintf("%s/v1/chat-messages", info.BaseUrl), nil
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
|
||||
@@ -53,7 +80,6 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
|
||||
func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
|
||||
return channel.DoApiRequest(a, c, info, requestBody)
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ type OllamaRequest struct {
|
||||
Topp float64 `json:"top_p,omitempty"`
|
||||
TopK int `json:"top_k,omitempty"`
|
||||
Stop any `json:"stop,omitempty"`
|
||||
MaxTokens uint `json:"max_tokens,omitempty"`
|
||||
Tools []dto.ToolCall `json:"tools,omitempty"`
|
||||
ResponseFormat any `json:"response_format,omitempty"`
|
||||
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
|
||||
|
||||
@@ -58,6 +58,7 @@ func requestOpenAI2Ollama(request dto.GeneralOpenAIRequest) (*OllamaRequest, err
|
||||
TopK: request.TopK,
|
||||
Stop: Stop,
|
||||
Tools: request.Tools,
|
||||
MaxTokens: request.MaxTokens,
|
||||
ResponseFormat: request.ResponseFormat,
|
||||
FrequencyPenalty: request.FrequencyPenalty,
|
||||
PresencePenalty: request.PresencePenalty,
|
||||
|
||||
@@ -24,6 +24,7 @@ func SetRelayRouter(router *gin.Engine) {
|
||||
}
|
||||
relayV1Router := router.Group("/v1")
|
||||
relayV1Router.Use(middleware.TokenAuth())
|
||||
relayV1Router.Use(middleware.ModelRequestRateLimit())
|
||||
{
|
||||
// WebSocket 路由
|
||||
wsRouter := relayV1Router.Group("")
|
||||
|
||||
+21
-23
@@ -78,6 +78,9 @@ func getTokenEncoder(model string) *tiktoken.Tiktoken {
|
||||
}
|
||||
|
||||
func getTokenNum(tokenEncoder *tiktoken.Tiktoken, text string) int {
|
||||
if text == "" {
|
||||
return 0
|
||||
}
|
||||
return len(tokenEncoder.Encode(text, nil, nil))
|
||||
}
|
||||
|
||||
@@ -282,30 +285,25 @@ func CountTokenMessages(info *relaycommon.RelayInfo, messages []dto.Message, mod
|
||||
tokenNum += tokensPerMessage
|
||||
tokenNum += getTokenNum(tokenEncoder, message.Role)
|
||||
if len(message.Content) > 0 {
|
||||
if message.IsStringContent() {
|
||||
stringContent := message.StringContent()
|
||||
tokenNum += getTokenNum(tokenEncoder, stringContent)
|
||||
if message.Name != nil {
|
||||
tokenNum += tokensPerName
|
||||
tokenNum += getTokenNum(tokenEncoder, *message.Name)
|
||||
}
|
||||
} else {
|
||||
arrayContent := message.ParseContent()
|
||||
for _, m := range arrayContent {
|
||||
if m.Type == dto.ContentTypeImageURL {
|
||||
imageUrl := m.ImageUrl.(dto.MessageImageUrl)
|
||||
imageTokenNum, err := getImageToken(info, &imageUrl, model, stream)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
tokenNum += imageTokenNum
|
||||
log.Printf("image token num: %d", imageTokenNum)
|
||||
} else if m.Type == dto.ContentTypeInputAudio {
|
||||
// TODO: 音频token数量计算
|
||||
tokenNum += 100
|
||||
} else {
|
||||
tokenNum += getTokenNum(tokenEncoder, m.Text)
|
||||
if message.Name != nil {
|
||||
tokenNum += tokensPerName
|
||||
tokenNum += getTokenNum(tokenEncoder, *message.Name)
|
||||
}
|
||||
arrayContent := message.ParseContent()
|
||||
for _, m := range arrayContent {
|
||||
if m.Type == dto.ContentTypeImageURL {
|
||||
imageUrl := m.ImageUrl.(dto.MessageImageUrl)
|
||||
imageTokenNum, err := getImageToken(info, &imageUrl, model, stream)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
tokenNum += imageTokenNum
|
||||
log.Printf("image token num: %d", imageTokenNum)
|
||||
} else if m.Type == dto.ContentTypeInputAudio {
|
||||
// TODO: 音频token数量计算
|
||||
tokenNum += 100
|
||||
} else {
|
||||
tokenNum += getTokenNum(tokenEncoder, m.Text)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
package setting
|
||||
|
||||
var ModelRequestRateLimitEnabled = false
|
||||
var ModelRequestRateLimitDurationMinutes = 1
|
||||
var ModelRequestRateLimitCount = 0
|
||||
var ModelRequestRateLimitSuccessCount = 1000
|
||||
@@ -0,0 +1,80 @@
|
||||
import React, { useEffect, useState } from 'react';
|
||||
import { Card, Spin, Tabs } from '@douyinfe/semi-ui';
|
||||
import SettingsGeneral from '../pages/Setting/Operation/SettingsGeneral.js';
|
||||
import SettingsDrawing from '../pages/Setting/Operation/SettingsDrawing.js';
|
||||
import SettingsSensitiveWords from '../pages/Setting/Operation/SettingsSensitiveWords.js';
|
||||
import SettingsLog from '../pages/Setting/Operation/SettingsLog.js';
|
||||
import SettingsDataDashboard from '../pages/Setting/Operation/SettingsDataDashboard.js';
|
||||
import SettingsMonitoring from '../pages/Setting/Operation/SettingsMonitoring.js';
|
||||
import SettingsCreditLimit from '../pages/Setting/Operation/SettingsCreditLimit.js';
|
||||
import SettingsMagnification from '../pages/Setting/Operation/SettingsMagnification.js';
|
||||
import ModelSettingsVisualEditor from '../pages/Setting/Operation/ModelSettingsVisualEditor.js';
|
||||
import GroupRatioSettings from '../pages/Setting/Operation/GroupRatioSettings.js';
|
||||
import ModelRatioSettings from '../pages/Setting/Operation/ModelRatioSettings.js';
|
||||
|
||||
|
||||
import { API, showError, showSuccess } from '../helpers';
|
||||
import SettingsChats from '../pages/Setting/Operation/SettingsChats.js';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import RequestRateLimit from '../pages/Setting/RateLimit/SettingsRequestRateLimit.js';
|
||||
|
||||
const RateLimitSetting = () => {
|
||||
const { t } = useTranslation();
|
||||
let [inputs, setInputs] = useState({
|
||||
ModelRequestRateLimitEnabled: false,
|
||||
ModelRequestRateLimitCount: 0,
|
||||
ModelRequestRateLimitSuccessCount: 1000,
|
||||
ModelRequestRateLimitDurationMinutes: 1,
|
||||
});
|
||||
|
||||
let [loading, setLoading] = useState(false);
|
||||
|
||||
const getOptions = async () => {
|
||||
const res = await API.get('/api/option/');
|
||||
const { success, message, data } = res.data;
|
||||
if (success) {
|
||||
let newInputs = {};
|
||||
data.forEach((item) => {
|
||||
if (
|
||||
item.key.endsWith('Enabled')
|
||||
) {
|
||||
newInputs[item.key] = item.value === 'true' ? true : false;
|
||||
} else {
|
||||
newInputs[item.key] = item.value;
|
||||
}
|
||||
});
|
||||
|
||||
setInputs(newInputs);
|
||||
} else {
|
||||
showError(message);
|
||||
}
|
||||
};
|
||||
async function onRefresh() {
|
||||
try {
|
||||
setLoading(true);
|
||||
await getOptions();
|
||||
// showSuccess('刷新成功');
|
||||
} catch (error) {
|
||||
showError('刷新失败');
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
onRefresh();
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<>
|
||||
<Spin spinning={loading} size='large'>
|
||||
{/* AI请求速率限制 */}
|
||||
<Card style={{ marginTop: '10px' }}>
|
||||
<RequestRateLimit options={inputs} refresh={onRefresh} />
|
||||
</Card>
|
||||
</Spin>
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
export default RateLimitSetting;
|
||||
@@ -856,7 +856,7 @@
|
||||
"IP黑名单": "IP blacklist",
|
||||
"不允许的IP,一行一个": "IPs not allowed, one per line",
|
||||
"请选择该渠道所支持的模型": "Please select the model supported by this channel",
|
||||
"次": "Second-rate",
|
||||
"次": "times",
|
||||
"达到限速报错内容": "Error content when the speed limit is reached",
|
||||
"不填则使用默认报错": "If not filled in, the default error will be reported.",
|
||||
"Midjouney 设置 (可选)": "Midjouney settings (optional)",
|
||||
@@ -1271,5 +1271,15 @@
|
||||
"留空则使用账号绑定的邮箱": "If left blank, the email address bound to the account will be used",
|
||||
"代理站地址": "Base URL",
|
||||
"对于官方渠道,new-api已经内置地址,除非是第三方代理站点或者Azure的特殊接入地址,否则不需要填写": "For official channels, the new-api has a built-in address. Unless it is a third-party proxy site or a special Azure access address, there is no need to fill it in",
|
||||
"渠道额外设置": "Channel extra settings"
|
||||
}
|
||||
"渠道额外设置": "Channel extra settings",
|
||||
"模型请求速率限制": "Model request rate limit",
|
||||
"启用用户模型请求速率限制(可能会影响高并发性能)": "Enable user model request rate limit (may affect high concurrency performance)",
|
||||
"限制周期": "Limit period",
|
||||
"用户每周期最多请求次数": "User max request times per period",
|
||||
"用户每周期最多请求完成次数": "User max successful request times per period",
|
||||
"包括失败请求的次数,0代表不限制": "Including failed request times, 0 means no limit",
|
||||
"频率限制的周期(分钟)": "Rate limit period (minutes)",
|
||||
"只包括请求成功的次数": "Only include successful request times",
|
||||
"保存模型速率限制": "Save model rate limit settings",
|
||||
"速率限制设置": "Rate limit settings"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
import React, { useEffect, useState, useRef } from 'react';
|
||||
import { Button, Col, Form, Row, Spin } from '@douyinfe/semi-ui';
|
||||
import {
|
||||
compareObjects,
|
||||
API,
|
||||
showError,
|
||||
showSuccess,
|
||||
showWarning,
|
||||
} from '../../../helpers';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
export default function RequestRateLimit(props) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [inputs, setInputs] = useState({
|
||||
ModelRequestRateLimitEnabled: false,
|
||||
ModelRequestRateLimitCount: -1,
|
||||
ModelRequestRateLimitSuccessCount: 1000,
|
||||
ModelRequestRateLimitDurationMinutes: 1
|
||||
});
|
||||
const refForm = useRef();
|
||||
const [inputsRow, setInputsRow] = useState(inputs);
|
||||
|
||||
function onSubmit() {
|
||||
const updateArray = compareObjects(inputs, inputsRow);
|
||||
if (!updateArray.length) return showWarning(t('你似乎并没有修改什么'));
|
||||
const requestQueue = updateArray.map((item) => {
|
||||
let value = '';
|
||||
if (typeof inputs[item.key] === 'boolean') {
|
||||
value = String(inputs[item.key]);
|
||||
} else {
|
||||
value = inputs[item.key];
|
||||
}
|
||||
return API.put('/api/option/', {
|
||||
key: item.key,
|
||||
value,
|
||||
});
|
||||
});
|
||||
setLoading(true);
|
||||
Promise.all(requestQueue)
|
||||
.then((res) => {
|
||||
if (requestQueue.length === 1) {
|
||||
if (res.includes(undefined)) return;
|
||||
} else if (requestQueue.length > 1) {
|
||||
if (res.includes(undefined)) return showError(t('部分保存失败,请重试'));
|
||||
}
|
||||
showSuccess(t('保存成功'));
|
||||
props.refresh();
|
||||
})
|
||||
.catch(() => {
|
||||
showError(t('保存失败,请重试'));
|
||||
})
|
||||
.finally(() => {
|
||||
setLoading(false);
|
||||
});
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
const currentInputs = {};
|
||||
for (let key in props.options) {
|
||||
if (Object.keys(inputs).includes(key)) {
|
||||
currentInputs[key] = props.options[key];
|
||||
}
|
||||
}
|
||||
setInputs(currentInputs);
|
||||
setInputsRow(structuredClone(currentInputs));
|
||||
refForm.current.setValues(currentInputs);
|
||||
}, [props.options]);
|
||||
|
||||
return (
|
||||
<>
|
||||
<Spin spinning={loading}>
|
||||
<Form
|
||||
values={inputs}
|
||||
getFormApi={(formAPI) => (refForm.current = formAPI)}
|
||||
style={{ marginBottom: 15 }}
|
||||
>
|
||||
<Form.Section text={t('模型请求速率限制')}>
|
||||
<Row gutter={16}>
|
||||
<Col span={8}>
|
||||
<Form.Switch
|
||||
field={'ModelRequestRateLimitEnabled'}
|
||||
label={t('启用用户模型请求速率限制(可能会影响高并发性能)')}
|
||||
size='default'
|
||||
checkedText='|'
|
||||
uncheckedText='〇'
|
||||
onChange={(value) => {
|
||||
setInputs({
|
||||
...inputs,
|
||||
ModelRequestRateLimitEnabled: value,
|
||||
});
|
||||
}}
|
||||
/>
|
||||
</Col>
|
||||
</Row>
|
||||
<Row>
|
||||
<Col span={8}>
|
||||
<Form.InputNumber
|
||||
label={t('限制周期')}
|
||||
step={1}
|
||||
min={0}
|
||||
suffix={t('分钟')}
|
||||
extraText={t('频率限制的周期(分钟)')}
|
||||
field={'ModelRequestRateLimitDurationMinutes'}
|
||||
onChange={(value) =>
|
||||
setInputs({
|
||||
...inputs,
|
||||
ModelRequestRateLimitDurationMinutes: String(value),
|
||||
})
|
||||
}
|
||||
/>
|
||||
</Col>
|
||||
</Row>
|
||||
<Row>
|
||||
<Col span={8}>
|
||||
<Form.InputNumber
|
||||
label={t('用户每周期最多请求次数')}
|
||||
step={1}
|
||||
min={0}
|
||||
suffix={t('次')}
|
||||
extraText={t('包括失败请求的次数,0代表不限制')}
|
||||
field={'ModelRequestRateLimitCount'}
|
||||
onChange={(value) =>
|
||||
setInputs({
|
||||
...inputs,
|
||||
ModelRequestRateLimitCount: String(value),
|
||||
})
|
||||
}
|
||||
/>
|
||||
</Col>
|
||||
<Col span={8}>
|
||||
<Form.InputNumber
|
||||
label={t('用户每周期最多请求完成次数')}
|
||||
step={1}
|
||||
min={1}
|
||||
suffix={t('次')}
|
||||
extraText={t('只包括请求成功的次数')}
|
||||
field={'ModelRequestRateLimitSuccessCount'}
|
||||
onChange={(value) =>
|
||||
setInputs({
|
||||
...inputs,
|
||||
ModelRequestRateLimitSuccessCount: String(value),
|
||||
})
|
||||
}
|
||||
/>
|
||||
</Col>
|
||||
</Row>
|
||||
<Row>
|
||||
<Button size='default' onClick={onSubmit}>
|
||||
{t('保存模型速率限制')}
|
||||
</Button>
|
||||
</Row>
|
||||
</Form.Section>
|
||||
</Form>
|
||||
</Spin>
|
||||
</>
|
||||
);
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import { isRoot } from '../../helpers';
|
||||
import OtherSetting from '../../components/OtherSetting';
|
||||
import PersonalSetting from '../../components/PersonalSetting';
|
||||
import OperationSetting from '../../components/OperationSetting';
|
||||
import RateLimitSetting from '../../components/RateLimitSetting.js';
|
||||
|
||||
const Setting = () => {
|
||||
const { t } = useTranslation();
|
||||
@@ -28,6 +29,11 @@ const Setting = () => {
|
||||
content: <OperationSetting />,
|
||||
itemKey: 'operation',
|
||||
});
|
||||
panes.push({
|
||||
tab: t('速率限制设置'),
|
||||
content: <RateLimitSetting />,
|
||||
itemKey: 'ratelimit',
|
||||
});
|
||||
panes.push({
|
||||
tab: t('系统设置'),
|
||||
content: <SystemSetting />,
|
||||
|
||||
Reference in New Issue
Block a user