Compare commits

...

20 Commits

Author SHA1 Message Date
CaIon 2a1ed60738 Merge branch 'alpha' 2025-06-17 14:49:13 +08:00
Calcium-Ion a13a63549b Merge pull request #1243 from cjm0810151/main
fix(audio): :bugs: fix webm audio strconv.ParseFloat: parsing "N/A"
2025-06-17 14:48:17 +08:00
Calcium-Ion 24ae1fa55e Merge pull request #1240 from RedwindA/fix/redis
Fix: optimize Redis expiration handling and refactor cache duration retrieval
2025-06-17 14:47:00 +08:00
chenjm ab5dfd8d97 fix(audio): :bugs: fix webm audio strconv.ParseFloat: parsing "N/A" 2025-06-17 10:04:36 +08:00
chenjm 99dacb1d4b fix(audio): :bugs: fix webm audio strconv.ParseFloat: parsing "N/A" 2025-06-17 09:21:56 +08:00
RedwindA 09ffc364f2 🔧 refactor(redis): replace direct constant usage with RedisKeyCacheSeconds function for cache duration 2025-06-17 03:24:39 +08:00
RedwindA e34d3c2160 🔧 fix(redis): only set expiration if greater than 0 in RedisHSetObj 2025-06-17 02:37:19 +08:00
CaIon 28a0eb2a6d Merge remote-tracking branch 'origin/alpha' into alpha 2025-06-17 00:09:38 +08:00
CaIon af103dcc7f 🧹 chore(relay): remove unused import in relay-palm.go 2025-06-17 00:09:26 +08:00
Calcium-Ion 71fe637d2a Merge pull request #1231 from RedwindA/feat/gemini-budget-in-name
feat(Gemini): implement thinking budget control in model name
2025-06-17 00:03:53 +08:00
Calcium-Ion 293decb96e Merge pull request #1208 from feitianbubu/pr/fix-hot-reload-sync-options
fix: enabled hot reload SyncOptions
2025-06-16 23:03:29 +08:00
CaIon 288f279379 🔧 refactor(relay): replace UUID generation with helper function for response IDs 2025-06-16 21:02:27 +08:00
Apple\Apple abceadcc6d Merge branch 'main' into alpha 2025-06-16 20:06:40 +08:00
Apple\Apple 35a08da791 🐛 fix(console-setting): ensure announcements are returned in newest-first order
Summary
• Added stable, descending sort to `GetAnnouncements()` so that the API always returns the latest announcements first.
• Introduced helper `getPublishTime()` to safely parse `publishDate` (RFC 3339) and fall back to zero value on failure.
• Switched to `sort.SliceStable` for deterministic ordering when timestamps are identical.
• Imported the standard `sort` package and removed redundant, duplicate date parsing.

Impact
Front-end no longer needs to perform client-side sorting; the latest announcement is guaranteed to appear at the top on all platforms and clients.
2025-06-16 20:05:54 +08:00
RedwindA 9209355f7d refactor: replace inline closure with a helper function 2025-06-16 19:41:42 +08:00
Calcium-Ion bd82e7c341 Merge pull request #1205 from a37836323/fix-azure-responses-api
修复Azure渠道对responses API的兼容性支持 - 为Azure渠道添加对responses API的特殊处理 - 兼容微软新…
2025-06-16 19:17:21 +08:00
RedwindA 45e9607995 update i18n 2025-06-15 23:40:58 +08:00
RedwindA 1e1a6aae55 feat: implement thinking budget control in model name 2025-06-15 23:20:41 +08:00
skynono 8c20aef5d1 fix: enabled hot reload SyncOptions 2025-06-12 17:17:07 +08:00
a37836323 a126015940 修复Azure渠道对responses API的兼容性支持 - 为Azure渠道添加对responses API的特殊处理 - 兼容微软新的API格式,使用preview版本的api-version - 修复了Azure渠道无法正确处理responses请求的问题 2025-06-11 22:11:47 +08:00
16 changed files with 150 additions and 32 deletions
+5 -1
View File
@@ -141,7 +141,11 @@ func RedisHSetObj(key string, obj interface{}, expiration time.Duration) error {
txn := RDB.TxPipeline()
txn.HSet(ctx, key, data)
txn.Expire(ctx, key, expiration)
// 只有在 expiration 大于 0 时才设置过期时间
if expiration > 0 {
txn.Expire(ctx, key, expiration)
}
_, err := txn.Exec(ctx)
if err != nil {
+27 -2
View File
@@ -249,13 +249,38 @@ func SaveTmpFile(filename string, data io.Reader) (string, error) {
}
// GetAudioDuration returns the duration of an audio file in seconds.
func GetAudioDuration(ctx context.Context, filename string) (float64, error) {
func GetAudioDuration(ctx context.Context, filename string, ext string) (float64, error) {
// ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {{input}}
c := exec.CommandContext(ctx, "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", filename)
output, err := c.Output()
if err != nil {
return 0, errors.Wrap(err, "failed to get audio duration")
}
durationStr := string(bytes.TrimSpace(output))
if durationStr == "N/A" {
// Create a temporary output file name
tmpFp, err := os.CreateTemp("", "audio-*"+ext)
if err != nil {
return 0, errors.Wrap(err, "failed to create temporary file")
}
tmpName := tmpFp.Name()
// Close immediately so ffmpeg can open the file on Windows.
_ = tmpFp.Close()
defer os.Remove(tmpName)
return strconv.ParseFloat(string(bytes.TrimSpace(output)), 64)
// ffmpeg -y -i filename -vcodec copy -acodec copy <tmpName>
ffmpegCmd := exec.CommandContext(ctx, "ffmpeg", "-y", "-i", filename, "-vcodec", "copy", "-acodec", "copy", tmpName)
if err := ffmpegCmd.Run(); err != nil {
return 0, errors.Wrap(err, "failed to run ffmpeg")
}
// Recalculate the duration of the new file
c = exec.CommandContext(ctx, "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", tmpName)
output, err := c.Output()
if err != nil {
return 0, errors.Wrap(err, "failed to get audio duration after ffmpeg")
}
durationStr = string(bytes.TrimSpace(output))
}
return strconv.ParseFloat(durationStr, 64)
}
+4 -6
View File
@@ -2,12 +2,10 @@ package constant
import "one-api/common"
var (
TokenCacheSeconds = common.SyncFrequency
UserId2GroupCacheSeconds = common.SyncFrequency
UserId2QuotaCacheSeconds = common.SyncFrequency
UserId2StatusCacheSeconds = common.SyncFrequency
)
// 使用函数来避免初始化顺序带来的赋值问题
func RedisKeyCacheSeconds() int {
return common.SyncFrequency
}
// Cache keys
const (
+3 -1
View File
@@ -105,10 +105,12 @@ func main() {
model.InitChannelCache()
}()
go model.SyncOptions(common.SyncFrequency)
go model.SyncChannelCache(common.SyncFrequency)
}
// 热更新配置
go model.SyncOptions(common.SyncFrequency)
// 数据看板
go model.UpdateQuotaData()
+1 -1
View File
@@ -10,7 +10,7 @@ import (
func cacheSetToken(token Token) error {
key := common.GenerateHMAC(token.Key)
token.Clean()
err := common.RedisHSetObj(fmt.Sprintf("token:%s", key), &token, time.Duration(constant.TokenCacheSeconds)*time.Second)
err := common.RedisHSetObj(fmt.Sprintf("token:%s", key), &token, time.Duration(constant.RedisKeyCacheSeconds())*time.Second)
if err != nil {
return err
}
+1 -1
View File
@@ -70,7 +70,7 @@ func updateUserCache(user User) error {
return common.RedisHSetObj(
getUserCacheKey(user.Id),
user.ToBaseUser(),
time.Duration(constant.UserId2QuotaCacheSeconds)*time.Second,
time.Duration(constant.RedisKeyCacheSeconds())*time.Second,
)
}
+1 -2
View File
@@ -3,7 +3,6 @@ package cohere
import (
"bufio"
"encoding/json"
"fmt"
"github.com/gin-gonic/gin"
"io"
"net/http"
@@ -78,7 +77,7 @@ func stopReasonCohere2OpenAI(reason string) string {
}
func cohereStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
responseId := fmt.Sprintf("chatcmpl-%s", common.GetUUID())
responseId := helper.GetResponseID(c)
createdTime := common.GetTimestamp()
usage := &dto.Usage{}
responseText := ""
+5 -2
View File
@@ -72,8 +72,11 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
// suffix -thinking and -nothinking
if strings.HasSuffix(info.OriginModelName, "-thinking") {
// 新增逻辑:处理 -thinking-<budget> 格式
if strings.Contains(info.OriginModelName, "-thinking-") {
parts := strings.Split(info.UpstreamModelName, "-thinking-")
info.UpstreamModelName = parts[0]
} else if strings.HasSuffix(info.OriginModelName, "-thinking") { // 旧的适配
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
+46 -5
View File
@@ -12,6 +12,7 @@ import (
"one-api/relay/helper"
"one-api/service"
"one-api/setting/model_setting"
"strconv"
"strings"
"unicode/utf8"
@@ -36,6 +37,13 @@ var geminiSupportedMimeTypes = map[string]bool{
"video/flv": true,
}
// Gemini 允许的思考预算范围
const (
pro25MinBudget = 128
pro25MaxBudget = 32768
flash25MaxBudget = 24576
)
// Setting safety to the lowest possible values since Gemini is already powerless enough
func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*GeminiChatRequest, error) {
@@ -57,7 +65,40 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
}
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
if strings.HasSuffix(info.OriginModelName, "-thinking") {
// 新增逻辑:处理 -thinking-<budget> 格式
if strings.Contains(info.OriginModelName, "-thinking-") {
parts := strings.SplitN(info.OriginModelName, "-thinking-", 2)
if len(parts) == 2 && parts[1] != "" {
if budgetTokens, err := strconv.Atoi(parts[1]); err == nil {
// 从模型名称成功解析预算
isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
if isNew25Pro {
// 新的2.5pro模型:ThinkingBudget范围为128-32768
if budgetTokens < pro25MinBudget {
budgetTokens = pro25MinBudget
} else if budgetTokens > pro25MaxBudget {
budgetTokens = pro25MaxBudget
}
} else {
// 其他模型:ThinkingBudget范围为0-24576
if budgetTokens < 0 {
budgetTokens = 0
} else if budgetTokens > flash25MaxBudget {
budgetTokens = flash25MaxBudget
}
}
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
ThinkingBudget: common.GetPointer(budgetTokens),
IncludeThoughts: true,
}
}
// 如果解析失败,则不设置ThinkingConfig,静默处理
}
} else if strings.HasSuffix(info.OriginModelName, "-thinking") { // 保留旧逻辑以兼容
// 硬编码不支持 ThinkingBudget 的旧模型
unsupportedModels := []string{
"gemini-2.5-pro-preview-05-06",
@@ -611,9 +652,9 @@ func getResponseToolCall(item *GeminiPart) *dto.ToolCallResponse {
}
}
func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResponse {
func responseGeminiChat2OpenAI(c *gin.Context, response *GeminiChatResponse) *dto.OpenAITextResponse {
fullTextResponse := dto.OpenAITextResponse{
Id: fmt.Sprintf("chatcmpl-%s", common.GetUUID()),
Id: helper.GetResponseID(c),
Object: "chat.completion",
Created: common.GetTimestamp(),
Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
@@ -754,7 +795,7 @@ func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.C
func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
// responseText := ""
id := fmt.Sprintf("chatcmpl-%s", common.GetUUID())
id := helper.GetResponseID(c)
createAt := common.GetTimestamp()
var usage = &dto.Usage{}
var imageCount int
@@ -849,7 +890,7 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
StatusCode: resp.StatusCode,
}, nil
}
fullTextResponse := responseGeminiChat2OpenAI(&geminiResponse)
fullTextResponse := responseGeminiChat2OpenAI(c, &geminiResponse)
fullTextResponse.Model = info.UpstreamModelName
usage := dto.Usage{
PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount,
+7
View File
@@ -88,6 +88,13 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
requestURL := strings.Split(info.RequestURLPath, "?")[0]
requestURL = fmt.Sprintf("%s?api-version=%s", requestURL, apiVersion)
task := strings.TrimPrefix(requestURL, "/v1/")
// 特殊处理 responses API
if info.RelayMode == constant.RelayModeResponses {
requestURL = fmt.Sprintf("/openai/v1/responses?api-version=preview")
return relaycommon.GetFullRequestURL(info.BaseUrl, requestURL, info.ChannelType), nil
}
model_ := info.UpstreamModelName
// 2025年5月10日后创建的渠道不移除.
if info.ChannelCreateTime < constant2.AzureNoRemoveDotTime {
+5 -3
View File
@@ -8,6 +8,7 @@ import (
"math"
"mime/multipart"
"net/http"
"path/filepath"
"one-api/common"
"one-api/constant"
"one-api/dto"
@@ -345,13 +346,14 @@ func countAudioTokens(c *gin.Context) (int, error) {
if err = c.ShouldBind(&reqBody); err != nil {
return 0, errors.WithStack(err)
}
ext := filepath.Ext(reqBody.File.Filename) // 获取文件扩展名
reqFp, err := reqBody.File.Open()
if err != nil {
return 0, errors.WithStack(err)
}
defer reqFp.Close()
tmpFp, err := os.CreateTemp("", "audio-*")
tmpFp, err := os.CreateTemp("", "audio-*"+ext)
if err != nil {
return 0, errors.WithStack(err)
}
@@ -365,7 +367,7 @@ func countAudioTokens(c *gin.Context) (int, error) {
return 0, errors.WithStack(err)
}
duration, err := common.GetAudioDuration(c.Request.Context(), tmpFp.Name())
duration, err := common.GetAudioDuration(c.Request.Context(), tmpFp.Name(), ext)
if err != nil {
return 0, errors.WithStack(err)
}
+1 -2
View File
@@ -2,7 +2,6 @@ package palm
import (
"encoding/json"
"fmt"
"github.com/gin-gonic/gin"
"io"
"net/http"
@@ -73,7 +72,7 @@ func streamResponsePaLM2OpenAI(palmResponse *PaLMChatResponse) *dto.ChatCompleti
func palmStreamHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode, string) {
responseText := ""
responseId := fmt.Sprintf("chatcmpl-%s", common.GetUUID())
responseId := helper.GetResponseID(c)
createdTime := common.GetTimestamp()
dataChan := make(chan string)
stopChan := make(chan bool)
+17 -1
View File
@@ -7,6 +7,7 @@ import (
"regexp"
"strings"
"time"
"sort"
)
var (
@@ -210,8 +211,23 @@ func validateFAQ(faqStr string) error {
return nil
}
func getPublishTime(item map[string]interface{}) time.Time {
if v, ok := item["publishDate"]; ok {
if s, ok2 := v.(string); ok2 {
if t, err := time.Parse(time.RFC3339, s); err == nil {
return t
}
}
}
return time.Time{}
}
func GetAnnouncements() []map[string]interface{} {
return getJSONList(GetConsoleSetting().Announcements)
list := getJSONList(GetConsoleSetting().Announcements)
sort.SliceStable(list, func(i, j int) bool {
return getPublishTime(list[i]).After(getPublishTime(list[j]))
})
return list
}
func GetFAQ() []map[string]interface{} {
+17 -2
View File
@@ -142,6 +142,11 @@ var defaultModelRatio = map[string]float64{
"gemini-2.5-flash-preview-04-17": 0.075,
"gemini-2.5-flash-preview-04-17-thinking": 0.075,
"gemini-2.5-flash-preview-04-17-nothinking": 0.075,
"gemini-2.5-flash-preview-05-20": 0.075,
"gemini-2.5-flash-preview-05-20-thinking": 0.075,
"gemini-2.5-flash-preview-05-20-nothinking": 0.075,
"gemini-2.5-flash-thinking-*": 0.075, // 用于为后续所有2.5 flash thinking budget 模型设置默认倍率
"gemini-2.5-pro-thinking-*": 0.625, // 用于为后续所有2.5 pro thinking budget 模型设置默认倍率
"text-embedding-004": 0.001,
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
@@ -342,10 +347,20 @@ func UpdateModelRatioByJSONString(jsonStr string) error {
return json.Unmarshal([]byte(jsonStr), &modelRatioMap)
}
// 处理带有思考预算的模型名称,方便统一定价
func handleThinkingBudgetModel(name, prefix, wildcard string) string {
if strings.HasPrefix(name, prefix) && strings.Contains(name, "-thinking-") {
return wildcard
}
return name
}
func GetModelRatio(name string) (float64, bool) {
modelRatioMapMutex.RLock()
defer modelRatioMapMutex.RUnlock()
name = handleThinkingBudgetModel(name, "gemini-2.5-flash", "gemini-2.5-flash-thinking-*")
name = handleThinkingBudgetModel(name, "gemini-2.5-pro", "gemini-2.5-pro-thinking-*")
if strings.HasPrefix(name, "gpt-4-gizmo") {
name = "gpt-4-gizmo-*"
}
@@ -470,9 +485,9 @@ func getHardcodedCompletionModelRatio(name string) (float64, bool) {
return 4, true
} else if strings.HasPrefix(name, "gemini-2.0") {
return 4, true
} else if strings.HasPrefix(name, "gemini-2.5-pro-preview") {
} else if strings.HasPrefix(name, "gemini-2.5-pro") { // 移除preview来增加兼容性,这里假设正式版的倍率和preview一致
return 8, true
} else if strings.HasPrefix(name, "gemini-2.5-flash-preview") {
} else if strings.HasPrefix(name, "gemini-2.5-flash") { // 同上
if strings.HasSuffix(name, "-nothinking") {
return 4, false
} else {
+6
View File
@@ -1373,6 +1373,12 @@
"示例": "Example",
"缺省 MaxTokens": "Default MaxTokens",
"启用Claude思考适配(-thinking后缀)": "Enable Claude thinking adaptation (-thinking suffix)",
"和Claude不同,默认情况下Gemini的思考模型会自动决定要不要思考,就算不开启适配模型也可以正常使用,": "Unlike Claude, Gemini's thinking model automatically decides whether to think by default, and can be used normally even without enabling the adaptation model.",
"如果您需要计费,推荐设置无后缀模型价格按思考价格设置。": "If you need billing, it is recommended to set the no-suffix model price according to the thinking price.",
"支持使用 gemini-2.5-pro-preview-06-05-thinking-128 格式来精确传递思考预算。": "Supports using gemini-2.5-pro-preview-06-05-thinking-128 format to precisely pass thinking budget.",
"启用Gemini思考后缀适配": "Enable Gemini thinking suffix adaptation",
"适配-thinking、-thinking-预算数字和-nothinking后缀": "Adapt -thinking, -thinking-budgetNumber, and -nothinking suffixes",
"思考预算占比": "Thinking budget ratio",
"Claude思考适配 BudgetTokens = MaxTokens * BudgetTokens 百分比": "Claude thinking adaptation BudgetTokens = MaxTokens * BudgetTokens percentage",
"思考适配 BudgetTokens 百分比": "Thinking adaptation BudgetTokens percentage",
"0.1-1之间的小数": "Decimal between 0.1 and 1",
@@ -173,7 +173,8 @@ export default function SettingGeminiModel(props) {
<Text>
{t(
"和Claude不同,默认情况下Gemini的思考模型会自动决定要不要思考,就算不开启适配模型也可以正常使用," +
"如果您需要计费,推荐设置无后缀模型价格按思考价格设置"
"如果您需要计费,推荐设置无后缀模型价格按思考价格设置" +
"支持使用 gemini-2.5-pro-preview-06-05-thinking-128 格式来精确传递思考预算。"
)}
</Text>
</Col>
@@ -183,7 +184,7 @@ export default function SettingGeminiModel(props) {
<Form.Switch
label={t('启用Gemini思考后缀适配')}
field={'gemini.thinking_adapter_enabled'}
extraText={"适配-thinking-nothinking后缀"}
extraText={t('适配 -thinking、-thinking-预算数字 和 -nothinking 后缀')}
onChange={(value) =>
setInputs({
...inputs,
@@ -205,7 +206,7 @@ export default function SettingGeminiModel(props) {
<Row>
<Col xs={24} sm={12} md={8} lg={8} xl={8}>
<Form.InputNumber
label={t('请求模型带-thinking后缀的BudgetTokens数(超出24576的部分将被忽略)')}
label={t('思考预算占比')}
field={'gemini.thinking_adapter_budget_tokens_percentage'}
initValue={''}
extraText={t('0.1-1之间的小数')}