Merge pull request #2483 from seefs001/fix/vertex-function-response-id

fix: 模型设置增加针对Vertex渠道过滤content[].part[].functionResponse.id的选项，默认启用
fix: 在Vertex Adapter过滤content[].part[].functionResponse.id
2025-12-21 17:24:07 +08:00 · 2025-12-21 17:22:04 +08:00 · 2025-12-21 17:09:49 +08:00 · 2025-12-20 13:34:10 +08:00 · 2025-12-20 13:27:55 +08:00 · 2025-12-20 13:26:40 +08:00
183 changed files with 22372 additions and 1647 deletions
@@ -63,7 +63,7 @@
 # 是否统计图片token
 # GET_MEDIA_TOKEN=true
 # 是否在非流（stream=false）情况下统计图片token
-# GET_MEDIA_TOKEN_NOT_STREAM=true
+# GET_MEDIA_TOKEN_NOT_STREAM=false
 # 设置 Dify 渠道是否输出工作流和节点信息到客户端
 # DIFY_DEBUG=true

@@ -22,6 +22,10 @@ jobs:
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
+      - name: Determine Version
+        run: |
+          VERSION=$(git describe --tags)
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
      - uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest
@@ -31,7 +35,7 @@ jobs:
        run: |
          cd web
          bun install
-          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(git describe --tags) bun run build
+          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ..
      - name: Set up Go
        uses: actions/setup-go@v3
@@ -40,13 +44,11 @@ jobs:
      - name: Build Backend (amd64)
        run: |
          go mod download
-          VERSION=$(git describe --tags)
          go build -ldflags "-s -w -X 'new-api/common.Version=$VERSION' -extldflags '-static'" -o new-api-$VERSION
      - name: Build Backend (arm64)
        run: |
          sudo apt-get update
          DEBIAN_FRONTEND=noninteractive sudo apt-get install -y gcc-aarch64-linux-gnu
-          VERSION=$(git describe --tags)
          CC=aarch64-linux-gnu-gcc CGO_ENABLED=1 GOOS=linux GOARCH=arm64 go build -ldflags "-s -w -X 'new-api/common.Version=$VERSION' -extldflags '-static'" -o new-api-arm64-$VERSION
      - name: Release
        uses: softprops/action-gh-release@v2
@@ -65,6 +67,10 @@ jobs:
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
+      - name: Determine Version
+        run: |
+          VERSION=$(git describe --tags)
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
      - uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest
@@ -75,7 +81,7 @@ jobs:
        run: |
          cd web
          bun install
-          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(git describe --tags) bun run build
+          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ..
      - name: Set up Go
        uses: actions/setup-go@v3
@@ -84,7 +90,6 @@ jobs:
      - name: Build Backend
        run: |
          go mod download
-          VERSION=$(git describe --tags)
          go build -ldflags "-X 'new-api/common.Version=$VERSION'" -o new-api-macos-$VERSION
      - name: Release
        uses: softprops/action-gh-release@v2
@@ -105,6 +110,10 @@ jobs:
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
+      - name: Determine Version
+        run: |
+          VERSION=$(git describe --tags)
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
      - uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest
@@ -114,7 +123,7 @@ jobs:
        run: |
          cd web
          bun install
-          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(git describe --tags) bun run build
+          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ..
      - name: Set up Go
        uses: actions/setup-go@v3
@@ -123,7 +132,6 @@ jobs:
      - name: Build Backend
        run: |
          go mod download
-          VERSION=$(git describe --tags)
          go build -ldflags "-s -w -X 'new-api/common.Version=$VERSION'" -o new-api-$VERSION.exe
      - name: Release
        uses: softprops/action-gh-release@v2
@@ -132,5 +140,3 @@ jobs:
          files: new-api-*.exe
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-
@@ -16,8 +16,10 @@ new-api
 tiktoken_cache
 .eslintcache
 .gocache
+.gomodcache/
 .cache
 web/bun.lock

 electron/node_modules
 electron/dist
+data/
@@ -14,7 +14,7 @@ ENV GO111MODULE=on CGO_ENABLED=0
 ARG TARGETOS
 ARG TARGETARCH
 ENV GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64}
-
+ENV GOEXPERIMENT=greenteagc

 WORKDIR /build

@@ -25,10 +25,11 @@ COPY . .
 COPY --from=builder /build/dist ./web/dist
 RUN go build -ldflags "-s -w -X 'github.com/QuantumNous/new-api/common.Version=$(cat VERSION)'" -o new-api

-FROM alpine
+FROM debian:bookworm-slim

-RUN apk upgrade --no-cache \
-    && apk add --no-cache ca-certificates tzdata \
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ca-certificates tzdata libasan8 wget \
+    && rm -rf /var/lib/apt/lists/* \
    && update-ca-certificates

 COPY --from=builder2 /build/new-api /
@@ -193,6 +193,7 @@ docker run --name new-api -d --restart always \

 ### 🔐 Authorization and Security

+- 😈 Discord authorization login
 - 🤖 LinuxDO authorization login
 - 📱 Telegram authorization login
 - 🔑 OIDC unified authentication
@@ -237,6 +238,7 @@ docker run --name new-api -d --restart always \
 - `gemini-2.5-flash-nothinking` - Disable thinking mode
 - `gemini-2.5-pro-thinking` - Enable thinking mode
 - `gemini-2.5-pro-thinking-128` - Enable thinking mode with thinking budget of 128 tokens
+- You can also append `-low`, `-medium`, or `-high` to any Gemini model name to request the corresponding reasoning effort (no extra thinking-budget suffix needed).

 </details>

@@ -302,6 +304,8 @@ docker run --name new-api -d --restart always \
 | `SQL_DSN` | Database connection string | - |
 | `REDIS_CONN_STRING` | Redis connection string | - |
 | `STREAMING_TIMEOUT` | Streaming timeout (seconds) | `300` |
+| `STREAM_SCANNER_MAX_BUFFER_MB` | Max per-line buffer (MB) for the stream scanner; increase when upstream sends huge image/base64 payloads | `64` |
+| `MAX_REQUEST_BODY_MB` | Max request body size (MB, counted **after decompression**; prevents huge requests/zip bombs from exhausting memory). Exceeding it returns `413` | `64` |
 | `AZURE_DEFAULT_API_VERSION` | Azure API version | `2025-04-01-preview` |
 | `ERROR_LOG_ENABLED` | Error log switch | `false` |

@@ -234,6 +234,7 @@ docker run --name new-api -d --restart always \
 - `gemini-2.5-flash-nothinking` - Désactiver le mode de pensée
 - `gemini-2.5-pro-thinking` - Activer le mode de pensée
 - `gemini-2.5-pro-thinking-128` - Activer le mode de pensée avec budget de pensée de 128 tokens
+- Vous pouvez également ajouter les suffixes `-low`, `-medium` ou `-high` aux modèles Gemini pour fixer le niveau d’effort de raisonnement (sans suffixe de budget supplémentaire).

 </details>

@@ -299,6 +300,8 @@ docker run --name new-api -d --restart always \
 | `SQL_DSN` | Chaine de connexion à la base de données | - |
 | `REDIS_CONN_STRING` | Chaine de connexion Redis | - |
 | `STREAMING_TIMEOUT` | Délai d'expiration du streaming (secondes) | `300` |
+| `STREAM_SCANNER_MAX_BUFFER_MB` | Taille max du buffer par ligne (Mo) pour le scanner SSE ; à augmenter quand les sorties image/base64 sont très volumineuses (ex. images 4K) | `64` |
+| `MAX_REQUEST_BODY_MB` | Taille maximale du corps de requête (Mo, comptée **après décompression** ; évite les requêtes énormes/zip bombs qui saturent la mémoire). Dépassement ⇒ `413` | `64` |
 | `AZURE_DEFAULT_API_VERSION` | Version de l'API Azure | `2025-04-01-preview` |
 | `ERROR_LOG_ENABLED` | Interrupteur du journal d'erreurs | `false` |

@@ -438,4 +441,4 @@ Si ce projet vous est utile, bienvenue à nous donner une ⭐️ Étoile！

 <sub>Construit avec ❤️ par QuantumNous</sub>

-</div>
+</div>
@@ -243,6 +243,7 @@ docker run --name new-api -d --restart always \
 - `gemini-2.5-flash-nothinking` - 思考モードを無効にする
 - `gemini-2.5-pro-thinking` - 思考モードを有効にする
 - `gemini-2.5-pro-thinking-128` - 思考モードを有効にし、思考予算を128トークンに設定する
+- Gemini モデル名の末尾に `-low` / `-medium` / `-high` を付けることで推論強度を直接指定できます（追加の思考予算サフィックスは不要です）。

 </details>

@@ -308,6 +309,8 @@ docker run --name new-api -d --restart always \
 | `SQL_DSN` | データベース接続文字列 | - |
 | `REDIS_CONN_STRING` | Redis接続文字列 | - |
 | `STREAMING_TIMEOUT` | ストリーミング応答のタイムアウト時間（秒） | `300` |
+| `STREAM_SCANNER_MAX_BUFFER_MB` | ストリームスキャナの1行あたりバッファ上限（MB）。4K画像など巨大なbase64 `data:` ペイロードを扱う場合は値を増加させてください | `64` |
+| `MAX_REQUEST_BODY_MB` | リクエストボディ最大サイズ（MB、**解凍後**に計測。巨大リクエスト/zip bomb によるメモリ枯渇を防止）。超過時は `413` | `64` |
 | `AZURE_DEFAULT_API_VERSION` | Azure APIバージョン | `2025-04-01-preview` |
 | `ERROR_LOG_ENABLED` | エラーログスイッチ | `false` |

@@ -193,6 +193,7 @@ docker run --name new-api -d --restart always \

 ### 🔐 授权与安全

+- 😈 Discord 授权登录
 - 🤖 LinuxDO 授权登录
 - 📱 Telegram 授权登录
 - 🔑 OIDC 统一认证
@@ -238,6 +239,7 @@ docker run --name new-api -d --restart always \
 - `gemini-2.5-flash-nothinking` - 禁用思考模式
 - `gemini-2.5-pro-thinking` - 启用思考模式
 - `gemini-2.5-pro-thinking-128` - 启用思考模式，并设置思考预算为128tokens
+- 也可以直接在 Gemini 模型名称后追加 `-low` / `-medium` / `-high` 来控制思考力度（无需再设置思考预算后缀）

 </details>

@@ -296,15 +298,17 @@ docker run --name new-api -d --restart always \
 <details>
 <summary>常用环境变量配置</summary>

-| 变量名 | 说明 | 默认值 |
-|--------|------|--------|
-| `SESSION_SECRET` | 会话密钥（多机部署必须） | - |
-| `CRYPTO_SECRET` | 加密密钥（Redis 必须） | - |
-| `SQL_DSN` | 数据库连接字符串 | - |
-| `REDIS_CONN_STRING` | Redis 连接字符串 | - |
-| `STREAMING_TIMEOUT` | 流式超时时间（秒） | `300` |
-| `AZURE_DEFAULT_API_VERSION` | Azure API 版本 | `2025-04-01-preview` |
-| `ERROR_LOG_ENABLED` | 错误日志开关 | `false` |
+| 变量名 | 说明                                                           | 默认值 |
+|--------|--------------------------------------------------------------|--------|
+| `SESSION_SECRET` | 会话密钥（多机部署必须）                                                 | - |
+| `CRYPTO_SECRET` | 加密密钥（Redis 必须）                                               | - |
+| `SQL_DSN` | 数据库连接字符串                                                     | - |
+| `REDIS_CONN_STRING` | Redis 连接字符串                                                  | - |
+| `STREAMING_TIMEOUT` | 流式超时时间（秒）                                                    | `300` |
+| `STREAM_SCANNER_MAX_BUFFER_MB` | 流式扫描器单行最大缓冲（MB），图像生成等超大 `data:` 片段（如 4K 图片 base64）需适当调大 | `64` |
+| `MAX_REQUEST_BODY_MB` | 请求体最大大小（MB，**解压后**计；防止超大请求/zip bomb 导致内存暴涨），超过将返回 `413` | `64` |
+| `AZURE_DEFAULT_API_VERSION` | Azure API 版本                                                 | `2025-04-01-preview` |
+| `ERROR_LOG_ENABLED` | 错误日志开关                                                       | `false` |

 📖 **完整配置：** [环境变量文档](https://docs.newapi.pro/installation/environment-variables)

@@ -71,15 +71,66 @@ func getMP3Duration(r io.Reader) (float64, error) {

 // getWAVDuration 解析 WAV 文件头以获取时长。
 func getWAVDuration(r io.ReadSeeker) (float64, error) {
+	// 1. Force the read position back to the start
+	r.Seek(0, io.SeekStart) // NOTE(review): the Seek error is ignored here

 	dec := wav.NewDecoder(r)
+
+	// IsValidFile reads the fmt chunk
 	if !dec.IsValidFile() {
 		return 0, errors.New("invalid wav file")
 	}
-	d, err := dec.Duration()
-	if err != nil {
-		return 0, errors.Wrap(err, "failed to get wav duration")
+
+	// Try to locate the data chunk
+	if err := dec.FwdToPCM(); err != nil {
+		return 0, errors.Wrap(err, "failed to find PCM data chunk")
 	}
-	return d.Seconds(), nil
+
+	pcmSize := int64(dec.PCMSize)
+
+	// If the size read from the header is 0, try to derive it from the file size
+	if pcmSize == 0 {
+		// Determine the total file size
+		currentPos, _ := r.Seek(0, io.SeekCurrent) // usually just past the data chunk header at this point
+		endPos, _ := r.Seek(0, io.SeekEnd)
+		fileSize := endPos
+
+		// Restore the position (harmless even if nothing more is read afterwards)
+		r.Seek(currentPos, io.SeekStart)
+
+		// Data size ≈ total file size - current position (i.e. the header size)
+		// Note: after FwdToPCM succeeds, currentPos should point exactly at the start of the data payload,
+		// or just after the data chunk ID + size fields.
+		// A WAV header is typically 44 bytes.
+		if fileSize > 44 {
+			// If FwdToPCM succeeded, the reader should be at the start of the data chunk payload,
+			// so all remaining bytes are, in theory, audio data
+			pcmSize = fileSize - currentPos
+
+			// Simple fallback: if the result is still negative or 0, force file size - 44
+			if pcmSize <= 0 {
+				pcmSize = fileSize - 44
+			}
+		}
+	}
+
+	numChans := int64(dec.NumChans)
+	bitDepth := int64(dec.BitDepth)
+	sampleRate := float64(dec.SampleRate)
+
+	if sampleRate == 0 || numChans == 0 || bitDepth == 0 {
+		return 0, errors.New("invalid wav header metadata")
+	}
+
+	bytesPerFrame := numChans * (bitDepth / 8) // integer division: a bit depth below 8 yields 0 and is rejected below
+	if bytesPerFrame == 0 {
+		return 0, errors.New("invalid byte depth calculation")
+	}
+
+	totalFrames := pcmSize / bytesPerFrame // integer division: a trailing partial frame is dropped
+
+	durationSeconds := float64(totalFrames) / sampleRate
+	return durationSeconds, nil
 }

 // getFLACDuration 解析 FLAC 文件的 STREAMINFO 块。
@@ -121,6 +121,9 @@ var BatchUpdateInterval int

 var RelayTimeout int // unit is second

+var RelayMaxIdleConns int
+var RelayMaxIdleConnsPerHost int
+
 var GeminiSafetySetting string

 // https://docs.cohere.com/docs/safety-modes Type; NONE/CONTEXTUAL/STRICT
@@ -32,7 +32,7 @@ func SendEmail(subject string, receiver string, content string) error {
 	}
 	encodedSubject := fmt.Sprintf("=?UTF-8?B?%s?=", base64.StdEncoding.EncodeToString([]byte(subject)))
 	mail := []byte(fmt.Sprintf("To: %s\r\n"+
-		"From: %s<%s>\r\n"+
+		"From: %s <%s>\r\n"+
 		"Subject: %s\r\n"+
 		"Date: %s\r\n"+
 		"Message-ID: %s\r\n"+ // 添加 Message-ID 头
@@ -4,6 +4,7 @@ import (
 	"embed"
 	"io/fs"
 	"net/http"
+	"os"

 	"github.com/gin-contrib/static"
 )
@@ -14,7 +15,7 @@ type embedFileSystem struct {
 	http.FileSystem
 }

-func (e embedFileSystem) Exists(prefix string, path string) bool {
+func (e *embedFileSystem) Exists(prefix string, path string) bool {
 	_, err := e.Open(path)
 	if err != nil {
 		return false
@@ -22,12 +23,21 @@ func (e embedFileSystem) Exists(prefix string, path string) bool {
 	return true
 }

+func (e *embedFileSystem) Open(name string) (http.File, error) {
+	if name == "/" {
+		// This will make sure the index page goes to NoRouter handler,
+		// which will use the replaced index bytes with analytic codes.
+		return nil, os.ErrNotExist
+	}
+	return e.FileSystem.Open(name)
+}
+
 func EmbedFolder(fsEmbed embed.FS, targetPath string) static.ServeFileSystem {
 	efs, err := fs.Sub(fsEmbed, targetPath)
 	if err != nil {
 		panic(err)
 	}
-	return embedFileSystem{
+	return &embedFileSystem{
 		FileSystem: http.FS(efs),
 	}
 }
@@ -2,7 +2,7 @@ package common

 import (
 	"bytes"
-	"errors"
+	"fmt"
 	"io"
 	"mime"
 	"mime/multipart"
@@ -12,24 +12,61 @@ import (
 	"time"

 	"github.com/QuantumNous/new-api/constant"
+	"github.com/pkg/errors"

 	"github.com/gin-gonic/gin"
 )

 const KeyRequestBody = "key_request_body"

-func GetRequestBody(c *gin.Context) ([]byte, error) {
-	requestBody, _ := c.Get(KeyRequestBody)
-	if requestBody != nil {
-		return requestBody.([]byte), nil
+var ErrRequestBodyTooLarge = errors.New("request body too large")
+
+func IsRequestBodyTooLargeError(err error) bool { // reports whether err signals an oversized request body
+	if err == nil {
+		return false
 	}
-	requestBody, err := io.ReadAll(c.Request.Body)
+	if errors.Is(err, ErrRequestBodyTooLarge) { // matches the package sentinel, including wrapped forms
+		return true
+	}
+	var mbe *http.MaxBytesError // also accept net/http's own size-limit error type
+	return errors.As(err, &mbe)
+}
+
+func GetRequestBody(c *gin.Context) ([]byte, error) { // returns the request body, caching it on the gin context under KeyRequestBody
+	cached, exists := c.Get(KeyRequestBody)
+	if exists && cached != nil {
+		if b, ok := cached.([]byte); ok { // a cached value of any other type is ignored and the body is read again
+			return b, nil
+		}
+	}
+	maxMB := constant.MaxRequestBodyMB
+	if maxMB < 0 {
+		// no limit
+		body, err := io.ReadAll(c.Request.Body)
+		_ = c.Request.Body.Close()
+		if err != nil {
+			return nil, err
+		}
+		c.Set(KeyRequestBody, body)
+		return body, nil
+	}
+	maxBytes := int64(maxMB) << 20 // MB to bytes; maxMB == 0 gives a zero-byte limit, so any non-empty body is rejected
+
+	limited := io.LimitReader(c.Request.Body, maxBytes+1) // read one byte past the limit so an oversized body can be detected below
+	body, err := io.ReadAll(limited)
 	if err != nil {
+		_ = c.Request.Body.Close()
+		if IsRequestBodyTooLargeError(err) {
+			return nil, errors.Wrap(ErrRequestBodyTooLarge, fmt.Sprintf("request body exceeds %d MB", maxMB))
+		}
 		return nil, err
 	}
 	_ = c.Request.Body.Close()
-	c.Set(KeyRequestBody, requestBody)
-	return requestBody.([]byte), nil
+	if int64(len(body)) > maxBytes { // only possible when the extra byte was read, i.e. the body exceeds the limit
+		return nil, errors.Wrap(ErrRequestBodyTooLarge, fmt.Sprintf("request body exceeds %d MB", maxMB))
+	}
+	c.Set(KeyRequestBody, body) // oversized or failed reads are never cached
+	return body, nil
 }

 func UnmarshalBodyReusable(c *gin.Context, v any) error {
@@ -30,6 +30,11 @@ func printHelp() {
 func InitEnv() {
 	flag.Parse()

+	envVersion := os.Getenv("VERSION")
+	if envVersion != "" {
+		Version = envVersion
+	}
+
 	if *PrintVersion {
 		fmt.Println(Version)
 		os.Exit(0)
@@ -85,6 +90,8 @@ func InitEnv() {
 	SyncFrequency = GetEnvOrDefault("SYNC_FREQUENCY", 60)
 	BatchUpdateInterval = GetEnvOrDefault("BATCH_UPDATE_INTERVAL", 5)
 	RelayTimeout = GetEnvOrDefault("RELAY_TIMEOUT", 0)
+	RelayMaxIdleConns = GetEnvOrDefault("RELAY_MAX_IDLE_CONNS", 500)
+	RelayMaxIdleConnsPerHost = GetEnvOrDefault("RELAY_MAX_IDLE_CONNS_PER_HOST", 100)

 	// Initialize string variables with GetEnvOrDefaultString
 	GeminiSafetySetting = GetEnvOrDefaultString("GEMINI_SAFETY_SETTING", "BLOCK_NONE")
@@ -109,10 +116,14 @@ func initConstantEnv() {
 	constant.StreamingTimeout = GetEnvOrDefault("STREAMING_TIMEOUT", 300)
 	constant.DifyDebug = GetEnvOrDefaultBool("DIFY_DEBUG", true)
 	constant.MaxFileDownloadMB = GetEnvOrDefault("MAX_FILE_DOWNLOAD_MB", 20)
+	constant.StreamScannerMaxBufferMB = GetEnvOrDefault("STREAM_SCANNER_MAX_BUFFER_MB", 64)
+	// MaxRequestBodyMB 请求体最大大小（解压后），用于防止超大请求/zip bomb导致内存暴涨
+	constant.MaxRequestBodyMB = GetEnvOrDefault("MAX_REQUEST_BODY_MB", 64)
 	// ForceStreamOption 覆盖请求参数，强制返回usage信息
 	constant.ForceStreamOption = GetEnvOrDefaultBool("FORCE_STREAM_OPTION", true)
+	constant.CountToken = GetEnvOrDefaultBool("CountToken", true)
 	constant.GetMediaToken = GetEnvOrDefaultBool("GET_MEDIA_TOKEN", true)
-	constant.GetMediaTokenNotStream = GetEnvOrDefaultBool("GET_MEDIA_TOKEN_NOT_STREAM", true)
+	constant.GetMediaTokenNotStream = GetEnvOrDefaultBool("GET_MEDIA_TOKEN_NOT_STREAM", false)
 	constant.UpdateTask = GetEnvOrDefaultBool("UPDATE_TASK", true)
 	constant.AzureDefaultAPIVersion = GetEnvOrDefaultString("AZURE_DEFAULT_API_VERSION", "2025-04-01-preview")
 	constant.GeminiVisionMaxImageNum = GetEnvOrDefault("GEMINI_VISION_MAX_IMAGE_NUM", 16)
@@ -122,6 +133,8 @@ func initConstantEnv() {
 	constant.GenerateDefaultToken = GetEnvOrDefaultBool("GENERATE_DEFAULT_TOKEN", false)
 	// 是否启用错误日志
 	constant.ErrorLogEnabled = GetEnvOrDefaultBool("ERROR_LOG_ENABLED", false)
+	// 任务轮询时查询的最大数量
+	constant.TaskQueryLimit = GetEnvOrDefault("TASK_QUERY_LIMIT", 1000)

 	soraPatchStr := GetEnvOrDefaultString("TASK_PRICE_PATCH", "")
 	if soraPatchStr != "" {
@@ -2,6 +2,15 @@ package common

 import "net"

+func IsIP(s string) bool { // reports whether s is accepted by net.ParseIP
+	ip := net.ParseIP(s)
+	return ip != nil
+}
+
+func ParseIP(s string) net.IP { // thin wrapper over net.ParseIP; the result is nil when s is not a valid IP
+	return net.ParseIP(s)
+}
+
 func IsPrivateIP(ip net.IP) bool {
 	if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
 		return true
@@ -20,3 +29,23 @@ func IsPrivateIP(ip net.IP) bool {
 	}
 	return false
 }
+
+func IsIpInCIDRList(ip net.IP, cidrList []string) bool { // reports whether ip matches any CIDR range or plain IP entry in cidrList
+	for _, cidr := range cidrList {
+		_, network, err := net.ParseCIDR(cidr)
+		if err != nil {
+			// Not a CIDR: try treating the entry as a single IP address
+			if whitelistIP := net.ParseIP(cidr); whitelistIP != nil {
+				if ip.Equal(whitelistIP) {
+					return true
+				}
+			}
+			continue // entries that are neither a CIDR nor an IP are skipped
+		}
+
+		if network.Contains(ip) {
+			return true
+		}
+	}
+	return false
+}
@@ -23,11 +23,11 @@ func Marshal(v any) ([]byte, error) {
 }

 func GetJsonType(data json.RawMessage) string {
-	data = bytes.TrimSpace(data)
-	if len(data) == 0 {
+	trimmed := bytes.TrimSpace(data)
+	if len(trimmed) == 0 {
 		return "unknown"
 	}
-	firstChar := bytes.TrimSpace(data)[0]
+	firstChar := trimmed[0]
 	switch firstChar {
 	case '{':
 		return "object"
@@ -17,6 +17,13 @@ var (
 		"flux-",
 		"flux.1-",
 	}
+	OpenAITextModels = []string{
+		"gpt-",
+		"o1",
+		"o3",
+		"o4",
+		"chatgpt",
+	}
 )

 func IsOpenAIResponseOnlyModel(modelName string) bool {
@@ -40,3 +47,13 @@ func IsImageGenerationModel(modelName string) bool {
 	}
 	return false
 }
+
+func IsOpenAITextModel(modelName string) bool { // reports whether modelName contains any entry of OpenAITextModels
+	modelName = strings.ToLower(modelName) // entries are lowercase, so the match is case-insensitive
+	for _, m := range OpenAITextModels {
+		if strings.Contains(modelName, m) { // substring match anywhere in the name, not a prefix match
+			return true
+		}
+	}
+	return false
+}
@@ -186,23 +186,7 @@ func isIPListed(ip net.IP, list []string) bool {
 		return false
 	}

-	for _, whitelistCIDR := range list {
-		_, network, err := net.ParseCIDR(whitelistCIDR)
-		if err != nil {
-			// 尝试作为单个IP处理
-			if whitelistIP := net.ParseIP(whitelistCIDR); whitelistIP != nil {
-				if ip.Equal(whitelistIP) {
-					return true
-				}
-			}
-			continue
-		}
-
-		if network.Contains(ip) {
-			return true
-		}
-	}
-	return false
+	return IsIpInCIDRList(ip, list)
 }

 // IsIPAccessAllowed 检查IP是否允许访问
@@ -3,12 +3,19 @@ package common
 import (
 	"encoding/base64"
 	"encoding/json"
-	"math/rand"
 	"net/url"
 	"regexp"
 	"strconv"
 	"strings"
 	"unsafe"
+
+	"github.com/samber/lo"
+)
+
+var (
+	maskURLPattern    = regexp.MustCompile(`(http|https)://[^\s/$.?#].[^\s]*`)
+	maskDomainPattern = regexp.MustCompile(`\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b`)
+	maskIPPattern     = regexp.MustCompile(`\b(?:\d{1,3}\.){3}\d{1,3}\b`)
 )

 func GetStringIfEmpty(str string, defaultValue string) string {
@@ -19,12 +26,10 @@ func GetStringIfEmpty(str string, defaultValue string) string {
 }

 func GetRandomString(length int) string {
-	//rand.Seed(time.Now().UnixNano())
-	key := make([]byte, length)
-	for i := 0; i < length; i++ {
-		key[i] = keyChars[rand.Intn(len(keyChars))]
+	if length <= 0 {
+		return ""
 	}
-	return string(key)
+	return lo.RandomString(length, lo.AlphanumericCharset)
 }

 func MapToJsonStr(m map[string]interface{}) string {
@@ -170,8 +175,7 @@ func maskHostForPlainDomain(domain string) string {
 // api.openai.com -> ***.***.com
 func MaskSensitiveInfo(str string) string {
 	// Mask URLs
-	urlPattern := regexp.MustCompile(`(http|https)://[^\s/$.?#].[^\s]*`)
-	str = urlPattern.ReplaceAllStringFunc(str, func(urlStr string) string {
+	str = maskURLPattern.ReplaceAllStringFunc(str, func(urlStr string) string {
 		u, err := url.Parse(urlStr)
 		if err != nil {
 			return urlStr
@@ -224,14 +228,12 @@ func MaskSensitiveInfo(str string) string {
 	})

 	// Mask domain names without protocol (like openai.com, www.openai.com)
-	domainPattern := regexp.MustCompile(`\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b`)
-	str = domainPattern.ReplaceAllStringFunc(str, func(domain string) string {
+	str = maskDomainPattern.ReplaceAllStringFunc(str, func(domain string) string {
 		return maskHostForPlainDomain(domain)
 	})

 	// Mask IP addresses
-	ipPattern := regexp.MustCompile(`\b(?:\d{1,3}\.){3}\d{1,3}\b`)
-	str = ipPattern.ReplaceAllString(str, "***.***.***.***")
+	str = maskIPPattern.ReplaceAllString(str, "***.***.***.***")

 	return str
 }
@@ -217,11 +217,6 @@ func IntMax(a int, b int) int {
 	}
 }

-func IsIP(s string) bool {
-	ip := net.ParseIP(s)
-	return ip != nil
-}
-
 func GetUUID() string {
 	code := uuid.New().String()
 	code = strings.Replace(code, "-", "", -1)
@@ -180,3 +180,27 @@ func GetChannelTypeName(channelType int) string {
 	}
 	return "Unknown"
 }
+
+type ChannelSpecialBase struct {
+	ClaudeBaseURL string
+	OpenAIBaseURL string
+}
+
+var ChannelSpecialBases = map[string]ChannelSpecialBase{
+	"glm-coding-plan": {
+		ClaudeBaseURL: "https://open.bigmodel.cn/api/anthropic",
+		OpenAIBaseURL: "https://open.bigmodel.cn/api/coding/paas/v4",
+	},
+	"glm-coding-plan-international": {
+		ClaudeBaseURL: "https://api.z.ai/api/anthropic",
+		OpenAIBaseURL: "https://api.z.ai/api/coding/paas/v4",
+	},
+	"kimi-coding-plan": {
+		ClaudeBaseURL: "https://api.kimi.com/coding",
+		OpenAIBaseURL: "https://api.kimi.com/coding/v1",
+	},
+	"doubao-coding-plan": {
+		ClaudeBaseURL: "https://ark.cn-beijing.volces.com/api/coding",
+		OpenAIBaseURL: "https://ark.cn-beijing.volces.com/api/coding/v3",
+	},
+}
@@ -3,8 +3,9 @@ package constant
 type ContextKey string

 const (
-	ContextKeyTokenCountMeta ContextKey = "token_count_meta"
-	ContextKeyPromptTokens   ContextKey = "prompt_tokens"
+	ContextKeyTokenCountMeta  ContextKey = "token_count_meta"
+	ContextKeyPromptTokens    ContextKey = "prompt_tokens"
+	ContextKeyEstimatedTokens ContextKey = "estimated_tokens"

 	ContextKeyOriginalModel    ContextKey = "original_model"
 	ContextKeyRequestStartTime ContextKey = "request_start_time"
@@ -17,6 +18,7 @@ const (
 	ContextKeyTokenSpecificChannelId ContextKey = "specific_channel_id"
 	ContextKeyTokenModelLimitEnabled ContextKey = "token_model_limit_enabled"
 	ContextKeyTokenModelLimit        ContextKey = "token_model_limit"
+	ContextKeyTokenCrossGroupRetry   ContextKey = "token_cross_group_retry"

 	/* channel related keys */
 	ContextKeyChannelId                ContextKey = "channel_id"
@@ -36,6 +38,10 @@ const (
 	ContextKeyChannelMultiKeyIndex     ContextKey = "channel_multi_key_index"
 	ContextKeyChannelKey               ContextKey = "channel_key"

+	ContextKeyAutoGroup           ContextKey = "auto_group"
+	ContextKeyAutoGroupIndex      ContextKey = "auto_group_index"
+	ContextKeyAutoGroupRetryIndex ContextKey = "auto_group_retry_index"
+
 	/* user related keys */
 	ContextKeyUserId      ContextKey = "id"
 	ContextKeyUserSetting ContextKey = "user_setting"
@@ -46,5 +52,7 @@ const (
 	ContextKeyUsingGroup  ContextKey = "group"
 	ContextKeyUserName    ContextKey = "username"

+	ContextKeyLocalCountTokens ContextKey = "local_count_tokens"
+
 	ContextKeySystemPromptOverride ContextKey = "system_prompt_override"
 )
@@ -3,16 +3,20 @@ package constant
 var StreamingTimeout int
 var DifyDebug bool
 var MaxFileDownloadMB int
+var StreamScannerMaxBufferMB int
 var ForceStreamOption bool
+var CountToken bool
 var GetMediaToken bool
 var GetMediaTokenNotStream bool
 var UpdateTask bool
+var MaxRequestBodyMB int
 var AzureDefaultAPIVersion string
 var GeminiVisionMaxImageNum int
 var NotifyLimitCount int
 var NotificationLimitDurationMinute int
 var GenerateDefaultToken bool
 var ErrorLogEnabled bool
+var TaskQueryLimit int

 // temporary variable for sora patch, will be removed in future
 var TaskPricePatches []string
@@ -15,6 +15,7 @@ const (
 	TaskActionTextGenerate      = "textGenerate"
 	TaskActionFirstTailGenerate = "firstTailGenerate"
 	TaskActionReferenceGenerate = "referenceGenerate"
+	TaskActionRemix             = "remixGenerate"
 )

 var SunoModel2Action = map[string]string{
@@ -2,9 +2,9 @@ package controller

 import (
 	"github.com/QuantumNous/new-api/common"
-	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/setting/operation_setting"
+	"github.com/QuantumNous/new-api/types"
 	"github.com/gin-gonic/gin"
 )

@@ -29,7 +29,7 @@ func GetSubscription(c *gin.Context) {
 		expiredTime = 0
 	}
 	if err != nil {
-		openAIError := dto.OpenAIError{
+		openAIError := types.OpenAIError{
 			Message: err.Error(),
 			Type:    "upstream_error",
 		}
@@ -81,7 +81,7 @@ func GetUsage(c *gin.Context) {
 		quota, err = model.GetUserUsedQuota(userId)
 	}
 	if err != nil {
-		openAIError := dto.OpenAIError{
+		openAIError := types.OpenAIError{
 			Message: err.Error(),
 			Type:    "new_api_error",
 		}
@@ -351,7 +351,7 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 			newAPIError: types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError),
 		}
 	}
-	info.PromptTokens = usage.PromptTokens
+	info.SetEstimatePromptTokens(usage.PromptTokens)

 	quota := 0
 	if !priceData.UsePrice {
@@ -11,7 +11,6 @@ import (
 	"github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/model"
-	"github.com/QuantumNous/new-api/relay/channel/volcengine"
 	"github.com/QuantumNous/new-api/service"

 	"github.com/gin-gonic/gin"
@@ -166,6 +165,30 @@ func GetAllChannels(c *gin.Context) {
 	return
 }

+func buildFetchModelsHeaders(channel *model.Channel, key string) (http.Header, error) { // builds the headers for the upstream model-list request
+	var headers http.Header
+	switch channel.Type { // pick the base auth headers by channel type
+	case constant.ChannelTypeAnthropic:
+		headers = GetClaudeAuthHeader(key)
+	default:
+		headers = GetAuthHeader(key)
+	}
+
+	headerOverride := channel.GetHeaderOverride()
+	for k, v := range headerOverride { // overrides replace any base header with the same name
+		str, ok := v.(string)
+		if !ok { // only string override values are accepted
+			return nil, fmt.Errorf("invalid header override for key %s", k)
+		}
+		if strings.Contains(str, "{api_key}") { // the {api_key} placeholder is substituted with the channel key
+			str = strings.ReplaceAll(str, "{api_key}", key)
+		}
+		headers.Set(k, str) // NOTE(review): assumes the auth-header helpers return a non-nil http.Header — confirm
+	}
+
+	return headers, nil
+}
+
 func FetchUpstreamModels(c *gin.Context) {
 	id, err := strconv.Atoi(c.Param("id"))
 	if err != nil {
@@ -192,10 +215,20 @@ func FetchUpstreamModels(c *gin.Context) {
 	case constant.ChannelTypeAli:
 		url = fmt.Sprintf("%s/compatible-mode/v1/models", baseURL)
 	case constant.ChannelTypeZhipu_v4:
-		url = fmt.Sprintf("%s/api/paas/v4/models", baseURL)
+		if plan, ok := constant.ChannelSpecialBases[baseURL]; ok && plan.OpenAIBaseURL != "" {
+			url = fmt.Sprintf("%s/models", plan.OpenAIBaseURL)
+		} else {
+			url = fmt.Sprintf("%s/api/paas/v4/models", baseURL)
+		}
 	case constant.ChannelTypeVolcEngine:
-		if baseURL == volcengine.DoubaoCodingPlan {
-			url = fmt.Sprintf("%s/v1/models", volcengine.DoubaoCodingPlanOpenAIBaseURL)
+		if plan, ok := constant.ChannelSpecialBases[baseURL]; ok && plan.OpenAIBaseURL != "" {
+			url = fmt.Sprintf("%s/v1/models", plan.OpenAIBaseURL)
+		} else {
+			url = fmt.Sprintf("%s/v1/models", baseURL)
+		}
+	case constant.ChannelTypeMoonshot:
+		if plan, ok := constant.ChannelSpecialBases[baseURL]; ok && plan.OpenAIBaseURL != "" {
+			url = fmt.Sprintf("%s/models", plan.OpenAIBaseURL)
 		} else {
 			url = fmt.Sprintf("%s/v1/models", baseURL)
 		}
@@ -214,14 +247,13 @@ func FetchUpstreamModels(c *gin.Context) {
 	}
 	key = strings.TrimSpace(key)

-	// 获取响应体 - 根据渠道类型决定是否添加 AuthHeader
-	var body []byte
-	switch channel.Type {
-	case constant.ChannelTypeAnthropic:
-		body, err = GetResponseBody("GET", url, channel, GetClaudeAuthHeader(key))
-	default:
-		body, err = GetResponseBody("GET", url, channel, GetAuthHeader(key))
+	headers, err := buildFetchModelsHeaders(channel, key)
+	if err != nil {
+		common.ApiError(c, err)
+		return
 	}
+
+	body, err := GetResponseBody("GET", url, channel, headers)
 	if err != nil {
 		common.ApiError(c, err)
 		return
@@ -0,0 +1,223 @@
+package controller
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/model"
+	"github.com/QuantumNous/new-api/setting/system_setting"
+
+	"github.com/gin-contrib/sessions"
+	"github.com/gin-gonic/gin"
+)
+
+// DiscordResponse holds the fields decoded from the JSON body returned by
+// Discord's OAuth2 token endpoint during the authorization-code exchange.
+// Within this file only AccessToken is consumed: it is checked for emptiness
+// and then sent as the Bearer credential on the follow-up /users/@me request.
+type DiscordResponse struct {
+	AccessToken  string `json:"access_token"`
+	IDToken      string `json:"id_token"`
+	RefreshToken string `json:"refresh_token"`
+	TokenType    string `json:"token_type"`
+	ExpiresIn    int    `json:"expires_in"`
+	Scope        string `json:"scope"`
+}
+
+// DiscordUser is the subset of the Discord "/users/@me" payload used by the
+// OAuth handlers in this file.
+//
+// Mind the field naming, which does not mirror the JSON keys:
+//   - UID carries the JSON "id" value and is what gets stored as the local
+//     account's DiscordId.
+//   - ID carries the JSON "username" value and is used as the local username
+//     when a new account is registered.
+//   - Name carries the JSON "global_name" value and is used as the display name.
+type DiscordUser struct {
+	UID  string `json:"id"`
+	ID   string `json:"username"`
+	Name string `json:"global_name"`
+}
+
+// getDiscordUserInfoByCode exchanges an OAuth2 authorization code for an access
+// token at Discord's token endpoint and then fetches the profile of the
+// authorizing user from /users/@me.
+//
+// It returns an error when code is empty, when either HTTP round trip fails,
+// when a response body cannot be decoded, when the token response carries no
+// access token, when the profile request does not answer 200 OK, or when the
+// decoded profile lacks an id or a username.
+func getDiscordUserInfoByCode(code string) (*DiscordUser, error) {
+	if code == "" {
+		return nil, errors.New("无效的参数")
+	}
+
+	// Form-encoded body for the authorization-code grant.
+	form := url.Values{}
+	form.Set("client_id", system_setting.GetDiscordSettings().ClientId)
+	form.Set("client_secret", system_setting.GetDiscordSettings().ClientSecret)
+	form.Set("code", code)
+	form.Set("grant_type", "authorization_code")
+	form.Set("redirect_uri", fmt.Sprintf("%s/oauth/discord", system_setting.ServerAddress))
+
+	tokenReq, err := http.NewRequest("POST", "https://discord.com/api/v10/oauth2/token", strings.NewReader(form.Encode()))
+	if err != nil {
+		return nil, err
+	}
+	tokenReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	tokenReq.Header.Set("Accept", "application/json")
+
+	// One short-timeout client is shared by both calls below.
+	httpClient := &http.Client{Timeout: 5 * time.Second}
+
+	tokenRes, err := httpClient.Do(tokenReq)
+	if err != nil {
+		common.SysLog(err.Error())
+		return nil, errors.New("无法连接至 Discord 服务器，请稍后重试！")
+	}
+	defer tokenRes.Body.Close()
+
+	var token DiscordResponse
+	if err = json.NewDecoder(tokenRes.Body).Decode(&token); err != nil {
+		return nil, err
+	}
+	// An empty access token is treated as a failed exchange.
+	if token.AccessToken == "" {
+		const msg = "Discord 获取 Token 失败，请检查设置！"
+		common.SysError(msg)
+		return nil, errors.New(msg)
+	}
+
+	userReq, err := http.NewRequest("GET", "https://discord.com/api/v10/users/@me", nil)
+	if err != nil {
+		return nil, err
+	}
+	userReq.Header.Set("Authorization", "Bearer "+token.AccessToken)
+
+	userRes, err := httpClient.Do(userReq)
+	if err != nil {
+		common.SysLog(err.Error())
+		return nil, errors.New("无法连接至 Discord 服务器，请稍后重试！")
+	}
+	defer userRes.Body.Close()
+	if userRes.StatusCode != http.StatusOK {
+		const msg = "Discord 获取用户信息失败！请检查设置！"
+		common.SysError(msg)
+		return nil, errors.New(msg)
+	}
+
+	var profile DiscordUser
+	if err = json.NewDecoder(userRes.Body).Decode(&profile); err != nil {
+		return nil, err
+	}
+	// Both the id and the username are required by the callers.
+	if profile.UID == "" || profile.ID == "" {
+		const msg = "Discord 获取用户信息为空！请检查设置！"
+		common.SysError(msg)
+		return nil, errors.New(msg)
+	}
+	return &profile, nil
+}
+
+// DiscordOAuth handles the Discord OAuth callback.
+//
+// The "state" query parameter must match the "oauth_state" value stored in the
+// session, otherwise the request is rejected with 403. When the session already
+// carries a "username", the request is delegated to DiscordBind. Otherwise the
+// authorization code is exchanged for a Discord profile and the matching local
+// user is loaded, or a new one is inserted when registration is enabled, before
+// the login session is set up.
+func DiscordOAuth(c *gin.Context) {
+	session := sessions.Default(c)
+	state := c.Query("state")
+	savedState := session.Get("oauth_state")
+	if state == "" || savedState == nil || state != savedState.(string) {
+		c.JSON(http.StatusForbidden, gin.H{
+			"success": false,
+			"message": "state is empty or not same",
+		})
+		return
+	}
+	// A session that already has a username means this is a bind request.
+	if session.Get("username") != nil {
+		DiscordBind(c)
+		return
+	}
+	if !system_setting.GetDiscordSettings().Enabled {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "管理员未开启通过 Discord 登录以及注册",
+		})
+		return
+	}
+
+	discordUser, err := getDiscordUserInfoByCode(c.Query("code"))
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+
+	user := model.User{
+		DiscordId: discordUser.UID,
+	}
+	switch {
+	case model.IsDiscordIdAlreadyTaken(user.DiscordId):
+		// Known Discord account: load the local user bound to it.
+		if fillErr := user.FillUserByDiscordId(); fillErr != nil {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": fillErr.Error(),
+			})
+			return
+		}
+	case !common.RegisterEnabled:
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "管理员关闭了新用户注册",
+		})
+		return
+	default:
+		// Unknown Discord account: register a new local user, with fallbacks
+		// for an empty username or display name.
+		user.Username = discordUser.ID
+		if user.Username == "" {
+			user.Username = "discord_" + strconv.Itoa(model.GetMaxUserId()+1)
+		}
+		user.DisplayName = discordUser.Name
+		if user.DisplayName == "" {
+			user.DisplayName = "Discord User"
+		}
+		if insertErr := user.Insert(0); insertErr != nil {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": insertErr.Error(),
+			})
+			return
+		}
+	}
+
+	if user.Status != common.UserStatusEnabled {
+		c.JSON(http.StatusOK, gin.H{
+			"message": "用户已被封禁",
+			"success": false,
+		})
+		return
+	}
+	setupLogin(&user, c)
+}
+
+// DiscordBind links the Discord account identified by the "code" query
+// parameter to the user stored in the current session.
+//
+// It responds with success=false when Discord login is disabled, when the code
+// cannot be exchanged for a profile, when the Discord account is already bound
+// to a user, when the session carries no integer "id", or when loading or
+// updating the local user fails. On success it responds with message "bind".
+func DiscordBind(c *gin.Context) {
+	if !system_setting.GetDiscordSettings().Enabled {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "管理员未开启通过 Discord 登录以及注册",
+		})
+		return
+	}
+	code := c.Query("code")
+	discordUser, err := getDiscordUserInfoByCode(code)
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	user := model.User{
+		DiscordId: discordUser.UID,
+	}
+	if model.IsDiscordIdAlreadyTaken(user.DiscordId) {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "该 Discord 账户已被绑定",
+		})
+		return
+	}
+	session := sessions.Default(c)
+	// Checked assertion: a missing or non-int session id must produce an error
+	// response instead of a panic from a bare id.(int).
+	id, ok := session.Get("id").(int)
+	if !ok {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "未登录或会话已失效，请重新登录",
+		})
+		return
+	}
+	user.Id = id
+	err = user.FillUserById()
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	// FillUserById reloads the record, so the Discord id is assigned again here.
+	user.DiscordId = discordUser.UID
+	err = user.Update(false)
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"message": "bind",
+	})
+}
@@ -52,6 +52,8 @@ func GetStatus(c *gin.Context) {
 		"email_verification":          common.EmailVerificationEnabled,
 		"github_oauth":                common.GitHubOAuthEnabled,
 		"github_client_id":            common.GitHubClientId,
+		"discord_oauth":               system_setting.GetDiscordSettings().Enabled,
+		"discord_client_id":           system_setting.GetDiscordSettings().ClientId,
 		"linuxdo_oauth":               common.LinuxDOOAuthEnabled,
 		"linuxdo_client_id":           common.LinuxDOClientId,
 		"linuxdo_minimum_trust_level": common.LinuxDOMinimumTrustLevel,
@@ -18,6 +18,7 @@ import (
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/operation_setting"
 	"github.com/QuantumNous/new-api/setting/ratio_setting"
+	"github.com/QuantumNous/new-api/types"
 	"github.com/gin-gonic/gin"
 	"github.com/samber/lo"
 )
@@ -275,7 +276,7 @@ func RetrieveModel(c *gin.Context, modelType int) {
 			c.JSON(200, aiModel)
 		}
 	} else {
-		openAIError := dto.OpenAIError{
+		openAIError := types.OpenAIError{
 			Message: fmt.Sprintf("The model '%s' does not exist", modelId),
 			Type:    "invalid_request_error",
 			Param:   "model",
@@ -71,6 +71,14 @@ func UpdateOption(c *gin.Context) {
 			})
 			return
 		}
+	case "discord.enabled":
+		if option.Value == "true" && system_setting.GetDiscordSettings().ClientId == "" {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "无法启用 Discord OAuth，请先填入 Discord Client Id 以及 Discord Client Secret！",
+			})
+			return
+		}
 	case "oidc.enabled":
 		if option.Value == "true" && system_setting.GetOIDCSettings().ClientId == "" {
 			c.JSON(http.StatusOK, gin.H{
@@ -3,12 +3,10 @@ package controller
 import (
 	"errors"
 	"fmt"
-	"time"

-	"github.com/QuantumNous/new-api/common"
-	"github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/middleware"
 	"github.com/QuantumNous/new-api/model"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/types"

 	"github.com/gin-gonic/gin"
@@ -31,8 +29,11 @@ func Playground(c *gin.Context) {
 		return
 	}

-	group := common.GetContextKeyString(c, constant.ContextKeyUsingGroup)
-	modelName := c.GetString("original_model")
+	relayInfo, err := relaycommon.GenRelayInfo(c, types.RelayFormatOpenAI, nil, nil)
+	if err != nil {
+		newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest, types.ErrOptionWithSkipRetry())
+		return
+	}

 	userId := c.GetInt("id")

@@ -46,16 +47,10 @@ func Playground(c *gin.Context) {

 	tempToken := &model.Token{
 		UserId: userId,
-		Name:   fmt.Sprintf("playground-%s", group),
-		Group:  group,
+		Name:   fmt.Sprintf("playground-%s", relayInfo.UsingGroup),
+		Group:  relayInfo.UsingGroup,
 	}
 	_ = middleware.SetupContextForToken(c, tempToken)
-	_, newAPIError = getChannel(c, group, modelName, 0)
-	if newAPIError != nil {
-		return
-	}
-	//middleware.SetupContextForSelectedChannel(c, channel, playgroundRequest.Model)
-	common.SetContextKey(c, constant.ContextKeyRequestStartTime, time.Now())

 	Relay(c, types.RelayFormatOpenAI)
 }
@@ -2,6 +2,7 @@ package controller

 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"io"
 	"log"
@@ -64,8 +65,8 @@ func geminiRelayHandler(c *gin.Context, info *relaycommon.RelayInfo) *types.NewA
 func Relay(c *gin.Context, relayFormat types.RelayFormat) {

 	requestId := c.GetString(common.RequestIdKey)
-	group := common.GetContextKeyString(c, constant.ContextKeyUsingGroup)
-	originalModel := common.GetContextKeyString(c, constant.ContextKeyOriginalModel)
+	//group := common.GetContextKeyString(c, constant.ContextKeyUsingGroup)
+	//originalModel := common.GetContextKeyString(c, constant.ContextKeyOriginalModel)

 	var (
 		newAPIError *types.NewAPIError
@@ -104,7 +105,12 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {

 	request, err := helper.GetAndValidateRequest(c, relayFormat)
 	if err != nil {
-		newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
+		// Map "request body too large" to 413 so clients can handle it correctly
+		if common.IsRequestBodyTooLargeError(err) || errors.Is(err, common.ErrRequestBodyTooLarge) {
+			newAPIError = types.NewErrorWithStatusCode(err, types.ErrorCodeReadRequestBodyFailed, http.StatusRequestEntityTooLarge, types.ErrOptionWithSkipRetry())
+		} else {
+			newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
+		}
 		return
 	}

@@ -114,9 +120,17 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
 		return
 	}

-	meta := request.GetTokenCountMeta()
+	needSensitiveCheck := setting.ShouldCheckPromptSensitive()
+	needCountToken := constant.CountToken
+	// Avoid building huge CombineText (strings.Join) when token counting and sensitive check are both disabled.
+	var meta *types.TokenCountMeta
+	if needSensitiveCheck || needCountToken {
+		meta = request.GetTokenCountMeta()
+	} else {
+		meta = fastTokenCountMetaForPricing(request)
+	}

-	if setting.ShouldCheckPromptSensitive() {
+	if needSensitiveCheck && meta != nil {
 		contains, words := service.CheckSensitiveText(meta.CombineText)
 		if contains {
 			logger.LogWarn(c, fmt.Sprintf("user sensitive words detected: %s", strings.Join(words, ", ")))
@@ -125,13 +139,13 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
 		}
 	}

-	tokens, err := service.CountRequestToken(c, meta, relayInfo)
+	tokens, err := service.EstimateRequestToken(c, meta, relayInfo)
 	if err != nil {
 		newAPIError = types.NewError(err, types.ErrorCodeCountTokenFailed)
 		return
 	}

-	relayInfo.SetPromptTokens(tokens)
+	relayInfo.SetEstimatePromptTokens(tokens)

 	priceData, err := helper.ModelPriceHelper(c, relayInfo, tokens, meta)
 	if err != nil {
@@ -157,16 +171,32 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
 		}
 	}()

-	for i := 0; i <= common.RetryTimes; i++ {
-		channel, err := getChannel(c, group, originalModel, i)
-		if err != nil {
-			logger.LogError(c, err.Error())
-			newAPIError = err
+	retryParam := &service.RetryParam{
+		Ctx:        c,
+		TokenGroup: relayInfo.TokenGroup,
+		ModelName:  relayInfo.OriginModelName,
+		Retry:      common.GetPointer(0),
+	}
+
+	for ; retryParam.GetRetry() <= common.RetryTimes; retryParam.IncreaseRetry() {
+		channel, channelErr := getChannel(c, relayInfo, retryParam)
+		if channelErr != nil {
+			logger.LogError(c, channelErr.Error())
+			newAPIError = channelErr
 			break
 		}

 		addUsedChannel(c, channel.Id)
-		requestBody, _ := common.GetRequestBody(c)
+		requestBody, bodyErr := common.GetRequestBody(c)
+		if bodyErr != nil {
+			// Ensure consistent 413 for oversized bodies even when error occurs later (e.g., retry path)
+			if common.IsRequestBodyTooLargeError(bodyErr) || errors.Is(bodyErr, common.ErrRequestBodyTooLarge) {
+				newAPIError = types.NewErrorWithStatusCode(bodyErr, types.ErrorCodeReadRequestBodyFailed, http.StatusRequestEntityTooLarge, types.ErrOptionWithSkipRetry())
+			} else {
+				newAPIError = types.NewErrorWithStatusCode(bodyErr, types.ErrorCodeReadRequestBodyFailed, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
+			}
+			break
+		}
 		c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))

 		switch relayFormat {
@@ -186,7 +216,7 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {

 		processChannelError(c, *types.NewChannelError(channel.Id, channel.Type, channel.Name, channel.ChannelInfo.IsMultiKey, common.GetContextKeyString(c, constant.ContextKeyChannelKey), channel.GetAutoBan()), newAPIError)

-		if !shouldRetry(c, newAPIError, common.RetryTimes-i) {
+		if !shouldRetry(c, newAPIError, common.RetryTimes-retryParam.GetRetry()) {
 			break
 		}
 	}
@@ -211,8 +241,35 @@ func addUsedChannel(c *gin.Context, channelId int) {
 	c.Set("use_channel", useChannel)
 }

-func getChannel(c *gin.Context, group, originalModel string, retryCount int) (*model.Channel, *types.NewAPIError) {
-	if retryCount == 0 {
+// fastTokenCountMetaForPricing builds a lightweight TokenCountMeta without
+// asking the request for its full token-count metadata, except for image
+// requests, whose own GetTokenCountMeta result is returned directly.
+//
+// A nil request yields an empty meta. For the recognised chat, responses and
+// Claude request types only MaxTokens is filled in; any other request type
+// gets a meta carrying just the tokenizer token type.
+func fastTokenCountMetaForPricing(request dto.Request) *types.TokenCountMeta {
+	if request == nil {
+		return &types.TokenCountMeta{}
+	}
+	meta := &types.TokenCountMeta{
+		TokenType: types.TokenTypeTokenizer,
+	}
+	switch req := request.(type) {
+	case *dto.ImageRequest:
+		// Pricing for image requests depends on ImagePriceRatio; safe to compute even when CountToken is disabled.
+		return req.GetTokenCountMeta()
+	case *dto.GeneralOpenAIRequest:
+		// Use whichever of the two output limits is larger.
+		limit := req.MaxTokens
+		if req.MaxCompletionTokens > limit {
+			limit = req.MaxCompletionTokens
+		}
+		meta.MaxTokens = int(limit)
+	case *dto.OpenAIResponsesRequest:
+		meta.MaxTokens = int(req.MaxOutputTokens)
+	case *dto.ClaudeRequest:
+		meta.MaxTokens = int(req.MaxTokens)
+	default:
+		// Best-effort: leave CombineText empty to avoid large allocations.
+	}
+	return meta
+}
+
+func getChannel(c *gin.Context, info *relaycommon.RelayInfo, retryParam *service.RetryParam) (*model.Channel, *types.NewAPIError) {
+	if info.ChannelMeta == nil {
 		autoBan := c.GetBool("auto_ban")
 		autoBanInt := 1
 		if !autoBan {
@@ -225,14 +282,18 @@ func getChannel(c *gin.Context, group, originalModel string, retryCount int) (*m
 			AutoBan: &autoBanInt,
 		}, nil
 	}
-	channel, selectGroup, err := service.CacheGetRandomSatisfiedChannel(c, group, originalModel, retryCount)
+	channel, selectGroup, err := service.CacheGetRandomSatisfiedChannel(retryParam)
+
+	info.PriceData.GroupRatioInfo = helper.HandleGroupRatio(c, info)
+
 	if err != nil {
-		return nil, types.NewError(fmt.Errorf("获取分组 %s 下模型 %s 的可用渠道失败（retry）: %s", selectGroup, originalModel, err.Error()), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
+		return nil, types.NewError(fmt.Errorf("获取分组 %s 下模型 %s 的可用渠道失败（retry）: %s", selectGroup, info.OriginModelName, err.Error()), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
 	}
 	if channel == nil {
-		return nil, types.NewError(fmt.Errorf("分组 %s 下模型 %s 的可用渠道不存在（retry）", selectGroup, originalModel), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
+		return nil, types.NewError(fmt.Errorf("分组 %s 下模型 %s 的可用渠道不存在（retry）", selectGroup, info.OriginModelName), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
 	}
-	newAPIError := middleware.SetupContextForSelectedChannel(c, channel, originalModel)
+
+	newAPIError := middleware.SetupContextForSelectedChannel(c, channel, info.OriginModelName)
 	if newAPIError != nil {
 		return nil, newAPIError
 	}
@@ -285,7 +346,7 @@ func processChannelError(c *gin.Context, channelError types.ChannelError, err *t
 	logger.LogError(c, fmt.Sprintf("channel error (channel #%d, status code: %d): %s", channelError.ChannelId, err.StatusCode, err.Error()))
 	// 不要使用context获取渠道信息，异步处理时可能会出现渠道信息不一致的情况
 	// do not use context to get channel info, there may be inconsistent channel info when processing asynchronously
-	if service.ShouldDisableChannel(channelError.ChannelId, err) && channelError.AutoBan {
+	if service.ShouldDisableChannel(channelError.ChannelType, err) && channelError.AutoBan {
 		gopool.Go(func() {
 			service.DisableChannel(channelError, err.Error())
 		})
@@ -366,7 +427,7 @@ func RelayMidjourney(c *gin.Context) {
 }

 func RelayNotImplemented(c *gin.Context) {
-	err := dto.OpenAIError{
+	err := types.OpenAIError{
 		Message: "API not implemented",
 		Type:    "new_api_error",
 		Param:   "",
@@ -378,7 +439,7 @@ func RelayNotImplemented(c *gin.Context) {
 }

 func RelayNotFound(c *gin.Context) {
-	err := dto.OpenAIError{
+	err := types.OpenAIError{
 		Message: fmt.Sprintf("Invalid URL (%s %s)", c.Request.Method, c.Request.URL.Path),
 		Type:    "invalid_request_error",
 		Param:   "",
@@ -392,8 +453,6 @@ func RelayNotFound(c *gin.Context) {
 func RelayTask(c *gin.Context) {
 	retryTimes := common.RetryTimes
 	channelId := c.GetInt("channel_id")
-	group := c.GetString("group")
-	originalModel := c.GetString("original_model")
 	c.Set("use_channel", []string{fmt.Sprintf("%d", channelId)})
 	relayInfo, err := relaycommon.GenRelayInfo(c, types.RelayFormatTask, nil, nil)
 	if err != nil {
@@ -403,8 +462,14 @@ func RelayTask(c *gin.Context) {
 	if taskErr == nil {
 		retryTimes = 0
 	}
-	for i := 0; shouldRetryTaskRelay(c, channelId, taskErr, retryTimes) && i < retryTimes; i++ {
-		channel, newAPIError := getChannel(c, group, originalModel, i)
+	retryParam := &service.RetryParam{
+		Ctx:        c,
+		TokenGroup: relayInfo.TokenGroup,
+		ModelName:  relayInfo.OriginModelName,
+		Retry:      common.GetPointer(0),
+	}
+	for ; shouldRetryTaskRelay(c, channelId, taskErr, retryTimes) && retryParam.GetRetry() < retryTimes; retryParam.IncreaseRetry() {
+		channel, newAPIError := getChannel(c, relayInfo, retryParam)
 		if newAPIError != nil {
 			logger.LogError(c, fmt.Sprintf("CacheGetRandomSatisfiedChannel failed: %s", newAPIError.Error()))
 			taskErr = service.TaskErrorWrapperLocal(newAPIError.Err, "get_channel_failed", http.StatusInternalServerError)
@@ -414,10 +479,18 @@ func RelayTask(c *gin.Context) {
 		useChannel := c.GetStringSlice("use_channel")
 		useChannel = append(useChannel, fmt.Sprintf("%d", channelId))
 		c.Set("use_channel", useChannel)
-		logger.LogInfo(c, fmt.Sprintf("using channel #%d to retry (remain times %d)", channel.Id, i))
+		logger.LogInfo(c, fmt.Sprintf("using channel #%d to retry (remain times %d)", channel.Id, retryParam.GetRetry()))
 		//middleware.SetupContextForSelectedChannel(c, channel, originalModel)

-		requestBody, _ := common.GetRequestBody(c)
+		requestBody, err := common.GetRequestBody(c)
+		if err != nil {
+			if common.IsRequestBodyTooLargeError(err) || errors.Is(err, common.ErrRequestBodyTooLarge) {
+				taskErr = service.TaskErrorWrapperLocal(err, "read_request_body_failed", http.StatusRequestEntityTooLarge)
+			} else {
+				taskErr = service.TaskErrorWrapperLocal(err, "read_request_body_failed", http.StatusBadRequest)
+			}
+			break
+		}
 		c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))
 		taskErr = taskRelayHandler(c, relayInfo)
 	}
@@ -29,7 +29,7 @@ func UpdateTaskBulk() {
 		time.Sleep(time.Duration(15) * time.Second)
 		common.SysLog("任务进度轮询开始")
 		ctx := context.TODO()
-		allTasks := model.GetAllUnFinishSyncTasks(500)
+		allTasks := model.GetAllUnFinishSyncTasks(constant.TaskQueryLimit)
 		platformTask := make(map[constant.TaskPlatform][]*model.Task)
 		for _, t := range allTasks {
 			platformTask[t.Platform] = append(platformTask[t.Platform], t)
@@ -88,7 +88,7 @@ func UpdateSunoTaskAll(ctx context.Context, taskChannelM map[int][]string, taskM
 	for channelId, taskIds := range taskChannelM {
 		err := updateSunoTaskAll(ctx, channelId, taskIds, taskM)
 		if err != nil {
-			logger.LogError(ctx, fmt.Sprintf("渠道 #%d 更新异步任务失败: %d", channelId, err.Error()))
+			logger.LogError(ctx, fmt.Sprintf("渠道 #%d 更新异步任务失败: %s", channelId, err.Error()))
 		}
 	}
 	return nil
@@ -116,9 +116,10 @@ func updateSunoTaskAll(ctx context.Context, channelId int, taskIds []string, tas
 	if adaptor == nil {
 		return errors.New("adaptor not found")
 	}
+	proxy := channel.GetSetting().Proxy
 	resp, err := adaptor.FetchTask(*channel.BaseURL, channel.Key, map[string]any{
 		"ids": taskIds,
-	})
+	}, proxy)
 	if err != nil {
 		common.SysLog(fmt.Sprintf("Get Task Do req error: %v", err))
 		return err
@@ -140,7 +141,7 @@ func updateSunoTaskAll(ctx context.Context, channelId int, taskIds []string, tas
 		return err
 	}
 	if !responseItems.IsSuccess() {
-		common.SysLog(fmt.Sprintf("渠道 #%d 未完成的任务有: %d, 成功获取到任务数: %d", channelId, len(taskIds), string(responseBody)))
+		common.SysLog(fmt.Sprintf("渠道 #%d 未完成的任务有: %d, 成功获取到任务数: %s", channelId, len(taskIds), string(responseBody)))
 		return err
 	}

@@ -67,6 +67,7 @@ func updateVideoSingleTask(ctx context.Context, adaptor channel.TaskAdaptor, cha
 	if channel.GetBaseURL() != "" {
 		baseURL = channel.GetBaseURL()
 	}
+	proxy := channel.GetSetting().Proxy

 	task := taskM[taskId]
 	if task == nil {
@@ -76,7 +77,7 @@ func updateVideoSingleTask(ctx context.Context, adaptor channel.TaskAdaptor, cha
 	resp, err := adaptor.FetchTask(baseURL, channel.Key, map[string]any{
 		"task_id": taskId,
 		"action":  task.Action,
-	})
+	}, proxy)
 	if err != nil {
 		return fmt.Errorf("fetchTask failed for task %s: %w", taskId, err)
 	}
@@ -142,7 +142,7 @@ func AddToken(c *gin.Context) {
 		common.ApiError(c, err)
 		return
 	}
-	if len(token.Name) > 30 {
+	if len(token.Name) > 50 {
 		c.JSON(http.StatusOK, gin.H{
 			"success": false,
 			"message": "令牌名称过长",
@@ -171,6 +171,7 @@ func AddToken(c *gin.Context) {
 		ModelLimits:        token.ModelLimits,
 		AllowIps:           token.AllowIps,
 		Group:              token.Group,
+		CrossGroupRetry:    token.CrossGroupRetry,
 	}
 	err = cleanToken.Insert()
 	if err != nil {
@@ -208,7 +209,7 @@ func UpdateToken(c *gin.Context) {
 		common.ApiError(c, err)
 		return
 	}
-	if len(token.Name) > 30 {
+	if len(token.Name) > 50 {
 		c.JSON(http.StatusOK, gin.H{
 			"success": false,
 			"message": "令牌名称过长",
@@ -248,6 +249,7 @@ func UpdateToken(c *gin.Context) {
 		cleanToken.ModelLimits = token.ModelLimits
 		cleanToken.AllowIps = token.AllowIps
 		cleanToken.Group = token.Group
+		cleanToken.CrossGroupRetry = token.CrossGroupRetry
 	}
 	err = cleanToken.Update()
 	if err != nil {
@@ -7,12 +7,12 @@ import (
 	"encoding/hex"
 	"encoding/json"
 	"fmt"
-	"io"
-	"log"
-	"net/http"
 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/setting"
+	"io"
+	"log"
+	"net/http"
 	"time"

 	"github.com/gin-gonic/gin"
@@ -453,6 +453,7 @@ func GetSelf(c *gin.Context) {
 		"status":            user.Status,
 		"email":             user.Email,
 		"github_id":         user.GitHubId,
+		"discord_id":        user.DiscordId,
 		"oidc_id":           user.OidcId,
 		"wechat_id":         user.WeChatId,
 		"telegram_id":       user.TelegramId,
@@ -1,6 +1,7 @@
 package controller

 import (
+	"context"
 	"fmt"
 	"io"
 	"net/http"
@@ -10,6 +11,7 @@ import (
 	"github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/model"
+	"github.com/QuantumNous/new-api/service"

 	"github.com/gin-gonic/gin"
 )
@@ -75,11 +77,22 @@ func VideoProxy(c *gin.Context) {
 	}

 	var videoURL string
-	client := &http.Client{
-		Timeout: 60 * time.Second,
+	proxy := channel.GetSetting().Proxy
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to create proxy client for task %s: %s", taskID, err.Error()))
+		c.JSON(http.StatusInternalServerError, gin.H{
+			"error": gin.H{
+				"message": "Failed to create proxy client",
+				"type":    "server_error",
+			},
+		})
+		return
 	}

-	req, err := http.NewRequestWithContext(c.Request.Context(), http.MethodGet, "", nil)
+	ctx, cancel := context.WithTimeout(c.Request.Context(), 60*time.Second)
+	defer cancel()
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "", nil)
 	if err != nil {
 		logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to create request: %s", err.Error()))
 		c.JSON(http.StatusInternalServerError, gin.H{
@@ -117,13 +130,12 @@ func VideoProxy(c *gin.Context) {
 			return
 		}
 		req.Header.Set("x-goog-api-key", apiKey)
-	case constant.ChannelTypeAli:
-		// Video URL is directly in task.FailReason
-		videoURL = task.FailReason
-	default:
-		// Default (Sora, etc.): Use original logic
+	case constant.ChannelTypeOpenAI, constant.ChannelTypeSora:
 		videoURL = fmt.Sprintf("%s/v1/videos/%s/content", baseURL, task.TaskID)
 		req.Header.Set("Authorization", "Bearer "+channel.Key)
+	default:
+		// Video URL is directly in task.FailReason
+		videoURL = task.FailReason
 	}

 	req.URL, err = url.Parse(videoURL)
@@ -35,10 +35,11 @@ func getGeminiVideoURL(channel *model.Channel, task *model.Task, apiKey string)
 		return "", fmt.Errorf("api key not available for task")
 	}

+	proxy := channel.GetSetting().Proxy
 	resp, err := adaptor.FetchTask(baseURL, apiKey, map[string]any{
 		"task_id": task.TaskID,
 		"action":  task.Action,
-	})
+	}, proxy)
 	if err != nil {
 		return "", fmt.Errorf("fetch task failed: %w", err)
 	}
@@ -1,53 +0,0 @@
-# API 鉴权文档
-
-## 认证方式
-
-### Access Token
-
-对于需要鉴权的 API 接口，必须同时提供以下两个请求头来进行 Access Token 认证：
-
-1. **请求头中的 `Authorization` 字段**
-
-    将 Access Token 放置于 HTTP 请求头部的 `Authorization` 字段中，格式如下：
-
-    ```
-    Authorization: <your_access_token>
-    ```
-
-    其中 `<your_access_token>` 需要替换为实际的 Access Token 值。
-
-2. **请求头中的 `New-Api-User` 字段**
-
-    将用户 ID 放置于 HTTP 请求头部的 `New-Api-User` 字段中，格式如下：
-
-    ```
-    New-Api-User: <your_user_id>
-    ```
-
-    其中 `<your_user_id>` 需要替换为实际的用户 ID。
-
-**注意：**
-
-*   **必须同时提供 `Authorization` 和 `New-Api-User` 两个请求头才能通过鉴权。**
-*   如果只提供其中一个请求头，或者两个请求头都未提供，则会返回 `401 Unauthorized` 错误。
-*   如果 `Authorization` 中的 Access Token 无效，则会返回 `401 Unauthorized` 错误，并提示“无权进行此操作，access token 无效”。
-*   如果 `New-Api-User` 中的用户 ID 与 Access Token 不匹配，则会返回 `401 Unauthorized` 错误，并提示“无权进行此操作，与登录用户不匹配，请重新登录”。
-*   如果没有提供 `New-Api-User` 请求头，则会返回 `401 Unauthorized` 错误，并提示“无权进行此操作，未提供 New-Api-User”。
-*   如果 `New-Api-User` 请求头格式错误，则会返回 `401 Unauthorized` 错误，并提示“无权进行此操作，New-Api-User 格式错误”。
-*   如果用户已被禁用，则会返回 `403 Forbidden` 错误，并提示“用户已被封禁”。
-*   如果用户权限不足，则会返回 `403 Forbidden` 错误，并提示“无权进行此操作，权限不足”。
-*   如果用户信息无效，则会返回 `403 Forbidden` 错误，并提示“无权进行此操作，用户信息无效”。
-
-## Curl 示例
-
-假设您的 Access Token 为 `access_token`，用户 ID 为 `123`，要访问的 API 接口为 `/api/user/self`，则可以使用以下 curl 命令：
-
-```bash
-curl -X GET \
-  -H "Authorization: access_token" \
-  -H "New-Api-User: 123" \
-  https://your-domain.com/api/user/self
-```
-
-请将 `access_token`、`123` 和 `https://your-domain.com` 替换为实际的值。
-
@@ -1,197 +0,0 @@
-# New API – Web 界面后端接口文档
-
-> 本文档汇总了 **New API** 后端提供给前端 Web 界面的全部 REST 接口（不含 *Relay* 相关接口）。
->
-> 接口前缀统一为 `https://<your-domain>`，以下仅列出 **路径**、**HTTP 方法**、**鉴权要求** 与 **功能简介**。
->
-> 鉴权级别说明：
-> * **公开** – 不需要登录即可调用
-> * **用户** – 需携带用户 Token（`middleware.UserAuth`）
-> * **管理员** – 需管理员 Token（`middleware.AdminAuth`）
-> * **Root** – 仅限最高权限 Root 用户（`middleware.RootAuth`）
-
---
-
-## 1. 初始化 / 系统状态
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET  | /api/setup | 公开 | 获取系统初始化状态 |
-| POST | /api/setup | 公开 | 完成首次安装向导 |
-| GET  | /api/status | 公开 | 获取运行状态摘要 |
-| GET  | /api/uptime/status | 公开 | Uptime-Kuma 兼容状态探针 |
-| GET  | /api/status/test | 管理员 | 测试后端与依赖组件是否正常 |
-
-## 2. 公共信息
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/models | 用户 | 获取前端可用模型列表 |
-| GET | /api/notice | 公开 | 获取公告栏内容 |
-| GET | /api/about | 公开 | 关于页面信息 |
-| GET | /api/home_page_content | 公开 | 首页自定义内容 |
-| GET | /api/pricing | 可匿名/用户 | 价格与套餐信息 |
-| GET | /api/ratio_config | 公开 | 模型倍率配置（仅公开字段） |
-
-## 3. 邮件 / 身份验证
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/verification | 公开 (限流) | 发送邮箱验证邮件 |
-| GET | /api/reset_password | 公开 (限流) | 发送重置密码邮件 |
-| POST | /api/user/reset | 公开 | 提交重置密码请求 |
-
-## 4. OAuth / 第三方登录
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/oauth/github | 公开 | GitHub OAuth 跳转 |
-| GET | /api/oauth/oidc | 公开 | OIDC 通用 OAuth 跳转 |
-| GET | /api/oauth/linuxdo | 公开 | LinuxDo OAuth 跳转 |
-| GET | /api/oauth/wechat | 公开 | 微信扫码登录跳转 |
-| GET | /api/oauth/wechat/bind | 公开 | 微信账户绑定 |
-| GET | /api/oauth/email/bind | 公开 | 邮箱绑定 |
-| GET | /api/oauth/telegram/login | 公开 | Telegram 登录 |
-| GET | /api/oauth/telegram/bind | 公开 | Telegram 账户绑定 |
-| GET | /api/oauth/state | 公开 | 获取随机 state（防 CSRF） |
-
-## 5. 用户模块
-### 5.1 账号注册/登录
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| POST | /api/user/register | 公开 | 注册新账号 |
-| POST | /api/user/login | 公开 | 用户登录 |
-| GET  | /api/user/logout | 用户 | 退出登录 |
-| GET  | /api/user/epay/notify | 公开 | Epay 支付回调 |
-| GET  | /api/user/groups | 公开 | 列出所有分组（无鉴权版） |
-
-### 5.2 用户自身操作 (需登录)
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/user/self/groups | 用户 | 获取自己所在分组 |
-| GET | /api/user/self | 用户 | 获取个人资料 |
-| GET | /api/user/models | 用户 | 获取模型可见性 |
-| PUT | /api/user/self | 用户 | 修改个人资料 |
-| DELETE | /api/user/self | 用户 | 注销账号 |
-| GET | /api/user/token | 用户 | 生成用户级别 Access Token |
-| GET | /api/user/aff | 用户 | 获取推广码信息 |
-| POST | /api/user/topup | 用户 | 余额直充 |
-| POST | /api/user/pay | 用户 | 提交支付订单 |
-| POST | /api/user/amount | 用户 | 余额支付 |
-| POST | /api/user/aff_transfer | 用户 | 推广额度转账 |
-| PUT | /api/user/setting | 用户 | 更新用户设置 |
-
-### 5.3 管理员用户管理
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/user/ | 管理员 | 获取全部用户列表 |
-| GET | /api/user/search | 管理员 | 搜索用户 |
-| GET | /api/user/:id | 管理员 | 获取单个用户信息 |
-| POST | /api/user/ | 管理员 | 创建用户 |
-| POST | /api/user/manage | 管理员 | 冻结/重置等管理操作 |
-| PUT | /api/user/ | 管理员 | 更新用户 |
-| DELETE | /api/user/:id | 管理员 | 删除用户 |
-
-## 6. 站点选项 (Root)
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/option/ | Root | 获取全局配置 |
-| PUT | /api/option/ | Root | 更新全局配置 |
-| POST | /api/option/rest_model_ratio | Root | 重置模型倍率 |
-| POST | /api/option/migrate_console_setting | Root | 迁移旧版控制台配置 |
-
-## 7. 模型倍率同步 (Root)
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/ratio_sync/channels | Root | 获取可同步渠道列表 |
-| POST | /api/ratio_sync/fetch | Root | 从上游拉取倍率 |
-
-## 8. 渠道管理 (管理员)
-| 方法 | 路径 | 说明 |
-|------|------|------|
-| GET | /api/channel/ | 获取渠道列表 |
-| GET | /api/channel/search | 搜索渠道 |
-| GET | /api/channel/models | 查询渠道模型能力 |
-| GET | /api/channel/models_enabled | 查询启用模型能力 |
-| GET | /api/channel/:id | 获取单个渠道 |
-| GET | /api/channel/test | 批量测试渠道连通性 |
-| GET | /api/channel/test/:id | 单个渠道测试 |
-| GET | /api/channel/update_balance | 批量刷新余额 |
-| GET | /api/channel/update_balance/:id | 单个刷新余额 |
-| POST | /api/channel/ | 新增渠道 |
-| PUT | /api/channel/ | 更新渠道 |
-| DELETE | /api/channel/disabled | 删除已禁用渠道 |
-| POST | /api/channel/tag/disabled | 批量禁用标签渠道 |
-| POST | /api/channel/tag/enabled | 批量启用标签渠道 |
-| PUT | /api/channel/tag | 编辑渠道标签 |
-| DELETE | /api/channel/:id | 删除渠道 |
-| POST | /api/channel/batch | 批量删除渠道 |
-| POST | /api/channel/fix | 修复渠道能力表 |
-| GET | /api/channel/fetch_models/:id | 拉取单渠道模型 |
-| POST | /api/channel/fetch_models | 拉取全部渠道模型 |
-| POST | /api/channel/batch/tag | 批量设置渠道标签 |
-| GET | /api/channel/tag/models | 根据标签获取模型 |
-| POST | /api/channel/copy/:id | 复制渠道 |
-
-## 9. Token 管理
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/token/ | 用户 | 获取全部 Token |
-| GET | /api/token/search | 用户 | 搜索 Token |
-| GET | /api/token/:id | 用户 | 获取单个 Token |
-| POST | /api/token/ | 用户 | 创建 Token |
-| PUT | /api/token/ | 用户 | 更新 Token |
-| DELETE | /api/token/:id | 用户 | 删除 Token |
-| POST | /api/token/batch | 用户 | 批量删除 Token |
-
-## 10. 兑换码管理 (管理员)
-| 方法 | 路径 | 说明 |
-|------|------|------|
-| GET | /api/redemption/ | 获取兑换码列表 |
-| GET | /api/redemption/search | 搜索兑换码 |
-| GET | /api/redemption/:id | 获取单个兑换码 |
-| POST | /api/redemption/ | 创建兑换码 |
-| PUT | /api/redemption/ | 更新兑换码 |
-| DELETE | /api/redemption/invalid | 删除无效兑换码 |
-| DELETE | /api/redemption/:id | 删除兑换码 |
-
-## 11. 日志
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/log/ | 管理员 | 获取全部日志 |
-| DELETE | /api/log/ | 管理员 | 删除历史日志 |
-| GET | /api/log/stat | 管理员 | 日志统计 |
-| GET | /api/log/self/stat | 用户 | 我的日志统计 |
-| GET | /api/log/search | 管理员 | 搜索全部日志 |
-| GET | /api/log/self | 用户 | 获取我的日志 |
-| GET | /api/log/self/search | 用户 | 搜索我的日志 |
-| GET | /api/log/token | 公开 | 根据 Token 查询日志（支持 CORS） |
-
-## 12. 数据统计
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/data/ | 管理员 | 全站用量按日期统计 |
-| GET | /api/data/self | 用户 | 我的用量按日期统计 |
-
-## 13. 分组
-| GET | /api/group/ | 管理员 | 获取全部分组列表 |
-
-## 14. Midjourney 任务
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/mj/self | 用户 | 获取自己的 MJ 任务 |
-| GET | /api/mj/ | 管理员 | 获取全部 MJ 任务 |
-
-## 15. 任务中心
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/task/self | 用户 | 获取我的任务 |
-| GET | /api/task/ | 管理员 | 获取全部任务 |
-
-## 16. 账户计费面板 (Dashboard)
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /dashboard/billing/subscription | 用户 Token | 获取订阅额度信息 |
-| GET | /v1/dashboard/billing/subscription | 同上 | 兼容 OpenAI SDK 路径 |
-| GET | /dashboard/billing/usage | 用户 Token | 获取使用量信息 |
-| GET | /v1/dashboard/billing/usage | 同上 | 兼容 OpenAI SDK 路径 |
-
---
-
-> **更新日期**：2025.07.17
@@ -1,82 +0,0 @@
-# Midjourney Proxy API文档
-
-**简介**:Midjourney Proxy API文档
-
-## 接口列表
-支持的接口如下：
-+ [x] /mj/submit/imagine
-+ [x] /mj/submit/change
-+ [x] /mj/submit/blend
-+ [x] /mj/submit/describe
-+ [x] /mj/image/{id} （通过此接口获取图片，**请必须在系统设置中填写服务器地址！！**）
-+ [x] /mj/task/{id}/fetch （此接口返回的图片地址为经过One API转发的地址）
-+ [x] /task/list-by-condition
-+ [x] /mj/submit/action （仅midjourney-proxy-plus支持，下同）
-+ [x] /mj/submit/modal
-+ [x] /mj/submit/shorten
-+ [x] /mj/task/{id}/image-seed
-+ [x] /mj/insight-face/swap （InsightFace）
-
-## 模型列表
-
-### midjourney-proxy支持
-
- mj_imagine (绘图)
- mj_variation (变换)
- mj_reroll (重绘)
- mj_blend (混合)
- mj_upscale (放大)
- mj_describe (图生文)
-
-### 仅midjourney-proxy-plus支持
-
- mj_zoom (比例变焦)
- mj_shorten (提示词缩短)
- mj_modal (窗口提交，局部重绘和自定义比例变焦必须和mj_modal一同添加)
- mj_inpaint (局部重绘提交，必须和mj_modal一同添加)
- mj_custom_zoom (自定义比例变焦，必须和mj_modal一同添加)
- mj_high_variation (强变换)
- mj_low_variation (弱变换)
- mj_pan (平移)
- swap_face (换脸)
-
-## 模型价格设置（在设置-运营设置-模型固定价格设置中设置）
-```json
-{
-  "mj_imagine": 0.1,
-  "mj_variation": 0.1,
-  "mj_reroll": 0.1,
-  "mj_blend": 0.1,
-  "mj_modal": 0.1,
-  "mj_zoom": 0.1,
-  "mj_shorten": 0.1,
-  "mj_high_variation": 0.1,
-  "mj_low_variation": 0.1,
-  "mj_pan": 0.1,
-  "mj_inpaint": 0,
-  "mj_custom_zoom": 0,
-  "mj_describe": 0.05,
-  "mj_upscale": 0.05,
-  "swap_face": 0.05
-}
-```
-其中mj_inpaint和mj_custom_zoom的价格设置为0，是因为这两个模型需要搭配mj_modal使用，所以价格由mj_modal决定。
-
-## 渠道设置
-
-### 对接 midjourney-proxy(plus)
-
-1. 部署Midjourney-Proxy，并配置好midjourney账号等（强烈建议设置密钥），[项目地址](https://github.com/novicezk/midjourney-proxy)
-
-2. 在渠道管理中添加渠道，渠道类型选择**Midjourney Proxy**，如果是plus版本选择**Midjourney Proxy Plus**，模型请参考上方模型列表
-3. **代理**填写midjourney-proxy部署的地址，例如：http://localhost:8080
-4. 密钥填写midjourney-proxy的密钥，如果没有设置密钥，可以随便填
-
-### 对接上游new api
-
-1. 在渠道管理中添加渠道，渠道类型选择**Midjourney Proxy Plus**，模型请参考上方模型列表
-2. **代理**填写上游new api的地址，例如：http://localhost:3000
-3. 密钥填写上游new api的密钥
@@ -1,62 +0,0 @@
-# Rerank API文档
-
-**简介**:Rerank API文档
-
-## 接入Dify
-模型供应商选择Jina，按要求填写模型信息即可接入Dify。
-
-## 请求方式
-
-Post: /v1/rerank
-
-Request:
-
-```json
-{
-  "model": "jina-reranker-v2-base-multilingual",
-  "query": "What is the capital of the United States?",
-  "top_n": 3,
-  "documents": [
-    "Carson City is the capital city of the American state of Nevada.",
-    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
-    "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.",
-    "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.",
-    "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states."
-  ]
-}
-```
-
-Response:
-
-```json
-{
-  "results": [
-    {
-      "document": {
-        "text": "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district."
-      },
-      "index": 2,
-      "relevance_score": 0.9999702
-    },
-    {
-      "document": {
-        "text": "Carson City is the capital city of the American state of Nevada."
-      },
-      "index": 0,
-      "relevance_score": 0.67800725
-    },
-    {
-      "document": {
-        "text": "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages."
-      },
-      "index": 3,
-      "relevance_score": 0.02800752
-    }
-  ],
-  "usage": {
-    "prompt_tokens": 158,
-    "completion_tokens": 0,
-    "total_tokens": 158
-  }
-}
-```
@@ -1,44 +0,0 @@
-# Suno API文档
-
-**简介**:Suno API文档
-
-## 接口列表
-支持的接口如下：
-+ [x] /suno/submit/music
-+ [x] /suno/submit/lyrics
-+ [x] /suno/fetch
-+ [x] /suno/fetch/:id
-
-## 模型列表
-
-### Suno API支持
-
- suno_music (自定义模式、灵感模式、续写)
- suno_lyrics (生成歌词)
-
-
-## 模型价格设置（在设置-运营设置-模型固定价格设置中设置）
-```json
-{
-  "suno_music": 0.3,
-  "suno_lyrics": 0.01
-}
-```
-
-## 渠道设置
-
-### 对接 Suno API
-
-1. 部署 Suno API，并配置好suno账号等（强烈建议设置密钥），[项目地址](https://github.com/Suno-API/Suno-API)
-
-2. 在渠道管理中添加渠道，渠道类型选择**Suno API**，模型请参考上方模型列表
-3. **代理**填写 Suno API 部署的地址，例如：http://localhost:8080
-4. 密钥填写 Suno API 的密钥，如果没有设置密钥，可以随便填
-
-### 对接上游new api
-
-1. 在渠道管理中添加渠道，渠道类型选择**Suno API**，或任意类型，只需模型包含上方模型列表的模型
-2. **代理**填写上游new api的地址，例如：http://localhost:3000
-3. 密钥填写上游new api的密钥
@@ -2,6 +2,7 @@ package dto

 import (
 	"encoding/json"
+	"strings"

 	"github.com/QuantumNous/new-api/types"

@@ -24,11 +25,14 @@ func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
 		CombineText: r.Input,
 		TokenType:   types.TokenTypeTextNumber,
 	}
+	if strings.Contains(r.Model, "gpt") {
+		meta.TokenType = types.TokenTypeTokenizer
+	}
 	return meta
 }

 func (r *AudioRequest) IsStream(c *gin.Context) bool {
-	return false
+	return r.StreamFormat == "sse"
 }

 func (r *AudioRequest) SetModelName(modelName string) {
@@ -203,6 +203,9 @@ type ClaudeRequest struct {
 	Stream            bool            `json:"stream,omitempty"`
 	Tools             any             `json:"tools,omitempty"`
 	ContextManagement json.RawMessage `json:"context_management,omitempty"`
+	OutputConfig      json.RawMessage `json:"output_config,omitempty"`
+	OutputFormat      json.RawMessage `json:"output_format,omitempty"`
+	Container         json.RawMessage `json:"container,omitempty"`
 	ToolChoice        any             `json:"tool_choice,omitempty"`
 	Thinking          *Thinking       `json:"thinking,omitempty"`
 	McpServers        json.RawMessage `json:"mcp_servers,omitempty"`
@@ -1,26 +1,31 @@
 package dto

-import "github.com/QuantumNous/new-api/types"
+import (
+	"encoding/json"

-type OpenAIError struct {
-	Message string `json:"message"`
-	Type    string `json:"type"`
-	Param   string `json:"param"`
-	Code    any    `json:"code"`
-}
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/types"
+)
+
+//type OpenAIError struct {
+//	Message string `json:"message"`
+//	Type    string `json:"type"`
+//	Param   string `json:"param"`
+//	Code    any    `json:"code"`
+//}

 type OpenAIErrorWithStatusCode struct {
-	Error      OpenAIError `json:"error"`
-	StatusCode int         `json:"status_code"`
+	Error      types.OpenAIError `json:"error"`
+	StatusCode int               `json:"status_code"`
 	LocalError bool
 }

 type GeneralErrorResponse struct {
-	Error    types.OpenAIError `json:"error"`
-	Message  string            `json:"message"`
-	Msg      string            `json:"msg"`
-	Err      string            `json:"err"`
-	ErrorMsg string            `json:"error_msg"`
+	Error    json.RawMessage `json:"error"`
+	Message  string          `json:"message"`
+	Msg      string          `json:"msg"`
+	Err      string          `json:"err"`
+	ErrorMsg string          `json:"error_msg"`
 	Header   struct {
 		Message string `json:"message"`
 	} `json:"header"`
@@ -31,9 +36,35 @@ type GeneralErrorResponse struct {
 	} `json:"response"`
 }

+// TryToOpenAIError attempts to decode the raw "error" field into a
+// types.OpenAIError. It returns nil when the field is empty, when it cannot
+// be unmarshalled into that type, or when the decoded Message is empty.
+func (e GeneralErrorResponse) TryToOpenAIError() *types.OpenAIError {
+	var openAIError types.OpenAIError
+	if len(e.Error) > 0 {
+		err := common.Unmarshal(e.Error, &openAIError)
+		// A successfully decoded value with no message is treated like a decode failure.
+		if err == nil && openAIError.Message != "" {
+			return &openAIError
+		}
+	}
+	return nil
+}
+
 func (e GeneralErrorResponse) ToMessage() string {
-	if e.Error.Message != "" {
-		return e.Error.Message
+	if len(e.Error) > 0 {
+		switch common.GetJsonType(e.Error) {
+		case "object":
+			var openAIError types.OpenAIError
+			err := common.Unmarshal(e.Error, &openAIError)
+			if err == nil && openAIError.Message != "" {
+				return openAIError.Message
+			}
+		case "string":
+			var msg string
+			err := common.Unmarshal(e.Error, &msg)
+			if err == nil && msg != "" {
+				return msg
+			}
+		default:
+			return string(e.Error)
+		}
 	}
 	if e.Message != "" {
 		return e.Message
@@ -142,7 +142,38 @@ type GeminiThinkingConfig struct {
 	IncludeThoughts bool `json:"includeThoughts,omitempty"`
 	ThinkingBudget  *int `json:"thinkingBudget,omitempty"`
 	// TODO Conflict with thinkingbudget.
-	// ThinkingLevel   json.RawMessage `json:"thinkingLevel,omitempty"`
+	ThinkingLevel string `json:"thinkingLevel,omitempty"`
+}
+
+// UnmarshalJSON allows GeminiThinkingConfig to accept both snake_case and camelCase fields.
+// The camelCase values are applied first; a snake_case value that is present
+// (non-nil pointer or non-empty string) then overwrites the matching field.
+func (c *GeminiThinkingConfig) UnmarshalJSON(data []byte) error {
+	// Alias has the same fields as GeminiThinkingConfig but none of its methods,
+	// so decoding into it does not call back into this UnmarshalJSON.
+	type Alias GeminiThinkingConfig
+	// aux receives the camelCase fields through the embedded Alias and the
+	// snake_case spellings through the extra fields below.
+	var aux struct {
+		Alias
+		IncludeThoughtsSnake *bool  `json:"include_thoughts,omitempty"`
+		ThinkingBudgetSnake  *int   `json:"thinking_budget,omitempty"`
+		ThinkingLevelSnake   string `json:"thinking_level,omitempty"`
+	}
+
+	if err := common.Unmarshal(data, &aux); err != nil {
+		return err
+	}
+
+	// Start from whatever the camelCase fields provided.
+	*c = GeminiThinkingConfig(aux.Alias)
+
+	// A pointer is used so an explicit include_thoughts=false is still applied.
+	if aux.IncludeThoughtsSnake != nil {
+		c.IncludeThoughts = *aux.IncludeThoughtsSnake
+	}
+
+	if aux.ThinkingBudgetSnake != nil {
+		c.ThinkingBudget = aux.ThinkingBudgetSnake
+	}
+
+	// An empty thinking_level is indistinguishable from an absent one and is ignored.
+	if aux.ThinkingLevelSnake != "" {
+		c.ThinkingLevel = aux.ThinkingLevelSnake
+	}
+
+	return nil
 }

 func (c *GeminiThinkingConfig) SetThinkingBudget(budget int) {
@@ -27,8 +27,11 @@ type ImageRequest struct {
 	OutputCompression json.RawMessage `json:"output_compression,omitempty"`
 	PartialImages     json.RawMessage `json:"partial_images,omitempty"`
 	// Stream            bool            `json:"stream,omitempty"`
-	Watermark *bool           `json:"watermark,omitempty"`
-	Image     json.RawMessage `json:"image,omitempty"`
+	Watermark *bool `json:"watermark,omitempty"`
+	// zhipu 4v
+	WatermarkEnabled json.RawMessage `json:"watermark_enabled,omitempty"`
+	UserId           json.RawMessage `json:"user_id,omitempty"`
+	Image            json.RawMessage `json:"image,omitempty"`
 	// 用匿名参数接收额外参数
 	Extra map[string]json.RawMessage `json:"-"`
 }
@@ -83,6 +83,7 @@ type GeneralOpenAIRequest struct {
 	// Ali Qwen Params
 	VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
 	EnableThinking         any             `json:"enable_thinking,omitempty"`
+	ChatTemplateKwargs     json.RawMessage `json:"chat_template_kwargs,omitempty"`
 	// ollama Params
 	Think json.RawMessage `json:"think,omitempty"`
 	// baidu v2
@@ -897,6 +898,12 @@ type Reasoning struct {
 	Summary string `json:"summary,omitempty"`
 }

+// Input is the element type ParseInput decodes an array-valued request input
+// into. Content is kept as raw JSON because ParseInput handles it as either a
+// JSON string or a JSON array of parts.
+type Input struct {
+	Type    string          `json:"type,omitempty"`
+	Role    string          `json:"role,omitempty"`
+	Content json.RawMessage `json:"content,omitempty"`
+}
+
 type MediaInput struct {
 	Type     string `json:"type"`
 	Text     string `json:"text,omitempty"`
@@ -915,7 +922,7 @@ func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
 		return nil
 	}

-	var inputs []MediaInput
+	var mediaInputs []MediaInput

 	// Try string first
 	// if str, ok := common.GetJsonType(r.Input); ok {
@@ -925,60 +932,74 @@ func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
 	if common.GetJsonType(r.Input) == "string" {
 		var str string
 		_ = common.Unmarshal(r.Input, &str)
-		inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
-		return inputs
+		mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
+		return mediaInputs
 	}

 	// Try array of parts
 	if common.GetJsonType(r.Input) == "array" {
-		var array []any
-		_ = common.Unmarshal(r.Input, &array)
-		for _, itemAny := range array {
-			// Already parsed MediaInput
-			if media, ok := itemAny.(MediaInput); ok {
-				inputs = append(inputs, media)
-				continue
+		var inputs []Input
+		_ = common.Unmarshal(r.Input, &inputs)
+		for _, input := range inputs {
+			if common.GetJsonType(input.Content) == "string" {
+				var str string
+				_ = common.Unmarshal(input.Content, &str)
+				mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
 			}
-			// Generic map
-			item, ok := itemAny.(map[string]any)
-			if !ok {
-				continue
-			}
-			typeVal, ok := item["type"].(string)
-			if !ok {
-				continue
-			}
-			switch typeVal {
-			case "input_text":
-				text, _ := item["text"].(string)
-				inputs = append(inputs, MediaInput{Type: "input_text", Text: text})
-			case "input_image":
-				// image_url may be string or object with url field
-				var imageUrl string
-				switch v := item["image_url"].(type) {
-				case string:
-					imageUrl = v
-				case map[string]any:
-					if url, ok := v["url"].(string); ok {
-						imageUrl = url
+
+			if common.GetJsonType(input.Content) == "array" {
+				var array []any
+				_ = common.Unmarshal(input.Content, &array)
+				for _, itemAny := range array {
+					// Already parsed MediaInput
+					if media, ok := itemAny.(MediaInput); ok {
+						mediaInputs = append(mediaInputs, media)
+						continue
+					}
+
+					// Generic map
+					item, ok := itemAny.(map[string]any)
+					if !ok {
+						continue
+					}
+
+					typeVal, ok := item["type"].(string)
+					if !ok {
+						continue
+					}
+					switch typeVal {
+					case "input_text":
+						text, _ := item["text"].(string)
+						mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
+					case "input_image":
+						// image_url may be string or object with url field
+						var imageUrl string
+						switch v := item["image_url"].(type) {
+						case string:
+							imageUrl = v
+						case map[string]any:
+							if url, ok := v["url"].(string); ok {
+								imageUrl = url
+							}
+						}
+						mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
+					case "input_file":
+						// file_url may be string or object with url field
+						var fileUrl string
+						switch v := item["file_url"].(type) {
+						case string:
+							fileUrl = v
+						case map[string]any:
+							if url, ok := v["url"].(string); ok {
+								fileUrl = url
+							}
+						}
+						mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
 					}
 				}
-				inputs = append(inputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
-			case "input_file":
-				// file_url may be string or object with url field
-				var fileUrl string
-				switch v := item["file_url"].(type) {
-				case string:
-					fileUrl = v
-				case map[string]any:
-					if url, ok := v["url"].(string); ok {
-						fileUrl = url
-					}
-				}
-				inputs = append(inputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
 			}
 		}
 	}

-	return inputs
+	return mediaInputs
 }
@@ -33,7 +33,7 @@ require (
 	github.com/mewkiz/flac v1.0.13
 	github.com/pkg/errors v0.9.1
 	github.com/pquerna/otp v1.5.0
-	github.com/samber/lo v1.39.0
+	github.com/samber/lo v1.52.0
 	github.com/shirou/gopsutil v3.21.11+incompatible
 	github.com/shopspring/decimal v1.4.0
 	github.com/stripe/stripe-go/v81 v81.4.0
@@ -99,6 +99,7 @@ require (
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/ncruces/go-strftime v0.1.9 // indirect
 	github.com/pelletier/go-toml/v2 v2.2.1 // indirect
 	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
 	github.com/tidwall/match v1.1.1 // indirect
@@ -110,13 +111,13 @@ require (
 	github.com/x448/float16 v0.8.4 // indirect
 	github.com/yusufpapurcu/wmi v1.2.3 // indirect
 	golang.org/x/arch v0.21.0 // indirect
-	golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0 // indirect
+	golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
 	golang.org/x/sys v0.38.0 // indirect
 	golang.org/x/text v0.31.0 // indirect
 	google.golang.org/protobuf v1.34.2 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
-	modernc.org/libc v1.22.5 // indirect
-	modernc.org/mathutil v1.5.0 // indirect
-	modernc.org/memory v1.5.0 // indirect
-	modernc.org/sqlite v1.23.1 // indirect
+	modernc.org/libc v1.66.10 // indirect
+	modernc.org/mathutil v1.7.1 // indirect
+	modernc.org/memory v1.11.0 // indirect
+	modernc.org/sqlite v1.40.1 // indirect
 )
@@ -120,6 +120,7 @@ github.com/google/go-tpm v0.9.5/go.mod h1:h9jEsEECg7gtLis0upRBQU+GhYVH6jMjrFxI8u
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ=
 github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
 github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -193,6 +194,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ
 github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
+github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
 github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
 github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
 github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
@@ -219,6 +222,8 @@ github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUA
 github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
 github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA=
 github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
+github.com/samber/lo v1.52.0 h1:Rvi+3BFHES3A8meP33VPAxiBZX/Aws5RxrschYGjomw=
+github.com/samber/lo v1.52.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0=
 github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
 github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
 github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
@@ -285,6 +290,8 @@ golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
 golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
 golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0 h1:985EYyeCOxTpcgOTJpflJUwOeEz0CQOdPt73OzpE9F8=
 golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI=
+golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
+golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
 golang.org/x/image v0.23.0 h1:HseQ7c2OpPKTPVzNjG5fwJsOTCiiwS4QdsYi5XU6H68=
 golang.org/x/image v0.23.0/go.mod h1:wJJBTdLfCCf3tiHa1fNxpZmUI4mmoZvwMCPP0ddoNKY=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
@@ -345,9 +352,17 @@ gorm.io/gorm v1.25.2 h1:gs1o6Vsa+oVKG/a9ElL3XgyGfghFfkKA2SInQaCyMho=
 gorm.io/gorm v1.25.2/go.mod h1:L4uxeKpfBml98NYqVqwAdmV1a2nBtAec/cf3fpucW/k=
 modernc.org/libc v1.22.5 h1:91BNch/e5B0uPbJFgqbxXuOnxBQjlS//icfQEGmvyjE=
 modernc.org/libc v1.22.5/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY=
+modernc.org/libc v1.66.10 h1:yZkb3YeLx4oynyR+iUsXsybsX4Ubx7MQlSYEw4yj59A=
+modernc.org/libc v1.66.10/go.mod h1:8vGSEwvoUoltr4dlywvHqjtAqHBaw0j1jI7iFBTAr2I=
 modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ=
 modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E=
+modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
 modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds=
 modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU=
+modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
+modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
 modernc.org/sqlite v1.23.1 h1:nrSBg4aRQQwq59JpvGEQ15tNxoO5pX/kUjcRNwSAGQM=
 modernc.org/sqlite v1.23.1/go.mod h1:OrDj17Mggn6MhE+iPbBNf7RGKODDE9NFT0f3EwDzJqk=
+modernc.org/sqlite v1.40.1 h1:VfuXcxcUWWKRBuP8+BR9L7VnmusMgBNNnBYGEe9w/iY=
+modernc.org/sqlite v1.40.1/go.mod h1:9fjQZ0mB1LLP0GYrp39oOJXx/I2sxEnZtzCmEQIKvGE=
@@ -2,12 +2,14 @@ package middleware

 import (
 	"fmt"
+	"net"
 	"net/http"
 	"strconv"
 	"strings"

 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/constant"
+	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/ratio_setting"
@@ -240,13 +242,20 @@ func TokenAuth() func(c *gin.Context) {
 			return
 		}

-		allowIpsMap := token.GetIpLimitsMap()
-		if len(allowIpsMap) != 0 {
+		allowIps := token.GetIpLimits()
+		if len(allowIps) > 0 {
 			clientIp := c.ClientIP()
-			if _, ok := allowIpsMap[clientIp]; !ok {
+			logger.LogDebug(c, "Token has IP restrictions, checking client IP %s", clientIp)
+			ip := net.ParseIP(clientIp)
+			if ip == nil {
+				abortWithOpenAiMessage(c, http.StatusForbidden, "无法解析客户端 IP 地址")
+				return
+			}
+			if common.IsIpInCIDRList(ip, allowIps) == false {
 				abortWithOpenAiMessage(c, http.StatusForbidden, "您的 IP 不在令牌允许访问的列表中")
 				return
 			}
+			logger.LogDebug(c, "Client IP %s passed the token IP restrictions check", clientIp)
 		}

 		userCache, err := model.GetUserCache(token.UserId)
@@ -307,7 +316,8 @@ func SetupContextForToken(c *gin.Context, token *model.Token, parts ...string) e
 	} else {
 		c.Set("token_model_limit_enabled", false)
 	}
-	c.Set("token_group", token.Group)
+	common.SetContextKey(c, constant.ContextKeyTokenGroup, token.Group)
+	common.SetContextKey(c, constant.ContextKeyTokenCrossGroupRetry, token.CrossGroupRetry)
 	if len(parts) > 1 {
 		if model.IsAdmin(token.UserId) {
 			c.Set("specific_channel_id", parts[1])
@@ -97,7 +97,12 @@ func Distribute() func(c *gin.Context) {
 						common.SetContextKey(c, constant.ContextKeyUsingGroup, usingGroup)
 					}
 				}
-				channel, selectGroup, err = service.CacheGetRandomSatisfiedChannel(c, usingGroup, modelRequest.Model, 0)
+				channel, selectGroup, err = service.CacheGetRandomSatisfiedChannel(&service.RetryParam{
+					Ctx:        c,
+					ModelName:  modelRequest.Model,
+					TokenGroup: usingGroup,
+					Retry:      common.GetPointer(0),
+				})
 				if err != nil {
 					showGroup := usingGroup
 					if usingGroup == "auto" {
@@ -157,7 +162,7 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 			}
 			midjourneyModel, mjErr, success := service.GetMjRequestModel(relayMode, &midjourneyRequest)
 			if mjErr != nil {
-				return nil, false, fmt.Errorf(mjErr.Description)
+				return nil, false, fmt.Errorf("%s", mjErr.Description)
 			}
 			if midjourneyModel == "" {
 				if !success {
@@ -181,6 +186,10 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 		}
 		c.Set("platform", string(constant.TaskPlatformSuno))
 		c.Set("relay_mode", relayMode)
+	} else if strings.Contains(c.Request.URL.Path, "/v1/videos/") && strings.HasSuffix(c.Request.URL.Path, "/remix") {
+		relayMode := relayconstant.RelayModeVideoSubmit
+		c.Set("relay_mode", relayMode)
+		shouldSelectChannel = false
 	} else if strings.Contains(c.Request.URL.Path, "/v1/videos") {
 		//curl https://api.openai.com/v1/videos \
 		//  -H "Authorization: Bearer $OPENAI_API_KEY" \
@@ -5,32 +5,69 @@ import (
 	"io"
 	"net/http"

+	"github.com/QuantumNous/new-api/constant"
 	"github.com/andybalholm/brotli"
 	"github.com/gin-gonic/gin"
 )

+// readCloser pairs an io.Reader with an optional close callback so the pair
+// can be used where an io.ReadCloser is required.
+type readCloser struct {
+	io.Reader
+	// closeFn is invoked by Close; it may be nil.
+	closeFn func() error
+}
+
+// Close runs closeFn and returns its result, or returns nil when no callback
+// was supplied.
+func (rc *readCloser) Close() error {
+	if rc.closeFn != nil {
+		return rc.closeFn()
+	}
+	return nil
+}
+
 func DecompressRequestMiddleware() gin.HandlerFunc {
 	return func(c *gin.Context) {
 		if c.Request.Body == nil || c.Request.Method == http.MethodGet {
 			c.Next()
 			return
 		}
+		maxMB := constant.MaxRequestBodyMB
+		if maxMB <= 0 {
+			maxMB = 32
+		}
+		maxBytes := int64(maxMB) << 20
+
+		origBody := c.Request.Body
+		wrapMaxBytes := func(body io.ReadCloser) io.ReadCloser {
+			return http.MaxBytesReader(c.Writer, body, maxBytes)
+		}
+
 		switch c.GetHeader("Content-Encoding") {
 		case "gzip":
-			gzipReader, err := gzip.NewReader(c.Request.Body)
+			gzipReader, err := gzip.NewReader(origBody)
 			if err != nil {
+				_ = origBody.Close()
 				c.AbortWithStatus(http.StatusBadRequest)
 				return
 			}
-			defer gzipReader.Close()
-
-			// Replace the request body with the decompressed data
-			c.Request.Body = io.NopCloser(gzipReader)
+			// Replace the request body with the decompressed data, and enforce a max size (post-decompression).
+			c.Request.Body = wrapMaxBytes(&readCloser{
+				Reader: gzipReader,
+				closeFn: func() error {
+					_ = gzipReader.Close()
+					return origBody.Close()
+				},
+			})
 			c.Request.Header.Del("Content-Encoding")
 		case "br":
-			reader := brotli.NewReader(c.Request.Body)
-			c.Request.Body = io.NopCloser(reader)
+			reader := brotli.NewReader(origBody)
+			c.Request.Body = wrapMaxBytes(&readCloser{
+				Reader: reader,
+				closeFn: func() error {
+					return origBody.Close()
+				},
+			})
 			c.Request.Header.Del("Content-Encoding")
+		default:
+			// Even for uncompressed bodies, enforce a max size to avoid huge request allocations.
+			c.Request.Body = wrapMaxBytes(origBody)
 		}

 		// Continue processing the request
@@ -254,6 +254,9 @@ func (channel *Channel) Save() error {
 }

 func (channel *Channel) SaveWithoutKey() error {
+	if channel.Id == 0 {
+		return errors.New("channel ID is 0")
+	}
 	return DB.Omit("key").Save(channel).Error
 }

@@ -6,7 +6,6 @@ import (
 	"strings"

 	"github.com/QuantumNous/new-api/common"
-
 	"github.com/bytedance/gopkg/util/gopool"
 	"gorm.io/gorm"
 )
@@ -27,6 +26,7 @@ type Token struct {
 	AllowIps           *string        `json:"allow_ips" gorm:"default:''"`
 	UsedQuota          int            `json:"used_quota" gorm:"default:0"` // used quota
 	Group              string         `json:"group" gorm:"default:''"`
+	CrossGroupRetry    bool           `json:"cross_group_retry" gorm:"default:false"` // 跨分组重试，仅auto分组有效
 	DeletedAt          gorm.DeletedAt `gorm:"index"`
 }

@@ -34,26 +34,26 @@ func (token *Token) Clean() {
 	token.Key = ""
 }

-func (token *Token) GetIpLimitsMap() map[string]any {
+func (token *Token) GetIpLimits() []string {
 	// delete empty spaces
 	//split with \n
-	ipLimitsMap := make(map[string]any)
+	ipLimits := make([]string, 0)
 	if token.AllowIps == nil {
-		return ipLimitsMap
+		return ipLimits
 	}
 	cleanIps := strings.ReplaceAll(*token.AllowIps, " ", "")
 	if cleanIps == "" {
-		return ipLimitsMap
+		return ipLimits
 	}
 	ips := strings.Split(cleanIps, "\n")
 	for _, ip := range ips {
 		ip = strings.TrimSpace(ip)
 		ip = strings.ReplaceAll(ip, ",", "")
-		if common.IsIP(ip) {
-			ipLimitsMap[ip] = true
+		if ip != "" {
+			ipLimits = append(ipLimits, ip)
 		}
 	}
-	return ipLimitsMap
+	return ipLimits
 }

 func GetAllUserTokens(userId int, startIdx int, num int) ([]*Token, error) {
@@ -185,7 +185,7 @@ func (token *Token) Update() (err error) {
 		}
 	}()
 	err = DB.Model(token).Select("name", "status", "expired_time", "remain_quota", "unlimited_quota",
-		"model_limits_enabled", "model_limits", "allow_ips", "group").Updates(token).Error
+		"model_limits_enabled", "model_limits", "allow_ips", "group", "cross_group_retry").Updates(token).Error
 	return err
 }

@@ -27,6 +27,7 @@ type User struct {
 	Status           int            `json:"status" gorm:"type:int;default:1"` // enabled, disabled
 	Email            string         `json:"email" gorm:"index" validate:"max=50"`
 	GitHubId         string         `json:"github_id" gorm:"column:github_id;index"`
+	DiscordId        string         `json:"discord_id" gorm:"column:discord_id;index"`
 	OidcId           string         `json:"oidc_id" gorm:"column:oidc_id;index"`
 	WeChatId         string         `json:"wechat_id" gorm:"column:wechat_id;index"`
 	TelegramId       string         `json:"telegram_id" gorm:"column:telegram_id;index"`
@@ -539,6 +540,14 @@ func (user *User) FillUserByGitHubId() error {
 	return nil
 }

+// FillUserByDiscordId looks up the first user whose DiscordId equals
+// user.DiscordId and loads it into the receiver. It returns an error only
+// when user.DiscordId is empty.
+func (user *User) FillUserByDiscordId() error {
+	if user.DiscordId == "" {
+		return errors.New("discord id 为空！")
+	}
+	// NOTE(review): the query result (including any lookup error) is discarded,
+	// so nil is returned even when no matching user is found.
+	DB.Where(User{DiscordId: user.DiscordId}).First(user)
+	return nil
+}
+
 func (user *User) FillUserByOidcId() error {
 	if user.OidcId == "" {
 		return errors.New("oidc id 为空！")
@@ -578,6 +587,10 @@ func IsGitHubIdAlreadyTaken(githubId string) bool {
 	return DB.Unscoped().Where("github_id = ?", githubId).Find(&User{}).RowsAffected == 1
 }

+// IsDiscordIdAlreadyTaken reports whether exactly one user row has the given
+// discord_id. The query is Unscoped, which presumably makes it include
+// soft-deleted users as well — confirm against the GORM documentation.
+func IsDiscordIdAlreadyTaken(discordId string) bool {
+	return DB.Unscoped().Where("discord_id = ?", discordId).Find(&User{}).RowsAffected == 1
+}
+
 func IsOidcIdAlreadyTaken(oidcId string) bool {
 	return DB.Where("oidc_id = ?", oidcId).Find(&User{}).RowsAffected == 1
 }
@@ -67,8 +67,11 @@ func AudioHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type
 		service.ResetStatusCode(newAPIError, statusCodeMappingStr)
 		return newAPIError
 	}
-
-	postConsumeQuota(c, info, usage.(*dto.Usage), "")
+	if usage.(*dto.Usage).CompletionTokenDetails.AudioTokens > 0 || usage.(*dto.Usage).PromptTokensDetails.AudioTokens > 0 {
+		service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
+	} else {
+		postConsumeQuota(c, info, usage.(*dto.Usage), "")
+	}

 	return nil
 }
@@ -47,7 +47,7 @@ type TaskAdaptor interface {
 	GetChannelName() string

 	// FetchTask
-	FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error)
+	FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error)

 	ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, error)
 }
@@ -18,6 +18,7 @@ var awsModelIDMap = map[string]string{
 	"claude-opus-4-1-20250805":   "anthropic.claude-opus-4-1-20250805-v1:0",
 	"claude-sonnet-4-5-20250929": "anthropic.claude-sonnet-4-5-20250929-v1:0",
 	"claude-haiku-4-5-20251001":  "anthropic.claude-haiku-4-5-20251001-v1:0",
+	"claude-opus-4-5-20251101":   "anthropic.claude-opus-4-5-20251101-v1:0",
 	// Nova models
 	"nova-micro-v1:0":   "amazon.nova-micro-v1:0",
 	"nova-lite-v1:0":    "amazon.nova-lite-v1:0",
@@ -76,6 +77,11 @@ var awsModelCanCrossRegionMap = map[string]map[string]bool{
 		"ap": true,
 		"eu": true,
 	},
+	"anthropic.claude-opus-4-5-20251101-v1:0": {
+		"us": true,
+		"ap": true,
+		"eu": true,
+	},
 	"anthropic.claude-haiku-4-5-20251001-v1:0": {
 		"us": true,
 		"ap": true,
@@ -18,6 +18,7 @@ import (
 	"github.com/gin-gonic/gin"
 	"github.com/pkg/errors"

+	"github.com/QuantumNous/new-api/setting/model_setting"
 	"github.com/aws/aws-sdk-go-v2/aws"
 	"github.com/aws/aws-sdk-go-v2/credentials"
 	"github.com/aws/aws-sdk-go-v2/service/bedrockruntime"
@@ -25,6 +26,17 @@ import (
 	"github.com/aws/smithy-go/auth/bearer"
 )

+// getAwsErrorStatusCode returns the HTTP status code carried by err, or
+// http.StatusInternalServerError when no error in its chain exposes one.
+func getAwsErrorStatusCode(err error) int {
+	// Match any error in the chain that provides an HTTPStatusCode() int method,
+	// without depending on a concrete error type.
+	var httpErr interface{ HTTPStatusCode() int }
+	if errors.As(err, &httpErr) {
+		return httpErr.HTTPStatusCode()
+	}
+	// Default to 500 if we can't determine the status code
+	return http.StatusInternalServerError
+}
+
 func newAwsClient(c *gin.Context, info *relaycommon.RelayInfo) (*bedrockruntime.Client, error) {
 	var (
 		httpClient *http.Client
@@ -118,7 +130,7 @@ func doAwsClientRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor,
 				Accept:      aws.String("application/json"),
 				ContentType: aws.String("application/json"),
 			}
-			awsReq.Body, err = common.Marshal(awsClaudeReq)
+			awsReq.Body, err = buildAwsRequestBody(c, info, awsClaudeReq)
 			if err != nil {
 				return nil, types.NewError(errors.Wrap(err, "marshal aws request fail"), types.ErrorCodeBadRequestBody)
 			}
@@ -130,7 +142,7 @@ func doAwsClientRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor,
 				Accept:      aws.String("application/json"),
 				ContentType: aws.String("application/json"),
 			}
-			awsReq.Body, err = common.Marshal(awsClaudeReq)
+			awsReq.Body, err = buildAwsRequestBody(c, info, awsClaudeReq)
 			if err != nil {
 				return nil, types.NewError(errors.Wrap(err, "marshal aws request fail"), types.ErrorCodeBadRequestBody)
 			}
@@ -140,6 +152,24 @@ func doAwsClientRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor,
 	}
 }

+// buildAwsRequestBody returns the JSON payload for an AWS request: when pass-through is enabled (globally or on the channel) it is the client's original body without "model" and "stream"; otherwise it is the marshaled awsClaudeReq.
+func buildAwsRequestBody(c *gin.Context, info *relaycommon.RelayInfo, awsClaudeReq any) ([]byte, error) {
+	if model_setting.GetGlobalSettings().PassThroughRequestEnabled || info.ChannelSetting.PassThroughBodyEnabled {
+		body, err := common.GetRequestBody(c)
+		if err != nil {
+			return nil, errors.Wrap(err, "get request body for pass-through fail")
+		}
+		var data map[string]interface{}
+		if err := common.Unmarshal(body, &data); err != nil {
+			return nil, errors.Wrap(err, "pass-through unmarshal request body fail")
+		}
+		delete(data, "model")
+		delete(data, "stream")
+		return common.Marshal(data)
+	}
+	return common.Marshal(awsClaudeReq)
+}
+
 func getAwsRegionPrefix(awsRegionId string) string {
 	parts := strings.Split(awsRegionId, "-")
 	regionPrefix := ""
@@ -173,7 +203,8 @@ func awsHandler(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor) (*types

 	awsResp, err := a.AwsClient.InvokeModel(c.Request.Context(), a.AwsReq.(*bedrockruntime.InvokeModelInput))
 	if err != nil {
-		return types.NewOpenAIError(errors.Wrap(err, "InvokeModel"), types.ErrorCodeAwsInvokeError, http.StatusInternalServerError), nil
+		statusCode := getAwsErrorStatusCode(err)
+		return types.NewOpenAIError(errors.Wrap(err, "InvokeModel"), types.ErrorCodeAwsInvokeError, statusCode), nil
 	}

 	claudeInfo := &claude.ClaudeResponseInfo{
@@ -199,7 +230,8 @@ func awsHandler(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor) (*types
 func awsStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor) (*types.NewAPIError, *dto.Usage) {
 	awsResp, err := a.AwsClient.InvokeModelWithResponseStream(c.Request.Context(), a.AwsReq.(*bedrockruntime.InvokeModelWithResponseStreamInput))
 	if err != nil {
-		return types.NewOpenAIError(errors.Wrap(err, "InvokeModelWithResponseStream"), types.ErrorCodeAwsInvokeError, http.StatusInternalServerError), nil
+		statusCode := getAwsErrorStatusCode(err)
+		return types.NewOpenAIError(errors.Wrap(err, "InvokeModelWithResponseStream"), types.ErrorCodeAwsInvokeError, statusCode), nil
 	}
 	stream := awsResp.GetStream()
 	defer stream.Close()
@@ -238,7 +270,8 @@ func handleNovaRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor)

 	awsResp, err := a.AwsClient.InvokeModel(c.Request.Context(), a.AwsReq.(*bedrockruntime.InvokeModelInput))
 	if err != nil {
-		return types.NewError(errors.Wrap(err, "InvokeModel"), types.ErrorCodeChannelAwsClientError), nil
+		statusCode := getAwsErrorStatusCode(err)
+		return types.NewOpenAIError(errors.Wrap(err, "InvokeModel"), types.ErrorCodeAwsInvokeError, statusCode), nil
 	}

 	// 解析Nova响应
@@ -150,7 +150,7 @@ func baiduHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respon
 		return types.NewError(err, types.ErrorCodeBadResponseBody), nil
 	}
 	if baiduResponse.ErrorMsg != "" {
-		return types.NewError(fmt.Errorf(baiduResponse.ErrorMsg), types.ErrorCodeBadResponseBody), nil
+		return types.NewError(fmt.Errorf("%s", baiduResponse.ErrorMsg), types.ErrorCodeBadResponseBody), nil
 	}
 	fullTextResponse := responseBaidu2OpenAI(&baiduResponse)
 	jsonResponse, err := json.Marshal(fullTextResponse)
@@ -175,7 +175,7 @@ func baiduEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *ht
 		return types.NewError(err, types.ErrorCodeBadResponseBody), nil
 	}
 	if baiduResponse.ErrorMsg != "" {
-		return types.NewError(fmt.Errorf(baiduResponse.ErrorMsg), types.ErrorCodeBadResponseBody), nil
+		return types.NewError(fmt.Errorf("%s", baiduResponse.ErrorMsg), types.ErrorCodeBadResponseBody), nil
 	}
 	fullTextResponse := embeddingResponseBaidu2OpenAI(&baiduResponse)
 	jsonResponse, err := json.Marshal(fullTextResponse)
@@ -9,6 +9,7 @@ var ModelList = []string{
 	"claude-3-opus-20240229",
 	"claude-3-haiku-20240307",
 	"claude-3-5-haiku-20241022",
+	"claude-haiku-4-5-20251001",
 	"claude-3-5-sonnet-20240620",
 	"claude-3-5-sonnet-20241022",
 	"claude-3-7-sonnet-20250219",
@@ -21,6 +22,8 @@ var ModelList = []string{
 	"claude-opus-4-1-20250805-thinking",
 	"claude-sonnet-4-5-20250929",
 	"claude-sonnet-4-5-20250929-thinking",
+	"claude-opus-4-5-20251101",
+	"claude-opus-4-5-20251101-thinking",
 }

 var ChannelName = "claude"
@@ -673,7 +673,7 @@ func HandleStreamResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
 func HandleStreamFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, claudeInfo *ClaudeResponseInfo, requestMode int) {

 	if requestMode == RequestModeCompletion {
-		claudeInfo.Usage = service.ResponseText2Usage(claudeInfo.ResponseText.String(), info.UpstreamModelName, info.PromptTokens)
+		claudeInfo.Usage = service.ResponseText2Usage(c, claudeInfo.ResponseText.String(), info.UpstreamModelName, info.GetEstimatePromptTokens())
 	} else {
 		if claudeInfo.Usage.PromptTokens == 0 {
 			//上游出错
@@ -682,7 +682,7 @@ func HandleStreamFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, clau
 			if common.DebugEnabled {
 				common.SysLog("claude response usage is not complete, maybe upstream error")
 			}
-			claudeInfo.Usage = service.ResponseText2Usage(claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens)
+			claudeInfo.Usage = service.ResponseText2Usage(c, claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens)
 		}
 	}

@@ -734,10 +734,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
 		return types.WithClaudeError(*claudeError, http.StatusInternalServerError)
 	}
 	if requestMode == RequestModeCompletion {
-		completionTokens := service.CountTextToken(claudeResponse.Completion, info.OriginModelName)
-		claudeInfo.Usage.PromptTokens = info.PromptTokens
-		claudeInfo.Usage.CompletionTokens = completionTokens
-		claudeInfo.Usage.TotalTokens = info.PromptTokens + completionTokens
+		claudeInfo.Usage = service.ResponseText2Usage(c, claudeResponse.Completion, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	} else {
 		claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
 		claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
@@ -74,7 +74,7 @@ func cfStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Res
 	if err := scanner.Err(); err != nil {
 		logger.LogError(c, "error_scanning_stream_response: "+err.Error())
 	}
-	usage := service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
+	usage := service.ResponseText2Usage(c, responseText, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	if info.ShouldIncludeUsage {
 		response := helper.GenerateFinalUsageResponse(id, info.StartTime.Unix(), info.UpstreamModelName, *usage)
 		err := helper.ObjectData(c, response)
@@ -105,7 +105,7 @@ func cfHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response)
 	for _, choice := range response.Choices {
 		responseText += choice.Message.StringContent()
 	}
-	usage := service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
+	usage := service.ResponseText2Usage(c, responseText, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	response.Usage = *usage
 	response.Id = helper.GetResponseID(c)
 	jsonResponse, err := json.Marshal(response)
@@ -142,10 +142,6 @@ func cfSTTHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respon
 	c.Writer.WriteHeader(resp.StatusCode)
 	_, _ = c.Writer.Write(jsonResponse)

-	usage := &dto.Usage{}
-	usage.PromptTokens = info.PromptTokens
-	usage.CompletionTokens = service.CountTextToken(cfResp.Result.Text, info.UpstreamModelName)
-	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
-
+	usage := service.ResponseText2Usage(c, cfResp.Result.Text, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	return nil, usage
 }
@@ -165,7 +165,7 @@ func cohereStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
 		}
 	})
 	if usage.PromptTokens == 0 {
-		usage = service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
+		usage = service.ResponseText2Usage(c, responseText, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	}
 	return usage, nil
 }
@@ -225,9 +225,9 @@ func cohereRerankHandler(c *gin.Context, resp *http.Response, info *relaycommon.
 	}
 	usage := dto.Usage{}
 	if cohereResp.Meta.BilledUnits.InputTokens == 0 {
-		usage.PromptTokens = info.PromptTokens
+		usage.PromptTokens = info.GetEstimatePromptTokens()
 		usage.CompletionTokens = 0
-		usage.TotalTokens = info.PromptTokens
+		usage.TotalTokens = info.GetEstimatePromptTokens()
 	} else {
 		usage.PromptTokens = cohereResp.Meta.BilledUnits.InputTokens
 		usage.CompletionTokens = cohereResp.Meta.BilledUnits.OutputTokens
@@ -142,7 +142,7 @@ func cozeChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *ht
 	helper.Done(c)

 	if usage.TotalTokens == 0 {
-		usage = service.ResponseText2Usage(responseText, info.UpstreamModelName, c.GetInt("coze_input_count"))
+		usage = service.ResponseText2Usage(c, responseText, info.UpstreamModelName, c.GetInt("coze_input_count"))
 	}

 	return usage, nil
@@ -208,7 +208,7 @@ func handleCozeEvent(c *gin.Context, event string, data string, responseText *st
 			return
 		}

-		common.SysLog(fmt.Sprintf("stream event error: ", errorData.Code, errorData.Message))
+		common.SysLog(fmt.Sprintf("stream event error: %v %v", errorData.Code, errorData.Message))
 	}
 }

@@ -246,7 +246,7 @@ func difyStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
 	})
 	helper.Done(c)
 	if usage.TotalTokens == 0 {
-		usage = service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
+		usage = service.ResponseText2Usage(c, responseText, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	}
 	usage.CompletionTokens += nodeToken
 	return usage, nil
@@ -137,6 +137,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
 		} else if strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
+		} else if baseModel, level := parseThinkingLevelSuffix(info.UpstreamModelName); level != "" {
+			info.UpstreamModelName = baseModel
 		}
 	}

@@ -32,7 +32,7 @@ var SafetySettingList = []string{
 	"HARM_CATEGORY_HATE_SPEECH",
 	"HARM_CATEGORY_SEXUALLY_EXPLICIT",
 	"HARM_CATEGORY_DANGEROUS_CONTENT",
-	"HARM_CATEGORY_CIVIC_INTEGRITY",
+	//"HARM_CATEGORY_CIVIC_INTEGRITY", This item is deprecated!
 }

 var ChannelName = "google gemini"
@@ -3,7 +3,6 @@ package gemini
 import (
 	"io"
 	"net/http"
-	"strings"

 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/dto"
@@ -13,8 +12,6 @@ import (
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/types"

-	"github.com/pkg/errors"
-
 	"github.com/gin-gonic/gin"
 )

@@ -72,10 +69,7 @@ func NativeGeminiEmbeddingHandler(c *gin.Context, resp *http.Response, info *rel
 		println(string(responseBody))
 	}

-	usage := &dto.Usage{
-		PromptTokens: info.PromptTokens,
-		TotalTokens:  info.PromptTokens,
-	}
+	usage := service.ResponseText2Usage(c, "", info.UpstreamModelName, info.GetEstimatePromptTokens())

 	if info.IsGeminiBatchEmbedding {
 		var geminiResponse dto.GeminiBatchEmbeddingResponse
@@ -97,80 +91,15 @@ func NativeGeminiEmbeddingHandler(c *gin.Context, resp *http.Response, info *rel
 }

 func GeminiTextGenerationStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
-	var usage = &dto.Usage{}
-	var imageCount int
-
 	helper.SetEventStreamHeaders(c)

-	responseText := strings.Builder{}
-
-	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
-		var geminiResponse dto.GeminiChatResponse
-		err := common.UnmarshalJsonStr(data, &geminiResponse)
+	return geminiStreamHandler(c, info, resp, func(data string, geminiResponse *dto.GeminiChatResponse) bool {
+		err := helper.StringData(c, data)
 		if err != nil {
-			logger.LogError(c, "error unmarshalling stream response: "+err.Error())
+			logger.LogError(c, "failed to write stream data: "+err.Error())
 			return false
 		}
-
-		// 统计图片数量
-		for _, candidate := range geminiResponse.Candidates {
-			for _, part := range candidate.Content.Parts {
-				if part.InlineData != nil && part.InlineData.MimeType != "" {
-					imageCount++
-				}
-				if part.Text != "" {
-					responseText.WriteString(part.Text)
-				}
-			}
-		}
-
-		// 更新使用量统计
-		if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
-			usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
-			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount
-			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
-			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
-			for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
-				if detail.Modality == "AUDIO" {
-					usage.PromptTokensDetails.AudioTokens = detail.TokenCount
-				} else if detail.Modality == "TEXT" {
-					usage.PromptTokensDetails.TextTokens = detail.TokenCount
-				}
-			}
-		}
-
-		// 直接发送 GeminiChatResponse 响应
-		err = helper.StringData(c, data)
-		if err != nil {
-			logger.LogError(c, err.Error())
-		}
 		info.SendResponseCount++
 		return true
 	})
-
-	if info.SendResponseCount == 0 {
-		return nil, types.NewOpenAIError(errors.New("no response received from Gemini API"), types.ErrorCodeEmptyResponse, http.StatusInternalServerError)
-	}
-
-	if imageCount != 0 {
-		if usage.CompletionTokens == 0 {
-			usage.CompletionTokens = imageCount * 258
-		}
-	}
-
-	// 如果usage.CompletionTokens为0，则使用本地统计的completion tokens
-	if usage.CompletionTokens == 0 {
-		str := responseText.String()
-		if len(str) > 0 {
-			usage = service.ResponseText2Usage(responseText.String(), info.UpstreamModelName, info.PromptTokens)
-		} else {
-			// 空补全，不需要使用量
-			usage = &dto.Usage{}
-		}
-	}
-
-	// 移除流式响应结尾的[Done]，因为Gemini API没有发送Done的行为
-	//helper.Done(c)
-
-	return usage, nil
 }
@@ -19,8 +19,8 @@ import (
 	"github.com/QuantumNous/new-api/relay/helper"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/model_setting"
+	"github.com/QuantumNous/new-api/setting/reasoning"
 	"github.com/QuantumNous/new-api/types"
-
 	"github.com/gin-gonic/gin"
 )

@@ -122,6 +122,14 @@ func clampThinkingBudgetByEffort(modelName string, effort string) int {
 	return clampThinkingBudget(modelName, maxBudget)
 }

+func parseThinkingLevelSuffix(modelName string) (string, string) {
+	base, level, ok := reasoning.TrimEffortSuffix(modelName)
+	if !ok {
+		return modelName, "" // TrimEffortSuffix reported !ok: keep the name unchanged and return an empty level
+	}
+	return base, level
+}
+
 func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) {
 	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
 		modelName := info.UpstreamModelName
@@ -178,6 +186,12 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
 					ThinkingBudget: common.GetPointer(0),
 				}
 			}
+		} else if _, level := parseThinkingLevelSuffix(modelName); level != "" {
+			geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
+				IncludeThoughts: true,
+				ThinkingLevel:   level,
+			}
+			info.ReasoningEffort = level
 		}
 	}
 }
@@ -208,6 +222,7 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i

 	adaptorWithExtraBody := false

+	// patch extra_body
 	if len(textRequest.ExtraBody) > 0 {
 		if !strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
 			var extraBody map[string]interface{}
@@ -239,6 +254,39 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 						}
 					}
 				}
+
+				// reject the camelCase key imageConfig; callers must send image_config instead
+				if _, hasErrorParam := googleBody["imageConfig"]; hasErrorParam {
+					return nil, errors.New("extra_body.google.imageConfig is not supported, use extra_body.google.image_config instead")
+				}
+
+				if imageConfig, ok := googleBody["image_config"].(map[string]interface{}); ok {
+					// reject the camelCase key aspectRatio; callers must send aspect_ratio instead
+					if _, hasErrorParam := imageConfig["aspectRatio"]; hasErrorParam {
+						return nil, errors.New("extra_body.google.image_config.aspectRatio is not supported, use extra_body.google.image_config.aspect_ratio instead")
+					}
+					// reject the camelCase key imageSize; callers must send image_size instead
+					if _, hasErrorParam := imageConfig["imageSize"]; hasErrorParam {
+						return nil, errors.New("extra_body.google.image_config.imageSize is not supported, use extra_body.google.image_config.image_size instead")
+					}
+
+					// convert snake_case to camelCase for Gemini API
+					geminiImageConfig := make(map[string]interface{})
+					if aspectRatio, ok := imageConfig["aspect_ratio"]; ok {
+						geminiImageConfig["aspectRatio"] = aspectRatio
+					}
+					if imageSize, ok := imageConfig["image_size"]; ok {
+						geminiImageConfig["imageSize"] = imageSize
+					}
+
+					if len(geminiImageConfig) > 0 {
+						imageConfigBytes, err := common.Marshal(geminiImageConfig)
+						if err != nil {
+							return nil, fmt.Errorf("failed to marshal image_config: %w", err)
+						}
+						geminiRequest.GenerationConfig.ImageConfig = imageConfigBytes
+					}
+				}
 			}
 		}
 	}
@@ -412,9 +460,68 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 				if part.Text == "" {
 					continue
 				}
-				parts = append(parts, dto.GeminiPart{
-					Text: part.Text,
-				})
+				// Detect inline markdown images of the form ![image](data:image/jpeg;base64,xxxxxxxxxxxx).
+				// Plain string searches are used instead of a regexp to avoid performance problems on large text.
+				text := part.Text
+				hasMarkdownImage := false
+				for {
+					// Quick check: does the text contain a markdown image marker at all?
+					startIdx := strings.Index(text, "![")
+					if startIdx == -1 {
+						break
+					}
+					// Find the "](data:" that opens the data URL.
+					bracketIdx := strings.Index(text[startIdx:], "](data:")
+					if bracketIdx == -1 {
+						break
+					}
+					bracketIdx += startIdx
+					// Find the closing ")".
+					closeIdx := strings.Index(text[bracketIdx+2:], ")")
+					if closeIdx == -1 {
+						break
+					}
+					closeIdx += bracketIdx + 2
+
+					hasMarkdownImage = true
+					// Emit the text that precedes the image.
+					if startIdx > 0 {
+						textBefore := text[:startIdx]
+						if textBefore != "" {
+							parts = append(parts, dto.GeminiPart{
+								Text: textBefore,
+							})
+						}
+					}
+					// Extract the data URL: everything after "](" up to, but not including, ")".
+					dataUrl := text[bracketIdx+2 : closeIdx]
+					imageNum += 1
+					if constant.GeminiVisionMaxImageNum != -1 && imageNum > constant.GeminiVisionMaxImageNum {
+						return nil, fmt.Errorf("too many images in the message, max allowed is %d", constant.GeminiVisionMaxImageNum)
+					}
+					format, base64String, err := service.DecodeBase64FileData(dataUrl)
+					if err != nil {
+						return nil, fmt.Errorf("decode markdown base64 image data failed: %s", err.Error())
+					}
+					imgPart := dto.GeminiPart{
+						InlineData: &dto.GeminiInlineData{
+							MimeType: format,
+							Data:     base64String,
+						},
+					}
+					if shouldAttachThoughtSignature {
+						imgPart.ThoughtSignature = json.RawMessage(strconv.Quote(thoughtSignatureBypassValue))
+					}
+					parts = append(parts, imgPart)
+					// Continue with the text after the image.
+					text = text[closeIdx+1:]
+				}
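+				// Append the original text when no markdown image was found. NOTE(review): when images were found, the text left after the last image is not appended here — confirm this is intended.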
+				if !hasMarkdownImage {
+					parts = append(parts, dto.GeminiPart{
+						Text: part.Text,
+					})
+				}
 			} else if part.Type == dto.ContentTypeImageURL {
 				imageNum += 1

@@ -484,6 +591,17 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 			}
 		}

+		// If a signature is required but has not been attached yet (no tool_calls, or tool_calls is empty),
+		// attach thoughtSignature to the first text part.
+		if shouldAttachThoughtSignature && !signatureAttached && len(parts) > 0 {
+			for i := range parts {
+				if parts[i].Text != "" {
+					parts[i].ThoughtSignature = json.RawMessage(strconv.Quote(thoughtSignatureBypassValue))
+					break
+				}
+			}
+		}
+
 		content.Parts = parts

 		// there's no assistant role in gemini and API shall vomit if Role is not user or model
@@ -954,14 +1072,10 @@ func handleFinalStream(c *gin.Context, info *relaycommon.RelayInfo, resp *dto.Ch
 	return nil
 }

-func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
-	// responseText := ""
-	id := helper.GetResponseID(c)
-	createAt := common.GetTimestamp()
-	responseText := strings.Builder{}
+func geminiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response, callback func(data string, geminiResponse *dto.GeminiChatResponse) bool) (*dto.Usage, *types.NewAPIError) {
 	var usage = &dto.Usage{}
 	var imageCount int
-	finishReason := constant.FinishReasonStop
+	responseText := strings.Builder{}

 	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
 		var geminiResponse dto.GeminiChatResponse
@@ -971,6 +1085,7 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 			return false
 		}

+		// Count the images in the response.
 		for _, candidate := range geminiResponse.Candidates {
 			for _, part := range candidate.Content.Parts {
 				if part.InlineData != nil && part.InlineData.MimeType != "" {
@@ -982,14 +1097,10 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 			}
 		}

-		response, isStop := streamResponseGeminiChat2OpenAI(&geminiResponse)
-
-		response.Id = id
-		response.Created = createAt
-		response.Model = info.UpstreamModelName
+		// Update the usage statistics.
 		if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
 			usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
-			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
+			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount
 			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
 			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
 			for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
@@ -1000,6 +1111,45 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 				}
 			}
 		}
+
+		return callback(data, &geminiResponse)
+	})
+
+	if imageCount != 0 {
+		if usage.CompletionTokens == 0 {
+			usage.CompletionTokens = imageCount * 1400
+		}
+	}
+
+	usage.PromptTokensDetails.TextTokens = usage.PromptTokens
+	if usage.TotalTokens > 0 {
+		usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
+	}
+
+	if usage.CompletionTokens <= 0 {
+		str := responseText.String()
+		if len(str) > 0 {
+			usage = service.ResponseText2Usage(c, responseText.String(), info.UpstreamModelName, info.GetEstimatePromptTokens())
+		} else {
+			usage = &dto.Usage{}
+		}
+	}
+
+	return usage, nil
+}
+
+func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+	id := helper.GetResponseID(c)
+	createAt := common.GetTimestamp()
+	finishReason := constant.FinishReasonStop
+
+	usage, err := geminiStreamHandler(c, info, resp, func(data string, geminiResponse *dto.GeminiChatResponse) bool {
+		response, isStop := streamResponseGeminiChat2OpenAI(geminiResponse)
+
+		response.Id = id
+		response.Created = createAt
+		response.Model = info.UpstreamModelName
+
 		logger.LogDebug(c, fmt.Sprintf("info.SendResponseCount = %d", info.SendResponseCount))
 		if info.SendResponseCount == 0 {
 			// send first response
@@ -1015,7 +1165,7 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 					emptyResponse.Choices[0].Delta.ToolCalls = copiedToolCalls
 				}
 				finishReason = constant.FinishReasonToolCalls
-				err = handleStream(c, info, emptyResponse)
+				err := handleStream(c, info, emptyResponse)
 				if err != nil {
 					logger.LogError(c, err.Error())
 				}
@@ -1025,14 +1175,14 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 					response.Choices[0].FinishReason = nil
 				}
 			} else {
-				err = handleStream(c, info, emptyResponse)
+				err := handleStream(c, info, emptyResponse)
 				if err != nil {
 					logger.LogError(c, err.Error())
 				}
 			}
 		}

-		err = handleStream(c, info, response)
+		err := handleStream(c, info, response)
 		if err != nil {
 			logger.LogError(c, err.Error())
 		}
@@ -1042,40 +1192,15 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 		return true
 	})

-	if info.SendResponseCount == 0 {
-		// 空补全，报错不计费
-		// empty response, throw an error
-		return nil, types.NewOpenAIError(errors.New("no response received from Gemini API"), types.ErrorCodeEmptyResponse, http.StatusInternalServerError)
-	}
-
-	if imageCount != 0 {
-		if usage.CompletionTokens == 0 {
-			usage.CompletionTokens = imageCount * 258
-		}
-	}
-
-	usage.PromptTokensDetails.TextTokens = usage.PromptTokens
-	usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
-
-	if usage.CompletionTokens == 0 {
-		str := responseText.String()
-		if len(str) > 0 {
-			usage = service.ResponseText2Usage(responseText.String(), info.UpstreamModelName, info.PromptTokens)
-		} else {
-			// 空补全，不需要使用量
-			usage = &dto.Usage{}
-		}
+	if err != nil {
+		return usage, err
 	}

 	response := helper.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage)
-	err := handleFinalStream(c, info, response)
-	if err != nil {
-		common.SysLog("send final response failed: " + err.Error())
+	handleErr := handleFinalStream(c, info, response)
+	if handleErr != nil {
+		common.SysLog("send final response failed: " + handleErr.Error())
 	}
-	//if info.RelayFormat == relaycommon.RelayFormatOpenAI {
-	//	helper.Done(c)
-	//}
-	//resp.Body.Close()
 	return usage, nil
 }

@@ -1177,11 +1302,7 @@ func GeminiEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *h
 	// Google has not yet clarified how embedding models will be billed
 	// refer to openai billing method to use input tokens billing
 	// https://platform.openai.com/docs/guides/embeddings#what-are-embeddings
-	usage := &dto.Usage{
-		PromptTokens:     info.PromptTokens,
-		CompletionTokens: 0,
-		TotalTokens:      info.PromptTokens,
-	}
+	usage := service.ResponseText2Usage(c, "", info.UpstreamModelName, info.GetEstimatePromptTokens())
 	openAIResponse.Usage = *usage

 	jsonResponse, jsonErr := common.Marshal(openAIResponse)
@@ -163,7 +163,7 @@ func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.Re
 	}

 	usage = &dto.Usage{
-		PromptTokens:     info.PromptTokens,
+		PromptTokens:     info.GetEstimatePromptTokens(),
 		CompletionTokens: 0,
 		TotalTokens:      int(minimaxResp.ExtraInfo.UsageCharacters),
 	}
@@ -6,6 +6,7 @@ import (
 	"io"
 	"net/http"

+	channelconstant "github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/relay/channel"
 	"github.com/QuantumNous/new-api/relay/channel/claude"
@@ -44,6 +45,16 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
+	baseURL := info.ChannelBaseUrl
+	if specialPlan, ok := channelconstant.ChannelSpecialBases[baseURL]; ok {
+		if info.RelayFormat == types.RelayFormatClaude {
+			return fmt.Sprintf("%s/v1/messages", specialPlan.ClaudeBaseURL), nil
+		}
+		if info.RelayFormat == types.RelayFormatOpenAI {
+			return fmt.Sprintf("%s/chat/completions", specialPlan.OpenAIBaseURL), nil
+		}
+	}
+
 	switch info.RelayFormat {
 	case types.RelayFormatClaude:
 		return fmt.Sprintf("%s/anthropic/v1/messages", info.ChannelBaseUrl), nil
@@ -42,7 +42,7 @@ type Adaptor struct {
 // support OAI models: o1-mini/o3-mini/o4-mini/o1/o3 etc...
 // minimal effort only available in gpt-5
 func parseReasoningEffortFromModelSuffix(model string) (string, string) {
-	effortSuffixes := []string{"-high", "-minimal", "-low", "-medium", "-none"}
+	effortSuffixes := []string{"-high", "-minimal", "-low", "-medium", "-none", "-xhigh"}
 	for _, suffix := range effortSuffixes {
 		if strings.HasSuffix(model, suffix) {
 			effort := strings.TrimPrefix(suffix, "-")
@@ -306,10 +306,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 			request.Temperature = nil
 		}

+		// gpt-5 series adaptation: zero out parameters that are no longer supported.
 		if strings.HasPrefix(info.UpstreamModelName, "gpt-5") {
-			if info.UpstreamModelName != "gpt-5-chat-latest" {
-				request.Temperature = nil
-			}
+			request.Temperature = nil
+			request.TopP = 0 // oai 的 top_p 默认值是 1.0，但是为了 omitempty 属性直接不传，这里显式设置为 0
+			request.LogProbs = false
 		}

 		// 转换模型推理力度后缀
@@ -0,0 +1,145 @@
+package openai
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"math"
+	"net/http"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/constant"
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/QuantumNous/new-api/logger"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/relay/helper"
+	"github.com/QuantumNous/new-api/service"
+	"github.com/QuantumNous/new-api/types"
+	"github.com/gin-gonic/gin"
+)
+
+func OpenaiTTSHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) *dto.Usage {
+	// OpenaiTTSHandler relays an upstream text-to-speech response to the client and returns the usage to record.
+	// The upstream status code has already been judged before this point, so a failure while reading the body
+	// is treated as non-recoverable: it is logged here and never returned as an error for external retry.
+	// This is analogous to nginx's load balancing, which only retries when the request cannot be made or the
+	// upstream returns a specific status code; once the upstream has written its header, a later failure of
+	// the response body is final and the response can be terminated directly.
+	defer service.CloseResponseBodyGracefully(resp)
+	usage := &dto.Usage{}
+	usage.PromptTokens = info.GetEstimatePromptTokens()
+	usage.TotalTokens = info.GetEstimatePromptTokens() // starting point: prompt estimate only; refined in both branches below
+	for k, v := range resp.Header {
+		c.Writer.Header().Set(k, v[0]) // only the first value of each upstream header is forwarded
+	}
+	c.Writer.WriteHeader(resp.StatusCode)
+
+	if info.IsStream {
+		helper.StreamScannerHandler(c, resp, info, func(data string) bool {
+			if service.SundaySearch(data, "usage") { // only chunks matched for "usage" are decoded (presumably a substring search — confirm)
+				var simpleResponse dto.SimpleResponse
+				err := common.Unmarshal([]byte(data), &simpleResponse)
+				if err != nil {
+					logger.LogError(c, err.Error()) // decode failures are only logged; the chunk is still forwarded below
+				}
+				if simpleResponse.Usage.TotalTokens != 0 { // replace the estimate only when a non-zero total was decoded
+					usage.PromptTokens = simpleResponse.Usage.InputTokens
+					usage.CompletionTokens = simpleResponse.OutputTokens
+					usage.TotalTokens = simpleResponse.TotalTokens
+				}
+			}
+			_ = helper.StringData(c, data) // every chunk is passed on; the returned error is discarded
+			return true
+		})
+	} else {
+		common.SetContextKey(c, constant.ContextKeyLocalCountTokens, true)
+		// Read the whole response body into a buffer.
+		bodyBytes, err := io.ReadAll(resp.Body)
+		if err != nil {
+			logger.LogError(c, fmt.Sprintf("failed to read TTS response body: %v", err))
+			c.Writer.WriteHeaderNow()
+			return usage
+		}
+
+		// Write the response to the client.
+		c.Writer.WriteHeaderNow()
+		_, err = c.Writer.Write(bodyBytes)
+		if err != nil {
+			logger.LogError(c, fmt.Sprintf("failed to write TTS response: %v", err))
+		}
+
+		// Compute the audio duration and update usage.
+		audioFormat := "mp3" // default format when the request does not name one
+		if audioReq, ok := info.Request.(*dto.AudioRequest); ok && audioReq.ResponseFormat != "" {
+			audioFormat = audioReq.ResponseFormat
+		}
+
+		var duration float64
+		var durationErr error
+
+		if audioFormat == "pcm" {
+			// PCM has no file header, so the duration is derived from OpenAI TTS's PCM parameters.
+			// Sample rate: 24000 Hz, bit depth: 16-bit (2 bytes), channels: 1.
+			const sampleRate = 24000
+			const bytesPerSample = 2
+			const channels = 1
+			duration = float64(len(bodyBytes)) / float64(sampleRate*bytesPerSample*channels)
+		} else {
+			ext := "." + audioFormat
+			reader := bytes.NewReader(bodyBytes)
+			duration, durationErr = common.GetAudioDuration(c.Request.Context(), reader, ext)
+		}
+
+		usage.PromptTokensDetails.TextTokens = usage.PromptTokens // all prompt tokens are recorded as text tokens
+
+		if durationErr != nil {
+			logger.LogWarn(c, fmt.Sprintf("failed to get audio duration: %v", durationErr))
+			// If the duration cannot be determined, fall back to a floor CompletionTokens derived from the body size.
+			sizeInKB := float64(len(bodyBytes)) / 1000.0
+			estimatedTokens := int(math.Ceil(sizeInKB)) // rough estimate: about 1 token per KB (1000 bytes)
+			usage.CompletionTokens = estimatedTokens
+			usage.CompletionTokenDetails.AudioTokens = estimatedTokens
+		} else if duration > 0 {
+			// Tokens: ceil(duration) / 60.0 * 1000, i.e. 1000 tokens per minute of audio.
+			completionTokens := int(math.Round(math.Ceil(duration) / 60.0 * 1000))
+			usage.CompletionTokens = completionTokens
+			usage.CompletionTokenDetails.AudioTokens = completionTokens
+		}
+		usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
+	}
+
+	return usage
+}
+
+// OpenaiSTTHandler buffers the upstream speech-to-text response, hands it to
+// service.IOCopyBytesGracefully for the client, and returns the usage to record.
+// The responseFormat parameter is not read by this function.
+func OpenaiSTTHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, responseFormat string) (*types.NewAPIError, *dto.Usage) {
+	defer service.CloseResponseBodyGracefully(resp)
+
+	raw, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		return types.NewOpenAIError(readErr, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError), nil
+	}
+	// Relay the buffered body before inspecting it for usage.
+	service.IOCopyBytesGracefully(c, resp, raw)
+
+	var parsed struct {
+		Usage *dto.Usage `json:"usage"`
+	}
+	decodeErr := common.Unmarshal(raw, &parsed)
+	if reported := parsed.Usage; decodeErr == nil && reported != nil && reported.TotalTokens > 0 {
+		// Fill prompt/completion from the input/output counters when they are zero.
+		if reported.PromptTokens == 0 {
+			reported.PromptTokens = reported.InputTokens
+		}
+		if reported.CompletionTokens == 0 {
+			reported.CompletionTokens = reported.OutputTokens
+		}
+		return nil, reported
+	}
+
+	// No usable upstream usage: record the estimated prompt tokens only.
+	estimate := info.GetEstimatePromptTokens()
+	return nil, &dto.Usage{PromptTokens: estimate, CompletionTokens: 0, TotalTokens: estimate}
+}
@@ -172,7 +172,7 @@ func handleLastResponse(lastStreamData string, responseId *string, createAt *int
 	shouldSendLastResp *bool) error {

 	var lastStreamResponse dto.ChatCompletionsStreamResponse
-	if err := json.Unmarshal(common.StringToByteSlice(lastStreamData), &lastStreamResponse); err != nil {
+	if err := common.Unmarshal(common.StringToByteSlice(lastStreamData), &lastStreamResponse); err != nil {
 		return err
 	}

@@ -1,7 +1,6 @@
 package openai

 import (
-	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
@@ -151,7 +150,7 @@ func OaiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Re
 		var streamResp struct {
 			Usage *dto.Usage `json:"usage"`
 		}
-		err := json.Unmarshal([]byte(secondLastStreamData), &streamResp)
+		err := common.Unmarshal([]byte(secondLastStreamData), &streamResp)
 		if err == nil && streamResp.Usage != nil && service.ValidUsage(streamResp.Usage) {
 			usage = streamResp.Usage
 			containStreamUsage = true
@@ -183,7 +182,7 @@ func OaiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Re
 	}

 	if !containStreamUsage {
-		usage = service.ResponseText2Usage(responseTextBuilder.String(), info.UpstreamModelName, info.PromptTokens)
+		usage = service.ResponseText2Usage(c, responseTextBuilder.String(), info.UpstreamModelName, info.GetEstimatePromptTokens())
 		usage.CompletionTokens += toolCount * 7
 	}

@@ -245,9 +244,9 @@ func OpenaiHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respo
 			}
 		}
 		simpleResponse.Usage = dto.Usage{
-			PromptTokens:     info.PromptTokens,
+			PromptTokens:     info.GetEstimatePromptTokens(),
 			CompletionTokens: completionTokens,
-			TotalTokens:      info.PromptTokens + completionTokens,
+			TotalTokens:      info.GetEstimatePromptTokens() + completionTokens,
 		}
 		usageModified = true
 	}
@@ -327,68 +326,6 @@ func streamTTSResponse(c *gin.Context, resp *http.Response) {
 	}
 }

-func OpenaiTTSHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) *dto.Usage {
-	// the status code has been judged before, if there is a body reading failure,
-	// it should be regarded as a non-recoverable error, so it should not return err for external retry.
-	// Analogous to nginx's load balancing, it will only retry if it can't be requested or
-	// if the upstream returns a specific status code, once the upstream has already written the header,
-	// the subsequent failure of the response body should be regarded as a non-recoverable error,
-	// and can be terminated directly.
-	defer service.CloseResponseBodyGracefully(resp)
-	usage := &dto.Usage{}
-	usage.PromptTokens = info.PromptTokens
-	usage.TotalTokens = info.PromptTokens
-	for k, v := range resp.Header {
-		c.Writer.Header().Set(k, v[0])
-	}
-	c.Writer.WriteHeader(resp.StatusCode)
-
-	isStreaming := resp.ContentLength == -1 || resp.Header.Get("Content-Length") == ""
-	if isStreaming {
-		streamTTSResponse(c, resp)
-	} else {
-		c.Writer.WriteHeaderNow()
-		_, err := io.Copy(c.Writer, resp.Body)
-		if err != nil {
-			logger.LogError(c, err.Error())
-		}
-	}
-	return usage
-}
-
-func OpenaiSTTHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, responseFormat string) (*types.NewAPIError, *dto.Usage) {
-	defer service.CloseResponseBodyGracefully(resp)
-
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError), nil
-	}
-	// 写入新的 response body
-	service.IOCopyBytesGracefully(c, resp, responseBody)
-
-	var responseData struct {
-		Usage *dto.Usage `json:"usage"`
-	}
-	if err := json.Unmarshal(responseBody, &responseData); err == nil && responseData.Usage != nil {
-		if responseData.Usage.TotalTokens > 0 {
-			usage := responseData.Usage
-			if usage.PromptTokens == 0 {
-				usage.PromptTokens = usage.InputTokens
-			}
-			if usage.CompletionTokens == 0 {
-				usage.CompletionTokens = usage.OutputTokens
-			}
-			return nil, usage
-		}
-	}
-
-	usage := &dto.Usage{}
-	usage.PromptTokens = info.PromptTokens
-	usage.CompletionTokens = 0
-	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
-	return nil, usage
-}
-
 func OpenaiRealtimeHandler(c *gin.Context, info *relaycommon.RelayInfo) (*types.NewAPIError, *dto.RealtimeUsage) {
 	if info == nil || info.ClientWs == nil || info.TargetWs == nil {
 		return types.NewError(fmt.Errorf("invalid websocket connection"), types.ErrorCodeBadResponse), nil
@@ -687,7 +624,7 @@ func extractCachedTokensFromBody(body []byte) (int, bool) {
 		} `json:"usage"`
 	}

-	if err := json.Unmarshal(body, &payload); err != nil {
+	if err := common.Unmarshal(body, &payload); err != nil {
 		return 0, false
 	}

@@ -141,7 +141,7 @@ func OaiResponsesStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp
 	}

 	if usage.PromptTokens == 0 && usage.CompletionTokens != 0 {
-		usage.PromptTokens = info.PromptTokens
+		usage.PromptTokens = info.GetEstimatePromptTokens()
 	}

 	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
@@ -81,7 +81,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 	if info.IsStream {
 		var responseText string
 		err, responseText = palmStreamHandler(c, resp)
-		usage = service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
+		usage = service.ResponseText2Usage(c, responseText, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	} else {
 		usage, err = palmHandler(c, info, resp)
 	}
@@ -121,13 +121,8 @@ func palmHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respons
 		}, resp.StatusCode)
 	}
 	fullTextResponse := responsePaLM2OpenAI(&palmResponse)
-	completionTokens := service.CountTextToken(palmResponse.Candidates[0].Content, info.UpstreamModelName)
-	usage := dto.Usage{
-		PromptTokens:     info.PromptTokens,
-		CompletionTokens: completionTokens,
-		TotalTokens:      info.PromptTokens + completionTokens,
-	}
-	fullTextResponse.Usage = usage
+	usage := service.ResponseText2Usage(c, palmResponse.Candidates[0].Content, info.UpstreamModelName, info.GetEstimatePromptTokens())
+	fullTextResponse.Usage = *usage
 	jsonResponse, err := common.Marshal(fullTextResponse)
 	if err != nil {
 		return nil, types.NewError(err, types.ErrorCodeBadResponseBody)
@@ -135,5 +130,5 @@ func palmHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respons
 	c.Writer.Header().Set("Content-Type", "application/json")
 	c.Writer.WriteHeader(resp.StatusCode)
 	service.IOCopyBytesGracefully(c, resp, jsonResponse)
-	return &usage, nil
+	return usage, nil
 }
@@ -393,7 +393,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 }

 // FetchTask 查询任务状态
-func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) {
+func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
 	taskID, ok := body["task_id"].(string)
 	if !ok {
 		return nil, fmt.Errorf("invalid task_id")
@@ -408,7 +408,11 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http

 	req.Header.Set("Authorization", "Bearer "+key)

-	return service.GetHttpClient().Do(req)
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		return nil, fmt.Errorf("new proxy http client failed: %w", err)
+	}
+	return client.Do(req)
 }

 func (a *TaskAdaptor) GetModelList() []string {
@@ -146,7 +146,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 }

 // FetchTask fetch task status
-func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) {
+func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
 	taskID, ok := body["task_id"].(string)
 	if !ok {
 		return nil, fmt.Errorf("invalid task_id")
@@ -163,7 +163,11 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http
 	req.Header.Set("Content-Type", "application/json")
 	req.Header.Set("Authorization", "Bearer "+key)

-	return service.GetHttpClient().Do(req)
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		return nil, fmt.Errorf("new proxy http client failed: %w", err)
+	}
+	return client.Do(req)
 }

 func (a *TaskAdaptor) GetModelList() []string {
@@ -200,7 +200,7 @@ func (a *TaskAdaptor) GetChannelName() string {
 }

 // FetchTask fetch task status
-func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) {
+func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
 	taskID, ok := body["task_id"].(string)
 	if !ok {
 		return nil, fmt.Errorf("invalid task_id")
@@ -223,7 +223,11 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http
 	req.Header.Set("Accept", "application/json")
 	req.Header.Set("x-goog-api-key", key)

-	return service.GetHttpClient().Do(req)
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		return nil, fmt.Errorf("new proxy http client failed: %w", err)
+	}
+	return client.Do(req)
 }

 func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, error) {
@@ -110,7 +110,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 	return hResp.TaskID, responseBody, nil
 }

-func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) {
+func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
 	taskID, ok := body["task_id"].(string)
 	if !ok {
 		return nil, fmt.Errorf("invalid task_id")
@@ -126,7 +126,11 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http
 	req.Header.Set("Accept", "application/json")
 	req.Header.Set("Authorization", "Bearer "+key)

-	return service.GetHttpClient().Do(req)
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		return nil, fmt.Errorf("new proxy http client failed: %w", err)
+	}
+	return client.Do(req)
 }

 func (a *TaskAdaptor) GetModelList() []string {
@@ -196,7 +196,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 	}

 	if jResp.Code != 10000 {
-		taskErr = service.TaskErrorWrapper(fmt.Errorf(jResp.Message), fmt.Sprintf("%d", jResp.Code), http.StatusInternalServerError)
+		taskErr = service.TaskErrorWrapper(fmt.Errorf("%s", jResp.Message), fmt.Sprintf("%d", jResp.Code), http.StatusInternalServerError)
 		return
 	}

@@ -210,7 +210,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 }

 // FetchTask fetch task status
-func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) {
+func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
 	taskID, ok := body["task_id"].(string)
 	if !ok {
 		return nil, fmt.Errorf("invalid task_id")
@@ -251,7 +251,11 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http
 			return nil, errors.Wrap(err, "sign request failed")
 		}
 	}
-	return service.GetHttpClient().Do(req)
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		return nil, fmt.Errorf("new proxy http client failed: %w", err)
+	}
+	return client.Do(req)
 }

 func (a *TaskAdaptor) GetModelList() []string {
@@ -186,7 +186,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 		return
 	}
 	if kResp.Code != 0 {
-		taskErr = service.TaskErrorWrapperLocal(fmt.Errorf(kResp.Message), "task_failed", http.StatusBadRequest)
+		taskErr = service.TaskErrorWrapperLocal(fmt.Errorf("%s", kResp.Message), "task_failed", http.StatusBadRequest)
 		return
 	}
 	ov := dto.NewOpenAIVideo()
@@ -199,7 +199,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 }

 // FetchTask fetch task status
-func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) {
+func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
 	taskID, ok := body["task_id"].(string)
 	if !ok {
 		return nil, fmt.Errorf("invalid task_id")
@@ -228,7 +228,11 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http
 	req.Header.Set("Authorization", "Bearer "+token)
 	req.Header.Set("User-Agent", "kling-sdk/1.0")

-	return service.GetHttpClient().Do(req)
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		return nil, fmt.Errorf("new proxy http client failed: %w", err)
+	}
+	return client.Do(req)
 }

 func (a *TaskAdaptor) GetModelList() []string {
@@ -5,8 +5,10 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"strings"

 	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/relay/channel"
@@ -67,11 +69,30 @@ func (a *TaskAdaptor) Init(info *relaycommon.RelayInfo) {
 	a.apiKey = info.ApiKey
 }

+// validateRemixRequest returns a local invalid_request (400) task error when the body cannot be decoded or prompt is blank.
+func validateRemixRequest(c *gin.Context) *dto.TaskError {
+	var payload struct{ Prompt string `json:"prompt"` }
+	decodeErr := common.UnmarshalBodyReusable(c, &payload)
+	switch {
+	case decodeErr != nil:
+		return service.TaskErrorWrapperLocal(decodeErr, "invalid_request", http.StatusBadRequest)
+	case strings.TrimSpace(payload.Prompt) == "":
+		return service.TaskErrorWrapperLocal(fmt.Errorf("field prompt is required"), "invalid_request", http.StatusBadRequest)
+	}
+	return nil
+}
+
 func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycommon.RelayInfo) (taskErr *dto.TaskError) {
+	if info.Action == constant.TaskActionRemix { // remix requests bypass the multipart check below and are validated by validateRemixRequest (non-blank prompt)
+		return validateRemixRequest(c)
+	}
 	return relaycommon.ValidateMultipartDirect(c, info)
 }

 func (a *TaskAdaptor) BuildRequestURL(info *relaycommon.RelayInfo) (string, error) {
+	if info.Action == constant.TaskActionRemix { // remix is sent to the /remix sub-path of the origin task (info.OriginTaskID) rather than the collection URL
+		return fmt.Sprintf("%s/v1/videos/%s/remix", a.baseURL, info.OriginTaskID), nil
+	}
 	return fmt.Sprintf("%s/v1/videos", a.baseURL), nil
 }

@@ -125,7 +146,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, _ *relayco
 }

 // FetchTask fetch task status
-func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) {
+func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
 	taskID, ok := body["task_id"].(string)
 	if !ok {
 		return nil, fmt.Errorf("invalid task_id")
@@ -140,7 +161,11 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http

 	req.Header.Set("Authorization", "Bearer "+key)

-	return service.GetHttpClient().Do(req)
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		return nil, fmt.Errorf("new proxy http client failed: %w", err)
+	}
+	return client.Do(req)
 }

 func (a *TaskAdaptor) GetModelList() []string {
@@ -105,7 +105,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 		return
 	}
 	if !sunoResponse.IsSuccess() {
-		taskErr = service.TaskErrorWrapper(fmt.Errorf(sunoResponse.Message), sunoResponse.Code, http.StatusInternalServerError)
+		taskErr = service.TaskErrorWrapper(fmt.Errorf("%s", sunoResponse.Message), sunoResponse.Code, http.StatusInternalServerError)
 		return
 	}

@@ -132,7 +132,7 @@ func (a *TaskAdaptor) GetChannelName() string {
 	return ChannelName
 }

-func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) {
+func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
 	requestUrl := fmt.Sprintf("%s/suno/fetch", baseUrl)
 	byteBody, err := json.Marshal(body)
 	if err != nil {
@@ -153,11 +153,11 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http
 	req = req.WithContext(ctx)
 	req.Header.Set("Content-Type", "application/json")
 	req.Header.Set("Authorization", "Bearer "+key)
-	resp, err := service.GetHttpClient().Do(req)
+	client, err := service.GetHttpClientWithProxy(proxy)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("new proxy http client failed: %w", err)
 	}
-	return resp, nil
+	return client.Do(req)
 }

 func actionValidate(c *gin.Context, sunoRequest *dto.SunoSubmitReq, action string) (err error) {
@@ -12,7 +12,6 @@ import (

 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/model"
-
 	"github.com/gin-gonic/gin"

 	"github.com/QuantumNous/new-api/constant"
@@ -121,7 +120,11 @@ func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info
 		return fmt.Errorf("failed to decode credentials: %w", err)
 	}

-	token, err := vertexcore.AcquireAccessToken(*adc, "")
+	proxy := ""
+	if info != nil {
+		proxy = info.ChannelSetting.Proxy
+	}
+	token, err := vertexcore.AcquireAccessToken(*adc, proxy)
 	if err != nil {
 		return fmt.Errorf("failed to acquire access token: %w", err)
 	}
@@ -147,13 +150,40 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
 			body.Parameters["storageUri"] = v
 		}
 		if v, ok := req.Metadata["sampleCount"]; ok {
-			body.Parameters["sampleCount"] = v
+			if i, ok := v.(int); ok {
+				body.Parameters["sampleCount"] = i
+			}
+			if f, ok := v.(float64); ok {
+				body.Parameters["sampleCount"] = int(f)
+			}
 		}
 	}
 	if _, ok := body.Parameters["sampleCount"]; !ok {
 		body.Parameters["sampleCount"] = 1
 	}

+	if body.Parameters["sampleCount"].(int) <= 0 {
+		return nil, fmt.Errorf("sampleCount must be greater than 0")
+	}
+
+	// if req.Duration > 0 {
+	// 	body.Parameters["durationSeconds"] = req.Duration
+	// } else if req.Seconds != "" {
+	// 	seconds, err := strconv.Atoi(req.Seconds)
+	// 	if err != nil {
+	// 		return nil, errors.Wrap(err, "convert seconds to int failed")
+	// 	}
+	// 	body.Parameters["durationSeconds"] = seconds
+	// }
+
+	info.PriceData.OtherRatios = map[string]float64{
+		"sampleCount": float64(body.Parameters["sampleCount"].(int)),
+	}
+
+	// if v, ok := body.Parameters["durationSeconds"]; ok {
+	// 	info.PriceData.OtherRatios["durationSeconds"] = float64(v.(int))
+	// }
+
 	data, err := json.Marshal(body)
 	if err != nil {
 		return nil, err
@@ -190,7 +220,7 @@ func (a *TaskAdaptor) GetModelList() []string { return []string{"veo-3.0-generat
 func (a *TaskAdaptor) GetChannelName() string { return "vertex" }

 // FetchTask fetch task status
-func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) {
+func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
 	taskID, ok := body["task_id"].(string)
 	if !ok {
 		return nil, fmt.Errorf("invalid task_id")
@@ -223,7 +253,7 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http
 	if err := json.Unmarshal([]byte(key), adc); err != nil {
 		return nil, fmt.Errorf("failed to decode credentials: %w", err)
 	}
-	token, err := vertexcore.AcquireAccessToken(*adc, "")
+	token, err := vertexcore.AcquireAccessToken(*adc, proxy)
 	if err != nil {
 		return nil, fmt.Errorf("failed to acquire access token: %w", err)
 	}
@@ -235,7 +265,11 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http
 	req.Header.Set("Accept", "application/json")
 	req.Header.Set("Authorization", "Bearer "+token)
 	req.Header.Set("x-goog-user-project", adc.ProjectID)
-	return service.GetHttpClient().Do(req)
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		return nil, fmt.Errorf("new proxy http client failed: %w", err)
+	}
+	return client.Do(req)
 }

 func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, error) {
@@ -188,7 +188,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 	return vResp.TaskId, responseBody, nil
 }

-func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) {
+func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
 	taskID, ok := body["task_id"].(string)
 	if !ok {
 		return nil, fmt.Errorf("invalid task_id")
@@ -204,7 +204,11 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http
 	req.Header.Set("Accept", "application/json")
 	req.Header.Set("Authorization", "Token "+key)

-	return service.GetHttpClient().Do(req)
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		return nil, fmt.Errorf("new proxy http client failed: %w", err)
+	}
+	return client.Do(req)
 }

 func (a *TaskAdaptor) GetModelList() []string {
@@ -105,7 +105,7 @@ func tencentStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *htt
 		data = strings.TrimPrefix(data, "data:")

 		var tencentResponse TencentChatResponse
-		err := json.Unmarshal([]byte(data), &tencentResponse)
+		err := common.Unmarshal([]byte(data), &tencentResponse)
 		if err != nil {
 			common.SysLog("error unmarshalling stream response: " + err.Error())
 			continue
@@ -130,7 +130,7 @@ func tencentStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *htt

 	service.CloseResponseBodyGracefully(resp)

-	return service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens), nil
+	return service.ResponseText2Usage(c, responseText, info.UpstreamModelName, info.GetEstimatePromptTokens()), nil
 }

 func tencentHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
@@ -17,6 +17,7 @@ import (
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/relay/constant"
 	"github.com/QuantumNous/new-api/setting/model_setting"
+	"github.com/QuantumNous/new-api/setting/reasoning"
 	"github.com/QuantumNous/new-api/types"

 	"github.com/gin-gonic/gin"
@@ -39,6 +40,7 @@ var claudeModelMap = map[string]string{
 	"claude-opus-4-20250514":     "claude-opus-4@20250514",
 	"claude-opus-4-1-20250805":   "claude-opus-4-1@20250805",
 	"claude-sonnet-4-5-20250929": "claude-sonnet-4-5@20250929",
+	"claude-opus-4-5-20251101":   "claude-opus-4-5@20251101",
 }

 const anthropicVersion = "vertex-2023-10-16"
@@ -49,10 +51,43 @@ type Adaptor struct {
 }

 func (a *Adaptor) ConvertGeminiRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeminiChatRequest) (any, error) {
+	// Vertex AI does not support functionResponse.id, so when the Gemini setting enables it the field is stripped here, before the request is handed to the Gemini adaptor.
+	if model_setting.GetGeminiSettings().RemoveFunctionResponseIdEnabled {
+		removeFunctionResponseID(request)
+	}
 	geminiAdaptor := gemini.Adaptor{}
 	return geminiAdaptor.ConvertGeminiRequest(c, info, request)
 }

+// removeFunctionResponseID clears the FunctionResponse.ID field on every part
+// of every entry in request.Contents, then applies the same treatment to each
+// nested request in request.Requests. The request is edited in place; a nil
+// request is a no-op.
+func removeFunctionResponseID(request *dto.GeminiChatRequest) {
+	if request == nil {
+		return
+	}
+
+	for ci := range request.Contents {
+		parts := request.Contents[ci].Parts
+		for pi := range parts {
+			// Parts without a function response, or whose ID is already empty, are left alone.
+			fnResp := parts[pi].FunctionResponse
+			if fnResp == nil || len(fnResp.ID) == 0 {
+				continue
+			}
+			fnResp.ID = nil
+		}
+	}
+
+	// Ranging over an empty or nil slice does nothing, so no length guard is needed.
+	for ri := range request.Requests {
+		// Recurse by address so the nested request itself is modified, not a copy.
+		nested := &request.Requests[ri]
+		removeFunctionResponseID(nested)
+	}
+}
+
 func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.ClaudeRequest) (any, error) {
 	if v, ok := claudeModelMap[info.UpstreamModelName]; ok {
 		c.Set("request_model", v)
@@ -180,6 +215,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 				info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
 			} else if strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
 				info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
+			} else if baseModel, level, ok := reasoning.TrimEffortSuffix(info.UpstreamModelName); ok && level != "" {
+				info.UpstreamModelName = baseModel
 			}
 		}

@@ -13,6 +13,7 @@ import (
 	channelconstant "github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/relay/channel"
+	"github.com/QuantumNous/new-api/relay/channel/claude"
 	"github.com/QuantumNous/new-api/relay/channel/openai"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/relay/constant"
@@ -23,11 +24,8 @@ import (
 )

 const (
-	contextKeyTTSRequest          = "volcengine_tts_request"
-	contextKeyResponseFormat      = "response_format"
-	DoubaoCodingPlan              = "doubao-coding-plan"
-	DoubaoCodingPlanClaudeBaseURL = "https://ark.cn-beijing.volces.com/api/coding"
-	DoubaoCodingPlanOpenAIBaseURL = "https://ark.cn-beijing.volces.com/api/coding/v3"
+	contextKeyTTSRequest     = "volcengine_tts_request"
+	contextKeyResponseFormat = "response_format"
 )

 type Adaptor struct {
@@ -39,6 +37,10 @@ func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dt
 }

 func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
+	if _, ok := channelconstant.ChannelSpecialBases[info.ChannelBaseUrl]; ok { // base URL is registered in ChannelSpecialBases: convert with the Claude adaptor instead of the OpenAI one
+		adaptor := claude.Adaptor{}
+		return adaptor.ConvertClaudeRequest(c, info, req)
+	}
 	adaptor := openai.Adaptor{}
 	return adaptor.ConvertClaudeRequest(c, info, req)
 }
@@ -238,11 +240,12 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 	if baseUrl == "" {
 		baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine]
 	}
+	specialPlan, hasSpecialPlan := channelconstant.ChannelSpecialBases[baseUrl]

 	switch info.RelayFormat {
 	case types.RelayFormatClaude:
-		if baseUrl == DoubaoCodingPlan {
-			return fmt.Sprintf("%s/v1/messages", DoubaoCodingPlanClaudeBaseURL), nil
+		if hasSpecialPlan && specialPlan.ClaudeBaseURL != "" {
+			return fmt.Sprintf("%s/v1/messages", specialPlan.ClaudeBaseURL), nil
 		}
 		if strings.HasPrefix(info.UpstreamModelName, "bot") {
 			return fmt.Sprintf("%s/api/v3/bots/chat/completions", baseUrl), nil
@@ -251,8 +254,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 	default:
 		switch info.RelayMode {
 		case constant.RelayModeChatCompletions:
-			if baseUrl == DoubaoCodingPlan {
-				return fmt.Sprintf("%s/chat/completions", DoubaoCodingPlanOpenAIBaseURL), nil
+			if hasSpecialPlan && specialPlan.OpenAIBaseURL != "" {
+				return fmt.Sprintf("%s/chat/completions", specialPlan.OpenAIBaseURL), nil
 			}
 			if strings.HasPrefix(info.UpstreamModelName, "bot") {
 				return fmt.Sprintf("%s/api/v3/bots/chat/completions", baseUrl), nil
@@ -340,6 +343,15 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
 }

 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
+	if info.RelayFormat == types.RelayFormatClaude {
+		if _, ok := channelconstant.ChannelSpecialBases[info.ChannelBaseUrl]; ok {
+			if info.IsStream {
+				return claude.ClaudeStreamHandler(c, resp, info, claude.RequestModeMessage)
+			}
+			return claude.ClaudeHandler(c, resp, info, claude.RequestModeMessage)
+		}
+	}
+
 	if info.RelayMode == constant.RelayModeAudioSpeech {
 		encoding := mapEncoding(c.GetString(contextKeyResponseFormat))
 		if info.IsStream {
@@ -385,7 +385,7 @@ func (m *Message) writeSessionID(buf *bytes.Buffer) error {
 	}

 	size := len(m.SessionID)
-	if size > math.MaxUint32 {
+	if int64(size) > math.MaxUint32 {
 		return fmt.Errorf("session ID size (%d) exceeds max(uint32)", size)
 	}

@@ -407,7 +407,7 @@ func (m *Message) writeErrorCode(buf *bytes.Buffer) error {

 func (m *Message) writePayload(buf *bytes.Buffer) error {
 	size := len(m.Payload)
-	if size > math.MaxUint32 {
+	if int64(size) > math.MaxUint32 {
 		return fmt.Errorf("payload size (%d) exceeds max(uint32)", size)
 	}

@@ -184,9 +184,9 @@ func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.Re
 	c.Data(http.StatusOK, contentType, audioData)

 	usage = &dto.Usage{
-		PromptTokens:     info.PromptTokens,
+		PromptTokens:     info.GetEstimatePromptTokens(),
 		CompletionTokens: 0,
-		TotalTokens:      info.PromptTokens,
+		TotalTokens:      info.GetEstimatePromptTokens(),
 	}

 	return usage, nil
@@ -284,9 +284,9 @@ func handleTTSWebSocketResponse(c *gin.Context, requestURL string, volcRequest V
 			if msg.Sequence < 0 {
 				c.Status(http.StatusOK)
 				usage = &dto.Usage{
-					PromptTokens:     info.PromptTokens,
+					PromptTokens:     info.GetEstimatePromptTokens(),
 					CompletionTokens: 0,
-					TotalTokens:      info.PromptTokens,
+					TotalTokens:      info.GetEstimatePromptTokens(),
 				}
 				return usage, nil
 			}
@@ -297,9 +297,9 @@ func handleTTSWebSocketResponse(c *gin.Context, requestURL string, volcRequest V

 	c.Status(http.StatusOK)
 	usage = &dto.Usage{
-		PromptTokens:     info.PromptTokens,
+		PromptTokens:     info.GetEstimatePromptTokens(),
 		CompletionTokens: 0,
-		TotalTokens:      info.PromptTokens,
+		TotalTokens:      info.GetEstimatePromptTokens(),
 	}
 	return usage, nil
 }
--- a/Show More
+++ b/Show More