refactor: move SidebarTrigger to header left, flatten system settings into sidebar

fix: TryUserAuth set role in context, fix docs page pagination response handling
refactor: sidebar full-height with logo+toggle at top, header beside sidebar
2026-06-15 04:17:37 +08:00 · 2026-06-15 04:07:15 +08:00 · 2026-06-15 04:02:05 +08:00 · 2026-06-15 03:57:06 +08:00 · 2026-06-15 03:48:41 +08:00 · 2026-06-15 03:15:21 +08:00
1427 changed files with 25486 additions and 21971 deletions
@@ -56,6 +56,8 @@
 # 对话超时设置
 # 所有请求超时时间，单位秒，默认为0，表示不限制
 # RELAY_TIMEOUT=0
+# Relay HTTP 客户端空闲连接超时时间，单位秒，默认跟随 Go 标准库，设置为0表示不限制
+# RELAY_IDLE_CONN_TIMEOUT=90
 # 流模式无响应超时时间，单位秒，如果出现空补全可以尝试改为更大值
 # STREAMING_TIMEOUT=300

@@ -0,0 +1,92 @@
+# Builds the Docker image, pushes it to the registry at git.viaeon.com,
+# and (when deploy secrets are configured) redeploys over SSH.
+name: Docker Build
+
+# Runs on pushes to main/master, on tags starting with "v", and on manual dispatch.
+on:
+  push:
+    branches:
+      - main
+      - master
+    tags:
+      - 'v*'
+  workflow_dispatch:
+
+jobs:
+  build-and-push:
+    name: Build and Push Docker Image
+    # NOTE(review): custom runner label; presumably a self-hosted act_runner - confirm.
+    runs-on: act-runner-4c6g
+    env:
+      # Sets the tool cache directory variable for every step of this job.
+      RUNNER_TOOL_CACHE: /toolcache
+
+    steps:
+      # Make sure a Docker CLI exists before any docker command is issued.
+      - name: Install Docker CLI
+        run: |
+          # POSIX redirects are used on purpose: the bash-only "&>" form is parsed by
+          # plain sh as "run in background", which makes every check succeed.
+          if ! command -v docker >/dev/null 2>&1; then
+            if command -v apk >/dev/null 2>&1; then
+              apk add --no-cache docker-cli
+            elif command -v apt-get >/dev/null 2>&1; then
+              apt-get update && apt-get install -y docker.io
+            else
+              # No known package manager: fall back to the static x86_64 client, pinned to 24.0.7.
+              curl -fsSL https://download.docker.com/linux/static/stable/x86_64/docker-24.0.7.tgz | tar xz -C /tmp
+              mv /tmp/docker/docker /usr/local/bin/
+              chmod +x /usr/local/bin/docker
+            fi
+          fi
+          docker --version
+
+      # fetch-depth: 0 fetches the complete history and all tags.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # Derive the image tag: the git tag name for tag pushes, otherwise dev-<short sha>.
+      # The value is written to the VERSION file and exported to later steps as TAG.
+      - name: Resolve tag & write VERSION
+        id: version
+        run: |
+          # Read the ref from the runner-provided GITHUB_REF variable instead of
+          # templating it into the script, so a ref name is never parsed as shell code.
+          case "${GITHUB_REF}" in
+            refs/tags/*)
+              TAG="${GITHUB_REF#refs/tags/}"
+              ;;
+            *)
+              SHORT_SHA=$(git rev-parse --short HEAD)
+              TAG="dev-${SHORT_SHA}"
+              ;;
+          esac
+          # Entries appended to GITHUB_ENV become environment variables of later steps.
+          echo "TAG=${TAG}" >> "$GITHUB_ENV"
+          echo "${TAG}" > VERSION
+          echo "Building tag: ${TAG}"
+          cat VERSION
+
+      # Authenticate the Docker CLI against the registry at git.viaeon.com.
+      - name: Login to Gitea Container Registry
+        env:
+          # Credentials travel through the environment rather than being templated into
+          # the script, so quotes or "$" inside them cannot change the command.
+          REGISTRY_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
+          REGISTRY_USER: ${{ github.actor }}
+        run: |
+          printf '%s\n' "${REGISTRY_TOKEN}" | docker login git.viaeon.com -u "${REGISTRY_USER}" --password-stdin
+
+      # Build once and tag the result with both the resolved version and "latest".
+      - name: Build Docker image
+        run: |
+          IMAGE="git.viaeon.com/admin/new-api"
+          echo "Building image with tag: ${TAG}"
+          docker build \
+            --label "org.opencontainers.image.source=https://${IMAGE}" \
+            --label "org.opencontainers.image.revision=${GITHUB_SHA}" \
+            -t "${IMAGE}:${TAG}" \
+            -t "${IMAGE}:latest" .
+
+      # Publish both tags to the registry.
+      - name: Push Docker image
+        run: |
+          IMAGE="git.viaeon.com/admin/new-api"
+          echo "Pushing ${TAG}..."
+          docker push "${IMAGE}:${TAG}"
+          echo "Pushing latest..."
+          docker push "${IMAGE}:latest"
+
+      # Reclaim disk space on the runner; runs even when an earlier step failed.
+      - name: Cleanup Docker
+        if: always()
+        run: |
+          echo "Removing local images..."
+          docker rmi "git.viaeon.com/admin/new-api:${TAG:-}" git.viaeon.com/admin/new-api:latest 2>/dev/null || true
+          echo "Pruning unused Docker resources..."
+          # Deliberately no --volumes: that flag deletes every unused volume on the
+          # daemon, including data volumes that have nothing to do with this build.
+          docker system prune -af 2>/dev/null || true
+          echo "Docker disk usage:"
+          # Reporting only: never let it fail the cleanup step.
+          docker system df || true
+
+      # Optional redeploy: pull the new image on the target host and restart the compose stack.
+      # Skipped (successfully) when DEPLOY_SSH_HOST is not configured.
+      - name: Deploy via SSH
+        if: success()
+        env:
+          DEPLOY_SSH_HOST: ${{ secrets.DEPLOY_SSH_HOST }}
+          DEPLOY_SSH_USER: ${{ secrets.DEPLOY_SSH_USER }}
+          DEPLOY_SSH_PORT: ${{ secrets.DEPLOY_SSH_PORT || 22 }}
+          DEPLOY_DIR: ${{ secrets.DEPLOY_DIR || '/opt/new-api' }}
+          # "sshpass -e" reads the password from SSHPASS, keeping it off the command line.
+          SSHPASS: ${{ secrets.DEPLOY_SSH_PASS }}
+        run: |
+          if [ -z "${DEPLOY_SSH_HOST}" ]; then
+            echo "DEPLOY_SSH_HOST not set, skip deploy"
+            exit 0
+          fi
+          # Install sshpass with whichever package manager exists. An explicit if/elif
+          # avoids the "a || b && c || true" chain, which ran apt-get install even after
+          # apk had succeeded and hid real installation failures.
+          if ! command -v sshpass >/dev/null 2>&1; then
+            if command -v apk >/dev/null 2>&1; then
+              apk add --no-cache sshpass
+            elif command -v apt-get >/dev/null 2>&1; then
+              apt-get update && apt-get install -y sshpass
+            fi
+          fi
+          if ! command -v sshpass >/dev/null 2>&1; then
+            echo "sshpass is not available and could not be installed" >&2
+            exit 1
+          fi
+          # NOTE(review): StrictHostKeyChecking=no disables host key verification;
+          # consider pinning the host key via known_hosts.
+          sshpass -e ssh -o StrictHostKeyChecking=no -p "${DEPLOY_SSH_PORT}" "${DEPLOY_SSH_USER}@${DEPLOY_SSH_HOST}" \
+            "cd '${DEPLOY_DIR}' && docker compose pull && docker compose up -d"
@@ -0,0 +1,73 @@
+# Builds and pushes the alpha Docker image to the registry at git.viaeon.com.
+name: Docker Build (alpha)
+
+# Runs on pushes to the alpha branch and on manual dispatch.
+on:
+  push:
+    branches:
+      - alpha
+  workflow_dispatch:
+
+jobs:
+  build-and-push:
+    name: Build and Push Alpha Docker Image
+    # NOTE(review): custom runner label; presumably a self-hosted act_runner - confirm.
+    runs-on: act-runner-4c6g
+    env:
+      # Sets the tool cache directory variable for every step of this job.
+      RUNNER_TOOL_CACHE: /toolcache
+
+    steps:
+      # Make sure a Docker CLI exists before any docker command is issued.
+      - name: Install Docker CLI
+        run: |
+          # POSIX redirects are used on purpose: the bash-only "&>" form is parsed by
+          # plain sh as "run in background", which makes every check succeed.
+          if ! command -v docker >/dev/null 2>&1; then
+            if command -v apk >/dev/null 2>&1; then
+              apk add --no-cache docker-cli
+            elif command -v apt-get >/dev/null 2>&1; then
+              apt-get update && apt-get install -y docker.io
+            else
+              # No known package manager: fall back to the static x86_64 client, pinned to 24.0.7.
+              curl -fsSL https://download.docker.com/linux/static/stable/x86_64/docker-24.0.7.tgz | tar xz -C /tmp
+              mv /tmp/docker/docker /usr/local/bin/
+              chmod +x /usr/local/bin/docker
+            fi
+          fi
+          docker --version
+
+      # A depth-1 checkout is enough: only the HEAD commit hash is needed for the version.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      # Compose alpha-<YYYYMMDD>-<short sha>, write it to the VERSION file,
+      # and export it to later steps as VERSION.
+      - name: Determine alpha version
+        id: version
+        run: |
+          BUILD_DATE=$(date +'%Y%m%d')
+          SHORT_SHA=$(git rev-parse --short HEAD)
+          VERSION="alpha-${BUILD_DATE}-${SHORT_SHA}"
+          printf '%s\n' "$VERSION" > VERSION
+          printf 'VERSION=%s\n' "$VERSION" >> $GITHUB_ENV
+          printf 'Publishing version: %s\n' "$VERSION"
+
+      # Authenticate the Docker CLI against the registry at git.viaeon.com.
+      - name: Login to Gitea Container Registry
+        env:
+          # Credentials travel through the environment rather than being templated into
+          # the script, so quotes or "$" inside them cannot change the command.
+          REGISTRY_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
+          REGISTRY_USER: ${{ github.actor }}
+        run: |
+          printf '%s\n' "${REGISTRY_TOKEN}" | docker login git.viaeon.com -u "${REGISTRY_USER}" --password-stdin 2>&1
+
+      # Build once and tag the result with both the dated version and the moving "alpha" tag.
+      - name: Build Docker image
+        run: |
+          IMAGE="git.viaeon.com/admin/new-api"
+          echo "Building alpha image..."
+          docker build \
+            --label "org.opencontainers.image.source=https://${IMAGE}" \
+            --label "org.opencontainers.image.revision=${GITHUB_SHA}" \
+            -t "${IMAGE}:${VERSION}" \
+            -t "${IMAGE}:alpha" . 2>&1
+
+      # Publish both tags to the registry.
+      - name: Push Docker image
+        run: |
+          IMAGE="git.viaeon.com/admin/new-api"
+          echo "Pushing ${VERSION}..."
+          docker push "${IMAGE}:${VERSION}"
+          echo "Pushing alpha..."
+          docker push "${IMAGE}:alpha"
+
+      # Reclaim disk space on the runner; runs even when an earlier step failed.
+      - name: Cleanup Docker
+        if: always()
+        run: |
+          echo "Removing local images..."
+          docker rmi "git.viaeon.com/admin/new-api:${VERSION:-}" git.viaeon.com/admin/new-api:alpha 2>/dev/null || true
+          echo "Pruning unused Docker resources..."
+          # Deliberately no --volumes: that flag deletes every unused volume on the
+          # daemon, including data volumes that have nothing to do with this build.
+          docker system prune -af 2>/dev/null || true
+          echo "Docker disk usage:"
+          # Reporting only: never let it fail the cleanup step.
+          docker system df || true
@@ -0,0 +1,73 @@
+# Builds and pushes the nightly Docker image to the registry at git.viaeon.com.
+name: Docker Build (nightly)
+
+# Runs on pushes to the nightly branch and on manual dispatch.
+on:
+  push:
+    branches:
+      - nightly
+  workflow_dispatch:
+
+jobs:
+  build-and-push:
+    name: Build and Push Nightly Docker Image
+    # NOTE(review): custom runner label; presumably a self-hosted act_runner - confirm.
+    runs-on: act-runner-4c6g
+    env:
+      # Sets the tool cache directory variable for every step of this job.
+      RUNNER_TOOL_CACHE: /toolcache
+
+    steps:
+      # Make sure a Docker CLI exists before any docker command is issued.
+      - name: Install Docker CLI
+        run: |
+          # POSIX redirects are used on purpose: the bash-only "&>" form is parsed by
+          # plain sh as "run in background", which makes every check succeed.
+          if ! command -v docker >/dev/null 2>&1; then
+            if command -v apk >/dev/null 2>&1; then
+              apk add --no-cache docker-cli
+            elif command -v apt-get >/dev/null 2>&1; then
+              apt-get update && apt-get install -y docker.io
+            else
+              # No known package manager: fall back to the static x86_64 client, pinned to 24.0.7.
+              curl -fsSL https://download.docker.com/linux/static/stable/x86_64/docker-24.0.7.tgz | tar xz -C /tmp
+              mv /tmp/docker/docker /usr/local/bin/
+              chmod +x /usr/local/bin/docker
+            fi
+          fi
+          docker --version
+
+      # A depth-1 checkout is enough: only the HEAD commit hash is needed for the version.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      # Compose nightly-<YYYYMMDD>-<short sha>, write it to the VERSION file,
+      # and export it to later steps as VERSION.
+      - name: Determine nightly version
+        id: version
+        run: |
+          BUILD_DATE=$(date +'%Y%m%d')
+          SHORT_SHA=$(git rev-parse --short HEAD)
+          VERSION="nightly-${BUILD_DATE}-${SHORT_SHA}"
+          printf '%s\n' "$VERSION" > VERSION
+          printf 'VERSION=%s\n' "$VERSION" >> $GITHUB_ENV
+          printf 'Publishing version: %s\n' "$VERSION"
+
+      # Authenticate the Docker CLI against the registry at git.viaeon.com.
+      - name: Login to Gitea Container Registry
+        env:
+          # Credentials travel through the environment rather than being templated into
+          # the script, so quotes or "$" inside them cannot change the command.
+          REGISTRY_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
+          REGISTRY_USER: ${{ github.actor }}
+        run: |
+          printf '%s\n' "${REGISTRY_TOKEN}" | docker login git.viaeon.com -u "${REGISTRY_USER}" --password-stdin 2>&1
+
+      # Build once and tag the result with both the dated version and the moving "nightly" tag.
+      - name: Build Docker image
+        run: |
+          IMAGE="git.viaeon.com/admin/new-api"
+          echo "Building nightly image..."
+          docker build \
+            --label "org.opencontainers.image.source=https://${IMAGE}" \
+            --label "org.opencontainers.image.revision=${GITHUB_SHA}" \
+            -t "${IMAGE}:${VERSION}" \
+            -t "${IMAGE}:nightly" . 2>&1
+
+      # Publish both tags to the registry.
+      - name: Push Docker image
+        run: |
+          IMAGE="git.viaeon.com/admin/new-api"
+          echo "Pushing ${VERSION}..."
+          docker push "${IMAGE}:${VERSION}"
+          echo "Pushing nightly..."
+          docker push "${IMAGE}:nightly"
+
+      # Reclaim disk space on the runner; runs even when an earlier step failed.
+      - name: Cleanup Docker
+        if: always()
+        run: |
+          echo "Removing local images..."
+          docker rmi "git.viaeon.com/admin/new-api:${VERSION:-}" git.viaeon.com/admin/new-api:nightly 2>/dev/null || true
+          echo "Pruning unused Docker resources..."
+          # Deliberately no --volumes: that flag deletes every unused volume on the
+          # daemon, including data volumes that have nothing to do with this build.
+          docker system prune -af 2>/dev/null || true
+          echo "Docker disk usage:"
+          # Reporting only: never let it fail the cleanup step.
+          docker system df || true
@@ -0,0 +1,82 @@
+# Checks newly opened pull requests against the contribution policy.
+name: PR Check
+
+# Only fires when a PR is opened or reopened; later edits to the PR do not re-run it.
+on:
+  pull_request:
+    types: [opened, reopened]
+
+jobs:
+  pr-quality:
+    name: PR Quality Check
+    # NOTE(review): custom runner label; presumably a self-hosted act_runner - confirm.
+    runs-on: act-runner-4c6g
+    env:
+      # Sets the tool cache directory variable for every step of this job.
+      RUNNER_TOOL_CACHE: /toolcache
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # Validates the PR title and description. A PR that fails is labelled, commented
+      # on, and closed through the Gitea API, and the job exits non-zero.
+      - name: Check PR description
+        env:
+          GITEA_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          REPO: admin/new-api
+          GITEA_URL: https://git.viaeon.com
+        run: |
+          # Fetch the PR. -f makes curl fail on HTTP errors, so an API, auth, or network
+          # problem aborts the job instead of looking like an empty PR and closing it.
+          if ! PR_INFO=$(curl -fsS -H "Authorization: token ${GITEA_TOKEN}" \
+            "${GITEA_URL}/api/v1/repos/${REPO}/pulls/${PR_NUMBER}"); then
+            echo "Failed to fetch PR #${PR_NUMBER}; leaving the PR untouched." >&2
+            exit 1
+          fi
+
+          # Make sure the response really is a PR object before judging its content.
+          if ! echo "$PR_INFO" | jq -e '.number' > /dev/null; then
+            echo "Unexpected API response for PR #${PR_NUMBER}; leaving the PR untouched." >&2
+            exit 1
+          fi
+
+          PR_BODY=$(echo "$PR_INFO" | jq -r '.body // empty')
+          PR_TITLE=$(echo "$PR_INFO" | jq -r '.title // empty')
+
+          FAILED=0
+          REASONS=""
+
+          # Reject an empty description.
+          if [ -z "$PR_BODY" ] || [ "$PR_BODY" = "null" ]; then
+            FAILED=1
+            REASONS="${REASONS}- PR description is empty\n"
+          fi
+
+          # Reject an empty title.
+          if [ -z "$PR_TITLE" ] || [ "$PR_TITLE" = "null" ]; then
+            FAILED=1
+            REASONS="${REASONS}- PR title is empty\n"
+          fi
+
+          # Reject descriptions carrying the AI-generation marker (case-insensitive match).
+          if echo "$PR_BODY" | grep -qi "Generated with Claude Code"; then
+            FAILED=1
+            REASONS="${REASONS}- PR appears to be purely AI-generated without meaningful human involvement\n"
+          fi
+
+          if [ "$FAILED" -eq 1 ]; then
+            echo "PR check failed:"
+            # %b expands the "\n" sequences stored in REASONS; unlike "echo -e" it is portable.
+            printf '%b' "$REASONS"
+
+            # Add the failure label.
+            curl -s -X POST \
+              "${GITEA_URL}/api/v1/repos/${REPO}/issues/${PR_NUMBER}/labels" \
+              -H "Authorization: token ${GITEA_TOKEN}" \
+              -H "Content-Type: application/json" \
+              -d '{"labels": ["pr-check-failed"]}'
+
+            # Post the explanatory comment.
+            curl -s -X POST \
+              "${GITEA_URL}/api/v1/repos/${REPO}/issues/${PR_NUMBER}/comments" \
+              -H "Authorization: token ${GITEA_TOKEN}" \
+              -H "Content-Type: application/json" \
+              -d '{"body": "感谢您的提交。由于该 PR 未遵循我们的贡献模板，且被识别为缺乏人工参与的纯 AI 生成内容，我们将先予以关闭。我们更欢迎经过人工审核、验证并带有个人思考的贡献。如果您认为这其中存在误解，请回复告知。"}'
+
+            # Close the PR.
+            curl -s -X PATCH \
+              "${GITEA_URL}/api/v1/repos/${REPO}/pulls/${PR_NUMBER}" \
+              -H "Authorization: token ${GITEA_TOKEN}" \
+              -H "Content-Type: application/json" \
+              -d '{"state": "closed"}'
+
+            exit 1
+          fi
+
+          echo "PR check passed!"
@@ -0,0 +1,161 @@
+# Builds the Linux release binaries and publishes them as a Gitea release.
+name: Release (Linux)
+
+# Runs for tags starting with "v" except those containing "-alpha",
+# and on manual dispatch with an explicit tag name.
+on:
+  push:
+    tags:
+      - 'v*'
+      - '!*-alpha*'
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: 'Tag name to build (e.g., v0.10.8)'
+        required: true
+        type: string
+
+jobs:
+  build-linux:
+    name: Linux Release
+    # NOTE(review): custom runner label; presumably a self-hosted act_runner - confirm.
+    runs-on: act-runner-4c6g
+    env:
+      # Sets the tool cache directory variable for every step of this job.
+      RUNNER_TOOL_CACHE: /toolcache
+
+    steps:
+      # Install the Go and Bun toolchains when the runner does not already provide them.
+      - name: Install dependencies
+        run: |
+          export PATH="/toolcache/bin:$PATH"
+          # Install Go. POSIX redirects are used because the bash-only "&>" form is
+          # parsed by plain sh as "run in background", which makes the check always pass.
+          if ! command -v go >/dev/null 2>&1; then
+            curl -fsSL https://go.dev/dl/go1.25.1.linux-amd64.tar.gz | tar -C /usr/local -xzf -
+            export PATH=$PATH:/usr/local/go/bin
+          fi
+          go version
+          # Install Bun
+          if ! command -v bun >/dev/null 2>&1; then
+            curl -fsSL https://bun.sh/install | bash
+            export PATH="$HOME/.bun/bin:$PATH"
+          fi
+          bun --version
+          # Persist the tool directories for the following steps. PATH exports and
+          # ~/.bashrc edits do not carry over between steps; GITHUB_PATH entries do.
+          # Without this, a toolchain found only in /toolcache/bin is invisible later on.
+          {
+            echo "/toolcache/bin"
+            echo "/usr/local/go/bin"
+            echo "$HOME/.bun/bin"
+          } >> "$GITHUB_PATH"
+
+      # Check out the code to build, with full history and tags for "git describe".
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          # On manual dispatch, build the requested tag rather than the branch head;
+          # on tag pushes the input is empty and this falls back to the pushed ref.
+          ref: ${{ github.event.inputs.tag || github.ref }}
+          fetch-depth: 0
+
+      # Resolve the version string and export it to later steps as VERSION.
+      - name: Determine Version
+        env:
+          # The dispatch input is passed through the environment instead of being
+          # templated into the script, so its content is never parsed as shell code.
+          DISPATCH_TAG: ${{ github.event.inputs.tag }}
+        run: |
+          if [ -n "${DISPATCH_TAG}" ]; then
+            TAG="${DISPATCH_TAG}"
+          else
+            TAG="${GITHUB_REF#refs/tags/}"
+          fi
+          # Prefer "git describe"; fall back to the tag name when it cannot describe HEAD.
+          VERSION=$(git describe --tags 2>/dev/null || echo "$TAG")
+          echo "VERSION=$VERSION" >> "$GITHUB_ENV"
+          echo "Building version: $VERSION"
+
+      # Build the "default" frontend with the version injected through VITE_REACT_APP_VERSION.
+      - name: Build Frontend (default)
+        env:
+          CI: ""
+        run: |
+          export PATH="$HOME/.bun/bin:/usr/local/go/bin:$PATH"
+          # Dependencies are installed from the web/ root; --frozen-lockfile refuses lockfile changes.
+          cd web
+          bun install --frozen-lockfile
+          cd default
+          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$VERSION bun run build
+          cd ../..
+
+      # Build the "classic" frontend the same way.
+      - name: Build Frontend (classic)
+        env:
+          CI: ""
+        run: |
+          export PATH="$HOME/.bun/bin:/usr/local/go/bin:$PATH"
+          cd web
+          bun install --frozen-lockfile
+          cd classic
+          VITE_REACT_APP_VERSION=$VERSION bun run build
+          cd ../..
+
+      # Build the native binary, stamping the version via -X and requesting static external linking.
+      - name: Build Backend (amd64)
+        run: |
+          export PATH="/usr/local/go/bin:$PATH"
+          go mod download
+          # NOTE(review): -X targets 'new-api/common.Version'; confirm it matches the module path in go.mod.
+          go build -ldflags "-s -w -X 'new-api/common.Version=$VERSION' -extldflags '-static'" -o new-api-$VERSION
+
+      # Cross-compile the arm64 binary with cgo, using the aarch64 GNU C compiler.
+      - name: Build Backend (arm64)
+        run: |
+          export PATH="/usr/local/go/bin:$PATH"
+          # Use sudo only when not already root, so the step also works in runner
+          # containers that run as root and ship no sudo binary.
+          if [ "$(id -u)" -eq 0 ]; then SUDO=""; else SUDO="sudo"; fi
+          $SUDO apt-get update
+          # DEBIAN_FRONTEND is set on the apt-get side: a variable placed in front of
+          # "sudo" is dropped by sudo's default environment reset.
+          $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install -y gcc-aarch64-linux-gnu
+          CC=aarch64-linux-gnu-gcc CGO_ENABLED=1 GOOS=linux GOARCH=arm64 go build -ldflags "-s -w -X 'new-api/common.Version=$VERSION' -extldflags '-static'" -o new-api-arm64-$VERSION
+
+      # Write SHA-256 checksums of every built binary (files matching new-api-*).
+      - name: Generate checksums
+        run: sha256sum new-api-* > checksums-linux.txt
+
+      # Hand the binaries and the checksum file to the release job as the "linux-build" artifact.
+      # NOTE(review): confirm that the v4 artifact actions are supported by this Gitea/act_runner version.
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: linux-build
+          path: |
+            new-api-*
+            checksums-linux.txt
+
+  # Publishes the build outputs as a Gitea release once the Linux build has finished.
+  release:
+    name: Create Gitea Release
+    needs: [build-linux]
+    runs-on: act-runner-4c6g
+    env:
+      RUNNER_TOOL_CACHE: /toolcache
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # Resolve the release tag and export it to later steps as TAG.
+      - name: Determine Version
+        env:
+          # The dispatch input is passed through the environment instead of being
+          # templated into the script, so its content is never parsed as shell code.
+          DISPATCH_TAG: ${{ github.event.inputs.tag }}
+        run: |
+          if [ -n "${DISPATCH_TAG}" ]; then
+            TAG="${DISPATCH_TAG}"
+          else
+            TAG="${GITHUB_REF#refs/tags/}"
+          fi
+          echo "TAG=$TAG" >> "$GITHUB_ENV"
+
+      # No artifact name is given, so every artifact of this run is downloaded below artifacts/.
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: artifacts
+
+      # Create the release through the Gitea API and attach every downloaded artifact file.
+      - name: Create Gitea Release
+        env:
+          GITEA_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
+        run: |
+          # Abort on the first failing command and on unset variables, so a failed API
+          # call can never end in the "created successfully" message.
+          set -eu
+
+          # TAG is provided by the job environment (exported by the "Determine Version" step).
+          REPO="admin/new-api"
+          GITEA_URL="https://git.viaeon.com"
+
+          # Build the request body with jq so the tag is always valid, escaped JSON.
+          PAYLOAD=$(jq -n --arg tag "${TAG}" \
+            '{tag_name: $tag, name: $tag, body: ("Release " + $tag), draft: false, prerelease: false}')
+
+          # Create the release; -f turns HTTP errors into a non-zero exit status.
+          RESPONSE=$(curl -fsS -X POST \
+            "${GITEA_URL}/api/v1/repos/${REPO}/releases" \
+            -H "Authorization: token ${GITEA_TOKEN}" \
+            -H "Content-Type: application/json" \
+            -d "${PAYLOAD}")
+          RELEASE_ID=$(printf '%s' "${RESPONSE}" | jq -r '.id // empty')
+
+          # Without an ID the uploads below would target ".../releases/null/assets".
+          if [ -z "${RELEASE_ID}" ]; then
+            echo "Failed to create release ${TAG}: response contained no release id" >&2
+            exit 1
+          fi
+
+          echo "Created release ID: ${RELEASE_ID}"
+
+          # Upload the attachments. "IFS= read -r" keeps file names intact; a failed
+          # upload stops the loop and fails the step.
+          find artifacts -type f | while IFS= read -r file; do
+            echo "Uploading: ${file}"
+            curl -fsS -X POST \
+              "${GITEA_URL}/api/v1/repos/${REPO}/releases/${RELEASE_ID}/assets" \
+              -H "Authorization: token ${GITEA_TOKEN}" \
+              -F "attachment=@${file}" \
+              -F "name=$(basename "${file}")"
+          done
+
+          echo "Release ${TAG} created successfully!"
@@ -11,6 +11,8 @@ assignees: ''

 - 文档：https://docs.newapi.ai/
 - 使用问题先看或先问：https://deepwiki.com/QuantumNous/new-api
+- 开启透传后的转发相关反馈不接受 issue；透传模式会直接转发请求，请自行确认上游行为。
+- 不接受 coding plan、逆向渠道等技术支持类 issue。
 - 警告：删除本模板、删除小节标题或随意清空内容的 issue，可能会被直接关闭；重复恶意提交者可能会被 block。

 **您当前的 newapi 版本**
@@ -20,13 +22,18 @@ assignees: ''
 **提交确认**

 [//]: # (方框内删除已有的空格，填 x 号)
-+ [ ] 我已确认目前没有类似 issue
-+ [ ] 我已完整查看过文档 https://docs.newapi.ai/ 和项目 README，尤其是常见问题部分
-+ [ ] 我未删除此模板中的任何引导内容或小节标题，并会按要求完整填写
-+ [ ] 我理解项目维护者精力有限，不遵循模板要求的 issue 可能会被无视或直接关闭
+- [ ] **非重复 issue:** 我已搜索现有 [Issues](https://github.com/QuantumNous/new-api/issues?q=is%3Aissue)，确认目前没有类似 issue。
+- [ ] **提交前必读:** 我已完整阅读上方“提交前必读”，并已查看文档 https://docs.newapi.ai/、项目 README 且向 AI 提问，确认这不是使用、配置或接入类问题。
+- [ ] **模板完整:** 我未删除此模板中的任何引导内容或小节标题，并会按要求完整填写。
+- [ ] **维护成本:** 我理解项目维护者精力有限，不遵循模板要求的 issue 可能会被无视或直接关闭。

 **问题描述**

+请尽可能说明问题现象、影响范围，以及你判断它是程序问题而不是上游行为或使用问题的依据。
+
+- 转发问题请尽可能说明渠道类型、转换格式、上游原生支持依据和服务端日志。
+- 计费问题请尽可能附请求返回的 `usage` 示例。
+
 **复现步骤**

 **预期结果**
@@ -11,6 +11,8 @@ assignees: ''

 - Docs: https://docs.newapi.ai/
 - Usage questions first: https://deepwiki.com/QuantumNous/new-api
+- Issues about forwarding behavior after enabling pass-through mode are not accepted; pass-through mode forwards requests directly, so please verify upstream behavior yourself.
+- Technical support requests such as coding plans or reverse-engineering channels are not accepted as issues.
 - Warning: issues with this template removed, section headings deleted, or content cleared may be closed directly. Repeated abusive submissions may result in a block.

 **Your current newapi version**
@@ -20,13 +22,18 @@ Please fill this in, for example: `v1.0.0`
 **Submission Checks**

 [//]: # (Remove the space in the box and fill with an x)
-+ [ ] I have confirmed there are no similar issues
-+ [ ] I have thoroughly read the docs at https://docs.newapi.ai/ and the project README, especially the FAQ section
-+ [ ] I have not removed any guidance or section headings from this template and will complete it as requested
-+ [ ] I understand that maintainers have limited time and issues that do not follow this template may be ignored or closed directly
+- [ ] **Non-duplicate issue:** I have searched existing [Issues](https://github.com/QuantumNous/new-api/issues?q=is%3Aissue) and confirmed there are no similar issues.
+- [ ] **Read this first:** I have fully read the section above, reviewed the docs at https://docs.newapi.ai/ and the project README, and asked AI first, confirming this is not a usage, configuration, or integration question.
+- [ ] **Template intact:** I have not removed any guidance or section headings from this template and will complete it as requested.
+- [ ] **Maintainer time:** I understand that maintainers have limited time and issues that do not follow this template may be ignored or closed directly.

 **Issue Description**

+Describe, in as much detail as possible, the symptom, the scope of impact, and why you believe this is an application issue rather than upstream behavior or a usage question.
+
+- For forwarding issues, include the channel type, conversion format, upstream native-support evidence, and server logs when possible.
+- For billing issues, include an example of the returned `usage` when possible.
+
 **Steps to Reproduce**

 **Expected Result**
@@ -11,6 +11,8 @@ assignees: ''

 - 文档：https://docs.newapi.ai/
 - 使用问题先看或先问：https://deepwiki.com/QuantumNous/new-api
+- 开启透传后的转发相关反馈不接受 issue；透传模式会直接转发请求，请自行确认上游行为。
+- 不接受 coding plan、逆向渠道等技术支持类 issue。
 - 警告：删除本模板、删除小节标题或随意清空内容的 issue，可能会被直接关闭；重复恶意提交者可能会被 block。

 **您当前的 newapi 版本**
@@ -20,10 +22,10 @@ assignees: ''
 **提交确认**

 [//]: # (方框内删除已有的空格，填 x 号)
-+ [ ] 我已确认目前没有类似 issue
-+ [ ] 我已完整查看过文档 https://docs.newapi.ai/ 和项目 README，已确定现有版本无法满足需求
-+ [ ] 我未删除此模板中的任何引导内容或小节标题，并会按要求完整填写
-+ [ ] 我理解项目维护者精力有限，不遵循模板要求的 issue 可能会被无视或直接关闭
+- [ ] **非重复 issue:** 我已搜索现有 [Issues](https://github.com/QuantumNous/new-api/issues?q=is%3Aissue)，确认目前没有类似 issue。
+- [ ] **提交前必读:** 我已完整阅读上方“提交前必读”，并已查看文档 https://docs.newapi.ai/、项目 README 且向 AI 提问，确认这不是使用、配置或接入类问题，且现有版本无法满足需求。
+- [ ] **模板完整:** 我未删除此模板中的任何引导内容或小节标题，并会按要求完整填写。
+- [ ] **维护成本:** 我理解项目维护者精力有限，不遵循模板要求的 issue 可能会被无视或直接关闭。

 **功能描述**

@@ -11,6 +11,8 @@ assignees: ''

 - Docs: https://docs.newapi.ai/
 - Usage questions first: https://deepwiki.com/QuantumNous/new-api
+- Issues about forwarding behavior after enabling pass-through mode are not accepted; pass-through mode forwards requests directly, so please verify upstream behavior yourself.
+- Technical support requests such as coding plans or reverse-engineering channels are not accepted as issues.
 - Warning: issues with this template removed, section headings deleted, or content cleared may be closed directly. Repeated abusive submissions may result in a block.

 **Your current newapi version**
@@ -20,10 +22,10 @@ Please fill this in, for example: `v1.0.0`
 **Submission Checks**

 [//]: # (Remove the space in the box and fill with an x)
-+ [ ] I have confirmed there are no similar issues
-+ [ ] I have thoroughly read the docs at https://docs.newapi.ai/ and the project README, and confirmed the current version cannot meet my needs
-+ [ ] I have not removed any guidance or section headings from this template and will complete it as requested
-+ [ ] I understand that maintainers have limited time and issues that do not follow this template may be ignored or closed directly
+- [ ] **Non-duplicate issue:** I have searched existing [Issues](https://github.com/QuantumNous/new-api/issues?q=is%3Aissue) and confirmed there are no similar issues.
+- [ ] **Read this first:** I have fully read the section above, reviewed the docs at https://docs.newapi.ai/ and the project README, and asked AI first, confirming this is not a usage, configuration, or integration question, and that the current version cannot meet my needs.
+- [ ] **Template intact:** I have not removed any guidance or section headings from this template and will complete it as requested.
+- [ ] **Maintainer time:** I understand that maintainers have limited time and issues that do not follow this template may be ignored or closed directly.

 **Feature Description**

@@ -33,16 +33,18 @@ jobs:
        env:
          CI: ""
        run: |
-          cd web/default
-          bun install
+          cd web
+          bun install --frozen-lockfile
+          cd default
          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ../..
      - name: Build Frontend (classic)
        env:
          CI: ""
        run: |
-          cd web/classic
-          bun install
+          cd web
+          bun install --frozen-lockfile
+          cd classic
          VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ../..
      - name: Set up Go
@@ -91,16 +93,18 @@ jobs:
          CI: ""
          NODE_OPTIONS: "--max-old-space-size=4096"
        run: |
-          cd web/default
-          bun install
+          cd web
+          bun install --frozen-lockfile
+          cd default
          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ../..
      - name: Build Frontend (classic)
        env:
          CI: ""
        run: |
-          cd web/classic
-          bun install
+          cd web
+          bun install --frozen-lockfile
+          cd classic
          VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ../..
      - name: Set up Go
@@ -146,16 +150,18 @@ jobs:
        env:
          CI: ""
        run: |
-          cd web/default
-          bun install
+          cd web
+          bun install --frozen-lockfile
+          cd default
          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ../..
      - name: Build Frontend (classic)
        env:
          CI: ""
        run: |
-          cd web/classic
-          bun install
+          cd web
+          bun install --frozen-lockfile
+          cd classic
          VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ../..
      - name: Set up Go
@@ -7,9 +7,10 @@ upload
 *.db
 build
 *.db-journal
-logs
+/logs
 web/default/dist
 web/classic/dist
+web/daisy/dist
 web/node_modules
 web/dist
 .env
@@ -35,3 +36,4 @@ data/
 .test
 token_estimator_test.go
 skills-lock.json
+.playwright-mcp
@@ -0,0 +1,170 @@
+# ModelsToken 管理平台 - 产品需求文档 (PRD)
+
+## 1. 产品概述
+
+ModelsToken 是一个 AI API 管理与分发平台，为开发者和企业提供统一的 AI 模型接入、密钥管理、用量计费、渠道代理等一站式服务。新前端将采用 React + DaisyUI 5 + TypeScript 构建，替换现有的 Default/Classic 双前端，同时新增本地文档管理功能。
+
+- 目标用户：AI 应用开发者、企业运维人员、API 服务管理者
+- 核心价值：简化 AI API 的管理复杂度，提供直观的操作界面和完整的文档支持
+
+## 2. 核心功能
+
+### 2.1 用户角色
+
+| 角色 | 注册方式 | 核心权限 |
+|------|----------|----------|
+| 普通用户 | 用户名/邮箱/OAuth | 密钥管理、充值、订阅、日志查看、文档访问 |
+| 管理员 | 由超级管理员指定 | 渠道管理、用户管理、兑换码、模型管理、订阅管理 |
+| 超级管理员 | 系统初始化 | 全部权限 + 系统设置 |
+
+### 2.2 功能模块
+
+#### 公共页面（无需登录）
+1. **首页**：Hero 区域、特性展示、快速入门指引
+2. **登录页**：用户名/密码、OAuth 登录（GitHub/Discord/OIDC/LinuxDO/微信/Telegram/自定义）
+3. **注册页**：注册表单 + Turnstile 人机验证
+4. **忘记密码**：邮箱重置链接
+5. **模型定价**：模型价格列表、搜索筛选
+6. **关于页面**：项目信息、版本、许可证
+7. **用户协议/隐私政策**
+8. **初始化向导**：首次部署配置
+
+#### 用户功能（需登录）
+1. **仪表盘**：额度概览、使用趋势图、API 信息面板、公告、FAQ
+2. **API 密钥管理**：创建/编辑/删除/批量操作、额度限制、模型限制、IP 限制
+3. **钱包/充值**：余额查看、兑换码充值、在线支付（易支付/Stripe/Creem/Waffo）、签到
+4. **订阅管理**：查看计划、购买订阅、当前订阅状态
+5. **使用日志**：请求日志搜索/筛选、MJ 日志、任务日志、统计图表
+6. **个人设置**：资料编辑、2FA 设置、Passkey 管理、OAuth 绑定、语言切换
+7. **Playground**：API 在线调试、Chat Completions 测试
+8. **文档中心**（新增）：本地文档管理、分类浏览、搜索、Markdown 渲染
+
+#### 管理员功能
+1. **渠道管理**：CRUD、测试、余额更新、标签管理、批量操作、多密钥、Codex OAuth、Ollama 管理
+2. **用户管理**：列表/搜索/创建/编辑/升降级/启禁/额度调整
+3. **兑换码管理**：CRUD、批量删除无效码
+4. **模型管理**：模型元数据 CRUD、上游同步、缺失模型检测
+5. **供应商管理**：CRUD
+6. **订阅管理**：计划 CRUD、用户订阅管理
+7. **部署管理**：io.net 部署 CRUD、容器管理、日志
+
+#### 超级管理员 - 系统设置
+1. **站点设置**：名称/Logo/页脚/公告/首页内容/服务器地址
+2. **认证设置**：注册/登录开关、OAuth 配置、Turnstile、Passkey、自定义 OAuth
+3. **计费设置**：额度/倍率/支付配置/签到/分组倍率
+4. **内容设置**：公告/FAQ/Uptime Kuma/聊天/绘图/Midjourney
+5. **模型设置**：透传/思维模型/Gemini/Claude 配置
+6. **运维设置**：重试/自动禁用/SMTP/性能监控/日志
+7. **安全设置**：速率限制/敏感词/SSRF 防护/IP 过滤
+
+### 2.3 新增功能 - 本地文档管理
+
+| 功能 | 说明 |
+|------|------|
+| 文档分类 | 支持多级分类树，管理员可创建/编辑/删除分类 |
+| 文档 CRUD | 管理员创建/编辑/删除文档，支持 Markdown 编辑器 |
+| 文档浏览 | 用户按分类浏览文档，支持搜索 |
+| 文档搜索 | 全文搜索文档标题和内容 |
+| 文档版本 | 文档更新历史记录 |
+| 权限控制 | 可设置文档为公开/登录可见/管理员可见 |
+
+## 3. 核心流程
+
+### 3.1 用户认证流程
+
+```mermaid
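+flowchart TD
+    visit["访问平台"] --> loggedIn["已登录?"]
+    loggedIn -->|"是"| dashboard["仪表盘"]
+    loggedIn -->|"否"| loginPage["登录页"]
+    loginPage --> credentials["输入凭证"]
+    credentials --> need2fa["需要2FA?"]
+    need2fa -->|"是"| enter2fa["输入2FA码"]
+    need2fa -->|"否"| verified["验证成功"]
+    enter2fa --> verified
+    verified --> dashboard
+    loginPage --> oauthLogin["OAuth登录"]
+    oauthLogin --> oauthCallback["OAuth回调"]
+    oauthCallback --> accountBound["已绑定账号?"]
+    accountBound -->|"是"| dashboard
+    accountBound -->|"否"| bindOrRegister["绑定/注册"]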
+```
+
+### 3.2 API 调用流程
+
+```mermaid
+flowchart TD
+    createKey["创建API密钥"] --> configureKey["配置密钥参数"]
+    configureKey --> callApi["使用密钥调用API"]
+    callApi --> routeChannel["平台路由到渠道"]
+    routeChannel --> returnResult["返回结果"]
+    returnResult --> writeLog["记录日志"]
+    writeLog --> deductQuota["扣除额度"]
+```
+
+### 3.3 文档管理流程（新增）
+
+```mermaid
+flowchart TD
+    createCategory["管理员创建分类"] --> createDoc["创建文档"]
+    createDoc --> editMarkdown["Markdown编辑"]
+    editMarkdown --> setVisibility["设置可见性"]
+    setVisibility --> publishDoc["发布文档"]
+    publishDoc --> browseSearch["用户浏览/搜索"]
+```
+
+## 4. 用户界面设计
+
+### 4.1 设计风格
+
+- **主色调**：深蓝 (#1e293b) + 亮蓝 (#3b82f6) 渐变，搭配 DaisyUI 的 `business` 主题
+- **辅助色**：翡翠绿 (#10b981) 用于成功/在线状态，琥珀色 (#f59e0b) 用于警告
+- **按钮风格**：DaisyUI 默认圆角按钮，主要操作用 `btn-primary`，危险操作用 `btn-error`
+- **字体**：JetBrains Mono（代码/密钥）+ Noto Sans SC（中文正文）
+- **布局风格**：左侧固定导航栏 + 顶部状态栏 + 主内容区，响应式折叠
+- **图标**：Lucide React 图标库
+- **动效**：DaisyUI 内置动画 + 页面切换淡入
+
+### 4.2 页面设计概览
+
+| 页面 | 模块 | UI 元素 |
+|------|------|---------|
+| 首页 | Hero | 渐变背景、特性卡片、快速开始按钮 |
+| 登录 | 表单 | 居中卡片、OAuth 按钮组、Turnstile |
+| 仪表盘 | 统计卡片 | 4 列额度卡片、折线图、公告栏、API 信息 |
+| 密钥管理 | 数据表 | 搜索栏、筛选器、表格、批量操作栏 |
+| 渠道管理 | 数据表+表单 | 标签筛选、测试按钮、多密钥管理抽屉 |
+| 系统设置 | 标签页 | 7 大分类侧边导航、表单分组、开关/输入框 |
+| 文档中心 | 侧边树+内容 | 分类树导航、Markdown 渲染、搜索框、面包屑 |
+| Playground | 分栏 | 左侧参数面板、右侧响应面板、模型选择器 |
+
+### 4.3 响应式设计
+
+- 桌面优先（1280px+）
+- 平板适配（768px-1279px）：侧边栏折叠为抽屉
+- 移动端适配（<768px）：单列布局，表格改为卡片列表
+
+### 4.4 布局结构
+
+```
+┌──────────────────────────────────────────────┐
+│  顶部导航栏 (Navbar)                           │
+│  Logo | 搜索 | 通知 | 用户菜单 | 主题切换        │
+├──────┬───────────────────────────────────────┤
+│      │                                       │
+│ 侧边 │         主内容区                        │
+│ 导航 │                                       │
+│ 栏   │  ┌─────────────────────────────────┐  │
+│      │  │  面包屑 + 页面标题 + 操作按钮      │  │
+│ 仪表盘│  ├─────────────────────────────────┤  │
+│ 密钥  │  │                                 │  │
+│ 渠道  │  │     页面内容                      │  │
+│ 用户  │  │                                 │  │
+│ 日志  │  │                                 │  │
+│ 钱包  │  └─────────────────────────────────┘  │
+│ 订阅  │                                       │
+│ 文档  │                                       │
+│ 设置  │                                       │
+│      │                                       │
+└──────┴───────────────────────────────────────┘
+```
@@ -0,0 +1,459 @@
+# ModelsToken 管理平台 - 技术架构文档
+
+## 1. 架构设计
+
+```mermaid
+flowchart TB
+    subgraph "前端 (React + DaisyUI 5)"
+        A["React 18"] --> B["React Router v6"]
+        B --> C["页面组件"]
+        C --> D["DaisyUI 5 组件"]
+        D --> E["Tailwind CSS 4"]
+        A --> F["Zustand 状态管理"]
+        A --> G["React Query 数据请求"]
+        A --> H["i18next 国际化"]
+        A --> I["React Markdown 渲染"]
+    end
+    subgraph "后端 (Go + Gin)"
+        J["Gin HTTP Server"]
+        J --> K["API 路由"]
+        J --> L["Relay 代理"]
+        K --> M["控制器"]
+        M --> N["模型层"]
+        N --> O["数据库 (SQLite/MySQL/PostgreSQL)"]
+    end
+    C -->|"Axios HTTP"| K
+```
+
+## 2. 技术说明
+
+- **前端框架**：React 18 + TypeScript
+- **UI 库**：DaisyUI 5 + Tailwind CSS 4
+- **构建工具**：Vite 6
+- **路由**：React Router v6（懒加载）
+- **状态管理**：Zustand（轻量级，替代 Redux）
+- **数据请求**：TanStack React Query v5 + Axios
+- **国际化**：i18next + react-i18next
+- **图表**：Recharts
+- **Markdown**：react-markdown + remark-gfm + rehype-highlight
+- **图标**：Lucide React
+- **代码高亮**：highlight.js
+- **表单验证**：React Hook Form + Zod
+- **通知**：react-hot-toast
+- **项目目录**：`web/daisy/`
+
+## 3. 路由定义
+
+### 3.1 公共路由
+
+| 路由 | 用途 |
+|------|------|
+| `/` | 首页 |
+| `/login` | 登录 |
+| `/register` | 注册 |
+| `/forgot-password` | 忘记密码 |
+| `/reset-password` | 密码重置确认 |
+| `/setup` | 初始化向导 |
+| `/pricing` | 模型定价 |
+| `/about` | 关于 |
+| `/user-agreement` | 用户协议 |
+| `/privacy-policy` | 隐私政策 |
+| `/oauth/callback/:provider` | OAuth 回调 |
+
+### 3.2 认证后路由
+
+| 路由 | 用途 |
+|------|------|
+| `/dashboard` | 仪表盘 |
+| `/tokens` | API 密钥管理 |
+| `/wallet` | 钱包/充值 |
+| `/subscriptions` | 订阅管理 |
+| `/logs` | 使用日志 |
+| `/logs/midjourney` | MJ 日志 |
+| `/logs/tasks` | 任务日志 |
+| `/profile` | 个人设置 |
+| `/playground` | Playground |
+| `/docs` | 文档中心（新增） |
+| `/docs/:slug` | 文档详情（新增） |
+
+### 3.3 管理员路由
+
+| 路由 | 用途 |
+|------|------|
+| `/admin/channels` | 渠道管理 |
+| `/admin/users` | 用户管理 |
+| `/admin/redemptions` | 兑换码管理 |
+| `/admin/models` | 模型管理 |
+| `/admin/vendors` | 供应商管理 |
+| `/admin/deployments` | 部署管理 |
+| `/admin/subscriptions` | 订阅计划管理 |
+
+### 3.4 超级管理员路由
+
+| 路由 | 用途 |
+|------|------|
+| `/settings/site` | 站点设置 |
+| `/settings/auth` | 认证设置 |
+| `/settings/billing` | 计费设置 |
+| `/settings/content` | 内容设置 |
+| `/settings/models` | 模型设置 |
+| `/settings/operations` | 运维设置 |
+| `/settings/security` | 安全设置 |
+| `/settings/docs` | 文档管理（新增） |
+
+## 4. API 定义
+
+### 4.1 核心类型
+
+```typescript
+// 用户
+interface User {
+  id: number;
+  username: string;
+  display_name: string;
+  email: string;
+  role: number; // 1=user, 10=admin, 100=root
+  status: number;
+  quota: number;
+  used_quota: number;
+  request_count: number;
+  group: string;
+  aff_code: string;
+  inviter_id: number;
+  language: string;
+  access_token: string;
+  created_time: number;
+}
+
+// 渠道
+interface Channel {
+  id: number;
+  type: number;
+  key: string;
+  openai_organization?: string;
+  base_url: string;
+  models: string;
+  model_mapping?: string;
+  group: string;
+  groups: string[];
+  name: string;
+  priority: number;
+  weight: number;
+  status: number;
+  tag?: string;
+  setting?: string;
+  test_time: number;
+  response_time: number;
+  balance: number;
+  balance_updated_time: number;
+  created_time: number;
+}
+
+// 令牌
+interface Token {
+  id: number;
+  user_id: number;
+  key: string;
+  status: number;
+  name: string;
+  created_time: number;
+  accessed_time: number;
+  expired_time: number;
+  remain_quota: number;
+  unlimited_quota: boolean;
+  used_quota: number;
+  models: string;
+  subnet: string;
+  group: string;
+}
+
+// 日志
+interface Log {
+  id: number;
+  user_id: number;
+  created_at: number;
+  type: number;
+  content: string;
+  username: string;
+  token_name: string;
+  model_name: string;
+  quota: number;
+  prompt_tokens: number;
+  completion_tokens: number;
+  channel_id: number;
+  token_id: number;
+  group: string;
+  request_id: string;
+  ip: string;
+  detail: string;
+}
+
+// 订阅计划
+interface SubscriptionPlan {
+  id: number;
+  name: string;
+  description: string;
+  price: number;
+  currency: string;
+  duration_days: number;
+  quota: number;
+  models: string;
+  enabled: boolean;
+  sort_order: number;
+  created_time: number;
+}
+
+// 文档（新增）
+interface Document {
+  id: number;
+  title: string;
+  slug: string;
+  content: string; // Markdown
+  category_id: number;
+  category?: DocumentCategory;
+  visibility: 'public' | 'auth' | 'admin';
+  sort_order: number;
+  created_at: string;
+  updated_at: string;
+  author_id: number;
+  author?: User;
+  versions?: DocumentVersion[];
+}
+
+interface DocumentCategory {
+  id: number;
+  name: string;
+  slug: string;
+  parent_id: number | null;
+  children?: DocumentCategory[];
+  sort_order: number;
+}
+
+interface DocumentVersion {
+  id: number;
+  document_id: number;
+  content: string;
+  created_at: string;
+  author_id: number;
+}
+```
+
+### 4.2 新增文档管理 API
+
+| 端点 | 方法 | 权限 | 说明 |
+|------|------|------|------|
+| `/api/docs/categories` | GET | 公开 | 获取分类树 |
+| `/api/docs/categories` | POST | Admin | 创建分类 |
+| `/api/docs/categories/:id` | PUT | Admin | 更新分类 |
+| `/api/docs/categories/:id` | DELETE | Admin | 删除分类 |
+| `/api/docs/` | GET | 按可见性 | 文档列表（支持搜索） |
+| `/api/docs/:slug` | GET | 按可见性 | 获取文档详情 |
+| `/api/docs/` | POST | Admin | 创建文档 |
+| `/api/docs/:id` | PUT | Admin | 更新文档 |
+| `/api/docs/:id` | DELETE | Admin | 删除文档 |
+| `/api/docs/:id/versions` | GET | Admin | 文档版本历史 |
+
+## 5. 项目目录结构
+
+```
+web/daisy/
+├── index.html
+├── package.json
+├── tsconfig.json
+├── vite.config.ts
+├── tailwind.config.ts
+├── public/
+│   └── manifest.json
+└── src/
+    ├── main.tsx                    # 入口
+    ├── App.tsx                     # 根组件 + 路由
+    ├── vite-env.d.ts
+    ├── api/                        # API 请求层
+    │   ├── client.ts               # Axios 实例 + 拦截器
+    │   ├── auth.ts                 # 认证 API
+    │   ├── channel.ts              # 渠道 API
+    │   ├── token.ts                # 令牌 API
+    │   ├── user.ts                 # 用户 API
+    │   ├── log.ts                  # 日志 API
+    │   ├── subscription.ts         # 订阅 API
+    │   ├── redemption.ts           # 兑换码 API
+    │   ├── model.ts                # 模型 API
+    │   ├── vendor.ts               # 供应商 API
+    │   ├── deployment.ts           # 部署 API
+    │   ├── option.ts               # 系统设置 API
+    │   ├── payment.ts              # 支付 API
+    │   └── doc.ts                  # 文档 API（新增）
+    ├── stores/                     # Zustand 状态
+    │   ├── auth.ts                 # 认证状态
+    │   └── ui.ts                   # UI 状态（侧边栏/主题）
+    ├── hooks/                      # 自定义 Hooks
+    │   ├── useAuth.ts
+    │   ├── usePermission.ts
+    │   └── useQuota.ts
+    ├── components/                 # 通用组件
+    │   ├── layout/
+    │   │   ├── AppLayout.tsx       # 主布局
+    │   │   ├── Sidebar.tsx         # 侧边导航
+    │   │   ├── Navbar.tsx          # 顶部导航
+    │   │   └── Breadcrumb.tsx      # 面包屑
+    │   ├── common/
+    │   │   ├── QuotaDisplay.tsx    # 额度显示
+    │   │   ├── ModelBadge.tsx      # 模型标签
+    │   │   ├── StatusBadge.tsx     # 状态标签
+    │   │   ├── SearchInput.tsx     # 搜索框
+    │   │   ├── DataTable.tsx       # 数据表格
+    │   │   ├── ConfirmDialog.tsx   # 确认对话框
+    │   │   └── LoadingSpinner.tsx  # 加载动画
+    │   └── charts/
+    │       ├── QuotaChart.tsx      # 额度趋势图
+    │       └── StatsChart.tsx      # 统计图表
+    ├── pages/                      # 页面组件
+    │   ├── public/
+    │   │   ├── Home.tsx
+    │   │   ├── Login.tsx
+    │   │   ├── Register.tsx
+    │   │   ├── ForgotPassword.tsx
+    │   │   ├── Pricing.tsx
+    │   │   ├── About.tsx
+    │   │   └── Setup.tsx
+    │   ├── dashboard/
+    │   │   └── Dashboard.tsx
+    │   ├── tokens/
+    │   │   ├── TokenList.tsx
+    │   │   └── TokenForm.tsx
+    │   ├── channels/
+    │   │   ├── ChannelList.tsx
+    │   │   └── ChannelForm.tsx
+    │   ├── users/
+    │   │   ├── UserList.tsx
+    │   │   └── UserForm.tsx
+    │   ├── logs/
+    │   │   ├── LogList.tsx
+    │   │   ├── MidjourneyLog.tsx
+    │   │   └── TaskLog.tsx
+    │   ├── wallet/
+    │   │   └── Wallet.tsx
+    │   ├── subscriptions/
+    │   │   ├── PlanList.tsx
+    │   │   └── MySubscription.tsx
+    │   ├── redemptions/
+    │   │   └── RedemptionList.tsx
+    │   ├── models/
+    │   │   └── ModelList.tsx
+    │   ├── vendors/
+    │   │   └── VendorList.tsx
+    │   ├── deployments/
+    │   │   └── DeploymentList.tsx
+    │   ├── playground/
+    │   │   └── Playground.tsx
+    │   ├── profile/
+    │   │   └── Profile.tsx
+    │   ├── docs/                   # 文档中心（新增）
+    │   │   ├── DocCenter.tsx       # 文档浏览主页
+    │   │   ├── DocViewer.tsx       # 文档阅读页
+    │   │   ├── DocEditor.tsx       # 文档编辑页（管理员）
+    │   │   └── DocCategoryManager.tsx # 分类管理（管理员）
+    │   └── settings/
+    │       ├── SiteSettings.tsx
+    │       ├── AuthSettings.tsx
+    │       ├── BillingSettings.tsx
+    │       ├── ContentSettings.tsx
+    │       ├── ModelSettings.tsx
+    │       ├── OperationsSettings.tsx
+    │       ├── SecuritySettings.tsx
+    │       └── DocSettings.tsx     # 文档设置（新增）
+    ├── i18n/                       # 国际化
+    │   ├── index.ts
+    │   └── locales/
+    │       ├── en.json
+    │       └── zh.json
+    ├── lib/                        # 工具函数
+    │   ├── constants.ts
+    │   ├── utils.ts
+    │   ├── quota.ts
+    │   └── channel-types.ts
+    └── types/                      # TypeScript 类型
+        ├── api.ts
+        ├── channel.ts
+        ├── token.ts
+        ├── user.ts
+        ├── log.ts
+        ├── subscription.ts
+        ├── doc.ts
+        └── option.ts
+```
+
+## 6. 数据模型（新增文档管理）
+
+```mermaid
+erDiagram
+    "document_categories" {
+        int id PK
+        string name
+        string slug UK
+        int parent_id FK
+        int sort_order
+        timestamp created_at
+    }
+    "documents" {
+        int id PK
+        string title
+        string slug UK
+        text content
+        int category_id FK
+        string visibility
+        int sort_order
+        int author_id FK
+        timestamp created_at
+        timestamp updated_at
+    }
+    "document_versions" {
+        int id PK
+        int document_id FK
+        text content
+        int author_id FK
+        timestamp created_at
+    }
+    "document_categories" ||--o{ "document_categories" : "parent"
+    "document_categories" ||--o{ "documents" : "has"
+    "documents" ||--o{ "document_versions" : "has"
+```
+
+### DDL
+
+```sql
+CREATE TABLE document_categories (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    name VARCHAR(100) NOT NULL,
+    slug VARCHAR(100) NOT NULL UNIQUE,
+    parent_id INTEGER REFERENCES document_categories(id) ON DELETE SET NULL,
+    sort_order INTEGER DEFAULT 0,
+    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE TABLE documents (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    title VARCHAR(200) NOT NULL,
+    slug VARCHAR(200) NOT NULL UNIQUE,
+    content TEXT NOT NULL,
+    category_id INTEGER REFERENCES document_categories(id) ON DELETE SET NULL,
+    visibility VARCHAR(20) DEFAULT 'public' CHECK (visibility IN ('public', 'auth', 'admin')),
+    sort_order INTEGER DEFAULT 0,
+    author_id INTEGER NOT NULL,
+    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE TABLE document_versions (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
+    content TEXT NOT NULL,
+    author_id INTEGER NOT NULL,
+    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE INDEX idx_documents_slug ON documents(slug);
+CREATE INDEX idx_documents_category ON documents(category_id);
+CREATE INDEX idx_documents_visibility ON documents(visibility);
+CREATE INDEX idx_document_versions_doc ON document_versions(document_id);
+```
@@ -1,22 +1,24 @@
 FROM oven/bun:1@sha256:0733e50325078969732ebe3b15ce4c4be5082f18c4ac1a0f0ca4839c2e4e42a7 AS builder

-WORKDIR /build
-COPY web/default/package.json .
-COPY web/default/bun.lock .
-RUN bun install
-COPY ./web/default .
-COPY ./VERSION .
-RUN DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(cat VERSION) bun run build
+WORKDIR /build/web
+COPY web/package.json web/bun.lock ./
+COPY web/default/package.json ./default/package.json
+COPY web/classic/package.json ./classic/package.json
+RUN bun install --frozen-lockfile
+COPY ./web/default ./default
+COPY ./VERSION /build/VERSION
+RUN cd default && DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(cat /build/VERSION) bun run build

 FROM oven/bun:1@sha256:0733e50325078969732ebe3b15ce4c4be5082f18c4ac1a0f0ca4839c2e4e42a7 AS builder-classic

-WORKDIR /build
-COPY web/classic/package.json .
-COPY web/classic/bun.lock .
-RUN bun install
-COPY ./web/classic .
-COPY ./VERSION .
-RUN VITE_REACT_APP_VERSION=$(cat VERSION) bun run build
+WORKDIR /build/web
+COPY web/package.json web/bun.lock ./
+COPY web/default/package.json ./default/package.json
+COPY web/classic/package.json ./classic/package.json
+RUN bun install --frozen-lockfile
+COPY ./web/classic ./classic
+COPY ./VERSION /build/VERSION
+RUN cd classic && VITE_REACT_APP_VERSION=$(cat /build/VERSION) bun run build

 FROM golang:1.26.1-alpine@sha256:2389ebfa5b7f43eeafbd6be0c3700cc46690ef842ad962f6c5bd6be49ed82039 AS builder2
 ENV GO111MODULE=on CGO_ENABLED=0
@@ -32,8 +34,8 @@ ADD go.mod go.sum ./
 RUN go mod download

 COPY . .
-COPY --from=builder /build/dist ./web/default/dist
-COPY --from=builder-classic /build/dist ./web/classic/dist
+COPY --from=builder /build/web/default/dist ./web/default/dist
+COPY --from=builder-classic /build/web/classic/dist ./web/classic/dist
 RUN go build -ldflags "-s -w -X 'github.com/QuantumNous/new-api/common.Version=$(cat VERSION)'" -o new-api

 FROM debian:bookworm-slim@sha256:f06537653ac770703bc45b4b113475bd402f451e85223f0f2837acbf89ab020a
@@ -1,7 +1,7 @@
 new-api Notices

 new-api
-Copyright (c) QuantumNous and contributors.
+Copyright (c) modelstoken and contributors.

 This project is licensed under the GNU Affero General Public License v3.0.
 See LICENSE for the full project license terms.
@@ -19,7 +19,7 @@ Modified versions that present a user interface must also preserve a visible
 link to the original project in a prominent about, legal, footer, or
 attribution location:

-https://github.com/QuantumNous/new-api
+https://git.viaeon.com/admin/new-api

 Modified versions must not misrepresent the origin of the software and must
 mark their changes in accordance with AGPLv3 Section 7(c).
@@ -316,6 +316,7 @@ docker run --name new-api -d --restart always \
 | `CRYPTO_SECRET` | Encryption secret (required for Redis) | - |
 | `SQL_DSN` | Database connection string | - |
 | `REDIS_CONN_STRING` | Redis connection string | - |
+| `RELAY_IDLE_CONN_TIMEOUT` | Idle keep-alive connection timeout for relay HTTP clients (seconds). The default matches the Go standard library; set `0` for no idle timeout | `90` |
 | `STREAMING_TIMEOUT` | Streaming timeout (seconds) | `300` |
 | `STREAM_SCANNER_MAX_BUFFER_MB` | Max per-line buffer (MB) for the stream scanner; increase when upstream sends huge image/base64 payloads | `64` |
 | `MAX_REQUEST_BODY_MB` | Max request body size (MB, counted **after decompression**; prevents huge requests/zip bombs from exhausting memory). Exceeding it returns `413` | `32` |
@@ -14,7 +14,7 @@ import (

 var StartTime = time.Now().Unix() // unit: second
 var Version = "v0.0.0"            // this hard coding will be replaced automatically when building, no need to manually change
-var SystemName = "New API"
+var SystemName = "ModelsToken"
 var Footer = ""
 var Logo = ""
 var TopUpLink = ""
@@ -170,6 +170,7 @@ var BatchUpdateInterval int

 var RelayTimeout int // unit is second

+var RelayIdleConnTimeout int // unit is second
 var RelayMaxIdleConns int
 var RelayMaxIdleConnsPerHost int

@@ -51,17 +51,21 @@ type themeAwareFileSystem struct {
 }

 func (t *themeAwareFileSystem) Exists(prefix string, path string) bool {
-	if GetTheme() == "classic" {
+	switch GetTheme() {
+	case "classic":
 		return t.classicFS.Exists(prefix, path)
+	default:
+		return t.defaultFS.Exists(prefix, path)
 	}
-	return t.defaultFS.Exists(prefix, path)
 }

 func (t *themeAwareFileSystem) Open(name string) (http.File, error) {
-	if GetTheme() == "classic" {
+	switch GetTheme() {
+	case "classic":
 		return t.classicFS.Open(name)
+	default:
+		return t.defaultFS.Open(name)
 	}
-	return t.defaultFS.Open(name)
 }

 func NewThemeAwareFS(defaultFS, classicFS static.ServeFileSystem) static.ServeFileSystem {
@@ -102,6 +102,7 @@ func InitEnv() {
 	SyncFrequency = GetEnvOrDefault("SYNC_FREQUENCY", 60)
 	BatchUpdateInterval = GetEnvOrDefault("BATCH_UPDATE_INTERVAL", 5)
 	RelayTimeout = GetEnvOrDefault("RELAY_TIMEOUT", 0)
+	RelayIdleConnTimeout = GetEnvOrDefault("RELAY_IDLE_CONN_TIMEOUT", 90)
 	RelayMaxIdleConns = GetEnvOrDefault("RELAY_MAX_IDLE_CONNS", 500)
 	RelayMaxIdleConnsPerHost = GetEnvOrDefault("RELAY_MAX_IDLE_CONNS_PER_HOST", 100)

@@ -111,11 +112,11 @@ func InitEnv() {

 	// Initialize rate limit variables
 	GlobalApiRateLimitEnable = GetEnvOrDefaultBool("GLOBAL_API_RATE_LIMIT_ENABLE", true)
-	GlobalApiRateLimitNum = GetEnvOrDefault("GLOBAL_API_RATE_LIMIT", 180)
+	GlobalApiRateLimitNum = GetEnvOrDefault("GLOBAL_API_RATE_LIMIT", 360)
 	GlobalApiRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_API_RATE_LIMIT_DURATION", 180))

 	GlobalWebRateLimitEnable = GetEnvOrDefaultBool("GLOBAL_WEB_RATE_LIMIT_ENABLE", true)
-	GlobalWebRateLimitNum = GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT", 60)
+	GlobalWebRateLimitNum = GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT", 120)
 	GlobalWebRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT_DURATION", 180))

 	CriticalRateLimitEnable = GetEnvOrDefaultBool("CRITICAL_RATE_LIMIT_ENABLE", true)
@@ -135,6 +136,7 @@ func initConstantEnv() {
 	constant.StreamScannerMaxBufferMB = GetEnvOrDefault("STREAM_SCANNER_MAX_BUFFER_MB", 128)
 	// MaxRequestBodyMB 请求体最大大小（解压后），用于防止超大请求/zip bomb导致内存暴涨
 	constant.MaxRequestBodyMB = GetEnvOrDefault("MAX_REQUEST_BODY_MB", 128)
+	constant.AnonymousRequestBodyLimitKB = GetEnvOrDefault("ANONYMOUS_REQUEST_BODY_LIMIT_KB", 512)
 	// ForceStreamOption 覆盖请求参数，强制返回usage信息
 	constant.ForceStreamOption = GetEnvOrDefaultBool("FORCE_STREAM_OPTION", true)
 	constant.CountToken = GetEnvOrDefaultBool("CountToken", true)
@@ -0,0 +1,13 @@
+package common
+
+import "github.com/QuantumNous/new-api/constant"
+
+// defaultAnonymousRequestBodyLimitKB is the fallback limit, in KiB, used when
+// the configured value is negative.
+const defaultAnonymousRequestBodyLimitKB = 512
+
+// GetAnonymousRequestBodyLimitBytes returns constant.AnonymousRequestBodyLimitKB
+// converted from KiB to bytes. A negative configured value is replaced by
+// defaultAnonymousRequestBodyLimitKB; a configured value of zero is passed
+// through unchanged and therefore yields 0.
+func GetAnonymousRequestBodyLimitBytes() int64 {
+	const bytesPerKB = 1024
+	configuredKB := constant.AnonymousRequestBodyLimitKB
+	if configuredKB >= 0 {
+		return int64(configuredKB) * bytesPerKB
+	}
+	return int64(defaultAnonymousRequestBodyLimitKB) * bytesPerKB
+}
@@ -206,4 +206,8 @@ var ChannelSpecialBases = map[string]ChannelSpecialBase{
 		ClaudeBaseURL: "https://ark.cn-beijing.volces.com/api/coding",
 		OpenAIBaseURL: "https://ark.cn-beijing.volces.com/api/coding/v3",
 	},
+	"tencent-coding-plan": {
+		ClaudeBaseURL: "https://api.lkeap.cloud.tencent.com/coding",
+		OpenAIBaseURL: "https://api.lkeap.cloud.tencent.com/coding/v3",
+	},
 }
@@ -10,6 +10,7 @@ var GetMediaToken bool
 var GetMediaTokenNotStream bool
 var UpdateTask bool
 var MaxRequestBodyMB int
+var AnonymousRequestBodyLimitKB int
 var AzureDefaultAPIVersion string
 var NotifyLimitCount int
 var NotificationLimitDurationMinute int
@@ -814,7 +814,7 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel,
 		testRequest.StreamOptions = &dto.StreamOptions{IncludeUsage: true}
 	}

-	if strings.HasPrefix(model, "o") {
+	if dto.IsOpenAIReasoningOModel(model) {
 		testRequest.MaxCompletionTokens = lo.ToPtr(uint(16))
 	} else if strings.Contains(model, "thinking") {
 		if !strings.Contains(model, "claude") {
@@ -312,7 +312,11 @@ func fetchChannelUpstreamModelIDs(channel *model.Channel) ([]string, error) {
 			url = fmt.Sprintf("%s/v1/models", baseURL)
 		}
 	default:
-		url = fmt.Sprintf("%s/v1/models", baseURL)
+		if plan, ok := constant.ChannelSpecialBases[baseURL]; ok && plan.OpenAIBaseURL != "" {
+			url = fmt.Sprintf("%s/models", plan.OpenAIBaseURL)
+		} else {
+			url = fmt.Sprintf("%s/v1/models", baseURL)
+		}
 	}

 	key, _, apiErr := channel.GetNextEnabledKey()
@@ -0,0 +1,256 @@
+package controller
+
+import (
+	"net/http"
+	"strconv"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/model"
+
+	"github.com/gin-gonic/gin"
+)
+
+// GetCategories responds with the document categories returned by
+// model.GetDocumentCategories, or with an API error when the lookup fails.
+// No authorization check is performed inside this handler.
+func GetCategories(c *gin.Context) {
+	list, lookupErr := model.GetDocumentCategories()
+	if lookupErr == nil {
+		common.ApiSuccess(c, list)
+		return
+	}
+	common.ApiError(c, lookupErr)
+}
+
+// CreateCategory binds a model.DocumentCategory from the JSON request body,
+// requires a non-empty name and slug, and persists it through
+// model.CreateDocumentCategory. The stored category is echoed back on success.
+// The original note marks this as an admin endpoint; the permission check is
+// not part of this handler.
+func CreateCategory(c *gin.Context) {
+	var payload model.DocumentCategory
+	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
+		common.ApiError(c, bindErr)
+		return
+	}
+
+	// Required-field validation; name is checked before slug.
+	switch {
+	case payload.Name == "":
+		common.ApiErrorMsg(c, "分类名称不能为空")
+		return
+	case payload.Slug == "":
+		common.ApiErrorMsg(c, "分类标识不能为空")
+		return
+	}
+
+	if createErr := model.CreateDocumentCategory(&payload); createErr != nil {
+		common.ApiError(c, createErr)
+		return
+	}
+	common.ApiSuccess(c, &payload)
+}
+
+// UpdateCategory updates the category identified by the ":id" path parameter.
+// The JSON body is bound into a model.DocumentCategory whose Id is then
+// overwritten with the path value before model.UpdateDocumentCategory is
+// called. The original note marks this as an admin endpoint; the permission
+// check is not part of this handler.
+func UpdateCategory(c *gin.Context) {
+	categoryID, convErr := strconv.Atoi(c.Param("id"))
+	if convErr != nil {
+		common.ApiError(c, convErr)
+		return
+	}
+
+	var payload model.DocumentCategory
+	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
+		common.ApiError(c, bindErr)
+		return
+	}
+	// The path parameter wins over any id supplied in the body.
+	payload.Id = categoryID
+
+	if updateErr := model.UpdateDocumentCategory(&payload); updateErr != nil {
+		common.ApiError(c, updateErr)
+		return
+	}
+	common.ApiSuccess(c, &payload)
+}
+
+// DeleteCategory removes the category identified by the ":id" path parameter
+// via model.DeleteDocumentCategory and responds with an empty success payload.
+// The original note marks this as an admin endpoint; the permission check is
+// not part of this handler.
+func DeleteCategory(c *gin.Context) {
+	categoryID, convErr := strconv.Atoi(c.Param("id"))
+	if convErr != nil {
+		common.ApiError(c, convErr)
+		return
+	}
+
+	deleteErr := model.DeleteDocumentCategory(categoryID)
+	if deleteErr != nil {
+		common.ApiError(c, deleteErr)
+		return
+	}
+	common.ApiSuccess(c, nil)
+}
+
+// GetDocuments returns one page of documents, filtered by the optional
+// "keyword", "category_id" and "visibility" query parameters. Which
+// visibilities may be returned depends on the "role" value found in the gin
+// context:
+//   - role >= common.RoleAdminUser: the requested visibility is passed through.
+//   - role >= common.RoleCommonUser: "public" or "auth" is honored as
+//     requested; any other value lists both "public" and "auth".
+//   - otherwise: only "public" documents are listed.
+func GetDocuments(c *gin.Context) {
+	keyword := c.Query("keyword")
+
+	// A category_id that is not an integer is ignored rather than rejected.
+	var categoryId *int
+	if raw := c.Query("category_id"); raw != "" {
+		if parsed, convErr := strconv.Atoi(raw); convErr == nil {
+			categoryId = &parsed
+		}
+	}
+
+	pageInfo := common.GetPageQuery(c)
+	visibility := c.Query("visibility")
+	role := c.GetInt("role")
+	start, size := pageInfo.GetStartIdx(), pageInfo.GetPageSize()
+
+	var (
+		documents []*model.Document
+		total     int64
+		err       error
+	)
+	switch {
+	case role >= common.RoleAdminUser:
+		// Admins may request any visibility.
+		documents, total, err = model.GetDocuments(keyword, visibility, categoryId, start, size)
+	case role < common.RoleCommonUser:
+		// Callers without a user role are pinned to public documents.
+		documents, total, err = model.GetDocuments(keyword, "public", categoryId, start, size)
+	case visibility == "public" || visibility == "auth":
+		// Regular users may narrow the listing to one permitted visibility.
+		documents, total, err = model.GetDocuments(keyword, visibility, categoryId, start, size)
+	default:
+		// Regular users with no (or a disallowed) filter see public + auth.
+		documents, total, err = model.GetDocumentsByVisibility(keyword, []string{"public", "auth"}, categoryId, start, size)
+	}
+
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	pageInfo.SetTotal(int(total))
+	pageInfo.SetItems(documents)
+	common.ApiSuccess(c, pageInfo)
+}
+
+// GetDocument returns the document identified by the ":slug" path parameter
+// after checking its visibility against the "role" value in the gin context:
+//   - "public": readable by everyone.
+//   - "auth": requires role >= common.RoleCommonUser.
+//   - "admin" and any unrecognized value: requires role >= common.RoleAdminUser.
+//
+// Unrecognized visibility values are deliberately treated like "admin" (fail
+// closed). Previously they fell through the switch and the document was served
+// to anonymous callers, which disagreed with GetDocuments, where anonymous
+// callers only ever receive "public" documents.
+//
+// Permission failures are reported as HTTP 200 with success=false.
+func GetDocument(c *gin.Context) {
+	slug := c.Param("slug")
+	doc, err := model.GetDocumentBySlug(slug)
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+
+	role := c.GetInt("role")
+	denyMessage := ""
+	switch doc.Visibility {
+	case "public":
+		// No restriction.
+	case "auth":
+		if role < common.RoleCommonUser {
+			denyMessage = "请先登录后查看该文档"
+		}
+	default:
+		// "admin", plus anything unexpected stored in the column.
+		if role < common.RoleAdminUser {
+			denyMessage = "无权访问该文档"
+		}
+	}
+	if denyMessage != "" {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": denyMessage,
+		})
+		return
+	}
+
+	common.ApiSuccess(c, doc)
+}
+
+// CreateDocument binds a model.Document from the JSON request body, validates
+// it and persists it through model.CreateDocument. Title, slug and content
+// must be non-empty. An empty visibility defaults to "public"; any other value
+// must be one of "public", "auth" or "admin", so that the read handlers never
+// meet a visibility they do not know how to enforce. The author is taken from
+// the "id" value in the gin context, overriding anything sent in the body.
+// The original note marks this as an admin endpoint; the permission check is
+// not part of this handler.
+func CreateDocument(c *gin.Context) {
+	var doc model.Document
+	if err := c.ShouldBindJSON(&doc); err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	if doc.Title == "" {
+		common.ApiErrorMsg(c, "文档标题不能为空")
+		return
+	}
+	if doc.Slug == "" {
+		common.ApiErrorMsg(c, "文档标识不能为空")
+		return
+	}
+	if doc.Content == "" {
+		common.ApiErrorMsg(c, "文档内容不能为空")
+		return
+	}
+	switch doc.Visibility {
+	case "":
+		doc.Visibility = "public"
+	case "public", "auth", "admin":
+		// Accepted as-is.
+	default:
+		common.ApiErrorMsg(c, "文档可见性无效")
+		return
+	}
+	doc.AuthorId = c.GetInt("id")
+	if err := model.CreateDocument(&doc); err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	common.ApiSuccess(c, &doc)
+}
+
+// UpdateDocument updates the document identified by the ":id" path parameter.
+// Before the update is applied, the currently stored content is saved as a
+// model.DocumentVersion attributed to the stored document's author.
+//
+// A non-empty visibility in the request body must be one of "public", "auth"
+// or "admin"; it is rejected before anything is written, so an invalid request
+// neither stores an unenforceable visibility nor leaves a stray version row.
+// An empty visibility is passed through to model.UpdateDocument unchanged.
+//
+// NOTE(review): the version snapshot and the update are two separate writes;
+// if model.UpdateDocument fails, the snapshot has already been stored.
+// The original note marks this as an admin endpoint; the permission check is
+// not part of this handler.
+func UpdateDocument(c *gin.Context) {
+	id, err := strconv.Atoi(c.Param("id"))
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	var doc model.Document
+	if err := c.ShouldBindJSON(&doc); err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	// The path parameter wins over any id supplied in the body.
+	doc.Id = id
+
+	switch doc.Visibility {
+	case "", "public", "auth", "admin":
+		// Empty means "not supplied"; the rest are the known visibilities.
+	default:
+		common.ApiErrorMsg(c, "文档可见性无效")
+		return
+	}
+
+	// Snapshot the stored content so the pre-update state stays available.
+	oldDoc, err := model.GetDocumentById(id)
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	version := &model.DocumentVersion{
+		DocumentId: oldDoc.Id,
+		Content:    oldDoc.Content,
+		AuthorId:   oldDoc.AuthorId,
+	}
+	if err := model.CreateDocumentVersion(version); err != nil {
+		common.ApiError(c, err)
+		return
+	}
+
+	if err := model.UpdateDocument(&doc); err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	common.ApiSuccess(c, &doc)
+}
+
+// DeleteDocument removes the document identified by the ":id" path parameter
+// via model.DeleteDocument and responds with an empty success payload.
+// The original note marks this as an admin endpoint; the permission check is
+// not part of this handler.
+func DeleteDocument(c *gin.Context) {
+	docID, convErr := strconv.Atoi(c.Param("id"))
+	if convErr != nil {
+		common.ApiError(c, convErr)
+		return
+	}
+
+	deleteErr := model.DeleteDocument(docID)
+	if deleteErr != nil {
+		common.ApiError(c, deleteErr)
+		return
+	}
+	common.ApiSuccess(c, nil)
+}
+
+// GetDocumentVersions returns one page of the version history for the document
+// identified by the ":id" path parameter. Paging comes from
+// common.GetPageQuery, and the same page object is returned carrying the total
+// and the items. The original note marks this as an admin endpoint; the
+// permission check is not part of this handler.
+func GetDocumentVersions(c *gin.Context) {
+	docID, convErr := strconv.Atoi(c.Param("id"))
+	if convErr != nil {
+		common.ApiError(c, convErr)
+		return
+	}
+
+	pageInfo := common.GetPageQuery(c)
+	offset, limit := pageInfo.GetStartIdx(), pageInfo.GetPageSize()
+
+	history, count, queryErr := model.GetDocumentVersions(docID, offset, limit)
+	if queryErr != nil {
+		common.ApiError(c, queryErr)
+		return
+	}
+
+	pageInfo.SetTotal(int(count))
+	pageInfo.SetItems(history)
+	common.ApiSuccess(c, pageInfo)
+}
@@ -7,7 +7,6 @@ import (
 	"strings"

 	"github.com/QuantumNous/new-api/common"
-	"github.com/QuantumNous/new-api/i18n"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/setting"
 	"github.com/QuantumNous/new-api/setting/console_setting"
@@ -29,10 +28,6 @@ var completionRatioMetaOptionKeys = []string{
 	"AudioCompletionRatio",
 }

-func isPaymentComplianceOptionKey(key string) bool {
-	return strings.HasPrefix(key, "payment_setting.compliance_")
-}
-
 func isPositiveOptionValue(value string) bool {
 	intValue, err := strconv.Atoi(strings.TrimSpace(value))
 	if err == nil {
@@ -139,15 +134,8 @@ func UpdateOption(c *gin.Context) {
 	}
 	switch option.Key {
 	case "QuotaForInviter", "QuotaForInvitee":
-		if isPositiveOptionValue(option.Value.(string)) && !operation_setting.IsPaymentComplianceConfirmed() {
-			common.ApiErrorI18n(c, i18n.MsgPaymentComplianceRequired)
-			return
-		}
+		// no compliance check needed
 	default:
-		if isPaymentComplianceOptionKey(option.Key) {
-			common.ApiErrorMsg(c, "合规确认字段不允许通过通用设置接口修改")
-			return
-		}
 	}
 	switch option.Key {
 	case "GitHubOAuthEnabled":
@@ -1,82 +0,0 @@
-package controller
-
-import (
-	"fmt"
-	"net/http"
-	"strconv"
-	"time"
-
-	"github.com/QuantumNous/new-api/common"
-	"github.com/QuantumNous/new-api/i18n"
-	"github.com/QuantumNous/new-api/logger"
-	"github.com/QuantumNous/new-api/model"
-	"github.com/QuantumNous/new-api/setting/operation_setting"
-
-	"github.com/gin-gonic/gin"
-)
-
-type PaymentComplianceRequest struct {
-	Confirmed bool `json:"confirmed"`
-}
-
-func requirePaymentCompliance(c *gin.Context) bool {
-	if !operation_setting.IsPaymentComplianceConfirmed() {
-		common.ApiErrorI18n(c, i18n.MsgPaymentComplianceRequired)
-		return false
-	}
-	return true
-}
-
-func ConfirmPaymentCompliance(c *gin.Context) {
-	if c.GetBool("use_access_token") {
-		c.JSON(http.StatusForbidden, gin.H{
-			"success": false,
-			"message": "This operation requires dashboard session authentication. API access token is not allowed.",
-		})
-		return
-	}
-
-	var req PaymentComplianceRequest
-	if err := common.DecodeJson(c.Request.Body, &req); err != nil {
-		common.ApiErrorMsg(c, "参数错误")
-		return
-	}
-	if !req.Confirmed {
-		common.ApiErrorMsg(c, "请确认合规声明")
-		return
-	}
-
-	now := time.Now().Unix()
-	userId := c.GetInt("id")
-	clientIP := c.ClientIP()
-
-	updates := map[string]string{
-		"payment_setting.compliance_confirmed":     "true",
-		"payment_setting.compliance_terms_version": operation_setting.CurrentComplianceTermsVersion,
-		"payment_setting.compliance_confirmed_at":  strconv.FormatInt(now, 10),
-		"payment_setting.compliance_confirmed_by":  strconv.Itoa(userId),
-		"payment_setting.compliance_confirmed_ip":  clientIP,
-	}
-
-	for key, value := range updates {
-		if err := model.UpdateOption(key, value); err != nil {
-			common.ApiError(c, err)
-			return
-		}
-	}
-
-	logger.LogInfo(c.Request.Context(), fmt.Sprintf(
-		"payment compliance confirmed user_id=%d ip=%s terms_version=%s confirmed_at=%d",
-		userId,
-		clientIP,
-		operation_setting.CurrentComplianceTermsVersion,
-		now,
-	))
-
-	common.ApiSuccess(c, gin.H{
-		"confirmed":     true,
-		"terms_version": operation_setting.CurrentComplianceTermsVersion,
-		"confirmed_at":  now,
-		"confirmed_by":  userId,
-	})
-}
@@ -7,14 +7,7 @@ import (
 	"github.com/QuantumNous/new-api/setting/operation_setting"
 )

-func isPaymentComplianceConfirmed() bool {
-	return operation_setting.IsPaymentComplianceConfirmed()
-}
-
 func isStripeTopUpEnabled() bool {
-	if !isPaymentComplianceConfirmed() {
-		return false
-	}
 	return strings.TrimSpace(setting.StripeApiSecret) != "" &&
 		strings.TrimSpace(setting.StripeWebhookSecret) != "" &&
 		strings.TrimSpace(setting.StripePriceId) != ""
@@ -29,9 +22,6 @@ func isStripeWebhookEnabled() bool {
 }

 func isCreemTopUpEnabled() bool {
-	if !isPaymentComplianceConfirmed() {
-		return false
-	}
 	products := strings.TrimSpace(setting.CreemProducts)
 	return strings.TrimSpace(setting.CreemApiKey) != "" &&
 		products != "" &&
@@ -47,9 +37,6 @@ func isCreemWebhookEnabled() bool {
 }

 func isWaffoTopUpEnabled() bool {
-	if !isPaymentComplianceConfirmed() {
-		return false
-	}
 	if !setting.WaffoEnabled {
 		return false
 	}
@@ -74,11 +61,6 @@ func isWaffoWebhookEnabled() bool {
 }

 func isWaffoPancakeTopUpEnabled() bool {
-	if !isPaymentComplianceConfirmed() {
-		return false
-	}
-	// Presence-of-credentials = enabled. Webhook public keys ship inside
-	// the SDK; mode (test/prod) is read from each event.
 	return strings.TrimSpace(setting.WaffoPancakeMerchantID) != "" &&
 		strings.TrimSpace(setting.WaffoPancakePrivateKey) != "" &&
 		strings.TrimSpace(setting.WaffoPancakeProductID) != ""
@@ -93,9 +75,6 @@ func isWaffoPancakeWebhookEnabled() bool {
 }

 func isEpayTopUpEnabled() bool {
-	if !isPaymentComplianceConfirmed() {
-		return false
-	}
 	return isEpayWebhookConfigured() && len(operation_setting.PayMethods) > 0
 }

@@ -8,21 +8,7 @@ import (
 	"github.com/stretchr/testify/require"
 )

-func confirmPaymentComplianceForTest(t *testing.T) {
-	t.Helper()
-	paymentSetting := operation_setting.GetPaymentSetting()
-	originalConfirmed := paymentSetting.ComplianceConfirmed
-	originalTermsVersion := paymentSetting.ComplianceTermsVersion
-	t.Cleanup(func() {
-		paymentSetting.ComplianceConfirmed = originalConfirmed
-		paymentSetting.ComplianceTermsVersion = originalTermsVersion
-	})
-	paymentSetting.ComplianceConfirmed = true
-	paymentSetting.ComplianceTermsVersion = operation_setting.CurrentComplianceTermsVersion
-}
-
 func TestStripeWebhookEnabledRequiresTopUpAndWebhookConfig(t *testing.T) {
-	confirmPaymentComplianceForTest(t)
 	originalAPISecret := setting.StripeApiSecret
 	originalWebhookSecret := setting.StripeWebhookSecret
 	originalPriceID := setting.StripePriceId
@@ -45,7 +31,6 @@ func TestStripeWebhookEnabledRequiresTopUpAndWebhookConfig(t *testing.T) {
 }

 func TestCreemWebhookEnabledRequiresTopUpAndWebhookConfig(t *testing.T) {
-	confirmPaymentComplianceForTest(t)
 	originalAPIKey := setting.CreemApiKey
 	originalProducts := setting.CreemProducts
 	originalWebhookSecret := setting.CreemWebhookSecret
@@ -68,7 +53,6 @@ func TestCreemWebhookEnabledRequiresTopUpAndWebhookConfig(t *testing.T) {
 }

 func TestWaffoWebhookEnabledRequiresTopUpAndWebhookConfig(t *testing.T) {
-	confirmPaymentComplianceForTest(t)
 	originalEnabled := setting.WaffoEnabled
 	originalSandbox := setting.WaffoSandbox
 	originalAPIKey := setting.WaffoApiKey
@@ -113,7 +97,6 @@ func TestWaffoWebhookEnabledRequiresTopUpAndWebhookConfig(t *testing.T) {
 }

 func TestWaffoPancakeWebhookEnabledRequiresTopUpAndWebhookConfig(t *testing.T) {
-	confirmPaymentComplianceForTest(t)
 	originalMerchantID := setting.WaffoPancakeMerchantID
 	originalPrivateKey := setting.WaffoPancakePrivateKey
 	originalProductID := setting.WaffoPancakeProductID
@@ -123,9 +106,6 @@ func TestWaffoPancakeWebhookEnabledRequiresTopUpAndWebhookConfig(t *testing.T) {
 		setting.WaffoPancakeProductID = originalProductID
 	})

-	// Presence of all three credentials enables the gateway. Webhook public
-	// keys are bundled in the SDK and there is no separate Enabled toggle —
-	// clear any of the three fields to disable.
 	setting.WaffoPancakeMerchantID = ""
 	setting.WaffoPancakePrivateKey = "private"
 	setting.WaffoPancakeProductID = "product"
@@ -143,7 +123,6 @@ func TestWaffoPancakeWebhookEnabledRequiresTopUpAndWebhookConfig(t *testing.T) {
 }

 func TestEpayWebhookEnabledRequiresTopUpAndWebhookConfig(t *testing.T) {
-	confirmPaymentComplianceForTest(t)
 	originalPayAddress := operation_setting.PayAddress
 	originalEpayID := operation_setting.EpayId
 	originalEpayKey := operation_setting.EpayKey
@@ -8,7 +8,6 @@ import (
 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/i18n"
 	"github.com/QuantumNous/new-api/model"
-	"github.com/QuantumNous/new-api/setting/operation_setting"

 	"github.com/gin-gonic/gin"
 )
@@ -60,11 +59,6 @@ func GetRedemption(c *gin.Context) {
 }

 func AddRedemption(c *gin.Context) {
-	if !operation_setting.IsPaymentComplianceConfirmed() {
-		common.ApiErrorI18n(c, i18n.MsgPaymentComplianceRequired)
-		return
-	}
-
 	redemption := model.Redemption{}
 	err := c.ShouldBindJSON(&redemption)
 	if err != nil {
@@ -6,7 +6,6 @@ import (

 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/model"
-	"github.com/QuantumNous/new-api/setting/operation_setting"
 	"github.com/QuantumNous/new-api/setting/ratio_setting"
 	"github.com/gin-gonic/gin"
 	"gorm.io/gorm"
@@ -29,11 +28,6 @@ type SubscriptionBalancePayRequest struct {
 // ---- User APIs ----

 func GetSubscriptionPlans(c *gin.Context) {
-	if !operation_setting.IsPaymentComplianceConfirmed() {
-		common.ApiSuccess(c, []SubscriptionPlanDTO{})
-		return
-	}
-
 	var plans []model.SubscriptionPlan
 	if err := model.DB.Where("enabled = ?", true).Order("sort_order desc, id desc").Find(&plans).Error; err != nil {
 		common.ApiError(c, err)
@@ -41,6 +35,7 @@ func GetSubscriptionPlans(c *gin.Context) {
 	}
 	result := make([]SubscriptionPlanDTO, 0, len(plans))
 	for _, p := range plans {
+		p.NormalizeDefaults()
 		result = append(result, SubscriptionPlanDTO{
 			Plan: p,
 		})
@@ -97,10 +92,6 @@ func UpdateSubscriptionPreference(c *gin.Context) {
 }

 func SubscriptionRequestBalancePay(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	userId := c.GetInt("id")
 	var req SubscriptionBalancePayRequest
 	if err := c.ShouldBindJSON(&req); err != nil || req.PlanId <= 0 {
@@ -125,6 +116,7 @@ func AdminListSubscriptionPlans(c *gin.Context) {
 	}
 	result := make([]SubscriptionPlanDTO, 0, len(plans))
 	for _, p := range plans {
+		p.NormalizeDefaults()
 		result = append(result, SubscriptionPlanDTO{
 			Plan: p,
 		})
@@ -137,10 +129,6 @@ type AdminUpsertSubscriptionPlanRequest struct {
 }

 func AdminCreateSubscriptionPlan(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	var req AdminUpsertSubscriptionPlanRequest
 	if err := c.ShouldBindJSON(&req); err != nil {
 		common.ApiErrorMsg(c, "参数错误")
@@ -163,6 +151,9 @@ func AdminCreateSubscriptionPlan(c *gin.Context) {
 		req.Plan.Currency = "USD"
 	}
 	req.Plan.Currency = "USD"
+	if req.Plan.AllowBalancePay == nil {
+		req.Plan.AllowBalancePay = common.GetPointer(true)
+	}
 	if req.Plan.DurationUnit == "" {
 		req.Plan.DurationUnit = model.SubscriptionDurationMonth
 	}
@@ -199,10 +190,6 @@ func AdminCreateSubscriptionPlan(c *gin.Context) {
 }

 func AdminUpdateSubscriptionPlan(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	id, _ := strconv.Atoi(c.Param("id"))
 	if id <= 0 {
 		common.ApiErrorMsg(c, "无效的ID")
@@ -279,6 +266,9 @@ func AdminUpdateSubscriptionPlan(c *gin.Context) {
 			"quota_reset_custom_seconds": req.Plan.QuotaResetCustomSeconds,
 			"updated_at":                 common.GetTimestamp(),
 		}
+		if req.Plan.AllowBalancePay != nil {
+			updateMap["allow_balance_pay"] = *req.Plan.AllowBalancePay
+		}
 		if err := tx.Model(&model.SubscriptionPlan{}).Where("id = ?", id).Updates(updateMap).Error; err != nil {
 			return err
 		}
@@ -297,10 +287,6 @@ type AdminUpdateSubscriptionPlanStatusRequest struct {
 }

 func AdminUpdateSubscriptionPlanStatus(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	id, _ := strconv.Atoi(c.Param("id"))
 	if id <= 0 {
 		common.ApiErrorMsg(c, "无效的ID")
@@ -325,10 +311,6 @@ type AdminBindSubscriptionRequest struct {
 }

 func AdminBindSubscription(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	var req AdminBindSubscriptionRequest
 	if err := c.ShouldBindJSON(&req); err != nil || req.UserId <= 0 || req.PlanId <= 0 {
 		common.ApiErrorMsg(c, "参数错误")
@@ -368,10 +350,6 @@ type AdminCreateUserSubscriptionRequest struct {

 // AdminCreateUserSubscription creates a new user subscription from a plan (no payment).
 func AdminCreateUserSubscription(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	userId, _ := strconv.Atoi(c.Param("id"))
 	if userId <= 0 {
 		common.ApiErrorMsg(c, "无效的用户ID")
@@ -21,10 +21,6 @@ type SubscriptionCreemPayRequest struct {
 }

 func SubscriptionRequestCreemPay(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	var req SubscriptionCreemPayRequest

 	// Keep body for debugging consistency (like RequestCreemPay)
@@ -22,10 +22,6 @@ type SubscriptionEpayPayRequest struct {
 }

 func SubscriptionRequestEpay(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	var req SubscriptionEpayPayRequest
 	if err := c.ShouldBindJSON(&req); err != nil || req.PlanId <= 0 {
 		common.ApiErrorMsg(c, "参数错误")
@@ -21,10 +21,6 @@ type SubscriptionStripePayRequest struct {
 }

 func SubscriptionRequestStripePay(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	var req SubscriptionStripePayRequest
 	if err := c.ShouldBindJSON(&req); err != nil || req.PlanId <= 0 {
 		common.ApiErrorMsg(c, "参数错误")
@@ -21,10 +21,6 @@ type SubscriptionWaffoPancakePayRequest struct {
 }

 func SubscriptionRequestWaffoPancakePay(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	var req SubscriptionWaffoPancakePayRequest
 	if err := c.ShouldBindJSON(&req); err != nil || req.PlanId <= 0 {
 		common.ApiErrorMsg(c, "参数错误")
@@ -22,13 +22,8 @@ import (
 )

 func GetTopUpInfo(c *gin.Context) {
-	complianceConfirmed := operation_setting.IsPaymentComplianceConfirmed()
-
 	// 获取支付方式
 	payMethods := operation_setting.PayMethods
-	if !complianceConfirmed {
-		payMethods = []map[string]string{}
-	}

 	// 如果启用了 Stripe 支付，添加到支付方法列表
 	if isStripeTopUpEnabled() {
@@ -101,9 +96,8 @@ func GetTopUpInfo(c *gin.Context) {
 		"enable_creem_topup":               isCreemTopUpEnabled(),
 		"enable_waffo_topup":               enableWaffo,
 		"enable_waffo_pancake_topup":       enableWaffoPancake,
-		"enable_redemption":                complianceConfirmed,
-		"payment_compliance_confirmed":     complianceConfirmed,
-		"payment_compliance_terms_version": operation_setting.CurrentComplianceTermsVersion,
+		"enable_redemption":                true,
+		"payment_compliance_confirmed":     true,
 		"waffo_pay_methods": func() interface{} {
 			if enableWaffo {
 				return setting.GetWaffoPayMethods()
@@ -17,7 +17,6 @@ import (
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting"
-	"github.com/QuantumNous/new-api/setting/operation_setting"

 	"github.com/QuantumNous/new-api/constant"

@@ -344,10 +343,6 @@ type TransferAffQuotaRequest struct {
 }

 func TransferAffQuota(c *gin.Context) {
-	if !requirePaymentCompliance(c) {
-		return
-	}
-
 	id := c.GetInt("id")
 	user, err := model.GetUserById(id, true)
 	if err != nil {
@@ -1104,11 +1099,6 @@ func getTopUpLock(userID int) *topUpTryLock {
 }

 func TopUp(c *gin.Context) {
-	if !operation_setting.IsPaymentComplianceConfirmed() {
-		common.ApiErrorI18n(c, i18n.MsgPaymentComplianceRequired)
-		return
-	}
-
 	id := c.GetInt("id")
 	lock := getTopUpLock(id)
 	if !lock.TryLock() {
@@ -16,7 +16,7 @@ version: '3.4' # For compatibility with older Docker versions

 services:
  new-api:
-    image: calciumion/new-api:latest
+    image: git.viaeon.com/admin/new-api:latest
    container_name: new-api
    restart: always
    command: --log-dir /app/logs
@@ -34,6 +34,7 @@ services:
      - BATCH_UPDATE_ENABLED=true  # 是否启用批量更新 (Whether to enable batch update)
      - NODE_NAME=new-api-node-1  # 节点名称，用于审计日志中标识节点身份；多节点/容器部署时建议设置 (Node name used in audit logs; recommended when running multiple instances or in containers)
 #      - STREAMING_TIMEOUT=300  # 流模式无响应超时时间，单位秒，默认120秒，如果出现空补全可以尝试改为更大值 （Streaming timeout in seconds, default is 120s. Increase if experiencing empty completions）
+#      - RELAY_IDLE_CONN_TIMEOUT=90  # Relay HTTP 客户端空闲连接超时时间，单位秒，默认跟随 Go 标准库，设置为0表示不限制 (Relay HTTP client idle keep-alive timeout in seconds, defaults to Go standard library; set 0 to disable)
 #      - SESSION_SECRET=random_string  # 多机部署时设置，必须修改这个随机字符串！！ （multi-node deployment, set this to a random string!!!!!!!）
 #      - SYNC_FREQUENCY=60  # Uncomment if regular database syncing is needed
 #      - GOOGLE_ANALYTICS_ID=G-XXXXXXXXXX  # Google Analytics 的测量 ID (Google Analytics Measurement ID)
@@ -26,11 +26,11 @@ type ImageRequest struct {
 	OutputFormat      json.RawMessage `json:"output_format,omitempty"`
 	OutputCompression json.RawMessage `json:"output_compression,omitempty"`
 	PartialImages     json.RawMessage `json:"partial_images,omitempty"`
-	// Stream            bool            `json:"stream,omitempty"`
-	Images        json.RawMessage `json:"images,omitempty"`
-	Mask          json.RawMessage `json:"mask,omitempty"`
-	InputFidelity json.RawMessage `json:"input_fidelity,omitempty"`
-	Watermark     *bool           `json:"watermark,omitempty"`
+	Stream            *bool           `json:"stream,omitempty"`
+	Images            json.RawMessage `json:"images,omitempty"`
+	Mask              json.RawMessage `json:"mask,omitempty"`
+	InputFidelity     json.RawMessage `json:"input_fidelity,omitempty"`
+	Watermark         *bool           `json:"watermark,omitempty"`
 	// zhipu 4v
 	WatermarkEnabled json.RawMessage `json:"watermark_enabled,omitempty"`
 	UserId           json.RawMessage `json:"user_id,omitempty"`
@@ -163,7 +163,7 @@ func (i *ImageRequest) GetTokenCountMeta() *types.TokenCountMeta {
 }

 func (i *ImageRequest) IsStream(c *gin.Context) bool {
-	return false
+	return i.Stream != nil && *i.Stream
 }

 func (i *ImageRequest) SetModelName(modelName string) {
@@ -213,12 +213,22 @@ func (r *GeneralOpenAIRequest) ToMap() map[string]any {
 	return result
 }

+// IsOpenAIReasoningOModel reports whether modelName starts with one of the
+// o-series family prefixes "o1", "o3" or "o4". Names that merely begin with
+// the letter "o" (for example "omni-...") do not match.
+func IsOpenAIReasoningOModel(modelName string) bool {
+	for _, family := range []string{"o1", "o3", "o4"} {
+		if strings.HasPrefix(modelName, family) {
+			return true
+		}
+	}
+	return false
+}
+
+// IsOpenAIGPT5Model reports whether modelName starts with the "gpt-5" prefix.
+func IsOpenAIGPT5Model(modelName string) bool {
+	const family = "gpt-5"
+	return strings.HasPrefix(modelName, family)
+}
+
 func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
-	if strings.HasPrefix(r.Model, "o") {
+	if IsOpenAIReasoningOModel(r.Model) {
 		if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
 			return "developer"
 		}
-	} else if strings.HasPrefix(r.Model, "gpt-5") {
+	} else if IsOpenAIGPT5Model(r.Model) {
 		return "developer"
 	}
 	return "system"
@@ -71,3 +71,27 @@ func TestOpenAIResponsesRequestPreserveExplicitZeroValues(t *testing.T) {
 	require.True(t, gjson.GetBytes(encoded, "stream").Exists())
 	require.True(t, gjson.GetBytes(encoded, "top_p").Exists())
 }
+
+// TestGeneralOpenAIRequestGetSystemRoleName checks the role name reported by
+// GetSystemRoleName for a selection of model names, including names that only
+// look like o-series models.
+func TestGeneralOpenAIRequestGetSystemRoleName(t *testing.T) {
+	type roleCase struct {
+		name  string
+		model string
+		want  string
+	}
+	cases := []roleCase{
+		{"o1 uses developer", "o1", "developer"},
+		{"o3 family uses developer", "o3-mini-high", "developer"},
+		{"o4 family uses developer", "o4-mini", "developer"},
+		{"o1 mini stays system", "o1-mini", "system"},
+		{"o1 preview stays system", "o1-preview", "system"},
+		{"gpt 5 uses developer", "gpt-5", "developer"},
+		{"omni is not o series", "omni-moderation-latest", "system"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := (&GeneralOpenAIRequest{Model: tc.model}).GetSystemRoleName()
+
+			require.Equal(t, tc.want, got)
+		})
+	}
+}
@@ -18,10 +18,10 @@
    "openai",
    "claude"
  ],
-  "author": "QuantumNous",
+  "author": "modelstoken",
  "repository": {
    "type": "git",
-    "url": "https://github.com/QuantumNous/new-api"
+    "url": "https://git.viaeon.com/admin/new-api"
  },
  "devDependencies": {
    "cross-env": "^7.0.3",
@@ -152,7 +152,6 @@ const (
 	MsgPaymentWebhookNotConfig   = "payment.webhook_not_configured"
 	MsgPaymentPriceIdNotConfig   = "payment.price_id_not_configured"
 	MsgPaymentCreemNotConfig     = "payment.creem_not_configured"
-	MsgPaymentComplianceRequired = "payment.compliance_required"
 )

 // Topup related messages
@@ -164,7 +164,7 @@ func main() {
 		common.SysLog(fmt.Sprintf("panic detected: %v", err))
 		c.JSON(http.StatusInternalServerError, gin.H{
 			"error": gin.H{
-				"message": fmt.Sprintf("Panic detected, error: %v. Please submit a issue here: https://github.com/Calcium-Ion/new-api", err),
+				"message": fmt.Sprintf("Panic detected, error: %v. Please submit a issue here: https://git.viaeon.com/admin/new-api/issues", err),
 				"type":    "new_api_panic",
 			},
 		})
@@ -1,6 +1,8 @@
 FRONTEND_DIR = ./web/default
 FRONTEND_CLASSIC_DIR = ./web/classic
 BACKEND_DIR = .
+DEV_FRONTEND_DEFAULT_PORT ?= 5173
+DEV_FRONTEND_CLASSIC_PORT ?= 5174
 DEV_COMPOSE_FILE = docker-compose.dev.yml
 DEV_POSTGRES_SERVICE = postgres
 DEV_BACKEND_SERVICE = new-api
@@ -14,11 +16,13 @@ all: build-all-frontends start-backend

 build-frontend:
 	@echo "Building default frontend..."
-	@cd $(FRONTEND_DIR) && bun install && DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(cat ../../VERSION) bun run build
+	@cd ./web && bun install --frozen-lockfile
+	@cd $(FRONTEND_DIR) && DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(cat ../../VERSION) bun run build

 build-frontend-classic:
 	@echo "Building classic frontend..."
-	@cd $(FRONTEND_CLASSIC_DIR) && bun install && VITE_REACT_APP_VERSION=$(cat ../../VERSION) bun run build
+	@cd ./web && bun install --frozen-lockfile
+	@cd $(FRONTEND_CLASSIC_DIR) && VITE_REACT_APP_VERSION=$(cat ../../VERSION) bun run build

 build-all-frontends: build-frontend build-frontend-classic

@@ -35,12 +39,35 @@ dev-api-rebuild:
 	@docker compose -f $(DEV_COMPOSE_FILE) up -d --build $(DEV_BACKEND_SERVICE)

 dev-web:
-	@echo "Starting frontend dev server..."
-	@cd $(FRONTEND_DIR) && bun install && bun run dev
+	@echo "Starting both frontend dev servers..."
+	@echo "Default frontend: http://localhost:$(DEV_FRONTEND_DEFAULT_PORT)"
+	@echo "Classic frontend: http://localhost:$(DEV_FRONTEND_CLASSIC_PORT)"
+	@cd ./web && bun install
+	@(cd $(FRONTEND_DIR) && bun run dev -- --host 0.0.0.0 --port $(DEV_FRONTEND_DEFAULT_PORT)) & \
+		default_pid=$$!; \
+		(cd $(FRONTEND_CLASSIC_DIR) && bun run dev -- --host 0.0.0.0 --port $(DEV_FRONTEND_CLASSIC_PORT)) & \
+		classic_pid=$$!; \
+		trap 'kill $$default_pid $$classic_pid 2>/dev/null; wait $$default_pid $$classic_pid 2>/dev/null; exit 130' INT TERM; \
+		while kill -0 $$default_pid 2>/dev/null && kill -0 $$classic_pid 2>/dev/null; do \
+			sleep 1; \
+		done; \
+		if ! kill -0 $$default_pid 2>/dev/null; then \
+			wait $$default_pid; \
+			status=$$?; \
+			kill $$classic_pid 2>/dev/null; \
+			wait $$classic_pid 2>/dev/null; \
+			exit $$status; \
+		fi; \
+		wait $$classic_pid; \
+		status=$$?; \
+		kill $$default_pid 2>/dev/null; \
+		wait $$default_pid 2>/dev/null; \
+		exit $$status

 dev-web-classic:
 	@echo "Starting classic frontend dev server..."
-	@cd $(FRONTEND_CLASSIC_DIR) && bun install && bun run dev
+	@cd ./web && bun install
+	@cd $(FRONTEND_CLASSIC_DIR) && bun run dev -- --host 0.0.0.0 --port $(DEV_FRONTEND_CLASSIC_PORT)

 dev: dev-api dev-web

@@ -163,6 +163,10 @@ func TryUserAuth() func(c *gin.Context) {
 		if id != nil {
 			c.Set("id", id)
 		}
+		role := session.Get("role")
+		if role != nil {
+			c.Set("role", role)
+		}
 		c.Next()
 	}
 }
@@ -102,14 +102,10 @@ func Distribute() func(c *gin.Context) {
 				}

 				if preferredChannelID, found := service.GetPreferredChannelByAffinity(c, modelRequest.Model, usingGroup); found {
+					affinityUsable := false
 					preferred, err := model.CacheGetChannel(preferredChannelID)
-					if err == nil && preferred != nil {
-						if preferred.Status != common.ChannelStatusEnabled {
-							if service.ShouldSkipRetryAfterChannelAffinityFailure(c) {
-								abortWithOpenAiMessage(c, http.StatusForbidden, i18n.T(c, i18n.MsgDistributorAffinityChannelDisabled))
-								return
-							}
-						} else if usingGroup == "auto" {
+					if err == nil && preferred != nil && preferred.Status == common.ChannelStatusEnabled {
+						if usingGroup == "auto" {
 							userGroup := common.GetContextKeyString(c, constant.ContextKeyUserGroup)
 							autoGroups := service.GetUserAutoGroup(userGroup)
 							for _, g := range autoGroups {
@@ -117,6 +113,7 @@ func Distribute() func(c *gin.Context) {
 									selectGroup = g
 									common.SetContextKey(c, constant.ContextKeyAutoGroup, g)
 									channel = preferred
+									affinityUsable = true
 									service.MarkChannelAffinityUsed(c, g, preferred.Id)
 									break
 								}
@@ -124,9 +121,13 @@ func Distribute() func(c *gin.Context) {
 						} else if model.IsChannelEnabledForGroupModel(usingGroup, modelRequest.Model, preferred.Id) {
 							channel = preferred
 							selectGroup = usingGroup
+							affinityUsable = true
 							service.MarkChannelAffinityUsed(c, usingGroup, preferred.Id)
 						}
 					}
+					if !affinityUsable && !service.ShouldKeepChannelAffinityOnChannelDisabled() {
+						service.ClearCurrentChannelAffinityCache(c)
+					}
 				}

 				if channel == nil {
@@ -298,6 +299,7 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 		} else if c.Request.Method == http.MethodGet {
 			relayMode = relayconstant.RelayModeVideoFetchByID
 			shouldSelectChannel = false
+			modelRequest.Model = getTaskOriginModelName(c)
 		}
 		c.Set("relay_mode", relayMode)
 	} else if strings.Contains(c.Request.URL.Path, "/v1/video/generations") {
@@ -312,6 +314,7 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 		} else if c.Request.Method == http.MethodGet {
 			relayMode = relayconstant.RelayModeVideoFetchByID
 			shouldSelectChannel = false
+			modelRequest.Model = getTaskOriginModelName(c)
 		}
 		if _, ok := c.Get("relay_mode"); !ok {
 			c.Set("relay_mode", relayMode)
@@ -396,6 +399,31 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 	return &modelRequest, shouldSelectChannel, nil
 }

+// getTaskOriginModelName backfills the model name for task-fetch requests.
+//
+// Fix for #4834: GET /v1/video/generations/:task_id and GET /v1/video/:task_id
+// previously did not resolve a model. When a token had the "allowed models"
+// restriction enabled, the downstream modelLimitEnable check therefore saw an
+// empty modelRequest.Model and wrongly reported
+// "This token has no access to model". Returning the OriginModelName stored on
+// the task record lets that check run against the correct model.
+//
+// It returns "" when the token model limit is not enabled, when no task id is
+// available, or when the task cannot be loaded for the current user.
+func getTaskOriginModelName(c *gin.Context) string {
+	if !common.GetContextKeyBool(c, constant.ContextKeyTokenModelLimitEnabled) {
+		// The token model limit is off, so the task lookup is skipped.
+		return ""
+	}
+
+	taskID := c.Param("task_id")
+	if taskID == "" {
+		// jimeng adapter: fall back to the "task_id" value stored on the context.
+		taskID = c.GetString("task_id")
+	}
+	if taskID == "" {
+		return ""
+	}
+
+	task, found, err := model.GetByTaskId(c.GetInt("id"), taskID)
+	if err != nil || !found || task == nil {
+		return ""
+	}
+	return task.Properties.OriginModelName
+}
+
 func SetupContextForSelectedChannel(c *gin.Context, channel *model.Channel, modelName string) *types.NewAPIError {
 	c.Set("original_model", modelName) // for retry
 	if channel == nil {
@@ -17,7 +17,7 @@ func RelayPanicRecover() gin.HandlerFunc {
 				common.SysLog(fmt.Sprintf("stacktrace from panic: %s", string(debug.Stack())))
 				c.JSON(http.StatusInternalServerError, gin.H{
 					"error": gin.H{
-						"message": fmt.Sprintf("Panic detected, error: %v. Please submit a issue here: https://github.com/Calcium-Ion/new-api", err),
+						"message": fmt.Sprintf("Panic detected, error: %v. Please submit a issue here: https://git.viaeon.com/admin/new-api/issues", err),
 						"type":    "new_api_panic",
 					},
 				})
@@ -0,0 +1,47 @@
+package middleware
+
+import (
+	"bytes"
+	"io"
+	"net/http"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/gin-gonic/gin"
+)
+
+// AnonymousRequestBodyLimit returns a middleware that caps the request body
+// size at common.GetAnonymousRequestBodyLimitBytes().
+//
+// A non-positive limit or a nil body disables the check. Otherwise the body is
+// read into memory (bounded by the limit), the original body is closed, and
+// the request is given an in-memory reader with a matching ContentLength so
+// later handlers can consume it. Oversized bodies are rejected with
+// 413 Request Entity Too Large; any other read failure yields 400 Bad Request.
+func AnonymousRequestBodyLimit() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		maxBytes := common.GetAnonymousRequestBodyLimitBytes()
+		if maxBytes <= 0 || c.Request.Body == nil {
+			c.Next()
+			return
+		}
+
+		originalBody := c.Request.Body
+
+		// Fast path: if the client already declares a body larger than the
+		// limit, reject it without buffering anything. ContentLength is -1
+		// when the length is unknown, which falls through to the bounded read.
+		if c.Request.ContentLength > maxBytes {
+			_ = originalBody.Close()
+			c.AbortWithStatus(http.StatusRequestEntityTooLarge)
+			return
+		}
+
+		limitedBody, err := readAnonymousRequestBody(originalBody, maxBytes)
+		_ = originalBody.Close()
+		if err != nil {
+			status := http.StatusBadRequest
+			if common.IsRequestBodyTooLargeError(err) {
+				status = http.StatusRequestEntityTooLarge
+			}
+			c.AbortWithStatus(status)
+			return
+		}
+
+		// Hand downstream handlers a fresh reader over the buffered bytes.
+		c.Request.Body = io.NopCloser(bytes.NewReader(limitedBody))
+		c.Request.ContentLength = int64(len(limitedBody))
+		c.Next()
+	}
+}
+
+// readAnonymousRequestBody reads body into memory, accepting at most maxBytes
+// bytes. The read is capped at maxBytes+1 so that an over-limit body can be
+// detected without consuming all of it; in that case
+// common.ErrRequestBodyTooLarge is returned. Read errors are passed through.
+func readAnonymousRequestBody(body io.Reader, maxBytes int64) ([]byte, error) {
+	// One extra byte is enough to tell "exactly at the limit" from "over it".
+	capped := &io.LimitedReader{R: body, N: maxBytes + 1}
+	payload, err := io.ReadAll(capped)
+	switch {
+	case err != nil:
+		return nil, err
+	case int64(len(payload)) > maxBytes:
+		return nil, common.ErrRequestBodyTooLarge
+	}
+	return payload, nil
+}
@@ -0,0 +1,96 @@
+package model
+
+import (
+	"time"
+)
+
+// Document is the GORM model for a single document. Slug carries a unique
+// index, CategoryId is a nullable, indexed pointer, and Visibility defaults to
+// "public" (the accepted values are listed next to the field). CreatedAt and
+// UpdatedAt are maintained through the autoCreateTime/autoUpdateTime tags.
+type Document struct {
+	Id         int       `json:"id" gorm:"primaryKey"`
+	Title      string    `json:"title" gorm:"not null"`
+	Slug       string    `json:"slug" gorm:"type:varchar(255);uniqueIndex;not null"`
+	Content    string    `json:"content" gorm:"type:text;not null"`
+	CategoryId *int      `json:"category_id" gorm:"index"`
+	Visibility string    `json:"visibility" gorm:"default:'public'"` // public, auth, admin
+	SortOrder  int       `json:"sort_order" gorm:"default:0"`
+	AuthorId   int       `json:"author_id" gorm:"not null"`
+	CreatedAt  time.Time `json:"created_at" gorm:"autoCreateTime"`
+	UpdatedAt  time.Time `json:"updated_at" gorm:"autoUpdateTime"`
+}
+
+// GetDocuments returns one page of documents together with the total number
+// of rows matching the filters.
+//
+// Each filter is applied only when set: keyword is matched against title or
+// content with LIKE, visibility is compared for equality, and categoryId (when
+// non-nil) restricts results to that category. Rows are ordered by sort_order
+// ascending, then id descending; startIdx is the offset and num the page size.
+func GetDocuments(keyword string, visibility string, categoryId *int, startIdx int, num int) ([]*Document, int64, error) {
+	tx := DB.Model(&Document{})
+	if keyword != "" {
+		pattern := "%" + keyword + "%"
+		tx = tx.Where("title LIKE ? OR content LIKE ?", pattern, pattern)
+	}
+	if visibility != "" {
+		tx = tx.Where("visibility = ?", visibility)
+	}
+	if categoryId != nil {
+		tx = tx.Where("category_id = ?", *categoryId)
+	}
+
+	var (
+		total     int64
+		documents []*Document
+	)
+	// Count first so the total reflects the filters, not the page window.
+	if err := tx.Count(&total).Error; err != nil {
+		return nil, 0, err
+	}
+	err := tx.Order("sort_order ASC, id DESC").Offset(startIdx).Limit(num).Find(&documents).Error
+	if err != nil {
+		return nil, 0, err
+	}
+	return documents, total, nil
+}
+
+// GetDocumentsByVisibility returns one page of documents whose visibility is
+// in visibilities, together with the total number of matching rows.
+//
+// An empty visibilities slice applies no visibility filter. keyword (when
+// non-empty) is matched against title or content with LIKE, and categoryId
+// (when non-nil) restricts results to that category. Rows are ordered by
+// sort_order ascending, then id descending; startIdx is the offset and num the
+// page size.
+func GetDocumentsByVisibility(keyword string, visibilities []string, categoryId *int, startIdx int, num int) ([]*Document, int64, error) {
+	tx := DB.Model(&Document{})
+	if keyword != "" {
+		pattern := "%" + keyword + "%"
+		tx = tx.Where("title LIKE ? OR content LIKE ?", pattern, pattern)
+	}
+	if len(visibilities) > 0 {
+		tx = tx.Where("visibility IN ?", visibilities)
+	}
+	if categoryId != nil {
+		tx = tx.Where("category_id = ?", *categoryId)
+	}
+
+	var (
+		total     int64
+		documents []*Document
+	)
+	// Count first so the total reflects the filters, not the page window.
+	if err := tx.Count(&total).Error; err != nil {
+		return nil, 0, err
+	}
+	err := tx.Order("sort_order ASC, id DESC").Offset(startIdx).Limit(num).Find(&documents).Error
+	if err != nil {
+		return nil, 0, err
+	}
+	return documents, total, nil
+}
+
+// GetDocumentBySlug loads the first document whose slug equals slug. It
+// returns a nil document and the query error when the lookup fails.
+func GetDocumentBySlug(slug string) (*Document, error) {
+	doc := &Document{}
+	if err := DB.Where("slug = ?", slug).First(doc).Error; err != nil {
+		return nil, err
+	}
+	return doc, nil
+}
+
+// GetDocumentById loads the document with the given primary key. It returns a
+// nil document and the query error when the lookup fails.
+func GetDocumentById(id int) (*Document, error) {
+	doc := &Document{}
+	if err := DB.First(doc, id).Error; err != nil {
+		return nil, err
+	}
+	return doc, nil
+}
+
+// CreateDocument inserts doc and returns any database error.
+func CreateDocument(doc *Document) error {
+	result := DB.Create(doc)
+	return result.Error
+}
+
+// UpdateDocument writes the editable columns of doc back to its row. The
+// explicit Select list limits the update to title, slug, content, category_id,
+// visibility and sort_order.
+func UpdateDocument(doc *Document) error {
+	result := DB.Model(doc).
+		Select("title", "slug", "content", "category_id", "visibility", "sort_order").
+		Updates(doc)
+	return result.Error
+}
+
+// DeleteDocument removes the document with the given id together with its
+// stored versions.
+//
+// Both deletes run inside one transaction, so a failure in either step rolls
+// back the other and no half-deleted state is left behind. Every database
+// error, including one from deleting the versions, is returned to the caller.
+func DeleteDocument(id int) error {
+	tx := DB.Begin()
+	if tx.Error != nil {
+		return tx.Error
+	}
+
+	// Delete associated versions first
+	if err := tx.Where("document_id = ?", id).Delete(&DocumentVersion{}).Error; err != nil {
+		tx.Rollback()
+		return err
+	}
+	if err := tx.Delete(&Document{}, id).Error; err != nil {
+		tx.Rollback()
+		return err
+	}
+	return tx.Commit().Error
+}
@@ -0,0 +1,38 @@
+package model
+
+import (
+	"time"
+)
+
+// DocumentCategory is the GORM model for a document category. Slug carries a
+// unique index and ParentId is a nullable, indexed pointer to another
+// category id. CreatedAt is maintained through the autoCreateTime tag.
+type DocumentCategory struct {
+	Id        int       `json:"id" gorm:"primaryKey"`
+	Name      string    `json:"name" gorm:"not null"`
+	Slug      string    `json:"slug" gorm:"type:varchar(255);uniqueIndex;not null"`
+	ParentId  *int      `json:"parent_id" gorm:"index"`
+	SortOrder int       `json:"sort_order" gorm:"default:0"`
+	CreatedAt time.Time `json:"created_at" gorm:"autoCreateTime"`
+}
+
+// GetDocumentCategories returns every category ordered by sort_order
+// ascending, then id ascending, along with any query error.
+func GetDocumentCategories() ([]*DocumentCategory, error) {
+	var list []*DocumentCategory
+	result := DB.Order("sort_order ASC, id ASC").Find(&list)
+	return list, result.Error
+}
+
+// GetDocumentCategoryTree returns every category ordered by sort_order
+// ascending, then id ascending. Despite its name, no nesting is built here:
+// the result is the same flat list GetDocumentCategories produces.
+// NOTE(review): presumably callers assemble the hierarchy from ParentId;
+// confirm against the consumers of this function.
+func GetDocumentCategoryTree() ([]*DocumentCategory, error) {
+	return GetDocumentCategories()
+}
+
+// CreateDocumentCategory inserts category and returns any database error.
+func CreateDocumentCategory(category *DocumentCategory) error {
+	result := DB.Create(category)
+	return result.Error
+}
+
+// UpdateDocumentCategory writes the editable columns of category back to its
+// row. The explicit Select list limits the update to name, slug, parent_id and
+// sort_order.
+func UpdateDocumentCategory(category *DocumentCategory) error {
+	result := DB.Model(category).
+		Select("name", "slug", "parent_id", "sort_order").
+		Updates(category)
+	return result.Error
+}
+
+// DeleteDocumentCategory deletes the category with the given id and returns
+// any database error. Only the category row itself is targeted here.
+func DeleteDocumentCategory(id int) error {
+	result := DB.Delete(&DocumentCategory{}, id)
+	return result.Error
+}
@@ -0,0 +1,30 @@
+package model
+
+import (
+	"time"
+)
+
+// DocumentVersion is the GORM model for one stored version of a document's
+// content. DocumentId is indexed and holds the id of the owning document;
+// CreatedAt is maintained through the autoCreateTime tag.
+type DocumentVersion struct {
+	Id         int       `json:"id" gorm:"primaryKey"`
+	DocumentId int       `json:"document_id" gorm:"index;not null"`
+	Content    string    `json:"content" gorm:"type:text;not null"`
+	AuthorId   int       `json:"author_id" gorm:"not null"`
+	CreatedAt  time.Time `json:"created_at" gorm:"autoCreateTime"`
+}
+
+// GetDocumentVersions returns one page of versions for documentId, newest id
+// first, together with the total number of versions for that document.
+// startIdx is the offset and num the page size.
+func GetDocumentVersions(documentId int, startIdx int, num int) ([]*DocumentVersion, int64, error) {
+	tx := DB.Model(&DocumentVersion{}).Where("document_id = ?", documentId)
+
+	var (
+		total    int64
+		versions []*DocumentVersion
+	)
+	// Count first so the total covers all versions, not just this page.
+	if err := tx.Count(&total).Error; err != nil {
+		return nil, 0, err
+	}
+	err := tx.Order("id DESC").Offset(startIdx).Limit(num).Find(&versions).Error
+	if err != nil {
+		return nil, 0, err
+	}
+	return versions, total, nil
+}
+
+// CreateDocumentVersion inserts version and returns any database error.
+func CreateDocumentVersion(version *DocumentVersion) error {
+	result := DB.Create(version)
+	return result.Error
+}
@@ -32,9 +32,9 @@ func applyExplicitLogTextFilter(tx *gorm.DB, column string, value string) (*gorm
 }

 type Log struct {
-	Id                int    `json:"id" gorm:"index:idx_created_at_id,priority:1;index:idx_user_id_id,priority:2"`
+	Id                int    `json:"id" gorm:"index:idx_created_at_id,priority:2;index:idx_user_id_id,priority:2"`
 	UserId            int    `json:"user_id" gorm:"index;index:idx_user_id_id,priority:1"`
-	CreatedAt         int64  `json:"created_at" gorm:"bigint;index:idx_created_at_id,priority:2;index:idx_created_at_type"`
+	CreatedAt         int64  `json:"created_at" gorm:"bigint;index:idx_created_at_id,priority:1;index:idx_created_at_type"`
 	Type              int    `json:"type" gorm:"index:idx_created_at_type"`
 	Content           string `json:"content"`
 	Username          string `json:"username" gorm:"index;index:index_username_model_name,priority:2;default:''"`
@@ -354,7 +354,7 @@ func GetAllLogs(logType int, startTimestamp int64, endTimestamp int64, modelName
 	if err != nil {
 		return nil, 0, err
 	}
-	err = tx.Order("logs.id desc").Limit(num).Offset(startIdx).Find(&logs).Error
+	err = tx.Order("logs.created_at desc, logs.id desc").Limit(num).Offset(startIdx).Find(&logs).Error
 	if err != nil {
 		return nil, 0, err
 	}
@@ -281,6 +281,9 @@ func migrateDB() error {
 		&CustomOAuthProvider{},
 		&UserOAuthBinding{},
 		&PerfMetric{},
+		&DocumentCategory{},
+		&Document{},
+		&DocumentVersion{},
 	)
 	if err != nil {
 		return err
@@ -330,6 +333,9 @@ func migrateDBFast() error {
 		{&CustomOAuthProvider{}, "CustomOAuthProvider"},
 		{&UserOAuthBinding{}, "UserOAuthBinding"},
 		{&PerfMetric{}, "PerfMetric"},
+		{&DocumentCategory{}, "DocumentCategory"},
+		{&Document{}, "Document"},
+		{&DocumentVersion{}, "DocumentVersion"},
 	}
 	// 动态计算migration数量，确保errChan缓冲区足够大
 	errChan := make(chan error, len(migrations))
@@ -397,6 +403,7 @@ func ensureSubscriptionPlanTableSQLite() error {
 ` + "`custom_seconds`" + ` bigint NOT NULL DEFAULT 0,
 ` + "`enabled`" + ` numeric DEFAULT 1,
 ` + "`sort_order`" + ` integer DEFAULT 0,
+` + "`allow_balance_pay`" + ` numeric DEFAULT 1,
 ` + "`stripe_price_id`" + ` varchar(128) DEFAULT '',
 ` + "`creem_product_id`" + ` varchar(128) DEFAULT '',
 ` + "`waffo_pancake_product_id`" + ` varchar(128) DEFAULT '',
@@ -431,6 +438,7 @@ PRIMARY KEY (` + "`id`" + `)
 		{Name: "custom_seconds", DDL: "`custom_seconds` bigint NOT NULL DEFAULT 0"},
 		{Name: "enabled", DDL: "`enabled` numeric DEFAULT 1"},
 		{Name: "sort_order", DDL: "`sort_order` integer DEFAULT 0"},
+		{Name: "allow_balance_pay", DDL: "`allow_balance_pay` numeric DEFAULT 1"},
 		{Name: "stripe_price_id", DDL: "`stripe_price_id` varchar(128) DEFAULT ''"},
 		{Name: "creem_product_id", DDL: "`creem_product_id` varchar(128) DEFAULT ''"},
 		{Name: "waffo_pancake_product_id", DDL: "`waffo_pancake_product_id` varchar(128) DEFAULT ''"},
@@ -160,6 +160,8 @@ type SubscriptionPlan struct {
 	Enabled   bool `json:"enabled" gorm:"default:true"`
 	SortOrder int  `json:"sort_order" gorm:"type:int;default:0"`

+	AllowBalancePay *bool `json:"allow_balance_pay" gorm:"default:true"`
+
 	StripePriceId         string `json:"stripe_price_id" gorm:"type:varchar(128);default:''"`
 	CreemProductId        string `json:"creem_product_id" gorm:"type:varchar(128);default:''"`
 	WaffoPancakeProductId string `json:"waffo_pancake_product_id" gorm:"type:varchar(128);default:''"`
@@ -193,6 +195,12 @@ func (p *SubscriptionPlan) BeforeUpdate(tx *gorm.DB) error {
 	return nil
 }

+// NormalizeDefaults fills in unset optional fields on the plan: a nil
+// AllowBalancePay is replaced with common.GetPointer(true). A value that is
+// already set is left untouched.
+func (p *SubscriptionPlan) NormalizeDefaults() {
+	if p.AllowBalancePay != nil {
+		return
+	}
+	p.AllowBalancePay = common.GetPointer(true)
+}
+
 // Subscription order (payment -> webhook -> create UserSubscription)
 type SubscriptionOrder struct {
 	Id     int     `json:"id"`
@@ -360,6 +368,7 @@ func getSubscriptionPlanByIdTx(tx *gorm.DB, id int) (*SubscriptionPlan, error) {
 	key := subscriptionPlanCacheKey(id)
 	if key != "" {
 		if cached, found, err := getSubscriptionPlanCache().Get(key); err == nil && found {
+			cached.NormalizeDefaults()
 			return &cached, nil
 		}
 	}
@@ -371,6 +380,7 @@ func getSubscriptionPlanByIdTx(tx *gorm.DB, id int) (*SubscriptionPlan, error) {
 	if err := query.Where("id = ?", id).First(&plan).Error; err != nil {
 		return nil, err
 	}
+	plan.NormalizeDefaults()
 	_ = getSubscriptionPlanCache().SetWithTTL(key, plan, subscriptionPlanCacheTTL())
 	return &plan, nil
 }
@@ -701,6 +711,9 @@ func PurchaseSubscriptionWithBalance(userId int, planId int) error {
 		if plan.PriceAmount < 0 {
 			return errors.New("套餐价格不能为负数")
 		}
+		if plan.AllowBalancePay != nil && !*plan.AllowBalancePay {
+			return errors.New("该套餐不允许使用余额兑换")
+		}

 		requiredQuota, err := calcSubscriptionBalanceQuota(plan.PriceAmount)
 		if err != nil {
@@ -11,7 +11,6 @@ import (
 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/logger"
-	"github.com/QuantumNous/new-api/setting/operation_setting"

 	"github.com/bytedance/gopkg/util/gopool"
 	"gorm.io/gorm"
@@ -418,7 +417,7 @@ func (user *User) Insert(inviterId int) error {
 	if common.QuotaForNewUser > 0 {
 		RecordLog(user.Id, LogTypeSystem, fmt.Sprintf("新用户注册赠送 %s", logger.LogQuota(common.QuotaForNewUser)))
 	}
-	if inviterId != 0 && operation_setting.IsPaymentComplianceConfirmed() {
+	if inviterId != 0 {
 		if common.QuotaForInvitee > 0 {
 			_ = IncreaseUserQuota(user.Id, common.QuotaForInvitee, true)
 			RecordLog(user.Id, LogTypeSystem, fmt.Sprintf("使用邀请码赠送 %s", logger.LogQuota(common.QuotaForInvitee)))
@@ -479,7 +478,7 @@ func (user *User) FinalizeOAuthUserCreation(inviterId int) {
 	if common.QuotaForNewUser > 0 {
 		RecordLog(user.Id, LogTypeSystem, fmt.Sprintf("新用户注册赠送 %s", logger.LogQuota(common.QuotaForNewUser)))
 	}
-	if inviterId != 0 && operation_setting.IsPaymentComplianceConfirmed() {
+	if inviterId != 0 {
 		if common.QuotaForInvitee > 0 {
 			_ = IncreaseUserQuota(user.Id, common.QuotaForInvitee, true)
 			RecordLog(user.Id, LogTypeSystem, fmt.Sprintf("使用邀请码赠送 %s", logger.LogQuota(common.QuotaForInvitee)))
@@ -19,6 +19,7 @@ var awsModelIDMap = map[string]string{
 	"claude-opus-4-5-20251101":   "anthropic.claude-opus-4-5-20251101-v1:0",
 	"claude-opus-4-6":            "anthropic.claude-opus-4-6-v1",
 	"claude-opus-4-7":            "anthropic.claude-opus-4-7",
+	"claude-opus-4-8":            "anthropic.claude-opus-4-8",
 	// Nova models
 	"nova-micro-v1:0":   "amazon.nova-micro-v1:0",
 	"nova-lite-v1:0":    "amazon.nova-lite-v1:0",
@@ -97,6 +98,11 @@ var awsModelCanCrossRegionMap = map[string]map[string]bool{
 		"ap": true,
 		"eu": true,
 	},
+	"anthropic.claude-opus-4-8": {
+		"us": true,
+		"ap": true,
+		"eu": true,
+	},
 	"anthropic.claude-haiku-4-5-20251001-v1:0": {
 		"us": true,
 		"ap": true,
@@ -33,6 +33,13 @@ var ModelList = []string{
 	"claude-opus-4-7-medium",
 	"claude-opus-4-7-low",
 	"claude-opus-4-7-thinking",
+	"claude-opus-4-8",
+	"claude-opus-4-8-max",
+	"claude-opus-4-8-xhigh",
+	"claude-opus-4-8-high",
+	"claude-opus-4-8-medium",
+	"claude-opus-4-8-low",
+	"claude-opus-4-8-thinking",
 }

 var ChannelName = "claude"
@@ -154,14 +154,17 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
 	}

 	if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" &&
-		(strings.HasPrefix(textRequest.Model, "claude-opus-4-6") || strings.HasPrefix(textRequest.Model, "claude-opus-4-7")) {
+		(strings.HasPrefix(textRequest.Model, "claude-opus-4-6") ||
+			strings.HasPrefix(textRequest.Model, "claude-opus-4-7") ||
+			strings.HasPrefix(textRequest.Model, "claude-opus-4-8")) {
 		claudeRequest.Model = baseModel
 		claudeRequest.Thinking = &dto.Thinking{
 			Type: "adaptive",
 		}
 		claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
-		if strings.HasPrefix(baseModel, "claude-opus-4-7") {
-			// Opus 4.7 rejects non-default temperature/top_p/top_k with 400
+		if strings.HasPrefix(baseModel, "claude-opus-4-7") ||
+			strings.HasPrefix(baseModel, "claude-opus-4-8") {
+			// Opus 4.7/4.8 reject non-default temperature/top_p/top_k with 400
 			// and defaults display to "omitted"; restore the 4.6 visible summary.
 			claudeRequest.Thinking.Display = "summarized"
 			claudeRequest.Temperature = nil
@@ -175,8 +178,9 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
 		strings.HasSuffix(textRequest.Model, "-thinking") {

 		trimmedModel := strings.TrimSuffix(textRequest.Model, "-thinking")
-		if strings.HasPrefix(trimmedModel, "claude-opus-4-7") {
-			// Opus 4.7 rejects thinking.type="enabled"; use adaptive at high effort.
+		if strings.HasPrefix(trimmedModel, "claude-opus-4-7") ||
+			strings.HasPrefix(trimmedModel, "claude-opus-4-8") {
+			// Opus 4.7/4.8 reject thinking.type="enabled"; use adaptive at high effort.
 			claudeRequest.Thinking = &dto.Thinking{Type: "adaptive", Display: "summarized"}
 			claudeRequest.OutputConfig = json.RawMessage(`{"effort":"high"}`)
 			claudeRequest.Temperature = nil
@@ -9,6 +9,10 @@ import (
 	"github.com/stretchr/testify/require"
 )

+// commonPointer stores a copy of value in freshly allocated memory and returns
+// its address, letting the tests in this file populate pointer-typed request
+// fields directly from literals.
+func commonPointer[T any](value T) *T {
+	ptr := new(T)
+	*ptr = value
+	return ptr
+}
+
 func TestFormatClaudeResponseInfo_MessageStart(t *testing.T) {
 	claudeInfo := &ClaudeResponseInfo{
 		Usage: &dto.Usage{},
@@ -310,6 +314,58 @@ func TestRequestOpenAI2ClaudeMessage_IgnoresUnsupportedFileContent(t *testing.T)
 	require.Equal(t, "see attachment", *content[0].Text)
 }

+// TestRequestOpenAI2ClaudeMessage_ClaudeOpus48HighUsesAdaptiveThinking checks
+// the conversion of a "claude-opus-4-8-high" request: the effort suffix is
+// dropped from the model name, thinking is adaptive with a summarized display,
+// OutputConfig carries the "high" effort, and Temperature/TopP/TopK are
+// cleared even though the caller supplied them.
+func TestRequestOpenAI2ClaudeMessage_ClaudeOpus48HighUsesAdaptiveThinking(t *testing.T) {
+	userMessage := dto.Message{Role: "user", Content: "hello"}
+	request := dto.GeneralOpenAIRequest{
+		Model:       "claude-opus-4-8-high",
+		Messages:    []dto.Message{userMessage},
+		Temperature: commonPointer(0.7),
+		TopP:        commonPointer(0.9),
+		TopK:        commonPointer(40),
+	}
+
+	converted, err := RequestOpenAI2ClaudeMessage(nil, request)
+	require.NoError(t, err)
+
+	// The suffix selects an effort level; it is not part of the model name.
+	require.Equal(t, "claude-opus-4-8", converted.Model)
+
+	// Thinking must be adaptive and summarized, at the requested effort.
+	require.NotNil(t, converted.Thinking)
+	require.Equal(t, "adaptive", converted.Thinking.Type)
+	require.Equal(t, "summarized", converted.Thinking.Display)
+	require.JSONEq(t, `{"effort":"high"}`, string(converted.OutputConfig))
+
+	// Caller-supplied sampling parameters must not survive the conversion.
+	require.Nil(t, converted.Temperature)
+	require.Nil(t, converted.TopP)
+	require.Nil(t, converted.TopK)
+}
+
+// TestRequestOpenAI2ClaudeMessage_ClaudeOpus48ThinkingUsesAdaptiveHighEffort
+// checks the conversion of a "claude-opus-4-8-thinking" request: the
+// "-thinking" suffix is dropped from the model name, thinking is adaptive with
+// a summarized display, OutputConfig carries the "high" effort, and
+// Temperature/TopP/TopK are cleared even though the caller supplied them.
+func TestRequestOpenAI2ClaudeMessage_ClaudeOpus48ThinkingUsesAdaptiveHighEffort(t *testing.T) {
+	userMessage := dto.Message{Role: "user", Content: "hello"}
+	request := dto.GeneralOpenAIRequest{
+		Model:       "claude-opus-4-8-thinking",
+		Messages:    []dto.Message{userMessage},
+		Temperature: commonPointer(0.7),
+		TopP:        commonPointer(0.9),
+		TopK:        commonPointer(40),
+	}
+
+	converted, err := RequestOpenAI2ClaudeMessage(nil, request)
+	require.NoError(t, err)
+
+	// The "-thinking" suffix must not reach the upstream model name.
+	require.Equal(t, "claude-opus-4-8", converted.Model)
+
+	// The suffix maps to adaptive, summarized thinking at high effort.
+	require.NotNil(t, converted.Thinking)
+	require.Equal(t, "adaptive", converted.Thinking.Type)
+	require.Equal(t, "summarized", converted.Thinking.Display)
+	require.JSONEq(t, `{"effort":"high"}`, string(converted.OutputConfig))
+
+	// Caller-supplied sampling parameters must not survive the conversion.
+	require.Nil(t, converted.Temperature)
+	require.Nil(t, converted.TopP)
+	require.Nil(t, converted.TopK)
+}
+
 func TestRequestOpenAI2ClaudeMessage_SupportsPDFFileContent(t *testing.T) {
 	request := dto.GeneralOpenAIRequest{
 		Model: "claude-3-5-sonnet",
@@ -30,7 +30,7 @@ func convertCf2CompletionsRequest(textRequest dto.GeneralOpenAIRequest) *CfReque
 }

 func cfStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*types.NewAPIError, *dto.Usage) {
-	scanner := bufio.NewScanner(resp.Body)
+	scanner := helper.NewStreamScanner(resp.Body)
 	scanner.Split(bufio.ScanLines)

 	helper.SetEventStreamHeaders(c)
@@ -1,7 +1,6 @@
 package cohere

 import (
-	"bufio"
 	"encoding/json"
 	"io"
 	"net/http"
@@ -86,7 +85,7 @@ func cohereStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
 	createdTime := common.GetTimestamp()
 	usage := &dto.Usage{}
 	responseText := ""
-	scanner := bufio.NewScanner(resp.Body)
+	scanner := helper.NewStreamScanner(resp.Body)
 	scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
 		if atEOF && len(data) == 0 {
 			return 0, nil, nil
@@ -106,6 +105,9 @@ func cohereStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
 			data := scanner.Text()
 			dataChan <- data
 		}
+		if err := scanner.Err(); err != nil {
+			common.SysLog("error reading stream: " + err.Error())
+		}
 		stopChan <- true
 	}()
 	helper.SetEventStreamHeaders(c)
@@ -98,7 +98,7 @@ func cozeChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Res
 }

 func cozeChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
-	scanner := bufio.NewScanner(resp.Body)
+	scanner := helper.NewStreamScanner(resp.Body)
 	scanner.Split(bufio.ScanLines)
 	helper.SetEventStreamHeaders(c)
 	id := helper.GetResponseID(c)
@@ -159,9 +159,14 @@ func requestOpenAI2Dify(c *gin.Context, info *relaycommon.RelayInfo, request dto
 					media := mediaContent.GetImageMedia()
 					var file *DifyFile
 					if media.IsRemoteImage() {
-						file.Type = media.MimeType
-						file.TransferMode = "remote_url"
-						file.URL = media.Url
+						// 修复 #2083: 远程图片分支此前未初始化 file，
+						// 导致 file.Type = ... 触发 nil pointer dereference
+						// 而 panic（500: "invalid memory address or nil pointer dereference"）。
+						file = &DifyFile{
+							Type:         media.MimeType,
+							TransferMode: "remote_url",
+							URL:          media.Url,
+						}
 					} else {
 						file = uploadDifyFile(c, info, difyReq.User, mediaContent)
 					}
@@ -5,7 +5,9 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"strings"

+	"github.com/QuantumNous/new-api/common"
 	channelconstant "github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/relay/channel"
@@ -79,9 +81,23 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 }

 func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+	if request.Temperature != nil && isTemperatureOneOnlyModel(getUpstreamModelName(info, request.Model)) && *request.Temperature != 1.0 {
+		request.Temperature = common.GetPointer[float64](1.0)
+	}
 	return request, nil
 }

+// getUpstreamModelName returns info.UpstreamModelName when it is available and
+// non-empty, and fallback otherwise. Both info and info.ChannelMeta are checked
+// for nil before UpstreamModelName is read.
+func getUpstreamModelName(info *relaycommon.RelayInfo, fallback string) string {
+	if info == nil || info.ChannelMeta == nil || info.UpstreamModelName == "" {
+		return fallback
+	}
+	return info.UpstreamModelName
+}
+
+// isTemperatureOneOnlyModel reports whether model is one for which
+// ConvertOpenAIRequest forces an explicitly set temperature to 1.0. Only
+// "kimi-k2.6" qualifies; the name is compared under Unicode case folding.
+func isTemperatureOneOnlyModel(model string) bool {
+	const temperatureOneOnlyModel = "kimi-k2.6"
+	return strings.EqualFold(temperatureOneOnlyModel, model)
+}
+
 func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
 	// TODO implement me
 	return nil, errors.New("not implemented")
@@ -0,0 +1,68 @@
+package moonshot
+
+import (
+	"testing"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/dto"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/stretchr/testify/require"
+)
+
+// TestConvertOpenAIRequestKimiK26UsesOnlyAllowedTemperature verifies that a
+// kimi-k2.6 request sent with temperature 0.7 comes back from
+// ConvertOpenAIRequest with temperature 1.0.
+func TestConvertOpenAIRequestKimiK26UsesOnlyAllowedTemperature(t *testing.T) {
+	const modelName = "kimi-k2.6"
+	adaptor := &Adaptor{}
+	info := &relaycommon.RelayInfo{
+		ChannelMeta: &relaycommon.ChannelMeta{UpstreamModelName: modelName},
+	}
+	request := &dto.GeneralOpenAIRequest{
+		Model:       modelName,
+		Temperature: common.GetPointer[float64](0.7),
+	}
+
+	converted, err := adaptor.ConvertOpenAIRequest(nil, info, request)
+	require.NoError(t, err)
+
+	// The adaptor must hand back the OpenAI request type with the override applied.
+	result, ok := converted.(*dto.GeneralOpenAIRequest)
+	require.True(t, ok)
+	require.NotNil(t, result.Temperature)
+	require.Equal(t, 1.0, *result.Temperature)
+}
+
+// TestConvertOpenAIRequestKimiK26KeepsOmittedTemperatureOmitted verifies that
+// a kimi-k2.6 request without a temperature still has a nil temperature after
+// ConvertOpenAIRequest.
+func TestConvertOpenAIRequestKimiK26KeepsOmittedTemperatureOmitted(t *testing.T) {
+	const modelName = "kimi-k2.6"
+	adaptor := &Adaptor{}
+	info := &relaycommon.RelayInfo{
+		ChannelMeta: &relaycommon.ChannelMeta{UpstreamModelName: modelName},
+	}
+	// Temperature is deliberately left unset.
+	request := &dto.GeneralOpenAIRequest{Model: modelName}
+
+	converted, err := adaptor.ConvertOpenAIRequest(nil, info, request)
+	require.NoError(t, err)
+
+	result, ok := converted.(*dto.GeneralOpenAIRequest)
+	require.True(t, ok)
+	require.Nil(t, result.Temperature)
+}
+
+// TestConvertOpenAIRequestOtherMoonshotModelKeepsTemperature verifies that a
+// kimi-k2.5 request keeps its caller-supplied temperature of 0.7 after
+// ConvertOpenAIRequest.
+func TestConvertOpenAIRequestOtherMoonshotModelKeepsTemperature(t *testing.T) {
+	const modelName = "kimi-k2.5"
+	adaptor := &Adaptor{}
+	info := &relaycommon.RelayInfo{
+		ChannelMeta: &relaycommon.ChannelMeta{UpstreamModelName: modelName},
+	}
+	request := &dto.GeneralOpenAIRequest{
+		Model:       modelName,
+		Temperature: common.GetPointer[float64](0.7),
+	}
+
+	converted, err := adaptor.ConvertOpenAIRequest(nil, info, request)
+	require.NoError(t, err)
+
+	result, ok := converted.(*dto.GeneralOpenAIRequest)
+	require.True(t, ok)
+	require.NotNil(t, result.Temperature)
+	require.Equal(t, 0.7, *result.Temperature)
+}
@@ -1,7 +1,6 @@
 package ollama

 import (
-	"bufio"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -12,6 +11,7 @@ import (
 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/dto"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/relay/helper"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/types"

@@ -397,7 +397,7 @@ func PullOllamaModelStream(baseURL, apiKey, modelName string, progressCallback f
 	}

 	// 读取流式响应
-	scanner := bufio.NewScanner(response.Body)
+	scanner := helper.NewStreamScanner(response.Body)
 	successful := false
 	for scanner.Scan() {
 		line := scanner.Text()
@@ -1,7 +1,6 @@
 package ollama

 import (
-	"bufio"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -70,7 +69,7 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
 	defer service.CloseResponseBodyGracefully(resp)

 	helper.SetEventStreamHeaders(c)
-	scanner := bufio.NewScanner(resp.Body)
+	scanner := helper.NewStreamScanner(resp.Body)
 	usage := &dto.Usage{}
 	var model = info.UpstreamModelName
 	var responseId = common.GetUUID()
@@ -9,6 +9,7 @@ import (
 	"mime/multipart"
 	"net/http"
 	"net/textproto"
+	"net/url"
 	"path/filepath"
 	"strings"

@@ -163,6 +164,20 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 		url = strings.Replace(url, "{model}", info.UpstreamModelName, -1)
 		return url, nil
 	default:
+		// Handle coding plan special base URLs
+		if specialPlan, ok := constant.ChannelSpecialBases[info.ChannelBaseUrl]; ok && specialPlan.OpenAIBaseURL != "" {
+			if info.RelayFormat == types.RelayFormatClaude {
+				return fmt.Sprintf("%s/v1/messages", specialPlan.ClaudeBaseURL), nil
+			}
+			switch info.RelayMode {
+			case relayconstant.RelayModeEmbeddings:
+				return fmt.Sprintf("%s/embeddings", specialPlan.OpenAIBaseURL), nil
+			case relayconstant.RelayModeImagesGenerations:
+				return fmt.Sprintf("%s/images/generations", specialPlan.OpenAIBaseURL), nil
+			default:
+				return fmt.Sprintf("%s/chat/completions", specialPlan.OpenAIBaseURL), nil
+			}
+		}
 		if (info.RelayFormat == types.RelayFormatClaude || info.RelayFormat == types.RelayFormatGemini) &&
 			info.RelayMode != relayconstant.RelayModeResponses &&
 			info.RelayMode != relayconstant.RelayModeResponsesCompact {
@@ -220,7 +235,7 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, header *http.Header, info *
 			header.Set("HTTP-Referer", "https://www.newapi.ai")
 		}
 		if header.Get("X-OpenRouter-Title") == "" {
-			header.Set("X-OpenRouter-Title", "New API")
+			header.Set("X-OpenRouter-Title", "ModelsToken")
 		}
 	}
 	return nil
@@ -310,18 +325,20 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 		}

 	}
-	if strings.HasPrefix(info.UpstreamModelName, "o") || strings.HasPrefix(info.UpstreamModelName, "gpt-5") {
+	isOModel := dto.IsOpenAIReasoningOModel(info.UpstreamModelName)
+	isGPT5Model := dto.IsOpenAIGPT5Model(info.UpstreamModelName)
+	if isOModel || isGPT5Model {
 		if lo.FromPtrOr(request.MaxCompletionTokens, uint(0)) == 0 && lo.FromPtrOr(request.MaxTokens, uint(0)) != 0 {
 			request.MaxCompletionTokens = request.MaxTokens
 			request.MaxTokens = nil
 		}

-		if strings.HasPrefix(info.UpstreamModelName, "o") {
+		if isOModel {
 			request.Temperature = nil
 		}

 		// gpt-5系列模型适配 归零不再支持的参数
-		if strings.HasPrefix(info.UpstreamModelName, "gpt-5") {
+		if isGPT5Model {
 			request.Temperature = nil
 			request.TopP = nil
 			request.LogProbs = nil
@@ -437,10 +454,13 @@ func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInf
 		// 使用已解析的 multipart 表单，避免重复解析
 		mf := c.Request.MultipartForm
 		if mf == nil {
-			if _, err := c.MultipartForm(); err != nil {
-				return nil, errors.New("failed to parse multipart form")
+			form, err := common.ParseMultipartFormReusable(c)
+			if err != nil {
+				return nil, fmt.Errorf("failed to parse multipart form: %w", err)
 			}
-			mf = c.Request.MultipartForm
+			c.Request.MultipartForm = form
+			c.Request.PostForm = url.Values(form.Value)
+			mf = form
 		}

 		// 写入所有非文件字段
@@ -623,7 +643,11 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 	case relayconstant.RelayModeAudioTranscription:
 		err, usage = OpenaiSTTHandler(c, resp, info, a.ResponseFormat)
 	case relayconstant.RelayModeImagesGenerations, relayconstant.RelayModeImagesEdits:
-		usage, err = OpenaiHandlerWithUsage(c, info, resp)
+		if info.IsStream {
+			usage, err = OpenaiImageStreamHandler(c, info, resp)
+		} else {
+			usage, err = OpenaiImageHandler(c, info, resp)
+		}
 	case relayconstant.RelayModeRerank:
 		usage, err = common_handler.RerankHandler(c, info, resp)
 	case relayconstant.RelayModeResponses:
@@ -0,0 +1,98 @@
+package openai
+
+import (
+	"bytes"
+	"io"
+	"mime/multipart"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/dto"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	relayconstant "github.com/QuantumNous/new-api/relay/constant"
+	"github.com/gin-gonic/gin"
+	"github.com/stretchr/testify/require"
+)
+
+// TestConvertImageEditRequestMultipart verifies that ConvertImageRequest
+// re-serializes multipart image edit requests with all fields (including
+// stream) and the file intact, both when the form was already parsed and when
+// it must be re-parsed from the reusable body.
+func TestConvertImageEditRequestMultipart(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// newMultipartContext builds a gin test context whose request is a
+	// multipart POST to /v1/images/edits carrying four text fields (model,
+	// prompt, stream, partial_images) and one file part named "image".
+	newMultipartContext := func(t *testing.T, prompt string) *gin.Context {
+		var body bytes.Buffer
+		writer := multipart.NewWriter(&body)
+		require.NoError(t, writer.WriteField("model", "gpt-image-1"))
+		require.NoError(t, writer.WriteField("prompt", prompt))
+		require.NoError(t, writer.WriteField("stream", "true"))
+		require.NoError(t, writer.WriteField("partial_images", "3"))
+		part, err := writer.CreateFormFile("image", "input.png")
+		require.NoError(t, err)
+		_, err = part.Write([]byte("fake image"))
+		require.NoError(t, err)
+		// Close writes the terminating boundary; it must happen before the body is used.
+		require.NoError(t, writer.Close())
+
+		c, _ := gin.CreateTestContext(httptest.NewRecorder())
+		c.Request = httptest.NewRequest(http.MethodPost, "/v1/images/edits", &body)
+		// The content type carries the boundary the writer generated.
+		c.Request.Header.Set("Content-Type", writer.FormDataContentType())
+		return c
+	}
+
+	// convertAndReplay runs ConvertImageRequest on c, then parses the returned
+	// buffer as a fresh multipart request and asserts that every text field
+	// and the uploaded file content match what newMultipartContext wrote.
+	convertAndReplay := func(t *testing.T, c *gin.Context, prompt string) {
+		info := &relaycommon.RelayInfo{
+			RelayMode: relayconstant.RelayModeImagesEdits,
+		}
+		request := dto.ImageRequest{
+			Model:  "gpt-image-1",
+			Prompt: prompt,
+			Stream: common.GetPointer(true),
+		}
+
+		converted, err := (&Adaptor{}).ConvertImageRequest(c, info, request)
+		require.NoError(t, err)
+		// The converted payload is expected to be an in-memory multipart body.
+		convertedBody, ok := converted.(*bytes.Buffer)
+		require.True(t, ok)
+
+		// Replay the converted body with the Content-Type header read back
+		// from the context after conversion.
+		replayedRequest := httptest.NewRequest(http.MethodPost, "/v1/images/edits", bytes.NewReader(convertedBody.Bytes()))
+		replayedRequest.Header.Set("Content-Type", c.Request.Header.Get("Content-Type"))
+		require.NoError(t, replayedRequest.ParseMultipartForm(32<<20))
+
+		require.Equal(t, "gpt-image-1", replayedRequest.PostForm.Get("model"))
+		require.Equal(t, prompt, replayedRequest.PostForm.Get("prompt"))
+		require.Equal(t, "true", replayedRequest.PostForm.Get("stream"))
+		require.Equal(t, "3", replayedRequest.PostForm.Get("partial_images"))
+		require.Len(t, replayedRequest.MultipartForm.File["image"], 1)
+
+		// The file bytes must survive the round trip unchanged.
+		file, err := replayedRequest.MultipartForm.File["image"][0].Open()
+		require.NoError(t, err)
+		defer file.Close()
+		fileBytes, err := io.ReadAll(file)
+		require.NoError(t, err)
+		require.Equal(t, []byte("fake image"), fileBytes)
+	}
+
+	// Case 1: the form is parsed before conversion, so MultipartForm is set.
+	t.Run("with pre-parsed form", func(t *testing.T) {
+		prompt := "edit this image"
+		c := newMultipartContext(t, prompt)
+		require.NoError(t, c.Request.ParseMultipartForm(32<<20))
+
+		convertAndReplay(t, c, prompt)
+	})
+
+	// Case 2: the body is swapped for the storage returned by
+	// common.GetBodyStorage and the parsed form fields are cleared, so the
+	// conversion has to parse the form itself.
+	t.Run("re-parses reusable body when form is missing", func(t *testing.T) {
+		prompt := "edit without pre-parsed form"
+		c := newMultipartContext(t, prompt)
+
+		storage, err := common.GetBodyStorage(c)
+		require.NoError(t, err)
+		c.Request.Body = io.NopCloser(storage)
+		c.Request.MultipartForm = nil
+		c.Request.PostForm = nil
+
+		convertAndReplay(t, c, prompt)
+	})
+}
@@ -0,0 +1,173 @@
+package openai
+
+import (
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/QuantumNous/new-api/constant"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/gin-gonic/gin"
+	"github.com/stretchr/testify/require"
+)
+
+// newImageTestContext assembles the fixtures shared by the image handler
+// tests: a gin test context with a POST /v1/images/generations request, the
+// recorder capturing what the handler writes, a 200 upstream response whose
+// body and Content-Type are taken from the arguments, and a RelayInfo with an
+// empty ChannelMeta and the given IsStream flag.
+func newImageTestContext(t *testing.T, body, contentType string, isStream bool) (*gin.Context, *httptest.ResponseRecorder, *http.Response, *relaycommon.RelayInfo) {
+	t.Helper()
+
+	recorder := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(recorder)
+	ctx.Request = httptest.NewRequest(http.MethodPost, "/v1/images/generations", nil)
+
+	info := &relaycommon.RelayInfo{
+		ChannelMeta: &relaycommon.ChannelMeta{},
+		IsStream:    isStream,
+	}
+
+	upstreamHeader := http.Header{}
+	upstreamHeader.Set("Content-Type", contentType)
+	upstream := &http.Response{
+		StatusCode: http.StatusOK,
+		Body:       io.NopCloser(strings.NewReader(body)),
+		Header:     upstreamHeader,
+	}
+
+	return ctx, recorder, upstream, info
+}
+
+// TestOpenaiImageStreamHandlerForwardsSSEAndUsage exercises the main SSE path
+// of OpenaiImageStreamHandler: the partial-image event, the usage payload and
+// the [DONE] marker must all appear in the client output, and the returned
+// usage must map input_tokens/output_tokens to PromptTokens/CompletionTokens
+// together with the image/text prompt token details.
+func TestOpenaiImageStreamHandlerForwardsSSEAndUsage(t *testing.T) {
+	previousMode := gin.Mode()
+	gin.SetMode(gin.TestMode)
+	t.Cleanup(func() { gin.SetMode(previousMode) })
+
+	previousTimeout := constant.StreamingTimeout
+	constant.StreamingTimeout = 30
+	t.Cleanup(func() { constant.StreamingTimeout = previousTimeout })
+
+	const (
+		partialEvent = `event: image_generation.partial_image`
+		partialData  = `data: {"type":"image_generation.partial_image","b64_json":"partial"}`
+		usageData    = `data: {"usage":{"input_tokens":3,"output_tokens":4,"total_tokens":7,"input_tokens_details":{"image_tokens":2,"text_tokens":1}}}`
+		doneData     = `data: [DONE]`
+	)
+	// Empty entries become the blank lines between events; the final one
+	// makes the body end with a newline.
+	body := strings.Join([]string{partialEvent, partialData, "", usageData, "", doneData, ""}, "\n")
+
+	c, recorder, resp, info := newImageTestContext(t, body, "text/event-stream", true)
+
+	usage, err := OpenaiImageStreamHandler(c, info, resp)
+	require.Nil(t, err)
+
+	// Usage extracted from the stream.
+	require.Equal(t, 3, usage.PromptTokens)
+	require.Equal(t, 4, usage.CompletionTokens)
+	require.Equal(t, 7, usage.TotalTokens)
+	require.Equal(t, 2, usage.PromptTokensDetails.ImageTokens)
+	require.Equal(t, 1, usage.PromptTokensDetails.TextTokens)
+
+	// Every upstream line of interest must reach the client.
+	output := recorder.Body.String()
+	for _, expected := range []string{partialEvent, partialData, usageData, doneData} {
+		require.Contains(t, output, expected)
+	}
+	require.Equal(t, "text/event-stream", recorder.Header().Get("Content-Type"))
+}
+
+// TestOpenaiImageStreamHandlerWrapsJSONResponse exercises the non-SSE fallback
+// of OpenaiImageStreamHandler: when the upstream answers a streaming request
+// with a JSON body, the client must still receive an event stream containing
+// an image_generation.completed event and a [DONE] marker, and usage must be
+// read from the JSON body.
+func TestOpenaiImageStreamHandlerWrapsJSONResponse(t *testing.T) {
+	previousMode := gin.Mode()
+	gin.SetMode(gin.TestMode)
+	t.Cleanup(func() { gin.SetMode(previousMode) })
+
+	const upstreamJSON = `{"created":1710000000,"data":[{"b64_json":"final","revised_prompt":"draw a cat"}],"usage":{"input_tokens":3,"output_tokens":4,"total_tokens":7,"input_tokens_details":{"image_tokens":2,"text_tokens":1}}}`
+
+	c, recorder, resp, info := newImageTestContext(t, upstreamJSON, "application/json", true)
+
+	usage, err := OpenaiImageStreamHandler(c, info, resp)
+	require.Nil(t, err)
+
+	// Usage taken from the JSON body.
+	require.Equal(t, 3, usage.PromptTokens)
+	require.Equal(t, 4, usage.CompletionTokens)
+	require.Equal(t, 7, usage.TotalTokens)
+	require.Equal(t, 2, usage.PromptTokensDetails.ImageTokens)
+	require.Equal(t, 1, usage.PromptTokensDetails.TextTokens)
+
+	// The client sees an event stream, with no Content-Length header set.
+	require.Equal(t, "text/event-stream", recorder.Header().Get("Content-Type"))
+	require.Empty(t, recorder.Header().Get("Content-Length"))
+
+	output := recorder.Body.String()
+	expectedFragments := []string{
+		`event: image_generation.completed`,
+		`"type":"image_generation.completed"`,
+		`"b64_json":"final"`,
+		`"revised_prompt":"draw a cat"`,
+		`data: [DONE]`,
+	}
+	for _, fragment := range expectedFragments {
+		require.Contains(t, output, fragment)
+	}
+}
+
+// TestOpenaiImageHandlersReturnJSONError feeds a JSON error body to both image
+// entry points, OpenaiImageHandler and the non-SSE fallback of
+// OpenaiImageStreamHandler. Each must return the upstream error with no usage
+// and must not write anything to the client.
+func TestOpenaiImageHandlersReturnJSONError(t *testing.T) {
+	previousMode := gin.Mode()
+	gin.SetMode(gin.TestMode)
+	t.Cleanup(func() { gin.SetMode(previousMode) })
+
+	const errorBody = `{"error":{"message":"content moderation failed","type":"upstream_error","code":"content_moderation_failed","status":502}}`
+
+	t.Run("non-streaming handler", func(t *testing.T) {
+		c, recorder, resp, info := newImageTestContext(t, errorBody, "application/json", false)
+
+		usage, apiErr := OpenaiImageHandler(c, info, resp)
+		require.Nil(t, usage)
+		require.NotNil(t, apiErr)
+		require.Equal(t, http.StatusOK, apiErr.StatusCode)
+
+		// Message, type and code must all come from the upstream error object.
+		openAIError := apiErr.ToOpenAIError()
+		require.Equal(t, "content moderation failed", openAIError.Message)
+		require.Equal(t, "upstream_error", openAIError.Type)
+		require.Equal(t, "content_moderation_failed", openAIError.Code)
+
+		// Nothing may have been written to the client.
+		require.Empty(t, recorder.Body.String())
+	})
+
+	t.Run("stream handler JSON fallback", func(t *testing.T) {
+		c, recorder, resp, info := newImageTestContext(t, errorBody, "application/json", true)
+
+		usage, apiErr := OpenaiImageStreamHandler(c, info, resp)
+		require.Nil(t, usage)
+		require.NotNil(t, apiErr)
+		require.Equal(t, http.StatusOK, apiErr.StatusCode)
+		require.Equal(t, "content moderation failed", apiErr.ToOpenAIError().Message)
+
+		// Nothing may have been written to the client.
+		require.Empty(t, recorder.Body.String())
+	})
+}
+
+// TestOpenaiImageStreamHandlerRecordsUpstreamErrorEvent sends an SSE stream
+// that contains an error event and checks that the handler still returns
+// usage without a hard error, records exactly one error on info.StreamStatus,
+// and forwards the error payload to the client.
+func TestOpenaiImageStreamHandlerRecordsUpstreamErrorEvent(t *testing.T) {
+	previousMode := gin.Mode()
+	gin.SetMode(gin.TestMode)
+	t.Cleanup(func() { gin.SetMode(previousMode) })
+
+	previousTimeout := constant.StreamingTimeout
+	constant.StreamingTimeout = 30
+	t.Cleanup(func() { constant.StreamingTimeout = previousTimeout })
+
+	streamLines := []string{
+		`event: image_generation.partial_image`,
+		`data: {"type":"image_generation.partial_image","b64_json":"partial"}`,
+		``,
+		`event: error`,
+		`data: {"type":"upstream_error","error":{"message":"stream error: stream ID 77; INTERNAL_ERROR; received from peer"}}`,
+		``,
+	}
+	body := strings.Join(streamLines, "\n")
+
+	c, recorder, resp, info := newImageTestContext(t, body, "text/event-stream", true)
+
+	usage, err := OpenaiImageStreamHandler(c, info, resp)
+	require.Nil(t, err)
+	require.NotNil(t, usage)
+
+	// The stream ends at EOF with one recorded error.
+	status := info.StreamStatus
+	require.NotNil(t, status)
+	require.Equal(t, relaycommon.StreamEndReasonEOF, status.EndReason)
+	require.True(t, status.HasErrors())
+	require.Equal(t, 1, status.TotalErrorCount())
+	require.Contains(t, status.Errors[0].Message, "INTERNAL_ERROR")
+
+	// The scanner strips the upstream "event: error" line; the event name is
+	// rebuilt from the JSON "type" field (upstream_error). The error message
+	// is still forwarded in the data: payload (stream ID 77).
+	output := recorder.Body.String()
+	require.Contains(t, output, `event: upstream_error`)
+	require.Contains(t, output, `stream ID 77`)
+}
@@ -14,12 +14,9 @@ import (
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/relay/helper"
 	"github.com/QuantumNous/new-api/service"
-
 	"github.com/QuantumNous/new-api/types"

-	"github.com/bytedance/gopkg/util/gopool"
 	"github.com/gin-gonic/gin"
-	"github.com/gorilla/websocket"
 )

 func sendStreamData(c *gin.Context, info *relaycommon.RelayInfo, data string, forceFormat bool, thinkToContent bool) error {
@@ -293,421 +290,3 @@ func OpenaiHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respo

 	return &simpleResponse.Usage, nil
 }
-
-func streamTTSResponse(c *gin.Context, resp *http.Response) {
-	c.Writer.WriteHeaderNow()
-
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		logger.LogWarn(c, "streaming not supported")
-		_, err := io.Copy(c.Writer, resp.Body)
-		if err != nil {
-			logger.LogWarn(c, err.Error())
-		}
-		return
-	}
-
-	buffer := make([]byte, 4096)
-	for {
-		n, err := resp.Body.Read(buffer)
-		//logger.LogInfo(c, fmt.Sprintf("streamTTSResponse read %d bytes", n))
-		if n > 0 {
-			if _, writeErr := c.Writer.Write(buffer[:n]); writeErr != nil {
-				logger.LogError(c, writeErr.Error())
-				break
-			}
-			flusher.Flush()
-		}
-		if err != nil {
-			if err != io.EOF {
-				logger.LogError(c, err.Error())
-			}
-			break
-		}
-	}
-}
-
-func OpenaiRealtimeHandler(c *gin.Context, info *relaycommon.RelayInfo) (*types.NewAPIError, *dto.RealtimeUsage) {
-	if info == nil || info.ClientWs == nil || info.TargetWs == nil {
-		return types.NewError(fmt.Errorf("invalid websocket connection"), types.ErrorCodeBadResponse), nil
-	}
-
-	info.IsStream = true
-	clientConn := info.ClientWs
-	targetConn := info.TargetWs
-
-	clientClosed := make(chan struct{})
-	targetClosed := make(chan struct{})
-	sendChan := make(chan []byte, 100)
-	receiveChan := make(chan []byte, 100)
-	errChan := make(chan error, 2)
-
-	usage := &dto.RealtimeUsage{}
-	localUsage := &dto.RealtimeUsage{}
-	sumUsage := &dto.RealtimeUsage{}
-
-	gopool.Go(func() {
-		defer func() {
-			if r := recover(); r != nil {
-				errChan <- fmt.Errorf("panic in client reader: %v", r)
-			}
-		}()
-		for {
-			select {
-			case <-c.Done():
-				return
-			default:
-				_, message, err := clientConn.ReadMessage()
-				if err != nil {
-					if !websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) {
-						errChan <- fmt.Errorf("error reading from client: %v", err)
-					}
-					close(clientClosed)
-					return
-				}
-
-				realtimeEvent := &dto.RealtimeEvent{}
-				err = common.Unmarshal(message, realtimeEvent)
-				if err != nil {
-					errChan <- fmt.Errorf("error unmarshalling message: %v", err)
-					return
-				}
-
-				if realtimeEvent.Type == dto.RealtimeEventTypeSessionUpdate {
-					if realtimeEvent.Session != nil {
-						if realtimeEvent.Session.Tools != nil {
-							info.RealtimeTools = realtimeEvent.Session.Tools
-						}
-					}
-				}
-
-				textToken, audioToken, err := service.CountTokenRealtime(info, *realtimeEvent, info.UpstreamModelName)
-				if err != nil {
-					errChan <- fmt.Errorf("error counting text token: %v", err)
-					return
-				}
-				logger.LogInfo(c, fmt.Sprintf("type: %s, textToken: %d, audioToken: %d", realtimeEvent.Type, textToken, audioToken))
-				localUsage.TotalTokens += textToken + audioToken
-				localUsage.InputTokens += textToken + audioToken
-				localUsage.InputTokenDetails.TextTokens += textToken
-				localUsage.InputTokenDetails.AudioTokens += audioToken
-
-				err = helper.WssString(c, targetConn, string(message))
-				if err != nil {
-					errChan <- fmt.Errorf("error writing to target: %v", err)
-					return
-				}
-
-				select {
-				case sendChan <- message:
-				default:
-				}
-			}
-		}
-	})
-
-	gopool.Go(func() {
-		defer func() {
-			if r := recover(); r != nil {
-				errChan <- fmt.Errorf("panic in target reader: %v", r)
-			}
-		}()
-		for {
-			select {
-			case <-c.Done():
-				return
-			default:
-				_, message, err := targetConn.ReadMessage()
-				if err != nil {
-					if !websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) {
-						errChan <- fmt.Errorf("error reading from target: %v", err)
-					}
-					close(targetClosed)
-					return
-				}
-				info.SetFirstResponseTime()
-				realtimeEvent := &dto.RealtimeEvent{}
-				err = common.Unmarshal(message, realtimeEvent)
-				if err != nil {
-					errChan <- fmt.Errorf("error unmarshalling message: %v", err)
-					return
-				}
-
-				if realtimeEvent.Type == dto.RealtimeEventTypeResponseDone {
-					realtimeUsage := realtimeEvent.Response.Usage
-					if realtimeUsage != nil {
-						usage.TotalTokens += realtimeUsage.TotalTokens
-						usage.InputTokens += realtimeUsage.InputTokens
-						usage.OutputTokens += realtimeUsage.OutputTokens
-						usage.InputTokenDetails.AudioTokens += realtimeUsage.InputTokenDetails.AudioTokens
-						usage.InputTokenDetails.CachedTokens += realtimeUsage.InputTokenDetails.CachedTokens
-						usage.InputTokenDetails.TextTokens += realtimeUsage.InputTokenDetails.TextTokens
-						usage.OutputTokenDetails.AudioTokens += realtimeUsage.OutputTokenDetails.AudioTokens
-						usage.OutputTokenDetails.TextTokens += realtimeUsage.OutputTokenDetails.TextTokens
-						err := preConsumeUsage(c, info, usage, sumUsage)
-						if err != nil {
-							errChan <- fmt.Errorf("error consume usage: %v", err)
-							return
-						}
-						// 本次计费完成，清除
-						usage = &dto.RealtimeUsage{}
-
-						localUsage = &dto.RealtimeUsage{}
-					} else {
-						textToken, audioToken, err := service.CountTokenRealtime(info, *realtimeEvent, info.UpstreamModelName)
-						if err != nil {
-							errChan <- fmt.Errorf("error counting text token: %v", err)
-							return
-						}
-						logger.LogInfo(c, fmt.Sprintf("type: %s, textToken: %d, audioToken: %d", realtimeEvent.Type, textToken, audioToken))
-						localUsage.TotalTokens += textToken + audioToken
-						info.IsFirstRequest = false
-						localUsage.InputTokens += textToken + audioToken
-						localUsage.InputTokenDetails.TextTokens += textToken
-						localUsage.InputTokenDetails.AudioTokens += audioToken
-						err = preConsumeUsage(c, info, localUsage, sumUsage)
-						if err != nil {
-							errChan <- fmt.Errorf("error consume usage: %v", err)
-							return
-						}
-						// 本次计费完成，清除
-						localUsage = &dto.RealtimeUsage{}
-						// print now usage
-					}
-					logger.LogInfo(c, fmt.Sprintf("realtime streaming sumUsage: %v", sumUsage))
-					logger.LogInfo(c, fmt.Sprintf("realtime streaming localUsage: %v", localUsage))
-					logger.LogInfo(c, fmt.Sprintf("realtime streaming localUsage: %v", localUsage))
-
-				} else if realtimeEvent.Type == dto.RealtimeEventTypeSessionUpdated || realtimeEvent.Type == dto.RealtimeEventTypeSessionCreated {
-					realtimeSession := realtimeEvent.Session
-					if realtimeSession != nil {
-						// update audio format
-						info.InputAudioFormat = common.GetStringIfEmpty(realtimeSession.InputAudioFormat, info.InputAudioFormat)
-						info.OutputAudioFormat = common.GetStringIfEmpty(realtimeSession.OutputAudioFormat, info.OutputAudioFormat)
-					}
-				} else {
-					textToken, audioToken, err := service.CountTokenRealtime(info, *realtimeEvent, info.UpstreamModelName)
-					if err != nil {
-						errChan <- fmt.Errorf("error counting text token: %v", err)
-						return
-					}
-					logger.LogInfo(c, fmt.Sprintf("type: %s, textToken: %d, audioToken: %d", realtimeEvent.Type, textToken, audioToken))
-					localUsage.TotalTokens += textToken + audioToken
-					localUsage.OutputTokens += textToken + audioToken
-					localUsage.OutputTokenDetails.TextTokens += textToken
-					localUsage.OutputTokenDetails.AudioTokens += audioToken
-				}
-
-				err = helper.WssString(c, clientConn, string(message))
-				if err != nil {
-					errChan <- fmt.Errorf("error writing to client: %v", err)
-					return
-				}
-
-				select {
-				case receiveChan <- message:
-				default:
-				}
-			}
-		}
-	})
-
-	select {
-	case <-clientClosed:
-	case <-targetClosed:
-	case err := <-errChan:
-		//return service.OpenAIErrorWrapper(err, "realtime_error", http.StatusInternalServerError), nil
-		logger.LogError(c, "realtime error: "+err.Error())
-	case <-c.Done():
-	}
-
-	if usage.TotalTokens != 0 {
-		_ = preConsumeUsage(c, info, usage, sumUsage)
-	}
-
-	if localUsage.TotalTokens != 0 {
-		_ = preConsumeUsage(c, info, localUsage, sumUsage)
-	}
-
-	// check usage total tokens, if 0, use local usage
-
-	return nil, sumUsage
-}
-
-func preConsumeUsage(ctx *gin.Context, info *relaycommon.RelayInfo, usage *dto.RealtimeUsage, totalUsage *dto.RealtimeUsage) error {
-	if usage == nil || totalUsage == nil {
-		return fmt.Errorf("invalid usage pointer")
-	}
-
-	totalUsage.TotalTokens += usage.TotalTokens
-	totalUsage.InputTokens += usage.InputTokens
-	totalUsage.OutputTokens += usage.OutputTokens
-	totalUsage.InputTokenDetails.CachedTokens += usage.InputTokenDetails.CachedTokens
-	totalUsage.InputTokenDetails.TextTokens += usage.InputTokenDetails.TextTokens
-	totalUsage.InputTokenDetails.AudioTokens += usage.InputTokenDetails.AudioTokens
-	totalUsage.OutputTokenDetails.TextTokens += usage.OutputTokenDetails.TextTokens
-	totalUsage.OutputTokenDetails.AudioTokens += usage.OutputTokenDetails.AudioTokens
-	// clear usage
-	err := service.PreWssConsumeQuota(ctx, info, usage)
-	return err
-}
-
-func OpenaiHandlerWithUsage(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
-	defer service.CloseResponseBodyGracefully(resp)
-
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError)
-	}
-
-	var usageResp dto.SimpleResponse
-	err = common.Unmarshal(responseBody, &usageResp)
-	if err != nil {
-		return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
-	}
-
-	// 写入新的 response body
-	service.IOCopyBytesGracefully(c, resp, responseBody)
-
-	// Once we've written to the client, we should not return errors anymore
-	// because the upstream has already consumed resources and returned content
-	// We should still perform billing even if parsing fails
-	// format
-	if usageResp.InputTokens > 0 {
-		usageResp.PromptTokens += usageResp.InputTokens
-	}
-	if usageResp.OutputTokens > 0 {
-		usageResp.CompletionTokens += usageResp.OutputTokens
-	}
-	if usageResp.InputTokensDetails != nil {
-		usageResp.PromptTokensDetails.ImageTokens += usageResp.InputTokensDetails.ImageTokens
-		usageResp.PromptTokensDetails.TextTokens += usageResp.InputTokensDetails.TextTokens
-	}
-	applyUsagePostProcessing(info, &usageResp.Usage, responseBody)
-	return &usageResp.Usage, nil
-}
-
-func applyUsagePostProcessing(info *relaycommon.RelayInfo, usage *dto.Usage, responseBody []byte) {
-	if info == nil || usage == nil {
-		return
-	}
-
-	switch info.ChannelType {
-	case constant.ChannelTypeDeepSeek:
-		if usage.PromptTokensDetails.CachedTokens == 0 && usage.PromptCacheHitTokens != 0 {
-			usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
-		}
-	case constant.ChannelTypeZhipu_v4:
-		// 智普的cached_tokens在标准位置: usage.prompt_tokens_details.cached_tokens
-		if usage.PromptTokensDetails.CachedTokens == 0 {
-			if usage.InputTokensDetails != nil && usage.InputTokensDetails.CachedTokens > 0 {
-				usage.PromptTokensDetails.CachedTokens = usage.InputTokensDetails.CachedTokens
-			} else if cachedTokens, ok := extractCachedTokensFromBody(responseBody); ok {
-				usage.PromptTokensDetails.CachedTokens = cachedTokens
-			} else if usage.PromptCacheHitTokens > 0 {
-				usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
-			}
-		}
-	case constant.ChannelTypeMoonshot:
-		// Moonshot的cached_tokens在非标准位置: choices[].usage.cached_tokens
-		if usage.PromptTokensDetails.CachedTokens == 0 {
-			if usage.InputTokensDetails != nil && usage.InputTokensDetails.CachedTokens > 0 {
-				usage.PromptTokensDetails.CachedTokens = usage.InputTokensDetails.CachedTokens
-			} else if cachedTokens, ok := extractMoonshotCachedTokensFromBody(responseBody); ok {
-				usage.PromptTokensDetails.CachedTokens = cachedTokens
-			} else if cachedTokens, ok := extractCachedTokensFromBody(responseBody); ok {
-				usage.PromptTokensDetails.CachedTokens = cachedTokens
-			} else if usage.PromptCacheHitTokens > 0 {
-				usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
-			}
-		}
-	case constant.ChannelTypeOpenAI:
-		if usage.PromptTokensDetails.CachedTokens == 0 {
-			if cachedTokens, ok := extractLlamaCachedTokensFromBody(responseBody); ok {
-				usage.PromptTokensDetails.CachedTokens = cachedTokens
-			}
-		}
-	}
-}
-
-func extractCachedTokensFromBody(body []byte) (int, bool) {
-	if len(body) == 0 {
-		return 0, false
-	}
-
-	var payload struct {
-		Usage struct {
-			PromptTokensDetails struct {
-				CachedTokens *int `json:"cached_tokens"`
-			} `json:"prompt_tokens_details"`
-			CachedTokens         *int `json:"cached_tokens"`
-			PromptCacheHitTokens *int `json:"prompt_cache_hit_tokens"`
-		} `json:"usage"`
-	}
-
-	if err := common.Unmarshal(body, &payload); err != nil {
-		return 0, false
-	}
-
-	if payload.Usage.PromptTokensDetails.CachedTokens != nil {
-		return *payload.Usage.PromptTokensDetails.CachedTokens, true
-	}
-	if payload.Usage.CachedTokens != nil {
-		return *payload.Usage.CachedTokens, true
-	}
-	if payload.Usage.PromptCacheHitTokens != nil {
-		return *payload.Usage.PromptCacheHitTokens, true
-	}
-	return 0, false
-}
-
-// extractMoonshotCachedTokensFromBody 从Moonshot的非标准位置提取cached_tokens
-// Moonshot的流式响应格式: {"choices":[{"usage":{"cached_tokens":111}}]}
-func extractMoonshotCachedTokensFromBody(body []byte) (int, bool) {
-	if len(body) == 0 {
-		return 0, false
-	}
-
-	var payload struct {
-		Choices []struct {
-			Usage struct {
-				CachedTokens *int `json:"cached_tokens"`
-			} `json:"usage"`
-		} `json:"choices"`
-	}
-
-	if err := common.Unmarshal(body, &payload); err != nil {
-		return 0, false
-	}
-
-	// 遍历choices查找cached_tokens
-	for _, choice := range payload.Choices {
-		if choice.Usage.CachedTokens != nil && *choice.Usage.CachedTokens > 0 {
-			return *choice.Usage.CachedTokens, true
-		}
-	}
-
-	return 0, false
-}
-
-// extractLlamaCachedTokensFromBody 从llama.cpp的非标准位置提取cache_n
-func extractLlamaCachedTokensFromBody(body []byte) (int, bool) {
-	if len(body) == 0 {
-		return 0, false
-	}
-
-	var payload struct {
-		Timings struct {
-			CachedTokens *int `json:"cache_n"`
-		} `json:"timings"`
-	}
-
-	if err := common.Unmarshal(body, &payload); err != nil {
-		return 0, false
-	}
-
-	if payload.Timings.CachedTokens == nil {
-		return 0, false
-	}
-	return *payload.Timings.CachedTokens, true
-}
@@ -0,0 +1,287 @@
+package openai
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/QuantumNous/new-api/logger"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/relay/helper"
+	"github.com/QuantumNous/new-api/service"
+	"github.com/QuantumNous/new-api/types"
+
+	"github.com/gin-gonic/gin"
+)
+
+// OpenaiImageHandler relays a non-streaming OpenAI image response
+// (generations/edits) to the client and returns the usage parsed from the
+// response body so the caller can bill for it.
+//
+// It returns an error when the body cannot be read or decoded, or when the
+// decoded body carries an OpenAI error object with a non-empty type; in those
+// cases nothing is written to the client by this function.
+func OpenaiImageHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+	defer service.CloseResponseBodyGracefully(resp)
+
+	rawBody, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		return nil, types.NewOpenAIError(readErr, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError)
+	}
+
+	var parsed dto.SimpleResponse
+	if parseErr := common.Unmarshal(rawBody, &parsed); parseErr != nil {
+		return nil, types.NewOpenAIError(parseErr, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+	}
+
+	// A typed upstream error is surfaced with the upstream status code instead
+	// of being forwarded as a successful body.
+	upstreamErr := parsed.GetOpenAIError()
+	if upstreamErr != nil && upstreamErr.Type != "" {
+		return nil, types.WithOpenAIError(*upstreamErr, resp.StatusCode)
+	}
+
+	// Write the buffered body back out as the new response body.
+	service.IOCopyBytesGracefully(c, resp, rawBody)
+
+	billedUsage := &parsed.Usage
+	normalizeOpenAIUsage(billedUsage)
+	applyUsagePostProcessing(info, billedUsage, rawBody)
+	return billedUsage, nil
+}
+
+// normalizeOpenAIUsage copies the OpenAI Images usage fields (input_tokens,
+// output_tokens, input_tokens_details) into the canonical prompt/completion
+// fields of the same struct, and derives total_tokens when it is missing.
+//
+// The copy uses overwrite (=) semantics rather than the earlier additive (+=)
+// form. Per the original authors, the image API does not return
+// prompt_tokens / completion_tokens, so the two forms give the same result
+// there while ruling out double counting should both field sets ever be
+// populated. It is intended for the OpenAI image relay paths only
+// (generations/edits, streaming and non-streaming); revisit the overwrite
+// semantics before reusing it on chat or embedding paths.
+//
+// A nil usage is a no-op.
+func normalizeOpenAIUsage(usage *dto.Usage) {
+	if usage == nil {
+		return
+	}
+
+	// Zero input/output counts leave the existing prompt/completion values alone.
+	if inTokens := usage.InputTokens; inTokens != 0 {
+		usage.PromptTokens = inTokens
+	}
+	if outTokens := usage.OutputTokens; outTokens != 0 {
+		usage.CompletionTokens = outTokens
+	}
+
+	if details := usage.InputTokensDetails; details != nil {
+		usage.PromptTokensDetails.CachedTokens = details.CachedTokens
+		usage.PromptTokensDetails.CachedCreationTokens = details.CachedCreationTokens
+		usage.PromptTokensDetails.ImageTokens = details.ImageTokens
+		usage.PromptTokensDetails.TextTokens = details.TextTokens
+		usage.PromptTokensDetails.AudioTokens = details.AudioTokens
+	}
+
+	// Keep an upstream-provided total; only fill it in when absent.
+	if usage.TotalTokens != 0 {
+		return
+	}
+	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
+}
+
+// OpenaiImageStreamHandler relays an OpenAI image response that was requested
+// in streaming mode and returns the usage to bill.
+//
+// Dispatch, in order:
+//   - nil response or nil body: logs and returns a bad-response error.
+//   - status outside 2xx: delegates to OpenaiImageHandler.
+//   - Content-Type without "text/event-stream": delegates to
+//     OpenaiImageJSONAsStreamHandler.
+//   - otherwise: scans the SSE body with helper.StreamScannerHandler and
+//     forwards every chunk to the client.
+//
+// On the SSE path the returned usage is the one from the most recent chunk
+// whose normalized usage passes service.ValidUsage (an empty usage if none
+// did), and the returned error is always nil; upstream error events are only
+// recorded through the scanner's StreamResult.
+func OpenaiImageStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+	if resp == nil || resp.Body == nil {
+		logger.LogError(c, "invalid image stream response")
+		return nil, types.NewOpenAIError(fmt.Errorf("invalid response"), types.ErrorCodeBadResponse, http.StatusInternalServerError)
+	}
+
+	// Lower-cased so the media-type match below is case-insensitive.
+	contentType := strings.ToLower(resp.Header.Get("Content-Type"))
+	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
+		return OpenaiImageHandler(c, info, resp)
+	}
+	if !strings.Contains(contentType, "text/event-stream") {
+		return OpenaiImageJSONAsStreamHandler(c, info, resp)
+	}
+	// Reuse the shared streaming engine (helper.StreamScannerHandler) so the
+	// image streaming path gets the same ping keepalive, streaming-timeout
+	// watchdog, client-disconnect detection, panic recovery and goroutine
+	// cleanup as every other relay stream. The scanner delivers only the
+	// "data:" payload, so the SSE "event:" line is rebuilt from the JSON "type"
+	// field (real OpenAI image events keep event == type).
+	usage := &dto.Usage{}
+	// Last payload seen; handed to applyUsagePostProcessing after the stream ends.
+	var lastStreamData []byte
+
+	helper.StreamScannerHandler(c, resp, info, func(data string, sr *helper.StreamResult) {
+		raw := common.StringToByteSlice(data)
+		lastStreamData = raw
+		if isOpenAIImageStreamErrorEvent(raw) {
+			// Record the error as a soft error; the scanner drives the final
+			// EndReason. HasErrors() flags the failure for logging/handling.
+			sr.Error(fmt.Errorf("%s", extractOpenAIImageStreamErrorMessage(raw)))
+		}
+		// Chunks that do not decode, or whose usage is not valid, leave the
+		// previously captured usage untouched.
+		var usageResp dto.SimpleResponse
+		if err := common.Unmarshal(raw, &usageResp); err == nil {
+			normalizeOpenAIUsage(&usageResp.Usage)
+			if service.ValidUsage(&usageResp.Usage) {
+				usage = &usageResp.Usage
+			}
+		}
+		// Error chunks are forwarded to the client as well.
+		writeOpenaiImageStreamChunk(c, raw)
+	})
+
+	// StreamScannerHandler consumes the upstream [DONE]; re-emit it so the
+	// client still receives a terminal data: [DONE].
+	if info != nil && info.StreamStatus != nil && info.StreamStatus.EndReason == relaycommon.StreamEndReasonDone {
+		helper.Done(c)
+	}
+
+	applyUsagePostProcessing(info, usage, lastStreamData)
+	return usage, nil
+}
+
+// writeOpenaiImageStreamChunk writes one image stream chunk to the client as
+// an SSE frame. When the JSON payload has a non-blank "type" field, an
+// "event:" line carrying that type is rendered first; the "data:" line always
+// follows with the payload bytes unchanged. This mirrors
+// helper.ResponseChunkData.
+func writeOpenaiImageStreamChunk(c *gin.Context, data []byte) {
+	var envelope struct {
+		Type string `json:"type"`
+	}
+	// The decode error is deliberately ignored: an empty type simply means no
+	// "event:" line is written for this chunk.
+	_ = common.Unmarshal(data, &envelope)
+
+	eventName := strings.TrimSpace(envelope.Type)
+	if eventName != "" {
+		eventLine := fmt.Sprintf("event: %s\n", eventName)
+		c.Render(-1, common.CustomEvent{Data: eventLine})
+	}
+
+	dataLine := "data: " + string(data)
+	c.Render(-1, common.CustomEvent{Data: dataLine})
+	_ = helper.FlushWriter(c)
+}
+
+// isOpenAIImageStreamErrorEvent reports whether an image stream chunk is an
+// upstream error, judging by JSON content only: a "type" of error or
+// upstream_error (case-insensitive, surrounding whitespace ignored), or an
+// "error" field that is present and not JSON null. The SSE "event:" line is
+// not available here: StreamScannerHandler delivers only the "data:" payload.
+// A payload carrying just a "message" key is deliberately NOT treated as an
+// error to avoid false positives.
+//
+// Payloads that are not valid JSON, or that fail to decode, are reported as
+// non-errors.
+func isOpenAIImageStreamErrorEvent(data []byte) bool {
+	if !json.Valid(data) {
+		return false
+	}
+	var payload struct {
+		Type  string          `json:"type"`
+		Error json.RawMessage `json:"error"`
+	}
+	if err := common.Unmarshal(data, &payload); err != nil {
+		return false
+	}
+	switch strings.ToLower(strings.TrimSpace(payload.Type)) {
+	case "error", "upstream_error":
+		return true
+	}
+	// Under encoding/json semantics an explicit `"error": null` is stored in a
+	// RawMessage as the literal bytes "null", so a plain length check would
+	// flag a healthy chunk as an error. Treat null (and blank) as absent.
+	errField := strings.TrimSpace(string(payload.Error))
+	return errField != "" && errField != "null"
+}
+
+// extractOpenAIImageStreamErrorMessage picks a human-readable message out of
+// an upstream error chunk. Candidates are tried in this order, each trimmed
+// and skipped when blank:
+//  1. the top-level "message" field;
+//  2. the "message" field nested inside "error";
+//  3. the raw "error" value rendered as a string.
+//
+// A fixed fallback message is returned when the payload is empty, is not valid
+// JSON, fails to decode, or yields none of the candidates.
+func extractOpenAIImageStreamErrorMessage(data []byte) string {
+	const fallback = "upstream image stream returned error event"
+
+	if len(data) == 0 || !json.Valid(data) {
+		return fallback
+	}
+
+	var envelope struct {
+		Message string          `json:"message"`
+		Error   json.RawMessage `json:"error"`
+	}
+	if common.Unmarshal(data, &envelope) != nil {
+		return fallback
+	}
+
+	if topLevel := strings.TrimSpace(envelope.Message); topLevel != "" {
+		return topLevel
+	}
+	if len(envelope.Error) == 0 {
+		return fallback
+	}
+
+	// "error" may be an object with its own message, or any other JSON value;
+	// when it does not decode as an object, fall through to its raw text.
+	var inner struct {
+		Message string `json:"message"`
+	}
+	if common.Unmarshal(envelope.Error, &inner) == nil {
+		if nestedMsg := strings.TrimSpace(inner.Message); nestedMsg != "" {
+			return nestedMsg
+		}
+	}
+	if rawText := strings.TrimSpace(common.JsonRawMessageToString(envelope.Error)); rawText != "" {
+		return rawText
+	}
+	return fallback
+}
+
+// OpenaiImageJSONAsStreamHandler adapts a plain JSON image response to a
+// client that asked for a stream: the body is read in full, and each entry of
+// its data array is re-emitted as one "image_generation.completed" SSE event,
+// followed by a terminal "data: [DONE]".
+//
+// Read/decode failures and typed upstream errors are returned before anything
+// is written. Once writing has started, a write failure marks the stream as
+// client-gone (when a StreamStatus exists) and the usage is still returned
+// with a nil error.
+func OpenaiImageJSONAsStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+	defer service.CloseResponseBodyGracefully(resp)
+
+	rawBody, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		return nil, types.NewOpenAIError(readErr, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError)
+	}
+
+	var images dto.ImageResponse
+	if parseErr := common.Unmarshal(rawBody, &images); parseErr != nil {
+		return nil, types.NewOpenAIError(parseErr, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+	}
+
+	// Second, best-effort decode of the same body to pick up usage and any
+	// upstream error object; a decode failure here is ignored.
+	var parsed dto.SimpleResponse
+	_ = common.Unmarshal(rawBody, &parsed)
+	if upstreamErr := parsed.GetOpenAIError(); upstreamErr != nil && upstreamErr.Type != "" {
+		return nil, types.WithOpenAIError(*upstreamErr, resp.StatusCode)
+	}
+	usage := &parsed.Usage
+	normalizeOpenAIUsage(usage)
+	applyUsagePostProcessing(info, usage, rawBody)
+
+	helper.SetEventStreamHeaders(c)
+	c.Status(http.StatusOK)
+
+	// Fall back to the current time when upstream did not supply "created".
+	createdAt := images.Created
+	if createdAt == 0 {
+		createdAt = time.Now().Unix()
+	}
+	if info != nil {
+		info.SetFirstResponseTime()
+	}
+
+	// clientGone records a failed write on the stream status, if there is one,
+	// and produces the handler's return values for that case.
+	clientGone := func(writeErr error) (*dto.Usage, *types.NewAPIError) {
+		if info != nil && info.StreamStatus != nil {
+			info.StreamStatus.SetEndReason(relaycommon.StreamEndReasonClientGone, writeErr)
+		}
+		return usage, nil
+	}
+
+	for _, img := range images.Data {
+		event := map[string]any{
+			"type":       "image_generation.completed",
+			"created_at": createdAt,
+		}
+		// Optional fields are only included when non-empty.
+		if img.Url != "" {
+			event["url"] = img.Url
+		}
+		if img.B64Json != "" {
+			event["b64_json"] = img.B64Json
+		}
+		if img.RevisedPrompt != "" {
+			event["revised_prompt"] = img.RevisedPrompt
+		}
+		if service.ValidUsage(usage) {
+			event["usage"] = *usage
+		}
+		if writeErr := writeOpenaiImageStreamPayload(c, "image_generation.completed", event); writeErr != nil {
+			return clientGone(writeErr)
+		}
+	}
+
+	if writeErr := writeOpenaiImageStreamDone(c); writeErr != nil {
+		return clientGone(writeErr)
+	}
+
+	// Everything was delivered: count the events and mark the stream as done.
+	if info != nil {
+		info.ReceivedResponseCount += len(images.Data)
+		if info.StreamStatus == nil {
+			info.StreamStatus = relaycommon.NewStreamStatus()
+		}
+		info.StreamStatus.SetEndReason(relaycommon.StreamEndReasonDone, nil)
+	}
+	return usage, nil
+}
+
+// writeOpenaiImageStreamPayload serializes payload and writes it to the client
+// as one SSE frame: an optional "event:" line (skipped when eventName is
+// empty), then the "data:" line and the blank line that ends the frame, then a
+// flush. The first marshal, write or flush error is returned.
+func writeOpenaiImageStreamPayload(c *gin.Context, eventName string, payload any) error {
+	encoded, err := common.Marshal(payload)
+	if err != nil {
+		return err
+	}
+
+	if eventName != "" {
+		_, err = fmt.Fprintf(c.Writer, "event: %s\n", eventName)
+		if err != nil {
+			return err
+		}
+	}
+
+	_, err = fmt.Fprintf(c.Writer, "data: %s\n\n", encoded)
+	if err != nil {
+		return err
+	}
+	return helper.FlushWriter(c)
+}
+
+// writeOpenaiImageStreamDone writes the terminal "data: [DONE]" SSE frame to
+// the client and flushes it, returning the first write or flush error.
+func writeOpenaiImageStreamDone(c *gin.Context) error {
+	_, writeErr := fmt.Fprint(c.Writer, "data: [DONE]\n\n")
+	if writeErr != nil {
+		return writeErr
+	}
+	return helper.FlushWriter(c)
+}
@@ -0,0 +1,242 @@
+package openai
+
+import (
+	"fmt"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/QuantumNous/new-api/logger"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/relay/helper"
+	"github.com/QuantumNous/new-api/service"
+	"github.com/QuantumNous/new-api/types"
+
+	"github.com/bytedance/gopkg/util/gopool"
+	"github.com/gin-gonic/gin"
+	"github.com/gorilla/websocket"
+)
+
+// OpenaiRealtimeHandler bridges a client websocket (info.ClientWs) and an
+// upstream websocket (info.TargetWs) for the realtime API, counting tokens and
+// pre-consuming quota as messages flow in both directions.
+//
+// Two pooled goroutines are started: one reads from the client and writes to
+// the target, the other reads from the target and writes to the client. The
+// calling goroutine blocks until either side closes, an error is reported on
+// errChan, or the request context is done; it then settles any usage that has
+// not been pre-consumed yet.
+//
+// Note the return order: the error comes first, the usage second. The error is
+// non-nil only when info or one of its websocket connections is nil; runtime
+// failures are logged and the accumulated usage is still returned.
+func OpenaiRealtimeHandler(c *gin.Context, info *relaycommon.RelayInfo) (*types.NewAPIError, *dto.RealtimeUsage) {
+	if info == nil || info.ClientWs == nil || info.TargetWs == nil {
+		return types.NewError(fmt.Errorf("invalid websocket connection"), types.ErrorCodeBadResponse), nil
+	}
+
+	info.IsStream = true
+	clientConn := info.ClientWs
+	targetConn := info.TargetWs
+
+	clientClosed := make(chan struct{})
+	targetClosed := make(chan struct{})
+	// sendChan/receiveChan only ever receive non-blocking sends below; no
+	// receiver is visible in this function.
+	sendChan := make(chan []byte, 100)
+	receiveChan := make(chan []byte, 100)
+	// Capacity 2: one slot per reader goroutine.
+	errChan := make(chan error, 2)
+
+	// usage:      totals reported by upstream in response.done events.
+	// localUsage: totals counted locally via service.CountTokenRealtime.
+	// sumUsage:   everything already passed to preConsumeUsage; this is returned.
+	// NOTE(review): usage and localUsage are read and reassigned from both
+	// goroutines and from this function with no synchronization visible here —
+	// confirm whether that is safe.
+	usage := &dto.RealtimeUsage{}
+	localUsage := &dto.RealtimeUsage{}
+	sumUsage := &dto.RealtimeUsage{}
+
+	// Client -> target pump.
+	gopool.Go(func() {
+		defer func() {
+			if r := recover(); r != nil {
+				errChan <- fmt.Errorf("panic in client reader: %v", r)
+			}
+		}()
+		for {
+			select {
+			case <-c.Done():
+				return
+			default:
+				_, message, err := clientConn.ReadMessage()
+				if err != nil {
+					// Normal/going-away closes are not reported as errors.
+					if !websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) {
+						errChan <- fmt.Errorf("error reading from client: %v", err)
+					}
+					close(clientClosed)
+					return
+				}
+
+				realtimeEvent := &dto.RealtimeEvent{}
+				err = common.Unmarshal(message, realtimeEvent)
+				if err != nil {
+					errChan <- fmt.Errorf("error unmarshalling message: %v", err)
+					return
+				}
+
+				// Remember the tools declared by a session.update event.
+				if realtimeEvent.Type == dto.RealtimeEventTypeSessionUpdate {
+					if realtimeEvent.Session != nil {
+						if realtimeEvent.Session.Tools != nil {
+							info.RealtimeTools = realtimeEvent.Session.Tools
+						}
+					}
+				}
+
+				// Client messages are counted as input tokens.
+				textToken, audioToken, err := service.CountTokenRealtime(info, *realtimeEvent, info.UpstreamModelName)
+				if err != nil {
+					errChan <- fmt.Errorf("error counting text token: %v", err)
+					return
+				}
+				logger.LogInfo(c, fmt.Sprintf("type: %s, textToken: %d, audioToken: %d", realtimeEvent.Type, textToken, audioToken))
+				localUsage.TotalTokens += textToken + audioToken
+				localUsage.InputTokens += textToken + audioToken
+				localUsage.InputTokenDetails.TextTokens += textToken
+				localUsage.InputTokenDetails.AudioTokens += audioToken
+
+				err = helper.WssString(c, targetConn, string(message))
+				if err != nil {
+					errChan <- fmt.Errorf("error writing to target: %v", err)
+					return
+				}
+
+				// Non-blocking: the message is dropped when the buffer is full.
+				select {
+				case sendChan <- message:
+				default:
+				}
+			}
+		}
+	})
+
+	// Target -> client pump.
+	gopool.Go(func() {
+		defer func() {
+			if r := recover(); r != nil {
+				errChan <- fmt.Errorf("panic in target reader: %v", r)
+			}
+		}()
+		for {
+			select {
+			case <-c.Done():
+				return
+			default:
+				_, message, err := targetConn.ReadMessage()
+				if err != nil {
+					// Normal/going-away closes are not reported as errors.
+					if !websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) {
+						errChan <- fmt.Errorf("error reading from target: %v", err)
+					}
+					close(targetClosed)
+					return
+				}
+				info.SetFirstResponseTime()
+				realtimeEvent := &dto.RealtimeEvent{}
+				err = common.Unmarshal(message, realtimeEvent)
+				if err != nil {
+					errChan <- fmt.Errorf("error unmarshalling message: %v", err)
+					return
+				}
+
+				if realtimeEvent.Type == dto.RealtimeEventTypeResponseDone {
+					// NOTE(review): Response is dereferenced without a nil check.
+					// If it can be nil on a response.done event this panics and is
+					// only caught by the deferred recover above — confirm.
+					realtimeUsage := realtimeEvent.Response.Usage
+					if realtimeUsage != nil {
+						// Upstream reported usage: bill that and discard the
+						// locally counted tokens for this round.
+						usage.TotalTokens += realtimeUsage.TotalTokens
+						usage.InputTokens += realtimeUsage.InputTokens
+						usage.OutputTokens += realtimeUsage.OutputTokens
+						usage.InputTokenDetails.AudioTokens += realtimeUsage.InputTokenDetails.AudioTokens
+						usage.InputTokenDetails.CachedTokens += realtimeUsage.InputTokenDetails.CachedTokens
+						usage.InputTokenDetails.TextTokens += realtimeUsage.InputTokenDetails.TextTokens
+						usage.OutputTokenDetails.AudioTokens += realtimeUsage.OutputTokenDetails.AudioTokens
+						usage.OutputTokenDetails.TextTokens += realtimeUsage.OutputTokenDetails.TextTokens
+						err := preConsumeUsage(c, info, usage, sumUsage)
+						if err != nil {
+							errChan <- fmt.Errorf("error consume usage: %v", err)
+							return
+						}
+						// This billing round is settled; reset the accumulators.
+						usage = &dto.RealtimeUsage{}
+
+						localUsage = &dto.RealtimeUsage{}
+					} else {
+						// No upstream usage: fall back to locally counted tokens.
+						// Tokens of the response.done event itself are added to
+						// the input side here.
+						textToken, audioToken, err := service.CountTokenRealtime(info, *realtimeEvent, info.UpstreamModelName)
+						if err != nil {
+							errChan <- fmt.Errorf("error counting text token: %v", err)
+							return
+						}
+						logger.LogInfo(c, fmt.Sprintf("type: %s, textToken: %d, audioToken: %d", realtimeEvent.Type, textToken, audioToken))
+						localUsage.TotalTokens += textToken + audioToken
+						info.IsFirstRequest = false
+						localUsage.InputTokens += textToken + audioToken
+						localUsage.InputTokenDetails.TextTokens += textToken
+						localUsage.InputTokenDetails.AudioTokens += audioToken
+						err = preConsumeUsage(c, info, localUsage, sumUsage)
+						if err != nil {
+							errChan <- fmt.Errorf("error consume usage: %v", err)
+							return
+						}
+						// This billing round is settled; reset the accumulator.
+						localUsage = &dto.RealtimeUsage{}
+						// print now usage
+					}
+					// NOTE(review): localUsage is logged twice below; one of the
+					// two lines may have been meant to log usage — confirm.
+					logger.LogInfo(c, fmt.Sprintf("realtime streaming sumUsage: %v", sumUsage))
+					logger.LogInfo(c, fmt.Sprintf("realtime streaming localUsage: %v", localUsage))
+					logger.LogInfo(c, fmt.Sprintf("realtime streaming localUsage: %v", localUsage))
+
+				} else if realtimeEvent.Type == dto.RealtimeEventTypeSessionUpdated || realtimeEvent.Type == dto.RealtimeEventTypeSessionCreated {
+					realtimeSession := realtimeEvent.Session
+					if realtimeSession != nil {
+						// update audio format
+						info.InputAudioFormat = common.GetStringIfEmpty(realtimeSession.InputAudioFormat, info.InputAudioFormat)
+						info.OutputAudioFormat = common.GetStringIfEmpty(realtimeSession.OutputAudioFormat, info.OutputAudioFormat)
+					}
+				} else {
+					// Any other upstream event is counted as output tokens.
+					textToken, audioToken, err := service.CountTokenRealtime(info, *realtimeEvent, info.UpstreamModelName)
+					if err != nil {
+						errChan <- fmt.Errorf("error counting text token: %v", err)
+						return
+					}
+					logger.LogInfo(c, fmt.Sprintf("type: %s, textToken: %d, audioToken: %d", realtimeEvent.Type, textToken, audioToken))
+					localUsage.TotalTokens += textToken + audioToken
+					localUsage.OutputTokens += textToken + audioToken
+					localUsage.OutputTokenDetails.TextTokens += textToken
+					localUsage.OutputTokenDetails.AudioTokens += audioToken
+				}
+
+				err = helper.WssString(c, clientConn, string(message))
+				if err != nil {
+					errChan <- fmt.Errorf("error writing to client: %v", err)
+					return
+				}
+
+				// Non-blocking: the message is dropped when the buffer is full.
+				select {
+				case receiveChan <- message:
+				default:
+				}
+			}
+		}
+	})
+
+	// Block until the session ends for any reason. Errors are logged, not
+	// returned, so the usage accumulated so far is still reported.
+	select {
+	case <-clientClosed:
+	case <-targetClosed:
+	case err := <-errChan:
+		//return service.OpenAIErrorWrapper(err, "realtime_error", http.StatusInternalServerError), nil
+		logger.LogError(c, "realtime error: "+err.Error())
+	case <-c.Done():
+	}
+
+	// Settle whatever has not been pre-consumed yet; failures are ignored here.
+	if usage.TotalTokens != 0 {
+		_ = preConsumeUsage(c, info, usage, sumUsage)
+	}
+
+	if localUsage.TotalTokens != 0 {
+		_ = preConsumeUsage(c, info, localUsage, sumUsage)
+	}
+
+	// check usage total tokens, if 0, use local usage
+
+	return nil, sumUsage
+}
+
+// preConsumeUsage folds usage into totalUsage field by field and then hands
+// usage to service.PreWssConsumeQuota, returning that call's error. usage
+// itself is not reset here; callers replace it with a fresh value afterwards.
+// It returns an error without touching anything when either pointer is nil.
+func preConsumeUsage(ctx *gin.Context, info *relaycommon.RelayInfo, usage *dto.RealtimeUsage, totalUsage *dto.RealtimeUsage) error {
+	if usage == nil || totalUsage == nil {
+		return fmt.Errorf("invalid usage pointer")
+	}
+
+	// Top-level counters.
+	totalUsage.InputTokens += usage.InputTokens
+	totalUsage.OutputTokens += usage.OutputTokens
+	totalUsage.TotalTokens += usage.TotalTokens
+
+	// Input-side breakdown.
+	totalUsage.InputTokenDetails.TextTokens += usage.InputTokenDetails.TextTokens
+	totalUsage.InputTokenDetails.AudioTokens += usage.InputTokenDetails.AudioTokens
+	totalUsage.InputTokenDetails.CachedTokens += usage.InputTokenDetails.CachedTokens
+
+	// Output-side breakdown.
+	totalUsage.OutputTokenDetails.TextTokens += usage.OutputTokenDetails.TextTokens
+	totalUsage.OutputTokenDetails.AudioTokens += usage.OutputTokenDetails.AudioTokens
+
+	return service.PreWssConsumeQuota(ctx, info, usage)
+}
@@ -0,0 +1,133 @@
+package openai
+
+import (
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/constant"
+	"github.com/QuantumNous/new-api/dto"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+)
+
+// applyUsagePostProcessing back-fills usage.PromptTokensDetails.CachedTokens
+// for channel types that report cached-token counts outside the standard
+// field. It does nothing when info or usage is nil, when the channel type is
+// not one of the handled cases, or when CachedTokens is already non-zero.
+//
+// responseBody is the raw upstream response; it is only parsed when the
+// already-decoded usage fields do not supply a value.
+func applyUsagePostProcessing(info *relaycommon.RelayInfo, usage *dto.Usage, responseBody []byte) {
+	if usage == nil || info == nil {
+		return
+	}
+
+	switch info.ChannelType {
+	case constant.ChannelTypeDeepSeek:
+		if usage.PromptCacheHitTokens != 0 && usage.PromptTokensDetails.CachedTokens == 0 {
+			usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
+		}
+
+	case constant.ChannelTypeZhipu_v4:
+		// Zhipu reports cached_tokens at the standard location:
+		// usage.prompt_tokens_details.cached_tokens
+		if usage.PromptTokensDetails.CachedTokens != 0 {
+			return
+		}
+		if details := usage.InputTokensDetails; details != nil && details.CachedTokens > 0 {
+			usage.PromptTokensDetails.CachedTokens = details.CachedTokens
+			return
+		}
+		if n, found := extractCachedTokensFromBody(responseBody); found {
+			usage.PromptTokensDetails.CachedTokens = n
+			return
+		}
+		if usage.PromptCacheHitTokens > 0 {
+			usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
+		}
+
+	case constant.ChannelTypeMoonshot:
+		// Moonshot reports cached_tokens at a non-standard location:
+		// choices[].usage.cached_tokens
+		if usage.PromptTokensDetails.CachedTokens != 0 {
+			return
+		}
+		if details := usage.InputTokensDetails; details != nil && details.CachedTokens > 0 {
+			usage.PromptTokensDetails.CachedTokens = details.CachedTokens
+			return
+		}
+		if n, found := extractMoonshotCachedTokensFromBody(responseBody); found {
+			usage.PromptTokensDetails.CachedTokens = n
+			return
+		}
+		if n, found := extractCachedTokensFromBody(responseBody); found {
+			usage.PromptTokensDetails.CachedTokens = n
+			return
+		}
+		if usage.PromptCacheHitTokens > 0 {
+			usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
+		}
+
+	case constant.ChannelTypeOpenAI:
+		// Fall back to the llama.cpp-style body field when nothing was decoded.
+		if usage.PromptTokensDetails.CachedTokens != 0 {
+			return
+		}
+		if n, found := extractLlamaCachedTokensFromBody(responseBody); found {
+			usage.PromptTokensDetails.CachedTokens = n
+		}
+	}
+}
+
+// extractCachedTokensFromBody decodes body and returns the first cached-token
+// field that is present under "usage", checked in this order:
+// usage.prompt_tokens_details.cached_tokens, usage.cached_tokens,
+// usage.prompt_cache_hit_tokens.
+//
+// The boolean is false when body is empty, cannot be decoded, or contains none
+// of those fields. A field that is present with value 0 still yields (0, true).
+func extractCachedTokensFromBody(body []byte) (int, bool) {
+	if len(body) == 0 {
+		return 0, false
+	}
+
+	var parsed struct {
+		Usage struct {
+			PromptTokensDetails struct {
+				CachedTokens *int `json:"cached_tokens"`
+			} `json:"prompt_tokens_details"`
+			CachedTokens         *int `json:"cached_tokens"`
+			PromptCacheHitTokens *int `json:"prompt_cache_hit_tokens"`
+		} `json:"usage"`
+	}
+	if common.Unmarshal(body, &parsed) != nil {
+		return 0, false
+	}
+
+	// Pointers distinguish "absent" from "present and zero"; first present wins.
+	candidates := [...]*int{
+		parsed.Usage.PromptTokensDetails.CachedTokens,
+		parsed.Usage.CachedTokens,
+		parsed.Usage.PromptCacheHitTokens,
+	}
+	for _, candidate := range candidates {
+		if candidate != nil {
+			return *candidate, true
+		}
+	}
+	return 0, false
+}
+
+// extractMoonshotCachedTokensFromBody reads cached_tokens from Moonshot's
+// non-standard location.
+// Moonshot streaming response shape: {"choices":[{"usage":{"cached_tokens":111}}]}
+//
+// It returns the first choice's cached_tokens that is present and greater than
+// zero. The boolean is false when body is empty, cannot be decoded, or no
+// choice carries a positive value.
+func extractMoonshotCachedTokensFromBody(body []byte) (int, bool) {
+	if len(body) == 0 {
+		return 0, false
+	}
+
+	var parsed struct {
+		Choices []struct {
+			Usage struct {
+				CachedTokens *int `json:"cached_tokens"`
+			} `json:"usage"`
+		} `json:"choices"`
+	}
+	if common.Unmarshal(body, &parsed) != nil {
+		return 0, false
+	}
+
+	// Walk the choices in order and stop at the first positive count.
+	for i := range parsed.Choices {
+		cached := parsed.Choices[i].Usage.CachedTokens
+		if cached == nil || *cached <= 0 {
+			continue
+		}
+		return *cached, true
+	}
+	return 0, false
+}
+
+// extractLlamaCachedTokensFromBody reads cache_n from llama.cpp's non-standard
+// location, timings.cache_n.
+//
+// The boolean is false when body is empty, cannot be decoded, or has no
+// timings.cache_n field. A present value of 0 still yields (0, true).
+func extractLlamaCachedTokensFromBody(body []byte) (int, bool) {
+	if len(body) == 0 {
+		return 0, false
+	}
+
+	var parsed struct {
+		Timings struct {
+			CachedTokens *int `json:"cache_n"`
+		} `json:"timings"`
+	}
+	if common.Unmarshal(body, &parsed) != nil {
+		return 0, false
+	}
+
+	if cacheN := parsed.Timings.CachedTokens; cacheN != nil {
+		return *cacheN, true
+	}
+	return 0, false
+}
@@ -92,7 +92,7 @@ func streamResponseTencent2OpenAI(TencentResponse *TencentChatResponse) *dto.Cha

 func tencentStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
 	var responseText string
-	scanner := bufio.NewScanner(resp.Body)
+	scanner := helper.NewStreamScanner(resp.Body)
 	scanner.Split(bufio.ScanLines)

 	helper.SetEventStreamHeaders(c)
@@ -45,6 +45,7 @@ var claudeModelMap = map[string]string{
 	"claude-opus-4-5-20251101":   "claude-opus-4-5@20251101",
 	"claude-opus-4-6":            "claude-opus-4-6",
 	"claude-opus-4-7":            "claude-opus-4-7",
+	"claude-opus-4-8":            "claude-opus-4-8",
 }

 const anthropicVersion = "vertex-2023-10-16"
@@ -114,7 +114,7 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
 	switch info.RelayMode {
 	case constant.RelayModeImagesGenerations, constant.RelayModeImagesEdits:
-		usage, err = openai.OpenaiHandlerWithUsage(c, info, resp)
+		usage, err = openai.OpenaiImageHandler(c, info, resp)
 	case constant.RelayModeResponses:
 		if info.IsStream {
 			usage, err = openai.OaiResponsesStreamHandler(c, info, resp)
@@ -157,7 +157,7 @@ func streamMetaResponseZhipu2OpenAI(zhipuResponse *ZhipuStreamMetaResponse) (*dt

 func zhipuStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
 	var usage *dto.Usage
-	scanner := bufio.NewScanner(resp.Body)
+	scanner := helper.NewStreamScanner(resp.Body)
 	scanner.Split(bufio.ScanLines)
 	dataChan := make(chan string)
 	metaChan := make(chan string)
@@ -180,6 +180,9 @@ func zhipuStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.
 				}
 			}
 		}
+		if err := scanner.Err(); err != nil {
+			common.SysLog("error reading stream: " + err.Error())
+		}
 		stopChan <- true
 	}()
 	helper.SetEventStreamHeaders(c)
@@ -53,14 +53,17 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 	}

 	if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" &&
-		(strings.HasPrefix(request.Model, "claude-opus-4-6") || strings.HasPrefix(request.Model, "claude-opus-4-7")) {
+		(strings.HasPrefix(request.Model, "claude-opus-4-6") ||
+			strings.HasPrefix(request.Model, "claude-opus-4-7") ||
+			strings.HasPrefix(request.Model, "claude-opus-4-8")) {
 		request.Model = baseModel
 		request.Thinking = &dto.Thinking{
 			Type: "adaptive",
 		}
 		request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
-		if strings.HasPrefix(request.Model, "claude-opus-4-7") {
-			// Opus 4.7 rejects non-default temperature/top_p/top_k with 400
+		if strings.HasPrefix(request.Model, "claude-opus-4-7") ||
+			strings.HasPrefix(request.Model, "claude-opus-4-8") {
+			// Opus 4.7/4.8 reject non-default temperature/top_p/top_k with 400
 			// and defaults display to "omitted"; restore the 4.6 visible summary.
 			request.Thinking.Display = "summarized"
 			request.Temperature = nil
@@ -74,8 +77,9 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 		strings.HasSuffix(request.Model, "-thinking") {
 		if request.Thinking == nil {
 			baseModel := strings.TrimSuffix(request.Model, "-thinking")
-			if strings.HasPrefix(baseModel, "claude-opus-4-7") {
-				// Opus 4.7 rejects thinking.type="enabled"; use adaptive at high effort.
+			if strings.HasPrefix(baseModel, "claude-opus-4-7") ||
+				strings.HasPrefix(baseModel, "claude-opus-4-8") {
+				// Opus 4.7/4.8 reject thinking.type="enabled"; use adaptive at high effort.
 				request.Thinking = &dto.Thinking{Type: "adaptive", Display: "summarized"}
 				request.OutputConfig = json.RawMessage(`{"effort":"high"}`)
 				request.Temperature = nil
@@ -151,6 +155,7 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 		if err != nil {
 			return types.NewErrorWithStatusCode(err, types.ErrorCodeReadRequestBodyFailed, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
 		}
+		info.UpstreamRequestBodySize = storage.Size()
 		requestBody = common.ReaderOnly(storage)
 	} else {
 		convertedRequest, err := adaptor.ConvertClaudeRequest(c, info, request)
@@ -0,0 +1,71 @@
+package helper
+
+import (
+	"bytes"
+	"io"
+	"mime/multipart"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"testing"
+
+	"github.com/QuantumNous/new-api/common"
+	relayconstant "github.com/QuantumNous/new-api/relay/constant"
+	"github.com/gin-gonic/gin"
+	"github.com/stretchr/testify/require"
+)
+
+// TestGetAndValidOpenAIImageRequestMultipartStream verifies multipart image
+// edit parsing: the stream field is parsed and validated, and the request body
+// stays replayable for the upstream request.
+func TestGetAndValidOpenAIImageRequestMultipartStream(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// newContext builds a gin test context carrying a multipart/form-data
+	// POST to /v1/images/edits with model, prompt and stream fields, plus an
+	// optional fake "image" file part. It returns the context and the exact
+	// encoded body so callers can check the body is still readable afterwards.
+	newContext := func(t *testing.T, streamValue string, withImage bool) (*gin.Context, string) {
+		// Report require failures at the calling subtest, not inside this helper.
+		t.Helper()
+
+		var body bytes.Buffer
+		writer := multipart.NewWriter(&body)
+		require.NoError(t, writer.WriteField("model", "gpt-image-1"))
+		require.NoError(t, writer.WriteField("prompt", "edit this image"))
+		require.NoError(t, writer.WriteField("stream", streamValue))
+		if withImage {
+			part, err := writer.CreateFormFile("image", "input.png")
+			require.NoError(t, err)
+			_, err = part.Write([]byte("fake image"))
+			require.NoError(t, err)
+		}
+		require.NoError(t, writer.Close())
+		// Snapshot the encoded form before the request consumes the buffer.
+		originalBody := body.String()
+
+		c, _ := gin.CreateTestContext(httptest.NewRecorder())
+		c.Request = httptest.NewRequest(http.MethodPost, "/v1/images/edits", &body)
+		c.Request.Header.Set("Content-Type", writer.FormDataContentType())
+		return c, originalBody
+	}
+
+	t.Run("valid stream value keeps body replayable", func(t *testing.T) {
+		c, originalBody := newContext(t, "true", true)
+
+		req, err := GetAndValidOpenAIImageRequest(c, relayconstant.RelayModeImagesEdits)
+		require.NoError(t, err)
+		require.NotNil(t, req.Stream)
+		require.True(t, *req.Stream)
+		require.True(t, req.IsStream(c))
+
+		// The body must still read back byte-for-byte after validation.
+		bodyAfterValidation, err := io.ReadAll(c.Request.Body)
+		require.NoError(t, err)
+		require.Equal(t, originalBody, string(bodyAfterValidation))
+
+		// Parsing the form again must still see both the field and the file.
+		form, err := common.ParseMultipartFormReusable(c)
+		require.NoError(t, err)
+		require.Equal(t, "true", url.Values(form.Value).Get("stream"))
+		require.Len(t, form.File["image"], 1)
+	})
+
+	t.Run("invalid stream value is rejected", func(t *testing.T) {
+		c, _ := newContext(t, "notabool", false)
+
+		_, err := GetAndValidOpenAIImageRequest(c, relayconstant.RelayModeImagesEdits)
+		require.Error(t, err)
+		require.Contains(t, err.Error(), "invalid stream value")
+	})
+}
@@ -22,8 +22,8 @@ import (
 )

 const (
-	InitialScannerBufferSize    = 64 << 10 // 64KB (64*1024)
-	DefaultMaxScannerBufferSize = 64 << 20 // 64MB (64*1024*1024) default SSE buffer size
+	InitialScannerBufferSize    = 64 << 10  // 64KB (64*1024)
+	DefaultMaxScannerBufferSize = 128 << 20 // 128MB (128*1024*1024) default SSE buffer size
 	DefaultPingInterval         = 10 * time.Second
 )

@@ -34,6 +34,12 @@ func getScannerBufferSize() int {
 	return DefaultMaxScannerBufferSize
 }

+// NewStreamScanner wraps reader in a bufio.Scanner whose buffer starts at
+// InitialScannerBufferSize bytes and may grow up to getScannerBufferSize().
+// The split function is left at the bufio default; callers set it themselves.
+func NewStreamScanner(reader io.Reader) *bufio.Scanner {
+	initial := make([]byte, InitialScannerBufferSize)
+	s := bufio.NewScanner(reader)
+	s.Buffer(initial, getScannerBufferSize())
+	return s
+}
+
 func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, dataHandler func(data string, sr *StreamResult)) {

 	if resp == nil || dataHandler == nil {
@@ -54,7 +60,7 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon

 	var (
 		stopChan   = make(chan bool, 3) // 增加缓冲区避免阻塞
-		scanner    = bufio.NewScanner(resp.Body)
+		scanner    = NewStreamScanner(resp.Body)
 		ticker     = time.NewTicker(streamingTimeout)
 		pingTicker *time.Ticker
 		writeMutex sync.Mutex     // Mutex to protect concurrent writes
@@ -104,7 +110,6 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
 		close(stopChan)
 	}()

-	scanner.Buffer(make([]byte, InitialScannerBufferSize), getScannerBufferSize())
 	scanner.Split(bufio.ScanLines)
 	SetEventStreamHeaders(c)

@@ -1,6 +1,7 @@
 package helper

 import (
+	"bufio"
 	"fmt"
 	"io"
 	"net/http"
@@ -81,6 +82,22 @@ func TestStreamScannerHandler_NilInputs(t *testing.T) {
 	StreamScannerHandler(c, &http.Response{Body: io.NopCloser(strings.NewReader(""))}, info, nil)
 }

+// TestNewStreamScanner_AllowsLargeStreamLine checks that a scanner built by
+// NewStreamScanner can read a single 128 KiB line, which is larger than
+// bufio.Scanner's default token limit (bufio.MaxScanTokenSize, 64 KiB).
+func TestNewStreamScanner_AllowsLargeStreamLine(t *testing.T) {
+	// Temporarily override the package-level setting and restore it afterwards.
+	// NOTE(review): presumably this caps the scanner buffer at 1 MB via
+	// getScannerBufferSize — confirm against that function.
+	oldBufferMB := constant.StreamScannerMaxBufferMB
+	constant.StreamScannerMaxBufferMB = 1
+	t.Cleanup(func() {
+		constant.StreamScannerMaxBufferMB = oldBufferMB
+	})
+
+	// One SSE-style "data: ..." line with a 128 KiB payload.
+	payload := strings.Repeat("x", 128<<10)
+	scanner := NewStreamScanner(strings.NewReader("data: " + payload + "\n"))
+	scanner.Split(bufio.ScanLines)
+
+	// The whole line must come back as a single token with no scanner error.
+	require.True(t, scanner.Scan())
+	assert.Equal(t, "data: "+payload, scanner.Text())
+	require.NoError(t, scanner.Err())
+}
+
 func TestStreamScannerHandler_EmptyBody(t *testing.T) {
 	t.Parallel()

@@ -614,7 +631,7 @@ func TestStreamScannerHandler_StreamStatus_InitializedIfNil(t *testing.T) {
 	assert.NotNil(t, info.StreamStatus)
 }

-func TestStreamScannerHandler_StreamStatus_PreInitialized(t *testing.T) {
+func TestStreamScannerHandler_StreamStatus_ReplacesPreInitialized(t *testing.T) {
 	t.Parallel()

 	body := buildSSEBody(5)
@@ -626,7 +643,7 @@ func TestStreamScannerHandler_StreamStatus_PreInitialized(t *testing.T) {
 	StreamScannerHandler(c, resp, info, func(data string, sr *StreamResult) {})

 	assert.Equal(t, relaycommon.StreamEndReasonDone, info.StreamStatus.EndReason)
-	assert.Equal(t, 1, info.StreamStatus.TotalErrorCount())
+	assert.Equal(t, 0, info.StreamStatus.TotalErrorCount())
 }

 func TestStreamScannerHandler_PingInterleavesWithSlowUpstream(t *testing.T) {
@@ -4,6 +4,8 @@ import (
 	"errors"
 	"fmt"
 	"math"
+	"net/url"
+	"strconv"
 	"strings"

 	"github.com/QuantumNous/new-api/common"
@@ -144,16 +146,25 @@ func GetAndValidOpenAIImageRequest(c *gin.Context, relayMode int) (*dto.ImageReq
 	switch relayMode {
 	case relayconstant.RelayModeImagesEdits:
 		if strings.Contains(c.Request.Header.Get("Content-Type"), "multipart/form-data") {
-			_, err := c.MultipartForm()
+			form, err := common.ParseMultipartFormReusable(c)
 			if err != nil {
 				return nil, fmt.Errorf("failed to parse image edit form request: %w", err)
 			}
-			formData := c.Request.PostForm
+			formData := url.Values(form.Value)
+			c.Request.MultipartForm = form
+			c.Request.PostForm = formData
 			imageRequest.Prompt = formData.Get("prompt")
 			imageRequest.Model = formData.Get("model")
 			imageRequest.N = common.GetPointer(uint(common.String2Int(formData.Get("n"))))
 			imageRequest.Quality = formData.Get("quality")
 			imageRequest.Size = formData.Get("size")
+			if streamValue := strings.TrimSpace(formData.Get("stream")); streamValue != "" {
+				stream, err := strconv.ParseBool(streamValue)
+				if err != nil {
+					return nil, fmt.Errorf("invalid stream value: %w", err)
+				}
+				imageRequest.Stream = common.GetPointer(stream)
+			}
 			if imageValue := formData.Get("image"); imageValue != "" {
 				imageRequest.Image, _ = common.Marshal(imageValue)
 			}
@@ -17,9 +17,10 @@ func SetApiRouter(router *gin.Engine) {
 	apiRouter.Use(gzip.Gzip(gzip.DefaultCompression))
 	apiRouter.Use(middleware.BodyStorageCleanup()) // 清理请求体存储
 	apiRouter.Use(middleware.GlobalAPIRateLimit())
+	anonymousRequestBodyLimit := middleware.AnonymousRequestBodyLimit()
 	{
 		apiRouter.GET("/setup", controller.GetSetup)
-		apiRouter.POST("/setup", controller.PostSetup)
+		apiRouter.POST("/setup", anonymousRequestBodyLimit, controller.PostSetup)
 		apiRouter.GET("/status", controller.GetStatus)
 		apiRouter.GET("/uptime/status", controller.GetUptimeKumaStatus)
 		apiRouter.GET("/models", middleware.UserAuth(), controller.DashboardListModels)
@@ -40,39 +41,39 @@ func SetApiRouter(router *gin.Engine) {
 		apiRouter.GET("/rankings", middleware.HeaderNavModuleAuth("rankings"), controller.GetRankings)
 		apiRouter.GET("/verification", middleware.EmailVerificationRateLimit(), middleware.TurnstileCheck(), controller.SendEmailVerification)
 		apiRouter.GET("/reset_password", middleware.CriticalRateLimit(), middleware.TurnstileCheck(), controller.SendPasswordResetEmail)
-		apiRouter.POST("/user/reset", middleware.CriticalRateLimit(), controller.ResetPassword)
+		apiRouter.POST("/user/reset", middleware.CriticalRateLimit(), anonymousRequestBodyLimit, controller.ResetPassword)
 		// OAuth routes - specific routes must come before :provider wildcard
 		apiRouter.GET("/oauth/state", middleware.CriticalRateLimit(), controller.GenerateOAuthCode)
-		apiRouter.POST("/oauth/email/bind", middleware.CriticalRateLimit(), controller.EmailBind)
+		apiRouter.POST("/oauth/email/bind", middleware.CriticalRateLimit(), anonymousRequestBodyLimit, controller.EmailBind)
 		// Non-standard OAuth (WeChat, Telegram) - keep original routes
 		apiRouter.GET("/oauth/wechat", middleware.CriticalRateLimit(), controller.WeChatAuth)
-		apiRouter.POST("/oauth/wechat/bind", middleware.CriticalRateLimit(), controller.WeChatBind)
+		apiRouter.POST("/oauth/wechat/bind", middleware.CriticalRateLimit(), anonymousRequestBodyLimit, controller.WeChatBind)
 		apiRouter.GET("/oauth/telegram/login", middleware.CriticalRateLimit(), controller.TelegramLogin)
 		apiRouter.GET("/oauth/telegram/bind", middleware.CriticalRateLimit(), controller.TelegramBind)
 		// Standard OAuth providers (GitHub, Discord, OIDC, LinuxDO) - unified route
 		apiRouter.GET("/oauth/:provider", middleware.CriticalRateLimit(), controller.HandleOAuth)
 		apiRouter.GET("/ratio_config", middleware.CriticalRateLimit(), controller.GetRatioConfig)

-		apiRouter.POST("/stripe/webhook", controller.StripeWebhook)
-		apiRouter.POST("/creem/webhook", controller.CreemWebhook)
-		apiRouter.POST("/waffo/webhook", controller.WaffoWebhook)
+		apiRouter.POST("/stripe/webhook", anonymousRequestBodyLimit, controller.StripeWebhook)
+		apiRouter.POST("/creem/webhook", anonymousRequestBodyLimit, controller.CreemWebhook)
+		apiRouter.POST("/waffo/webhook", anonymousRequestBodyLimit, controller.WaffoWebhook)
 		// :env separates test vs prod URLs so the operator can register each
 		// in Pancake's matching webhook slot; handler enforces env match.
-		apiRouter.POST("/waffo-pancake/webhook/:env", controller.WaffoPancakeWebhook)
+		apiRouter.POST("/waffo-pancake/webhook/:env", anonymousRequestBodyLimit, controller.WaffoPancakeWebhook)

 		// Universal secure verification routes
 		apiRouter.POST("/verify", middleware.UserAuth(), middleware.CriticalRateLimit(), controller.UniversalVerify)

 		userRoute := apiRouter.Group("/user")
 		{
-			userRoute.POST("/register", middleware.CriticalRateLimit(), middleware.TurnstileCheck(), controller.Register)
-			userRoute.POST("/login", middleware.CriticalRateLimit(), middleware.TurnstileCheck(), controller.Login)
-			userRoute.POST("/login/2fa", middleware.CriticalRateLimit(), controller.Verify2FALogin)
-			userRoute.POST("/passkey/login/begin", middleware.CriticalRateLimit(), controller.PasskeyLoginBegin)
-			userRoute.POST("/passkey/login/finish", middleware.CriticalRateLimit(), controller.PasskeyLoginFinish)
+			userRoute.POST("/register", middleware.CriticalRateLimit(), anonymousRequestBodyLimit, middleware.TurnstileCheck(), controller.Register)
+			userRoute.POST("/login", middleware.CriticalRateLimit(), anonymousRequestBodyLimit, middleware.TurnstileCheck(), controller.Login)
+			userRoute.POST("/login/2fa", middleware.CriticalRateLimit(), anonymousRequestBodyLimit, controller.Verify2FALogin)
+			userRoute.POST("/passkey/login/begin", middleware.CriticalRateLimit(), anonymousRequestBodyLimit, controller.PasskeyLoginBegin)
+			userRoute.POST("/passkey/login/finish", middleware.CriticalRateLimit(), anonymousRequestBodyLimit, controller.PasskeyLoginFinish)
 			//userRoute.POST("/tokenlog", middleware.CriticalRateLimit(), controller.TokenLog)
 			userRoute.GET("/logout", controller.Logout)
-			userRoute.POST("/epay/notify", controller.EpayNotify)
+			userRoute.POST("/epay/notify", anonymousRequestBodyLimit, controller.EpayNotify)
 			userRoute.GET("/epay/notify", controller.EpayNotify)
 			userRoute.GET("/groups", controller.GetUserGroups)

@@ -176,16 +177,15 @@ func SetApiRouter(router *gin.Engine) {
 		}

 		// Subscription payment callbacks (no auth)
-		apiRouter.POST("/subscription/epay/notify", controller.SubscriptionEpayNotify)
+		apiRouter.POST("/subscription/epay/notify", anonymousRequestBodyLimit, controller.SubscriptionEpayNotify)
 		apiRouter.GET("/subscription/epay/notify", controller.SubscriptionEpayNotify)
 		apiRouter.GET("/subscription/epay/return", controller.SubscriptionEpayReturn)
-		apiRouter.POST("/subscription/epay/return", controller.SubscriptionEpayReturn)
+		apiRouter.POST("/subscription/epay/return", anonymousRequestBodyLimit, controller.SubscriptionEpayReturn)
 		optionRoute := apiRouter.Group("/option")
 		optionRoute.Use(middleware.RootAuth())
 		{
 			optionRoute.GET("/", controller.GetOptions)
 			optionRoute.PUT("/", controller.UpdateOption)
-			optionRoute.POST("/payment_compliance", controller.ConfirmPaymentCompliance)
 			optionRoute.GET("/channel_affinity_cache", controller.GetChannelAffinityCacheStats)
 			optionRoute.DELETE("/channel_affinity_cache", controller.ClearChannelAffinityCache)
 			optionRoute.POST("/rest_model_ratio", controller.ResetModelRatio)
@@ -346,6 +346,30 @@ func SetApiRouter(router *gin.Engine) {
 			taskRoute.GET("/", middleware.AdminAuth(), controller.GetAllTask)
 		}

+		// Document routes (public)
+		docsPublic := apiRouter.Group("/docs")
+		docsPublic.Use(middleware.TryUserAuth())
+		{
+			docsPublic.GET("/categories", controller.GetCategories)
+			docsPublic.GET("/", controller.GetDocuments)
+			docsPublic.GET("/:slug", controller.GetDocument)
+		}
+
+		// Document routes (admin) - use /admin/docs to avoid conflict with /:slug
+		docsAdmin := apiRouter.Group("/admin/docs")
+		docsAdmin.Use(middleware.AdminAuth())
+		{
+			docsAdmin.GET("/categories", controller.GetCategories)
+			docsAdmin.POST("/categories", controller.CreateCategory)
+			docsAdmin.PUT("/categories/:id", controller.UpdateCategory)
+			docsAdmin.DELETE("/categories/:id", controller.DeleteCategory)
+			docsAdmin.GET("/", controller.GetDocuments)
+			docsAdmin.POST("/", controller.CreateDocument)
+			docsAdmin.PUT("/:id", controller.UpdateDocument)
+			docsAdmin.DELETE("/:id", controller.DeleteDocument)
+			docsAdmin.GET("/:id/versions", controller.GetDocumentVersions)
+		}
+
 		vendorRoute := apiRouter.Group("/vendors")
 		vendorRoute.Use(middleware.AdminAuth())
 		{
@@ -13,7 +13,7 @@ import (
 	"github.com/gin-gonic/gin"
 )

-// ThemeAssets holds the embedded frontend assets for both themes.
+// ThemeAssets holds the embedded frontend assets for all themes.
 type ThemeAssets struct {
 	DefaultBuildFS   embed.FS
 	DefaultIndexPage []byte
@@ -37,9 +37,10 @@ func SetWebRouter(router *gin.Engine, assets ThemeAssets) {
 			return
 		}
 		c.Header("Cache-Control", "no-cache")
-		if common.GetTheme() == "classic" {
+		switch common.GetTheme() {
+		case "classic":
 			c.Data(http.StatusOK, "text/html; charset=utf-8", assets.ClassicIndexPage)
-		} else {
+		default:
 			c.Data(http.StatusOK, "text/html; charset=utf-8", assets.DefaultIndexPage)
 		}
 	})
@@ -641,6 +641,38 @@ func ShouldSkipRetryAfterChannelAffinityFailure(c *gin.Context) bool {
 	return meta.SkipRetry
 }

+// ClearCurrentChannelAffinityCache deletes the channel-affinity cache entry
+// whose key is attached to the request context c.
+//
+// It returns false when c is nil, when no non-empty cache key is available
+// from the context, or when the delete call fails (the failure is logged).
+// After a successful delete call it sets ginKeyChannelAffinitySkipRetry to
+// false on c and returns true only if the cache reports that an entry was
+// actually removed.
+func ClearCurrentChannelAffinityCache(c *gin.Context) bool {
+	if c == nil {
+		return false
+	}
+
+	cacheKey, _, found := getChannelAffinityContext(c)
+	if cacheKey == "" || !found {
+		return false
+	}
+
+	results, err := getChannelAffinityCache().DeleteMany([]string{cacheKey})
+	if err != nil {
+		common.SysError(fmt.Sprintf("channel affinity cache delete current failed: err=%v", err))
+		return false
+	}
+
+	// The delete call succeeded, so reset the skip-retry flag on this request
+	// whether or not an entry was actually present.
+	c.Set(ginKeyChannelAffinitySkipRetry, false)
+
+	for _, removed := range results {
+		if removed {
+			return true
+		}
+	}
+	return false
+}
+
+// ShouldKeepChannelAffinityOnChannelDisabled returns the KeepOnChannelDisabled
+// flag of the current channel-affinity setting, or false when no setting is
+// available.
+func ShouldKeepChannelAffinityOnChannelDisabled() bool {
+	if setting := operation_setting.GetChannelAffinitySetting(); setting != nil {
+		return setting.KeepOnChannelDisabled
+	}
+	return false
+}
+
 func MarkChannelAffinityUsed(c *gin.Context, selectedGroup string, channelID int) {
 	if c == nil || channelID <= 0 {
 		return
@@ -236,6 +236,33 @@ func TestGetPreferredChannelByAffinity_RequestHeaderKeySource(t *testing.T) {
 	require.Equal(t, buildChannelAffinityKeyHint(affinityValue), meta.KeyHint)
 }

+// TestClearCurrentChannelAffinityCache seeds one affinity cache entry, points a
+// request context at it with SkipRetry enabled, and checks that
+// ClearCurrentChannelAffinityCache removes the entry and turns skip-retry off.
+func TestClearCurrentChannelAffinityCache(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// A nanosecond timestamp keeps the key unique across runs of this test.
+	cacheKeySuffix := fmt.Sprintf("codex cli trace:default:clear-current-%d", time.Now().UnixNano())
+	cacheKeyFull := channelAffinityCacheNamespace + ":" + cacheKeySuffix
+	cache := getChannelAffinityCache()
+	require.NoError(t, cache.SetWithTTL(cacheKeySuffix, 9527, time.Minute))
+	// Best-effort removal in case the function under test did not delete it.
+	t.Cleanup(func() {
+		_, _ = cache.DeleteMany([]string{cacheKeySuffix})
+	})
+
+	// The context meta carries the namespaced key, while the cache calls in
+	// this test use the un-prefixed suffix.
+	ctx := buildChannelAffinityTemplateContextForTest(channelAffinityMeta{
+		CacheKey:   cacheKeyFull,
+		TTLSeconds: 60,
+		RuleName:   "codex cli trace",
+		SkipRetry:  true,
+	})
+	require.True(t, ShouldSkipRetryAfterChannelAffinityFailure(ctx))
+
+	// Clearing must report a deletion, leave a cache miss, and reset skip-retry.
+	deleted := ClearCurrentChannelAffinityCache(ctx)
+	require.True(t, deleted)
+	_, found, err := cache.Get(cacheKeySuffix)
+	require.NoError(t, err)
+	require.False(t, found)
+	require.False(t, ShouldSkipRetryAfterChannelAffinityFailure(ctx))
+}
+
 func TestChannelAffinityHitCodexTemplatePassHeadersEffective(t *testing.T) {
 	gin.SetMode(gin.TestMode)

@@ -37,6 +37,7 @@ func InitHttpClient() {
 	transport := &http.Transport{
 		MaxIdleConns:        common.RelayMaxIdleConns,
 		MaxIdleConnsPerHost: common.RelayMaxIdleConnsPerHost,
+		IdleConnTimeout:     time.Duration(common.RelayIdleConnTimeout) * time.Second,
 		ForceAttemptHTTP2:   true,
 		Proxy:               http.ProxyFromEnvironment, // Support HTTP_PROXY, HTTPS_PROXY, NO_PROXY env vars
 	}
@@ -108,6 +109,7 @@ func NewProxyHttpClient(proxyURL string) (*http.Client, error) {
 		transport := &http.Transport{
 			MaxIdleConns:        common.RelayMaxIdleConns,
 			MaxIdleConnsPerHost: common.RelayMaxIdleConnsPerHost,
+			IdleConnTimeout:     time.Duration(common.RelayIdleConnTimeout) * time.Second,
 			ForceAttemptHTTP2:   true,
 			Proxy:               http.ProxyURL(parsedURL),
 		}
@@ -147,6 +149,7 @@ func NewProxyHttpClient(proxyURL string) (*http.Client, error) {
 		transport := &http.Transport{
 			MaxIdleConns:        common.RelayMaxIdleConns,
 			MaxIdleConnsPerHost: common.RelayMaxIdleConnsPerHost,
+			IdleConnTimeout:     time.Duration(common.RelayIdleConnTimeout) * time.Second,
 			ForceAttemptHTTP2:   true,
 			DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
 				return dialer.Dial(network, addr)
@@ -28,11 +28,12 @@ type ChannelAffinityRule struct {
 }

 type ChannelAffinitySetting struct {
-	Enabled           bool                  `json:"enabled"`
-	SwitchOnSuccess   bool                  `json:"switch_on_success"`
-	MaxEntries        int                   `json:"max_entries"`
-	DefaultTTLSeconds int                   `json:"default_ttl_seconds"`
-	Rules             []ChannelAffinityRule `json:"rules"`
+	Enabled               bool                  `json:"enabled"`
+	SwitchOnSuccess       bool                  `json:"switch_on_success"`
+	KeepOnChannelDisabled bool                  `json:"keep_on_channel_disabled"`
+	MaxEntries            int                   `json:"max_entries"`
+	DefaultTTLSeconds     int                   `json:"default_ttl_seconds"`
+	Rules                 []ChannelAffinityRule `json:"rules"`
 }

 var codexCliPassThroughHeaders = []string{
@@ -74,10 +75,11 @@ func buildPassHeaderTemplate(headers []string) map[string]interface{} {
 }

 var channelAffinitySetting = ChannelAffinitySetting{
-	Enabled:           true,
-	SwitchOnSuccess:   true,
-	MaxEntries:        100_000,
-	DefaultTTLSeconds: 3600,
+	Enabled:               true,
+	SwitchOnSuccess:       true,
+	KeepOnChannelDisabled: false,
+	MaxEntries:            100_000,
+	DefaultTTLSeconds:     3600,
 	Rules: []ChannelAffinityRule{
 		{
 			Name:       "codex cli trace",
@@ -5,16 +5,8 @@ import "github.com/QuantumNous/new-api/setting/config"
 type PaymentSetting struct {
 	AmountOptions  []int           `json:"amount_options"`
 	AmountDiscount map[int]float64 `json:"amount_discount"` // 充值金额对应的折扣，例如 100 元 0.9 表示 100 元充值享受 9 折优惠
-
-	ComplianceConfirmed    bool   `json:"compliance_confirmed"`
-	ComplianceTermsVersion string `json:"compliance_terms_version"`
-	ComplianceConfirmedAt  int64  `json:"compliance_confirmed_at"`
-	ComplianceConfirmedBy  int    `json:"compliance_confirmed_by"`
-	ComplianceConfirmedIP  string `json:"compliance_confirmed_ip"`
 }

-const CurrentComplianceTermsVersion = "v1"
-
 // 默认配置
 var paymentSetting = PaymentSetting{
 	AmountOptions:  []int{10, 20, 50, 100, 200, 500},
@@ -29,8 +21,3 @@ func init() {
 func GetPaymentSetting() *PaymentSetting {
 	return &paymentSetting
 }
-
-func IsPaymentComplianceConfirmed() bool {
-	return paymentSetting.ComplianceConfirmed &&
-		paymentSetting.ComplianceTermsVersion == CurrentComplianceTermsVersion
-}
@@ -71,6 +71,13 @@ var defaultCacheRatio = map[string]float64{
 	"claude-opus-4-7-high":                0.1,
 	"claude-opus-4-7-medium":              0.1,
 	"claude-opus-4-7-low":                 0.1,
+	"claude-opus-4-8":                     0.1,
+	"claude-opus-4-8-thinking":            0.1,
+	"claude-opus-4-8-max":                 0.1,
+	"claude-opus-4-8-xhigh":               0.1,
+	"claude-opus-4-8-high":                0.1,
+	"claude-opus-4-8-medium":              0.1,
+	"claude-opus-4-8-low":                 0.1,
 }

 var defaultCreateCacheRatio = map[string]float64{
@@ -106,6 +113,13 @@ var defaultCreateCacheRatio = map[string]float64{
 	"claude-opus-4-7-high":                1.25,
 	"claude-opus-4-7-medium":              1.25,
 	"claude-opus-4-7-low":                 1.25,
+	"claude-opus-4-8":                     1.25,
+	"claude-opus-4-8-thinking":            1.25,
+	"claude-opus-4-8-max":                 1.25,
+	"claude-opus-4-8-xhigh":               1.25,
+	"claude-opus-4-8-high":                1.25,
+	"claude-opus-4-8-medium":              1.25,
+	"claude-opus-4-8-low":                 1.25,
 }

 //var defaultCreateCacheRatio = map[string]float64{}
@@ -152,6 +152,12 @@ var defaultModelRatio = map[string]float64{
 	"claude-opus-4-7-high":                      2.5,
 	"claude-opus-4-7-medium":                    2.5,
 	"claude-opus-4-7-low":                       2.5,
+	"claude-opus-4-8":                           2.5,
+	"claude-opus-4-8-max":                       2.5,
+	"claude-opus-4-8-xhigh":                     2.5,
+	"claude-opus-4-8-high":                      2.5,
+	"claude-opus-4-8-medium":                    2.5,
+	"claude-opus-4-8-low":                       2.5,
 	"claude-3-opus-20240229":                    7.5, // $15 / 1M tokens
 	"claude-opus-4-20250514":                    7.5,
 	"claude-opus-4-1-20250805":                  7.5,
@@ -16,7 +16,7 @@
      content="A unified AI model hub for aggregation & distribution. It supports cross-converting various LLMs into OpenAI-compatible, Claude-compatible, or Gemini-compatible formats. A centralized gateway for personal and enterprise model management."
    />
    <meta name="generator" content="new-api" />
-    <title>New API</title>
+    <title>ModelsToken</title>
    <!--umami-->
    <!--Google Analytics-->
  </head>
@@ -24,6 +24,5 @@
  <body>
    <noscript>You need to enable JavaScript to run this app.</noscript>
    <div id="root"></div>
-    <script type="module" src="/src/index.jsx"></script>
  </body>
 </html>
@@ -4,30 +4,32 @@
  "private": true,
  "type": "module",
  "dependencies": {
+    "@douyinfe/semi-illustrations": "^2.69.1",
    "@douyinfe/semi-icons": "^2.63.1",
    "@douyinfe/semi-ui": "^2.69.1",
-    "@lobehub/icons": "^2.0.0",
+    "@lobehub/icons": "catalog:",
    "@visactor/react-vchart": "~1.8.8",
    "@visactor/vchart": "~1.8.8",
    "@visactor/vchart-semi-theme": "~1.8.8",
-    "axios": "1.15.2",
-    "clsx": "^2.1.1",
-    "dayjs": "^1.11.11",
+    "axios": "catalog:",
+    "clsx": "catalog:",
+    "dayjs": "catalog:",
    "history": "^5.3.0",
+    "highlight.js": "^11.11.1",
    "i18next": "^23.16.8",
    "i18next-browser-languagedetector": "^7.2.0",
    "katex": "^0.16.22",
    "lucide-react": "^0.511.0",
    "marked": "^4.1.1",
    "mermaid": "^11.6.0",
-    "qrcode.react": "^4.2.0",
-    "react": "^18.2.0",
-    "react-dom": "^18.2.0",
+    "qrcode.react": "catalog:",
+    "react": "^19.2.6",
+    "react-dom": "^19.2.6",
    "react-dropzone": "^14.2.3",
    "react-fireworks": "^1.0.4",
    "react-i18next": "^13.0.0",
-    "react-icons": "^5.5.0",
-    "react-markdown": "^10.1.0",
+    "react-icons": "catalog:",
+    "react-markdown": "catalog:",
    "react-router-dom": "^6.3.0",
    "react-telegram-login": "^1.1.2",
    "react-toastify": "^9.0.8",
@@ -35,20 +37,20 @@
    "rehype-highlight": "^7.0.2",
    "rehype-katex": "^7.0.1",
    "remark-breaks": "^4.0.0",
-    "remark-gfm": "^4.0.1",
+    "remark-gfm": "catalog:",
    "remark-math": "^6.0.0",
-    "sse.js": "^2.6.0",
+    "sse.js": "catalog:",
    "unist-util-visit": "^5.0.0",
    "use-debounce": "^10.0.4"
  },
  "scripts": {
-    "dev": "vite",
-    "build": "vite build",
+    "dev": "rsbuild dev",
+    "build": "rsbuild build",
    "lint": "prettier . --check",
    "lint:fix": "prettier . --write",
    "eslint": "bunx eslint \"**/*.{js,jsx}\" --cache",
    "eslint:fix": "bunx eslint \"**/*.{js,jsx}\" --fix --cache",
-    "preview": "vite preview",
+    "preview": "rsbuild preview",
    "i18n:extract": "bunx i18next-cli extract",
    "i18n:status": "bunx i18next-cli status",
    "i18n:sync": "bunx i18next-cli sync",
@@ -73,20 +75,19 @@
    ]
  },
  "devDependencies": {
-    "@douyinfe/vite-plugin-semi": "^2.74.0-alpha.6",
+    "@rsbuild/core": "^2.0.7",
+    "@rsbuild/plugin-react": "^2.0.0",
    "@so1ve/prettier-config": "^3.1.0",
-    "@vitejs/plugin-react": "^4.2.1",
    "autoprefixer": "^10.4.21",
-    "code-inspector-plugin": "^1.3.3",
    "eslint": "8.57.0",
    "eslint-plugin-header": "^3.1.1",
    "eslint-plugin-react-hooks": "^5.2.0",
    "i18next-cli": "^1.10.3",
    "postcss": "^8.5.3",
-    "prettier": "^3.0.0",
+    "prop-types": "^15.8.1",
+    "prettier": "catalog:",
    "tailwindcss": "^3",
-    "typescript": "4.4.2",
-    "vite": "^5.2.0"
+    "typescript": "4.4.2"
  },
  "prettier": {
    "singleQuote": true,
--- a/Show More
+++ b/Show More