Merge pull request #1172 from QuantumNous/alpha

feat: 0.8 version
🎨 feat(UI): add minimum width to progress bars in log tables
2025-06-07 21:36:07 +08:00 · 2025-06-07 21:03:16 +08:00 · 2025-06-07 16:00:55 +08:00 · 2025-06-07 13:18:50 +08:00 · 2025-06-07 12:32:20 +08:00 · 2025-06-07 12:26:23 +08:00
278 changed files with 33423 additions and 15693 deletions
@@ -1,14 +1,15 @@
-name: Publish Docker image (arm64)
+name: Publish Docker image (alpha)

 on:
  push:
-    tags:
-      - '*'
+    branches:
+      - alpha
  workflow_dispatch:
    inputs:
      name:
-        description: 'reason'
+        description: "reason"
        required: false
+
 jobs:
  push_to_registries:
    name: Push Docker image to multiple registries
@@ -22,13 +23,7 @@ jobs:

      - name: Save version info
        run: |
-          git describe --tags > VERSION 
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+          echo "alpha-$(date +'%Y%m%d')-$(git rev-parse --short HEAD)" > VERSION

      - name: Log in to Docker Hub
        uses: docker/login-action@v3
@@ -43,6 +38,9 @@ jobs:
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
@@ -50,6 +48,9 @@ jobs:
          images: |
            calciumion/new-api
            ghcr.io/${{ github.repository }}
+          tags: |
+            type=raw,value=alpha
+            type=raw,value=alpha-{{date 'YYYYMMDD'}}-{{sha}}

      - name: Build and push Docker images
        uses: docker/build-push-action@v5
@@ -58,4 +59,4 @@ jobs:
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
+          labels: ${{ steps.meta.outputs.labels }}
@@ -1,54 +0,0 @@
-name: Publish Docker image (amd64)
-
-on:
-  push:
-    tags:
-      - '*'
-  workflow_dispatch:
-    inputs:
-      name:
-        description: 'reason'
-        required: false
-jobs:
-  push_to_registries:
-    name: Push Docker image to multiple registries
-    runs-on: ubuntu-latest
-    permissions:
-      packages: write
-      contents: read
-    steps:
-      - name: Check out the repo
-        uses: actions/checkout@v4
-
-      - name: Save version info
-        run: |
-          git describe --tags > VERSION 
-
-      - name: Log in to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Extract metadata (tags, labels) for Docker
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            calciumion/new-api
-            ghcr.io/${{ github.repository }}
-
-      - name: Build and push Docker images
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
@@ -1,54 +0,0 @@
-name: Linux Release
-permissions:
-  contents: write
-
-on:
-  push:
-    tags:
-      - '*'
-      - '!*-alpha*'
-jobs:
-  release:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 18
-      - name: Build Frontend
-        env:
-          CI: ""
-        run: |
-          cd web
-          npm install
-          REACT_APP_VERSION=$(git describe --tags) npm run build
-          cd ..
-      - name: Set up Go
-        uses: actions/setup-go@v3
-        with:
-          go-version: '>=1.18.0'
-      - name: Build Backend (amd64)
-        run: |
-          go mod download
-          go build -ldflags "-s -w -X 'one-api/common.Version=$(git describe --tags)' -extldflags '-static'" -o one-api
-
-      - name: Build Backend (arm64)
-        run: |
-          sudo apt-get update
-          DEBIAN_FRONTEND=noninteractive sudo apt-get install -y gcc-aarch64-linux-gnu
-          CC=aarch64-linux-gnu-gcc CGO_ENABLED=1 GOOS=linux GOARCH=arm64 go build -ldflags "-s -w -X 'one-api/common.Version=$(git describe --tags)' -extldflags '-static'" -o one-api-arm64
-
-      - name: Release
-        uses: softprops/action-gh-release@v1
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            one-api
-            one-api-arm64
-          draft: true
-          generate_release_notes: true
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -1,45 +0,0 @@
-name: macOS Release
-permissions:
-  contents: write
-
-on:
-  push:
-    tags:
-      - '*'
-      - '!*-alpha*'
-jobs:
-  release:
-    runs-on: macos-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 18
-      - name: Build Frontend
-        env:
-          CI: ""
-        run: |
-          cd web
-          npm install
-          REACT_APP_VERSION=$(git describe --tags) npm run build
-          cd ..
-      - name: Set up Go
-        uses: actions/setup-go@v3
-        with:
-          go-version: '>=1.18.0'
-      - name: Build Backend
-        run: |
-          go mod download
-          go build -ldflags "-X 'one-api/common.Version=$(git describe --tags)'" -o one-api-macos
-      - name: Release
-        uses: softprops/action-gh-release@v1
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: one-api-macos
-          draft: true
-          generate_release_notes: true
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -1,48 +0,0 @@
-name: Windows Release
-permissions:
-  contents: write
-
-on:
-  push:
-    tags:
-      - '*'
-      - '!*-alpha*'
-jobs:
-  release:
-    runs-on: windows-latest
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 18
-      - name: Build Frontend
-        env:
-          CI: ""
-        run: |
-          cd web
-          npm install
-          REACT_APP_VERSION=$(git describe --tags) npm run build
-          cd ..
-      - name: Set up Go
-        uses: actions/setup-go@v3
-        with:
-          go-version: '>=1.18.0'
-      - name: Build Backend
-        run: |
-          go mod download
-          go build -ldflags "-s -w -X 'one-api/common.Version=$(git describe --tags)'" -o one-api.exe
-      - name: Release
-        uses: softprops/action-gh-release@v1
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: one-api.exe
-          draft: true
-          generate_release_notes: true
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -9,4 +9,5 @@ logs
 web/dist
 .env
 one-api
-.DS_Store
+.DS_Store
+tiktoken_cache
@@ -1,10 +1,13 @@
+<p align="right">
+   <a href="./README.md">中文</a> | <strong>English</strong>
+</p>
 <div align="center">

 ![new-api](/web/public/logo.png)

 # New API

-🍥 Next Generation LLM Gateway and AI Asset Management System
+🍥 Next-Generation Large Model Gateway and AI Asset Management System

 <a href="https://trendshift.io/repositories/8227" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>

@@ -33,171 +36,159 @@
 > This is an open-source project developed based on [One API](https://github.com/songquanpeng/one-api)

 > [!IMPORTANT]  
-> - Users must comply with OpenAI's [Terms of Use](https://openai.com/policies/terms-of-use) and relevant laws and regulations. Not to be used for illegal purposes.
-> - This project is for personal learning only. Stability is not guaranteed, and no technical support is provided.
+> - This project is for personal learning purposes only, with no guarantee of stability or technical support.
+> - Users must comply with OpenAI's [Terms of Use](https://openai.com/policies/terms-of-use) and **applicable laws and regulations**, and must not use it for illegal purposes.
+> - According to the [《Interim Measures for the Management of Generative Artificial Intelligence Services》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm), please do not provide any unregistered generative AI services to the public in China.
+
+## 📚 Documentation
+
+For detailed documentation, please visit our official Wiki: [https://docs.newapi.pro/](https://docs.newapi.pro/)
+
+You can also access the AI-generated DeepWiki:
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)

 ## ✨ Key Features

-1. 🎨 New UI interface (some interfaces pending update)
-2. 🌍 Multi-language support (work in progress)
-3. 🎨 Added [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) interface support, [Integration Guide](Midjourney.md)
-4. 💰 Online recharge support, configurable in system settings:
-    - [x] EasyPay
-5. 🔍 Query usage quota by key:
-    - Works with [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)
-6. 📑 Configurable items per page in pagination
-7. 🔄 Compatible with original One API database (one-api.db)
-8. 💵 Support per-request model pricing, configurable in System Settings - Operation Settings
-9. ⚖️ Support channel **weighted random** selection
-10. 📈 Data dashboard (console)
-11. 🔒 Configurable model access per token
-12. 🤖 Telegram authorization login support:
-    1. System Settings - Configure Login Registration - Allow Telegram Login
-    2. Send /setdomain command to [@Botfather](https://t.me/botfather)
-    3. Select your bot, then enter http(s)://your-website/login
-    4. Telegram Bot name is the bot username without @
-13. 🎵 Added [Suno API](https://github.com/Suno-API/Suno-API) interface support, [Integration Guide](Suno.md)
-14. 🔄 Support for Rerank models, compatible with Cohere and Jina, can integrate with Dify, [Integration Guide](Rerank.md)
-15. ⚡ **[OpenAI Realtime API](https://platform.openai.com/docs/guides/realtime/integration)** - Support for OpenAI's Realtime API, including Azure channels
-16. 🧠 Support for setting reasoning effort through model name suffix:
-    - Add suffix `-high` to set high reasoning effort (e.g., `o3-mini-high`)
-    - Add suffix `-medium` to set medium reasoning effort
-    - Add suffix `-low` to set low reasoning effort
-17. 🔄 Thinking to content option `thinking_to_content` in `Channel->Edit->Channel Extra Settings`, default is `false`, when `true`, the `reasoning_content` of the thinking content will be converted to `<think>` tags and concatenated to the content returned.
-18. 🔄 Model rate limit, support setting total request limit and successful request limit in `System Settings->Rate Limit Settings`
-19. 💰 Cache billing support, when enabled can charge a configurable ratio for cache hits:
-    1. Set `Prompt Cache Ratio` in `System Settings -> Operation Settings`
-    2. Set `Prompt Cache Ratio` in channel settings, range 0-1 (e.g., 0.5 means 50% charge on cache hits)
+New API offers a wide range of features, please refer to [Features Introduction](https://docs.newapi.pro/wiki/features-introduction) for details:
+
+1. 🎨 Brand new UI interface
+2. 🌍 Multi-language support
+3. 💰 Online recharge functionality (YiPay)
+4. 🔍 Support for querying usage quotas with keys (works with [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool))
+5. 🔄 Compatible with the original One API database
+6. 💵 Support for pay-per-use model pricing
+7. ⚖️ Support for weighted random channel selection
+8. 📈 Data dashboard (console)
+9. 🔒 Token grouping and model restrictions
+10. 🤖 Support for more authorization login methods (LinuxDO, Telegram, OIDC)
+11. 🔄 Support for Rerank models (Cohere and Jina), [API Documentation](https://docs.newapi.pro/api/jinaai-rerank)
+12. ⚡ Support for OpenAI Realtime API (including Azure channels), [API Documentation](https://docs.newapi.pro/api/openai-realtime)
+13. ⚡ Support for Claude Messages format, [API Documentation](https://docs.newapi.pro/api/anthropic-chat)
+14. Support for entering chat interface via /chat2link route
+15. 🧠 Support for setting reasoning effort through model name suffixes:
+    1. OpenAI o-series models
+        - Add `-high` suffix for high reasoning effort (e.g.: `o3-mini-high`)
+        - Add `-medium` suffix for medium reasoning effort (e.g.: `o3-mini-medium`)
+        - Add `-low` suffix for low reasoning effort (e.g.: `o3-mini-low`)
+    2. Claude thinking models
+        - Add `-thinking` suffix to enable thinking mode (e.g.: `claude-3-7-sonnet-20250219-thinking`)
+16. 🔄 Thinking-to-content functionality
+17. 🔄 Model rate limiting for users
+18. 💰 Cache billing support, which allows billing at a set ratio when cache is hit:
+    1. Set the `Prompt Cache Ratio` option in `System Settings-Operation Settings`
+    2. Set `Prompt Cache Ratio` in the channel, range 0-1, e.g., setting to 0.5 means billing at 50% when cache is hit
    3. Supported channels:
        - [x] OpenAI
-        - [x] Azure 
+        - [x] Azure
        - [x] DeepSeek
-        - [ ] Claude
+        - [x] Claude

 ## Model Support
-This version additionally supports:
-1. Third-party model **gpts** (gpt-4-gizmo-*)
-2. [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) interface, [Integration Guide](Midjourney.md)
-3. Custom channels with full API URL support
-4. [Suno API](https://github.com/Suno-API/Suno-API) interface, [Integration Guide](Suno.md)
-5. Rerank models, supporting [Cohere](https://cohere.ai/) and [Jina](https://jina.ai/), [Integration Guide](Rerank.md)
-6. Dify

-You can add custom models gpt-4-gizmo-* in channels. These are third-party models and cannot be called with official OpenAI keys.
+This version supports multiple models, please refer to [API Documentation-Relay Interface](https://docs.newapi.pro/api) for details:

-## Additional Configurations Beyond One API
- `GENERATE_DEFAULT_TOKEN`: Generate initial token for new users, default `false`
- `STREAMING_TIMEOUT`: Set streaming response timeout, default 60 seconds
- `DIFY_DEBUG`: Output workflow and node info to client for Dify channel, default `true`
- `FORCE_STREAM_OPTION`: Override client stream_options parameter, default `true`
- `GET_MEDIA_TOKEN`: Calculate image tokens, default `true`
- `GET_MEDIA_TOKEN_NOT_STREAM`: Calculate image tokens in non-stream mode, default `true`
- `UPDATE_TASK`: Update async tasks (Midjourney, Suno), default `true`
- `GEMINI_MODEL_MAP`: Specify Gemini model versions (v1/v1beta), format: "model:version", comma-separated
- `COHERE_SAFETY_SETTING`: Cohere model [safety settings](https://docs.cohere.com/docs/safety-modes#overview), options: `NONE`, `CONTEXTUAL`, `STRICT`, default `NONE`
- `GEMINI_VISION_MAX_IMAGE_NUM`: Gemini model maximum image number, default `16`, set to `-1` to disable
- `MAX_FILE_DOWNLOAD_MB`: Maximum file download size in MB, default `20`
- `CRYPTO_SECRET`: Encryption key for encrypting database content
- `AZURE_DEFAULT_API_VERSION`: Azure channel default API version, if not specified in channel settings, use this version, default `2024-12-01-preview`
- `NOTIFICATION_LIMIT_DURATION_MINUTE`: Duration of notification limit in minutes, default `10`
- `NOTIFY_LIMIT_COUNT`: Maximum number of user notifications in the specified duration, default `2`
+1. Third-party models **gpts** (gpt-4-gizmo-*)
+2. Third-party channel [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) interface, [API Documentation](https://docs.newapi.pro/api/midjourney-proxy-image)
+3. Third-party channel [Suno API](https://github.com/Suno-API/Suno-API) interface, [API Documentation](https://docs.newapi.pro/api/suno-music)
+4. Custom channels, supporting full call address input
+5. Rerank models ([Cohere](https://cohere.ai/) and [Jina](https://jina.ai/)), [API Documentation](https://docs.newapi.pro/api/jinaai-rerank)
+6. Claude Messages format, [API Documentation](https://docs.newapi.pro/api/anthropic-chat)
+7. Dify, currently only supports chatflow
+
+## Environment Variable Configuration
+
+For detailed configuration instructions, please refer to [Installation Guide-Environment Variables Configuration](https://docs.newapi.pro/installation/environment-variables):
+
+- `GENERATE_DEFAULT_TOKEN`: Whether to generate initial tokens for newly registered users, default is `false`
+- `STREAMING_TIMEOUT`: Streaming response timeout, default is 60 seconds
+- `DIFY_DEBUG`: Whether to output workflow and node information for Dify channels, default is `true`
+- `FORCE_STREAM_OPTION`: Whether to override client stream_options parameter, default is `true`
+- `GET_MEDIA_TOKEN`: Whether to count image tokens, default is `true`
+- `GET_MEDIA_TOKEN_NOT_STREAM`: Whether to count image tokens in non-streaming cases, default is `true`
+- `UPDATE_TASK`: Whether to update asynchronous tasks (Midjourney, Suno), default is `true`
+- `COHERE_SAFETY_SETTING`: Cohere model safety settings, options are `NONE`, `CONTEXTUAL`, `STRICT`, default is `NONE`
+- `GEMINI_VISION_MAX_IMAGE_NUM`: Maximum number of images for Gemini models, default is `16`
+- `MAX_FILE_DOWNLOAD_MB`: Maximum file download size in MB, default is `20`
+- `CRYPTO_SECRET`: Encryption key used for encrypting database content
+- `AZURE_DEFAULT_API_VERSION`: Azure channel default API version, default is `2025-04-01-preview`
+- `NOTIFICATION_LIMIT_DURATION_MINUTE`: Notification limit duration, default is `10` minutes
+- `NOTIFY_LIMIT_COUNT`: Maximum number of user notifications within the specified duration, default is `2`
+- `ERROR_LOG_ENABLED=true`: Whether to record and display error logs, default is `false`

 ## Deployment

+For detailed deployment guides, please refer to [Installation Guide-Deployment Methods](https://docs.newapi.pro/installation):
+
 > [!TIP]
-> Latest Docker image: `calciumion/new-api:latest`  
-> Default account: root, password: 123456
+> Latest Docker image: `calciumion/new-api:latest`

-### Multi-Server Deployment
- Must set `SESSION_SECRET` environment variable, otherwise login state will not be consistent across multiple servers.
- If using a public Redis, must set `CRYPTO_SECRET` environment variable, otherwise Redis content will not be able to be obtained in multi-server deployment.
+### Multi-machine Deployment Considerations
+- Environment variable `SESSION_SECRET` must be set, otherwise login status will be inconsistent across multiple machines
+- If sharing Redis, `CRYPTO_SECRET` must be set, otherwise Redis content cannot be accessed across multiple machines

-### Requirements
- Local database (default): SQLite (Docker deployment must mount `/data` directory)
- Remote database: MySQL >= 5.7.8, PgSQL >= 9.6
+### Deployment Requirements
+- Local database (default): SQLite (Docker deployment must mount the `/data` directory)
+- Remote database: MySQL version >= 5.7.8, PgSQL version >= 9.6

-### Deployment with BT Panel
-Install BT Panel (**version 9.2.0** or above) from [BT Panel Official Website](https://www.bt.cn/new/download.html), choose the stable version script to download and install.  
-After installation, log in to BT Panel and click Docker in the menu bar. First-time access will prompt to install Docker service. Click Install Now and follow the prompts to complete installation.  
-After installation, find **New-API** in the app store, click install, configure basic options to complete installation.  
-[Pictorial Guide](BT.md)
+### Deployment Methods

-### Docker Deployment
+#### Using BaoTa Panel Docker Feature
+Install BaoTa Panel (version **9.2.0** or above), find **New-API** in the application store and install it.
+[Tutorial with images](./docs/BT.md)

-### Using Docker Compose (Recommended)
+#### Using Docker Compose (Recommended)
 ```shell
-# Clone project
+# Download the project
 git clone https://github.com/Calcium-Ion/new-api.git
 cd new-api
 # Edit docker-compose.yml as needed
-# nano docker-compose.yml
-# vim docker-compose.yml
 # Start
 docker-compose up -d
 ```

-#### Update Version
+#### Using Docker Image Directly
 ```shell
-docker-compose pull
-docker-compose up -d
-```
-
-### Direct Docker Image Usage
-```shell
-# SQLite deployment:
+# Using SQLite
 docker run --name new-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest

-# MySQL deployment (add -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi"), modify database connection parameters as needed
-# Example:
+# Using MySQL
 docker run --name new-api -d --restart always -p 3000:3000 -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
 ```

-#### Update Version
-```shell
-# Pull the latest image
-docker pull calciumion/new-api:latest
-# Stop and remove the old container
-docker stop new-api
-docker rm new-api
-# Run the new container with the same parameters as before
-docker run --name new-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
-```
+## Channel Retry and Cache
+Channel retry functionality has been implemented, you can set the number of retries in `Settings->Operation Settings->General Settings`. It is **recommended to enable caching**.

-Alternatively, you can use Watchtower for automatic updates (not recommended, may cause database incompatibility):
-```shell
-docker run --rm -v /var/run/docker.sock:/var/run/docker.sock containrrr/watchtower -cR
-```
+### Cache Configuration Method
+1. `REDIS_CONN_STRING`: Set Redis as cache
+2. `MEMORY_CACHE_ENABLED`: Enable memory cache (no need to set manually if Redis is set)

-## Channel Retry
-Channel retry is implemented, configurable in `Settings->Operation Settings->General Settings`. **Cache recommended**.  
-If retry is enabled, the system will automatically use the next priority channel for the same request after a failed request.
+## API Documentation

-### Cache Configuration
-1. `REDIS_CONN_STRING`: Use Redis as cache
-    + Example: `REDIS_CONN_STRING=redis://default:redispw@localhost:49153`
-2. `MEMORY_CACHE_ENABLED`: Enable memory cache, default `false`
-    + Example: `MEMORY_CACHE_ENABLED=true`
+For detailed API documentation, please refer to [API Documentation](https://docs.newapi.pro/api):

-### Why Some Errors Don't Retry
-Error codes 400, 504, 524 won't retry
-### To Enable Retry for 400
-In `Channel->Edit`, set `Status Code Override` to:
-```json
-{
-  "400": "500"
-}
-```
-
-## Integration Guides
- [Midjourney Integration](Midjourney.md)
- [Suno Integration](Suno.md)
+- [Chat API](https://docs.newapi.pro/api/openai-chat)
+- [Image API](https://docs.newapi.pro/api/openai-image)
+- [Rerank API](https://docs.newapi.pro/api/jinaai-rerank)
+- [Realtime API](https://docs.newapi.pro/api/openai-realtime)
+- [Claude Chat API (messages)](https://docs.newapi.pro/api/anthropic-chat)

 ## Related Projects
 - [One API](https://github.com/songquanpeng/one-api): Original project
 - [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy): Midjourney interface support
- [chatnio](https://github.com/Deeptrain-Community/chatnio): Next-gen AI B/C solution
- [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool): Query usage quota by key
+- [chatnio](https://github.com/Deeptrain-Community/chatnio): Next-generation AI one-stop B/C-end solution
+- [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool): Query usage quota with key
+
+Other projects based on New API:
+- [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon): High-performance optimized version of New API
+- [VoAPI](https://github.com/VoAPI/VoAPI): Frontend beautified version based on New API
+
+## Help and Support
+
+If you have any questions, please refer to [Help and Support](https://docs.newapi.pro/support):
+- [Community Interaction](https://docs.newapi.pro/support/community-interaction)
+- [Issue Feedback](https://docs.newapi.pro/support/feedback-issues)
+- [FAQ](https://docs.newapi.pro/support/faq)

 ## 🌟 Star History

-[![Star History Chart](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
+[![Star History Chart](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
@@ -44,6 +44,9 @@

 详细文档请访问我们的官方Wiki：[https://docs.newapi.pro/](https://docs.newapi.pro/)

+也可访问AI生成的DeepWiki:
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
+
 ## ✨ 主要特性

 New API提供了丰富的功能，详细特性请参考[特性说明](https://docs.newapi.pro/wiki/features-introduction)：
@@ -107,9 +110,10 @@ New API提供了丰富的功能，详细特性请参考[特性说明](https://do
 - `GEMINI_VISION_MAX_IMAGE_NUM`：Gemini模型最大图片数量，默认 `16`
 - `MAX_FILE_DOWNLOAD_MB`: 最大文件下载大小，单位MB，默认 `20`
 - `CRYPTO_SECRET`：加密密钥，用于加密数据库内容
- `AZURE_DEFAULT_API_VERSION`：Azure渠道默认API版本，默认 `2024-12-01-preview`
+- `AZURE_DEFAULT_API_VERSION`：Azure渠道默认API版本，默认 `2025-04-01-preview`
 - `NOTIFICATION_LIMIT_DURATION_MINUTE`：通知限制持续时间，默认 `10`分钟
 - `NOTIFY_LIMIT_COUNT`：用户通知在指定持续时间内的最大数量，默认 `2`
+- `ERROR_LOG_ENABLED=true`: 是否记录并显示错误日志，默认`false`

 ## 部署

@@ -117,7 +121,6 @@ New API提供了丰富的功能，详细特性请参考[特性说明](https://do

 > [!TIP]
 > 最新版Docker镜像：`calciumion/new-api:latest`  
-> 默认账号root 密码123456

 ### 多机部署注意事项
 - 必须设置环境变量 `SESSION_SECRET`，否则会导致多机部署时登录状态不一致
@@ -131,7 +134,7 @@ New API提供了丰富的功能，详细特性请参考[特性说明](https://do

 #### 使用宝塔面板Docker功能部署
 安装宝塔面板（**9.2.0版本**及以上），在应用商店中找到**New-API**安装即可。
-[图文教程](BT.md)
+[图文教程](./docs/BT.md)

 #### 使用Docker Compose部署（推荐）
 ```shell
@@ -1,8 +1,8 @@
 package common

 import (
-	"os"
-	"strconv"
+	//"os"
+	//"strconv"
 	"sync"
 	"time"

@@ -62,9 +62,13 @@ var EmailDomainWhitelist = []string{
 	"yahoo.com",
 	"foxmail.com",
 }
+var EmailLoginAuthServerList = []string{
+	"smtp.sendcloud.net",
+	"smtp.azurecomm.net",
+}

-var DebugEnabled = os.Getenv("DEBUG") == "true"
-var MemoryCacheEnabled = os.Getenv("MEMORY_CACHE_ENABLED") == "true"
+var DebugEnabled bool
+var MemoryCacheEnabled bool

 var LogConsumeEnabled = true

@@ -103,22 +107,22 @@ var RetryTimes = 0

 //var RootUserEmail = ""

-var IsMasterNode = os.Getenv("NODE_TYPE") != "slave"
+var IsMasterNode bool

-var requestInterval, _ = strconv.Atoi(os.Getenv("POLLING_INTERVAL"))
-var RequestInterval = time.Duration(requestInterval) * time.Second
+var requestInterval int
+var RequestInterval time.Duration

-var SyncFrequency = GetEnvOrDefault("SYNC_FREQUENCY", 60) // unit is second
+var SyncFrequency int // unit is second

 var BatchUpdateEnabled = false
-var BatchUpdateInterval = GetEnvOrDefault("BATCH_UPDATE_INTERVAL", 5)
+var BatchUpdateInterval int

-var RelayTimeout = GetEnvOrDefault("RELAY_TIMEOUT", 0) // unit is second
+var RelayTimeout int // unit is second

-var GeminiSafetySetting = GetEnvOrDefaultString("GEMINI_SAFETY_SETTING", "BLOCK_NONE")
+var GeminiSafetySetting string

 // https://docs.cohere.com/docs/safety-modes Type; NONE/CONTEXTUAL/STRICT
-var CohereSafetySetting = GetEnvOrDefaultString("COHERE_SAFETY_SETTING", "NONE")
+var CohereSafetySetting string

 const (
 	RequestIdKey = "X-Oneapi-Request-Id"
@@ -145,13 +149,13 @@ var (
 // All duration's unit is seconds
 // Shouldn't larger then RateLimitKeyExpirationDuration
 var (
-	GlobalApiRateLimitEnable   = GetEnvOrDefaultBool("GLOBAL_API_RATE_LIMIT_ENABLE", true)
-	GlobalApiRateLimitNum      = GetEnvOrDefault("GLOBAL_API_RATE_LIMIT", 180)
-	GlobalApiRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_API_RATE_LIMIT_DURATION", 180))
+	GlobalApiRateLimitEnable   bool
+	GlobalApiRateLimitNum      int
+	GlobalApiRateLimitDuration int64

-	GlobalWebRateLimitEnable   = GetEnvOrDefaultBool("GLOBAL_WEB_RATE_LIMIT_ENABLE", true)
-	GlobalWebRateLimitNum      = GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT", 60)
-	GlobalWebRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT_DURATION", 180))
+	GlobalWebRateLimitEnable   bool
+	GlobalWebRateLimitNum      int
+	GlobalWebRateLimitDuration int64

 	UploadRateLimitNum            = 10
 	UploadRateLimitDuration int64 = 60
@@ -235,6 +239,8 @@ const (
 	ChannelTypeVolcEngine     = 45
 	ChannelTypeBaiduV2        = 46
 	ChannelTypeXinference     = 47
+	ChannelTypeXai            = 48
+	ChannelTypeCoze           = 49
 	ChannelTypeDummy          // this one is only for count, do not add any channel after this

 )
@@ -288,4 +294,6 @@ var ChannelBaseURLs = []string{
 	"https://ark.cn-beijing.volces.com",         //45
 	"https://qianfan.baidubce.com",              //46
 	"",                                          //47
+	"https://api.x.ai",                          //48
+	"https://api.coze.cn",                       //49
 }
@@ -5,6 +5,7 @@ import (
 	"encoding/base64"
 	"fmt"
 	"net/smtp"
+	"slices"
 	"strings"
 	"time"
 )
@@ -79,7 +80,7 @@ func SendEmail(subject string, receiver string, content string) error {
 		if err != nil {
 			return err
 		}
-	} else if isOutlookServer(SMTPAccount) || SMTPServer == "smtp.azurecomm.net" {
+	} else if isOutlookServer(SMTPAccount) || slices.Contains(EmailLoginAuthServerList, SMTPServer) {
 		auth = LoginAuth(SMTPAccount, SMTPToken)
 		err = smtp.SendMail(addr, auth, SMTPFrom, to, mail)
 	} else {
@@ -6,6 +6,8 @@ import (
 	"log"
 	"os"
 	"path/filepath"
+	"strconv"
+	"time"
 )

 var (
@@ -66,4 +68,31 @@ func LoadEnv() {
 			}
 		}
 	}
+
+	// Initialize variables from constants.go that were using environment variables
+	DebugEnabled = os.Getenv("DEBUG") == "true"
+	MemoryCacheEnabled = os.Getenv("MEMORY_CACHE_ENABLED") == "true"
+	IsMasterNode = os.Getenv("NODE_TYPE") != "slave"
+
+	// Parse requestInterval and set RequestInterval
+	requestInterval, _ = strconv.Atoi(os.Getenv("POLLING_INTERVAL"))
+	RequestInterval = time.Duration(requestInterval) * time.Second
+
+	// Initialize variables with GetEnvOrDefault
+	SyncFrequency = GetEnvOrDefault("SYNC_FREQUENCY", 60)
+	BatchUpdateInterval = GetEnvOrDefault("BATCH_UPDATE_INTERVAL", 5)
+	RelayTimeout = GetEnvOrDefault("RELAY_TIMEOUT", 0)
+
+	// Initialize string variables with GetEnvOrDefaultString
+	GeminiSafetySetting = GetEnvOrDefaultString("GEMINI_SAFETY_SETTING", "BLOCK_NONE")
+	CohereSafetySetting = GetEnvOrDefaultString("COHERE_SAFETY_SETTING", "NONE")
+
+	// Initialize rate limit variables
+	GlobalApiRateLimitEnable = GetEnvOrDefaultBool("GLOBAL_API_RATE_LIMIT_ENABLE", true)
+	GlobalApiRateLimitNum = GetEnvOrDefault("GLOBAL_API_RATE_LIMIT", 180)
+	GlobalApiRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_API_RATE_LIMIT_DURATION", 180))
+
+	GlobalWebRateLimitEnable = GetEnvOrDefaultBool("GLOBAL_WEB_RATE_LIMIT_ENABLE", true)
+	GlobalWebRateLimitNum = GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT", 60)
+	GlobalWebRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT_DURATION", 180))
 }
@@ -12,3 +12,7 @@ func DecodeJson(data []byte, v any) error {
 func DecodeJsonStr(data string, v any) error {
 	return DecodeJson(StringToByteSlice(data), v)
 }
+
+func EncodeJson(v any) ([]byte, error) {
+	return json.Marshal(v)
+}
@@ -0,0 +1,89 @@
+package limiter
+
+import (
+	"context"
+	_ "embed"
+	"fmt"
+	"github.com/go-redis/redis/v8"
+	"one-api/common"
+	"sync"
+)
+
+//go:embed lua/rate_limit.lua
+var rateLimitScript string
+
+type RedisLimiter struct {
+	client         *redis.Client
+	limitScriptSHA string
+}
+
+var (
+	instance *RedisLimiter
+	once     sync.Once
+)
+
+func New(ctx context.Context, r *redis.Client) *RedisLimiter {
+	once.Do(func() {
+		// 预加载脚本
+		limitSHA, err := r.ScriptLoad(ctx, rateLimitScript).Result()
+		if err != nil {
+			common.SysLog(fmt.Sprintf("Failed to load rate limit script: %v", err))
+		}
+		instance = &RedisLimiter{
+			client:         r,
+			limitScriptSHA: limitSHA,
+		}
+	})
+
+	return instance
+}
+
+func (rl *RedisLimiter) Allow(ctx context.Context, key string, opts ...Option) (bool, error) {
+	// 默认配置
+	config := &Config{
+		Capacity:  10,
+		Rate:      1,
+		Requested: 1,
+	}
+
+	// 应用选项模式
+	for _, opt := range opts {
+		opt(config)
+	}
+
+	// 执行限流
+	result, err := rl.client.EvalSha(
+		ctx,
+		rl.limitScriptSHA,
+		[]string{key},
+		config.Requested,
+		config.Rate,
+		config.Capacity,
+	).Int()
+
+	if err != nil {
+		return false, fmt.Errorf("rate limit failed: %w", err)
+	}
+	return result == 1, nil
+}
+
+// Config 配置选项模式
+type Config struct {
+	Capacity  int64
+	Rate      int64
+	Requested int64
+}
+
+type Option func(*Config)
+
+func WithCapacity(c int64) Option {
+	return func(cfg *Config) { cfg.Capacity = c }
+}
+
+func WithRate(r int64) Option {
+	return func(cfg *Config) { cfg.Rate = r }
+}
+
+func WithRequested(n int64) Option {
+	return func(cfg *Config) { cfg.Requested = n }
+}
@@ -0,0 +1,44 @@
+-- 令牌桶限流器
+-- KEYS[1]: 限流器唯一标识
+-- ARGV[1]: 请求令牌数 (通常为1)
+-- ARGV[2]: 令牌生成速率 (每秒)
+-- ARGV[3]: 桶容量
+
+local key = KEYS[1]
+local requested = tonumber(ARGV[1])
+local rate = tonumber(ARGV[2])
+local capacity = tonumber(ARGV[3])
+
+-- 获取当前时间（Redis服务器时间）
+local now = redis.call('TIME')
+local nowInSeconds = tonumber(now[1])
+
+-- 获取桶状态
+local bucket = redis.call('HMGET', key, 'tokens', 'last_time')
+local tokens = tonumber(bucket[1])
+local last_time = tonumber(bucket[2])
+
+-- 初始化桶（首次请求或过期）
+if not tokens or not last_time then
+    tokens = capacity
+    last_time = nowInSeconds
+else
+    -- 计算新增令牌
+    local elapsed = nowInSeconds - last_time
+    local add_tokens = elapsed * rate
+    tokens = math.min(capacity, tokens + add_tokens)
+    last_time = nowInSeconds
+end
+
+-- 判断是否允许请求
+local allowed = false
+if tokens >= requested then
+    tokens = tokens - requested
+    allowed = true
+end
+
+---- 更新桶状态并设置过期时间
+redis.call('HMSET', key, 'tokens', tokens, 'last_time', last_time)
+--redis.call('EXPIRE', key, math.ceil(capacity / rate) + 60) -- 适当延长过期时间
+
+return allowed and 1 or 0
@@ -7,7 +7,6 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
-	"github.com/pkg/errors"
 	"html/template"
 	"io"
 	"log"
@@ -22,6 +21,7 @@ import (
 	"time"

 	"github.com/google/uuid"
+	"github.com/pkg/errors"
 )

 func OpenBrowser(url string) {
@@ -0,0 +1,5 @@
+package constant
+
+import "time"
+
+var AzureNoRemoveDotTime = time.Date(2025, time.May, 10, 0, 0, 0, 0, time.UTC).Unix()
@@ -4,32 +4,42 @@ import (
 	"one-api/common"
 )

-var StreamingTimeout = common.GetEnvOrDefault("STREAMING_TIMEOUT", 60)
-var DifyDebug = common.GetEnvOrDefaultBool("DIFY_DEBUG", true)
-
-var MaxFileDownloadMB = common.GetEnvOrDefault("MAX_FILE_DOWNLOAD_MB", 20)
-
-// ForceStreamOption 覆盖请求参数，强制返回usage信息
-var ForceStreamOption = common.GetEnvOrDefaultBool("FORCE_STREAM_OPTION", true)
-
-var GetMediaToken = common.GetEnvOrDefaultBool("GET_MEDIA_TOKEN", true)
-
-var GetMediaTokenNotStream = common.GetEnvOrDefaultBool("GET_MEDIA_TOKEN_NOT_STREAM", true)
-
-var UpdateTask = common.GetEnvOrDefaultBool("UPDATE_TASK", true)
-
-var AzureDefaultAPIVersion = common.GetEnvOrDefaultString("AZURE_DEFAULT_API_VERSION", "2024-12-01-preview")
+var StreamingTimeout int
+var DifyDebug bool
+var MaxFileDownloadMB int
+var ForceStreamOption bool
+var GetMediaToken bool
+var GetMediaTokenNotStream bool
+var UpdateTask bool
+var AzureDefaultAPIVersion string
+var GeminiVisionMaxImageNum int
+var NotifyLimitCount int
+var NotificationLimitDurationMinute int
+var GenerateDefaultToken bool
+var ErrorLogEnabled bool

 //var GeminiModelMap = map[string]string{
 //	"gemini-1.0-pro": "v1",
 //}

-var GeminiVisionMaxImageNum = common.GetEnvOrDefault("GEMINI_VISION_MAX_IMAGE_NUM", 16)
-
-var NotifyLimitCount = common.GetEnvOrDefault("NOTIFY_LIMIT_COUNT", 2)
-var NotificationLimitDurationMinute = common.GetEnvOrDefault("NOTIFICATION_LIMIT_DURATION_MINUTE", 10)
-
 func InitEnv() {
+	StreamingTimeout = common.GetEnvOrDefault("STREAMING_TIMEOUT", 60)
+	DifyDebug = common.GetEnvOrDefaultBool("DIFY_DEBUG", true)
+	MaxFileDownloadMB = common.GetEnvOrDefault("MAX_FILE_DOWNLOAD_MB", 20)
+	// ForceStreamOption 覆盖请求参数，强制返回usage信息
+	ForceStreamOption = common.GetEnvOrDefaultBool("FORCE_STREAM_OPTION", true)
+	GetMediaToken = common.GetEnvOrDefaultBool("GET_MEDIA_TOKEN", true)
+	GetMediaTokenNotStream = common.GetEnvOrDefaultBool("GET_MEDIA_TOKEN_NOT_STREAM", true)
+	UpdateTask = common.GetEnvOrDefaultBool("UPDATE_TASK", true)
+	AzureDefaultAPIVersion = common.GetEnvOrDefaultString("AZURE_DEFAULT_API_VERSION", "2025-04-01-preview")
+	GeminiVisionMaxImageNum = common.GetEnvOrDefault("GEMINI_VISION_MAX_IMAGE_NUM", 16)
+	NotifyLimitCount = common.GetEnvOrDefault("NOTIFY_LIMIT_COUNT", 2)
+	NotificationLimitDurationMinute = common.GetEnvOrDefault("NOTIFICATION_LIMIT_DURATION_MINUTE", 10)
+	// GenerateDefaultToken 是否生成初始令牌，默认关闭。
+	GenerateDefaultToken = common.GetEnvOrDefaultBool("GENERATE_DEFAULT_TOKEN", false)
+	// 是否启用错误日志
+	ErrorLogEnabled = common.GetEnvOrDefaultBool("ERROR_LOG_ENABLED", false)
+
 	//modelVersionMapStr := strings.TrimSpace(os.Getenv("GEMINI_MODEL_MAP"))
 	//if modelVersionMapStr == "" {
 	//	return
@@ -43,6 +53,3 @@ func InitEnv() {
 	//	}
 	//}
 }
-
-// GenerateDefaultToken 是否生成初始令牌，默认关闭。
-var GenerateDefaultToken = common.GetEnvOrDefaultBool("GENERATE_DEFAULT_TOKEN", false)
@@ -0,0 +1,3 @@
+package constant
+
+var Setup = false
@@ -1,11 +1,12 @@
 package constant

 var (
-	UserSettingNotifyType            = "notify_type"             // QuotaWarningType 额度预警类型
-	UserSettingQuotaWarningThreshold = "quota_warning_threshold" // QuotaWarningThreshold 额度预警阈值
-	UserSettingWebhookUrl            = "webhook_url"             // WebhookUrl webhook地址
-	UserSettingWebhookSecret         = "webhook_secret"          // WebhookSecret webhook密钥
-	UserSettingNotificationEmail     = "notification_email"      // NotificationEmail 通知邮箱地址
+	UserSettingNotifyType            = "notify_type"                    // QuotaWarningType 额度预警类型
+	UserSettingQuotaWarningThreshold = "quota_warning_threshold"        // QuotaWarningThreshold 额度预警阈值
+	UserSettingWebhookUrl            = "webhook_url"                    // WebhookUrl webhook地址
+	UserSettingWebhookSecret         = "webhook_secret"                 // WebhookSecret webhook密钥
+	UserSettingNotificationEmail     = "notification_email"             // NotificationEmail 通知邮箱地址
+	UserAcceptUnsetRatioModel        = "accept_unset_model_ratio_model" // AcceptUnsetRatioModel 是否接受未设置价格的模型
 )

 var (
@@ -108,6 +108,13 @@ type DeepSeekUsageResponse struct {
 	} `json:"balance_infos"`
 }

+type OpenRouterCreditResponse struct {
+	Data struct {
+		TotalCredits float64 `json:"total_credits"`
+		TotalUsage   float64 `json:"total_usage"`
+	} `json:"data"`
+}
+
 // GetAuthHeader get auth header
 func GetAuthHeader(token string) http.Header {
 	h := http.Header{}
@@ -281,6 +288,22 @@ func updateChannelAIGC2DBalance(channel *model.Channel) (float64, error) {
 	return response.TotalAvailable, nil
 }

+func updateChannelOpenRouterBalance(channel *model.Channel) (float64, error) {
+	url := "https://openrouter.ai/api/v1/credits"
+	body, err := GetResponseBody("GET", url, channel, GetAuthHeader(channel.Key))
+	if err != nil {
+		return 0, err
+	}
+	response := OpenRouterCreditResponse{}
+	err = json.Unmarshal(body, &response)
+	if err != nil {
+		return 0, err
+	}
+	balance := response.Data.TotalCredits - response.Data.TotalUsage
+	channel.UpdateBalance(balance)
+	return balance, nil
+}
+
 func updateChannelBalance(channel *model.Channel) (float64, error) {
 	baseURL := common.ChannelBaseURLs[channel.Type]
 	if channel.GetBaseURL() == "" {
@@ -307,6 +330,8 @@ func updateChannelBalance(channel *model.Channel) (float64, error) {
 		return updateChannelSiliconFlowBalance(channel)
 	case common.ChannelTypeDeepSeek:
 		return updateChannelDeepSeekBalance(channel)
+	case common.ChannelTypeOpenRouter:
+		return updateChannelOpenRouterBalance(channel)
 	default:
 		return 0, errors.New("尚未实现")
 	}
@@ -103,7 +103,15 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
 	}

 	request := buildTestRequest(testModel)
-	common.SysLog(fmt.Sprintf("testing channel %d with model %s , info %v ", channel.Id, testModel, info))
+	// 创建一个用于日志的 info 副本，移除 ApiKey
+	logInfo := *info
+	logInfo.ApiKey = ""
+	common.SysLog(fmt.Sprintf("testing channel %d with model %s , info %+v ", channel.Id, testModel, logInfo))
+
+	priceData, err := helper.ModelPriceHelper(c, info, 0, int(request.MaxTokens))
+	if err != nil {
+		return err, nil
+	}

 	adaptor.Init(info)

@@ -143,10 +151,7 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
 		return err, nil
 	}
 	info.PromptTokens = usage.PromptTokens
-	priceData, err := helper.ModelPriceHelper(c, info, usage.PromptTokens, int(request.MaxTokens))
-	if err != nil {
-		return err, nil
-	}
+
 	quota := 0
 	if !priceData.UsePrice {
 		quota = usage.PromptTokens + int(math.Round(float64(usage.CompletionTokens)*priceData.CompletionRatio))
@@ -184,10 +189,14 @@ func buildTestRequest(model string) *dto.GeneralOpenAIRequest {
 		return testRequest
 	}
 	// 并非Embedding 模型
-	if strings.HasPrefix(model, "o1") || strings.HasPrefix(model, "o3") {
+	if strings.HasPrefix(model, "o") {
 		testRequest.MaxCompletionTokens = 10
 	} else if strings.Contains(model, "thinking") {
-		testRequest.MaxTokens = 50
+		if !strings.Contains(model, "claude") {
+			testRequest.MaxTokens = 50
+		}
+	} else if strings.Contains(model, "gemini") {
+		testRequest.MaxTokens = 300
 	} else {
 		testRequest.MaxTokens = 10
 	}
@@ -119,6 +119,12 @@ func FetchUpstreamModels(c *gin.Context) {
 		baseURL = channel.GetBaseURL()
 	}
 	url := fmt.Sprintf("%s/v1/models", baseURL)
+	switch channel.Type {
+	case common.ChannelTypeGemini:
+		url = fmt.Sprintf("%s/v1beta/openai/models", baseURL)
+	case common.ChannelTypeAli:
+		url = fmt.Sprintf("%s/compatible-mode/v1/models", baseURL)
+	}
 	body, err := GetResponseBody("GET", url, channel, GetAuthHeader(channel.Key))
 	if err != nil {
 		c.JSON(http.StatusOK, gin.H{
@@ -139,7 +145,11 @@ func FetchUpstreamModels(c *gin.Context) {

 	var ids []string
 	for _, model := range result.Data {
-		ids = append(ids, model.ID)
+		id := model.ID
+		if channel.Type == common.ChannelTypeGemini {
+			id = strings.TrimPrefix(id, "models/")
+		}
+		ids = append(ids, id)
 	}

 	c.JSON(http.StatusOK, gin.H{
@@ -196,7 +196,7 @@ func DeleteHistoryLogs(c *gin.Context) {
 		})
 		return
 	}
-	count, err := model.DeleteOldLog(targetTimestamp)
+	count, err := model.DeleteOldLog(c.Request.Context(), targetTimestamp, 100)
 	if err != nil {
 		c.JSON(http.StatusOK, gin.H{
 			"success": false,
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"net/http"
 	"one-api/common"
+	"one-api/constant"
 	"one-api/model"
 	"one-api/setting"
 	"one-api/setting/operation_setting"
@@ -72,6 +73,7 @@ func GetStatus(c *gin.Context) {
 			"oidc_enabled":                system_setting.GetOIDCSettings().Enabled,
 			"oidc_client_id":              system_setting.GetOIDCSettings().ClientId,
 			"oidc_authorization_endpoint": system_setting.GetOIDCSettings().AuthorizationEndpoint,
+			"setup":                       constant.Setup,
 		},
 	})
 	return
@@ -53,11 +53,12 @@ func UpdateOption(c *gin.Context) {
 			return
 		}
 	case "oidc.enabled":
-		if option.Value == "true" && system_setting.GetOIDCSettings().Enabled {
+		if option.Value == "true" && system_setting.GetOIDCSettings().ClientId == "" {
 			c.JSON(http.StatusOK, gin.H{
 				"success": false,
 				"message": "无法启用 OIDC 登录，请先填入 OIDC Client Id 以及 OIDC Client Secret！",
 			})
+			return
 		}
 	case "LinuxDOOAuthEnabled":
 		if option.Value == "true" && common.LinuxDOClientId == "" {
@@ -89,6 +90,15 @@ func UpdateOption(c *gin.Context) {
 				"success": false,
 				"message": "无法启用 Turnstile 校验，请先填入 Turnstile 校验相关配置信息！",
 			})
+
+			return
+		}
+	case "TelegramOAuthEnabled":
+		if option.Value == "true" && common.TelegramBotToken == "" {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "无法启用 Telegram OAuth，请先填入 Telegram Bot Token！",
+			})
 			return
 		}
 	case "GroupRatio":
@@ -100,6 +110,16 @@ func UpdateOption(c *gin.Context) {
 			})
 			return
 		}
+	case "ModelRequestRateLimitGroup":
+		err = setting.CheckModelRequestRateLimitGroup(option.Value)
+		if err != nil {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": err.Error(),
+			})
+			return
+		}
+
 	}
 	err = model.UpdateOption(option.Key, option.Value)
 	if err != nil {
@@ -4,12 +4,11 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
-	"github.com/gorilla/websocket"
 	"io"
 	"log"
 	"net/http"
 	"one-api/common"
+	constant2 "one-api/constant"
 	"one-api/dto"
 	"one-api/middleware"
 	"one-api/model"
@@ -19,12 +18,15 @@ import (
 	"one-api/relay/helper"
 	"one-api/service"
 	"strings"
+
+	"github.com/gin-gonic/gin"
+	"github.com/gorilla/websocket"
 )

 func relayHandler(c *gin.Context, relayMode int) *dto.OpenAIErrorWithStatusCode {
 	var err *dto.OpenAIErrorWithStatusCode
 	switch relayMode {
-	case relayconstant.RelayModeImagesGenerations:
+	case relayconstant.RelayModeImagesGenerations, relayconstant.RelayModeImagesEdits:
 		err = relay.ImageHelper(c)
 	case relayconstant.RelayModeAudioSpeech:
 		fallthrough
@@ -36,9 +38,33 @@ func relayHandler(c *gin.Context, relayMode int) *dto.OpenAIErrorWithStatusCode
 		err = relay.RerankHelper(c, relayMode)
 	case relayconstant.RelayModeEmbeddings:
 		err = relay.EmbeddingHelper(c)
+	case relayconstant.RelayModeResponses:
+		err = relay.ResponsesHelper(c)
+	case relayconstant.RelayModeGemini:
+		err = relay.GeminiHelper(c)
 	default:
 		err = relay.TextHelper(c)
 	}
+
+	if constant2.ErrorLogEnabled && err != nil {
+		// 保存错误日志到mysql中
+		userId := c.GetInt("id")
+		tokenName := c.GetString("token_name")
+		modelName := c.GetString("original_model")
+		tokenId := c.GetInt("token_id")
+		userGroup := c.GetString("group")
+		channelId := c.GetInt("channel_id")
+		other := make(map[string]interface{})
+		other["error_type"] = err.Error.Type
+		other["error_code"] = err.Error.Code
+		other["status_code"] = err.StatusCode
+		other["channel_id"] = channelId
+		other["channel_name"] = c.GetString("channel_name")
+		other["channel_type"] = c.GetInt("channel_type")
+
+		model.RecordErrorLog(c, userId, channelId, modelName, tokenName, err.Error.Message, tokenId, 0, false, userGroup, other)
+	}
+
 	return err
 }

@@ -0,0 +1,173 @@
+package controller
+
+import (
+	"github.com/gin-gonic/gin"
+	"one-api/common"
+	"one-api/constant"
+	"one-api/model"
+	"one-api/setting/operation_setting"
+	"time"
+)
+
+type Setup struct {
+	Status       bool   `json:"status"`
+	RootInit     bool   `json:"root_init"`
+	DatabaseType string `json:"database_type"`
+}
+
+type SetupRequest struct {
+	Username           string `json:"username"`
+	Password           string `json:"password"`
+	ConfirmPassword    string `json:"confirmPassword"`
+	SelfUseModeEnabled bool   `json:"SelfUseModeEnabled"`
+	DemoSiteEnabled    bool   `json:"DemoSiteEnabled"`
+}
+
+func GetSetup(c *gin.Context) {
+	setup := Setup{
+		Status: constant.Setup,
+	}
+	if constant.Setup {
+		c.JSON(200, gin.H{
+			"success": true,
+			"data":    setup,
+		})
+		return
+	}
+	setup.RootInit = model.RootUserExists()
+	if common.UsingMySQL {
+		setup.DatabaseType = "mysql"
+	}
+	if common.UsingPostgreSQL {
+		setup.DatabaseType = "postgres"
+	}
+	if common.UsingSQLite {
+		setup.DatabaseType = "sqlite"
+	}
+	c.JSON(200, gin.H{
+		"success": true,
+		"data":    setup,
+	})
+}
+
+func PostSetup(c *gin.Context) {
+	// Check if setup is already completed
+	if constant.Setup {
+		c.JSON(400, gin.H{
+			"success": false,
+			"message": "系统已经初始化完成",
+		})
+		return
+	}
+
+	// Check if root user already exists
+	rootExists := model.RootUserExists()
+
+	var req SetupRequest
+	err := c.ShouldBindJSON(&req)
+	if err != nil {
+		c.JSON(400, gin.H{
+			"success": false,
+			"message": "请求参数有误",
+		})
+		return
+	}
+
+	// If root doesn't exist, validate and create admin account
+	if !rootExists {
+		// Validate password
+		if req.Password != req.ConfirmPassword {
+			c.JSON(400, gin.H{
+				"success": false,
+				"message": "两次输入的密码不一致",
+			})
+			return
+		}
+
+		if len(req.Password) < 8 {
+			c.JSON(400, gin.H{
+				"success": false,
+				"message": "密码长度至少为8个字符",
+			})
+			return
+		}
+
+		// Create root user
+		hashedPassword, err := common.Password2Hash(req.Password)
+		if err != nil {
+			c.JSON(500, gin.H{
+				"success": false,
+				"message": "系统错误: " + err.Error(),
+			})
+			return
+		}
+		rootUser := model.User{
+			Username:    req.Username,
+			Password:    hashedPassword,
+			Role:        common.RoleRootUser,
+			Status:      common.UserStatusEnabled,
+			DisplayName: "Root User",
+			AccessToken: nil,
+			Quota:       100000000,
+		}
+		err = model.DB.Create(&rootUser).Error
+		if err != nil {
+			c.JSON(500, gin.H{
+				"success": false,
+				"message": "创建管理员账号失败: " + err.Error(),
+			})
+			return
+		}
+	}
+
+	// Set operation modes
+	operation_setting.SelfUseModeEnabled = req.SelfUseModeEnabled
+	operation_setting.DemoSiteEnabled = req.DemoSiteEnabled
+
+	// Save operation modes to database for persistence
+	err = model.UpdateOption("SelfUseModeEnabled", boolToString(req.SelfUseModeEnabled))
+	if err != nil {
+		c.JSON(500, gin.H{
+			"success": false,
+			"message": "保存自用模式设置失败: " + err.Error(),
+		})
+		return
+	}
+
+	err = model.UpdateOption("DemoSiteEnabled", boolToString(req.DemoSiteEnabled))
+	if err != nil {
+		c.JSON(500, gin.H{
+			"success": false,
+			"message": "保存演示站点模式设置失败: " + err.Error(),
+		})
+		return
+	}
+
+	// Update setup status
+	constant.Setup = true
+
+	setup := model.Setup{
+		Version:       common.Version,
+		InitializedAt: time.Now().Unix(),
+	}
+	err = model.DB.Create(&setup).Error
+	if err != nil {
+		c.JSON(500, gin.H{
+			"success": false,
+			"message": "系统初始化失败: " + err.Error(),
+		})
+		return
+	}
+
+	c.JSON(200, gin.H{
+		"success": true,
+		"message": "系统初始化成功",
+	})
+}
+
+func boolToString(b bool) string {
+	if b {
+		return "true"
+	}
+	return "false"
+}
@@ -592,7 +592,14 @@ func UpdateSelf(c *gin.Context) {
 		user.Password = "" // rollback to what it should be
 		cleanUser.Password = ""
 	}
-	updatePassword := user.Password != ""
+	updatePassword, err := checkUpdatePassword(user.OriginalPassword, user.Password, cleanUser.Id)
+	if err != nil {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": err.Error(),
+		})
+		return
+	}
 	if err := cleanUser.Update(updatePassword); err != nil {
 		c.JSON(http.StatusOK, gin.H{
 			"success": false,
@@ -608,6 +615,23 @@ func UpdateSelf(c *gin.Context) {
 	return
 }

+func checkUpdatePassword(originalPassword string, newPassword string, userId int) (updatePassword bool, err error) {
+	var currentUser *model.User
+	currentUser, err = model.GetUserById(userId, true)
+	if err != nil {
+		return
+	}
+	if !common.ValidatePasswordAndHash(originalPassword, currentUser.Password) {
+		err = fmt.Errorf("原密码错误")
+		return
+	}
+	if newPassword == "" {
+		return
+	}
+	updatePassword = true
+	return
+}
+
 func DeleteUser(c *gin.Context) {
 	id, err := strconv.Atoi(c.Param("id"))
 	if err != nil {
@@ -913,11 +937,12 @@ func TopUp(c *gin.Context) {
 }

 type UpdateUserSettingRequest struct {
-	QuotaWarningType      string  `json:"notify_type"`
-	QuotaWarningThreshold float64 `json:"quota_warning_threshold"`
-	WebhookUrl            string  `json:"webhook_url,omitempty"`
-	WebhookSecret         string  `json:"webhook_secret,omitempty"`
-	NotificationEmail     string  `json:"notification_email,omitempty"`
+	QuotaWarningType           string  `json:"notify_type"`
+	QuotaWarningThreshold      float64 `json:"quota_warning_threshold"`
+	WebhookUrl                 string  `json:"webhook_url,omitempty"`
+	WebhookSecret              string  `json:"webhook_secret,omitempty"`
+	NotificationEmail          string  `json:"notification_email,omitempty"`
+	AcceptUnsetModelRatioModel bool    `json:"accept_unset_model_ratio_model"`
 }

 func UpdateUserSetting(c *gin.Context) {
@@ -993,6 +1018,7 @@ func UpdateUserSetting(c *gin.Context) {
 	settings := map[string]interface{}{
 		constant.UserSettingNotifyType:            req.QuotaWarningType,
 		constant.UserSettingQuotaWarningThreshold: req.QuotaWarningThreshold,
+		"accept_unset_model_ratio_model":          req.AcceptUnsetModelRatioModel,
 	}

 	// 如果是webhook类型,添加webhook相关设置
@@ -15,6 +15,8 @@ services:
      - SQL_DSN=root:123456@tcp(mysql:3306)/new-api  # Point to the mysql service
      - REDIS_CONN_STRING=redis://redis
      - TZ=Asia/Shanghai
+      - ERROR_LOG_ENABLED=true # 是否启用错误日志记录
+    #      - TIKTOKEN_CACHE_DIR=./tiktoken_cache  # 如果需要使用tiktoken_cache，请取消注释
    #      - SESSION_SECRET=random_string  # 多机部署时设置，必须修改这个随机字符串！！！！！！！
    #      - NODE_TYPE=slave  # Uncomment for slave node in multi-node deployment
    #      - SYNC_FREQUENCY=60  # Uncomment if regular database syncing is needed
@@ -11,7 +11,7 @@
    - 类型为字符串，填写代理地址（例如 socks5 协议的代理地址）

 3. thinking_to_content
-   - 用于标识是否将思考内容`reasoning_conetnt`转换为`<think>`标签拼接到内容中返回
+   - 用于标识是否将思考内容`reasoning_content`转换为`<think>`标签拼接到内容中返回
   - 类型为布尔值，设置为 true 时启用思考内容转换

 --------------------------------------------------------------
@@ -30,4 +30,4 @@

 --------------------------------------------------------------

-通过调整上述 JSON 配置中的值，可以灵活控制渠道的额外行为，比如是否进行格式化以及使用特定的网络代理。
+通过调整上述 JSON 配置中的值，可以灵活控制渠道的额外行为，比如是否进行格式化以及使用特定的网络代理。
@@ -1,3 +1,3 @@
-密钥为环境变量SESSION_SECRET
-
-![8285bba413e770fe9620f1bf9b40d44e](https://github.com/user-attachments/assets/7a6fc03e-c457-45e4-b8f9-184508fc26b0)
+密钥为环境变量SESSION_SECRET
+
+![8285bba413e770fe9620f1bf9b40d44e](https://github.com/user-attachments/assets/7a6fc03e-c457-45e4-b8f9-184508fc26b0)
@@ -7,17 +7,18 @@ type ClaudeMetadata struct {
 }

 type ClaudeMediaMessage struct {
-	Type        string               `json:"type"`
-	Text        *string              `json:"text,omitempty"`
-	Model       string               `json:"model,omitempty"`
-	Source      *ClaudeMessageSource `json:"source,omitempty"`
-	Usage       *ClaudeUsage         `json:"usage,omitempty"`
-	StopReason  *string              `json:"stop_reason,omitempty"`
-	PartialJson *string              `json:"partial_json,omitempty"`
-	Role        string               `json:"role,omitempty"`
-	Thinking    string               `json:"thinking,omitempty"`
-	Signature   string               `json:"signature,omitempty"`
-	Delta       string               `json:"delta,omitempty"`
+	Type         string               `json:"type,omitempty"`
+	Text         *string              `json:"text,omitempty"`
+	Model        string               `json:"model,omitempty"`
+	Source       *ClaudeMessageSource `json:"source,omitempty"`
+	Usage        *ClaudeUsage         `json:"usage,omitempty"`
+	StopReason   *string              `json:"stop_reason,omitempty"`
+	PartialJson  *string              `json:"partial_json,omitempty"`
+	Role         string               `json:"role,omitempty"`
+	Thinking     string               `json:"thinking,omitempty"`
+	Signature    string               `json:"signature,omitempty"`
+	Delta        string               `json:"delta,omitempty"`
+	CacheControl json.RawMessage      `json:"cache_control,omitempty"`
 	// tool_calls
 	Id        string          `json:"id,omitempty"`
 	Name      string          `json:"name,omitempty"`
@@ -50,6 +51,11 @@ func (c *ClaudeMediaMessage) GetStringContent() string {
 	return ""
 }

+func (c *ClaudeMediaMessage) GetJsonRowString() string {
+	jsonContent, _ := json.Marshal(c)
+	return string(jsonContent)
+}
+
 func (c *ClaudeMediaMessage) SetContent(content any) {
 	jsonContent, _ := json.Marshal(content)
 	c.Content = jsonContent
@@ -65,8 +71,9 @@ func (c *ClaudeMediaMessage) ParseMediaContent() []ClaudeMediaMessage {

 type ClaudeMessageSource struct {
 	Type      string `json:"type"`
-	MediaType string `json:"media_type"`
-	Data      any    `json:"data"`
+	MediaType string `json:"media_type,omitempty"`
+	Data      any    `json:"data,omitempty"`
+	Url       string `json:"url,omitempty"`
 }

 type ClaudeMessage struct {
@@ -1,14 +1,20 @@
 package dto

+import "encoding/json"
+
 type ImageRequest struct {
-	Model          string `json:"model"`
-	Prompt         string `json:"prompt" binding:"required"`
-	N              int    `json:"n,omitempty"`
-	Size           string `json:"size,omitempty"`
-	Quality        string `json:"quality,omitempty"`
-	ResponseFormat string `json:"response_format,omitempty"`
-	Style          string `json:"style,omitempty"`
-	User           string `json:"user,omitempty"`
+	Model          string          `json:"model"`
+	Prompt         string          `json:"prompt" binding:"required"`
+	N              int             `json:"n,omitempty"`
+	Size           string          `json:"size,omitempty"`
+	Quality        string          `json:"quality,omitempty"`
+	ResponseFormat string          `json:"response_format,omitempty"`
+	Style          string          `json:"style,omitempty"`
+	User           string          `json:"user,omitempty"`
+	ExtraFields    json.RawMessage `json:"extra_fields,omitempty"`
+	Background     string          `json:"background,omitempty"`
+	Moderation     string          `json:"moderation,omitempty"`
+	OutputFormat   string          `json:"output_format,omitempty"`
 }

 type ImageResponse struct {
@@ -2,6 +2,7 @@ package dto

 import (
 	"encoding/json"
+	"one-api/common"
 	"strings"
 )

@@ -18,39 +19,51 @@ type FormatJsonSchema struct {
 }

 type GeneralOpenAIRequest struct {
-	Model               string            `json:"model,omitempty"`
-	Messages            []Message         `json:"messages,omitempty"`
-	Prompt              any               `json:"prompt,omitempty"`
-	Prefix              any               `json:"prefix,omitempty"`
-	Suffix              any               `json:"suffix,omitempty"`
-	Stream              bool              `json:"stream,omitempty"`
-	StreamOptions       *StreamOptions    `json:"stream_options,omitempty"`
-	MaxTokens           uint              `json:"max_tokens,omitempty"`
-	MaxCompletionTokens uint              `json:"max_completion_tokens,omitempty"`
-	ReasoningEffort     string            `json:"reasoning_effort,omitempty"`
-	Temperature         *float64          `json:"temperature,omitempty"`
-	TopP                float64           `json:"top_p,omitempty"`
-	TopK                int               `json:"top_k,omitempty"`
-	Stop                any               `json:"stop,omitempty"`
-	N                   int               `json:"n,omitempty"`
-	Input               any               `json:"input,omitempty"`
-	Instruction         string            `json:"instruction,omitempty"`
-	Size                string            `json:"size,omitempty"`
-	Functions           any               `json:"functions,omitempty"`
-	FrequencyPenalty    float64           `json:"frequency_penalty,omitempty"`
-	PresencePenalty     float64           `json:"presence_penalty,omitempty"`
-	ResponseFormat      *ResponseFormat   `json:"response_format,omitempty"`
-	EncodingFormat      any               `json:"encoding_format,omitempty"`
-	Seed                float64           `json:"seed,omitempty"`
-	Tools               []ToolCallRequest `json:"tools,omitempty"`
-	ToolChoice          any               `json:"tool_choice,omitempty"`
-	User                string            `json:"user,omitempty"`
-	LogProbs            bool              `json:"logprobs,omitempty"`
-	TopLogProbs         int               `json:"top_logprobs,omitempty"`
-	Dimensions          int               `json:"dimensions,omitempty"`
-	Modalities          any               `json:"modalities,omitempty"`
-	Audio               any               `json:"audio,omitempty"`
-	ExtraBody           any               `json:"extra_body,omitempty"`
+	Model               string         `json:"model,omitempty"`
+	Messages            []Message      `json:"messages,omitempty"`
+	Prompt              any            `json:"prompt,omitempty"`
+	Prefix              any            `json:"prefix,omitempty"`
+	Suffix              any            `json:"suffix,omitempty"`
+	Stream              bool           `json:"stream,omitempty"`
+	StreamOptions       *StreamOptions `json:"stream_options,omitempty"`
+	MaxTokens           uint           `json:"max_tokens,omitempty"`
+	MaxCompletionTokens uint           `json:"max_completion_tokens,omitempty"`
+	ReasoningEffort     string         `json:"reasoning_effort,omitempty"`
+	Temperature      *float64          `json:"temperature,omitempty"`
+	TopP             float64           `json:"top_p,omitempty"`
+	TopK             int               `json:"top_k,omitempty"`
+	Stop             any               `json:"stop,omitempty"`
+	N                int               `json:"n,omitempty"`
+	Input            any               `json:"input,omitempty"`
+	Instruction      string            `json:"instruction,omitempty"`
+	Size             string            `json:"size,omitempty"`
+	Functions        any               `json:"functions,omitempty"`
+	FrequencyPenalty float64           `json:"frequency_penalty,omitempty"`
+	PresencePenalty  float64           `json:"presence_penalty,omitempty"`
+	ResponseFormat   *ResponseFormat   `json:"response_format,omitempty"`
+	EncodingFormat   any               `json:"encoding_format,omitempty"`
+	Seed             float64           `json:"seed,omitempty"`
+	ParallelTooCalls *bool             `json:"parallel_tool_calls,omitempty"`
+	Tools            []ToolCallRequest `json:"tools,omitempty"`
+	ToolChoice       any               `json:"tool_choice,omitempty"`
+	User             string            `json:"user,omitempty"`
+	LogProbs         bool              `json:"logprobs,omitempty"`
+	TopLogProbs      int               `json:"top_logprobs,omitempty"`
+	Dimensions       int               `json:"dimensions,omitempty"`
+	Modalities       any               `json:"modalities,omitempty"`
+	Audio            any               `json:"audio,omitempty"`
+	EnableThinking   any               `json:"enable_thinking,omitempty"` // ali
+	ExtraBody        any               `json:"extra_body,omitempty"`
+	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
+  // OpenRouter Params
+	Reasoning json.RawMessage `json:"reasoning,omitempty"`
+}
+
+func (r *GeneralOpenAIRequest) ToMap() map[string]any {
+	result := make(map[string]any)
+	data, _ := common.EncodeJson(r)
+	_ = common.DecodeJson(data, &result)
+	return result
 }

 type ToolCallRequest struct {
@@ -70,11 +83,11 @@ type StreamOptions struct {
 	IncludeUsage bool `json:"include_usage,omitempty"`
 }

-func (r GeneralOpenAIRequest) GetMaxTokens() int {
+func (r *GeneralOpenAIRequest) GetMaxTokens() int {
 	return int(r.MaxTokens)
 }

-func (r GeneralOpenAIRequest) ParseInput() []string {
+func (r *GeneralOpenAIRequest) ParseInput() []string {
 	if r.Input == nil {
 		return nil
 	}
@@ -111,6 +124,10 @@ type MediaContent struct {
 	Text       string `json:"text,omitempty"`
 	ImageUrl   any    `json:"image_url,omitempty"`
 	InputAudio any    `json:"input_audio,omitempty"`
+	File       any    `json:"file,omitempty"`
+	VideoUrl   any    `json:"video_url,omitempty"`
+	// OpenRouter Params
+	CacheControl json.RawMessage `json:"cache_control,omitempty"`
 }

 func (m *MediaContent) GetImageMedia() *MessageImageUrl {
@@ -120,6 +137,20 @@ func (m *MediaContent) GetImageMedia() *MessageImageUrl {
 	return nil
 }

+func (m *MediaContent) GetInputAudio() *MessageInputAudio {
+	if m.InputAudio != nil {
+		return m.InputAudio.(*MessageInputAudio)
+	}
+	return nil
+}
+
+func (m *MediaContent) GetFile() *MessageFile {
+	if m.File != nil {
+		return m.File.(*MessageFile)
+	}
+	return nil
+}
+
 type MessageImageUrl struct {
 	Url      string `json:"url"`
 	Detail   string `json:"detail"`
@@ -135,10 +166,22 @@ type MessageInputAudio struct {
 	Format string `json:"format"`
 }

+type MessageFile struct {
+	FileName string `json:"filename,omitempty"`
+	FileData string `json:"file_data,omitempty"`
+	FileId   string `json:"file_id,omitempty"`
+}
+
+type MessageVideoUrl struct {
+	Url string `json:"url"`
+}
+
 const (
 	ContentTypeText       = "text"
 	ContentTypeImageURL   = "image_url"
 	ContentTypeInputAudio = "input_audio"
+	ContentTypeFile       = "file"
+	ContentTypeVideoUrl   = "video_url" // 阿里百炼视频识别
 )

 func (m *Message) GetPrefix() bool {
@@ -192,6 +235,12 @@ func (m *Message) StringContent() string {
 	return stringContent
 }

+func (m *Message) SetNullContent() {
+	m.Content = nil
+	m.parsedStringContent = nil
+	m.parsedContent = nil
+}
+
 func (m *Message) SetStringContent(content string) {
 	jsonContent, _ := json.Marshal(content)
 	m.Content = jsonContent
@@ -292,6 +341,39 @@ func (m *Message) ParseContent() []MediaContent {
 						})
 					}
 				}
+			case ContentTypeFile:
+				if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
+					fileId, ok3 := fileData["file_id"].(string)
+					if ok3 {
+						contentList = append(contentList, MediaContent{
+							Type: ContentTypeFile,
+							File: &MessageFile{
+								FileId: fileId,
+							},
+						})
+					} else {
+						fileName, ok1 := fileData["filename"].(string)
+						fileDataStr, ok2 := fileData["file_data"].(string)
+						if ok1 && ok2 {
+							contentList = append(contentList, MediaContent{
+								Type: ContentTypeFile,
+								File: &MessageFile{
+									FileName: fileName,
+									FileData: fileDataStr,
+								},
+							})
+						}
+					}
+				}
+			case ContentTypeVideoUrl:
+				if videoUrl, ok := contentItem["video_url"].(string); ok {
+					contentList = append(contentList, MediaContent{
+						Type: ContentTypeVideoUrl,
+						VideoUrl: &MessageVideoUrl{
+							Url: videoUrl,
+						},
+					})
+				}
 			}
 		}
 	}
@@ -301,3 +383,54 @@ func (m *Message) ParseContent() []MediaContent {
 	}
 	return contentList
 }
+
+type WebSearchOptions struct {
+	SearchContextSize string          `json:"search_context_size,omitempty"`
+	UserLocation      json.RawMessage `json:"user_location,omitempty"`
+}
+
+type OpenAIResponsesRequest struct {
+	Model              string               `json:"model"`
+	Input              json.RawMessage      `json:"input,omitempty"`
+	Include            json.RawMessage      `json:"include,omitempty"`
+	Instructions       json.RawMessage      `json:"instructions,omitempty"`
+	MaxOutputTokens    uint                 `json:"max_output_tokens,omitempty"`
+	Metadata           json.RawMessage      `json:"metadata,omitempty"`
+	ParallelToolCalls  bool                 `json:"parallel_tool_calls,omitempty"`
+	PreviousResponseID string               `json:"previous_response_id,omitempty"`
+	Reasoning          *Reasoning           `json:"reasoning,omitempty"`
+	ServiceTier        string               `json:"service_tier,omitempty"`
+	Store              bool                 `json:"store,omitempty"`
+	Stream             bool                 `json:"stream,omitempty"`
+	Temperature        float64              `json:"temperature,omitempty"`
+	Text               json.RawMessage      `json:"text,omitempty"`
+	ToolChoice         json.RawMessage      `json:"tool_choice,omitempty"`
+	Tools              []ResponsesToolsCall `json:"tools,omitempty"`
+	TopP               float64              `json:"top_p,omitempty"`
+	Truncation         string               `json:"truncation,omitempty"`
+	User               string               `json:"user,omitempty"`
+}
+
+type Reasoning struct {
+	Effort  string `json:"effort,omitempty"`
+	Summary string `json:"summary,omitempty"`
+}
+
+type ResponsesToolsCall struct {
+	Type string `json:"type"`
+	// Web Search
+	UserLocation      json.RawMessage `json:"user_location,omitempty"`
+	SearchContextSize string          `json:"search_context_size,omitempty"`
+	// File Search
+	VectorStoreIds []string        `json:"vector_store_ids,omitempty"`
+	MaxNumResults  uint            `json:"max_num_results,omitempty"`
+	Filters        json.RawMessage `json:"filters,omitempty"`
+	// Computer Use
+	DisplayWidth  uint   `json:"display_width,omitempty"`
+	DisplayHeight uint   `json:"display_height,omitempty"`
+	Environment   string `json:"environment,omitempty"`
+	// Function
+	Name        string          `json:"name,omitempty"`
+	Description string          `json:"description,omitempty"`
+	Parameters  json.RawMessage `json:"parameters,omitempty"`
+}
@@ -1,5 +1,7 @@
 package dto

+import "encoding/json"
+
 type SimpleResponse struct {
 	Usage `json:"usage"`
 	Error *OpenAIError `json:"error"`
@@ -166,10 +168,93 @@ type CompletionsStreamResponse struct {
 }

 type Usage struct {
-	PromptTokens           int                `json:"prompt_tokens"`
-	CompletionTokens       int                `json:"completion_tokens"`
-	TotalTokens            int                `json:"total_tokens"`
-	PromptCacheHitTokens   int                `json:"prompt_cache_hit_tokens,omitempty"`
+	PromptTokens         int `json:"prompt_tokens"`
+	CompletionTokens     int `json:"completion_tokens"`
+	TotalTokens          int `json:"total_tokens"`
+	PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"`
+
 	PromptTokensDetails    InputTokenDetails  `json:"prompt_tokens_details"`
 	CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
+	InputTokens            int                `json:"input_tokens"`
+	OutputTokens           int                `json:"output_tokens"`
+	InputTokensDetails     *InputTokenDetails `json:"input_tokens_details"`
+}
+
+type InputTokenDetails struct {
+	CachedTokens         int `json:"cached_tokens"`
+	CachedCreationTokens int `json:"-"`
+	TextTokens           int `json:"text_tokens"`
+	AudioTokens          int `json:"audio_tokens"`
+	ImageTokens          int `json:"image_tokens"`
+}
+
+type OutputTokenDetails struct {
+	TextTokens      int `json:"text_tokens"`
+	AudioTokens     int `json:"audio_tokens"`
+	ReasoningTokens int `json:"reasoning_tokens"`
+}
+
+type OpenAIResponsesResponse struct {
+	ID                 string               `json:"id"`
+	Object             string               `json:"object"`
+	CreatedAt          int                  `json:"created_at"`
+	Status             string               `json:"status"`
+	Error              *OpenAIError         `json:"error,omitempty"`
+	IncompleteDetails  *IncompleteDetails   `json:"incomplete_details,omitempty"`
+	Instructions       string               `json:"instructions"`
+	MaxOutputTokens    int                  `json:"max_output_tokens"`
+	Model              string               `json:"model"`
+	Output             []ResponsesOutput    `json:"output"`
+	ParallelToolCalls  bool                 `json:"parallel_tool_calls"`
+	PreviousResponseID string               `json:"previous_response_id"`
+	Reasoning          *Reasoning           `json:"reasoning"`
+	Store              bool                 `json:"store"`
+	Temperature        float64              `json:"temperature"`
+	ToolChoice         string               `json:"tool_choice"`
+	Tools              []ResponsesToolsCall `json:"tools"`
+	TopP               float64              `json:"top_p"`
+	Truncation         string               `json:"truncation"`
+	Usage              *Usage               `json:"usage"`
+	User               json.RawMessage      `json:"user"`
+	Metadata           json.RawMessage      `json:"metadata"`
+}
+
+type IncompleteDetails struct {
+	Reasoning string `json:"reasoning"`
+}
+
+type ResponsesOutput struct {
+	Type    string                   `json:"type"`
+	ID      string                   `json:"id"`
+	Status  string                   `json:"status"`
+	Role    string                   `json:"role"`
+	Content []ResponsesOutputContent `json:"content"`
+}
+
+type ResponsesOutputContent struct {
+	Type        string        `json:"type"`
+	Text        string        `json:"text"`
+	Annotations []interface{} `json:"annotations"`
+}
+
+const (
+	BuildInToolWebSearchPreview = "web_search_preview"
+	BuildInToolFileSearch       = "file_search"
+)
+
+const (
+	BuildInCallWebSearchCall = "web_search_call"
+)
+
+const (
+	ResponsesOutputTypeItemAdded = "response.output_item.added"
+	ResponsesOutputTypeItemDone  = "response.output_item.done"
+)
+
+// ResponsesStreamResponse 用于处理 /v1/responses 流式响应
+type ResponsesStreamResponse struct {
+	Type     string                   `json:"type"`
+	Response *OpenAIResponsesResponse `json:"response,omitempty"`
+	Delta    string                   `json:"delta,omitempty"`
+	Item     *ResponsesOutput         `json:"item,omitempty"`
 }
@@ -43,19 +43,6 @@ type RealtimeUsage struct {
 	OutputTokenDetails OutputTokenDetails `json:"output_token_details"`
 }

-type InputTokenDetails struct {
-	CachedTokens         int `json:"cached_tokens"`
-	CachedCreationTokens int
-	TextTokens           int `json:"text_tokens"`
-	AudioTokens          int `json:"audio_tokens"`
-	ImageTokens          int `json:"image_tokens"`
-}
-
-type OutputTokenDetails struct {
-	TextTokens  int `json:"text_tokens"`
-	AudioTokens int `json:"audio_tokens"`
-}
-
 type RealtimeSession struct {
 	Modalities              []string                `json:"modalities"`
 	Instructions            string                  `json:"instructions"`
@@ -12,6 +12,7 @@ import (
 	"one-api/model"
 	"one-api/router"
 	"one-api/service"
+	"one-api/setting/operation_setting"
 	"os"
 	"strconv"

@@ -33,7 +34,7 @@ var indexPage []byte
 func main() {
 	err := godotenv.Load(".env")
 	if err != nil {
-		common.SysLog("Support for .env file is disabled")
+		common.SysLog("Support for .env file is disabled: " + err.Error())
 	}

 	common.LoadEnv()
@@ -51,6 +52,9 @@ func main() {
 	if err != nil {
 		common.FatalLog("failed to initialize database: " + err.Error())
 	}
+
+	model.CheckSetup()
+
 	// Initialize SQL Database
 	err = model.InitLogDB()
 	if err != nil {
@@ -69,10 +73,15 @@ func main() {
 		common.FatalLog("failed to initialize Redis: " + err.Error())
 	}

+	// Initialize model settings
+	operation_setting.InitRatioSettings()
 	// Initialize constants
 	constant.InitEnv()
 	// Initialize options
 	model.InitOptionMap()
+
+	service.InitTokenEncoders()
+
 	if common.RedisEnabled {
 		// for compatibility with old versions
 		common.MemoryCacheEnabled = true
@@ -80,9 +89,22 @@ func main() {
 	if common.MemoryCacheEnabled {
 		common.SysLog("memory cache enabled")
 		common.SysError(fmt.Sprintf("sync frequency: %d seconds", common.SyncFrequency))
-		model.InitChannelCache()
-	}
-	if common.MemoryCacheEnabled {
+
+		// Add panic recovery and retry for InitChannelCache
+		func() {
+			defer func() {
+				if r := recover(); r != nil {
+					common.SysError(fmt.Sprintf("InitChannelCache panic: %v, retrying once", r))
+					// Retry once
+					_, fixErr := model.FixAbility()
+					if fixErr != nil {
+						common.SysError(fmt.Sprintf("InitChannelCache failed: %s", fixErr.Error()))
+					}
+				}
+			}()
+			model.InitChannelCache()
+		}()
+
 		go model.SyncOptions(common.SyncFrequency)
 		go model.SyncChannelCache(common.SyncFrequency)
 	}
@@ -126,8 +148,6 @@ func main() {
 		common.SysLog("pprof enabled")
 	}

-	service.InitTokenEncoders()
-
 	// Initialize HTTP server
 	server := gin.New()
 	server.Use(gin.CustomRecovery(func(c *gin.Context, err any) {
@@ -1,13 +1,14 @@
 package middleware

 import (
-	"github.com/gin-contrib/sessions"
-	"github.com/gin-gonic/gin"
 	"net/http"
 	"one-api/common"
 	"one-api/model"
 	"strconv"
 	"strings"
+
+	"github.com/gin-contrib/sessions"
+	"github.com/gin-gonic/gin"
 )

 func validUserInfo(username string, role int) bool {
@@ -182,6 +183,18 @@ func TokenAuth() func(c *gin.Context) {
 				c.Request.Header.Set("Authorization", "Bearer "+key)
 			}
 		}
+		// gemini api 从query中获取key
+		if strings.HasPrefix(c.Request.URL.Path, "/v1beta/models/") {
+			skKey := c.Query("key")
+			if skKey != "" {
+				c.Request.Header.Set("Authorization", "Bearer "+skKey)
+			}
+			// 从x-goog-api-key header中获取key
+			xGoogKey := c.Request.Header.Get("x-goog-api-key")
+			if xGoogKey != "" {
+				c.Request.Header.Set("Authorization", "Bearer "+xGoogKey)
+			}
+		}
 		key := c.Request.Header.Get("Authorization")
 		parts := make([]string, 0)
 		key = strings.TrimPrefix(key, "Bearer ")
@@ -162,7 +162,15 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 		}
 		c.Set("platform", string(constant.TaskPlatformSuno))
 		c.Set("relay_mode", relayMode)
-	} else if !strings.HasPrefix(c.Request.URL.Path, "/v1/audio/transcriptions") {
+	} else if strings.HasPrefix(c.Request.URL.Path, "/v1beta/models/") {
+		// Gemini API 路径处理: /v1beta/models/gemini-2.0-flash:generateContent
+		relayMode := relayconstant.RelayModeGemini
+		modelName := extractModelNameFromGeminiPath(c.Request.URL.Path)
+		if modelName != "" {
+			modelRequest.Model = modelName
+		}
+		c.Set("relay_mode", relayMode)
+	} else if !strings.HasPrefix(c.Request.URL.Path, "/v1/audio/transcriptions") && !strings.HasPrefix(c.Request.URL.Path, "/v1/images/edits") {
 		err = common.UnmarshalBodyReusable(c, &modelRequest)
 	}
 	if err != nil {
@@ -184,6 +192,8 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 	}
 	if strings.HasPrefix(c.Request.URL.Path, "/v1/images/generations") {
 		modelRequest.Model = common.GetStringIfEmpty(modelRequest.Model, "dall-e")
+	} else if strings.HasPrefix(c.Request.URL.Path, "/v1/images/edits") {
+		modelRequest.Model = common.GetStringIfEmpty(c.PostForm("model"), "gpt-image-1")
 	}
 	if strings.HasPrefix(c.Request.URL.Path, "/v1/audio") {
 		relayMode := relayconstant.RelayModeAudioSpeech
@@ -211,7 +221,9 @@ func SetupContextForSelectedChannel(c *gin.Context, channel *model.Channel, mode
 	c.Set("channel_id", channel.Id)
 	c.Set("channel_name", channel.Name)
 	c.Set("channel_type", channel.Type)
+	c.Set("channel_create_time", channel.CreatedTime)
 	c.Set("channel_setting", channel.GetSetting())
+	c.Set("param_override", channel.GetParamOverride())
 	if nil != channel.OpenAIOrganization && "" != *channel.OpenAIOrganization {
 		c.Set("channel_organization", *channel.OpenAIOrganization)
 	}
@@ -236,5 +248,35 @@ func SetupContextForSelectedChannel(c *gin.Context, channel *model.Channel, mode
 		c.Set("api_version", channel.Other)
 	case common.ChannelTypeMokaAI:
 		c.Set("api_version", channel.Other)
+	case common.ChannelTypeCoze:
+		c.Set("bot_id", channel.Other)
 	}
 }
+
+// extractModelNameFromGeminiPath 从 Gemini API URL 路径中提取模型名
+// 输入格式: /v1beta/models/gemini-2.0-flash:generateContent
+// 输出: gemini-2.0-flash
+func extractModelNameFromGeminiPath(path string) string {
+	// 查找 "/models/" 的位置
+	modelsPrefix := "/models/"
+	modelsIndex := strings.Index(path, modelsPrefix)
+	if modelsIndex == -1 {
+		return ""
+	}
+
+	// 从 "/models/" 之后开始提取
+	startIndex := modelsIndex + len(modelsPrefix)
+	if startIndex >= len(path) {
+		return ""
+	}
+
+	// 查找 ":" 的位置，模型名在 ":" 之前
+	colonIndex := strings.Index(path[startIndex:], ":")
+	if colonIndex == -1 {
+		// 如果没有找到 ":"，返回从 "/models/" 到路径结尾的部分
+		return path[startIndex:]
+	}
+
+	// 返回模型名部分
+	return path[startIndex : startIndex+colonIndex]
+}
@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"net/http"
 	"one-api/common"
+	"one-api/common/limiter"
+	"one-api/constant"
 	"one-api/setting"
 	"strconv"
 	"time"
@@ -78,21 +80,9 @@ func redisRateLimitHandler(duration int64, totalMaxCount, successMaxCount int) g
 		ctx := context.Background()
 		rdb := common.RDB

-		// 1. 检查总请求数限制（当totalMaxCount为0时会自动跳过）
-		totalKey := fmt.Sprintf("rateLimit:%s:%s", ModelRequestRateLimitCountMark, userId)
-		allowed, err := checkRedisRateLimit(ctx, rdb, totalKey, totalMaxCount, duration)
-		if err != nil {
-			fmt.Println("检查总请求数限制失败:", err.Error())
-			abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
-			return
-		}
-		if !allowed {
-			abortWithOpenAiMessage(c, http.StatusTooManyRequests, fmt.Sprintf("您已达到总请求数限制：%d分钟内最多请求%d次，包括失败次数，请检查您的请求是否正确", setting.ModelRequestRateLimitDurationMinutes, totalMaxCount))
-		}
-
-		// 2. 检查成功请求数限制
+		// 1. 检查成功请求数限制
 		successKey := fmt.Sprintf("rateLimit:%s:%s", ModelRequestRateLimitSuccessCountMark, userId)
-		allowed, err = checkRedisRateLimit(ctx, rdb, successKey, successMaxCount, duration)
+		allowed, err := checkRedisRateLimit(ctx, rdb, successKey, successMaxCount, duration)
 		if err != nil {
 			fmt.Println("检查成功请求数限制失败:", err.Error())
 			abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
@@ -103,8 +93,29 @@ func redisRateLimitHandler(duration int64, totalMaxCount, successMaxCount int) g
 			return
 		}

-		// 3. 记录总请求（当totalMaxCount为0时会自动跳过）
-		recordRedisRequest(ctx, rdb, totalKey, totalMaxCount)
+		//2.检查总请求数限制并记录总请求（当totalMaxCount为0时会自动跳过，使用令牌桶限流器
+		if totalMaxCount > 0 {
+			totalKey := fmt.Sprintf("rateLimit:%s", userId)
+			// 初始化
+			tb := limiter.New(ctx, rdb)
+			allowed, err = tb.Allow(
+				ctx,
+				totalKey,
+				limiter.WithCapacity(int64(totalMaxCount)*duration),
+				limiter.WithRate(int64(totalMaxCount)),
+				limiter.WithRequested(duration),
+			)
+
+			if err != nil {
+				fmt.Println("检查总请求数限制失败:", err.Error())
+				abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
+				return
+			}
+
+			if !allowed {
+				abortWithOpenAiMessage(c, http.StatusTooManyRequests, fmt.Sprintf("您已达到总请求数限制：%d分钟内最多请求%d次，包括失败次数，请检查您的请求是否正确", setting.ModelRequestRateLimitDurationMinutes, totalMaxCount))
+			}
+		}

 		// 4. 处理请求
 		c.Next()
@@ -165,6 +176,19 @@ func ModelRequestRateLimit() func(c *gin.Context) {
 		totalMaxCount := setting.ModelRequestRateLimitCount
 		successMaxCount := setting.ModelRequestRateLimitSuccessCount

+		// 获取分组
+		group := c.GetString("token_group")
+		if group == "" {
+			group = c.GetString(constant.ContextKeyUserGroup)
+		}
+
+		//获取分组的限流配置
+		groupTotalCount, groupSuccessCount, found := setting.GetGroupRateLimit(group)
+		if found {
+			totalMaxCount = groupTotalCount
+			successMaxCount = groupSuccessCount
+		}
+
 		// 根据存储类型选择并执行限流处理器
 		if common.RedisEnabled {
 			redisRateLimitHandler(duration, totalMaxCount, successMaxCount)(c)
@@ -50,7 +50,7 @@ func getPriority(group string, model string, retry int) (int, error) {
 	err := DB.Model(&Ability{}).
 		Select("DISTINCT(priority)").
 		Where(groupCol+" = ? and model = ? and enabled = "+trueVal, group, model).
-		Order("priority DESC").              // 按优先级降序排序
+		Order("priority DESC"). // 按优先级降序排序
 		Pluck("priority", &priorities).Error // Pluck用于将查询的结果直接扫描到一个切片中

 	if err != nil {
@@ -261,12 +261,28 @@ func FixAbility() (int, error) {
 		common.SysError(fmt.Sprintf("Get channel ids from channel table failed: %s", err.Error()))
 		return 0, err
 	}
-	// Delete abilities of channels that are not in channel table
-	err = DB.Where("channel_id NOT IN (?)", channelIds).Delete(&Ability{}).Error
-	if err != nil {
-		common.SysError(fmt.Sprintf("Delete abilities of channels that are not in channel table failed: %s", err.Error()))
-		return 0, err
+
+	// Delete abilities of channels that are not in channel table - in batches to avoid too many placeholders
+	if len(channelIds) > 0 {
+		// Process deletion in chunks to avoid "too many placeholders" error
+		for _, chunk := range lo.Chunk(channelIds, 100) {
+			err = DB.Where("channel_id NOT IN (?)", chunk).Delete(&Ability{}).Error
+			if err != nil {
+				common.SysError(fmt.Sprintf("Delete abilities of channels (batch) that are not in channel table failed: %s", err.Error()))
+				return 0, err
+			}
+		}
+	} else {
+		// If no channels exist, delete all abilities
+		err = DB.Delete(&Ability{}).Error
+		if err != nil {
+			common.SysError(fmt.Sprintf("Delete all abilities failed: %s", err.Error()))
+			return 0, err
+		}
+		common.SysLog("Delete all abilities successfully")
+		return 0, nil
 	}
+
 	common.SysLog(fmt.Sprintf("Delete abilities of channels that are not in channel table successfully, ids: %v", channelIds))
 	count += len(channelIds)

@@ -275,17 +291,26 @@ func FixAbility() (int, error) {
 	err = DB.Table("abilities").Distinct("channel_id").Pluck("channel_id", &abilityChannelIds).Error
 	if err != nil {
 		common.SysError(fmt.Sprintf("Get channel ids from abilities table failed: %s", err.Error()))
-		return 0, err
+		return count, err
 	}
+
 	var channels []Channel
 	if len(abilityChannelIds) == 0 {
 		err = DB.Find(&channels).Error
 	} else {
-		err = DB.Where("id NOT IN (?)", abilityChannelIds).Find(&channels).Error
-	}
-	if err != nil {
-		return 0, err
+		// Process query in chunks to avoid "too many placeholders" error
+		err = nil
+		for _, chunk := range lo.Chunk(abilityChannelIds, 100) {
+			var channelsChunk []Channel
+			err = DB.Where("id NOT IN (?)", chunk).Find(&channelsChunk).Error
+			if err != nil {
+				common.SysError(fmt.Sprintf("Find channels not in abilities table failed: %s", err.Error()))
+				return count, err
+			}
+			channels = append(channels, channelsChunk...)
+		}
 	}
+
 	for _, channel := range channels {
 		err := channel.UpdateAbilities(nil)
 		if err != nil {
@@ -16,6 +16,9 @@ var channelsIDM map[int]*Channel
 var channelSyncLock sync.RWMutex

 func InitChannelCache() {
+	if !common.MemoryCacheEnabled {
+		return
+	}
 	newChannelId2channel := make(map[int]*Channel)
 	var channels []*Channel
 	DB.Where("status = ?", common.ChannelStatusEnabled).Find(&channels)
@@ -84,9 +87,11 @@ func CacheGetRandomSatisfiedChannel(group string, model string, retry int) (*Cha
 	if !common.MemoryCacheEnabled {
 		return GetRandomSatisfiedChannel(group, model, retry)
 	}
+
 	channelSyncLock.RLock()
-	defer channelSyncLock.RUnlock()
 	channels := group2model2channels[group][model]
+	channelSyncLock.RUnlock()
+
 	if len(channels) == 0 {
 		return nil, errors.New("channel not found")
 	}
@@ -36,6 +36,7 @@ type Channel struct {
 	OtherInfo         string  `json:"other_info"`
 	Tag               *string `json:"tag" gorm:"index"`
 	Setting           *string `json:"setting" gorm:"type:text"`
+	ParamOverride     *string `json:"param_override" gorm:"type:text"`
 }

 func (channel *Channel) GetModels() []string {
@@ -45,6 +46,17 @@ func (channel *Channel) GetModels() []string {
 	return strings.Split(strings.Trim(channel.Models, ","), ",")
 }

+func (channel *Channel) GetGroups() []string {
+	if channel.Group == "" {
+		return []string{}
+	}
+	groups := strings.Split(strings.Trim(channel.Group, ","), ",")
+	for i, group := range groups {
+		groups[i] = strings.TrimSpace(group)
+	}
+	return groups
+}
+
 func (channel *Channel) GetOtherInfo() map[string]interface{} {
 	otherInfo := make(map[string]interface{})
 	if channel.OtherInfo != "" {
@@ -118,10 +130,15 @@ func SearchChannels(keyword string, group string, model string, idSort bool) ([]

 	// 如果是 PostgreSQL，使用双引号
 	if common.UsingPostgreSQL {
-		keyCol = `"key"`
 		modelsCol = `"models"`
 	}

+	baseURLCol := "`base_url`"
+	// 如果是 PostgreSQL，使用双引号
+	if common.UsingPostgreSQL {
+		baseURLCol = `"base_url"`
+	}
+
 	order := "priority desc"
 	if idSort {
 		order = "id desc"
@@ -141,11 +158,11 @@ func SearchChannels(keyword string, group string, model string, idSort bool) ([]
 			// sqlite, PostgreSQL
 			groupCondition = `(',' || ` + groupCol + ` || ',') LIKE ?`
 		}
-		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ?) AND " + modelsCol + ` LIKE ? AND ` + groupCondition
-		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+model+"%", "%,"+group+",%")
+		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ? OR " + baseURLCol + " LIKE ?) AND " + modelsCol + ` LIKE ? AND ` + groupCondition
+		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+keyword+"%", "%"+model+"%", "%,"+group+",%")
 	} else {
-		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ?) AND " + modelsCol + " LIKE ?"
-		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+model+"%")
+		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ? OR " + baseURLCol + " LIKE ?) AND " + modelsCol + " LIKE ?"
+		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+keyword+"%", "%"+model+"%")
 	}

 	// 执行查询
@@ -449,6 +466,12 @@ func SearchTags(keyword string, group string, model string, idSort bool) ([]*str
 		modelsCol = `"models"`
 	}

+	baseURLCol := "`base_url`"
+	// 如果是 PostgreSQL，使用双引号
+	if common.UsingPostgreSQL {
+		baseURLCol = `"base_url"`
+	}
+
 	order := "priority desc"
 	if idSort {
 		order = "id desc"
@@ -468,11 +491,11 @@ func SearchTags(keyword string, group string, model string, idSort bool) ([]*str
 			// sqlite, PostgreSQL
 			groupCondition = `(',' || ` + groupCol + ` || ',') LIKE ?`
 		}
-		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ?) AND " + modelsCol + ` LIKE ? AND ` + groupCondition
-		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+model+"%", "%,"+group+",%")
+		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ? OR " + baseURLCol + " LIKE ?) AND " + modelsCol + ` LIKE ? AND ` + groupCondition
+		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+keyword+"%", "%"+model+"%", "%,"+group+",%")
 	} else {
-		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ?) AND " + modelsCol + " LIKE ?"
-		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+model+"%")
+		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ? OR " + baseURLCol + " LIKE ?) AND " + modelsCol + " LIKE ?"
+		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+keyword+"%", "%"+model+"%")
 	}

 	subQuery := baseQuery.Where(whereClause, args...).
@@ -511,6 +534,17 @@ func (channel *Channel) SetSetting(setting map[string]interface{}) {
 	channel.Setting = common.GetPointer[string](string(settingBytes))
 }

+func (channel *Channel) GetParamOverride() map[string]interface{} {
+	paramOverride := make(map[string]interface{})
+	if channel.ParamOverride != nil && *channel.ParamOverride != "" {
+		err := json.Unmarshal([]byte(*channel.ParamOverride), &paramOverride)
+		if err != nil {
+			common.SysError("failed to unmarshal param override: " + err.Error())
+		}
+	}
+	return paramOverride
+}
+
 func GetChannelsByIds(ids []int) ([]*Channel, error) {
 	var channels []*Channel
 	err := DB.Where("id in (?)", ids).Find(&channels).Error
@@ -1,6 +1,7 @@
 package model

 import (
+	"context"
 	"fmt"
 	"one-api/common"
 	"os"
@@ -40,6 +41,7 @@ const (
 	LogTypeConsume
 	LogTypeManage
 	LogTypeSystem
+	LogTypeError
 )

 func formatUserLogs(logs []*Log) {
@@ -88,6 +90,35 @@ func RecordLog(userId int, logType int, content string) {
 	}
 }

+func RecordErrorLog(c *gin.Context, userId int, channelId int, modelName string, tokenName string, content string, tokenId int, useTimeSeconds int,
+	isStream bool, group string, other map[string]interface{}) {
+	common.LogInfo(c, fmt.Sprintf("record error log: userId=%d, channelId=%d, modelName=%s, tokenName=%s, content=%s", userId, channelId, modelName, tokenName, content))
+	username := c.GetString("username")
+	otherStr := common.MapToJsonStr(other)
+	log := &Log{
+		UserId:           userId,
+		Username:         username,
+		CreatedAt:        common.GetTimestamp(),
+		Type:             LogTypeError,
+		Content:          content,
+		PromptTokens:     0,
+		CompletionTokens: 0,
+		TokenName:        tokenName,
+		ModelName:        modelName,
+		Quota:            0,
+		ChannelId:        channelId,
+		TokenId:          tokenId,
+		UseTime:          useTimeSeconds,
+		IsStream:         isStream,
+		Group:            group,
+		Other:            otherStr,
+	}
+	err := LOG_DB.Create(log).Error
+	if err != nil {
+		common.LogError(c, "failed to record log: "+err.Error())
+	}
+}
+
 func RecordConsumeLog(c *gin.Context, userId int, channelId int, promptTokens int, completionTokens int,
 	modelName string, tokenName string, quota int, content string, tokenId int, userQuota int, useTimeSeconds int,
 	isStream bool, group string, other map[string]interface{}) {
@@ -310,7 +341,25 @@ func SumUsedToken(logType int, startTimestamp int64, endTimestamp int64, modelNa
 	return token
 }

-func DeleteOldLog(targetTimestamp int64) (int64, error) {
-	result := LOG_DB.Where("created_at < ?", targetTimestamp).Delete(&Log{})
-	return result.RowsAffected, result.Error
+func DeleteOldLog(ctx context.Context, targetTimestamp int64, limit int) (int64, error) {
+	var total int64 = 0
+
+	for {
+		if nil != ctx.Err() {
+			return total, ctx.Err()
+		}
+
+		result := LOG_DB.Where("created_at < ?", targetTimestamp).Limit(limit).Delete(&Log{})
+		if nil != result.Error {
+			return total, result.Error
+		}
+
+		total += result.RowsAffected
+
+		if result.RowsAffected < int64(limit) {
+			break
+		}
+	}
+
+	return total, nil
 }
@@ -3,6 +3,7 @@ package model
 import (
 	"log"
 	"one-api/common"
+	"one-api/constant"
 	"os"
 	"strings"
 	"sync"
@@ -55,6 +56,33 @@ func createRootAccountIfNeed() error {
 	return nil
 }

+func CheckSetup() {
+	setup := GetSetup()
+	if setup == nil {
+		// No setup record exists, check if we have a root user
+		if RootUserExists() {
+			common.SysLog("system is not initialized, but root user exists")
+			// Create setup record
+			newSetup := Setup{
+				Version:       common.Version,
+				InitializedAt: time.Now().Unix(),
+			}
+			err := DB.Create(&newSetup).Error
+			if err != nil {
+				common.SysLog("failed to create setup record: " + err.Error())
+			}
+			constant.Setup = true
+		} else {
+			common.SysLog("system is not initialized and no root user exists")
+			constant.Setup = false
+		}
+	} else {
+		// Setup record exists, system is initialized
+		common.SysLog("system is already initialized at: " + time.Unix(setup.InitializedAt, 0).String())
+		constant.Setup = true
+	}
+}
+
 func chooseDB(envName string) (*gorm.DB, error) {
 	defer func() {
 		initCol()
@@ -214,8 +242,9 @@ func migrateDB() error {
 	if err != nil {
 		return err
 	}
+	err = DB.AutoMigrate(&Setup{})
 	common.SysLog("database migrated")
-	err = createRootAccountIfNeed()
+	//err = createRootAccountIfNeed()
 	return err
 }

@@ -67,6 +67,7 @@ func InitOptionMap() {
 	common.OptionMap["ServerAddress"] = ""
 	common.OptionMap["WorkerUrl"] = setting.WorkerUrl
 	common.OptionMap["WorkerValidKey"] = setting.WorkerValidKey
+	common.OptionMap["WorkerAllowHttpImageRequestEnabled"] = strconv.FormatBool(setting.WorkerAllowHttpImageRequestEnabled)
 	common.OptionMap["PayAddress"] = ""
 	common.OptionMap["CustomCallbackAddress"] = ""
 	common.OptionMap["EpayId"] = ""
@@ -92,6 +93,7 @@ func InitOptionMap() {
 	common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount)
 	common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes)
 	common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount)
+	common.OptionMap["ModelRequestRateLimitGroup"] = setting.ModelRequestRateLimitGroup2JSONString()
 	common.OptionMap["ModelRatio"] = operation_setting.ModelRatio2JSONString()
 	common.OptionMap["ModelPrice"] = operation_setting.ModelPrice2JSONString()
 	common.OptionMap["CacheRatio"] = operation_setting.CacheRatio2JSONString()
@@ -256,6 +258,8 @@ func updateOptionMap(key string, value string) (err error) {
 			setting.StopOnSensitiveEnabled = boolValue
 		case "SMTPSSLEnabled":
 			common.SMTPSSLEnabled = boolValue
+		case "WorkerAllowHttpImageRequestEnabled":
+			setting.WorkerAllowHttpImageRequestEnabled = boolValue
 		}
 	}
 	switch key {
@@ -338,6 +342,8 @@ func updateOptionMap(key string, value string) (err error) {
 		setting.ModelRequestRateLimitDurationMinutes, _ = strconv.Atoi(value)
 	case "ModelRequestRateLimitSuccessCount":
 		setting.ModelRequestRateLimitSuccessCount, _ = strconv.Atoi(value)
+	case "ModelRequestRateLimitGroup":
+		err = setting.UpdateModelRequestRateLimitGroupByJSONString(value)
 	case "RetryTimes":
 		common.RetryTimes, _ = strconv.Atoi(value)
 	case "DataExportInterval":
@@ -0,0 +1,16 @@
+package model
+
+type Setup struct {
+	ID            uint   `json:"id" gorm:"primaryKey"`
+	Version       string `json:"version" gorm:"type:varchar(50);not null"`
+	InitializedAt int64  `json:"initialized_at" gorm:"type:bigint;not null"`
+}
+
+func GetSetup() *Setup {
+	var setup Setup
+	err := DB.First(&setup).Error
+	if err != nil {
+		return nil
+	}
+	return &setup
+}
@@ -18,6 +18,7 @@ type User struct {
 	Id               int            `json:"id"`
 	Username         string         `json:"username" gorm:"unique;index" validate:"max=12"`
 	Password         string         `json:"password" gorm:"not null;" validate:"min=8,max=20"`
+	OriginalPassword string         `json:"original_password" gorm:"-:all"` // this field is only for Password change verification, don't save it to database!
 	DisplayName      string         `json:"display_name" gorm:"index" validate:"max=20"`
 	Role             int            `json:"role" gorm:"type:int;default:1"`   // admin, common
 	Status           int            `json:"status" gorm:"type:int;default:1"` // enabled, disabled
@@ -108,7 +109,7 @@ func CheckUserExistOrDeleted(username string, email string) (bool, error) {

 func GetMaxUserId() int {
 	var user User
-	DB.Last(&user)
+	DB.Unscoped().Last(&user)
 	return user.Id
 }

@@ -808,3 +809,12 @@ func (user *User) FillUserByLinuxDOId() error {
 	err := DB.Where("linux_do_id = ?", user.LinuxDOId).First(user).Error
 	return err
 }
+
+func RootUserExists() bool {
+	var user User
+	err := DB.Where("role = ?", common.RoleRootUser).First(&user).Error
+	if err != nil {
+		return false
+	}
+	return true
+}
@@ -1,11 +1,12 @@
 package channel

 import (
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	relaycommon "one-api/relay/common"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor interface {
@@ -18,6 +19,7 @@ type Adaptor interface {
 	ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error)
 	ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error)
 	ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error)
+	ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error)
 	DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error)
 	DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *dto.OpenAIErrorWithStatusCode)
 	GetModelList() []string
@@ -3,7 +3,6 @@ package ali
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -11,6 +10,8 @@ import (
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -32,6 +33,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 		fullRequestURL = fmt.Sprintf("%s/api/v1/services/embeddings/text-embedding/text-embedding", info.BaseUrl)
 	case constant.RelayModeImagesGenerations:
 		fullRequestURL = fmt.Sprintf("%s/api/v1/services/aigc/text2image/image-synthesis", info.BaseUrl)
+	case constant.RelayModeCompletions:
+		fullRequestURL = fmt.Sprintf("%s/compatible-mode/v1/completions", info.BaseUrl)
 	default:
 		fullRequestURL = fmt.Sprintf("%s/compatible-mode/v1/chat/completions", info.BaseUrl)
 	}
@@ -54,6 +57,12 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
+
+	// fix: ali parameter.enable_thinking must be set to false for non-streaming calls
+	if !info.IsStream {
+		request.EnableThinking = false
+	}
+
 	switch info.RelayMode {
 	default:
 		aliReq := requestOpenAI2Ali(*request)
@@ -79,6 +88,11 @@ func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInf
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -1,7 +1,12 @@
 package ali

 var ModelList = []string{
-	"qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext",
+	"qwen-turbo",
+	"qwen-plus",
+	"qwen-max",
+	"qwen-max-longcontext",
+	"qwq-32b",
+	"qwen3-235b-a22b",
 	"text-embedding-v1",
 }

@@ -1,16 +1,23 @@
 package channel

 import (
+	"context"
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
-	"github.com/gorilla/websocket"
 	"io"
 	"net/http"
 	common2 "one-api/common"
 	"one-api/relay/common"
 	"one-api/relay/constant"
+	"one-api/relay/helper"
 	"one-api/service"
+	"one-api/setting/operation_setting"
+	"sync"
+	"time"
+
+	"github.com/bytedance/gopkg/util/gopool"
+	"github.com/gin-gonic/gin"
+	"github.com/gorilla/websocket"
 )

 func SetupApiRequestHeader(info *common.RelayInfo, c *gin.Context, req *http.Header) {
@@ -55,6 +62,9 @@ func DoFormRequest(a Adaptor, c *gin.Context, info *common.RelayInfo, requestBod
 	if err != nil {
 		return nil, fmt.Errorf("get request url failed: %w", err)
 	}
+	if common2.DebugEnabled {
+		println("fullRequestURL:", fullRequestURL)
+	}
 	req, err := http.NewRequest(c.Request.Method, fullRequestURL, requestBody)
 	if err != nil {
 		return nil, fmt.Errorf("new request failed: %w", err)
@@ -94,6 +104,65 @@ func DoWssRequest(a Adaptor, c *gin.Context, info *common.RelayInfo, requestBody
 	return targetConn, nil
 }

+func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.CancelFunc {
+	pingerCtx, stopPinger := context.WithCancel(context.Background())
+
+	gopool.Go(func() {
+		defer func() {
+			if common2.DebugEnabled {
+				println("SSE ping goroutine stopped.")
+			}
+		}()
+
+		if pingInterval <= 0 {
+			pingInterval = helper.DefaultPingInterval
+		}
+
+		ticker := time.NewTicker(pingInterval)
+		// 退出时清理 ticker
+		defer ticker.Stop()
+
+		var pingMutex sync.Mutex
+		if common2.DebugEnabled {
+			println("SSE ping goroutine started")
+		}
+
+		for {
+			select {
+			// 发送 ping 数据
+			case <-ticker.C:
+				if err := sendPingData(c, &pingMutex); err != nil {
+					return
+				}
+			// 收到退出信号
+			case <-pingerCtx.Done():
+				return
+			// request 结束
+			case <-c.Request.Context().Done():
+				return
+			}
+		}
+	})
+
+	return stopPinger
+}
+
+func sendPingData(c *gin.Context, mutex *sync.Mutex) error {
+	mutex.Lock()
+	defer mutex.Unlock()
+
+	err := helper.PingData(c)
+	if err != nil {
+		common2.LogError(c, "SSE ping error: "+err.Error())
+		return err
+	}
+
+	if common2.DebugEnabled {
+		println("SSE ping data sent.")
+	}
+	return nil
+}
+
 func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http.Response, error) {
 	var client *http.Client
 	var err error
@@ -105,13 +174,28 @@ func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http
 	} else {
 		client = service.GetHttpClient()
 	}
+
+	if info.IsStream {
+		helper.SetEventStreamHeaders(c)
+
+		// 处理流式请求的 ping 保活
+		generalSettings := operation_setting.GetGeneralSetting()
+		if generalSettings.PingIntervalEnabled {
+			pingInterval := time.Duration(generalSettings.PingIntervalSeconds) * time.Second
+			stopPinger := startPingKeepAlive(c, pingInterval)
+			defer stopPinger()
+		}
+	}
+
 	resp, err := client.Do(req)
+
 	if err != nil {
 		return nil, err
 	}
 	if resp == nil {
 		return nil, errors.New("resp is nil")
 	}
+
 	_ = req.Body.Close()
 	_ = c.Request.Body.Close()
 	return resp, nil
@@ -2,13 +2,14 @@ package aws

 import (
 	"errors"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel/claude"
 	relaycommon "one-api/relay/common"
 	"one-api/setting/model_setting"
+
+	"github.com/gin-gonic/gin"
 )

 const (
@@ -74,6 +75,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return nil, nil
 }
@@ -11,6 +11,8 @@ var awsModelIDMap = map[string]string{
 	"claude-3-5-sonnet-20241022": "anthropic.claude-3-5-sonnet-20241022-v2:0",
 	"claude-3-5-haiku-20241022":  "anthropic.claude-3-5-haiku-20241022-v1:0",
 	"claude-3-7-sonnet-20250219": "anthropic.claude-3-7-sonnet-20250219-v1:0",
+	"claude-sonnet-4-20250514":   "anthropic.claude-sonnet-4-20250514-v1:0",
+	"claude-opus-4-20250514":     "anthropic.claude-opus-4-20250514-v1:0",
 }

 var awsModelCanCrossRegionMap = map[string]map[string]bool{
@@ -41,6 +43,16 @@ var awsModelCanCrossRegionMap = map[string]map[string]bool{
 	},
 	"anthropic.claude-3-7-sonnet-20250219-v1:0": {
 		"us": true,
+		"ap": true,
+		"eu": true,
+	},
+	"anthropic.claude-sonnet-4-20250514-v1:0": {
+		"us": true,
+		"ap": true,
+		"eu": true,
+	},
+	"anthropic.claude-opus-4-20250514-v1:0": {
+		"us": true,
 	},
 }

@@ -135,6 +135,12 @@ func awsStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 		return wrapErr(errors.Wrap(err, "awsModelID")), nil
 	}

+	awsRegionPrefix := awsRegionPrefix(awsCli.Options().Region)
+	canCrossRegion := awsModelCanCrossRegion(awsModelId, awsRegionPrefix)
+	if canCrossRegion {
+		awsModelId = awsModelCrossRegion(awsModelId, awsRegionPrefix)
+	}
+
 	awsReq := &bedrockruntime.InvokeModelWithResponseStreamInput{
 		ModelId:     aws.String(awsModelId),
 		Accept:      aws.String("application/json"),
@@ -3,7 +3,6 @@ package baidu
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -11,6 +10,8 @@ import (
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -130,6 +131,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return baiduEmbeddingRequest, nil
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -3,13 +3,15 @@ package baidu_v2
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
+	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -48,6 +50,18 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
+	if strings.HasSuffix(info.UpstreamModelName, "-search") {
+		info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-search")
+		request.Model = info.UpstreamModelName
+		toMap := request.ToMap()
+		toMap["web_search"] = map[string]any{
+			"enable":          true,
+			"enable_citation": true,
+			"enable_trace":    true,
+			"enable_status":   false,
+		}
+		return toMap, nil
+	}
 	return request, nil
 }

@@ -60,6 +74,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -3,7 +3,6 @@ package claude
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -11,6 +10,8 @@ import (
 	relaycommon "one-api/relay/common"
 	"one-api/setting/model_setting"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 const (
@@ -37,10 +38,10 @@ func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInf
 }

 func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
-	if strings.HasPrefix(info.UpstreamModelName, "claude-3") {
-		a.RequestMode = RequestModeMessage
-	} else {
+	if strings.HasPrefix(info.UpstreamModelName, "claude-2") || strings.HasPrefix(info.UpstreamModelName, "claude-instant") {
 		a.RequestMode = RequestModeCompletion
+	} else {
+		a.RequestMode = RequestModeMessage
 	}
 }

@@ -84,6 +85,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -13,6 +13,10 @@ var ModelList = []string{
 	"claude-3-5-sonnet-20241022",
 	"claude-3-7-sonnet-20250219",
 	"claude-3-7-sonnet-20250219-thinking",
+	"claude-sonnet-4-20250514",
+	"claude-sonnet-4-20250514-thinking",
+	"claude-opus-4-20250514",
+	"claude-opus-4-20250514-thinking",
 }

 var ChannelName = "claude"
@@ -24,6 +24,8 @@ func stopReasonClaude2OpenAI(reason string) string {
 		return "stop"
 	case "max_tokens":
 		return "max_tokens"
+	case "tool_use":
+		return "tool_calls"
 	default:
 		return reason
 	}
@@ -70,7 +72,9 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*dto.Cla
 				Description: tool.Function.Description,
 			}
 			claudeTool.InputSchema = make(map[string]interface{})
-			claudeTool.InputSchema["type"] = params["type"].(string)
+			if params["type"] != nil {
+				claudeTool.InputSchema["type"] = params["type"].(string)
+			}
 			claudeTool.InputSchema["properties"] = params["properties"]
 			claudeTool.InputSchema["required"] = params["required"]
 			for s, a := range params {
@@ -296,6 +300,13 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse
 	response.Model = claudeResponse.Model
 	response.Choices = make([]dto.ChatCompletionsStreamResponseChoice, 0)
 	tools := make([]dto.ToolCallResponse, 0)
+	fcIdx := 0
+	if claudeResponse.Index != nil {
+		fcIdx = *claudeResponse.Index - 1
+		if fcIdx < 0 {
+			fcIdx = 0
+		}
+	}
 	var choice dto.ChatCompletionsStreamResponseChoice
 	if reqMode == RequestModeCompletion {
 		choice.Delta.SetContentString(claudeResponse.Completion)
@@ -315,8 +326,9 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse
 				//choice.Delta.SetContentString(claudeResponse.ContentBlock.Text)
 				if claudeResponse.ContentBlock.Type == "tool_use" {
 					tools = append(tools, dto.ToolCallResponse{
-						ID:   claudeResponse.ContentBlock.Id,
-						Type: "function",
+						Index: common.GetPointer(fcIdx),
+						ID:    claudeResponse.ContentBlock.Id,
+						Type:  "function",
 						Function: dto.FunctionResponse{
 							Name:      claudeResponse.ContentBlock.Name,
 							Arguments: "",
@@ -328,11 +340,12 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse
 			}
 		} else if claudeResponse.Type == "content_block_delta" {
 			if claudeResponse.Delta != nil {
-				choice.Index = *claudeResponse.Index
 				choice.Delta.Content = claudeResponse.Delta.Text
 				switch claudeResponse.Delta.Type {
 				case "input_json_delta":
 					tools = append(tools, dto.ToolCallResponse{
+						Type:  "function",
+						Index: common.GetPointer(fcIdx),
 						Function: dto.FunctionResponse{
 							Arguments: *claudeResponse.Delta.PartialJson,
 						},
@@ -55,6 +55,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	}
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -3,13 +3,14 @@ package cohere
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -52,6 +53,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	return requestOpenAI2Cohere(*request), nil
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -0,0 +1,132 @@
+package coze
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"one-api/dto"
+	"one-api/relay/channel"
+	"one-api/relay/common"
+	"time"
+
+	"github.com/gin-gonic/gin"
+)
+
+type Adaptor struct {
+}
+
+// ConvertAudioRequest implements channel.Adaptor.
+func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *common.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertClaudeRequest implements channel.Adaptor.
+func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *common.RelayInfo, request *dto.ClaudeRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertEmbeddingRequest implements channel.Adaptor.
+func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *common.RelayInfo, request dto.EmbeddingRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertImageRequest implements channel.Adaptor.
+func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *common.RelayInfo, request dto.ImageRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertOpenAIRequest implements channel.Adaptor.
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *common.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+	if request == nil {
+		return nil, errors.New("request is nil")
+	}
+	return convertCozeChatRequest(c, *request), nil
+}
+
+// ConvertOpenAIResponsesRequest implements channel.Adaptor.
+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *common.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertRerankRequest implements channel.Adaptor.
+func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// DoRequest implements channel.Adaptor.
+func (a *Adaptor) DoRequest(c *gin.Context, info *common.RelayInfo, requestBody io.Reader) (any, error) {
+	if info.IsStream {
+		return channel.DoApiRequest(a, c, info, requestBody)
+	}
+	// 首先发送创建消息请求，成功后再发送获取消息请求
+	// 发送创建消息请求
+	resp, err := channel.DoApiRequest(a, c, info, requestBody)
+	if err != nil {
+		return nil, err
+	}
+	// 解析 resp
+	var cozeResponse CozeChatResponse
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+	err = json.Unmarshal(respBody, &cozeResponse)
+	if cozeResponse.Code != 0 {
+		return nil, errors.New(cozeResponse.Msg)
+	}
+	c.Set("coze_conversation_id", cozeResponse.Data.ConversationId)
+	c.Set("coze_chat_id", cozeResponse.Data.Id)
+	// 轮询检查消息是否完成
+	for {
+		err, isComplete := checkIfChatComplete(a, c, info)
+		if err != nil {
+			return nil, err
+		} else {
+			if isComplete {
+				break
+			}
+		}
+		time.Sleep(time.Second * 1)
+	}
+	// 发送获取消息请求
+	return getChatDetail(a, c, info)
+}
+
+// DoResponse implements channel.Adaptor.
+func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *common.RelayInfo) (usage any, err *dto.OpenAIErrorWithStatusCode) {
+	if info.IsStream {
+		err, usage = cozeChatStreamHandler(c, resp, info)
+	} else {
+		err, usage = cozeChatHandler(c, resp, info)
+	}
+	return
+}
+
+// GetChannelName implements channel.Adaptor.
+func (a *Adaptor) GetChannelName() string {
+	return ChannelName
+}
+
+// GetModelList implements channel.Adaptor.
+func (a *Adaptor) GetModelList() []string {
+	return ModelList
+}
+
+// GetRequestURL implements channel.Adaptor.
+func (a *Adaptor) GetRequestURL(info *common.RelayInfo) (string, error) {
+	return fmt.Sprintf("%s/v3/chat", info.BaseUrl), nil
+}
+
+// Init implements channel.Adaptor.
+func (a *Adaptor) Init(info *common.RelayInfo) {
+
+}
+
+// SetupRequestHeader implements channel.Adaptor.
+func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *common.RelayInfo) error {
+	channel.SetupApiRequestHeader(info, c, req)
+	req.Set("Authorization", "Bearer "+info.ApiKey)
+	return nil
+}
@@ -0,0 +1,30 @@
+package coze
+
+var ModelList = []string{
+	"moonshot-v1-8k",
+	"moonshot-v1-32k",
+	"moonshot-v1-128k",
+	"Baichuan4",
+	"abab6.5s-chat-pro",
+	"glm-4-0520",
+	"qwen-max",
+	"deepseek-r1",
+	"deepseek-v3",
+	"deepseek-r1-distill-qwen-32b",
+	"deepseek-r1-distill-qwen-7b",
+	"step-1v-8k",
+	"step-1.5v-mini",
+	"Doubao-pro-32k",
+	"Doubao-pro-256k",
+	"Doubao-lite-128k",
+	"Doubao-lite-32k",
+	"Doubao-vision-lite-32k",
+	"Doubao-vision-pro-32k",
+	"Doubao-1.5-pro-vision-32k",
+	"Doubao-1.5-lite-32k",
+	"Doubao-1.5-pro-32k",
+	"Doubao-1.5-thinking-pro",
+	"Doubao-1.5-pro-256k",
+}
+
+var ChannelName = "coze"
@@ -0,0 +1,78 @@
+package coze
+
+import "encoding/json"
+
+type CozeError struct {
+	Code    int    `json:"code"`
+	Message string `json:"message"`
+}
+
+type CozeEnterMessage struct {
+	Role        string          `json:"role"`
+	Type        string          `json:"type,omitempty"`
+	Content     json.RawMessage `json:"content,omitempty"`
+	MetaData    json.RawMessage `json:"meta_data,omitempty"`
+	ContentType string          `json:"content_type,omitempty"`
+}
+
+type CozeChatRequest struct {
+	BotId              string             `json:"bot_id"`
+	UserId             string             `json:"user_id"`
+	AdditionalMessages []CozeEnterMessage `json:"additional_messages,omitempty"`
+	Stream             bool               `json:"stream,omitempty"`
+	CustomVariables    json.RawMessage    `json:"custom_variables,omitempty"`
+	AutoSaveHistory    bool               `json:"auto_save_history,omitempty"`
+	MetaData           json.RawMessage    `json:"meta_data,omitempty"`
+	ExtraParams        json.RawMessage    `json:"extra_params,omitempty"`
+	ShortcutCommand    json.RawMessage    `json:"shortcut_command,omitempty"`
+	Parameters         json.RawMessage    `json:"parameters,omitempty"`
+}
+
+type CozeChatResponse struct {
+	Code int                  `json:"code"`
+	Msg  string               `json:"msg"`
+	Data CozeChatResponseData `json:"data"`
+}
+
+type CozeChatResponseData struct {
+	Id             string        `json:"id"`
+	ConversationId string        `json:"conversation_id"`
+	BotId          string        `json:"bot_id"`
+	CreatedAt      int64         `json:"created_at"`
+	LastError      CozeError     `json:"last_error"`
+	Status         string        `json:"status"`
+	Usage          CozeChatUsage `json:"usage"`
+}
+
+type CozeChatUsage struct {
+	TokenCount  int `json:"token_count"`
+	OutputCount int `json:"output_count"`
+	InputCount  int `json:"input_count"`
+}
+
+type CozeChatDetailResponse struct {
+	Data   []CozeChatV3MessageDetail `json:"data"`
+	Code   int                       `json:"code"`
+	Msg    string                    `json:"msg"`
+	Detail CozeResponseDetail        `json:"detail"`
+}
+
+type CozeChatV3MessageDetail struct {
+	Id               string          `json:"id"`
+	Role             string          `json:"role"`
+	Type             string          `json:"type"`
+	BotId            string          `json:"bot_id"`
+	ChatId           string          `json:"chat_id"`
+	Content          json.RawMessage `json:"content"`
+	MetaData         json.RawMessage `json:"meta_data"`
+	CreatedAt        int64           `json:"created_at"`
+	SectionId        string          `json:"section_id"`
+	UpdatedAt        int64           `json:"updated_at"`
+	ContentType      string          `json:"content_type"`
+	ConversationId   string          `json:"conversation_id"`
+	ReasoningContent string          `json:"reasoning_content"`
+}
+
+type CozeResponseDetail struct {
+	Logid string `json:"logid"`
+}
@@ -0,0 +1,300 @@
+package coze
+
+import (
+	"bufio"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"one-api/common"
+	"one-api/dto"
+	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
+	"one-api/service"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+)
+
+func convertCozeChatRequest(c *gin.Context, request dto.GeneralOpenAIRequest) *CozeChatRequest {
+	var messages []CozeEnterMessage
+	// 将 request的messages的role为user的content转换为CozeMessage
+	for _, message := range request.Messages {
+		if message.Role == "user" {
+			messages = append(messages, CozeEnterMessage{
+				Role:    "user",
+				Content: message.Content,
+				// TODO: support more content type
+				ContentType: "text",
+			})
+		}
+	}
+	user := request.User
+	if user == "" {
+		user = helper.GetResponseID(c)
+	}
+	cozeRequest := &CozeChatRequest{
+		BotId:              c.GetString("bot_id"),
+		UserId:             user,
+		AdditionalMessages: messages,
+		Stream:             request.Stream,
+	}
+	return cozeRequest
+}
+
+func cozeChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
+	}
+	err = resp.Body.Close()
+	if err != nil {
+		return service.OpenAIErrorWrapperLocal(err, "close_response_body_failed", http.StatusInternalServerError), nil
+	}
+	// convert coze response to openai response
+	var response dto.TextResponse
+	var cozeResponse CozeChatDetailResponse
+	response.Model = info.UpstreamModelName
+	err = json.Unmarshal(responseBody, &cozeResponse)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
+	}
+	if cozeResponse.Code != 0 {
+		return service.OpenAIErrorWrapper(errors.New(cozeResponse.Msg), fmt.Sprintf("%d", cozeResponse.Code), http.StatusInternalServerError), nil
+	}
+	// 从上下文获取 usage
+	var usage dto.Usage
+	usage.PromptTokens = c.GetInt("coze_input_count")
+	usage.CompletionTokens = c.GetInt("coze_output_count")
+	usage.TotalTokens = c.GetInt("coze_token_count")
+	response.Usage = usage
+	response.Id = helper.GetResponseID(c)
+
+	var responseContent json.RawMessage
+	for _, data := range cozeResponse.Data {
+		if data.Type == "answer" {
+			responseContent = data.Content
+			response.Created = data.CreatedAt
+		}
+	}
+	// 添加 response.Choices
+	response.Choices = []dto.OpenAITextResponseChoice{
+		{
+			Index:        0,
+			Message:      dto.Message{Role: "assistant", Content: responseContent},
+			FinishReason: "stop",
+		},
+	}
+	jsonResponse, err := json.Marshal(response)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
+	}
+	c.Writer.Header().Set("Content-Type", "application/json")
+	c.Writer.WriteHeader(resp.StatusCode)
+	_, _ = c.Writer.Write(jsonResponse)
+
+	return nil, &usage
+}
+
+func cozeChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	scanner := bufio.NewScanner(resp.Body)
+	scanner.Split(bufio.ScanLines)
+	helper.SetEventStreamHeaders(c)
+	id := helper.GetResponseID(c)
+	var responseText string
+
+	var currentEvent string
+	var currentData string
+	var usage dto.Usage
+
+	for scanner.Scan() {
+		line := scanner.Text()
+
+		if line == "" {
+			if currentEvent != "" && currentData != "" {
+				// handle last event
+				handleCozeEvent(c, currentEvent, currentData, &responseText, &usage, id, info)
+				currentEvent = ""
+				currentData = ""
+			}
+			continue
+		}
+
+		if strings.HasPrefix(line, "event:") {
+			currentEvent = strings.TrimSpace(line[6:])
+			continue
+		}
+
+		if strings.HasPrefix(line, "data:") {
+			currentData = strings.TrimSpace(line[5:])
+			continue
+		}
+	}
+
+	// Last event
+	if currentEvent != "" && currentData != "" {
+		handleCozeEvent(c, currentEvent, currentData, &responseText, &usage, id, info)
+	}
+
+	if err := scanner.Err(); err != nil {
+		return service.OpenAIErrorWrapper(err, "stream_scanner_error", http.StatusInternalServerError), nil
+	}
+	helper.Done(c)
+
+	if usage.TotalTokens == 0 {
+		usage.PromptTokens = info.PromptTokens
+		usage.CompletionTokens, _ = service.CountTextToken("gpt-3.5-turbo", responseText)
+		usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
+	}
+
+	return nil, &usage
+}
+
+func handleCozeEvent(c *gin.Context, event string, data string, responseText *string, usage *dto.Usage, id string, info *relaycommon.RelayInfo) {
+	switch event {
+	case "conversation.chat.completed":
+		// 将 data 解析为 CozeChatResponseData
+		var chatData CozeChatResponseData
+		err := json.Unmarshal([]byte(data), &chatData)
+		if err != nil {
+			common.SysError("error_unmarshalling_stream_response: " + err.Error())
+			return
+		}
+
+		usage.PromptTokens = chatData.Usage.InputCount
+		usage.CompletionTokens = chatData.Usage.OutputCount
+		usage.TotalTokens = chatData.Usage.TokenCount
+
+		finishReason := "stop"
+		stopResponse := helper.GenerateStopResponse(id, common.GetTimestamp(), info.UpstreamModelName, finishReason)
+		helper.ObjectData(c, stopResponse)
+
+	case "conversation.message.delta":
+		// 将 data 解析为 CozeChatV3MessageDetail
+		var messageData CozeChatV3MessageDetail
+		err := json.Unmarshal([]byte(data), &messageData)
+		if err != nil {
+			common.SysError("error_unmarshalling_stream_response: " + err.Error())
+			return
+		}
+
+		var content string
+		err = json.Unmarshal(messageData.Content, &content)
+		if err != nil {
+			common.SysError("error_unmarshalling_stream_response: " + err.Error())
+			return
+		}
+
+		*responseText += content
+
+		openaiResponse := dto.ChatCompletionsStreamResponse{
+			Id:      id,
+			Object:  "chat.completion.chunk",
+			Created: common.GetTimestamp(),
+			Model:   info.UpstreamModelName,
+		}
+
+		choice := dto.ChatCompletionsStreamResponseChoice{
+			Index: 0,
+		}
+		choice.Delta.SetContentString(content)
+		openaiResponse.Choices = append(openaiResponse.Choices, choice)
+
+		helper.ObjectData(c, openaiResponse)
+
+	case "error":
+		var errorData CozeError
+		err := json.Unmarshal([]byte(data), &errorData)
+		if err != nil {
+			common.SysError("error_unmarshalling_stream_response: " + err.Error())
+			return
+		}
+
+		common.SysError(fmt.Sprintf("stream event error: ", errorData.Code, errorData.Message))
+	}
+}
+
+func checkIfChatComplete(a *Adaptor, c *gin.Context, info *relaycommon.RelayInfo) (error, bool) {
+	requestURL := fmt.Sprintf("%s/v3/chat/retrieve", info.BaseUrl)
+
+	requestURL = requestURL + "?conversation_id=" + c.GetString("coze_conversation_id") + "&chat_id=" + c.GetString("coze_chat_id")
+	// 将 conversationId和chatId作为参数发送get请求
+	req, err := http.NewRequest("GET", requestURL, nil)
+	if err != nil {
+		return err, false
+	}
+	err = a.SetupRequestHeader(c, &req.Header, info)
+	if err != nil {
+		return err, false
+	}
+
+	resp, err := doRequest(req, info) // 调用 doRequest
+	if err != nil {
+		return err, false
+	}
+	if resp == nil { // 确保在 doRequest 失败时 resp 不为 nil 导致 panic
+		return fmt.Errorf("resp is nil"), false
+	}
+	defer resp.Body.Close() // 确保响应体被关闭
+
+	// 解析 resp 到 CozeChatResponse
+	var cozeResponse CozeChatResponse
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return fmt.Errorf("read response body failed: %w", err), false
+	}
+	err = json.Unmarshal(responseBody, &cozeResponse)
+	if err != nil {
+		return fmt.Errorf("unmarshal response body failed: %w", err), false
+	}
+	if cozeResponse.Data.Status == "completed" {
+		// 在上下文设置 usage
+		c.Set("coze_token_count", cozeResponse.Data.Usage.TokenCount)
+		c.Set("coze_output_count", cozeResponse.Data.Usage.OutputCount)
+		c.Set("coze_input_count", cozeResponse.Data.Usage.InputCount)
+		return nil, true
+	} else if cozeResponse.Data.Status == "failed" || cozeResponse.Data.Status == "canceled" || cozeResponse.Data.Status == "requires_action" {
+		return fmt.Errorf("chat status: %s", cozeResponse.Data.Status), false
+	} else {
+		return nil, false
+	}
+}
+
+func getChatDetail(a *Adaptor, c *gin.Context, info *relaycommon.RelayInfo) (*http.Response, error) {
+	requestURL := fmt.Sprintf("%s/v3/chat/message/list", info.BaseUrl)
+
+	requestURL = requestURL + "?conversation_id=" + c.GetString("coze_conversation_id") + "&chat_id=" + c.GetString("coze_chat_id")
+	req, err := http.NewRequest("GET", requestURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("new request failed: %w", err)
+	}
+	err = a.SetupRequestHeader(c, &req.Header, info)
+	if err != nil {
+		return nil, fmt.Errorf("setup request header failed: %w", err)
+	}
+	resp, err := doRequest(req, info)
+	if err != nil {
+		return nil, fmt.Errorf("do request failed: %w", err)
+	}
+	return resp, nil
+}
+
+func doRequest(req *http.Request, info *relaycommon.RelayInfo) (*http.Response, error) {
+	var client *http.Client
+	var err error // 声明 err 变量
+	if proxyURL, ok := info.ChannelSetting["proxy"]; ok {
+		client, err = service.NewProxyHttpClient(proxyURL.(string))
+		if err != nil {
+			return nil, fmt.Errorf("new proxy http client failed: %w", err)
+		}
+	} else {
+		client = service.GetHttpClient()
+	}
+	resp, err := client.Do(req)
+	if err != nil { // 增加对 client.Do(req) 返回错误的检查
+		return nil, fmt.Errorf("client.Do failed: %w", err)
+	}
+	// _ = resp.Body.Close()
+	return resp, nil
+}
@@ -3,7 +3,6 @@ package deepseek
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -11,6 +10,9 @@ import (
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
+	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -36,9 +38,13 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
+	fimBaseUrl := info.BaseUrl
+	if !strings.HasSuffix(info.BaseUrl, "/beta") {
+		fimBaseUrl += "/beta"
+	}
 	switch info.RelayMode {
 	case constant.RelayModeCompletions:
-		return fmt.Sprintf("%s/beta/completions", info.BaseUrl), nil
+		return fmt.Sprintf("%s/completions", fimBaseUrl), nil
 	default:
 		return fmt.Sprintf("%s/v1/chat/completions", info.BaseUrl), nil
 	}
@@ -66,6 +72,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -3,12 +3,13 @@ package dify
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
+
+	"github.com/gin-gonic/gin"
 )

 const (
@@ -86,6 +87,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -1,7 +1,6 @@
 package dify

 import (
-	"bufio"
 	"bytes"
 	"encoding/base64"
 	"encoding/json"
@@ -198,6 +197,12 @@ func streamResponseDify2OpenAI(difyResponse DifyChunkChatCompletionResponse) *dt
 			choice.Delta.SetReasoningContent(text + "\n")
 		}
 	} else if difyResponse.Event == "message" || difyResponse.Event == "agent_message" {
+		if difyResponse.Answer == "<details style=\"color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;\" open> <summary> Thinking... </summary>\n" {
+			difyResponse.Answer = "<think>"
+		} else if difyResponse.Answer == "</details>" {
+			difyResponse.Answer = "</think>"
+		}
+
 		choice.Delta.SetContentString(difyResponse.Answer)
 	}
 	response.Choices = append(response.Choices, choice)
@@ -207,12 +212,8 @@ func streamResponseDify2OpenAI(difyResponse DifyChunkChatCompletionResponse) *dt
 func difyStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
 	var responseText string
 	usage := &dto.Usage{}
-	scanner := bufio.NewScanner(resp.Body)
-	scanner.Split(bufio.ScanLines)
 	var nodeToken int
-
 	helper.SetEventStreamHeaders(c)
-
 	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
 		var difyResponse DifyChunkChatCompletionResponse
 		err := json.Unmarshal([]byte(data), &difyResponse)
@@ -241,13 +242,10 @@ func difyStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
 		}
 		return true
 	})
-	if err := scanner.Err(); err != nil {
-		common.SysError("error reading stream: " + err.Error())
-	}
 	helper.Done(c)
 	err := resp.Body.Close()
 	if err != nil {
-		//return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
+		// return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
 		common.SysError("close_response_body_failed: " + err.Error())
 	}
 	if usage.TotalTokens == 0 {
@@ -10,9 +10,9 @@ import (
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/constant"
 	"one-api/service"
 	"one-api/setting/model_setting"
-
 	"strings"

 	"github.com/gin-gonic/gin"
@@ -70,6 +70,16 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
+
+	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
+		// suffix -thinking and -nothinking
+		if strings.HasSuffix(info.OriginModelName, "-thinking") {
+			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
+		} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
+			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
+		}
+	}
+
 	version := model_setting.GetGeminiVersionSetting(info.UpstreamModelName)

 	if strings.HasPrefix(info.UpstreamModelName, "imagen") {
@@ -99,11 +109,13 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
-	ai, err := CovertGemini2OpenAI(*request)
+
+	geminiRequest, err := CovertGemini2OpenAI(*request, info)
 	if err != nil {
 		return nil, err
 	}
-	return ai, nil
+
+	return geminiRequest, nil
 }

 func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
@@ -144,11 +156,24 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return geminiRequest, nil
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }

 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *dto.OpenAIErrorWithStatusCode) {
+	if info.RelayMode == constant.RelayModeGemini {
+		if info.IsStream {
+			return GeminiTextGenerationStreamHandler(c, resp, info)
+		} else {
+			return GeminiTextGenerationHandler(c, resp, info)
+		}
+	}
+
 	if strings.HasPrefix(info.UpstreamModelName, "imagen") {
 		return GeminiImageHandler(c, resp, info)
 	}
@@ -165,6 +190,18 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 	} else {
 		err, usage = GeminiChatHandler(c, resp, info)
 	}
+
+	//if usage.(*dto.Usage).CompletionTokenDetails.ReasoningTokens > 100 {
+	//	// 没有请求-thinking的情况下，产生思考token，则按照思考模型计费
+	//	if !strings.HasSuffix(info.OriginModelName, "-thinking") &&
+	//		!strings.HasSuffix(info.OriginModelName, "-nothinking") {
+	//		thinkingModelName := info.OriginModelName + "-thinking"
+	//		if operation_setting.SelfUseModeEnabled || helper.ContainPriceOrRatio(thinkingModelName) {
+	//			info.OriginModelName = thinkingModelName
+	//		}
+	//	}
+	//}
+
 	return
 }

@@ -16,6 +16,8 @@ var ModelList = []string{
 	"gemini-2.0-pro-exp",
 	// thinking exp
 	"gemini-2.0-flash-thinking-exp",
+	"gemini-2.5-pro-exp-03-25",
+	"gemini-2.5-pro-preview-03-25",
 	// imagen models
 	"imagen-3.0-generate-002",
 	// embedding models
@@ -2,10 +2,19 @@ package gemini

 type GeminiChatRequest struct {
 	Contents           []GeminiChatContent        `json:"contents"`
-	SafetySettings     []GeminiChatSafetySettings `json:"safety_settings,omitempty"`
-	GenerationConfig   GeminiChatGenerationConfig `json:"generation_config,omitempty"`
+	SafetySettings     []GeminiChatSafetySettings `json:"safetySettings,omitempty"`
+	GenerationConfig   GeminiChatGenerationConfig `json:"generationConfig,omitempty"`
 	Tools              []GeminiChatTool           `json:"tools,omitempty"`
-	SystemInstructions *GeminiChatContent         `json:"system_instruction,omitempty"`
+	SystemInstructions *GeminiChatContent         `json:"systemInstruction,omitempty"`
+}
+
+type GeminiThinkingConfig struct {
+	IncludeThoughts bool `json:"includeThoughts,omitempty"`
+	ThinkingBudget  *int `json:"thinkingBudget,omitempty"`
+}
+
+func (c *GeminiThinkingConfig) SetThinkingBudget(budget int) {
+	c.ThinkingBudget = &budget
 }

 type GeminiInlineData struct {
@@ -18,14 +27,9 @@ type FunctionCall struct {
 	Arguments    any    `json:"args"`
 }

-type GeminiFunctionResponseContent struct {
-	Name    string `json:"name"`
-	Content any    `json:"content"`
-}
-
 type FunctionResponse struct {
-	Name     string                        `json:"name"`
-	Response GeminiFunctionResponseContent `json:"response"`
+	Name     string                 `json:"name"`
+	Response map[string]interface{} `json:"response"`
 }

 type GeminiPartExecutableCode struct {
@@ -45,6 +49,7 @@ type GeminiFileData struct {

 type GeminiPart struct {
 	Text                string                         `json:"text,omitempty"`
+	Thought             bool                           `json:"thought,omitempty"`
 	InlineData          *GeminiInlineData              `json:"inlineData,omitempty"`
 	FunctionCall        *FunctionCall                  `json:"functionCall,omitempty"`
 	FunctionResponse    *FunctionResponse              `json:"functionResponse,omitempty"`
@@ -71,15 +76,17 @@ type GeminiChatTool struct {
 }

 type GeminiChatGenerationConfig struct {
-	Temperature      *float64 `json:"temperature,omitempty"`
-	TopP             float64  `json:"topP,omitempty"`
-	TopK             float64  `json:"topK,omitempty"`
-	MaxOutputTokens  uint     `json:"maxOutputTokens,omitempty"`
-	CandidateCount   int      `json:"candidateCount,omitempty"`
-	StopSequences    []string `json:"stopSequences,omitempty"`
-	ResponseMimeType string   `json:"responseMimeType,omitempty"`
-	ResponseSchema   any      `json:"responseSchema,omitempty"`
-	Seed             int64    `json:"seed,omitempty"`
+	Temperature        *float64              `json:"temperature,omitempty"`
+	TopP               float64               `json:"topP,omitempty"`
+	TopK               float64               `json:"topK,omitempty"`
+	MaxOutputTokens    uint                  `json:"maxOutputTokens,omitempty"`
+	CandidateCount     int                   `json:"candidateCount,omitempty"`
+	StopSequences      []string              `json:"stopSequences,omitempty"`
+	ResponseMimeType   string                `json:"responseMimeType,omitempty"`
+	ResponseSchema     any                   `json:"responseSchema,omitempty"`
+	Seed               int64                 `json:"seed,omitempty"`
+	ResponseModalities []string              `json:"responseModalities,omitempty"`
+	ThinkingConfig     *GeminiThinkingConfig `json:"thinkingConfig,omitempty"`
 }

 type GeminiChatCandidate struct {
@@ -105,9 +112,16 @@ type GeminiChatResponse struct {
 }

 type GeminiUsageMetadata struct {
-	PromptTokenCount     int `json:"promptTokenCount"`
-	CandidatesTokenCount int `json:"candidatesTokenCount"`
-	TotalTokenCount      int `json:"totalTokenCount"`
+	PromptTokenCount     int                         `json:"promptTokenCount"`
+	CandidatesTokenCount int                         `json:"candidatesTokenCount"`
+	TotalTokenCount      int                         `json:"totalTokenCount"`
+	ThoughtsTokenCount   int                         `json:"thoughtsTokenCount"`
+	PromptTokensDetails  []GeminiPromptTokensDetails `json:"promptTokensDetails"`
+}
+
+type GeminiPromptTokensDetails struct {
+	Modality   string `json:"modality"`
+	TokenCount int    `json:"tokenCount"`
 }

 // Imagen related structs
@@ -0,0 +1,145 @@
+package gemini
+
+import (
+	"encoding/json"
+	"io"
+	"net/http"
+	"one-api/common"
+	"one-api/dto"
+	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
+	"one-api/service"
+
+	"github.com/gin-gonic/gin"
+)
+
+func GeminiTextGenerationHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.Usage, *dto.OpenAIErrorWithStatusCode) {
+	// 读取响应体
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError)
+	}
+	err = resp.Body.Close()
+	if err != nil {
+		return nil, service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError)
+	}
+
+	if common.DebugEnabled {
+		println(string(responseBody))
+	}
+
+	// 解析为 Gemini 原生响应格式
+	var geminiResponse GeminiChatResponse
+	err = common.DecodeJson(responseBody, &geminiResponse)
+	if err != nil {
+		return nil, service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError)
+	}
+
+	// 检查是否有候选响应
+	if len(geminiResponse.Candidates) == 0 {
+		return nil, &dto.OpenAIErrorWithStatusCode{
+			Error: dto.OpenAIError{
+				Message: "No candidates returned",
+				Type:    "server_error",
+				Param:   "",
+				Code:    500,
+			},
+			StatusCode: resp.StatusCode,
+		}
+	}
+
+	// 计算使用量（基于 UsageMetadata）
+	usage := dto.Usage{
+		PromptTokens:     geminiResponse.UsageMetadata.PromptTokenCount,
+		CompletionTokens: geminiResponse.UsageMetadata.CandidatesTokenCount,
+		TotalTokens:      geminiResponse.UsageMetadata.TotalTokenCount,
+	}
+
+	usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+
+	for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
+		if detail.Modality == "AUDIO" {
+			usage.PromptTokensDetails.AudioTokens = detail.TokenCount
+		} else if detail.Modality == "TEXT" {
+			usage.PromptTokensDetails.TextTokens = detail.TokenCount
+		}
+	}
+
+	// 直接返回 Gemini 原生格式的 JSON 响应
+	jsonResponse, err := json.Marshal(geminiResponse)
+	if err != nil {
+		return nil, service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError)
+	}
+
+	// 设置响应头并写入响应
+	c.Writer.Header().Set("Content-Type", "application/json")
+	c.Writer.WriteHeader(resp.StatusCode)
+	_, err = c.Writer.Write(jsonResponse)
+	if err != nil {
+		return nil, service.OpenAIErrorWrapper(err, "write_response_failed", http.StatusInternalServerError)
+	}
+
+	return &usage, nil
+}
+
+func GeminiTextGenerationStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.Usage, *dto.OpenAIErrorWithStatusCode) {
+	var usage = &dto.Usage{}
+	var imageCount int
+
+	helper.SetEventStreamHeaders(c)
+
+	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
+		var geminiResponse GeminiChatResponse
+		err := common.DecodeJsonStr(data, &geminiResponse)
+		if err != nil {
+			common.LogError(c, "error unmarshalling stream response: "+err.Error())
+			return false
+		}
+
+		// 统计图片数量
+		for _, candidate := range geminiResponse.Candidates {
+			for _, part := range candidate.Content.Parts {
+				if part.InlineData != nil && part.InlineData.MimeType != "" {
+					imageCount++
+				}
+			}
+		}
+
+		// 更新使用量统计
+		if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
+			usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
+			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
+			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
+			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+			for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
+				if detail.Modality == "AUDIO" {
+					usage.PromptTokensDetails.AudioTokens = detail.TokenCount
+				} else if detail.Modality == "TEXT" {
+					usage.PromptTokensDetails.TextTokens = detail.TokenCount
+				}
+			}
+		}
+
+		// 直接发送 GeminiChatResponse 响应
+		err = helper.ObjectData(c, geminiResponse)
+		if err != nil {
+			common.LogError(c, err.Error())
+		}
+
+		return true
+	})
+
+	if imageCount != 0 {
+		if usage.CompletionTokens == 0 {
+			usage.CompletionTokens = imageCount * 258
+		}
+	}
+
+	// 计算最终使用量
+	usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
+
+	// 移除流式响应结尾的[Done]，因为Gemini API没有发送Done的行为
+	//helper.Done(c)
+
+	return usage, nil
+}
@@ -18,12 +18,29 @@ import (
 	"github.com/gin-gonic/gin"
 )

+var geminiSupportedMimeTypes = map[string]bool{
+	"application/pdf": true,
+	"audio/mpeg":      true,
+	"audio/mp3":       true,
+	"audio/wav":       true,
+	"image/png":       true,
+	"image/jpeg":      true,
+	"text/plain":      true,
+	"video/mov":       true,
+	"video/mpeg":      true,
+	"video/mp4":       true,
+	"video/mpg":       true,
+	"video/avi":       true,
+	"video/wmv":       true,
+	"video/mpegps":    true,
+	"video/flv":       true,
+}
+
 // Setting safety to the lowest possible values since Gemini is already powerless enough
-func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatRequest, error) {
+func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*GeminiChatRequest, error) {

 	geminiRequest := GeminiChatRequest{
 		Contents: make([]GeminiChatContent, 0, len(textRequest.Messages)),
-		//SafetySettings: []GeminiChatSafetySettings{},
 		GenerationConfig: GeminiChatGenerationConfig{
 			Temperature:     textRequest.Temperature,
 			TopP:            textRequest.TopP,
@@ -32,6 +49,75 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 		},
 	}

+	if model_setting.IsGeminiModelSupportImagine(info.UpstreamModelName) {
+		geminiRequest.GenerationConfig.ResponseModalities = []string{
+			"TEXT",
+			"IMAGE",
+		}
+	}
+
+	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
+		if strings.HasSuffix(info.OriginModelName, "-thinking") {
+			// 硬编码不支持 ThinkingBudget 的旧模型
+			unsupportedModels := []string{
+				"gemini-2.5-pro-preview-05-06",
+				"gemini-2.5-pro-preview-03-25",
+			}
+
+			isUnsupported := false
+			for _, unsupportedModel := range unsupportedModels {
+				if strings.HasPrefix(info.OriginModelName, unsupportedModel) {
+					isUnsupported = true
+					break
+				}
+			}
+
+			if isUnsupported {
+				geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
+					IncludeThoughts: true,
+				}
+			} else {
+				budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
+
+				// 检查是否为新的2.5pro模型（支持ThinkingBudget但有特殊范围）
+				isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
+					!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
+					!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
+
+				if isNew25Pro {
+					// 新的2.5pro模型：ThinkingBudget范围为128-32768
+					if budgetTokens == 0 || budgetTokens < 128 {
+						budgetTokens = 128
+					} else if budgetTokens > 32768 {
+						budgetTokens = 32768
+					}
+				} else {
+					// 其他模型：ThinkingBudget范围为0-24576
+					if budgetTokens == 0 || budgetTokens > 24576 {
+						budgetTokens = 24576
+					}
+				}
+
+				geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
+					ThinkingBudget:  common.GetPointer(int(budgetTokens)),
+					IncludeThoughts: true,
+				}
+			}
+		} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
+			// 检查是否为新的2.5pro模型（不支持-nothinking，因为最低值只能为128）
+			isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
+				!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
+				!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
+
+			if !isNew25Pro {
+				// 只有非新2.5pro模型才支持-nothinking
+				geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
+					ThinkingBudget: common.GetPointer(0),
+				}
+			}
+		}
+	}
+
 	safetySettings := make([]GeminiChatSafetySettings, 0, len(SafetySettingList))
 	for _, category := range SafetySettingList {
 		safetySettings = append(safetySettings, GeminiChatSafetySettings{
@@ -56,6 +142,7 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 				continue
 			}
 			if tool.Function.Parameters != nil {
+
 				params, ok := tool.Function.Parameters.(map[string]interface{})
 				if ok {
 					if props, hasProps := params["properties"].(map[string]interface{}); hasProps {
@@ -65,6 +152,9 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 					}
 				}
 			}
+			// Clean the parameters before appending
+			cleanedParams := cleanFunctionParameters(tool.Function.Parameters)
+			tool.Function.Parameters = cleanedParams
 			functions = append(functions, tool.Function)
 		}
 		if codeExecution {
@@ -86,11 +176,11 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 		// json_data, _ := json.Marshal(geminiRequest.Tools)
 		// common.SysLog("tools_json: " + string(json_data))
 	} else if textRequest.Functions != nil {
-		geminiRequest.Tools = []GeminiChatTool{
-			{
-				FunctionDeclarations: textRequest.Functions,
-			},
-		}
+		//geminiRequest.Tools = []GeminiChatTool{
+		//	{
+		//		FunctionDeclarations: textRequest.Functions,
+		//	},
+		//}
 	}

 	if textRequest.ResponseFormat != nil && (textRequest.ResponseFormat.Type == "json_schema" || textRequest.ResponseFormat.Type == "json_object") {
@@ -121,17 +211,12 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 			} else if val, exists := tool_call_ids[message.ToolCallId]; exists {
 				name = val
 			}
-			content := common.StrToMap(message.StringContent())
+			contentMap := common.StrToMap(message.StringContent())
 			functionResp := &FunctionResponse{
-				Name: name,
-				Response: GeminiFunctionResponseContent{
-					Name:    name,
-					Content: content,
-				},
-			}
-			if content == nil {
-				functionResp.Response.Content = message.StringContent()
+				Name:     name,
+				Response: contentMap,
 			}
+
 			*parts = append(*parts, GeminiPart{
 				FunctionResponse: functionResp,
 			})
@@ -181,14 +266,20 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 				}
 				// 判断是否是url
 				if strings.HasPrefix(part.GetImageMedia().Url, "http") {
-					// 是url，获取图片的类型和base64编码的数据
+					// 是url，获取文件的类型和base64编码的数据
 					fileData, err := service.GetFileBase64FromUrl(part.GetImageMedia().Url)
 					if err != nil {
-						return nil, fmt.Errorf("get file base64 from url failed: %s", err.Error())
+						return nil, fmt.Errorf("get file base64 from url '%s' failed: %w", part.GetImageMedia().Url, err)
 					}
+
+					// 校验 MimeType 是否在 Gemini 支持的白名单中
+					if _, ok := geminiSupportedMimeTypes[strings.ToLower(fileData.MimeType)]; !ok {
+						return nil, fmt.Errorf("MIME type '%s' from URL '%s' is not supported by Gemini. Supported types are: %v", fileData.MimeType, part.GetImageMedia().Url, getSupportedMimeTypesList())
+					}
+
 					parts = append(parts, GeminiPart{
 						InlineData: &GeminiInlineData{
-							MimeType: fileData.MimeType,
+							MimeType: fileData.MimeType, // 使用原始的 MimeType，因为大小写可能对API有意义
 							Data:     fileData.Base64Data,
 						},
 					})
@@ -204,6 +295,34 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 						},
 					})
 				}
+			} else if part.Type == dto.ContentTypeFile {
+				if part.GetFile().FileId != "" {
+					return nil, fmt.Errorf("only base64 file is supported in gemini")
+				}
+				format, base64String, err := service.DecodeBase64FileData(part.GetFile().FileData)
+				if err != nil {
+					return nil, fmt.Errorf("decode base64 file data failed: %s", err.Error())
+				}
+				parts = append(parts, GeminiPart{
+					InlineData: &GeminiInlineData{
+						MimeType: format,
+						Data:     base64String,
+					},
+				})
+			} else if part.Type == dto.ContentTypeInputAudio {
+				if part.GetInputAudio().Data == "" {
+					return nil, fmt.Errorf("only base64 audio is supported in gemini")
+				}
+				base64String, err := service.DecodeBase64AudioData(part.GetInputAudio().Data)
+				if err != nil {
+					return nil, fmt.Errorf("decode base64 audio data failed: %s", err.Error())
+				}
+				parts = append(parts, GeminiPart{
+					InlineData: &GeminiInlineData{
+						MimeType: "audio/" + part.GetInputAudio().Format,
+						Data:     base64String,
+					},
+				})
 			}
 		}

@@ -229,6 +348,128 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 	return &geminiRequest, nil
 }

+// Helper function to get a list of supported MIME types for error messages
+func getSupportedMimeTypesList() []string {
+	keys := make([]string, 0, len(geminiSupportedMimeTypes))
+	for k := range geminiSupportedMimeTypes {
+		keys = append(keys, k)
+	}
+	return keys
+}
+
+// cleanFunctionParameters recursively removes unsupported fields from Gemini function parameters.
+func cleanFunctionParameters(params interface{}) interface{} {
+	if params == nil {
+		return nil
+	}
+
+	switch v := params.(type) {
+	case map[string]interface{}:
+		// Create a copy to avoid modifying the original
+		cleanedMap := make(map[string]interface{})
+		for k, val := range v {
+			cleanedMap[k] = val
+		}
+
+		// Remove unsupported root-level fields
+		delete(cleanedMap, "default")
+		delete(cleanedMap, "exclusiveMaximum")
+		delete(cleanedMap, "exclusiveMinimum")
+		delete(cleanedMap, "$schema")
+		delete(cleanedMap, "additionalProperties")
+
+		// Check and clean 'format' for string types
+		if propType, typeExists := cleanedMap["type"].(string); typeExists && propType == "string" {
+			if formatValue, formatExists := cleanedMap["format"].(string); formatExists {
+				if formatValue != "enum" && formatValue != "date-time" {
+					delete(cleanedMap, "format")
+				}
+			}
+		}
+
+		// Clean properties
+		if props, ok := cleanedMap["properties"].(map[string]interface{}); ok && props != nil {
+			cleanedProps := make(map[string]interface{})
+			for propName, propValue := range props {
+				cleanedProps[propName] = cleanFunctionParameters(propValue)
+			}
+			cleanedMap["properties"] = cleanedProps
+		}
+
+		// Recursively clean items in arrays
+		if items, ok := cleanedMap["items"].(map[string]interface{}); ok && items != nil {
+			cleanedMap["items"] = cleanFunctionParameters(items)
+		}
+		// Also handle items if it's an array of schemas
+		if itemsArray, ok := cleanedMap["items"].([]interface{}); ok {
+			cleanedItemsArray := make([]interface{}, len(itemsArray))
+			for i, item := range itemsArray {
+				cleanedItemsArray[i] = cleanFunctionParameters(item)
+			}
+			cleanedMap["items"] = cleanedItemsArray
+		}
+
+		// Recursively clean other schema composition keywords
+		for _, field := range []string{"allOf", "anyOf", "oneOf"} {
+			if nested, ok := cleanedMap[field].([]interface{}); ok {
+				cleanedNested := make([]interface{}, len(nested))
+				for i, item := range nested {
+					cleanedNested[i] = cleanFunctionParameters(item)
+				}
+				cleanedMap[field] = cleanedNested
+			}
+		}
+
+		// Recursively clean patternProperties
+		if patternProps, ok := cleanedMap["patternProperties"].(map[string]interface{}); ok {
+			cleanedPatternProps := make(map[string]interface{})
+			for pattern, schema := range patternProps {
+				cleanedPatternProps[pattern] = cleanFunctionParameters(schema)
+			}
+			cleanedMap["patternProperties"] = cleanedPatternProps
+		}
+
+		// Recursively clean definitions
+		if definitions, ok := cleanedMap["definitions"].(map[string]interface{}); ok {
+			cleanedDefinitions := make(map[string]interface{})
+			for defName, defSchema := range definitions {
+				cleanedDefinitions[defName] = cleanFunctionParameters(defSchema)
+			}
+			cleanedMap["definitions"] = cleanedDefinitions
+		}
+
+		// Recursively clean $defs (newer JSON Schema draft)
+		if defs, ok := cleanedMap["$defs"].(map[string]interface{}); ok {
+			cleanedDefs := make(map[string]interface{})
+			for defName, defSchema := range defs {
+				cleanedDefs[defName] = cleanFunctionParameters(defSchema)
+			}
+			cleanedMap["$defs"] = cleanedDefs
+		}
+
+		// Clean conditional keywords
+		for _, field := range []string{"if", "then", "else", "not"} {
+			if nested, ok := cleanedMap[field]; ok {
+				cleanedMap[field] = cleanFunctionParameters(nested)
+			}
+		}
+
+		return cleanedMap
+
+	case []interface{}:
+		// Handle arrays of schemas
+		cleanedArray := make([]interface{}, len(v))
+		for i, item := range v {
+			cleanedArray[i] = cleanFunctionParameters(item)
+		}
+		return cleanedArray
+
+	default:
+		// Not a map or array, return as is (e.g., could be a primitive)
+		return params
+	}
+}
+
 func removeAdditionalPropertiesWithDepth(schema interface{}, depth int) interface{} {
 	if depth >= 5 {
 		return schema
@@ -240,6 +481,7 @@ func removeAdditionalPropertiesWithDepth(schema interface{}, depth int) interfac
 	}
 	// 删除所有的title字段
 	delete(v, "title")
+	delete(v, "$schema")
 	// 如果type不为object和array，则直接返回
 	if typeVal, exists := v["type"]; !exists || (typeVal != "object" && typeVal != "array") {
 		return schema
@@ -387,6 +629,8 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResp
 					if call := getResponseToolCall(&part); call != nil {
 						toolCalls = append(toolCalls, *call)
 					}
+				} else if part.Thought {
+					choice.Message.ReasoningContent = part.Text
 				} else {
 					if part.ExecutableCode != nil {
 						texts = append(texts, "```"+part.ExecutableCode.Language+"\n"+part.ExecutableCode.Code+"\n```")
@@ -404,7 +648,6 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResp
 				choice.Message.SetToolCalls(toolCalls)
 				isToolCall = true
 			}
-
 			choice.Message.SetStringContent(strings.Join(texts, "\n"))

 		}
@@ -427,9 +670,10 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResp
 	return &fullTextResponse
 }

-func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.ChatCompletionsStreamResponse, bool) {
+func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.ChatCompletionsStreamResponse, bool, bool) {
 	choices := make([]dto.ChatCompletionsStreamResponseChoice, 0, len(geminiResponse.Candidates))
 	isStop := false
+	hasImage := false
 	for _, candidate := range geminiResponse.Candidates {
 		if candidate.FinishReason != nil && *candidate.FinishReason == "STOP" {
 			isStop = true
@@ -443,6 +687,7 @@ func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.C
 		}
 		var texts []string
 		isTools := false
+		isThought := false
 		if candidate.FinishReason != nil {
 			// p := GeminiConvertFinishReason(*candidate.FinishReason)
 			switch *candidate.FinishReason {
@@ -455,12 +700,21 @@ func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.C
 			}
 		}
 		for _, part := range candidate.Content.Parts {
-			if part.FunctionCall != nil {
+			if part.InlineData != nil {
+				if strings.HasPrefix(part.InlineData.MimeType, "image") {
+					imgText := "![image](data:" + part.InlineData.MimeType + ";base64," + part.InlineData.Data + ")"
+					texts = append(texts, imgText)
+					hasImage = true
+				}
+			} else if part.FunctionCall != nil {
 				isTools = true
 				if call := getResponseToolCall(&part); call != nil {
 					call.SetIndex(len(choice.Delta.ToolCalls))
 					choice.Delta.ToolCalls = append(choice.Delta.ToolCalls, *call)
 				}
+			} else if part.Thought {
+				isThought = true
+				texts = append(texts, part.Text)
 			} else {
 				if part.ExecutableCode != nil {
 					texts = append(texts, "```"+part.ExecutableCode.Language+"\n"+part.ExecutableCode.Code+"\n```\n")
@@ -473,7 +727,11 @@ func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.C
 				}
 			}
 		}
-		choice.Delta.SetContentString(strings.Join(texts, "\n"))
+		if isThought {
+			choice.Delta.SetReasoningContent(strings.Join(texts, "\n"))
+		} else {
+			choice.Delta.SetContentString(strings.Join(texts, "\n"))
+		}
 		if isTools {
 			choice.FinishReason = &constant.FinishReasonToolCalls
 		}
@@ -483,7 +741,7 @@ func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.C
 	var response dto.ChatCompletionsStreamResponse
 	response.Object = "chat.completion.chunk"
 	response.Choices = choices
-	return &response, isStop
+	return &response, isStop, hasImage
 }

 func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
@@ -491,23 +749,35 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom
 	id := fmt.Sprintf("chatcmpl-%s", common.GetUUID())
 	createAt := common.GetTimestamp()
 	var usage = &dto.Usage{}
+	var imageCount int

 	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
 		var geminiResponse GeminiChatResponse
-		err := json.Unmarshal([]byte(data), &geminiResponse)
+		err := common.DecodeJsonStr(data, &geminiResponse)
 		if err != nil {
 			common.LogError(c, "error unmarshalling stream response: "+err.Error())
 			return false
 		}

-		response, isStop := streamResponseGeminiChat2OpenAI(&geminiResponse)
+		response, isStop, hasImage := streamResponseGeminiChat2OpenAI(&geminiResponse)
+		if hasImage {
+			imageCount++
+		}
 		response.Id = id
 		response.Created = createAt
 		response.Model = info.UpstreamModelName
-		// responseText += response.Choices[0].Delta.GetContentString()
 		if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
 			usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
 			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
+			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
+			for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
+				if detail.Modality == "AUDIO" {
+					usage.PromptTokensDetails.AudioTokens = detail.TokenCount
+				} else if detail.Modality == "TEXT" {
+					usage.PromptTokensDetails.TextTokens = detail.TokenCount
+				}
+			}
 		}
 		err = helper.ObjectData(c, response)
 		if err != nil {
@@ -522,9 +792,14 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom

 	var response *dto.ChatCompletionsStreamResponse

-	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
+	if imageCount != 0 {
+		if usage.CompletionTokens == 0 {
+			usage.CompletionTokens = imageCount * 258
+		}
+	}
+
 	usage.PromptTokensDetails.TextTokens = usage.PromptTokens
-	usage.CompletionTokenDetails.TextTokens = usage.CompletionTokens
+	usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens

 	if info.ShouldIncludeUsage {
 		response = helper.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage)
@@ -547,8 +822,11 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
 	if err != nil {
 		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
 	}
+	if common.DebugEnabled {
+		println(string(responseBody))
+	}
 	var geminiResponse GeminiChatResponse
-	err = json.Unmarshal(responseBody, &geminiResponse)
+	err = common.DecodeJson(responseBody, &geminiResponse)
 	if err != nil {
 		return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
 	}
@@ -570,6 +848,18 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
 		CompletionTokens: geminiResponse.UsageMetadata.CandidatesTokenCount,
 		TotalTokens:      geminiResponse.UsageMetadata.TotalTokenCount,
 	}
+
+	usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+	usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
+
+	for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
+		if detail.Modality == "AUDIO" {
+			usage.PromptTokensDetails.AudioTokens = detail.TokenCount
+		} else if detail.Modality == "TEXT" {
+			usage.PromptTokensDetails.TextTokens = detail.TokenCount
+		}
+	}
+
 	fullTextResponse.Usage = usage
 	jsonResponse, err := json.Marshal(fullTextResponse)
 	if err != nil {
@@ -3,7 +3,6 @@ package jina
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -12,6 +11,8 @@ import (
 	relaycommon "one-api/relay/common"
 	"one-api/relay/common_handler"
 	"one-api/relay/constant"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -55,6 +56,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	return request, nil
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -2,13 +2,14 @@ package mistral

 import (
 	"errors"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -59,6 +60,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -3,7 +3,6 @@ package mokaai
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -11,6 +10,8 @@ import (
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -74,6 +75,11 @@ func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dt
 	return nil, nil
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -2,7 +2,6 @@ package ollama

 import (
 	"errors"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -10,6 +9,8 @@ import (
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
 	relayconstant "one-api/relay/constant"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -64,6 +65,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return requestOpenAI2Embeddings(request), nil
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -8,6 +8,7 @@ import (
 	"io"
 	"mime/multipart"
 	"net/http"
+	"net/textproto"
 	"one-api/common"
 	constant2 "one-api/constant"
 	"one-api/dto"
@@ -22,6 +23,7 @@ import (
 	"one-api/relay/common_handler"
 	"one-api/relay/constant"
 	"one-api/service"
+	"path/filepath"
 	"strings"

 	"github.com/gin-gonic/gin"
@@ -36,7 +38,7 @@ func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayIn
 	if !strings.Contains(request.Model, "claude") {
 		return nil, fmt.Errorf("you are using openai channel type with path /v1/messages, only claude model supported convert, but got %s", request.Model)
 	}
-	aiRequest, err := service.ClaudeToOpenAIRequest(*request)
+	aiRequest, err := service.ClaudeToOpenAIRequest(*request, info)
 	if err != nil {
 		return nil, err
 	}
@@ -87,7 +89,10 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 		requestURL = fmt.Sprintf("%s?api-version=%s", requestURL, apiVersion)
 		task := strings.TrimPrefix(requestURL, "/v1/")
 		model_ := info.UpstreamModelName
-		model_ = strings.Replace(model_, ".", "", -1)
+		// 2025年5月10日后创建的渠道不移除.
+		if info.ChannelCreateTime < constant2.AzureNoRemoveDotTime {
+			model_ = strings.Replace(model_, ".", "", -1)
+		}
 		// https://github.com/songquanpeng/one-api/issues/67
 		requestURL = fmt.Sprintf("/openai/deployments/%s/%s", model_, task)
 		if info.RelayMode == constant.RelayModeRealtime {
@@ -147,14 +152,12 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	if info.ChannelType != common.ChannelTypeOpenAI && info.ChannelType != common.ChannelTypeAzure {
 		request.StreamOptions = nil
 	}
-	if strings.HasPrefix(request.Model, "o1") || strings.HasPrefix(request.Model, "o3") {
+	if strings.HasPrefix(request.Model, "o") {
 		if request.MaxCompletionTokens == 0 && request.MaxTokens != 0 {
 			request.MaxCompletionTokens = request.MaxTokens
 			request.MaxTokens = 0
 		}
-		if strings.HasPrefix(request.Model, "o3") || strings.HasPrefix(request.Model, "o1") {
-			request.Temperature = nil
-		}
+		request.Temperature = nil
 		if strings.HasSuffix(request.Model, "-high") {
 			request.ReasoningEffort = "high"
 			request.Model = strings.TrimSuffix(request.Model, "-high")
@@ -167,11 +170,13 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 		}
 		info.ReasoningEffort = request.ReasoningEffort
 		info.UpstreamModelName = request.Model
-	}
-	if request.Model == "o1" || request.Model == "o1-2024-12-17" || strings.HasPrefix(request.Model, "o3") {
-		//修改第一个Message的内容，将system改为developer
-		if len(request.Messages) > 0 && request.Messages[0].Role == "system" {
-			request.Messages[0].Role = "developer"
+
+		// o系列模型developer适配（o1-mini除外）
+		if !strings.HasPrefix(request.Model, "o1-mini") && !strings.HasPrefix(request.Model, "o1-preview") {
+			//修改第一个Message的内容，将system改为developer
+			if len(request.Messages) > 0 && request.Messages[0].Role == "system" {
+				request.Messages[0].Role = "developer"
+			}
 		}
 	}

@@ -236,11 +241,167 @@ func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInf
 }

 func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
+	switch info.RelayMode {
+	case constant.RelayModeImagesEdits:
+
+		var requestBody bytes.Buffer
+		writer := multipart.NewWriter(&requestBody)
+
+		writer.WriteField("model", request.Model)
+		// 获取所有表单字段
+		formData := c.Request.PostForm
+		// 遍历表单字段并打印输出
+		for key, values := range formData {
+			if key == "model" {
+				continue
+			}
+			for _, value := range values {
+				writer.WriteField(key, value)
+			}
+		}
+
+		// Parse the multipart form to handle both single image and multiple images
+		if err := c.Request.ParseMultipartForm(32 << 20); err != nil { // 32MB max memory
+			return nil, errors.New("failed to parse multipart form")
+		}
+
+		if c.Request.MultipartForm != nil && c.Request.MultipartForm.File != nil {
+			// Check if "image" field exists in any form, including array notation
+			var imageFiles []*multipart.FileHeader
+			var exists bool
+
+			// First check for standard "image" field
+			if imageFiles, exists = c.Request.MultipartForm.File["image"]; !exists || len(imageFiles) == 0 {
+				// If not found, check for "image[]" field
+				if imageFiles, exists = c.Request.MultipartForm.File["image[]"]; !exists || len(imageFiles) == 0 {
+					// If still not found, iterate through all fields to find any that start with "image["
+					foundArrayImages := false
+					for fieldName, files := range c.Request.MultipartForm.File {
+						if strings.HasPrefix(fieldName, "image[") && len(files) > 0 {
+							foundArrayImages = true
+							for _, file := range files {
+								imageFiles = append(imageFiles, file)
+							}
+						}
+					}
+
+					// If no image fields found at all
+					if !foundArrayImages && (len(imageFiles) == 0) {
+						return nil, errors.New("image is required")
+					}
+				}
+			}
+
+			// Process all image files
+			for i, fileHeader := range imageFiles {
+				file, err := fileHeader.Open()
+				if err != nil {
+					return nil, fmt.Errorf("failed to open image file %d: %w", i, err)
+				}
+				defer file.Close()
+
+				// If multiple images, use image[] as the field name
+				fieldName := "image"
+				if len(imageFiles) > 1 {
+					fieldName = "image[]"
+				}
+
+				// Determine MIME type based on file extension
+				mimeType := detectImageMimeType(fileHeader.Filename)
+
+				// Create a form file with the appropriate content type
+				h := make(textproto.MIMEHeader)
+				h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="%s"; filename="%s"`, fieldName, fileHeader.Filename))
+				h.Set("Content-Type", mimeType)
+
+				part, err := writer.CreatePart(h)
+				if err != nil {
+					return nil, fmt.Errorf("create form part failed for image %d: %w", i, err)
+				}
+
+				if _, err := io.Copy(part, file); err != nil {
+					return nil, fmt.Errorf("copy file failed for image %d: %w", i, err)
+				}
+			}
+
+			// Handle mask file if present
+			if maskFiles, exists := c.Request.MultipartForm.File["mask"]; exists && len(maskFiles) > 0 {
+				maskFile, err := maskFiles[0].Open()
+				if err != nil {
+					return nil, errors.New("failed to open mask file")
+				}
+				defer maskFile.Close()
+
+				// Determine MIME type for mask file
+				mimeType := detectImageMimeType(maskFiles[0].Filename)
+
+				// Create a form file with the appropriate content type
+				h := make(textproto.MIMEHeader)
+				h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="mask"; filename="%s"`, maskFiles[0].Filename))
+				h.Set("Content-Type", mimeType)
+
+				maskPart, err := writer.CreatePart(h)
+				if err != nil {
+					return nil, errors.New("create form file failed for mask")
+				}
+
+				if _, err := io.Copy(maskPart, maskFile); err != nil {
+					return nil, errors.New("copy mask file failed")
+				}
+			}
+		} else {
+			return nil, errors.New("no multipart form data found")
+		}
+
+		// 关闭 multipart 编写器以设置分界线
+		writer.Close()
+		c.Request.Header.Set("Content-Type", writer.FormDataContentType())
+		return bytes.NewReader(requestBody.Bytes()), nil
+
+	default:
+		return request, nil
+	}
+}
+
+// detectImageMimeType determines the MIME type based on the file extension
+func detectImageMimeType(filename string) string {
+	ext := strings.ToLower(filepath.Ext(filename))
+	switch ext {
+	case ".jpg", ".jpeg":
+		return "image/jpeg"
+	case ".png":
+		return "image/png"
+	case ".webp":
+		return "image/webp"
+	default:
+		// Try to detect from extension if possible
+		if strings.HasPrefix(ext, ".jp") {
+			return "image/jpeg"
+		}
+		// Default to png as a fallback
+		return "image/png"
+	}
+}
+
+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// 模型后缀转换 reasoning effort
+	if strings.HasSuffix(request.Model, "-high") {
+		request.Reasoning.Effort = "high"
+		request.Model = strings.TrimSuffix(request.Model, "-high")
+	} else if strings.HasSuffix(request.Model, "-low") {
+		request.Reasoning.Effort = "low"
+		request.Model = strings.TrimSuffix(request.Model, "-low")
+	} else if strings.HasSuffix(request.Model, "-medium") {
+		request.Reasoning.Effort = "medium"
+		request.Model = strings.TrimSuffix(request.Model, "-medium")
+	}
 	return request, nil
 }

 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
-	if info.RelayMode == constant.RelayModeAudioTranscription || info.RelayMode == constant.RelayModeAudioTranslation {
+	if info.RelayMode == constant.RelayModeAudioTranscription ||
+		info.RelayMode == constant.RelayModeAudioTranslation ||
+		info.RelayMode == constant.RelayModeImagesEdits {
 		return channel.DoFormRequest(a, c, info, requestBody)
 	} else if info.RelayMode == constant.RelayModeRealtime {
 		return channel.DoWssRequest(a, c, info, requestBody)
@@ -259,10 +420,16 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 		fallthrough
 	case constant.RelayModeAudioTranscription:
 		err, usage = OpenaiSTTHandler(c, resp, info, a.ResponseFormat)
-	case constant.RelayModeImagesGenerations:
-		err, usage = OpenaiTTSHandler(c, resp, info)
+	case constant.RelayModeImagesGenerations, constant.RelayModeImagesEdits:
+		err, usage = OpenaiHandlerWithUsage(c, resp, info)
 	case constant.RelayModeRerank:
 		err, usage = common_handler.RerankHandler(c, info, resp)
+	case constant.RelayModeResponses:
+		if info.IsStream {
+			err, usage = OaiResponsesStreamHandler(c, resp, info)
+		} else {
+			err, usage = OaiResponsesHandler(c, resp, info)
+		}
 	default:
 		if info.IsStream {
 			err, usage = OaiStreamHandler(c, resp, info)
@@ -31,6 +31,9 @@ func handleClaudeFormat(c *gin.Context, data string, info *relaycommon.RelayInfo
 		return err
 	}

+	if streamResponse.Usage != nil {
+		info.ClaudeConvertInfo.Usage = streamResponse.Usage
+	}
 	claudeResponses := service.StreamResponseOpenAI2Claude(&streamResponse, info)
 	for _, resp := range claudeResponses {
 		helper.ClaudeData(c, *resp)
@@ -38,12 +41,7 @@ func handleClaudeFormat(c *gin.Context, data string, info *relaycommon.RelayInfo
 	return nil
 }

-func processStreamResponse(item string, responseTextBuilder *strings.Builder, toolCount *int) error {
-	var streamResponse dto.ChatCompletionsStreamResponse
-	if err := json.Unmarshal(common.StringToByteSlice(item), &streamResponse); err != nil {
-		return err
-	}
-
+func ProcessStreamResponse(streamResponse dto.ChatCompletionsStreamResponse, responseTextBuilder *strings.Builder, toolCount *int) error {
 	for _, choice := range streamResponse.Choices {
 		responseTextBuilder.WriteString(choice.Delta.GetContentString())
 		responseTextBuilder.WriteString(choice.Delta.GetReasoningContent())
@@ -78,7 +76,11 @@ func processChatCompletions(streamResp string, streamItems []string, responseTex
 		// 一次性解析失败，逐个解析
 		common.SysError("error unmarshalling stream response: " + err.Error())
 		for _, item := range streamItems {
-			if err := processStreamResponse(item, responseTextBuilder, toolCount); err != nil {
+			var streamResponse dto.ChatCompletionsStreamResponse
+			if err := json.Unmarshal(common.StringToByteSlice(item), &streamResponse); err != nil {
+				return err
+			}
+			if err := ProcessStreamResponse(streamResponse, responseTextBuilder, toolCount); err != nil {
 				common.SysError("error processing stream response: " + err.Error())
 			}
 		}
@@ -170,15 +172,14 @@ func handleFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, lastStream
 		helper.Done(c)

 	case relaycommon.RelayFormatClaude:
+		info.ClaudeConvertInfo.Done = true
 		var streamResponse dto.ChatCompletionsStreamResponse
 		if err := json.Unmarshal(common.StringToByteSlice(lastStreamData), &streamResponse); err != nil {
 			common.SysError("error unmarshalling stream response: " + err.Error())
 			return
 		}

-		if !containStreamUsage {
-			streamResponse.Usage = usage
-		}
+		info.ClaudeConvertInfo.Usage = usage

 		claudeResponses := service.StreamResponseOpenAI2Claude(&streamResponse, info)
 		for _, resp := range claudeResponses {
@@ -186,3 +187,10 @@ func handleFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, lastStream
 		}
 	}
 }
+
+func sendResponsesStreamData(c *gin.Context, streamResponse dto.ResponsesStreamResponse, data string) {
+	if data == "" {
+		return
+	}
+	helper.ResponseChunkData(c, streamResponse, data)
+}
@@ -117,6 +117,7 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 	model := info.UpstreamModelName

 	var responseTextBuilder strings.Builder
+	var toolCount int
 	var usage = &dto.Usage{}
 	var streamItems []string // store stream items
 	var forceFormat bool
@@ -130,8 +131,6 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 		thinkToContent = think2Content
 	}

-	toolCount := 0
-
 	var (
 		lastStreamData string
 	)
@@ -142,7 +141,6 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 			if err != nil {
 				common.SysError("error handling stream format: " + err.Error())
 			}
-			info.SetFirstResponseTime()
 		}
 		lastStreamData = data
 		streamItems = append(streamItems, data)
@@ -170,8 +168,10 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 			}
 		}
 	}
+
 	if shouldSendLastResp {
 		sendStreamData(c, info, lastStreamData, forceFormat, thinkToContent)
+		//err = handleStreamFormat(c, info, lastStreamData, forceFormat, thinkToContent)
 	}

 	// 处理token计算
@@ -215,10 +215,35 @@ func OpenaiHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayI
 			StatusCode: resp.StatusCode,
 		}, nil
 	}
+	
+	forceFormat := false
+	if forceFmt, ok := info.ChannelSetting[constant.ForceFormat].(bool); ok {
+		forceFormat = forceFmt
+	}
+
+	if simpleResponse.Usage.TotalTokens == 0 || (simpleResponse.Usage.PromptTokens == 0 && simpleResponse.Usage.CompletionTokens == 0) {
+		completionTokens := 0
+		for _, choice := range simpleResponse.Choices {
+			ctkm, _ := service.CountTextToken(choice.Message.StringContent()+choice.Message.ReasoningContent+choice.Message.Reasoning, info.UpstreamModelName)
+			completionTokens += ctkm
+		}
+		simpleResponse.Usage = dto.Usage{
+			PromptTokens:     info.PromptTokens,
+			CompletionTokens: completionTokens,
+			TotalTokens:      info.PromptTokens + completionTokens,
+		}
+	}

 	switch info.RelayFormat {
 	case relaycommon.RelayFormatOpenAI:
-		break
+		if forceFormat {
+			responseBody, err = json.Marshal(simpleResponse)
+			if err != nil {
+				return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
+			}
+		} else {
+			break
+		}
 	case relaycommon.RelayFormatClaude:
 		claudeResp := service.ResponseOpenAI2Claude(&simpleResponse, info)
 		claudeRespStr, err := json.Marshal(claudeResp)
@@ -244,52 +269,29 @@ func OpenaiHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayI
 		common.SysError("error copying response body: " + err.Error())
 	}
 	resp.Body.Close()
-	if simpleResponse.Usage.TotalTokens == 0 || (simpleResponse.Usage.PromptTokens == 0 && simpleResponse.Usage.CompletionTokens == 0) {
-		completionTokens := 0
-		for _, choice := range simpleResponse.Choices {
-			ctkm, _ := service.CountTextToken(choice.Message.StringContent()+choice.Message.ReasoningContent+choice.Message.Reasoning, info.UpstreamModelName)
-			completionTokens += ctkm
-		}
-		simpleResponse.Usage = dto.Usage{
-			PromptTokens:     info.PromptTokens,
-			CompletionTokens: completionTokens,
-			TotalTokens:      info.PromptTokens + completionTokens,
-		}
-	}
 	return nil, &simpleResponse.Usage
 }

 func OpenaiTTSHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
-	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-	// Reset response body
-	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
-	// We shouldn't set the header before we parse the response body, because the parse part may fail.
-	// And then we will have to send an error response, but in this case, the header has already been set.
-	// So the httpClient will be confused by the response.
-	// For example, Postman will report error, and we cannot check the response at all.
+	// the status code has been judged before, if there is a body reading failure,
+	// it should be regarded as a non-recoverable error, so it should not return err for external retry.
+	// Analogous to nginx's load balancing, it will only retry if it can't be requested or 
+	// if the upstream returns a specific status code, once the upstream has already written the header, 
+	// the subsequent failure of the response body should be regarded as a non-recoverable error, 
+	// and can be terminated directly.
+	defer resp.Body.Close()
+	usage := &dto.Usage{}
+	usage.PromptTokens = info.PromptTokens
+	usage.TotalTokens = info.PromptTokens
 	for k, v := range resp.Header {
 		c.Writer.Header().Set(k, v[0])
 	}
 	c.Writer.WriteHeader(resp.StatusCode)
-	_, err = io.Copy(c.Writer, resp.Body)
+	c.Writer.WriteHeaderNow()
+	_, err := io.Copy(c.Writer, resp.Body)
 	if err != nil {
-		return service.OpenAIErrorWrapper(err, "copy_response_body_failed", http.StatusInternalServerError), nil
+		common.LogError(c, err.Error())
 	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-
-	usage := &dto.Usage{}
-	usage.PromptTokens = info.PromptTokens
-	usage.TotalTokens = info.PromptTokens
 	return nil, usage
 }

@@ -595,3 +597,52 @@ func preConsumeUsage(ctx *gin.Context, info *relaycommon.RelayInfo, usage *dto.R
 	err := service.PreWssConsumeQuota(ctx, info, usage)
 	return err
 }
+
+func OpenaiHandlerWithUsage(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
+	}
+	err = resp.Body.Close()
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
+	}
+	// Reset response body
+	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
+	// We shouldn't set the header before we parse the response body, because the parse part may fail.
+	// And then we will have to send an error response, but in this case, the header has already been set.
+	// So the httpClient will be confused by the response.
+	// For example, Postman will report error, and we cannot check the response at all.
+	for k, v := range resp.Header {
+		c.Writer.Header().Set(k, v[0])
+	}
+	// reset content length
+	c.Writer.Header().Set("Content-Length", fmt.Sprintf("%d", len(responseBody)))
+	c.Writer.WriteHeader(resp.StatusCode)
+	_, err = io.Copy(c.Writer, resp.Body)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "copy_response_body_failed", http.StatusInternalServerError), nil
+	}
+	err = resp.Body.Close()
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
+	}
+
+	var usageResp dto.SimpleResponse
+	err = json.Unmarshal(responseBody, &usageResp)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "parse_response_body_failed", http.StatusInternalServerError), nil
+	}
+	// format
+	if usageResp.InputTokens > 0 {
+		usageResp.PromptTokens += usageResp.InputTokens
+	}
+	if usageResp.OutputTokens > 0 {
+		usageResp.CompletionTokens += usageResp.OutputTokens
+	}
+	if usageResp.InputTokensDetails != nil {
+		usageResp.PromptTokensDetails.ImageTokens += usageResp.InputTokensDetails.ImageTokens
+		usageResp.PromptTokensDetails.TextTokens += usageResp.InputTokensDetails.TextTokens
+	}
+	return nil, &usageResp.Usage
+}
@@ -0,0 +1,119 @@
+package openai
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"net/http"
+	"one-api/common"
+	"one-api/dto"
+	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
+	"one-api/service"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+)
+
+func OaiResponsesHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	// read response body
+	var responsesResponse dto.OpenAIResponsesResponse
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
+	}
+	err = resp.Body.Close()
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
+	}
+	err = common.DecodeJson(responseBody, &responsesResponse)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
+	}
+	if responsesResponse.Error != nil {
+		return &dto.OpenAIErrorWithStatusCode{
+			Error: dto.OpenAIError{
+				Message: responsesResponse.Error.Message,
+				Type:    "openai_error",
+				Code:    responsesResponse.Error.Code,
+			},
+			StatusCode: resp.StatusCode,
+		}, nil
+	}
+
+	// reset response body
+	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
+	// We shouldn't set the header before we parse the response body, because the parse part may fail.
+	// And then we will have to send an error response, but in this case, the header has already been set.
+	// So the httpClient will be confused by the response.
+	// For example, Postman will report error, and we cannot check the response at all.
+	for k, v := range resp.Header {
+		c.Writer.Header().Set(k, v[0])
+	}
+	c.Writer.WriteHeader(resp.StatusCode)
+	// copy response body
+	_, err = io.Copy(c.Writer, resp.Body)
+	if err != nil {
+		common.SysError("error copying response body: " + err.Error())
+	}
+	resp.Body.Close()
+	// compute usage
+	usage := dto.Usage{}
+	usage.PromptTokens = responsesResponse.Usage.InputTokens
+	usage.CompletionTokens = responsesResponse.Usage.OutputTokens
+	usage.TotalTokens = responsesResponse.Usage.TotalTokens
+	// 解析 Tools 用量
+	for _, tool := range responsesResponse.Tools {
+		info.ResponsesUsageInfo.BuiltInTools[tool.Type].CallCount++
+	}
+	return nil, &usage
+}
+
+func OaiResponsesStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	if resp == nil || resp.Body == nil {
+		common.LogError(c, "invalid response or response body")
+		return service.OpenAIErrorWrapper(fmt.Errorf("invalid response"), "invalid_response", http.StatusInternalServerError), nil
+	}
+
+	var usage = &dto.Usage{}
+	var responseTextBuilder strings.Builder
+
+	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
+
+		// 检查当前数据是否包含 completed 状态和 usage 信息
+		var streamResponse dto.ResponsesStreamResponse
+		if err := common.DecodeJsonStr(data, &streamResponse); err == nil {
+			sendResponsesStreamData(c, streamResponse, data)
+			switch streamResponse.Type {
+			case "response.completed":
+				usage.PromptTokens = streamResponse.Response.Usage.InputTokens
+				usage.CompletionTokens = streamResponse.Response.Usage.OutputTokens
+				usage.TotalTokens = streamResponse.Response.Usage.TotalTokens
+			case "response.output_text.delta":
+				// 处理输出文本
+				responseTextBuilder.WriteString(streamResponse.Delta)
+			case dto.ResponsesOutputTypeItemDone:
+				// 函数调用处理
+				if streamResponse.Item != nil {
+					switch streamResponse.Item.Type {
+					case dto.BuildInCallWebSearchCall:
+						info.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview].CallCount++
+					}
+				}
+			}
+		}
+		return true
+	})
+
+	if usage.CompletionTokens == 0 {
+		// 计算输出文本的 token 数量
+		tempStr := responseTextBuilder.String()
+		if len(tempStr) > 0 {
+			// 非正常结束，使用输出文本的 token 数量
+			completionTokens, _ := service.CountTextToken(tempStr, info.UpstreamModelName)
+			usage.CompletionTokens = completionTokens
+		}
+	}
+
+	return nil, usage
+}
@@ -0,0 +1,9 @@
+package openrouter
+
+type RequestReasoning struct {
+	// One of the following (not both):
+	Effort    string `json:"effort,omitempty"`     // Can be "high", "medium", or "low" (OpenAI-style)
+	MaxTokens int    `json:"max_tokens,omitempty"` // Specific token limit (Anthropic-style)
+	// Optional: Default is false. All models support this.
+	Exclude bool `json:"exclude,omitempty"` // Set to true to exclude reasoning tokens from response
+}
@@ -3,13 +3,14 @@ package palm
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
 	"one-api/service"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -60,6 +61,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -3,13 +3,14 @@ package perplexity
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -63,6 +64,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -3,7 +3,6 @@ package siliconflow
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -11,6 +10,8 @@ import (
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -58,6 +59,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	return request, nil
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -74,13 +80,9 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 	switch info.RelayMode {
 	case constant.RelayModeRerank:
 		err, usage = siliconflowRerankHandler(c, resp)
-	case constant.RelayModeChatCompletions:
-		if info.IsStream {
-			err, usage = openai.OaiStreamHandler(c, resp, info)
-		} else {
-			err, usage = openai.OpenaiHandler(c, resp, info)
-		}
 	case constant.RelayModeCompletions:
+		fallthrough
+	case constant.RelayModeChatCompletions:
 		if info.IsStream {
 			err, usage = openai.OaiStreamHandler(c, resp, info)
 		} else {
@@ -3,7 +3,6 @@ package tencent
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/common"
@@ -13,6 +12,8 @@ import (
 	"one-api/service"
 	"strconv"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -84,6 +85,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -4,7 +4,6 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -13,7 +12,11 @@ import (
 	"one-api/relay/channel/gemini"
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/constant"
+	"one-api/setting/model_setting"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 const (
@@ -29,6 +32,8 @@ var claudeModelMap = map[string]string{
 	"claude-3-5-sonnet-20240620": "claude-3-5-sonnet@20240620",
 	"claude-3-5-sonnet-20241022": "claude-3-5-sonnet-v2@20241022",
 	"claude-3-7-sonnet-20250219": "claude-3-7-sonnet@20250219",
+	"claude-sonnet-4-20250514":   "claude-sonnet-4@20250514",
+	"claude-opus-4-20250514":     "claude-opus-4@20250514",
 }

 const anthropicVersion = "vertex-2023-10-16"
@@ -77,19 +82,37 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 	a.AccountCredentials = *adc
 	suffix := ""
 	if a.RequestMode == RequestModeGemini {
+		if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
+			// suffix -thinking and -nothinking
+			if strings.HasSuffix(info.OriginModelName, "-thinking") {
+				info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
+			} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
+				info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
+			}
+		}
+
 		if info.IsStream {
 			suffix = "streamGenerateContent?alt=sse"
 		} else {
 			suffix = "generateContent"
 		}
-		return fmt.Sprintf(
-			"https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:%s",
-			region,
-			adc.ProjectID,
-			region,
-			info.UpstreamModelName,
-			suffix,
-		), nil
+		if region == "global" {
+			return fmt.Sprintf(
+				"https://aiplatform.googleapis.com/v1/projects/%s/locations/global/publishers/google/models/%s:%s",
+				adc.ProjectID,
+				info.UpstreamModelName,
+				suffix,
+			), nil
+		} else {
+			return fmt.Sprintf(
+				"https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:%s",
+				region,
+				adc.ProjectID,
+				region,
+				info.UpstreamModelName,
+				suffix,
+			), nil
+		}
 	} else if a.RequestMode == RequestModeClaude {
 		if info.IsStream {
 			suffix = "streamRawPredict?alt=sse"
@@ -143,7 +166,7 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 		info.UpstreamModelName = claudeReq.Model
 		return vertexClaudeReq, nil
 	} else if a.RequestMode == RequestModeGemini {
-		geminiRequest, err := gemini.CovertGemini2OpenAI(*request)
+		geminiRequest, err := gemini.CovertGemini2OpenAI(*request, info)
 		if err != nil {
 			return nil, err
 		}
@@ -164,6 +187,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -174,7 +202,11 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 		case RequestModeClaude:
 			err, usage = claude.ClaudeStreamHandler(c, resp, info, claude.RequestModeMessage)
 		case RequestModeGemini:
-			err, usage = gemini.GeminiChatStreamHandler(c, resp, info)
+			if info.RelayMode == constant.RelayModeGemini {
+				usage, err = gemini.GeminiTextGenerationStreamHandler(c, resp, info)
+			} else {
+				err, usage = gemini.GeminiChatStreamHandler(c, resp, info)
+			}
 		case RequestModeLlama:
 			err, usage = openai.OaiStreamHandler(c, resp, info)
 		}
@@ -183,7 +215,11 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 		case RequestModeClaude:
 			err, usage = claude.ClaudeHandler(c, resp, claude.RequestModeMessage, info)
 		case RequestModeGemini:
-			err, usage = gemini.GeminiChatHandler(c, resp, info)
+			if info.RelayMode == constant.RelayModeGemini {
+				usage, err = gemini.GeminiTextGenerationHandler(c, resp, info)
+			} else {
+				err, usage = gemini.GeminiChatHandler(c, resp, info)
+			}
 		case RequestModeLlama:
 			err, usage = openai.OpenaiHandler(c, resp, info)
 		}
@@ -3,7 +3,6 @@ package volcengine
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -12,6 +11,8 @@ import (
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -71,6 +72,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return request, nil
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -0,0 +1,118 @@
+package xai
+
+import (
+	"errors"
+	"io"
+	"net/http"
+	"one-api/dto"
+	"one-api/relay/channel"
+	"one-api/relay/channel/openai"
+	relaycommon "one-api/relay/common"
+	"strings"
+
+	"one-api/relay/constant"
+
+	"github.com/gin-gonic/gin"
+)
+
+type Adaptor struct {
+}
+
+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	//TODO implement me
+	//panic("implement me")
+	return nil, errors.New("not available")
+}
+
+func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
+	//not available
+	return nil, errors.New("not available")
+}
+
+func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
+	xaiRequest := ImageRequest{
+		Model:          request.Model,
+		Prompt:         request.Prompt,
+		N:              request.N,
+		ResponseFormat: request.ResponseFormat,
+	}
+	return xaiRequest, nil
+}
+
+func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
+}
+
+func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
+	return relaycommon.GetFullRequestURL(info.BaseUrl, info.RequestURLPath, info.ChannelType), nil
+}
+
+func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
+	channel.SetupApiRequestHeader(info, c, req)
+	req.Set("Authorization", "Bearer "+info.ApiKey)
+	return nil
+}
+
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+	if request == nil {
+		return nil, errors.New("request is nil")
+	}
+	if strings.HasPrefix(request.Model, "grok-3-mini") {
+		if request.MaxCompletionTokens == 0 && request.MaxTokens != 0 {
+			request.MaxCompletionTokens = request.MaxTokens
+			request.MaxTokens = 0
+		}
+		if strings.HasSuffix(request.Model, "-high") {
+			request.ReasoningEffort = "high"
+			request.Model = strings.TrimSuffix(request.Model, "-high")
+		} else if strings.HasSuffix(request.Model, "-low") {
+			request.ReasoningEffort = "low"
+			request.Model = strings.TrimSuffix(request.Model, "-low")
+		} else if strings.HasSuffix(request.Model, "-medium") {
+			request.ReasoningEffort = "medium"
+			request.Model = strings.TrimSuffix(request.Model, "-medium")
+		}
+		info.ReasoningEffort = request.ReasoningEffort
+		info.UpstreamModelName = request.Model
+	}
+	return request, nil
+}
+
+func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
+	return nil, nil
+}
+
+func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
+	//not available
+	return nil, errors.New("not available")
+}
+
+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
+func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
+	return channel.DoApiRequest(a, c, info, requestBody)
+}
+
+func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *dto.OpenAIErrorWithStatusCode) {
+	switch info.RelayMode {
+	case constant.RelayModeImagesGenerations, constant.RelayModeImagesEdits:
+		err, usage = openai.OpenaiHandlerWithUsage(c, resp, info)
+	default:
+		if info.IsStream {
+			err, usage = xAIStreamHandler(c, resp, info)
+		} else {
+			err, usage = xAIHandler(c, resp, info)
+		}
+	}
+	return
+}
+
+func (a *Adaptor) GetModelList() []string {
+	return ModelList
+}
+
+func (a *Adaptor) GetChannelName() string {
+	return ChannelName
+}
@@ -0,0 +1,18 @@
+package xai
+
+var ModelList = []string{
+	// grok-3
+	"grok-3-beta", "grok-3-mini-beta",
+	// grok-3 mini
+	"grok-3-fast-beta", "grok-3-mini-fast-beta",
+	// extend grok-3-mini reasoning
+	"grok-3-mini-beta-high", "grok-3-mini-beta-low", "grok-3-mini-beta-medium",
+	"grok-3-mini-fast-beta-high", "grok-3-mini-fast-beta-low", "grok-3-mini-fast-beta-medium",
+	// image model
+	"grok-2-image",
+	// legacy models
+	"grok-2", "grok-2-vision",
+	"grok-beta", "grok-vision-beta",
+}
+
+var ChannelName = "xai"
@@ -0,0 +1,27 @@
+package xai
+
+import "one-api/dto"
+
+// ChatCompletionResponse represents the response from XAI chat completion API
+type ChatCompletionResponse struct {
+	Id                string `json:"id"`
+	Object            string `json:"object"`
+	Created           int64  `json:"created"`
+	Model             string `json:"model"`
+	Choices           []dto.ChatCompletionsStreamResponseChoice
+	Usage             *dto.Usage `json:"usage"`
+	SystemFingerprint string     `json:"system_fingerprint"`
+}
+
+// quality, size or style are not supported by xAI API at the moment.
+type ImageRequest struct {
+	Model          string          `json:"model"`
+	Prompt         string          `json:"prompt" binding:"required"`
+	N              int             `json:"n,omitempty"`
+	// Size           string          `json:"size,omitempty"`
+	// Quality        string          `json:"quality,omitempty"`
+	ResponseFormat string          `json:"response_format,omitempty"`
+	// Style          string          `json:"style,omitempty"`
+	// User           string          `json:"user,omitempty"`
+	// ExtraFields    json.RawMessage `json:"extra_fields,omitempty"`
+}
@@ -0,0 +1,119 @@
+package xai
+
+import (
+	"bytes"
+	"encoding/json"
+	"github.com/gin-gonic/gin"
+	"io"
+	"net/http"
+	"one-api/common"
+	"one-api/dto"
+	"one-api/relay/channel/openai"
+	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
+	"one-api/service"
+	"strings"
+)
+
+func streamResponseXAI2OpenAI(xAIResp *dto.ChatCompletionsStreamResponse, usage *dto.Usage) *dto.ChatCompletionsStreamResponse {
+	if xAIResp == nil {
+		return nil
+	}
+	if xAIResp.Usage != nil {
+		xAIResp.Usage.CompletionTokens = usage.CompletionTokens
+	}
+	openAIResp := &dto.ChatCompletionsStreamResponse{
+		Id:      xAIResp.Id,
+		Object:  xAIResp.Object,
+		Created: xAIResp.Created,
+		Model:   xAIResp.Model,
+		Choices: xAIResp.Choices,
+		Usage:   xAIResp.Usage,
+	}
+
+	return openAIResp
+}
+
+func xAIStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	usage := &dto.Usage{}
+	var responseTextBuilder strings.Builder
+	var toolCount int
+	var containStreamUsage bool
+
+	helper.SetEventStreamHeaders(c)
+
+	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
+		var xAIResp *dto.ChatCompletionsStreamResponse
+		err := json.Unmarshal([]byte(data), &xAIResp)
+		if err != nil {
+			common.SysError("error unmarshalling stream response: " + err.Error())
+			return true
+		}
+
+		// 把 xAI 的usage转换为 OpenAI 的usage
+		if xAIResp.Usage != nil {
+			containStreamUsage = true
+			usage.PromptTokens = xAIResp.Usage.PromptTokens
+			usage.TotalTokens = xAIResp.Usage.TotalTokens
+			usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
+		}
+
+		openaiResponse := streamResponseXAI2OpenAI(xAIResp, usage)
+		_ = openai.ProcessStreamResponse(*openaiResponse, &responseTextBuilder, &toolCount)
+		err = helper.ObjectData(c, openaiResponse)
+		if err != nil {
+			common.SysError(err.Error())
+		}
+		return true
+	})
+
+	if !containStreamUsage {
+		usage, _ = service.ResponseText2Usage(responseTextBuilder.String(), info.UpstreamModelName, info.PromptTokens)
+		usage.CompletionTokens += toolCount * 7
+	}
+
+	helper.Done(c)
+	err := resp.Body.Close()
+	if err != nil {
+		//return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
+		common.SysError("close_response_body_failed: " + err.Error())
+	}
+	return nil, usage
+}
+
+func xAIHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	responseBody, err := io.ReadAll(resp.Body)
+	var response *dto.TextResponse
+	err = common.DecodeJson(responseBody, &response)
+	if err != nil {
+		common.SysError("error unmarshalling stream response: " + err.Error())
+		return nil, nil
+	}
+	response.Usage.CompletionTokens = response.Usage.TotalTokens - response.Usage.PromptTokens
+	response.Usage.CompletionTokenDetails.TextTokens = response.Usage.CompletionTokens - response.Usage.CompletionTokenDetails.ReasoningTokens
+
+	// new body
+	encodeJson, err := common.EncodeJson(response)
+	if err != nil {
+		common.SysError("error marshalling stream response: " + err.Error())
+		return nil, nil
+	}
+
+	// set new body
+	resp.Body = io.NopCloser(bytes.NewBuffer(encodeJson))
+
+	for k, v := range resp.Header {
+		c.Writer.Header().Set(k, v[0])
+	}
+	c.Writer.WriteHeader(resp.StatusCode)
+	_, err = io.Copy(c.Writer, resp.Body)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "copy_response_body_failed", http.StatusInternalServerError), nil
+	}
+	err = resp.Body.Close()
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
+	}
+
+	return nil, &response.Usage
+}
@@ -2,7 +2,6 @@ package xunfei

 import (
 	"errors"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -10,6 +9,8 @@ import (
 	relaycommon "one-api/relay/common"
 	"one-api/service"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -61,6 +62,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	// xunfei's request is not http request, so we don't need to do anything here
 	dummyResp := &http.Response{}
@@ -3,12 +3,13 @@ package zhipu
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -71,6 +72,11 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
 	return channel.DoApiRequest(a, c, info, requestBody)
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *dto.OpenAIErrorWithStatusCode) {
 	if info.IsStream {
 		err, usage = zhipuStreamHandler(c, resp)
@@ -3,13 +3,15 @@ package zhipu_4v
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
+	relayconstant "one-api/relay/constant"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
@@ -35,7 +37,13 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
-	return fmt.Sprintf("%s/api/paas/v4/chat/completions", info.BaseUrl), nil
+	baseUrl := fmt.Sprintf("%s/api/paas/v4", info.BaseUrl)
+	switch info.RelayMode {
+	case relayconstant.RelayModeEmbeddings:
+		return fmt.Sprintf("%s/embeddings", baseUrl), nil
+	default:
+		return fmt.Sprintf("%s/chat/completions", baseUrl), nil
+	}
 }

 func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
@@ -60,7 +68,11 @@ func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dt
 }

 func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
-	//TODO implement me
+	return request, nil
+}
+
+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
 	return nil, errors.New("not implemented")
 }

@@ -1,17 +1,9 @@
 package zhipu_4v

 import (
-	"bufio"
-	"bytes"
-	"encoding/json"
-	"github.com/gin-gonic/gin"
 	"github.com/golang-jwt/jwt"
-	"io"
-	"net/http"
 	"one-api/common"
 	"one-api/dto"
-	"one-api/relay/helper"
-	"one-api/service"
 	"strings"
 	"sync"
 	"time"
@@ -119,163 +111,3 @@ func requestOpenAI2Zhipu(request dto.GeneralOpenAIRequest) *dto.GeneralOpenAIReq
 		ToolChoice:  request.ToolChoice,
 	}
 }
-
-//func responseZhipu2OpenAI(response *dto.OpenAITextResponse) *dto.OpenAITextResponse {
-//	fullTextResponse := dto.OpenAITextResponse{
-//		Id:      response.Id,
-//		Object:  "chat.completion",
-//		Created: common.GetTimestamp(),
-//		Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.TextResponseChoices)),
-//		Usage:   response.Usage,
-//	}
-//	for i, choice := range response.TextResponseChoices {
-//		content, _ := json.Marshal(strings.Trim(choice.Content, "\""))
-//		openaiChoice := dto.OpenAITextResponseChoice{
-//			Index: i,
-//			Message: dto.Message{
-//				Role:    choice.Role,
-//				Content: content,
-//			},
-//			FinishReason: "",
-//		}
-//		if i == len(response.TextResponseChoices)-1 {
-//			openaiChoice.FinishReason = "stop"
-//		}
-//		fullTextResponse.Choices = append(fullTextResponse.Choices, openaiChoice)
-//	}
-//	return &fullTextResponse
-//}
-
-func streamResponseZhipu2OpenAI(zhipuResponse *ZhipuV4StreamResponse) *dto.ChatCompletionsStreamResponse {
-	var choice dto.ChatCompletionsStreamResponseChoice
-	choice.Delta.Content = zhipuResponse.Choices[0].Delta.Content
-	choice.Delta.Role = zhipuResponse.Choices[0].Delta.Role
-	choice.Delta.ToolCalls = zhipuResponse.Choices[0].Delta.ToolCalls
-	choice.Index = zhipuResponse.Choices[0].Index
-	choice.FinishReason = zhipuResponse.Choices[0].FinishReason
-	response := dto.ChatCompletionsStreamResponse{
-		Id:      zhipuResponse.Id,
-		Object:  "chat.completion.chunk",
-		Created: zhipuResponse.Created,
-		Model:   "glm-4v",
-		Choices: []dto.ChatCompletionsStreamResponseChoice{choice},
-	}
-	return &response
-}
-
-func lastStreamResponseZhipuV42OpenAI(zhipuResponse *ZhipuV4StreamResponse) (*dto.ChatCompletionsStreamResponse, *dto.Usage) {
-	response := streamResponseZhipu2OpenAI(zhipuResponse)
-	return response, &zhipuResponse.Usage
-}
-
-func zhipuStreamHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
-	var usage *dto.Usage
-	scanner := bufio.NewScanner(resp.Body)
-	scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
-		if atEOF && len(data) == 0 {
-			return 0, nil, nil
-		}
-		if i := strings.Index(string(data), "\n"); i >= 0 {
-			return i + 1, data[0:i], nil
-		}
-		if atEOF {
-			return len(data), data, nil
-		}
-		return 0, nil, nil
-	})
-	dataChan := make(chan string)
-	stopChan := make(chan bool)
-	go func() {
-		for scanner.Scan() {
-			data := scanner.Text()
-			if len(data) < 6 { // ignore blank line or wrong format
-				continue
-			}
-			if data[:6] != "data: " && data[:6] != "[DONE]" {
-				continue
-			}
-			dataChan <- data
-		}
-		stopChan <- true
-	}()
-	helper.SetEventStreamHeaders(c)
-	c.Stream(func(w io.Writer) bool {
-		select {
-		case data := <-dataChan:
-			if strings.HasPrefix(data, "data: [DONE]") {
-				data = data[:12]
-			}
-			// some implementations may add \r at the end of data
-			data = strings.TrimSuffix(data, "\r")
-
-			var streamResponse ZhipuV4StreamResponse
-			err := json.Unmarshal([]byte(data), &streamResponse)
-			if err != nil {
-				common.SysError("error unmarshalling stream response: " + err.Error())
-			}
-			var response *dto.ChatCompletionsStreamResponse
-			if strings.Contains(data, "prompt_tokens") {
-				response, usage = lastStreamResponseZhipuV42OpenAI(&streamResponse)
-			} else {
-				response = streamResponseZhipu2OpenAI(&streamResponse)
-			}
-			jsonResponse, err := json.Marshal(response)
-			if err != nil {
-				common.SysError("error marshalling stream response: " + err.Error())
-				return true
-			}
-			c.Render(-1, common.CustomEvent{Data: "data: " + string(jsonResponse)})
-			return true
-		case <-stopChan:
-			return false
-		}
-	})
-	err := resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-	return nil, usage
-}
-
-func zhipuHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
-	var textResponse ZhipuV4Response
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
-	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-	err = json.Unmarshal(responseBody, &textResponse)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
-	}
-	if textResponse.Error.Type != "" {
-		return &dto.OpenAIErrorWithStatusCode{
-			Error:      textResponse.Error,
-			StatusCode: resp.StatusCode,
-		}, nil
-	}
-	// Reset response body
-	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
-
-	// We shouldn't set the header before we parse the response body, because the parse part may fail.
-	// And then we will have to send an error response, but in this case, the header has already been set.
-	// So the HTTPClient will be confused by the response.
-	// For example, Postman will report error, and we cannot check the response at all.
-	for k, v := range resp.Header {
-		c.Writer.Header().Set(k, v[0])
-	}
-	c.Writer.WriteHeader(resp.StatusCode)
-	_, err = io.Copy(c.Writer, resp.Body)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "copy_response_body_failed", http.StatusInternalServerError), nil
-	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-
-	return nil, &textResponse.Usage
-}
--- a/Show More
+++ b/Show More