fddf54ccc5
Three layered optimizations targeting Gemini-style 5MB base64 payloads where
RSS could balloon to tens of GB under concurrent load:
1. Byte-based param override (relay/common/override.go)
- Switch legacy/operations hot paths from common.Marshal round-trips and
map[string]any conversions to gjson/sjson on []byte directly.
- Avoids cloning 5MB strings during each Set/Delete operation.
2. strings.Builder for Gemini response markdown (relay/channel/gemini/relay-gemini.go)
- Replace string concatenation + strings.Join when assembling
"![image](data:...)" markdown content for inline image responses.
- Pre-allocates capacity from inline_data byte sizes.
3. Outbound BodyStorage + streaming Decoder (this commit's core)
- New relay/common/outbound_body.go helper wraps marshaled upstream bodies
in common.BodyStorage, allowing disk-cache mode to offload jsonData to
a temp file while waiting for upstream TTFB. The original []byte can
then be GC'd, removing ~5MB/req of heap residency during the longest
window of a request.
- All 7 relay handlers (gemini/claude/responses/embedding/image/compatible/
rerank) plus chat_completions_via_responses adopt the helper with
defer closer.Close() and explicit jsonData = nil.
- relay/common/relay_info.go: new UpstreamRequestBodySize so
relay/channel/api_request.go can populate req.ContentLength (lost when
body becomes a type-erased io.Reader).
- common/gin.go UnmarshalBodyReusable: when storage is disk-backed and
content-type is JSON, decode via DecodeJson(storage) instead of
storage.Bytes()+Unmarshal, removing one transient 5MB copy per request.
Memory mode and form/multipart paths are unchanged.
88 lines
1.7 KiB
Go
88 lines
1.7 KiB
Go
// Copyright 2014 Manu Martinez-Almeida. All rights reserved.
|
|
// Use of this source code is governed by a MIT style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package common
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"sync"
|
|
)
|
|
|
|
// stringWriter is an io.Writer that can also accept a string directly
// through its unexported writeString method.
type stringWriter interface {
	io.Writer
	writeString(s string) (n int, err error)
}
|
|
|
|
// stringWrapper embeds an io.Writer so that a writeString method can be
// attached to writers that do not provide one themselves.
type stringWrapper struct {
	io.Writer
}
|
|
|
|
func (w stringWrapper) writeString(str string) (int, error) {
|
|
return w.Writer.Write([]byte(str))
|
|
}
|
|
|
|
func checkWriter(writer io.Writer) stringWriter {
|
|
if w, ok := writer.(stringWriter); ok {
|
|
return w
|
|
} else {
|
|
return stringWrapper{writer}
|
|
}
|
|
}
|
|
|
|
// Server-Sent Events
|
|
// W3C Working Draft 29 October 2009
|
|
// http://www.w3.org/TR/2009/WD-eventsource-20091029/
|
|
|
|
// Header values installed by CustomEvent.WriteContentType. The slices are
// assigned into the response header map as-is, so every response shares the
// same backing arrays.
var (
	writeContentType = []string{"text/event-stream"}
	noCache          = []string{"no-cache"}
)
|
|
|
|
// fieldReplacer rewrites each line feed and carriage return as the
// two-character escape sequences `\n` and `\r` respectively.
// NOTE(review): nothing in the visible part of this file references it.
var fieldReplacer = strings.NewReplacer("\n", `\n`, "\r", `\r`)
|
|
|
|
// dataReplacer is applied to event data before it is written out. Line feeds
// map to themselves (so they pass through untouched), while carriage returns
// become the two-character escape sequence `\r`.
var dataReplacer = strings.NewReplacer(
	"\n", "\n", // identity mapping: newlines are emitted unchanged
	"\r", `\r`,
)
|
|
|
|
// CustomEvent is a server-sent-event value that can be rendered to an
// http.ResponseWriter through its Render and WriteContentType methods.
//
// Of the payload fields, only Data is written by the encoder in this file;
// Event, Id and Retry are carried on the struct but not emitted here.
//
// NOTE(review): the methods on CustomEvent use value receivers, so each call
// works on a copy of the struct, Mutex included.
type CustomEvent struct {
	// Event, Id and Retry are not read by the visible rendering code.
	Event, Id string
	Retry     uint
	// Data is formatted with fmt.Sprint and written to the response.
	Data interface{}

	// Mutex is locked by WriteContentType while headers are being set.
	Mutex sync.Mutex
}
|
|
|
|
func encode(writer io.Writer, event CustomEvent) error {
|
|
w := checkWriter(writer)
|
|
return writeData(w, event.Data)
|
|
}
|
|
|
|
func writeData(w stringWriter, data interface{}) error {
|
|
dataReplacer.WriteString(w, fmt.Sprint(data))
|
|
if strings.HasPrefix(data.(string), "data") {
|
|
w.writeString("\n\n")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r CustomEvent) Render(w http.ResponseWriter) error {
|
|
r.WriteContentType(w)
|
|
return encode(w, r)
|
|
}
|
|
|
|
func (r CustomEvent) WriteContentType(w http.ResponseWriter) {
|
|
r.Mutex.Lock()
|
|
defer r.Mutex.Unlock()
|
|
header := w.Header()
|
|
header["Content-Type"] = writeContentType
|
|
|
|
if _, exist := header["Cache-Control"]; !exist {
|
|
header["Cache-Control"] = noCache
|
|
}
|
|
}
|