feat: add 91Spider proxy support and drive improvements

This commit is contained in:
nianzhibai
2026-06-01 17:39:41 +08:00
parent cf9de5b40a
commit c78f22aedb
20 changed files with 1504 additions and 42 deletions
+28 -3
View File
@@ -68,6 +68,7 @@ import time
import random
import json
import os
import socket
import sys
import html
from urllib.parse import urljoin, unquote, urlparse
@@ -80,6 +81,28 @@ except ImportError:
print("请运行: pip install beautifulsoup4 lxml")
sys.exit(1)
def prefer_ipv4_for_plain_socks5_proxy():
"""PySocks may pick IPv6 first for socks5://; some SOCKS5 servers only accept IPv4."""
# Read the proxy settings from both the upper- and lower-case environment variables.
proxy_envs = (
os.environ.get("HTTPS_PROXY", ""),
os.environ.get("HTTP_PROXY", ""),
os.environ.get("https_proxy", ""),
os.environ.get("http_proxy", ""),
)
# Only the plain "socks5://" prefix triggers the patch; "socks5h://" does not match it.
uses_plain_socks5 = any(v.strip().lower().startswith("socks5://") for v in proxy_envs)
# Nothing to do without a socks5:// proxy, or when the marker attribute shows
# that socket.getaddrinfo has already been wrapped.
if not uses_plain_socks5 or getattr(socket, "_spider91_ipv4_first", False):
return
original_getaddrinfo = socket.getaddrinfo
def getaddrinfo_ipv4_first(*args, **kwargs):
infos = original_getaddrinfo(*args, **kwargs)
# sorted() is stable: AF_INET entries move to the front, and entries keep
# their relative order within each group.
return sorted(infos, key=lambda info: 0 if info[0] == socket.AF_INET else 1)
# Replace the resolver for the whole process and record that the patch is installed.
socket.getaddrinfo = getaddrinfo_ipv4_first
socket._spider91_ipv4_first = True
# ===================== 配置区域 =====================
BASE_URL = "https://www.91porn.com/v.php"
LIST_PARAMS = {
@@ -757,13 +780,15 @@ def main():
"日志改走 stderr。配合 backend 边读边下载使用。")
args, _ = parser.parse_known_args()
cli_out = sys.stderr if args.stream_output else sys.stdout
prefer_ipv4_for_plain_socks5_proxy()
print("""
================================================
91porn 视频爬虫启动中...
================================================
按 Ctrl+C 可随时中断并保存进度
""")
""", file=cli_out)
# 加载已知 ID(来自 backend 的 catalog 已入库列表;兼容旧参数名)
seen_viewkeys = []
@@ -775,9 +800,9 @@ def main():
if line:
seen_viewkeys.append(line)
except FileNotFoundError:
print(f"警告: --seen-viewkeys-file 不存在: {args.seen_viewkeys_file}")
print(f"警告: --seen-viewkeys-file 不存在: {args.seen_viewkeys_file}", file=cli_out)
except Exception as e:
print(f"警告: 读取 --seen-viewkeys-file 失败: {e}")
print(f"警告: 读取 --seen-viewkeys-file 失败: {e}", file=cli_out)
# 决定运行模式
if args.target_new is not None:
+1
View File
@@ -36,6 +36,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
python3-bs4 \
python3-lxml \
python3-requests \
python3-socks \
tar \
tzdata \
&& rm -rf /var/lib/apt/lists/*
+3 -3
View File
@@ -7,15 +7,17 @@ toolchain go1.23.4
require (
github.com/OpenListTeam/wopan-sdk-go v0.2.0
github.com/SheltonZhu/115driver v1.3.2
github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible
github.com/go-chi/chi/v5 v5.1.0
github.com/go-resty/resty/v2 v2.14.0
golang.org/x/net v0.27.0
golang.org/x/sys v0.30.0
gopkg.in/yaml.v3 v3.0.1
modernc.org/sqlite v1.33.1
)
require (
github.com/aead/ecdh v0.2.0 // indirect
github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible // indirect
github.com/andreburgaud/crypt2go v1.1.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/google/uuid v1.6.0 // indirect
@@ -28,8 +30,6 @@ require (
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e // indirect
golang.org/x/crypto v0.25.0 // indirect
golang.org/x/net v0.27.0 // indirect
golang.org/x/sys v0.30.0 // indirect
golang.org/x/time v0.8.0 // indirect
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect
modernc.org/libc v1.55.3 // indirect
+63 -1
View File
@@ -7,6 +7,7 @@ import (
"errors"
"fmt"
"net/http"
"net/url"
"os"
"strconv"
"strings"
@@ -395,6 +396,7 @@ func (a *AdminServer) handleListDrives(w http.ResponseWriter, r *http.Request) {
SkipDirIDs []string `json:"skipDirIds"`
// LastCrawlAt 是 spider91 上次成功爬取的 unix 秒(来自 credentials.last_crawl_at)。
// 其它 kind 留 0;前端用它显示"上次抓取: N 小时前"。
Spider91Proxy string `json:"spider91Proxy,omitempty"`
LastCrawlAt int64 `json:"lastCrawlAt,omitempty"`
ThumbnailGenerationStatus GenerationStatus `json:"thumbnailGenerationStatus"`
PreviewGenerationStatus GenerationStatus `json:"previewGenerationStatus"`
@@ -453,6 +455,7 @@ func (a *AdminServer) handleListDrives(w http.ResponseWriter, r *http.Request) {
HasCredential: hasCred,
TeaserEnabled: d.TeaserEnabled,
SkipDirIDs: append([]string{}, d.SkipDirIDs...),
Spider91Proxy: spider91ProxyForDrive(d),
LastCrawlAt: lastCrawlAt,
ThumbnailGenerationStatus: generation.Thumbnail,
PreviewGenerationStatus: generation.Preview,
@@ -505,7 +508,14 @@ func (a *AdminServer) handleUpsertDrive(w http.ResponseWriter, r *http.Request)
if existingDrive, err := a.Catalog.GetDrive(r.Context(), body.ID); err == nil {
existing = existingDrive
}
if len(body.Credentials) == 0 && existing != nil && len(existing.Credentials) > 0 {
if body.Kind == "spider91" {
credentials, err := mergeSpider91Credentials(existing, body.Credentials)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
body.Credentials = credentials
} else if len(body.Credentials) == 0 && existing != nil && len(existing.Credentials) > 0 {
body.Credentials = existing.Credentials
}
@@ -554,6 +564,58 @@ func (a *AdminServer) handleUpsertDrive(w http.ResponseWriter, r *http.Request)
writeJSON(w, http.StatusOK, map[string]any{"ok": true})
}
// spider91ProxyForDrive returns the whitespace-trimmed "proxy" credential of a
// spider91 drive. It returns "" for a nil drive, for any other drive kind, and
// when no proxy credential is stored.
func spider91ProxyForDrive(d *catalog.Drive) string {
	if d == nil {
		return ""
	}
	if d.Kind != "spider91" {
		return ""
	}
	// Indexing a nil map yields "", so a nil credential map needs no extra check.
	return strings.TrimSpace(d.Credentials["proxy"])
}
// mergeSpider91Credentials overlays incoming onto a copy of the credentials of
// existing (which may be nil) and returns the merged map.
//
// Keys that are blank after trimming are ignored. The "proxy" key is validated
// with normalizeSpider91ProxyURL: an invalid value aborts the merge with that
// error, an empty value removes the stored proxy, and any other value replaces
// it. All other keys are copied verbatim.
func mergeSpider91Credentials(existing *catalog.Drive, incoming map[string]string) (map[string]string, error) {
	merged := map[string]string{}
	if existing != nil {
		for key, value := range existing.Credentials {
			merged[key] = value
		}
	}
	for key, value := range incoming {
		switch {
		case strings.TrimSpace(key) == "":
			// Blank keys carry no meaning; drop them.
		case key != "proxy":
			merged[key] = value
		default:
			normalized, err := normalizeSpider91ProxyURL(value)
			if err != nil {
				return nil, err
			}
			if normalized != "" {
				merged["proxy"] = normalized
			} else {
				delete(merged, "proxy")
			}
		}
	}
	return merged, nil
}
// normalizeSpider91ProxyURL validates a user-supplied proxy address for the
// spider91 drive and returns it with surrounding whitespace removed.
//
// An empty or whitespace-only value returns ("", nil), which callers treat as
// "no proxy". Otherwise the value must parse as a URL with a scheme and a host
// name, and the scheme must be http, https, socks5 or socks5h (case-insensitive).
// Any other input yields an error whose message is shown to the admin user.
func normalizeSpider91ProxyURL(raw string) (string, error) {
	proxy := strings.TrimSpace(raw)
	if proxy == "" {
		return "", nil
	}
	u, err := url.Parse(proxy)
	// Check Hostname() rather than Host: a port-only authority such as
	// "http://:7890" has a non-empty Host (":7890") but names no proxy server.
	if err != nil || u.Scheme == "" || u.Hostname() == "" {
		return "", fmt.Errorf("91Spider 代理地址格式无效,请填写类似 http://127.0.0.1:7890 的地址")
	}
	switch strings.ToLower(u.Scheme) {
	case "http", "https", "socks5", "socks5h":
		return proxy, nil
	default:
		return "", fmt.Errorf("91Spider 代理地址仅支持 http://、https://、socks5:// 或 socks5h://")
	}
}
func (a *AdminServer) handleDeleteDrive(w http.ResponseWriter, r *http.Request) {
id := chi.URLParam(r, "id")
if err := a.Catalog.DeleteDrive(r.Context(), id); err != nil {
+184
View File
@@ -439,6 +439,190 @@ func TestHandleUpsertDriveReplacesExistingCredentialsWhenProvided(t *testing.T)
}
}
// TestHandleUpsertSpider91ProxyPreservesRuntimeCredentials verifies that saving
// a spider91 drive with only a "proxy" credential updates (or clears) the proxy
// while the credentials already stored on the drive are kept.
func TestHandleUpsertSpider91ProxyPreservesRuntimeCredentials(t *testing.T) {
ctx := context.Background()
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
if err != nil {
t.Fatalf("open catalog: %v", err)
}
t.Cleanup(func() {
if err := cat.Close(); err != nil {
t.Fatalf("close catalog: %v", err)
}
})
// Seed a drive that already has a proxy plus two other credential keys.
if err := cat.UpsertDrive(ctx, &catalog.Drive{
ID: "spider91-main",
Kind: "spider91",
Name: "91 Spider",
RootID: "/",
Credentials: map[string]string{
"last_crawl_at": "1800000000",
"proxy": "http://old-proxy.local:7890",
"script_path": "/opt/video-site-91/91VideoSpider/spider_91porn.py",
},
Status: "ok",
}); err != nil {
t.Fatalf("seed drive: %v", err)
}
// First save: only the proxy key is sent, padded with whitespace.
req := httptest.NewRequest(http.MethodPost, "/admin/api/drives", strings.NewReader(`{
"id": "spider91-main",
"kind": "spider91",
"name": "91 Spider",
"rootId": "/",
"credentials": {"proxy": " socks5h://proxy-user:proxy-pass@127.0.0.1:7891 "}
}`))
rr := httptest.NewRecorder()
(&AdminServer{Catalog: cat}).handleUpsertDrive(rr, req)
if rr.Code != http.StatusOK {
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
}
got, err := cat.GetDrive(ctx, "spider91-main")
if err != nil {
t.Fatalf("get drive: %v", err)
}
// The proxy is replaced by the trimmed value; the other keys are untouched.
if got.Credentials["proxy"] != "socks5h://proxy-user:proxy-pass@127.0.0.1:7891" {
t.Fatalf("proxy = %q, want trimmed new proxy", got.Credentials["proxy"])
}
if got.Credentials["last_crawl_at"] != "1800000000" {
t.Fatalf("last_crawl_at = %q, want preserved", got.Credentials["last_crawl_at"])
}
if got.Credentials["script_path"] == "" {
t.Fatalf("script_path should be preserved")
}
// Second save: a whitespace-only proxy must remove the stored proxy key.
req = httptest.NewRequest(http.MethodPost, "/admin/api/drives", strings.NewReader(`{
"id": "spider91-main",
"kind": "spider91",
"name": "91 Spider",
"rootId": "/",
"credentials": {"proxy": " "}
}`))
rr = httptest.NewRecorder()
(&AdminServer{Catalog: cat}).handleUpsertDrive(rr, req)
if rr.Code != http.StatusOK {
t.Fatalf("clear status = %d, body = %s", rr.Code, rr.Body.String())
}
got, err = cat.GetDrive(ctx, "spider91-main")
if err != nil {
t.Fatalf("get cleared drive: %v", err)
}
if _, ok := got.Credentials["proxy"]; ok {
t.Fatalf("proxy should be removed after empty save, got %q", got.Credentials["proxy"])
}
if got.Credentials["last_crawl_at"] != "1800000000" {
t.Fatalf("last_crawl_at after clear = %q, want preserved", got.Credentials["last_crawl_at"])
}
}
// TestHandleUpsertSpider91RejectsUnsupportedProxyScheme verifies that saving a
// spider91 drive with an ftp:// proxy is answered with 400 and the message
// listing the supported schemes.
func TestHandleUpsertSpider91RejectsUnsupportedProxyScheme(t *testing.T) {
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
if err != nil {
t.Fatalf("open catalog: %v", err)
}
t.Cleanup(func() {
if err := cat.Close(); err != nil {
t.Fatalf("close catalog: %v", err)
}
})
req := httptest.NewRequest(http.MethodPost, "/admin/api/drives", strings.NewReader(`{
"id": "spider91-main",
"kind": "spider91",
"name": "91 Spider",
"rootId": "/",
"credentials": {"proxy": "ftp://127.0.0.1:21"}
}`))
rr := httptest.NewRecorder()
(&AdminServer{Catalog: cat}).handleUpsertDrive(rr, req)
if rr.Code != http.StatusBadRequest {
t.Fatalf("status = %d, want 400; body = %s", rr.Code, rr.Body.String())
}
// The response body must carry the tail of the supported-schemes error message.
if !strings.Contains(rr.Body.String(), "socks5:// 或 socks5h://") {
t.Fatalf("body = %q, want supported schemes message", rr.Body.String())
}
}
// TestHandleListDrivesIncludesSpider91Proxy checks that the drive listing
// reports the trimmed proxy and the last crawl time for a spider91 drive, and
// reports no proxy for a drive of another kind even when that drive stores a
// "proxy" credential.
func TestHandleListDrivesIncludesSpider91Proxy(t *testing.T) {
	ctx := context.Background()
	cat, err := catalog.Open(t.TempDir() + "/catalog.db")
	if err != nil {
		t.Fatalf("open catalog: %v", err)
	}
	t.Cleanup(func() {
		if err := cat.Close(); err != nil {
			t.Fatalf("close catalog: %v", err)
		}
	})

	spiderDrive := &catalog.Drive{
		ID:     "spider91-main",
		Kind:   "spider91",
		Name:   "91 Spider",
		RootID: "/",
		Credentials: map[string]string{
			"last_crawl_at": "1800000000",
			"proxy":         " http://127.0.0.1:7890 ",
		},
		Status: "ok",
	}
	oneDrive := &catalog.Drive{
		ID:     "onedrive-main",
		Kind:   "onedrive",
		Name:   "OneDrive",
		RootID: "root",
		Credentials: map[string]string{
			"proxy": "http://should-not-leak.local:7890",
		},
		Status: "ok",
	}
	for _, d := range []*catalog.Drive{spiderDrive, oneDrive} {
		if err := cat.UpsertDrive(ctx, d); err != nil {
			t.Fatalf("seed drive %s: %v", d.ID, err)
		}
	}

	req := httptest.NewRequest(http.MethodGet, "/admin/api/drives", nil)
	rr := httptest.NewRecorder()
	(&AdminServer{Catalog: cat}).handleListDrives(rr, req)
	if rr.Code != http.StatusOK {
		t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
	}

	// listedDrive holds only the response fields this test inspects.
	type listedDrive struct {
		ID            string `json:"id"`
		Spider91Proxy string `json:"spider91Proxy"`
		LastCrawlAt   int64  `json:"lastCrawlAt"`
	}
	var got []listedDrive
	if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
		t.Fatalf("decode: %v", err)
	}
	byID := make(map[string]listedDrive, len(got))
	for _, d := range got {
		byID[d.ID] = d
	}

	spider := byID["spider91-main"]
	if spider.Spider91Proxy != "http://127.0.0.1:7890" {
		t.Fatalf("spider91 proxy = %q, want trimmed proxy", spider.Spider91Proxy)
	}
	if spider.LastCrawlAt != 1800000000 {
		t.Fatalf("lastCrawlAt = %d, want 1800000000", spider.LastCrawlAt)
	}
	if other := byID["onedrive-main"]; other.Spider91Proxy != "" {
		t.Fatalf("onedrive spider91Proxy = %q, want empty", other.Spider91Proxy)
	}
}
func TestHandleListDrivesIncludesTeaserCounts(t *testing.T) {
ctx := context.Background()
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
+1 -4
View File
@@ -1503,10 +1503,7 @@ func normalizeDriveRootID(kind, rootID string) string {
}
return rootID
case "localstorage", "spider91":
if rootID == "" {
return "/"
}
return rootID
return "/"
default:
if rootID == "" {
return "0"
+42
View File
@@ -61,6 +61,7 @@ func TestUpsertDriveDefaultsRootIDByKind(t *testing.T) {
{id: "onedrive", kind: "onedrive", want: "root"},
{id: "googledrive", kind: "googledrive", want: "root"},
{id: "localstorage", kind: "localstorage", want: "/"},
{id: "spider91", kind: "spider91", want: "/"},
}
for _, tc := range cases {
if err := cat.UpsertDrive(ctx, &Drive{
@@ -82,3 +83,44 @@ func TestUpsertDriveDefaultsRootIDByKind(t *testing.T) {
}
}
}
// TestUpsertDriveIgnoresRootIDForLocalStorageAndSpider91 checks that a
// caller-supplied RootID / ScanRootID is discarded for the localstorage and
// spider91 kinds: after the upsert both fields read back as "/".
func TestUpsertDriveIgnoresRootIDForLocalStorageAndSpider91(t *testing.T) {
	ctx := context.Background()
	cat, err := Open(t.TempDir() + "/catalog.db")
	if err != nil {
		t.Fatalf("open catalog: %v", err)
	}
	t.Cleanup(func() {
		if err := cat.Close(); err != nil {
			t.Fatalf("close catalog: %v", err)
		}
	})

	// Each drive uses its kind as both ID and name.
	for _, kind := range []string{"localstorage", "spider91"} {
		drive := &Drive{
			ID:         kind,
			Kind:       kind,
			Name:       kind,
			RootID:     "manual-root",
			ScanRootID: "manual-scan-root",
		}
		if err := cat.UpsertDrive(ctx, drive); err != nil {
			t.Fatalf("upsert %s: %v", kind, err)
		}
		stored, err := cat.GetDrive(ctx, kind)
		if err != nil {
			t.Fatalf("get %s: %v", kind, err)
		}
		if stored.RootID != "/" {
			t.Fatalf("%s rootId = %q, want /", kind, stored.RootID)
		}
		if stored.ScanRootID != "/" {
			t.Fatalf("%s scanRootId = %q, want /", kind, stored.ScanRootID)
		}
	}
}
+121 -15
View File
@@ -8,6 +8,7 @@ import (
"fmt"
"io"
"log"
"net"
"net/http"
"net/url"
"os"
@@ -20,6 +21,7 @@ import (
"time"
"github.com/video-site/backend/internal/catalog"
"golang.org/x/net/proxy"
)
// 默认 author/tag 标签,便于在前端筛选 spider91 来源的视频。
@@ -79,29 +81,123 @@ func NewCrawler(cfg CrawlerConfig) *Crawler {
cfg.DownloadTimeout = 30 * time.Minute
}
if cfg.HTTPClient == nil {
// 选 proxy 函数:显式 ProxyURL > 环境变量 > 直连
proxyFn := http.ProxyFromEnvironment
if strings.TrimSpace(cfg.ProxyURL) != "" {
if u, err := url.Parse(cfg.ProxyURL); err == nil {
proxyFn = http.ProxyURL(u)
} else {
log.Printf("[spider91] invalid proxy URL %q, falling back to env: %v", cfg.ProxyURL, err)
}
transport := &http.Transport{
Proxy: http.ProxyFromEnvironment,
ResponseHeaderTimeout: 60 * time.Second,
MaxIdleConns: 10,
IdleConnTimeout: 90 * time.Second,
}
if err := configureExplicitProxy(transport, cfg.ProxyURL); err != nil {
log.Printf("[spider91] invalid configured proxy URL, falling back to env: %v", err)
}
cfg.HTTPClient = &http.Client{
// 不限制总下载时长,靠 ctx 控制;只挡 dial / handshake / header
Timeout: 0,
Transport: &http.Transport{
Proxy: proxyFn,
ResponseHeaderTimeout: 60 * time.Second,
MaxIdleConns: 10,
IdleConnTimeout: 90 * time.Second,
},
Timeout: 0,
Transport: transport,
}
}
return &Crawler{cfg: cfg}
}
// configureExplicitProxy applies an explicitly configured proxy URL to transport.
//
// A blank raw value leaves transport untouched and returns nil. For http and
// https proxies Transport.Proxy is set and Transport.DialContext is cleared; for
// socks5 and socks5h proxies Transport.DialContext is set and Transport.Proxy is
// cleared. On error transport is not modified.
//
// The parse-failure error deliberately carries a fixed message.
// NOTE(review): presumably this keeps credentials embedded in the URL out of
// logs; confirm before adding detail to it.
func configureExplicitProxy(transport *http.Transport, raw string) error {
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return nil
	}
	parsed, err := url.Parse(trimmed)
	if err != nil || parsed.Scheme == "" || parsed.Host == "" {
		return fmt.Errorf("invalid proxy URL")
	}

	scheme := strings.ToLower(parsed.Scheme)
	if scheme == "http" || scheme == "https" {
		transport.Proxy = http.ProxyURL(parsed)
		transport.DialContext = nil
		return nil
	}
	if scheme != "socks5" && scheme != "socks5h" {
		return fmt.Errorf("unsupported proxy scheme %q", parsed.Scheme)
	}

	dial, err := socksProxyDialContext(parsed)
	if err != nil {
		return err
	}
	transport.Proxy = nil
	transport.DialContext = dial
	return nil
}
// socksProxyDialContext builds a dial function that opens TCP connections
// through the SOCKS5 proxy described by proxyURL.
//
// When proxyURL carries user info it is sent as SOCKS5 username/password
// credentials (a missing password is sent as ""). For the "socks5h" scheme the
// target address is handed to the proxy as-is, so host names are resolved by the
// proxy; for any other scheme the target is first resolved locally with
// resolveSocksTarget.
//
// The returned function honours ctx cancellation. It returns an error from
// proxy.SOCKS5 unchanged if the dialer cannot be constructed.
func socksProxyDialContext(proxyURL *url.URL) (func(context.Context, string, string) (net.Conn, error), error) {
	var auth *proxy.Auth
	if proxyURL.User != nil {
		username := proxyURL.User.Username()
		password, _ := proxyURL.User.Password()
		auth = &proxy.Auth{User: username, Password: password}
	}
	dialer, err := proxy.SOCKS5("tcp", proxyURL.Host, auth, &net.Dialer{Timeout: 60 * time.Second})
	if err != nil {
		return nil, err
	}
	remoteDNS := strings.EqualFold(proxyURL.Scheme, "socks5h")
	return func(ctx context.Context, network, addr string) (net.Conn, error) {
		target := addr
		if !remoteDNS {
			resolved, err := resolveSocksTarget(ctx, addr)
			if err != nil {
				return nil, err
			}
			target = resolved
		}
		if ctxDialer, ok := dialer.(proxy.ContextDialer); ok {
			return ctxDialer.DialContext(ctx, network, target)
		}

		// Fallback for dialers without DialContext: run the blocking Dial in a
		// goroutine and race it against ctx.
		type result struct {
			conn net.Conn
			err  error
		}
		// Unbuffered on purpose: the hand-off only succeeds while the caller is
		// still waiting. If ctx is cancelled first, the goroutine keeps
		// ownership of the connection and closes it instead of leaking it.
		ch := make(chan result)
		go func() {
			conn, err := dialer.Dial(network, target)
			select {
			case ch <- result{conn: conn, err: err}:
			case <-ctx.Done():
				if conn != nil {
					conn.Close()
				}
			}
		}()
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case res := <-ch:
			return res.conn, res.err
		}
	}, nil
}
// resolveSocksTarget resolves the host part of addr ("host:port") locally and
// returns "ip:port", choosing the IP with selectSocksTargetIP.
//
// addr is returned unchanged when it cannot be split into host and port or when
// the host is already an IP literal. A lookup failure or an empty lookup result
// is returned as an error.
func resolveSocksTarget(ctx context.Context, addr string) (string, error) {
	host, port, splitErr := net.SplitHostPort(addr)
	switch {
	case splitErr != nil:
		// Not in host:port form; pass it through untouched.
		return addr, nil
	case net.ParseIP(host) != nil:
		// Already an IP literal; nothing to resolve.
		return addr, nil
	}

	candidates, err := net.DefaultResolver.LookupIPAddr(ctx, host)
	if err != nil {
		return "", err
	}
	chosen := selectSocksTargetIP(candidates)
	if chosen == nil {
		return "", fmt.Errorf("resolve %s: no address", host)
	}
	return net.JoinHostPort(chosen.String(), port), nil
}
// selectSocksTargetIP picks the address to dial from a lookup result: the first
// IPv4 address (in its 4-byte form) if there is one, otherwise the first entry
// with a non-nil IP. It returns nil when ips holds no usable address.
func selectSocksTargetIP(ips []net.IPAddr) net.IP {
	var fallback net.IP
	for _, candidate := range ips {
		if v4 := candidate.IP.To4(); v4 != nil {
			return v4
		}
		// Remember the first non-IPv4 address in case no IPv4 entry shows up.
		if fallback == nil && candidate.IP != nil {
			fallback = candidate.IP
		}
	}
	return fallback
}
// CrawlResult 汇总一次 RunOnce 的结果。
type CrawlResult struct {
// TargetNew 是本次 RunOnce 的目标新增数(来自 drive.Credentials.target_new)。
@@ -324,6 +420,16 @@ func (c *Crawler) startSpiderTargetNew(ctx context.Context, targetNew int, seenP
if c.cfg.WorkDir != "" {
cmd.Dir = c.cfg.WorkDir
}
if proxyURL := strings.TrimSpace(c.cfg.ProxyURL); proxyURL != "" {
cmd.Env = append(os.Environ(),
"HTTP_PROXY="+proxyURL,
"HTTPS_PROXY="+proxyURL,
"http_proxy="+proxyURL,
"https_proxy="+proxyURL,
"NO_PROXY=",
"no_proxy=",
)
}
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, nil, fmt.Errorf("stdout pipe: %w", err)
@@ -3,6 +3,8 @@ package spider91
import (
"context"
"encoding/json"
"io"
"net"
"net/http"
"net/http/httptest"
"net/url"
@@ -233,6 +235,108 @@ func TestCrawlerRunOnceMissingScript(t *testing.T) {
}
}
// TestCrawlerPassesProxyToSpiderProcess verifies that a configured ProxyURL is
// exported to the spider subprocess through the four proxy environment
// variables, with NO_PROXY / no_proxy set to empty.
func TestCrawlerPassesProxyToSpiderProcess(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("shell-based fake script only on unix")
}
tmp := t.TempDir()
scriptPath := filepath.Join(tmp, "print_proxy_env.sh")
// Stand-in "spider": prints the proxy-related environment it was started with.
script := `#!/bin/sh
printf 'HTTP_PROXY=%s\n' "$HTTP_PROXY"
printf 'HTTPS_PROXY=%s\n' "$HTTPS_PROXY"
printf 'http_proxy=%s\n' "$http_proxy"
printf 'https_proxy=%s\n' "$https_proxy"
printf 'NO_PROXY=%s\n' "$NO_PROXY"
printf 'no_proxy=%s\n' "$no_proxy"
`
if err := os.WriteFile(scriptPath, []byte(script), 0o755); err != nil {
t.Fatalf("write script: %v", err)
}
proxyURL := "socks5h://proxy.local:1080"
drv := New(Config{ID: "proxy-drive", RootDir: filepath.Join(tmp, "proxy-drive")})
// PythonPath is "sh" so that, presumably, the shell script above is run in
// place of the Python spider.
c := NewCrawler(CrawlerConfig{
Driver: drv,
PythonPath: "sh",
ScriptPath: scriptPath,
ProxyURL: proxyURL,
})
cmd, stdout, err := c.startSpiderTargetNew(
context.Background(),
1,
filepath.Join(tmp, "seen.txt"),
filepath.Join(tmp, "out.json"),
)
if err != nil {
t.Fatalf("startSpiderTargetNew: %v", err)
}
// Drain stdout before Wait so the child can finish writing.
raw, err := io.ReadAll(stdout)
if err != nil {
t.Fatalf("read stdout: %v", err)
}
if err := cmd.Wait(); err != nil {
t.Fatalf("wait: %v", err)
}
// Expected output, one line per variable in the order the script prints them.
want := strings.Join([]string{
"HTTP_PROXY=" + proxyURL,
"HTTPS_PROXY=" + proxyURL,
"http_proxy=" + proxyURL,
"https_proxy=" + proxyURL,
"NO_PROXY=",
"no_proxy=",
}, "\n") + "\n"
if string(raw) != want {
t.Fatalf("proxy env = %q, want %q", string(raw), want)
}
}
// TestConfigureExplicitProxySupportsSocksSchemes checks how configureExplicitProxy
// wires each proxy scheme into an http.Transport: SOCKS proxies install a
// DialContext and clear Proxy, HTTP proxies set Proxy and leave DialContext
// unset, and an unsupported scheme is reported as an error.
func TestConfigureExplicitProxySupportsSocksSchemes(t *testing.T) {
	socksURLs := []string{
		"socks5://127.0.0.1:1080",
		"socks5h://proxy-user:proxy-pass@127.0.0.1:1080",
	}
	for _, raw := range socksURLs {
		t.Run(raw, func(t *testing.T) {
			tr := &http.Transport{Proxy: http.ProxyFromEnvironment}
			err := configureExplicitProxy(tr, raw)
			switch {
			case err != nil:
				t.Fatalf("configureExplicitProxy: %v", err)
			case tr.Proxy != nil:
				t.Fatalf("Transport.Proxy should be nil for SOCKS proxy")
			case tr.DialContext == nil:
				t.Fatalf("Transport.DialContext should be set for SOCKS proxy")
			}
		})
	}

	httpTransport := &http.Transport{Proxy: http.ProxyFromEnvironment}
	err := configureExplicitProxy(httpTransport, "http://127.0.0.1:7890")
	switch {
	case err != nil:
		t.Fatalf("configureExplicitProxy http: %v", err)
	case httpTransport.Proxy == nil:
		t.Fatalf("Transport.Proxy should be set for HTTP proxy")
	case httpTransport.DialContext != nil:
		t.Fatalf("Transport.DialContext should not be set for HTTP proxy")
	}

	if configureExplicitProxy(&http.Transport{}, "ftp://127.0.0.1:21") == nil {
		t.Fatalf("expected unsupported proxy scheme error")
	}
}
// TestSelectSocksTargetIPPrefersIPv4 checks that an IPv4 address is chosen even
// when an IPv6 address comes first in the lookup result.
func TestSelectSocksTargetIPPrefersIPv4(t *testing.T) {
	const wantIP = "104.26.3.41"
	candidates := []net.IPAddr{
		{IP: net.ParseIP("2606:4700:20::681a:229")},
		{IP: net.ParseIP(wantIP)},
	}
	picked := selectSocksTargetIP(candidates)
	if picked == nil || picked.String() != wantIP {
		t.Fatalf("selectSocksTargetIP = %v, want IPv4 104.26.3.41", picked)
	}
}
// TestCrawlerThumbDownloadFailureMarksStatusFailed 验证:网站封面下载失败时
// crawler 把 thumbnail_status 显式标 'failed',避免后续封面补队列一直重复
// 捞到这条 spider91 视频。
+168
View File
@@ -0,0 +1,168 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package socks
import (
"context"
"errors"
"io"
"net"
"strconv"
"time"
)
var (
noDeadline = time.Time{}
aLongTimeAgo = time.Unix(1, 0)
)
// connect performs the SOCKS5 exchange for address over c, a connection that is
// already established to the proxy server: method negotiation, the optional
// d.Authenticate step, and the command request. On success it returns the bound
// address parsed from the server's reply.
//
// The result error is named ctxErr so that the deferred function below can
// replace a nil result with the context's error.
func (d *Dialer) connect(ctx context.Context, c net.Conn, address string) (_ net.Addr, ctxErr error) {
host, port, err := splitHostPort(address)
if err != nil {
return nil, err
}
// Bound the handshake by the context deadline and clear the deadline again on return.
if deadline, ok := ctx.Deadline(); ok && !deadline.IsZero() {
c.SetDeadline(deadline)
defer c.SetDeadline(noDeadline)
}
if ctx != context.Background() {
// Watcher goroutine: when ctx is done it sets a deadline in the past on c
// so pending I/O fails, and reports ctx.Err(); when the handshake ends
// first (done is closed) it reports nil.
errCh := make(chan error, 1)
done := make(chan struct{})
defer func() {
close(done)
// Only consult the watcher when the handshake itself reported no error.
if ctxErr == nil {
ctxErr = <-errCh
}
}()
go func() {
select {
case <-ctx.Done():
c.SetDeadline(aLongTimeAgo)
errCh <- ctx.Err()
case <-done:
errCh <- nil
}
}()
}
// Method-selection message: version, number of methods, then the methods.
b := make([]byte, 0, 6+len(host)) // the size here is just an estimate
b = append(b, Version5)
if len(d.AuthMethods) == 0 || d.Authenticate == nil {
b = append(b, 1, byte(AuthMethodNotRequired))
} else {
ams := d.AuthMethods
if len(ams) > 255 {
return nil, errors.New("too many authentication methods")
}
b = append(b, byte(len(ams)))
for _, am := range ams {
b = append(b, byte(am))
}
}
if _, ctxErr = c.Write(b); ctxErr != nil {
return
}
// Server's choice: version followed by the selected method.
if _, ctxErr = io.ReadFull(c, b[:2]); ctxErr != nil {
return
}
if b[0] != Version5 {
return nil, errors.New("unexpected protocol version " + strconv.Itoa(int(b[0])))
}
am := AuthMethod(b[1])
if am == AuthMethodNoAcceptableMethods {
return nil, errors.New("no acceptable authentication methods")
}
if d.Authenticate != nil {
if ctxErr = d.Authenticate(ctx, c, am); ctxErr != nil {
return
}
}
// Request: version, command, reserved zero byte, address type and address,
// then the port as two bytes, high byte first.
b = b[:0]
b = append(b, Version5, byte(d.cmd), 0)
if ip := net.ParseIP(host); ip != nil {
if ip4 := ip.To4(); ip4 != nil {
b = append(b, AddrTypeIPv4)
b = append(b, ip4...)
} else if ip6 := ip.To16(); ip6 != nil {
b = append(b, AddrTypeIPv6)
b = append(b, ip6...)
} else {
return nil, errors.New("unknown address type")
}
} else {
// The host name length is sent in a single byte.
if len(host) > 255 {
return nil, errors.New("FQDN too long")
}
b = append(b, AddrTypeFQDN)
b = append(b, byte(len(host)))
b = append(b, host...)
}
b = append(b, byte(port>>8), byte(port))
if _, ctxErr = c.Write(b); ctxErr != nil {
return
}
// Reply header: version, reply code, reserved byte, address type.
if _, ctxErr = io.ReadFull(c, b[:4]); ctxErr != nil {
return
}
if b[0] != Version5 {
return nil, errors.New("unexpected protocol version " + strconv.Itoa(int(b[0])))
}
if cmdErr := Reply(b[1]); cmdErr != StatusSucceeded {
return nil, errors.New("unknown error " + cmdErr.String())
}
if b[2] != 0 {
return nil, errors.New("non-zero reserved field")
}
// l is the number of bytes still to read: the bound address plus the 2-byte port.
l := 2
var a Addr
switch b[3] {
case AddrTypeIPv4:
l += net.IPv4len
a.IP = make(net.IP, net.IPv4len)
case AddrTypeIPv6:
l += net.IPv6len
a.IP = make(net.IP, net.IPv6len)
case AddrTypeFQDN:
// A domain name is preceded by a one-byte length.
if _, err := io.ReadFull(c, b[:1]); err != nil {
return nil, err
}
l += int(b[0])
default:
return nil, errors.New("unknown address type " + strconv.Itoa(int(b[3])))
}
// Reuse b when it is large enough, otherwise allocate a buffer of exactly l bytes.
if cap(b) < l {
b = make([]byte, l)
} else {
b = b[:l]
}
if _, ctxErr = io.ReadFull(c, b); ctxErr != nil {
return
}
if a.IP != nil {
copy(a.IP, b)
} else {
a.Name = string(b[:len(b)-2])
}
// The last two bytes are the port, high byte first.
a.Port = int(b[len(b)-2])<<8 | int(b[len(b)-1])
return &a, nil
}
// splitHostPort splits address into its host and numeric port. It returns an
// error when address is not in host:port form, when the port is not a decimal
// number, or when the port lies outside 1..65535.
func splitHostPort(address string) (string, int, error) {
	host, portText, err := net.SplitHostPort(address)
	if err != nil {
		return "", 0, err
	}
	portNum, err := strconv.Atoi(portText)
	switch {
	case err != nil:
		return "", 0, err
	case portNum < 1 || portNum > 0xffff:
		return "", 0, errors.New("port number out of range " + portText)
	}
	return host, portNum, nil
}
+317
View File
@@ -0,0 +1,317 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package socks provides a SOCKS version 5 client implementation.
//
// SOCKS protocol version 5 is defined in RFC 1928.
// Username/Password authentication for SOCKS version 5 is defined in
// RFC 1929.
package socks
import (
"context"
"errors"
"io"
"net"
"strconv"
)
// A Command represents a SOCKS command.
type Command int

// String returns a readable name for the command, prefixed with "socks".
// Commands other than connect and bind are rendered by their number.
func (cmd Command) String() string {
	if cmd == CmdConnect {
		return "socks connect"
	}
	if cmd == cmdBind {
		return "socks bind"
	}
	return "socks " + strconv.Itoa(int(cmd))
}

// An AuthMethod represents a SOCKS authentication method.
type AuthMethod int

// A Reply represents a SOCKS command reply code.
type Reply int

// String returns the description of the reply code, or "unknown code: N" for a
// code that has no description.
func (code Reply) String() string {
	// Descriptions indexed by reply code, 0x00 through 0x08.
	known := [...]string{
		"succeeded",
		"general SOCKS server failure",
		"connection not allowed by ruleset",
		"network unreachable",
		"host unreachable",
		"connection refused",
		"TTL expired",
		"command not supported",
		"address type not supported",
	}
	if code >= 0 && int(code) < len(known) {
		return known[code]
	}
	return "unknown code: " + strconv.Itoa(int(code))
}

// Wire protocol constants.
const (
	// Protocol version byte.
	Version5 = 0x05

	// Address type bytes used in requests and replies.
	AddrTypeIPv4 = 0x01
	AddrTypeFQDN = 0x03
	AddrTypeIPv6 = 0x04

	CmdConnect Command = 0x01 // establishes an active-open forward proxy connection
	cmdBind    Command = 0x02 // establishes a passive-open forward proxy connection

	AuthMethodNotRequired         AuthMethod = 0x00 // no authentication required
	AuthMethodUsernamePassword    AuthMethod = 0x02 // use username/password
	AuthMethodNoAcceptableMethods AuthMethod = 0xff // no acceptable authentication methods

	// Reply code for a successful command.
	StatusSucceeded Reply = 0x00
)
// An Addr represents a SOCKS-specific address.
// Either Name or IP is used exclusively.
type Addr struct {
	Name string // fully-qualified domain name
	IP   net.IP
	Port int
}

// Network returns the network name of the address, which is always "socks".
func (a *Addr) Network() string { return "socks" }

// String renders the address as host:port, using IP when it is set and Name
// otherwise. A nil receiver is rendered as "<nil>".
func (a *Addr) String() string {
	if a == nil {
		return "<nil>"
	}
	host := a.Name
	if a.IP != nil {
		host = a.IP.String()
	}
	return net.JoinHostPort(host, strconv.Itoa(a.Port))
}
// A Conn represents a forward proxy connection.
type Conn struct {
	net.Conn

	boundAddr net.Addr
}

// BoundAddr returns the address assigned by the proxy server for
// connecting to the command target address from the proxy server.
// It returns nil when called on a nil *Conn.
func (c *Conn) BoundAddr() net.Addr {
	if c != nil {
		return c.boundAddr
	}
	return nil
}
// A Dialer holds SOCKS-specific options.
type Dialer struct {
cmd Command // either CmdConnect or cmdBind
proxyNetwork string // network between a proxy server and a client
proxyAddress string // proxy server address
// ProxyDial specifies the optional dial function for
// establishing the transport connection.
// When nil, DialContext uses a zero net.Dialer and Dial uses net.Dial.
ProxyDial func(context.Context, string, string) (net.Conn, error)
// AuthMethods specifies the list of request authentication
// methods.
// If empty, SOCKS client requests only AuthMethodNotRequired.
// The same applies when Authenticate is nil.
AuthMethods []AuthMethod
// Authenticate specifies the optional authentication
// function. It must be non-nil when AuthMethods is not empty.
// It must return an error when the authentication is failed.
// It is called with the method selected by the proxy server.
Authenticate func(context.Context, io.ReadWriter, AuthMethod) error
}
// DialContext connects to the provided address on the provided
// network.
//
// The returned error value may be a net.OpError. When the Op field of
// net.OpError contains "socks", the Source field contains a proxy
// server address and the Addr field contains a command target
// address.
//
// See func Dial of the net package of standard library for a
// description of the network and address parameters.
func (d *Dialer) DialContext(ctx context.Context, network, address string) (net.Conn, error) {
	// opError wraps cause in a *net.OpError that names the proxy and the target.
	opError := func(cause error) error {
		proxy, dst, _ := d.pathAddrs(address)
		return &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: cause}
	}

	if err := d.validateTarget(network, address); err != nil {
		return nil, opError(err)
	}
	if ctx == nil {
		return nil, opError(errors.New("nil context"))
	}

	// Connect to the proxy itself, through ProxyDial when one is configured.
	dial := d.ProxyDial
	if dial == nil {
		var dd net.Dialer
		dial = dd.DialContext
	}
	c, err := dial(ctx, d.proxyNetwork, d.proxyAddress)
	if err != nil {
		return nil, opError(err)
	}

	bound, err := d.connect(ctx, c, address)
	if err != nil {
		c.Close()
		return nil, opError(err)
	}
	return &Conn{Conn: c, boundAddr: bound}, nil
}
// DialWithConn initiates a connection from SOCKS server to the target
// network and address using the connection c that is already
// connected to the SOCKS server.
//
// It returns the connection's local address assigned by the SOCKS
// server. Unlike DialContext it does not close c when the exchange fails.
func (d *Dialer) DialWithConn(ctx context.Context, c net.Conn, network, address string) (net.Addr, error) {
	// opError wraps cause in a *net.OpError that names the proxy and the target.
	opError := func(cause error) error {
		proxy, dst, _ := d.pathAddrs(address)
		return &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: cause}
	}

	if err := d.validateTarget(network, address); err != nil {
		return nil, opError(err)
	}
	if ctx == nil {
		return nil, opError(errors.New("nil context"))
	}
	bound, err := d.connect(ctx, c, address)
	if err != nil {
		return nil, opError(err)
	}
	return bound, nil
}
// Dial connects to the provided address on the provided network.
//
// Unlike DialContext, it returns a raw transport connection instead
// of a forward proxy connection.
//
// Deprecated: Use DialContext or DialWithConn instead.
func (d *Dialer) Dial(network, address string) (net.Conn, error) {
	// opError wraps cause in a *net.OpError that names the proxy and the target.
	opError := func(cause error) error {
		proxy, dst, _ := d.pathAddrs(address)
		return &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: cause}
	}

	if err := d.validateTarget(network, address); err != nil {
		return nil, opError(err)
	}

	var (
		c   net.Conn
		err error
	)
	if d.ProxyDial == nil {
		c, err = net.Dial(d.proxyNetwork, d.proxyAddress)
	} else {
		c, err = d.ProxyDial(context.Background(), d.proxyNetwork, d.proxyAddress)
	}
	if err != nil {
		return nil, opError(err)
	}

	// DialWithConn already returns a wrapped error, so it is passed on as-is.
	if _, err := d.DialWithConn(context.Background(), c, network, address); err != nil {
		c.Close()
		return nil, err
	}
	return c, nil
}
// validateTarget reports whether the dialer can handle the request: network
// must be "tcp", "tcp4" or "tcp6", and the dialer's command must be connect or
// bind. The address argument is not inspected.
func (d *Dialer) validateTarget(network, address string) error {
	if network != "tcp" && network != "tcp6" && network != "tcp4" {
		return errors.New("network not implemented")
	}
	if d.cmd != CmdConnect && d.cmd != cmdBind {
		return errors.New("command not implemented")
	}
	return nil
}
// pathAddrs converts the dialer's proxy address and the given target address
// into Addr values for use in error reporting. If either address cannot be
// split into host and port, it returns nil for both addresses together with
// the error.
func (d *Dialer) pathAddrs(address string) (proxy, dst net.Addr, err error) {
	// toAddr parses one host:port string, storing the host as an IP when it is
	// an IP literal and as a name otherwise.
	toAddr := func(s string) (*Addr, error) {
		host, port, err := splitHostPort(s)
		if err != nil {
			return nil, err
		}
		a := &Addr{Port: port, IP: net.ParseIP(host)}
		if a.IP == nil {
			a.Name = host
		}
		return a, nil
	}

	proxyAddr, err := toAddr(d.proxyAddress)
	if err != nil {
		return nil, nil, err
	}
	dstAddr, err := toAddr(address)
	if err != nil {
		return nil, nil, err
	}
	return proxyAddr, dstAddr, nil
}
// NewDialer returns a new Dialer that dials through the provided
// proxy server's network and address. The dialer issues the connect command.
func NewDialer(network, address string) *Dialer {
	d := new(Dialer)
	d.cmd = CmdConnect
	d.proxyNetwork = network
	d.proxyAddress = address
	return d
}
// Byte values used by UsernamePassword.Authenticate when talking to the
// proxy server.
const (
// authUsernamePasswordVersion is the first byte of the request and
// must also be the first byte of the server's reply.
authUsernamePasswordVersion = 0x01
// authStatusSucceeded is the second byte of the reply when the
// credentials were accepted; any other value is treated as a failure.
authStatusSucceeded = 0x00
)
// UsernamePassword are the credentials for the username/password
// authentication method.
//
// Authenticate sends each field prefixed by its length in a single
// byte, so it rejects an empty Username and any field longer than
// 255 bytes.
type UsernamePassword struct {
// Username must be between 1 and 255 bytes long.
Username string
// Password may be empty and must be at most 255 bytes long.
Password string
}
// Authenticate authenticates a pair of username and password with the
// proxy server.
//
// For AuthMethodNotRequired it returns nil without touching rw. For
// AuthMethodUsernamePassword it writes a single request made of the
// version byte followed by the length-prefixed username and password,
// then reads a two-byte reply and checks its version and status. Any
// other method yields an "unsupported authentication method" error.
//
// ctx is currently unused.
func (up *UsernamePassword) Authenticate(ctx context.Context, rw io.ReadWriter, auth AuthMethod) error {
	if auth == AuthMethodNotRequired {
		return nil
	}
	if auth != AuthMethodUsernamePassword {
		return errors.New("unsupported authentication method " + strconv.Itoa(int(auth)))
	}

	user, pass := up.Username, up.Password
	if len(user) == 0 || len(user) > 255 || len(pass) > 255 {
		return errors.New("invalid username/password")
	}

	// Request layout: version, len(user), user, len(pass), pass.
	req := make([]byte, 0, 3+len(user)+len(pass))
	req = append(req, authUsernamePasswordVersion, byte(len(user)))
	req = append(req, user...)
	req = append(req, byte(len(pass)))
	req = append(req, pass...)

	// TODO(mikio): handle IO deadlines and cancelation if
	// necessary
	if _, err := rw.Write(req); err != nil {
		return err
	}

	// Reply layout: version, status.
	var reply [2]byte
	if _, err := io.ReadFull(rw, reply[:]); err != nil {
		return err
	}
	switch {
	case reply[0] != authUsernamePasswordVersion:
		return errors.New("invalid username/password version")
	case reply[1] != authStatusSucceeded:
		return errors.New("username/password authentication failed")
	}
	return nil
}
+54
View File
@@ -0,0 +1,54 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package proxy
import (
"context"
"net"
)
// A ContextDialer dials using a context.
//
// Dial and PerHost.DialContext call DialContext directly on dialers
// that implement this interface and fall back to dialContext, which
// runs the plain Dial method in a separate goroutine, for those that
// do not.
type ContextDialer interface {
// DialContext connects to address on the named network, using ctx
// to bound the dial.
DialContext(ctx context.Context, network, address string) (net.Conn, error)
}
// Dial works like DialContext on net.Dialer but using a dialer returned by FromEnvironment.
//
// The passed ctx is only used for returning the Conn, not the lifetime of the Conn.
//
// Custom dialers (registered via RegisterDialerType) that do not implement ContextDialer
// can leak a goroutine for as long as it takes the underlying Dialer implementation to timeout.
//
// A Conn returned from a successful Dial after the context has been cancelled will be immediately closed.
func Dial(ctx context.Context, network, address string) (net.Conn, error) {
	switch d := FromEnvironment().(type) {
	case ContextDialer:
		// The dialer understands contexts natively.
		return d.DialContext(ctx, network, address)
	default:
		// Plain Dialer: emulate context support via dialContext.
		return dialContext(ctx, d, network, address)
	}
}
// dialContext runs d.Dial in a separate goroutine so that a Dialer
// without context support can still be abandoned once ctx is done.
//
// WARNING: this can leak a goroutine for as long as the underlying Dialer
// implementation takes to timeout.
//
// The dial result is handed to the caller over an unbuffered channel, so
// ownership of the connection is transferred exactly once and no variable
// is shared between the two goroutines. If ctx is done before the caller
// has taken the result, dialContext returns (nil, ctx.Err()) and the
// dialing goroutine closes any connection it obtained afterwards. A
// connection that is returned to the caller is never closed here.
func dialContext(ctx context.Context, d Dialer, network, address string) (net.Conn, error) {
	type dialResult struct {
		conn net.Conn
		err  error
	}
	results := make(chan dialResult)

	go func() {
		conn, err := d.Dial(network, address)
		select {
		case results <- dialResult{conn, err}:
			// The caller received the result and now owns conn.
		case <-ctx.Done():
			// The caller has given up (or is about to); nobody will
			// ever use this connection, so release it here.
			if conn != nil {
				conn.Close()
			}
		}
	}()

	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	case r := <-results:
		return r.conn, r.err
	}
}
+31
View File
@@ -0,0 +1,31 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package proxy
import (
"context"
"net"
)
// direct is the field-less type behind Direct; its methods forward
// straight to the net package.
type direct struct{}
// Direct implements Dialer by making network connections directly using net.Dial or net.DialContext.
var Direct = direct{}
// Compile-time checks that Direct satisfies both Dialer and ContextDialer.
var (
_ Dialer = Direct
_ ContextDialer = Direct
)
// Dial connects to addr on the named network by calling net.Dial with
// the arguments unchanged; no proxy is involved.
func (direct) Dial(network, addr string) (net.Conn, error) {
	conn, err := net.Dial(network, addr)
	return conn, err
}
// DialContext connects to addr on the named network using the
// DialContext method of a zero-value net.Dialer, passing ctx and the
// arguments through unchanged.
func (direct) DialContext(ctx context.Context, network, addr string) (net.Conn, error) {
	return new(net.Dialer).DialContext(ctx, network, addr)
}
+151
View File
@@ -0,0 +1,151 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package proxy
import (
"context"
"net"
"strings"
)
// A PerHost directs connections to a default Dialer unless the host name
// requested matches one of a number of exceptions.
//
// The exception lists are filled by AddFromString, AddIP, AddNetwork,
// AddZone and AddHost. None of the methods take a lock.
type PerHost struct {
// def is used when no exception matches; bypass is used when one does.
def, bypass Dialer
// bypassNetworks and bypassIPs are only consulted when the dialed
// host is an IP literal.
bypassNetworks []*net.IPNet
bypassIPs []net.IP
// bypassZones holds DNS suffixes, each stored with a leading dot.
bypassZones []string
// bypassHosts holds host names that must match exactly.
bypassHosts []string
}
// NewPerHost returns a PerHost Dialer that sends each connection through
// defaultDialer, or through bypass when the dialed host matches one of
// the rules added later. The returned value starts with no rules.
func NewPerHost(defaultDialer, bypass Dialer) *PerHost {
	p := new(PerHost)
	p.def, p.bypass = defaultDialer, bypass
	return p
}
// Dial connects to the address addr on the given network through either
// defaultDialer or bypass. addr must be in host:port form; a value that
// net.SplitHostPort rejects is returned as an error without dialing.
func (p *PerHost) Dial(network, addr string) (c net.Conn, err error) {
	host, _, splitErr := net.SplitHostPort(addr)
	if splitErr != nil {
		return nil, splitErr
	}
	chosen := p.dialerForRequest(host)
	return chosen.Dial(network, addr)
}
// DialContext connects to the address addr on the given network through either
// defaultDialer or bypass. When the selected dialer implements
// ContextDialer its DialContext is used; otherwise the plain Dial method
// is driven through dialContext.
func (p *PerHost) DialContext(ctx context.Context, network, addr string) (c net.Conn, err error) {
	host, _, splitErr := net.SplitHostPort(addr)
	if splitErr != nil {
		return nil, splitErr
	}
	switch d := p.dialerForRequest(host).(type) {
	case ContextDialer:
		return d.DialContext(ctx, network, addr)
	default:
		return dialContext(ctx, d, network, addr)
	}
}
// dialerForRequest selects the dialer for host, which must be the host
// part of an address (no port, no brackets).
//
// IP literals are matched only against the bypass networks and bypass
// IPs. An IPv6 literal carrying a zone ("fe80::1%eth0") is recognised
// as an IP literal as well: net.ParseIP rejects zones, so without this
// step a host such as "::1%.example.com" would be treated as a DNS name
// and could match the zone ".example.com" by suffix. For a zoned
// literal the address part is compared against the IP rules and the
// full string against the exact host rules, but never against zones.
//
// Every other host is matched against the bypass zones (suffix match,
// plus the zone name without its leading dot) and the exact bypass
// hosts. If nothing matches, the default dialer is returned.
func (p *PerHost) dialerForRequest(host string) Dialer {
	literal, zoned := host, false
	if i := strings.IndexByte(host, '%'); i >= 0 {
		literal, zoned = host[:i], true
	}
	ip := net.ParseIP(literal)
	if zoned && (ip == nil || !strings.Contains(literal, ":") || len(literal)+1 == len(host)) {
		// A zone is only valid on an IPv6 literal and must be
		// non-empty; anything else is handled as a host name.
		ip = nil
	}

	if ip != nil {
		for _, ipNet := range p.bypassNetworks {
			if ipNet.Contains(ip) {
				return p.bypass
			}
		}
		for _, bypassIP := range p.bypassIPs {
			if bypassIP.Equal(ip) {
				return p.bypass
			}
		}
		if zoned {
			// A zoned literal added as a rule is stored as a host
			// entry (AddFromString cannot parse it as an IP), so
			// keep honouring an exact match on the full string.
			for _, bypassHost := range p.bypassHosts {
				if bypassHost == host {
					return p.bypass
				}
			}
		}
		return p.def
	}

	for _, zone := range p.bypassZones {
		if strings.HasSuffix(host, zone) {
			return p.bypass
		}
		if host == zone[1:] {
			// For a zone ".example.com", we match "example.com"
			// too.
			return p.bypass
		}
	}
	for _, bypassHost := range p.bypassHosts {
		if bypassHost == host {
			return p.bypass
		}
	}
	return p.def
}
// AddFromString parses a string that contains comma-separated values
// specifying hosts that should use the bypass proxy. Each value is either an
// IP address, a CIDR range, a zone (*.example.com) or a host name
// (localhost). A best effort is made to parse the string and errors are
// ignored.
//
// Surrounding whitespace is trimmed and empty values are skipped. A
// value containing "/" is only ever tried as a CIDR range; if it does
// not parse it is dropped rather than added as a host name.
func (p *PerHost) AddFromString(s string) {
	for _, entry := range strings.Split(s, ",") {
		entry = strings.TrimSpace(entry)
		if entry == "" {
			continue
		}
		if strings.Contains(entry, "/") {
			// We assume that it's a CIDR address like 127.0.0.0/8
			_, ipNet, err := net.ParseCIDR(entry)
			if err == nil {
				p.AddNetwork(ipNet)
			}
			continue
		}
		ip := net.ParseIP(entry)
		switch {
		case ip != nil:
			p.AddIP(ip)
		case strings.HasPrefix(entry, "*."):
			// Drop the "*" so the zone is passed with its leading dot.
			p.AddZone(entry[1:])
		default:
			p.AddHost(entry)
		}
	}
}
// AddIP specifies an IP address that will use the bypass proxy. Note that
// this will only take effect if a literal IP address is dialed. A connection
// to a named host will never match an IP.
//
// ip is stored as given, without being copied.
func (p *PerHost) AddIP(ip net.IP) {
p.bypassIPs = append(p.bypassIPs, ip)
}
// AddNetwork specifies an IP range that will use the bypass proxy. Note that
// this will only take effect if a literal IP address is dialed. A connection
// to a named host will never match.
//
// The pointer is stored as given; the IPNet it points to is not copied.
func (p *PerHost) AddNetwork(net *net.IPNet) {
p.bypassNetworks = append(p.bypassNetworks, net)
}
// AddZone specifies a DNS suffix that will use the bypass proxy. A zone of
// "example.com" matches "example.com" and all of its subdomains.
//
// The zone is normalised before it is stored: one trailing dot is
// removed and a leading dot is added when missing.
func (p *PerHost) AddZone(zone string) {
	normalized := strings.TrimSuffix(zone, ".")
	if len(normalized) == 0 || normalized[0] != '.' {
		normalized = "." + normalized
	}
	p.bypassZones = append(p.bypassZones, normalized)
}
// AddHost specifies a host name that will use the bypass proxy. One
// trailing dot, if present, is removed before the name is stored.
func (p *PerHost) AddHost(host string) {
	p.bypassHosts = append(p.bypassHosts, strings.TrimSuffix(host, "."))
}
+149
View File
@@ -0,0 +1,149 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package proxy provides support for a variety of protocols to proxy network
// data.
package proxy // import "golang.org/x/net/proxy"
import (
"errors"
"net"
"net/url"
"os"
"sync"
)
// A Dialer is a means to establish a connection.
// Custom dialers should also implement ContextDialer.
//
// A Dialer without DialContext is still usable with the context-aware
// entry points, but those then run Dial in a separate goroutine via
// dialContext.
type Dialer interface {
// Dial connects to the given address via the proxy.
Dial(network, addr string) (c net.Conn, err error)
}
// Auth contains authentication parameters that specific Dialers may require.
//
// FromURL fills it from the user information of the proxy URL, and
// SOCKS5 forwards it as username/password credentials.
type Auth struct {
// User and Password are the credentials presented to the proxy.
User, Password string
}
// FromEnvironment returns the dialer specified by the proxy-related
// variables in the environment and makes underlying connections
// directly.
//
// It is shorthand for FromEnvironmentUsing(Direct).
func FromEnvironment() Dialer {
return FromEnvironmentUsing(Direct)
}
// FromEnvironmentUsing returns the dialer specified by the proxy-related
// variables in the environment and makes underlying connections
// using the provided forwarding Dialer (for instance, a *net.Dialer
// with desired configuration).
//
// The proxy URL is read from allProxyEnv. When it is empty, cannot be
// parsed, or names a scheme FromURL does not know, forward itself is
// returned. When noProxyEnv is non-empty the proxy dialer is wrapped in
// a PerHost that sends the listed hosts through forward instead.
func FromEnvironmentUsing(forward Dialer) Dialer {
	rawProxy := allProxyEnv.Get()
	if rawProxy == "" {
		return forward
	}
	parsed, err := url.Parse(rawProxy)
	if err != nil {
		return forward
	}
	viaProxy, err := FromURL(parsed, forward)
	if err != nil {
		return forward
	}
	if exclusions := noProxyEnv.Get(); exclusions != "" {
		router := NewPerHost(viaProxy, forward)
		router.AddFromString(exclusions)
		return router
	}
	return viaProxy
}
// proxySchemes is a map from URL schemes to a function that creates a Dialer
// from a URL with such a scheme.
//
// It stays nil until the first RegisterDialerType call and is accessed
// without any locking.
var proxySchemes map[string]func(*url.URL, Dialer) (Dialer, error)
// RegisterDialerType takes a URL scheme and a function to generate Dialers from
// a URL with that scheme and a forwarding Dialer. Registered schemes are used
// by FromURL.
//
// Registering a scheme again replaces the earlier function. The registry
// is a plain map with no locking.
func RegisterDialerType(scheme string, f func(*url.URL, Dialer) (Dialer, error)) {
	if proxySchemes == nil {
		// Created lazily on first registration.
		proxySchemes = map[string]func(*url.URL, Dialer) (Dialer, error){}
	}
	proxySchemes[scheme] = f
}
// FromURL returns a Dialer given a URL specification and an underlying
// Dialer for it to make network requests.
//
// User information in the URL, when present, is passed on as Auth. The
// "socks5" and "socks5h" schemes are built in and default to port 1080
// when the URL has no port. Any other scheme is looked up among those
// added with RegisterDialerType; an unknown scheme is an error.
func FromURL(u *url.URL, forward Dialer) (Dialer, error) {
	var auth *Auth
	if info := u.User; info != nil {
		auth = &Auth{User: info.Username()}
		if password, ok := info.Password(); ok {
			auth.Password = password
		}
	}

	if scheme := u.Scheme; scheme == "socks5" || scheme == "socks5h" {
		host, port := u.Hostname(), u.Port()
		if port == "" {
			port = "1080"
		}
		return SOCKS5("tcp", net.JoinHostPort(host, port), auth, forward)
	}

	// If the scheme doesn't match any of the built-in schemes, see if it
	// was registered by another package. Looking up a key in a nil map
	// is safe and simply reports "not found".
	if factory, ok := proxySchemes[u.Scheme]; ok {
		return factory(u, forward)
	}
	return nil, errors.New("proxy: unknown scheme: " + u.Scheme)
}
// Environment lookups used by FromEnvironmentUsing. For each entry the
// names are tried in order and the first non-empty value wins.
var (
// allProxyEnv holds the proxy URL.
allProxyEnv = &envOnce{
names: []string{"ALL_PROXY", "all_proxy"},
}
// noProxyEnv holds the comma-separated list handed to
// PerHost.AddFromString.
noProxyEnv = &envOnce{
names: []string{"NO_PROXY", "no_proxy"},
}
)
// envOnce looks up an environment variable (optionally by multiple
// names) once. It mitigates expensive lookups on some platforms
// (e.g. Windows).
// (Borrowed from net/http/transport.go)
type envOnce struct {
// names lists the variable names to try, in order of preference.
names []string
// once guards the single lookup performed by Get.
once sync.Once
// val caches the first non-empty value found, or "" if none was set.
val string
}
// Get returns the cached value, performing the environment lookup on
// the first call only. Later changes to the environment are not seen
// unless reset is called.
func (e *envOnce) Get() string {
e.once.Do(e.init)
return e.val
}
// init reads the variables named in e.names in order and stops at the
// first one with a non-empty value, leaving that value in e.val. If
// every name is unset or empty, e.val ends up empty.
func (e *envOnce) init() {
	for i := 0; i < len(e.names); i++ {
		if e.val = os.Getenv(e.names[i]); e.val != "" {
			return
		}
	}
}
// reset is used by tests. It clears the cached value and replaces the
// sync.Once so that the next Get reads the environment again.
func (e *envOnce) reset() {
e.once = sync.Once{}
e.val = ""
}
+42
View File
@@ -0,0 +1,42 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package proxy
import (
"context"
"net"
"golang.org/x/net/internal/socks"
)
// SOCKS5 returns a Dialer that makes SOCKSv5 connections to the given
// address with an optional username and password.
// See RFC 1928 and RFC 1929.
//
// When forward is non-nil it is used to reach the proxy server itself:
// directly through DialContext if forward implements ContextDialer,
// otherwise through dialContext. When auth is non-nil the dialer offers
// both the "no authentication" and the username/password methods. The
// returned error is always nil.
func SOCKS5(network, address string, auth *Auth, forward Dialer) (Dialer, error) {
	d := socks.NewDialer(network, address)

	switch f := forward.(type) {
	case nil:
		// No forwarding dialer: leave ProxyDial unset.
	case ContextDialer:
		d.ProxyDial = func(ctx context.Context, proxyNetwork string, proxyAddr string) (net.Conn, error) {
			return f.DialContext(ctx, proxyNetwork, proxyAddr)
		}
	default:
		d.ProxyDial = func(ctx context.Context, proxyNetwork string, proxyAddr string) (net.Conn, error) {
			return dialContext(ctx, forward, proxyNetwork, proxyAddr)
		}
	}

	if auth == nil {
		return d, nil
	}
	creds := socks.UsernamePassword{
		Username: auth.User,
		Password: auth.Password,
	}
	d.AuthMethods = []socks.AuthMethod{
		socks.AuthMethodNotRequired,
		socks.AuthMethodUsernamePassword,
	}
	d.Authenticate = creds.Authenticate
	return d, nil
}
+2
View File
@@ -67,6 +67,8 @@ github.com/skip2/go-qrcode/reedsolomon
golang.org/x/crypto/curve25519
# golang.org/x/net v0.27.0
## explicit; go 1.18
golang.org/x/net/internal/socks
golang.org/x/net/proxy
golang.org/x/net/publicsuffix
# golang.org/x/sys v0.30.0
## explicit; go 1.18
+30 -15
View File
@@ -169,7 +169,7 @@ export function DrivesPage() {
kind: d.kind,
name: d.name,
rootId: d.rootId,
creds: {},
creds: d.kind === "spider91" ? { proxy: d.spider91Proxy ?? "" } : {},
spider91UploadDriveId: settings?.spider91UploadDriveId ?? "",
});
setModalOpen(true);
@@ -185,7 +185,9 @@ export function DrivesPage() {
const driveID = existing
? form.id
: makeUniqueDriveId(form.kind, name, list);
const rootId = form.rootId.trim() || defaultRootId(form.kind);
const rootId = usesRootDirectoryID(form.kind)
? form.rootId.trim() || defaultRootId(form.kind)
: defaultRootId(form.kind);
// 若编辑且没有提供凭证,提示一下但仍允许保存(不改凭证)
setSaving(true);
try {
@@ -408,7 +410,7 @@ export function DrivesPage() {
<span className="admin-detail-label"> ID</span>
<span className="admin-detail-value admin-mono-cell">{d.id}</span>
</div>
{d.kind !== "spider91" && (
{usesRootDirectoryID(d.kind) && (
<>
<div className="admin-detail-row">
<span className="admin-detail-label"> ID</span>
@@ -974,17 +976,19 @@ function DriveForm({
<option value="wopan"></option>
</select>
</div>
<div className="admin-form__row">
<label> ID</label>
<input
value={form.rootId}
onChange={(e) => set("rootId", e.target.value)}
placeholder={rootIdPlaceholder(form.kind)}
/>
<div className="admin-form__help">
使ID获取方式请参考OpenList文档
{usesRootDirectoryID(form.kind) && (
<div className="admin-form__row">
<label> ID</label>
<input
value={form.rootId}
onChange={(e) => set("rootId", e.target.value)}
placeholder={rootIdPlaceholder(form.kind)}
/>
<div className="admin-form__help">
使ID获取方式请参考OpenList文档
</div>
</div>
</div>
)}
{(help || fields.length > 0) && (
<>
@@ -1087,7 +1091,7 @@ function credentialHelp(kind: Kind, isEdit: boolean): string {
case "localstorage":
return `把服务器上的一个已有目录作为视频来源扫描。填写绝对路径,例如 /mnt/videos;系统会读取该目录及子目录中的视频,并生成封面、Teaser 和指纹。${note}`;
case "spider91":
return "91 爬虫会把定时抓取到的视频和封面先保存到本机,并作为一个视频来源接入站点;它不是外部网盘,不需要填写 Cookie 或目录 ID。后续流水线会把较早的视频上传到你选择的 115 / PikPak / OneDrive 目标盘。";
return "91 爬虫会把定时抓取到的视频和封面先保存到本机,并作为一个视频来源接入站点;可按服务器网络情况单独配置代理。后续流水线会把较早的视频上传到你选择的 115 / PikPak / OneDrive 目标盘。";
default:
return "";
}
@@ -1188,7 +1192,14 @@ function credentialFields(kind: Kind): Array<{
},
];
case "spider91":
return [];
return [
{
key: "proxy",
label: "代理地址(可选)",
placeholder: "http://127.0.0.1:7890",
help: "仅用于 91Spider 的列表/详情请求和视频、封面下载;留空则使用服务器环境变量 HTTP_PROXY / HTTPS_PROXY 或直连。支持 http://、https://、socks5:// 或 socks5h://。",
},
];
}
}
@@ -1201,6 +1212,10 @@ function defaultRootId(kind: Kind): string {
return "0";
}
// Reports whether a drive kind has a root directory ID: true for every
// kind except "localstorage" and "spider91". Callers use it to decide
// whether to show the root directory ID field and whether to keep the
// value typed into the form or fall back to defaultRootId(kind).
// NOTE(review): the drive-form test appears to match this function's
// source text with a regex, so keep the signature and body verbatim.
function usesRootDirectoryID(kind: Kind): boolean {
return kind !== "localstorage" && kind !== "spider91";
}
function rootIdPlaceholder(kind: Kind): string {
const rootId = defaultRootId(kind);
return rootId ? `默认:${rootId}` : "留空表示根目录";
+2
View File
@@ -93,6 +93,8 @@ export type AdminDrive = {
skipDirIds: string[];
// spider91 上次成功爬取时间(unix 秒);其它 kind 留空。
lastCrawlAt?: number;
// spider91 专用代理地址;仅后台管理接口返回,用于编辑表单回显。
spider91Proxy?: string;
thumbnailGenerationStatus?: DriveGenerationStatus;
previewGenerationStatus?: DriveGenerationStatus;
fingerprintGenerationStatus?: DriveGenerationStatus;
+11 -1
View File
@@ -8,6 +8,9 @@ const drivesPageSource = readFileSync(
);
test("spider91 drive form does not expose advanced crawler credentials", () => {
assert.match(drivesPageSource, /key: "proxy"/);
assert.match(drivesPageSource, /label: "代理地址(可选)"/);
assert.match(drivesPageSource, /支持 http:\/\/、https:\/\/、socks5:\/\/ 或 socks5h:\/\//);
assert.doesNotMatch(drivesPageSource, /target_new/);
assert.doesNotMatch(drivesPageSource, /crawl_hour/);
assert.doesNotMatch(drivesPageSource, /python_path/);
@@ -24,8 +27,14 @@ test("spider91 upload target uses explicit local-save option instead of auto tar
assert.doesNotMatch(drivesPageSource, /自动模式/);
});
test("drive form shows a root directory id field for all drive kinds", () => {
test("drive form hides root directory id for localstorage and spider91", () => {
assert.match(drivesPageSource, /<label>根目录 ID<\/label>/);
assert.match(
drivesPageSource,
/function usesRootDirectoryID\(kind: Kind\): boolean \{\s*return kind !== "localstorage" && kind !== "spider91";\s*\}/
);
assert.match(drivesPageSource, /\{usesRootDirectoryID\(form\.kind\) && \(/);
assert.match(drivesPageSource, /\{usesRootDirectoryID\(d\.kind\) && \(/);
assert.match(drivesPageSource, /placeholder=\{rootIdPlaceholder\(form\.kind\)\}/);
assert.doesNotMatch(drivesPageSource, /扫描起点目录 ID/);
assert.doesNotMatch(drivesPageSource, /set\("scanRootId"/);
@@ -94,6 +103,7 @@ test("localstorage drive form asks for a server directory path", () => {
assert.match(fields, /key: "path"/);
assert.match(fields, /label: "本地目录路径"/);
assert.match(drivesPageSource, /if \(kind === "localstorage"\) return "\/"/);
assert.match(drivesPageSource, /kind !== "localstorage" && kind !== "spider91"/);
});
test("drive type selector keeps primary source order", () => {