mirror of
https://github.com/nianzhibai/91.git
synced 2026-06-24 20:52:40 +08:00
Compare commits
147 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2adaac3d7d | |||
| ee8af315b0 | |||
| 6884473dbf | |||
| f0458f7043 | |||
| e32da9016b | |||
| 2427f58165 | |||
| 00aaeed736 | |||
| 5efbceb205 | |||
| 0faeaf408f | |||
| 1b5eda92b0 | |||
| 840a858dbd | |||
| 1ee5ee35be | |||
| 12b737b6fe | |||
| bd33d26a1f | |||
| 36fe32cb84 | |||
| 194d98895a | |||
| 2437fbd779 | |||
| 4dd66b8120 | |||
| 30b736cf36 | |||
| 57391e0e98 | |||
| 052e142520 | |||
| f9351324c6 | |||
| bb83277d62 | |||
| aa856db1f6 | |||
| 7e5e67697e | |||
| 9cc8e02bec | |||
| 139e63eef2 | |||
| b8388eba59 | |||
| 76782f3801 | |||
| 1ae1408fb6 | |||
| 738406162a | |||
| 0f111b846d | |||
| 4dd9015bd7 | |||
| 84fbb6f51c | |||
| 992b20da93 | |||
| 1770693666 | |||
| 177041633a | |||
| ae324d3752 | |||
| 7f1e4eaa29 | |||
| 811d87cc27 | |||
| e4408f5655 | |||
| e93c906921 | |||
| 96e423b952 | |||
| a8ccc19e9e | |||
| 7ddf33d726 | |||
| c1355385e1 | |||
| ec5a01b6aa | |||
| 71d4a16db1 | |||
| 940e5dd76d | |||
| e826c05d5c | |||
| 3465b9e837 | |||
| d33c1b1b20 | |||
| 5fc8e9ebb7 | |||
| dc7d2a5de3 | |||
| 2f2bfbfcdc | |||
| 9def08b0c5 | |||
| c87208117e | |||
| a770b3af6b | |||
| e1b8f0eae7 | |||
| 2d907da07d | |||
| 78cfb0a9e5 | |||
| fa7823ef3e | |||
| 5b0afcfc6c | |||
| 76ae3cea7d | |||
| abe335cea0 | |||
| 8dff0f07b9 | |||
| 5080203b7c | |||
| df6f0ebbbf | |||
| 8f0d52aec4 | |||
| 53327c9b8e | |||
| 57ed546b83 | |||
| 869c0d5f78 | |||
| 397823bb8d | |||
| 9e1acd4e56 | |||
| 2cd365acd4 | |||
| 48808ec568 | |||
| 5dc00e486d | |||
| 4ec1097496 | |||
| 95e46d8fbb | |||
| fdfc4771df | |||
| c8c6812ae6 | |||
| b938ff1221 | |||
| 7d63a6d265 | |||
| a8de7d2f6b | |||
| d4fcff896e | |||
| cada336e96 | |||
| 5bb93bd95b | |||
| b6be7d021c | |||
| e36a17f99d | |||
| e01b7cc3b7 | |||
| c78f22aedb | |||
| cf9de5b40a | |||
| be19f81e82 | |||
| 4d679ef64f | |||
| 4ba964b7e2 | |||
| cd3b3c6976 | |||
| 91c03947d1 | |||
| 7f1c1a51a3 | |||
| 077c2e2c38 | |||
| 30a62f265a | |||
| 38e62c6a2f | |||
| 6345cf74e0 | |||
| f004b14d20 | |||
| a407312dfa | |||
| a165605b0f | |||
| 0ac1a5b13e | |||
| a83449b129 | |||
| c68891e6f0 | |||
| 9892599412 | |||
| 0cb2a7a1c2 | |||
| 87d197496b | |||
| 0e3a5bd5cd | |||
| d72bfee10f | |||
| 389dd981a8 | |||
| 44d622d49c | |||
| d7ff0c98af | |||
| 66adf444ba | |||
| 8f8037b838 | |||
| 215d9596fd | |||
| e57058db79 | |||
| 6ec61833f2 | |||
| 6e87f88d53 | |||
| e78fa9d978 | |||
| afbff9eb55 | |||
| 039ec2a988 | |||
| da0683344e | |||
| 1a1282382e | |||
| 34b6fa8ea9 | |||
| 08e38bc4ca | |||
| c93d193efe | |||
| 08568c3951 | |||
| 7e394e2971 | |||
| d16e3168f9 | |||
| 81f348b246 | |||
| 1e71c1fb72 | |||
| d5122d289e | |||
| c146ad50ed | |||
| f5c20f9594 | |||
| 62e69d4c06 | |||
| 51725ba82f | |||
| c06db836dd | |||
| b8717da4fd | |||
| 2d57545e87 | |||
| 6518d772c0 | |||
| f2c0e7f854 | |||
| 3c7219ecd6 | |||
| 94669fd35e |
+17
-3
@@ -23,13 +23,27 @@ tools/
|
||||
|
||||
# 编译产物
|
||||
backend/server
|
||||
backend/server.*
|
||||
release/
|
||||
tsconfig.tsbuildinfo
|
||||
tmp/
|
||||
|
||||
# 91 爬虫脚本独立运行时的默认输出文件(backend 跑时会显式 --output 到 backend/data/spider91/,所以不会落在这里)
|
||||
91porn_videos.json
|
||||
91VideoSpider/91porn_videos.json
|
||||
91VideoSpider/data/
|
||||
91VideoSpider/__pycache__/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
|
||||
# Local scratch images
|
||||
/*.png
|
||||
/*.jpg
|
||||
/*.jpeg
|
||||
/*.gif
|
||||
/*.webp
|
||||
/*.bmp
|
||||
/*.ico
|
||||
/image.jpg
|
||||
/image003.jpg
|
||||
/image004.jpg
|
||||
/image005.png
|
||||
/image006.png
|
||||
/image02.png
|
||||
|
||||
@@ -1,859 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
91porn 视频爬虫脚本
|
||||
===================
|
||||
爬取 https://www.91porn.com/v.php?category=top&viewtype=basic 下的所有视频信息:
|
||||
- 视频名称
|
||||
- 封面图直链
|
||||
- 视频直链 (MP4)
|
||||
|
||||
依赖安装:
|
||||
pip install requests beautifulsoup4 lxml
|
||||
|
||||
使用方法:
|
||||
# 全量爬取(默认行为,从 page=1 一直爬到末尾,写到 OUTPUT_FILE)
|
||||
python spider_91porn.py
|
||||
|
||||
# 只爬指定页(单页模式,手动调试用)
|
||||
python spider_91porn.py --page 1 --output /tmp/spider91_page1.json
|
||||
|
||||
# 凑够 N 个新视频模式(backend 凌晨任务用)
|
||||
python spider_91porn.py --target-new 15 --seen-viewkeys-file /tmp/seen.txt --output /tmp/new.json
|
||||
|
||||
CLI 参数:
|
||||
--page N 只爬第 N 页,配合 --output 用于手动调试
|
||||
--target-new N 从 page 1 起翻页直到凑够 N 个新视频(不在 seen 列表里的)
|
||||
--seen-viewkeys-file FILE 每行一个已知 viewkey 或 mp4 源 ID,命中即跳过;与 --target-new 配合使用
|
||||
--output FILE 输出 JSON 路径,覆盖默认的 OUTPUT_FILE
|
||||
--no-resume 禁用断点续爬(单页/target-new 模式下自动禁用)
|
||||
--quiet 压缩日志,每条视频只输出一行
|
||||
-h / --help 帮助
|
||||
|
||||
配置说明 (编辑脚本内 "配置区域"):
|
||||
- MIN_PAGE_DELAY / MAX_PAGE_DELAY : 列表页请求间隔 (默认 3-6 秒)
|
||||
- MIN_DETAIL_DELAY / MAX_DETAIL_DELAY : 详情页请求间隔 (默认 2-5 秒)
|
||||
- MAX_PAGES : 限制最大爬取页数 (None=不限, 如 5=只爬前5页)
|
||||
- OUTPUT_FILE : 输出文件名
|
||||
|
||||
输出格式 (JSON):
|
||||
{
|
||||
"videos": [
|
||||
{
|
||||
"title": "视频标题",
|
||||
"thumb_url": "https://...thumb/xxxx.jpg",
|
||||
"video_url": "https://...mp43/xxxx.mp4?st=...",
|
||||
"viewkey": "abc123...",
|
||||
"source_id": "xxxx",
|
||||
"detail_url": "https://...view_video.php?viewkey=..."
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
注意:
|
||||
1. 视频直链包含时效性token (e参数为过期时间戳),会过期,需定期重新爬取
|
||||
2. 脚本已内置随机延时,请勿移除,避免对服务器造成压力
|
||||
3. 网站有Cloudflare保护,如遇到403/5xx错误,可能需要使用带cookie的session
|
||||
4. 本脚本仅供学习交流,请遵守当地法律法规
|
||||
|
||||
作者: OpenCode
|
||||
日期: 2026-05-22
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import requests
|
||||
import re
|
||||
import time
|
||||
import random
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import html
|
||||
from urllib.parse import urljoin, unquote, urlparse
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
print("错误: 缺少依赖库 beautifulsoup4")
|
||||
print("请运行: pip install beautifulsoup4 lxml")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def prefer_ipv4_for_plain_socks5_proxy():
    """Make ``socket.getaddrinfo`` return IPv4 results first when a socks5:// proxy is configured.

    PySocks may pick the IPv6 address first for ``socks5://`` proxies, and some
    SOCKS5 servers only accept IPv4. When any proxy environment variable starts
    with ``socks5://`` this function wraps ``socket.getaddrinfo`` so that
    ``AF_INET`` entries are sorted ahead of all others (the sort is stable, so
    the relative order inside each family is preserved).

    The patch is process-wide and applied at most once; the marker attribute
    ``socket._spider91_ipv4_first`` records that it is already installed.
    ``socks5h://`` proxies are intentionally not matched by the prefix check.
    """
    # requests also honours ALL_PROXY / all_proxy, so those are inspected too.
    proxy_env_names = (
        "HTTPS_PROXY",
        "HTTP_PROXY",
        "ALL_PROXY",
        "https_proxy",
        "http_proxy",
        "all_proxy",
    )
    uses_plain_socks5 = any(
        os.environ.get(name, "").strip().lower().startswith("socks5://")
        for name in proxy_env_names
    )
    if not uses_plain_socks5 or getattr(socket, "_spider91_ipv4_first", False):
        return

    original_getaddrinfo = socket.getaddrinfo

    def getaddrinfo_ipv4_first(*args, **kwargs):
        infos = original_getaddrinfo(*args, **kwargs)
        return sorted(infos, key=lambda info: 0 if info[0] == socket.AF_INET else 1)

    socket.getaddrinfo = getaddrinfo_ipv4_first
    socket._spider91_ipv4_first = True
|
||||
|
||||
# ===================== 配置区域 =====================
|
||||
BASE_URL = "https://www.91porn.com/v.php"
LIST_PARAMS = dict(category="top", viewtype="basic")

# Request headers imitating a desktop browser.
# NOTE: "Accept-Encoding" is deliberately left out so that "br" (brotli) is
# never advertised unless the brotli library is installed.
_USER_AGENT_PARTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "AppleWebKit/537.36 (KHTML, like Gecko)",
    "Chrome/125.0.0.0 Safari/537.36",
]
_ACCEPT_PARTS = [
    "text/html,application/xhtml+xml,application/xml;",
    "q=0.9,image/avif,image/webp,image/apng,*/*;",
    "q=0.8,application/signed-exchange;v=b3;q=0.7",
]
HEADERS = {
    "User-Agent": " ".join(_USER_AGENT_PARTS),
    "Accept": "".join(_ACCEPT_PARTS),
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
}

# Delays in seconds; they throttle the crawl rate.
MIN_PAGE_DELAY, MAX_PAGE_DELAY = 3.0, 6.0      # between list pages
MIN_DETAIL_DELAY, MAX_DETAIL_DELAY = 2.0, 5.0  # between detail pages

# Retry settings.
MAX_RETRIES = 3
RETRY_DELAY = 5.0

# Output settings.
OUTPUT_FILE = "91porn_videos.json"
MAX_PAGES = None      # None = crawl every page; an integer such as 5 = first 5 pages only
RESUME = True         # skip viewkeys already present in the output file
MAX_EMPTY_PAGES = 2   # stop after this many consecutive empty pages
|
||||
# ===================================================
|
||||
|
||||
|
||||
class Porn91Spider:
    """Crawl the list pages under BASE_URL and resolve each card to a direct video URL.

    For every list page the spider parses the video cards, fetches each card's
    detail page, extracts the direct video URL, and accumulates the entries in
    ``self.results``. ``crawl()`` writes the accumulated results to
    ``self.output_file`` as JSON when it finishes.
    """

    # Set to True by emit_stream_video() when writing to stdout fails (the
    # consumer has most likely gone away); crawl() and _process_video_list()
    # stop as soon as they see it.
    stream_broken = False

    def __init__(
        self,
        output_file: str = None,
        start_page: int = 1,
        max_pages: int = None,
        resume: bool = None,
        max_empty_pages: int = None,
        quiet: bool = False,
        target_new: int = None,
        seen_viewkeys: list = None,
        stream_output: bool = False,
    ):
        """Build a spider; every argument falls back to the module-level configuration.

        Args:
            output_file: JSON output path; ``None`` uses ``OUTPUT_FILE``.
            start_page: first list page to request (values below 1 become 1).
            max_pages: number of pages to crawl starting at ``start_page``;
                ``None`` uses ``MAX_PAGES``, and a non-positive value means
                no limit.
            resume: load the existing output file and skip its viewkeys;
                ``None`` uses ``RESUME``.
            max_empty_pages: stop after this many consecutive empty pages;
                ``None`` uses ``MAX_EMPTY_PAGES``.
            quiet: suppress the per-delay log lines.
            target_new: stop once this many new videos were processed
                successfully; ``None`` or a non-positive value disables it.
            seen_viewkeys: already-known IDs (viewkeys or mp4 source IDs) to skip.
            stream_output: write each resolved entry to stdout as one JSON
                line and send all log output to stderr instead of stdout.
        """
        self.session = requests.Session()
        self.session.headers.update(HEADERS)
        # Without a fixed "mode" cookie the first detail-page request may return
        # a video source that differs from the list card; pinning desktop mode
        # keeps list-page and detail-page parsing consistent.
        self.session.cookies.set("mode", "d")

        self.output_file = output_file if output_file is not None else OUTPUT_FILE
        self.start_page = max(1, int(start_page or 1))
        # Honour the documented MAX_PAGES setting when the caller passes nothing.
        effective_max_pages = MAX_PAGES if max_pages is None else max_pages
        self.max_pages = (
            effective_max_pages
            if effective_max_pages is not None and effective_max_pages > 0
            else None
        )
        self.resume = RESUME if resume is None else bool(resume)
        self.max_empty_pages = (
            MAX_EMPTY_PAGES if max_empty_pages is None else int(max_empty_pages)
        )
        self.target_new = target_new if target_new and target_new > 0 else None
        self.quiet = bool(quiet)
        self.stream_output = bool(stream_output)
        self.stream_broken = False

        # Mount a retrying adapter for transient HTTP statuses.
        try:
            from requests.adapters import HTTPAdapter
            from urllib3.util.retry import Retry
            retry_strategy = Retry(
                total=MAX_RETRIES,
                backoff_factor=1,
                status_forcelist=[429, 500, 502, 503, 504],
            )
            adapter = HTTPAdapter(max_retries=retry_strategy)
            self.session.mount("https://", adapter)
            self.session.mount("http://", adapter)
        except ImportError:
            pass  # urllib3 may be too old to provide Retry

        self.results = []
        self.pages_crawled = 0
        self.processed_videos = 0
        self.skipped_videos = 0
        self.failed_videos = 0
        self.skip_viewkeys = set()

        # IDs supplied by the caller; an entry may be a viewkey or an mp4 source ID.
        for vk in seen_viewkeys or ():
            vk = vk.strip() if vk else ""
            if vk:
                self.skip_viewkeys.add(vk)

        # Resume: load earlier results and skip the viewkeys they contain.
        if self.resume and os.path.exists(self.output_file):
            try:
                with open(self.output_file, 'r', encoding='utf-8') as f:
                    existing_data = json.load(f)
                existing_videos = existing_data.get('videos', [])
                known_viewkeys = {v.get('viewkey', '') for v in existing_videos}
                successful = existing_data.get('successful', 0)
                failed = existing_data.get('failed', 0)
            except (OSError, ValueError, AttributeError, TypeError) as e:
                # An unreadable or malformed file must not abort the crawl, but
                # it should not be ignored silently either.
                self.log(f"加载已有数据失败,将忽略该文件: {e}")
            else:
                # State is only committed after the whole file parsed cleanly.
                self.results = existing_videos
                self.skip_viewkeys.update(vk for vk in known_viewkeys if vk)
                self.processed_videos = successful
                self.failed_videos = failed
                self.log(f"加载已有数据: {len(self.results)} 个视频, 将跳过已处理项")

    def log(self, message: str):
        """Print a timestamped log line; in stream mode it goes to stderr so stdout stays pure JSONL."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        line = f"[{timestamp}] {message}"
        if self.stream_output:
            print(line, file=sys.stderr, flush=True)
        else:
            print(line)

    def emit_stream_video(self, video: dict):
        """Write one video entry to stdout as a single JSON line and flush immediately.

        Does nothing unless stream mode is enabled. If the write fails, the
        error is reported on stderr and ``self.stream_broken`` is set so the
        crawl loops stop instead of continuing to fetch pages nobody reads.
        """
        if not self.stream_output:
            return
        try:
            print(json.dumps(video, ensure_ascii=False), flush=True)
        except Exception as e:
            # A stdout failure essentially only happens when the pipe is closed.
            self.stream_broken = True
            print(f"[stream] emit failed: {e}", file=sys.stderr, flush=True)

    def random_sleep(self, min_sec: float, max_sec: float):
        """Sleep for a uniformly random duration between ``min_sec`` and ``max_sec`` seconds."""
        delay = random.uniform(min_sec, max_sec)
        if not self.quiet:
            self.log(f"  随机延时 {delay:.2f} 秒...")
        time.sleep(delay)

    def fetch_page(self, url: str, description: str = "", referer: str = "") -> str:
        """Fetch ``url`` and return its HTML, or ``""`` when every attempt failed.

        Makes up to ``MAX_RETRIES`` attempts. A 403 response, a short page
        containing "Just a moment" (treated as a Cloudflare challenge), and any
        ``requests`` exception are each retried after a random delay.

        Args:
            url: page to request.
            description: human-readable label used in the log instead of the URL.
            referer: value for the ``Referer`` header; omitted when empty.
        """
        headers_extra = {"Referer": referer} if referer else {}

        for attempt in range(1, MAX_RETRIES + 1):
            try:
                self.log(f"正在请求: {description or url} (尝试 {attempt}/{MAX_RETRIES})")
                response = self.session.get(url, timeout=30, headers=headers_extra)

                # Handle 403 before raise_for_status() so it gets its own message.
                if response.status_code == 403:
                    self.log("警告: 收到 403 Forbidden,可能被拦截")
                    if attempt < MAX_RETRIES:
                        self.random_sleep(RETRY_DELAY, RETRY_DELAY + 3)
                        continue
                    return ""

                response.raise_for_status()

                # Decode explicitly as UTF-8 to sidestep requests' encoding
                # detection; errors='replace' means this cannot raise.
                html_content = response.content.decode('utf-8', errors='replace')

                # The site itself loads a challenge-platform script, so only a
                # short page carrying the challenge text counts as a challenge.
                is_cf_challenge = (
                    "Just a moment" in html_content and
                    len(html_content) < 8000
                )
                if is_cf_challenge:
                    self.log("警告: 页面被Cloudflare挑战拦截,需要浏览器环境或正确cookie")
                    if attempt < MAX_RETRIES:
                        self.random_sleep(RETRY_DELAY, RETRY_DELAY + 5)
                        continue
                    return ""

                return html_content
            except requests.exceptions.HTTPError as e:
                self.log(f"HTTP错误: {e}")
                if attempt < MAX_RETRIES:
                    self.random_sleep(RETRY_DELAY, RETRY_DELAY + 3)
                else:
                    return ""
            except requests.exceptions.RequestException as e:
                self.log(f"请求失败: {e}")
                if attempt < MAX_RETRIES:
                    self.random_sleep(RETRY_DELAY, RETRY_DELAY + 3)
                else:
                    self.log(f"达到最大重试次数,放弃: {url}")
                    return ""
        return ""

    def parse_list_page(self, html: str) -> list:
        """Parse a list page into a list of card dicts.

        Returns:
            ``[{title, detail_url, thumb_url, viewkey, source_id}, ...]``, with
            duplicates (same source ID, or same detail URL when no source ID is
            known) removed.
        """
        videos = []
        soup = BeautifulSoup(html, 'lxml')

        # Only regular cards are parsed. The page also contains oversized
        # col-lg-8 cards whose title/thumb/detail URL get mixed up with other
        # videos, so they must not be used.
        video_cards = soup.select('div.col-xs-12.col-sm-4.col-md-3.col-lg-3')

        seen_cards = set()

        for card in video_cards:
            link = card.find('a', href=re.compile(r'view_video\.php\?viewkey='))
            if not link:
                continue
            href = link.get('href', '')
            if not href:
                continue

            match = re.search(r'viewkey=([^&]+)', href)
            if not match:
                continue
            viewkey = match.group(1)

            detail_url = urljoin(BASE_URL, href)
            title = self._extract_title(link)

            # Source ID: prefer the overlay element id, fall back to the thumb file name.
            thumb_url = ""
            source_id = ""
            overlay = link.find(id=re.compile(r'^playvthumb_\d+$'))
            if overlay:
                source_id = overlay.get('id', '').rsplit('_', 1)[-1]
            img = link.find('img', class_=re.compile(r'img-responsive'))
            if img:
                thumb_url = img.get('src', '') or img.get('data-original', '')
                if thumb_url:
                    thumb_url = urljoin(BASE_URL, thumb_url)
            if not source_id and thumb_url:
                source_id = self._extract_thumb_source_id(thumb_url)

            card_key = source_id or detail_url
            if card_key in seen_cards:
                continue
            seen_cards.add(card_key)

            videos.append({
                "title": title,
                "detail_url": detail_url,
                "thumb_url": thumb_url,
                "viewkey": viewkey,
                "source_id": source_id
            })

        return videos

    def _extract_title(self, link) -> str:
        """Extract a cleaned title from a video link element.

        Tries, in order: the text of a ``span`` whose class matches
        ``video-title``, the link's ``title`` attribute, and finally the link
        text with a leading duration prefix removed (truncated to 120 chars).
        """
        title_el = link.find('span', class_=re.compile(r'video-title'))
        if title_el:
            title = title_el.get_text(strip=True)
            if title:
                return html.unescape(title)

        title = link.get('title', '').strip()
        if title:
            return html.unescape(title)

        text = link.get_text(separator=' ', strip=True)
        # Drop an optional "HD"/"91" marker followed by an HH:MM:SS duration.
        text = re.sub(r'^(HD\s+|91\s+)?\d{2}:\d{2}:\d{2}\s*', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return html.unescape(text)[:120]

    def parse_detail_page(self, html: str) -> dict:
        """Parse a detail page and extract the direct video URL.

        Returns:
            A dict that may contain ``title``, ``video_url`` and ``source_id``.
            ``video_url`` is absent when no usable URL was found; the dict is
            empty when ``html`` is empty.
        """
        result = {}

        if not html:
            return result

        title = self._extract_detail_title(html)
        if title:
            result["title"] = title

        # Method 1 (primary): the page writes the <video> tag through
        # document.write(strencode2("%3c%73%6f...")); the argument is
        # percent-encoded HTML.
        strencode_match = re.search(r'strencode2\(["\']([^"\']+)["\']\)', html)
        if strencode_match:
            encoded = strencode_match.group(1)
            try:
                decoded = unquote(encoded)
                src_match = re.search(r"src=['\"]([^'\"]+)['\"]", decoded)
                if src_match:
                    video_url = src_match.group(1)
                    # Collapse duplicate slashes after the host
                    # (https://host//path -> https://host/path).
                    video_url = re.sub(r'(https?://[^/]+)//+', r'\1/', video_url)
                    result["video_url"] = video_url
                    result["source_id"] = self._extract_source_id(video_url)
                    return result
            except Exception as e:
                self.log(f"  解码 strencode2 失败: {e}")

        # Method 2 (fallback): scan for any mp4 URL in the page, skipping ad
        # URLs. Every match is examined so that an ad appearing first does not
        # hide the real video further down.
        for mp4_match in re.finditer(r'https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*', html):
            url = mp4_match.group(0)
            if 'kwai' in url or 'ad-' in url.lower():
                continue
            result["video_url"] = url
            result["source_id"] = self._extract_source_id(url)
            return result

        return result

    def _extract_detail_title(self, html_text: str) -> str:
        """Return the page's ``<title>`` text with the trailing "- 91porn..." suffix removed (max 160 chars)."""
        soup = BeautifulSoup(html_text, 'lxml')
        title_el = soup.find('title')
        if not title_el:
            return ""
        title = title_el.get_text(" ", strip=True)
        title = re.sub(r'\s*-\s*91porn.*$', '', title, flags=re.IGNORECASE).strip()
        return html.unescape(title)[:160]

    def _extract_source_id(self, video_url: str) -> str:
        """Return the all-digit file stem of a video URL, or ``""``.

        ``""`` is returned when the extension is not a known video extension or
        the stem contains anything other than the digits 0-9.
        """
        path = urlparse(video_url or "").path
        stem, ext = os.path.splitext(os.path.basename(path))
        if ext.lower() not in {".mp4", ".m4v", ".mov", ".webm", ".mkv", ".avi"}:
            return ""
        return stem if re.fullmatch(r'[0-9]+', stem) else ""

    def _extract_thumb_source_id(self, thumb_url: str) -> str:
        """Return the digits of a ``/thumb/<digits>.<ext>`` URL path, or ``""`` if it does not match."""
        path = urlparse(thumb_url or "").path
        match = re.search(r'/thumb/(\d+)\.[A-Za-z0-9]+$', path)
        return match.group(1) if match else ""

    def _thumb_url_for_source(self, thumb_url: str, source_id: str) -> str:
        """Rewrite a ``/thumb/<name>.<ext>`` URL so that it points at ``<source_id>.jpg``.

        The URL is returned unchanged when either argument is empty, the path
        does not end in ``/thumb/<name>.<ext>``, or it already names
        ``source_id``. A rewritten URL has its query and fragment removed.
        """
        if not thumb_url or not source_id:
            return thumb_url
        parsed = urlparse(thumb_url)
        match = re.search(r'/thumb/([^/?#]+)\.[A-Za-z0-9]+$', parsed.path)
        if not match:
            return thumb_url
        if match.group(1) == source_id:
            return thumb_url
        path = re.sub(
            r'/thumb/[^/?#]+\.[A-Za-z0-9]+$',
            f'/thumb/{source_id}.jpg',
            parsed.path,
        )
        return parsed._replace(path=path, query="", fragment="").geturl()

    def _is_known(self, video: dict) -> bool:
        """Return True when the card's viewkey or list-page source ID is already in the skip set."""
        if video['viewkey'] in self.skip_viewkeys:
            return True
        source_id = video.get("source_id", "")
        return bool(source_id) and source_id in self.skip_viewkeys

    def crawl(self):
        """Run the main crawl loop, then save the results and print the summary.

        The loop stops as soon as any of these holds:
          - ``max_pages`` pages were requested in this session;
          - ``max_empty_pages`` consecutive pages failed or had no videos;
          - in ``target_new`` mode, ``target_new`` videos were processed;
          - in stream mode, writing to stdout failed.
        """
        self.log("=" * 60)
        self.log("91porn 视频爬虫启动")
        self.log("=" * 60)
        self.log(f"配置: 列表页延时 {MIN_PAGE_DELAY}-{MAX_PAGE_DELAY}s, 详情页延时 {MIN_DETAIL_DELAY}-{MAX_DETAIL_DELAY}s")
        self.log(f"配置: 最大重试 {MAX_RETRIES} 次, 连续空页上限 {self.max_empty_pages}")
        self.log(f"配置: 起始页 {self.start_page}, 最大爬取页数 {self.max_pages if self.max_pages else '不限'}")
        if self.target_new:
            self.log(f"配置: 目标新增视频数 {self.target_new}")
        self.log(f"配置: 输出文件 {os.path.abspath(self.output_file)}")
        if self.skip_viewkeys:
            self.log(f"配置: 已跳过 {len(self.skip_viewkeys)} 个已知 viewkey")
        self.log("")

        page_num = self.start_page
        consecutive_empty = 0
        crawled_in_session = 0

        while True:
            if self.stream_broken:
                self.log("stdout 已不可写,停止爬取")
                break
            if self.max_pages is not None and crawled_in_session >= self.max_pages:
                self.log(f"达到配置的页数上限 {self.max_pages},停止")
                break
            if consecutive_empty >= self.max_empty_pages:
                self.log(f"连续 {self.max_empty_pages} 页无结果,已达到末尾")
                break
            if self.target_new is not None and self.processed_videos >= self.target_new:
                self.log(f"已累计 {self.processed_videos} 个新视频,达到目标 {self.target_new},停止")
                break

            if page_num == 1:
                page_url = f"{BASE_URL}?category=top&viewtype=basic"
            else:
                page_url = f"{BASE_URL}?category=top&viewtype=basic&page={page_num}"

            # No delay before the very first request of the session.
            if crawled_in_session > 0:
                self.log("")
                self.random_sleep(MIN_PAGE_DELAY, MAX_PAGE_DELAY)

            self.log(f"[页 {page_num}] 请求: {page_url}")
            page_html = self.fetch_page(page_url, f"列表页 第{page_num}页")

            if not page_html:
                self.log(f"[页 {page_num}] 获取失败,跳过")
                consecutive_empty += 1
                page_num += 1
                crawled_in_session += 1
                continue

            page_videos = self.parse_list_page(page_html)

            # "Empty" means the page has no cards at all, not that every card
            # was already processed.
            if not page_videos:
                self.log(f"[页 {page_num}] 页面无视频,可能已到末尾")
                consecutive_empty += 1
                page_num += 1
                crawled_in_session += 1
                continue

            consecutive_empty = 0

            # Keep only cards whose viewkey and source ID are both unknown.
            new_videos = [v for v in page_videos if not self._is_known(v)]
            skipped_on_page = len(page_videos) - len(new_videos)

            if skipped_on_page > 0:
                self.log(f"[页 {page_num}] 发现 {len(page_videos)} 个链接, 其中 {skipped_on_page} 个已处理, {len(new_videos)} 个新视频")
            else:
                self.log(f"[页 {page_num}] 发现 {len(new_videos)} 个视频")

            if new_videos:
                self._process_video_list(new_videos, referer=page_url)
            self.pages_crawled += 1
            page_num += 1
            crawled_in_session += 1

        self._save_results()
        self._print_summary()

    def _process_video_list(self, videos: list, referer: str = ""):
        """Fetch and parse the detail page of each card in ``videos``, one at a time.

        Successful entries are appended to ``self.results`` and streamed via
        ``emit_stream_video``; entries whose detail page could not be fetched
        or parsed are appended with an empty ``video_url``. Cards already in
        the skip set are skipped before any request is made.
        """
        for idx, video in enumerate(videos, 1):
            # Stop without further requests once the target is reached or the
            # stream consumer is gone.
            if self.stream_broken:
                return
            if self.target_new is not None and self.processed_videos >= self.target_new:
                return
            if video['viewkey'] in self.skip_viewkeys:
                self.log(f"  [SKIP] 已处理过: {video['viewkey']}")
                self.skipped_videos += 1
                continue
            # A known list-page source ID can never end up in the results (it
            # is either skipped or rejected as a mismatch after the fetch), so
            # skip it up front and save the request and its delay.
            if video.get("source_id") and video["source_id"] in self.skip_viewkeys:
                self.log(f"  [SKIP] 已处理过 source_id: {video['source_id']}")
                self.skipped_videos += 1
                continue

            self.log(f"  处理视频 {idx}/{len(videos)}: {video['title'][:40]}...")

            # The first card of a batch is requested without a delay.
            if idx > 1:
                self.random_sleep(MIN_DETAIL_DELAY, MAX_DETAIL_DELAY)

            detail_html = self.fetch_page(video['detail_url'], f"详情页 viewkey={video['viewkey']}", referer=referer)

            if not detail_html:
                self.log(f"  [FAIL] 详情页获取失败: {video['viewkey']}")
                video["video_url"] = ""
                self.results.append(video)
                self.skip_viewkeys.add(video['viewkey'])
                self.failed_videos += 1
                continue

            detail_info = self.parse_detail_page(detail_html)

            if detail_info.get("video_url"):
                video["video_url"] = detail_info["video_url"]
                if detail_info.get("title"):
                    video["title"] = detail_info["title"]
                list_source_id = video.get("source_id", "")
                detail_source_id = detail_info.get("source_id", "")
                # The detail page served a different source than the list card:
                # reject the entry instead of storing mismatched data.
                if list_source_id and detail_source_id and list_source_id != detail_source_id:
                    self.log(
                        f"  [FAIL] 详情页视频源不匹配: list_source_id={list_source_id} "
                        f"detail_source_id={detail_source_id} viewkey={video['viewkey']}"
                    )
                    self.failed_videos += 1
                    self.skip_viewkeys.add(video['viewkey'])
                    continue
                if not list_source_id and detail_source_id:
                    video["source_id"] = detail_source_id
                if video.get("source_id"):
                    video["thumb_url"] = self._thumb_url_for_source(
                        video.get("thumb_url", ""),
                        video["source_id"],
                    )
                    if video["source_id"] in self.skip_viewkeys:
                        self.log(f"  [SKIP] 已处理过 source_id: {video['source_id']}")
                        self.skipped_videos += 1
                        continue
                self.results.append(video)
                self.skip_viewkeys.add(video['viewkey'])
                if video.get("source_id"):
                    self.skip_viewkeys.add(video["source_id"])
                self.processed_videos += 1
                self.log(f"  [OK] 成功提取视频直链")
                # Stream the entry right away instead of waiting for the batch.
                self.emit_stream_video(video)
            else:
                self.log(f"  [FAIL] 未找到视频直链: {video['viewkey']}")
                video["video_url"] = ""
                self.results.append(video)
                self.skip_viewkeys.add(video['viewkey'])
                self.failed_videos += 1

    def _save_results(self):
        """Write the accumulated results and counters to ``self.output_file`` as JSON.

        The data is written to ``<output_file>.part`` and then moved into place
        with ``os.replace`` so readers never see a half-written file. If saving
        fails, the JSON is printed as a backup: to stderr in stream mode (so
        the stdout JSONL stream stays intact), otherwise to stdout.
        """
        output_data = {
            "crawl_time": datetime.now().isoformat(),
            "source_url": BASE_URL,
            "pages_crawled": self.pages_crawled,
            "total_videos": len(self.results),
            "successful": self.processed_videos,
            "skipped": self.skipped_videos,
            "failed": self.failed_videos,
            "videos": self.results
        }

        out_path = self.output_file
        tmp_path = out_path + ".part"
        try:
            os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True)
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(output_data, f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, out_path)
            self.log(f"结果已保存到: {os.path.abspath(out_path)}")
        except Exception as e:
            self.log(f"保存文件失败: {e}")
            # Best effort: do not leave a stale partial file behind.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            backup_out = sys.stderr if self.stream_output else sys.stdout
            print("\n--- 备份输出 ---", file=backup_out)
            print(json.dumps(output_data, ensure_ascii=False, indent=2), file=backup_out)

    def _print_summary(self):
        """Log the final counters of the crawl."""
        self.log("")
        self.log("=" * 60)
        self.log("爬取完成!")
        self.log("=" * 60)
        self.log(f"爬取页数: {self.pages_crawled}")
        self.log(f"总视频数: {len(self.results)}")
        self.log(f"成功提取直链: {self.processed_videos}")
        self.log(f"跳过(已处理): {self.skipped_videos}")
        self.log(f"失败/缺失直链: {self.failed_videos}")
        self.log(f"输出文件: {os.path.abspath(self.output_file)}")
        self.log("=" * 60)
|
||||
|
||||
|
||||
def print_help():
    """Print the built-in usage and configuration help text to stdout."""
    help_text = """
================================================
91porn 视频爬虫 v1.0
================================================

本脚本将爬取 91porn "本月最热" 分类下的所有视频信息:
- 视频名称
- 封面图直链
- 视频直链 (MP4)

依赖安装:
pip install requests beautifulsoup4 lxml

使用方法:
python spider_91porn.py

配置说明 (编辑脚本内 "配置区域"):
MIN_PAGE_DELAY / MAX_PAGE_DELAY : 列表页请求间隔 (默认 3-6 秒)
MIN_DETAIL_DELAY / MAX_DETAIL_DELAY : 详情页请求间隔 (默认 2-5 秒)
MAX_PAGES : 限制最大爬取页数 (None=不限, 如 5=只爬前5页)
OUTPUT_FILE : 输出文件名 (默认 91porn_videos.json)

按 Ctrl+C 可随时中断并保存已爬取的数据

注意:
1. 视频直链包含时效性token,会过期,需定期重新爬取
2. 脚本已内置随机延时,请勿移除,避免对服务器造成压力
3. 如遇到Cloudflare拦截,需要先通过浏览器获取Cookie
4. 本脚本仅供学习交流,请遵守当地法律法规
================================================
"""
    print(help_text)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments, build the spider and run the crawl.

    Modes, in order of precedence:
      - ``--target-new N``: crawl from page 1 until N new videos were processed;
      - ``--page N``: crawl ``--max-pages`` pages (default 1) starting at page N;
      - otherwise: full crawl from page 1.

    On Ctrl+C the results collected so far are saved and the process exits
    with status 0. On any other exception the results are saved and the
    exception is re-raised.
    """
    cli_args = sys.argv[1:]
    # Unknown flags are tolerated below, so the help flags must be honoured in
    # any position; otherwise "--quiet --help" would start a crawl.
    if cli_args and (cli_args[0] == 'help' or '-h' in cli_args or '--help' in cli_args):
        print_help()
        return

    parser = argparse.ArgumentParser(
        prog="spider_91porn.py",
        description="91porn 视频元数据爬虫",
        add_help=False,  # -h/--help is served by print_help() instead
    )
    parser.add_argument("--page", type=int, default=None,
                        help="只爬指定页(单页模式,配合 --output 用于定时任务)")
    parser.add_argument("--output", type=str, default=None,
                        help="输出 JSON 路径,覆盖默认 OUTPUT_FILE")
    parser.add_argument("--max-pages", type=int, default=None,
                        help="单页模式下,从 --page 起最多再爬几页(默认 1)")
    parser.add_argument("--no-resume", action="store_true",
                        help="禁用断点续爬(单页模式默认禁用)")
    parser.add_argument("--quiet", action="store_true",
                        help="压缩日志,每条视频只输出关键事件")
    parser.add_argument("--target-new", type=int, default=None,
                        help="目标新增模式:从 page 1 起翻页直到累计处理这么多新源视频后停止(backend 凌晨任务用)")
    parser.add_argument("--seen-viewkeys-file", type=str, default=None,
                        help="文件路径,每行一个已处理过的 viewkey 或 mp4 源 ID;脚本会跳过这些视频")
    parser.add_argument("--stream-output", action="store_true",
                        help="流式模式:每解析一条视频直链就立即把它作为一行 JSON 写到 stdout 并 flush;"
                             "日志改走 stderr。配合 backend 边读边下载使用。")

    args, _ = parser.parse_known_args()
    # A non-positive target would be normalised to "no target" by the spider,
    # silently turning the run into an unbounded full crawl; reject it instead.
    if args.target_new is not None and args.target_new <= 0:
        parser.error("--target-new 必须是正整数")

    # In stream mode stdout is reserved for the JSONL stream.
    cli_out = sys.stderr if args.stream_output else sys.stdout
    prefer_ipv4_for_plain_socks5_proxy()

    print("""
================================================
91porn 视频爬虫启动中...
================================================
按 Ctrl+C 可随时中断并保存进度
""", file=cli_out)

    # Load the already-known IDs supplied by the caller.
    seen_viewkeys = []
    if args.seen_viewkeys_file:
        try:
            with open(args.seen_viewkeys_file, 'r', encoding='utf-8') as f:
                seen_viewkeys = [line.strip() for line in f if line.strip()]
        except FileNotFoundError:
            print(f"警告: --seen-viewkeys-file 不存在: {args.seen_viewkeys_file}", file=cli_out)
        except Exception as e:
            print(f"警告: 读取 --seen-viewkeys-file 失败: {e}", file=cli_out)

    if args.target_new is not None:
        # Target mode: de-duplication relies on seen_viewkeys, so the output
        # file is not read back (resume=False).
        spider = Porn91Spider(
            output_file=args.output,
            start_page=1,
            max_pages=None,
            resume=False,
            quiet=args.quiet,
            target_new=args.target_new,
            seen_viewkeys=seen_viewkeys,
            stream_output=args.stream_output,
        )
    elif args.page is not None:
        # Page mode: start at --page and crawl --max-pages pages (default 1).
        start_page = max(1, args.page)
        max_pages = args.max_pages if args.max_pages and args.max_pages > 0 else 1
        spider = Porn91Spider(
            output_file=args.output,
            start_page=start_page,
            max_pages=max_pages,
            resume=False,
            quiet=args.quiet,
            seen_viewkeys=seen_viewkeys,
            stream_output=args.stream_output,
        )
    else:
        # Full mode: crawl from page 1 to the end.
        spider = Porn91Spider(
            output_file=args.output,
            resume=False if args.no_resume else None,
            quiet=args.quiet,
            seen_viewkeys=seen_viewkeys,
            stream_output=args.stream_output,
        )

    try:
        spider.crawl()
    except KeyboardInterrupt:
        spider.log("\n用户中断,正在保存已爬取的数据...")
        spider._save_results()
        spider._print_summary()
        sys.exit(0)
    except Exception as e:
        spider.log(f"发生未预料的错误: {e}")
        import traceback
        traceback.print_exc()
        spider._save_results()
        raise


if __name__ == "__main__":
    main()
|
||||
@@ -1,135 +0,0 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Common Commands
|
||||
|
||||
### Full Local Development
|
||||
|
||||
```bash
|
||||
npm install
|
||||
./start.sh
|
||||
```
|
||||
|
||||
`./start.sh` starts the Go backend on `127.0.0.1:9192` and the frontend on `0.0.0.0:9191`. By default it builds the frontend and runs Vite preview mode. Use hot reload with:
|
||||
|
||||
```bash
|
||||
FRONTEND_MODE=dev ./start.sh --restart
|
||||
```
|
||||
|
||||
Useful variants:
|
||||
|
||||
```bash
|
||||
./start.sh --status
|
||||
./start.sh --restart
|
||||
./start.sh --stop
|
||||
```
|
||||
|
||||
### Frontend
|
||||
|
||||
Run from the repository root:
|
||||
|
||||
```bash
|
||||
npm run dev # Vite dev server; default port comes from Vite unless overridden
|
||||
npm run dev:raw # Vite dev server on 127.0.0.1:5173
|
||||
npm run build # tsc -b && vite build
|
||||
npm run preview # Vite preview; vite.config.ts uses port 9191
|
||||
npm run lint # TypeScript no-emit check
|
||||
npm test # node --import tsx --test tests/*.test.ts
|
||||
```
|
||||
|
||||
Run one frontend test:
|
||||
|
||||
```bash
|
||||
node --import tsx --test tests/previewIntent.test.ts
|
||||
```
|
||||
|
||||
### Backend
|
||||
|
||||
Run from `backend/` unless noted:
|
||||
|
||||
```bash
|
||||
go run ./cmd/server
|
||||
go test ./... -count=1
|
||||
go build -o video-server ./cmd/server
|
||||
```
|
||||
|
||||
Run one backend package or test:
|
||||
|
||||
```bash
|
||||
go test ./internal/scanner -count=1
|
||||
go test ./internal/scanner -run TestParse -count=1
|
||||
```
|
||||
|
||||
The backend requires Go 1.23+ and uses vendored dependencies in `backend/vendor/`, so keep `go mod vendor` in sync after dependency changes.
|
||||
|
||||
### Release and Deployment
|
||||
|
||||
```bash
|
||||
scripts/build-release.sh # builds Linux amd64/arm64 release tarballs into release/
|
||||
sudo bash install.sh # prebuilt installer flow used by README
|
||||
sudo bash deploy.sh # build from current checkout and install systemd services
|
||||
```
|
||||
|
||||
Docker uses the root `Dockerfile` and `docker-compose.yml`. The runtime image exposes port `9191` and stores persistent data under `/opt/video-site-91/data`.
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
This is a private video aggregation site with a React/Vite frontend and a Go backend.
|
||||
|
||||
### Frontend
|
||||
|
||||
The frontend is a React 18 SPA under `src/`. `src/main.tsx` mounts `BrowserRouter`, `ToastProvider`, and `AuthProvider`, then renders `src/App.tsx`. `App.tsx` defines the public app routes (`/`, `/list`, `/shorts`, `/upload`, `/video/:id`) and admin routes under `/admin`; both main-site and admin pages are wrapped in `RequireAuth`, while `/login` is public.
|
||||
|
||||
Frontend API calls are split by surface:
|
||||
|
||||
- `src/data/videos.ts` calls the main authenticated API under `/api` and upload/proxy-related endpoints.
|
||||
- `src/admin/api.ts` is the admin API client for `/admin/api`, always sending cookies and raising `UnauthorizedError` on `401`.
|
||||
|
||||
`vite.config.ts` proxies `/api`, `/p`, and `/admin/api` to `http://127.0.0.1:9192`, with frontend dev/preview served on port `9191` by default. The alias `@` maps to `src`.
|
||||
|
||||
Styling is plain CSS loaded from `src/main.tsx` in token/base/layout/navigation/search/video/admin layers. Shared UI lives in `src/components`, page-level screens in `src/pages`, and admin screens in `src/admin`.
|
||||
|
||||
### Backend
|
||||
|
||||
The backend entrypoint is `backend/cmd/server/main.go`. It loads `config.yaml` or `VIDEO_CONFIG`, creates the SQLite catalog and preview directories, builds the app state, registers API routes, starts the nightly runner, and then asynchronously attaches configured external drives so slow upstream login checks do not block port binding.
|
||||
|
||||
Important backend packages:
|
||||
|
||||
- `internal/config`: YAML config loading and first-run admin credential setup.
|
||||
- `internal/catalog`: SQLite catalog, schema migration, video metadata, settings, tags, drive records, generation status, and deduplication state. It opens SQLite with WAL and a busy timeout.
|
||||
- `internal/drives`: provider abstraction. Implementations include `quark`, `p115`, `pikpak`, `wopan`, `onedrive`, `localstorage`, `localupload`, and `spider91`.
|
||||
- `internal/scanner`: recursively lists drive directories, parses filenames/tags, upserts catalog videos, applies skip-directory rules, and enqueues newly discovered videos.
|
||||
- `internal/preview`: ffprobe/ffmpeg thumbnail and teaser generation workers. Generated assets are local files under the configured preview directory.
|
||||
- `internal/fingerprint`: asynchronous sampled SHA-256 worker used for cross-drive duplicate detection.
|
||||
- `internal/proxy`: `/p/*` media serving. Some providers redirect with `302` to signed CDN URLs, while providers requiring backend-held headers are reverse-proxied with Range support.
|
||||
- `internal/api`: main API and admin API route handlers.
|
||||
- `internal/nightly`: daily pipeline for drive scans, spider91 crawl, migration, queue drain, and duplicate asset cleanup.
|
||||
- `internal/spider91migrate`: migration from spider91 downloads to a configured cloud drive.
|
||||
|
||||
### Runtime Flow
|
||||
|
||||
1. Admin adds or edits drives through `/admin/drives`, which persists drive config in the catalog.
|
||||
2. The server attaches the drive implementation into the proxy registry and can trigger scans.
|
||||
3. Scans convert provider files into catalog video rows, parse titles/authors/tags from filenames, and queue preview/fingerprint work.
|
||||
4. The frontend lists videos through `/api/home`, `/api/list`, `/api/video/:id`, and streams media through `/p/*` endpoints.
|
||||
5. The nightly runner performs the scheduled end-to-end maintenance pipeline; admins can trigger it manually through `/admin/api/jobs/nightly/run`.
|
||||
|
||||
### Configuration and Data
|
||||
|
||||
Backend defaults come from `backend/config.example.yaml`. On first backend start, `config.yaml` is created automatically if missing. Default local development paths are:
|
||||
|
||||
- Backend listen address: `127.0.0.1:9192`
|
||||
- SQLite DB: `backend/data/video-site.db`
|
||||
- Generated previews/thumbs: `backend/data/previews`
|
||||
|
||||
Docker and installer deployments rewrite config paths so data lives under `/opt/video-site-91/data` or the mounted `./data` directory.
|
||||
|
||||
`VIDEO_FRONTEND_DIR` controls where the Go server looks for built frontend assets. If unset, it serves `./dist` when present. Backend routes (`/api`, `/admin/api`, `/p`) are excluded from the SPA fallback.
|
||||
|
||||
## Notes for Changes
|
||||
|
||||
- Main-site API routes and proxy routes require authentication; only login/setup and `/api/settings/theme` are intentionally public.
|
||||
- When adding a new drive provider, implement `internal/drives.Drive`, persist any needed config through catalog/admin APIs, attach it in `cmd/server`, and decide whether `/p/stream` should redirect or reverse-proxy in `internal/proxy`.
|
||||
- Generated thumbnails and teasers are local runtime assets; do not treat them as source files.
|
||||
- Frontend tests use Node's built-in test runner with `tsx`; TypeScript linting only checks `src` through the root `tsconfig.json`.
|
||||
+2
-1
@@ -41,12 +41,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
tzdata \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN python3 -c "import requests, bs4, lxml, socks"
|
||||
|
||||
WORKDIR /opt/video-site-91
|
||||
|
||||
COPY --from=backend /out/server ./server
|
||||
COPY --from=frontend /app/dist ./dist
|
||||
COPY backend/config.example.yaml ./config.example.yaml
|
||||
COPY 91VideoSpider/ ./91VideoSpider/
|
||||
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
|
||||
|
||||
ARG VERSION=dev
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 nianzhibai
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -12,21 +12,19 @@
|
||||
<a href="#快速开始">快速开始</a> ·
|
||||
<a href="#功能特性">功能特性</a> ·
|
||||
<a href="#预览图">预览图</a> ·
|
||||
<a href="#数据存放位置">数据目录</a>
|
||||
<a href="#数据存放位置">数据目录</a> ·
|
||||
<a href="#许可证">许可证</a>
|
||||
</p>
|
||||
|
||||
---
|
||||
|
||||
## 功能特性
|
||||
|
||||
- **多后端支持** — 兼容 115 云盘、PikPak 云盘、OneDrive、Google Drive 和本地存储
|
||||
- **低带宽播放** — 115 云盘、PikPak 云盘、OneDrive 都支持302模式,在线播放视频时,不占用服务器带宽,播放体验不受服务器带宽影响;Google Drive 不支持302模式,走服务器中转,观看体验会受服务器带宽影响
|
||||
- **多后端支持** — 兼容 115 云盘、PikPak 云盘、123网盘、联通网盘、光鸭网盘、OneDrive、Google Drive 和本地存储
|
||||
- **低带宽播放** — 115 云盘、PikPak 云盘、123网盘、联通网盘、光鸭网盘、OneDrive 支持302模式,在线播放视频时,不占用服务器带宽,播放体验不受服务器带宽影响;Google Drive 不支持302模式,走服务器中转,观看体验会受服务器带宽影响
|
||||
- **封面 & 预览片段** — 自动为每个视频生成封面图和预览片段,首页快速选片
|
||||
- **91 爬虫** — 内置爬虫,支持抓取 91 本月最热视频
|
||||
- **双主题** — 黑黄经典主题 / 粉白清新主题,随时切换
|
||||
- **爬虫脚本** — 项目支持导入自定义脚本,但是有一些规范,具体可以参考 [SpiderFor91](https://github.com/Just-Spider/SpiderFor91),项目不再内置任何爬虫脚本
|
||||
- **短视频模式** — 一键切换抖音风格,沉浸刷片
|
||||
- **低资源占用** — 2C2G 服务器稳定运行,主要性能消耗就是封面图和预览视频的生成
|
||||
|
||||
---
|
||||
|
||||
## 预览图
|
||||
@@ -83,6 +81,14 @@ sudo bash install.sh
|
||||
|
||||
> `video-site-91` 为等效别名,两者可互换使用。
|
||||
|
||||
**已部署用户升级:**
|
||||
|
||||
```bash
|
||||
91 update
|
||||
```
|
||||
|
||||
升级会保留现有 `config.yaml`、数据库、封面、预览、上传文件和爬虫数据。脚本会自动安装或检查 `ffmpeg` / `ffprobe` 等运行依赖,并在新版本启动失败时回滚到升级前文件。
|
||||
|
||||
**自定义端口:**
|
||||
|
||||
```bash
|
||||
@@ -154,6 +160,7 @@ docker compose up -d # 更新并重启
|
||||
```
|
||||
|
||||
> 所有配置、数据库、封面、预览及上传文件均保存在 `./data/` 目录下。
|
||||
> 从旧版本升级 Docker 部署时,执行 `docker compose pull && docker compose up -d` 即可;`./data/` 不会被镜像更新覆盖。
|
||||
|
||||
---
|
||||
|
||||
@@ -179,15 +186,6 @@ docker compose up -d # 更新并重启
|
||||
|
||||
---
|
||||
|
||||
## 更多文档
|
||||
|
||||
| 文档 | 内容 |
|
||||
|------|------|
|
||||
| [backend/README.md](backend/README.md) | 后端实现、接口说明、网盘字段 |
|
||||
| [video-site-implementation-plan.md](video-site-implementation-plan.md) | 完整实现方案 |
|
||||
|
||||
---
|
||||
|
||||
## 使用须知
|
||||
|
||||
本项目面向**个人私有部署**,请仅接入你有权访问和管理的内容,并遵守对应网盘、站点的服务条款及所在地法律法规。
|
||||
@@ -196,6 +194,20 @@ docker compose up -d # 更新并重启
|
||||
|
||||
---
|
||||
|
||||
## PR提交规范
|
||||
欢迎大家提交PR,一起来完善这个项目,但是这里要说明一下PR提交的规范
|
||||
1. 一个PR的功能改动要单一,不建议在一个PR中修改大量功能。每个PR只做一项功能修改,这样也更容易Merge
|
||||
2. 完善项目的PR比新增功能的PR更容易Merge(例如:你发现开发者没有实现爬取的视频上传到某个网盘,并且你有这个需求,此时你可以实现一下这个功能然后提交PR,也感谢你为开发者分担工作量)
|
||||
3. 新增功能的PR不容易Merge,因为某些功能并不是所有人都需要的,如果一味地不断增加功能,会让项目变得过于庞大。当然,如果你确信你的新功能和想法很好,并且相信它会给项目带来很大的改善,那么热烈欢迎你的PR
|
||||
|
||||
---
|
||||
|
||||
## 许可证
|
||||
|
||||
本项目基于 [MIT License](LICENSE) 开源。
|
||||
|
||||
---
|
||||
|
||||
## 致谢
|
||||
|
||||
- [OpenList](https://github.com/OpenListTeam/OpenList) — 优秀的开源项目
|
||||
|
||||
+18
-16
@@ -2,8 +2,8 @@
|
||||
|
||||
视频聚合站的 Go 后端。提供以下四项能力:
|
||||
|
||||
1. 多家网盘统一抽象(夸克 / 115 / PikPak / 联通沃盘 / OneDrive / Google Drive / 本地存储)
|
||||
2. 视频元数据目录(SQLite)+ 扫描 + teaser 预生成
|
||||
1. 多家网盘统一抽象(夸克 / 115 / PikPak / 联通网盘 / 光鸭网盘 / OneDrive / Google Drive / 本地存储)
|
||||
2. 视频元数据目录(SQLite)+ 扫描 + 预览视频预生成
|
||||
3. REST API(前台)+ 管理后台 + 直链代理
|
||||
4. 标签池、视频隐藏、按网盘统计和详情页来源网盘类型展示能力
|
||||
|
||||
@@ -19,12 +19,13 @@ internal/
|
||||
quark/ 夸克(自己实现,参考 OpenList quark_uc)
|
||||
p115/ 115(壳子 + SheltonZhu/115driver)
|
||||
pikpak/ PikPak(自己实现,参考 OpenList pikpak)
|
||||
wopan/ 联通沃盘(壳子 + OpenListTeam/wopan-sdk-go)
|
||||
wopan/ 联通网盘(壳子 + OpenListTeam/wopan-sdk-go)
|
||||
guangyapan/ 光鸭网盘(参考 AList GuangYaPan)
|
||||
onedrive/ OneDrive(OpenList 在线续期 + Microsoft Graph 文件接口)
|
||||
googledrive/ Google Drive(OpenList 在线续期 + Google Drive API;播放走后端代理)
|
||||
localstorage/ 本地目录扫描(服务器已有视频目录)
|
||||
scanner/ 扫目录 → 落库
|
||||
preview/ ffmpeg 抽封面和生成多段 teaser
|
||||
preview/ ffmpeg 抽封面和生成多段预览视频
|
||||
proxy/ /p/stream/*、/p/preview/* 代理
|
||||
auth/ 管理员 session
|
||||
api/ REST 路由
|
||||
@@ -81,7 +82,7 @@ go run ./cmd/server 后端 9192
|
||||
|
||||
## 添加一个盘
|
||||
|
||||
推荐在前端管理后台 `/admin/drives` 新增网盘。保存后会立即挂载并触发扫描;视频结果可在 `/admin/videos` 按网盘查看,每页 100 条,页面会同时显示各网盘 Teaser 已生成、待生成、失败数量。
|
||||
推荐在前端管理后台 `/admin/drives` 新增网盘。保存后会立即挂载并触发扫描;视频结果可在 `/admin/videos` 按网盘查看,每页 100 条,页面会同时显示各网盘预览视频已生成、待生成、失败数量。
|
||||
|
||||
也可以直接调用后端接口:
|
||||
|
||||
@@ -108,8 +109,9 @@ go run ./cmd/server 后端 9192
|
||||
| p115 | `cookie`(形如 `UID=...; CID=...; SEID=...; KID=...`) |
|
||||
| pikpak | `username`、`password`(token、验证码和设备 ID 由服务端自动处理并保存) |
|
||||
| wopan | `access_token`、`refresh_token`,可选 `family_id` |
|
||||
| guangyapan | 推荐后台扫码登录自动写入 `access_token`、`refresh_token`;也可手工填写 token;可选 `root_path` |
|
||||
| onedrive | `refresh_token` |
|
||||
| googledrive | `refresh_token` |
|
||||
| googledrive | 默认只需 `refresh_token`;自建 OAuth 客户端模式还需 `use_online_api=false`、`client_id`、`client_secret` |
|
||||
| localstorage | `path`(服务器上的已有视频目录,如 `/mnt/videos`) |
|
||||
|
||||
### PikPak 速度说明
|
||||
@@ -120,7 +122,7 @@ go run ./cmd/server 后端 9192
|
||||
|
||||
OneDrive 按 OpenList 默认应用方式调用 `https://api.oplist.org/onedrive/renewapi` 在线刷新 token,不需要配置 Azure 应用的 `client_id` / `client_secret` / `redirect_uri`。后台新建 OneDrive 时只需要填 OpenList 代刷得到的 `refresh_token`;服务端会默认挂载根目录并自动回写新 token。
|
||||
|
||||
Google Drive 按 OpenList 在线 API 调用 `https://api.oplist.org/googleui/renewapi` 刷新 token。后台新建 Google Drive 时只需要填 OpenList Google Drive 获取到的 `refresh_token`。Google Drive 下载地址必须携带 `Authorization` 头,浏览器不能直接 302 使用,所以本站会由后端代理 `/p/stream` 播放,不加入零带宽 302 白名单。
|
||||
Google Drive 默认按 OpenList 在线 API 调用 `https://api.oplist.org/googleui/renewapi` 刷新 token。后台新建 Google Drive 时只需要填 OpenList Google Drive 获取到的 `refresh_token`。如果不想依赖 OpenList 在线 API,可以关闭“使用 OpenList 在线续期 API”,并填写同一个 Google OAuth 客户端授权得到的 `refresh_token`、`client_id`、`client_secret`,服务端会直接请求 Google OAuth token 接口续期。Google Drive 下载地址必须携带 `Authorization` 头,浏览器不能直接 302 使用,所以本站会由后端代理 `/p/stream` 播放,不加入零带宽 302 白名单。
|
||||
|
||||
## 文件名约定
|
||||
|
||||
@@ -145,22 +147,22 @@ Google Drive 按 OpenList 在线 API 调用 `https://api.oplist.org/googleui/ren
|
||||
|
||||
1. 同一网盘同一文件按 `(drive_id, file_id)` 形成稳定视频 ID,重复扫描只更新同一行。
|
||||
2. 扫描时优先按网盘侧 `content_hash` 去重;没有 hash 时退化为 `file_name + size_bytes`。
|
||||
3. 扫描、爬虫、本地上传或服务启动挂载网盘后,后台指纹 worker 会异步读取视频的少量 Range 片段,生成 `sampled_sha256`。前台列表、首页、搜索、推荐会按 `size_bytes + sampled_sha256` 只展示最早入库的 canonical 视频。
|
||||
3. 扫描、本地上传或服务启动挂载网盘后,后台指纹 worker 会异步读取视频的少量 Range 片段,生成 `sampled_sha256`。前台列表、首页、搜索、推荐会按 `size_bytes + sampled_sha256` 只展示最早入库的 canonical 视频。
|
||||
|
||||
`sampled_sha256` 是文件级去重:适合识别同一个视频文件被复制到 115 / PikPak / OneDrive 等不同网盘的情况。它不会删除任何网盘文件,也不用于识别转码、裁剪、加水印后的同源视频。
|
||||
`sampled_sha256` 是文件级去重:适合识别同一个视频文件被复制到 115 / PikPak / OneDrive / Google Drive 等不同网盘的情况。它不会删除任何网盘文件,也不用于识别转码、裁剪、加水印后的同源视频。
|
||||
|
||||
封面和 teaser 仍然优先生成,不等待指纹完成。夜间流水线最后会做一次重复资产清理:对 `size_bytes + sampled_sha256` 命中的非 canonical 视频,只删除本机生成的重复封面和 teaser,并把对应字段重置为 `pending`。网盘原文件和视频元数据记录不会被删除;如果 canonical 视频以后被移除,这些重复项会重新进入生成队列。
|
||||
封面和预览视频仍然优先生成,不等待指纹完成。夜间流水线最后会做一次重复资产清理:对 `size_bytes + sampled_sha256` 命中的非 canonical 视频,只删除本机生成的重复封面和预览视频,并把对应字段重置为 `pending`。网盘原文件和视频元数据记录不会被删除;如果 canonical 视频以后被移除,这些重复项会重新进入生成队列。
|
||||
|
||||
## 管理能力
|
||||
|
||||
- `/admin/drives`:新增、编辑、删除网盘,触发扫描。
|
||||
- `/admin/videos`:按网盘筛选视频,每页 100 条分页,查看各网盘 Teaser 统计,编辑标题/作者/分类/标签,单条或全量重生 teaser。
|
||||
- `/admin/videos`:按网盘筛选视频,每页 100 条分页,查看各网盘预览视频统计,编辑标题/作者/分类/标签,单条或全量重生预览视频;拉黑视频页可查看被删除或被隐藏的视频,并支持移出黑名单后在下次扫盘重新入库。
|
||||
- `/admin/tags`:新增标签并用内置规则自动匹配已有视频;删除非系统标签时会从所有视频上同步移除该标签。
|
||||
- 播放页视频信息会展示来源网盘类型;同时提供“不再展示”,点击后会把视频标记为全局隐藏。隐藏视频不会再出现在首页、列表、搜索、相关推荐和详情接口中。目前没有管理后台恢复入口,如需恢复可把数据库里对应视频的 `hidden` 字段改回 `0`。
|
||||
- 播放页视频信息会展示来源网盘类型,并提供删除入口。被删除或被隐藏的视频会进入黑名单,不会再出现在首页、列表、搜索和详情接口中;在后台移出黑名单后,会在下次扫盘时重新发现并入库。
|
||||
|
||||
## Teaser 生成
|
||||
## 预览视频生成
|
||||
|
||||
scanner 扫到新视频会把 `(driveID, videoID)` 丢进 worker 队列。worker 会先用 `ffprobe` 探测时长,再用 `ffmpeg` 抽封面和生成无声 teaser:
|
||||
scanner 扫到新视频会把 `(driveID, videoID)` 丢进 worker 队列。worker 会先用 `ffprobe` 探测时长,再用 `ffmpeg` 抽封面和生成无声预览视频:
|
||||
|
||||
```
|
||||
ffmpeg -ss <起点> -headers "UA/Cookie/Referer" -i <直链> \
|
||||
@@ -168,9 +170,9 @@ ffmpeg -ss <起点> -headers "UA/Cookie/Referer" -i <直链> \
|
||||
-movflags +faststart -y <local>.mp4
|
||||
```
|
||||
|
||||
当前策略是每段固定 3 秒;30 秒以下最多 3 段,30 秒及以上固定 4 段;长视频在 20% 到 80% 区间均匀取段。生成的 teaser 和封面都只保存在本地 `data/previews/`,不会回写到网盘;旧数据中的 `preview_file_id` 会被忽略。
|
||||
当前策略是每段固定 3 秒;30 秒以下最多 3 段,30 秒及以上固定 4 段;长视频在 20% 到 80% 区间均匀取段。生成的预览视频和封面都只保存在本地 `data/previews/`,不会回写到网盘;旧数据中的 `preview_file_id` 会被忽略。
|
||||
|
||||
服务启动或网盘重新挂载时,如果 Teaser 开关已开启,后端会把历史 `pending` 任务重新入队,避免重启后长期停在“待生成”。OneDrive 扫盘和直链生成 teaser / 封面时可能触发 Microsoft Graph 429、`TooManyRequests`、`activityLimitReached` 或 throttled 文本;后端会识别这类错误并让当前网盘进入冷却期,保留任务为 `pending`,避免连续请求触发更严重限流。扫盘阶段会按 `Retry-After` 或默认冷却时间等待后继续当前目录。
|
||||
服务启动或网盘重新挂载时,如果预览视频开关已开启,后端会把历史 `pending` 任务重新入队,避免重启后长期停在“待生成”。OneDrive 扫盘和直链生成预览视频 / 封面时可能触发 Microsoft Graph 429、`TooManyRequests`、`activityLimitReached` 或 throttled 文本;Google Drive 可能返回 429、`usageLimits`、`userRateLimitExceeded`、`downloadQuotaExceeded` 等限制标识。后端会识别这类错误并让当前网盘进入冷却期,保留任务为 `pending`,避免连续请求触发更严重限流。扫盘阶段会按 `Retry-After` 或默认冷却时间等待后继续当前目录。
|
||||
|
||||
前端卡片的 `previewSrc` 统一指向 `/p/preview/<videoID>`,后端只从本地 `preview_local` 文件读取。
|
||||
|
||||
|
||||
+2314
-393
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,32 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
)
|
||||
|
||||
func TestCrawlerIntCredFallbacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
d *catalog.Drive
|
||||
key string
|
||||
def int
|
||||
want int
|
||||
}{
|
||||
{"nil drive", nil, "page", 1, 1},
|
||||
{"nil creds", &catalog.Drive{}, "page", 7, 7},
|
||||
{"empty value", &catalog.Drive{Credentials: map[string]string{"page": ""}}, "page", 5, 5},
|
||||
{"non-numeric", &catalog.Drive{Credentials: map[string]string{"page": "abc"}}, "page", 9, 9},
|
||||
{"happy", &catalog.Drive{Credentials: map[string]string{"page": "42"}}, "page", 1, 42},
|
||||
{"missing key", &catalog.Drive{Credentials: map[string]string{"a": "1"}}, "b", 99, 99},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := crawlerIntCred(tc.d, tc.key, tc.def)
|
||||
if got != tc.want {
|
||||
t.Fatalf("crawlerIntCred(%s) = %d, want %d", tc.name, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,89 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/proxy"
|
||||
)
|
||||
|
||||
func TestSpider91IntCredFallbacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
d *catalog.Drive
|
||||
key string
|
||||
def int
|
||||
want int
|
||||
}{
|
||||
{"nil drive", nil, "page", 1, 1},
|
||||
{"nil creds", &catalog.Drive{}, "page", 7, 7},
|
||||
{"empty value", &catalog.Drive{Credentials: map[string]string{"page": ""}}, "page", 5, 5},
|
||||
{"non-numeric", &catalog.Drive{Credentials: map[string]string{"page": "abc"}}, "page", 9, 9},
|
||||
{"happy", &catalog.Drive{Credentials: map[string]string{"page": "42"}}, "page", 1, 42},
|
||||
{"missing key", &catalog.Drive{Credentials: map[string]string{"a": "1"}}, "b", 99, 99},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := spider91IntCred(tc.d, tc.key, tc.def)
|
||||
if got != tc.want {
|
||||
t.Fatalf("spider91IntCred(%s) = %d, want %d", tc.name, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpider91UploadDriveIDDoesNotAutoSelectTarget(t *testing.T) {
|
||||
reg := proxy.NewRegistry()
|
||||
reg.Set("p115-one", &spider91UploadTargetFakeDrive{id: "p115-one", kind: "p115"})
|
||||
reg.Set("onedrive-one", &spider91UploadTargetFakeDrive{id: "onedrive-one", kind: "onedrive"})
|
||||
|
||||
app := &App{registry: reg}
|
||||
if got := app.Spider91UploadDriveID(); got != "" {
|
||||
t.Fatalf("empty upload target selected %q, want local-only empty target", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "p115-one"
|
||||
if got := app.Spider91UploadDriveID(); got != "p115-one" {
|
||||
t.Fatalf("explicit upload target = %q, want p115-one", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "onedrive-one"
|
||||
if got := app.Spider91UploadDriveID(); got != "onedrive-one" {
|
||||
t.Fatalf("explicit onedrive upload target = %q, want onedrive-one", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "missing"
|
||||
if got := app.Spider91UploadDriveID(); got != "" {
|
||||
t.Fatalf("missing upload target = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
type spider91UploadTargetFakeDrive struct {
|
||||
id string
|
||||
kind string
|
||||
}
|
||||
|
||||
func (d *spider91UploadTargetFakeDrive) Kind() string { return d.kind }
|
||||
func (d *spider91UploadTargetFakeDrive) ID() string { return d.id }
|
||||
func (d *spider91UploadTargetFakeDrive) Init(context.Context) error {
|
||||
return nil
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) List(context.Context, string) ([]drives.Entry, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) Stat(context.Context, string) (*drives.Entry, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) StreamURL(context.Context, string) (*drives.StreamLink, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) RootID() string { return "root" }
|
||||
+1465
-11
File diff suppressed because it is too large
Load Diff
+26
-11
@@ -22,7 +22,7 @@ server:
|
||||
storage:
|
||||
# SQLite 数据库文件路径
|
||||
db_path: "./data/video-site.db"
|
||||
# 本地 teaser 和封面目录
|
||||
# 本地预览视频和封面目录
|
||||
local_preview_dir: "./data/previews"
|
||||
|
||||
scanner:
|
||||
@@ -33,33 +33,30 @@ scanner:
|
||||
# 单次扫描每家网盘目录递归层数上限
|
||||
max_depth: 5
|
||||
# 被扫描的扩展名
|
||||
video_extensions: [".mp4", ".mkv", ".mov", ".webm", ".avi"]
|
||||
video_extensions: [".mp4", ".mkv", ".mov", ".webm", ".avi", ".strm"]
|
||||
|
||||
nightly:
|
||||
# 凌晨流水线触发整点(0-23),默认 1 即每天 01:00。流程:
|
||||
# Phase 1 扫所有非 spider91 / 非 localupload 网盘 → 检测新增 / 删除
|
||||
# → 入队封面和 teaser → 等所有队列 idle
|
||||
# Phase 2 spider91 爬虫(如配置)→ 入队 teaser → 等队列 idle
|
||||
# Phase 3 spider91 → 云盘迁移(一次性 sweep)
|
||||
# 凌晨流水线触发整点(0-23),默认 1 即每天 01:00。
|
||||
# 运行时会统一编排扫描、媒体资产生成和后续清理任务。
|
||||
cron_hour: 1
|
||||
# 单次流水线总耗时上限(软超时);超过后当前 phase 跑完不启动后续 phase。
|
||||
max_duration: 6h
|
||||
|
||||
preview:
|
||||
# 是否启用 ffmpeg 抽帧生成 teaser
|
||||
# 是否启用 ffmpeg 抽帧生成预览视频
|
||||
enabled: true
|
||||
# ffmpeg / ffprobe 可执行文件名或绝对路径
|
||||
ffmpeg_path: "ffmpeg"
|
||||
ffprobe_path: "ffprobe"
|
||||
# teaser 每段时长(秒),实际生成时每段最多 3 秒
|
||||
# 预览视频每段时长(秒),实际生成时每段最多 3 秒
|
||||
duration_seconds: 3
|
||||
# 兼容旧配置;当前 30 秒以下最多 3 段,30 秒及以上固定 4 段
|
||||
segments: 3
|
||||
# teaser 视频宽度
|
||||
# 预览视频宽度
|
||||
width: 480
|
||||
|
||||
# 盘列表。上线后请通过管理后台添加,本文件可留空。
|
||||
# kind 支持 quark / p115 / pikpak / wopan / onedrive / googledrive / localstorage。
|
||||
# kind 支持 quark / p115 / p123 / pikpak / wopan / guangyapan / onedrive / googledrive / localstorage。
|
||||
# OneDrive 示例:
|
||||
# - id: "my-onedrive"
|
||||
# kind: "onedrive"
|
||||
@@ -74,11 +71,29 @@ preview:
|
||||
# root_id: "root"
|
||||
# params:
|
||||
# refresh_token: "..."
|
||||
# # 默认 use_online_api=true,会使用 OpenList 在线续期 API。
|
||||
# # 如需使用自己创建的 Google OAuth 客户端,取消下面三行注释:
|
||||
# # use_online_api: "false"
|
||||
# # client_id: "..."
|
||||
# # client_secret: "..."
|
||||
# 光鸭网盘示例:
|
||||
# - id: "my-guangyapan"
|
||||
# kind: "guangyapan"
|
||||
# name: "我的光鸭网盘"
|
||||
# # 留空表示光鸭网盘根目录;也可以填写光鸭目录 fileId
|
||||
# root_id: ""
|
||||
# params:
|
||||
# # 推荐在后台使用扫码登录自动写入 access_token / refresh_token。
|
||||
# refresh_token: "..."
|
||||
# # 可选:按路径解析扫描根目录,优先于 root_id
|
||||
# # root_path: "影视/电影"
|
||||
# 本地存储示例:
|
||||
# - id: "local-media"
|
||||
# kind: "localstorage"
|
||||
# name: "本地视频目录"
|
||||
# root_id: "/"
|
||||
# params:
|
||||
# # Docker 部署时这里和 .strm 里的绝对路径都必须使用容器内路径。
|
||||
# # 例如宿主机 /mnt/videos 挂载为 /media,就填写 /media。
|
||||
# path: "/mnt/videos"
|
||||
drives: []
|
||||
|
||||
+1
-1
@@ -10,6 +10,7 @@ require (
|
||||
github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible
|
||||
github.com/go-chi/chi/v5 v5.1.0
|
||||
github.com/go-resty/resty/v2 v2.14.0
|
||||
github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e
|
||||
golang.org/x/net v0.27.0
|
||||
golang.org/x/sys v0.30.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
@@ -28,7 +29,6 @@ require (
|
||||
github.com/pierrec/lz4/v4 v4.1.17 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e // indirect
|
||||
golang.org/x/crypto v0.25.0 // indirect
|
||||
golang.org/x/time v0.8.0 // indirect
|
||||
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect
|
||||
|
||||
+1462
-88
File diff suppressed because it is too large
Load Diff
+1585
-75
File diff suppressed because it is too large
Load Diff
+206
-112
@@ -11,10 +11,12 @@ import (
|
||||
"io"
|
||||
"math/rand/v2"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
@@ -23,7 +25,7 @@ import (
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives/localstorage"
|
||||
"github.com/video-site/backend/internal/drives/localupload"
|
||||
"github.com/video-site/backend/internal/drives/spider91"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
"github.com/video-site/backend/internal/proxy"
|
||||
)
|
||||
|
||||
@@ -52,8 +54,16 @@ type Server struct {
|
||||
LocalDir string
|
||||
UploadDir string
|
||||
OnVideoUploaded func(*catalog.Video)
|
||||
// OnHideVideo 处理前台「不再展示」。隐藏机制已废弃,改走拉黑逻辑:
|
||||
// 删除库中记录 + 本地封面/预览,保留网盘源文件,并写黑名单墓碑
|
||||
// (扫盘不再入库)。未注入时回退为旧的 hidden 标记。
|
||||
OnHideVideo func(ctx context.Context, videoID string) error
|
||||
|
||||
// GetTheme 返回当前生效的主题("dark" | "pink")。前台 /api/settings/theme 用,
|
||||
tagCacheMu sync.Mutex
|
||||
tagCacheUntil time.Time
|
||||
tagCache []TagDTO
|
||||
|
||||
// GetTheme 返回当前生效的主题("dark" | "pink" | "sky")。前台 /api/settings/theme 用,
|
||||
// 不需要登录。无注入时返回 "dark"。
|
||||
GetTheme func() string
|
||||
}
|
||||
@@ -83,7 +93,12 @@ type VideoDTO struct {
|
||||
Dislikes int `json:"dislikes"`
|
||||
PublishedAt string `json:"publishedAt"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
Category string `json:"category,omitempty"`
|
||||
}
|
||||
|
||||
type TagDTO struct {
|
||||
ID string `json:"id"`
|
||||
Label string `json:"label"`
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
type VideoDetailDTO struct {
|
||||
@@ -134,20 +149,19 @@ func (s *Server) RegisterRoutes(r chi.Router, a *auth.Authenticator) {
|
||||
r.Post("/api/shorts/next", s.handleShortsNext)
|
||||
|
||||
// 代理路由同样需要鉴权,防止绕过
|
||||
r.Get("/p/stream/{driveID}/{fileID}", s.handleStream)
|
||||
r.Get("/p/stream/{driveID}/*", s.handleStream)
|
||||
r.Get("/p/upload/{videoID}", s.handleUploadedVideo)
|
||||
r.Get("/p/spider91/{videoID}", s.handleSpider91Video)
|
||||
r.Get("/p/preview/{videoID}", s.handlePreview)
|
||||
r.Get("/p/thumb/{videoID}", s.handleThumb)
|
||||
})
|
||||
}
|
||||
|
||||
// handleGetTheme 返回当前生效的主题。无需登录。响应永远是
|
||||
// {"theme": "dark"} 或 {"theme": "pink"},便于前端无脑解析。
|
||||
// {"theme": "dark" | "pink" | "sky"},便于前端无脑解析。
|
||||
func (s *Server) handleGetTheme(w http.ResponseWriter, r *http.Request) {
|
||||
theme := "dark"
|
||||
if s.GetTheme != nil {
|
||||
if v := s.GetTheme(); v == "pink" || v == "dark" {
|
||||
if v := s.GetTheme(); v == "pink" || v == "dark" || v == "sky" {
|
||||
theme = v
|
||||
}
|
||||
}
|
||||
@@ -177,6 +191,27 @@ func (s *Server) handleHome(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
items = appendUniqueVideos(items, fallback, homePageSize)
|
||||
}
|
||||
if len(items) < homePageSize && len(excludeIDs) > 0 {
|
||||
// The browser keeps a recent-video exclude list so normal refreshes do not
|
||||
// repeat too quickly. On small libraries that list can cover every visible
|
||||
// video; when that happens, start a new random round instead of returning
|
||||
// an empty home section.
|
||||
roundExclude := videoIDs(items)
|
||||
fallback, err := s.Catalog.RandomVideosWithReadyThumbnailsExcluding(r.Context(), roundExclude, homePageSize-len(items))
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
items = appendUniqueVideos(items, fallback, homePageSize)
|
||||
}
|
||||
if len(items) < homePageSize && len(excludeIDs) > 0 {
|
||||
fallback, err := s.Catalog.RandomVideosExcluding(r.Context(), videoIDs(items), homePageSize-len(items))
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
items = appendUniqueVideos(items, fallback, homePageSize)
|
||||
}
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
writeJSON(w, http.StatusOK, mapVideos(items))
|
||||
}
|
||||
@@ -236,6 +271,16 @@ func appendUniqueVideos(dst []*catalog.Video, candidates []*catalog.Video, limit
|
||||
return dst
|
||||
}
|
||||
|
||||
func videoIDs(items []*catalog.Video) []string {
|
||||
out := make([]string, 0, len(items))
|
||||
for _, item := range items {
|
||||
if item != nil && item.ID != "" {
|
||||
out = append(out, item.ID)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (s *Server) handleList(w http.ResponseWriter, r *http.Request) {
|
||||
q := r.URL.Query()
|
||||
page, _ := strconv.Atoi(q.Get("page"))
|
||||
@@ -245,12 +290,12 @@ func (s *Server) handleList(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
sort := q.Get("sort")
|
||||
params := catalog.ListParams{
|
||||
Keyword: q.Get("q"),
|
||||
Tag: q.Get("tag"),
|
||||
Category: q.Get("cat"),
|
||||
Sort: sort,
|
||||
Page: page,
|
||||
PageSize: size,
|
||||
Keyword: q.Get("q"),
|
||||
Tag: q.Get("tag"),
|
||||
Sort: sort,
|
||||
Page: page,
|
||||
PageSize: size,
|
||||
SkipTotal: strings.EqualFold(q.Get("count"), "false"),
|
||||
}
|
||||
if sort == "" || sort == "latest" {
|
||||
params.PreferReadyThumbnails = true
|
||||
@@ -269,7 +314,7 @@ func (s *Server) handleList(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleVideoDetail(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
id := routeParam(r, "id")
|
||||
v, err := s.Catalog.GetVideo(r.Context(), id)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusNotFound, err)
|
||||
@@ -279,6 +324,15 @@ func (s *Server) handleVideoDetail(w http.ResponseWriter, r *http.Request) {
|
||||
writeErr(w, http.StatusNotFound, sql.ErrNoRows)
|
||||
return
|
||||
}
|
||||
if v.DriveID != localUploadDriveID {
|
||||
if _, err := s.Catalog.GetDrive(r.Context(), v.DriveID); err != nil {
|
||||
drives, listErr := s.Catalog.ListDrives(r.Context())
|
||||
if listErr != nil || len(drives) > 0 {
|
||||
writeErr(w, http.StatusNotFound, sql.ErrNoRows)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
related := s.pickRelatedVideos(r.Context(), v, 6)
|
||||
dto := mapVideo(v)
|
||||
if d, err := s.Catalog.GetDrive(r.Context(), v.DriveID); err == nil {
|
||||
@@ -290,7 +344,7 @@ func (s *Server) handleVideoDetail(w http.ResponseWriter, r *http.Request) {
|
||||
VideoSrc: s.videoSource(v),
|
||||
Poster: thumbnailURL(v),
|
||||
Description: v.Description,
|
||||
EmbedURL: fmt.Sprintf(`<iframe src="/embed/%s" width="640" height="360" frameborder="0" allowfullscreen></iframe>`, v.ID),
|
||||
EmbedURL: fmt.Sprintf(`<iframe src="/embed/%s" width="640" height="360" frameborder="0" allowfullscreen></iframe>`, pathSegment(v.ID)),
|
||||
AuthorProfile: AuthorProfile{
|
||||
ID: "author-" + v.Author,
|
||||
Name: v.Author,
|
||||
@@ -442,29 +496,39 @@ func appendRandomRelated(picked []*catalog.Video, pool []*catalog.Video, targetL
|
||||
}
|
||||
|
||||
func (s *Server) handleTags(w http.ResponseWriter, r *http.Request) {
|
||||
now := time.Now()
|
||||
s.tagCacheMu.Lock()
|
||||
if s.tagCache != nil && now.Before(s.tagCacheUntil) {
|
||||
out := append([]TagDTO(nil), s.tagCache...)
|
||||
s.tagCacheMu.Unlock()
|
||||
w.Header().Set("Cache-Control", "private, max-age=15")
|
||||
writeJSON(w, http.StatusOK, out)
|
||||
return
|
||||
}
|
||||
s.tagCacheMu.Unlock()
|
||||
|
||||
stats, err := s.Catalog.ListTags(r.Context())
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
type tag struct {
|
||||
ID string `json:"id"`
|
||||
Label string `json:"label"`
|
||||
Count int `json:"count"`
|
||||
}
|
||||
out := make([]tag, 0, len(stats))
|
||||
out := make([]TagDTO, 0, len(stats))
|
||||
for _, stat := range stats {
|
||||
out = append(out, tag{ID: stat.Label, Label: stat.Label, Count: stat.Count})
|
||||
out = append(out, TagDTO{ID: stat.Label, Label: stat.Label, Count: stat.Count})
|
||||
}
|
||||
s.tagCacheMu.Lock()
|
||||
s.tagCache = append([]TagDTO(nil), out...)
|
||||
s.tagCacheUntil = now.Add(30 * time.Second)
|
||||
s.tagCacheMu.Unlock()
|
||||
|
||||
w.Header().Set("Cache-Control", "private, max-age=15")
|
||||
writeJSON(w, http.StatusOK, out)
|
||||
}
|
||||
|
||||
// shortsNextReq 客户端把当前轮已看过的 video id 列表传上来。
|
||||
// PreferredFromVideoID 来自短视频页最近一次点赞成功的视频,用于优先推荐相似标签。
|
||||
type shortsNextReq struct {
|
||||
SeenIDs []string `json:"seenIds"`
|
||||
Count int `json:"count"`
|
||||
PreferredFromVideoID string `json:"preferredFromVideoId"`
|
||||
SeenIDs []string `json:"seenIds"`
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
// ShortsItemDTO 是短视频流单条的精简结构。比 VideoDTO 多 videoSrc / poster,
|
||||
@@ -482,8 +546,8 @@ type ShortsItemDTO struct {
|
||||
// - 服务器从未在 seenIds 中的可见视频里随机抽至多 count 条返回
|
||||
// - 当返回数量 < count 且小于全库可见总数时,说明本轮即将结束,
|
||||
// 返回 roundComplete=true,前端应在用户看完返回的这些后清空本地已看记录开新一轮
|
||||
// - 当 seenIds 已经覆盖全库时,本接口直接返回新一轮的随机一批
|
||||
// (传 seenIds=[] 即可让客户端在轮次完成后重新开始)
|
||||
// - 当 seenIds 真实覆盖当前全部可见视频时,本接口直接返回新一轮的随机一批
|
||||
// (不能仅看 seenIds 长度,里面可能有隐藏、删除或历史脏 ID)
|
||||
func (s *Server) handleShortsNext(w http.ResponseWriter, r *http.Request) {
|
||||
var body shortsNextReq
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
|
||||
@@ -504,22 +568,18 @@ func (s *Server) handleShortsNext(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// 如果客户端已看记录已经 ≥ 全库,则视为新一轮,直接忽略 seenIds
|
||||
exclude := body.SeenIDs
|
||||
if total > 0 && len(exclude) >= total {
|
||||
exclude = nil
|
||||
}
|
||||
|
||||
var items []*catalog.Video
|
||||
if strings.TrimSpace(body.PreferredFromVideoID) != "" {
|
||||
items, err = s.Catalog.RandomVideosForPreferredVideoExcluding(r.Context(), body.PreferredFromVideoID, exclude, count)
|
||||
} else {
|
||||
items, err = s.Catalog.RandomVideosExcluding(r.Context(), exclude, count)
|
||||
}
|
||||
items, err := s.Catalog.RandomVideosExcluding(r.Context(), body.SeenIDs, count)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
if total > 0 && len(items) == 0 && len(body.SeenIDs) > 0 {
|
||||
items, err = s.Catalog.RandomVideosExcluding(r.Context(), nil, count)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 注入 sourceLabel 以便前端展示来源网盘
|
||||
driveLabels := make(map[string]string)
|
||||
@@ -557,7 +617,7 @@ type updateVideoTagsReq struct {
|
||||
}
|
||||
|
||||
func (s *Server) handleUpdateVideoTags(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
id := routeParam(r, "id")
|
||||
var body updateVideoTagsReq
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
writeErr(w, http.StatusBadRequest, err)
|
||||
@@ -580,7 +640,7 @@ func (s *Server) handleUpdateVideoTags(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleLike(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
id := routeParam(r, "id")
|
||||
likes, err := s.Catalog.IncrementLike(r.Context(), id)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
@@ -592,7 +652,7 @@ func (s *Server) handleLike(w http.ResponseWriter, r *http.Request) {
|
||||
// handleUnlike 取消点赞:likes - 1(保底 0)。
|
||||
// 短视频模式中爱心按钮点击切换状态时使用。
|
||||
func (s *Server) handleUnlike(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
id := routeParam(r, "id")
|
||||
likes, err := s.Catalog.DecrementLike(r.Context(), id)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
@@ -606,7 +666,7 @@ func (s *Server) handleUnlike(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleView(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
id := routeParam(r, "id")
|
||||
views, err := s.Catalog.IncrementView(r.Context(), id)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
@@ -620,8 +680,15 @@ func (s *Server) handleView(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleHideVideo(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
if err := s.Catalog.HideVideo(r.Context(), id); err != nil {
|
||||
id := routeParam(r, "id")
|
||||
var err error
|
||||
if s.OnHideVideo != nil {
|
||||
// 走拉黑逻辑:删记录 + 删本地封面/预览 + 写墓碑,保留网盘源文件。
|
||||
err = s.OnHideVideo(r.Context(), id)
|
||||
} else {
|
||||
err = s.Catalog.HideVideo(r.Context(), id)
|
||||
}
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
writeErr(w, http.StatusNotFound, err)
|
||||
return
|
||||
@@ -737,12 +804,12 @@ func (s *Server) handleUploadVideo(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleStream(w http.ResponseWriter, r *http.Request) {
|
||||
driveID := chi.URLParam(r, "driveID")
|
||||
fileID := chi.URLParam(r, "fileID")
|
||||
driveID := routeParam(r, "driveID")
|
||||
fileID := routeWildcardParam(r, "*")
|
||||
s.Proxy.ServeStream(w, r, driveID, fileID)
|
||||
}
|
||||
func (s *Server) handleUploadedVideo(w http.ResponseWriter, r *http.Request) {
|
||||
videoID := chi.URLParam(r, "videoID")
|
||||
videoID := routeParam(r, "videoID")
|
||||
v, err := s.Catalog.GetVideo(r.Context(), videoID)
|
||||
if err != nil || v.Hidden || v.DriveID != localUploadDriveID {
|
||||
http.NotFound(w, r)
|
||||
@@ -762,46 +829,8 @@ func (s *Server) handleUploadedVideo(w http.ResponseWriter, r *http.Request) {
|
||||
http.ServeFile(w, r, path)
|
||||
}
|
||||
|
||||
// handleSpider91Video 服务 spider91 drive 下载到本地的视频文件。
|
||||
// 路径形如 /p/spider91/<videoID>,videoID = "spider91-<driveID>-<sourceID>"。
|
||||
// 通过 catalog 拿到 file_id("<sourceID>.mp4"),再让 driver 解析到绝对路径并 ServeFile。
|
||||
func (s *Server) handleSpider91Video(w http.ResponseWriter, r *http.Request) {
|
||||
videoID := chi.URLParam(r, "videoID")
|
||||
v, err := s.Catalog.GetVideo(r.Context(), videoID)
|
||||
if err != nil || v.Hidden {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
if s.Proxy == nil || s.Proxy.Registry == nil {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
d, ok := s.Proxy.Registry.Get(v.DriveID)
|
||||
if !ok || d.Kind() != spider91.Kind {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
sd, ok := d.(*spider91.Driver)
|
||||
if !ok {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
path, err := sd.VideoPath(v.FileID)
|
||||
if err != nil {
|
||||
http.Error(w, "invalid video id", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil || info.IsDir() || info.Size() == 0 {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Cache-Control", "private, max-age=300")
|
||||
http.ServeFile(w, r, path)
|
||||
}
|
||||
|
||||
func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
|
||||
videoID := chi.URLParam(r, "videoID")
|
||||
videoID := routeParam(r, "videoID")
|
||||
v, err := s.Catalog.GetVideo(r.Context(), videoID)
|
||||
if err != nil {
|
||||
http.NotFound(w, r)
|
||||
@@ -826,15 +855,20 @@ func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleThumb(w http.ResponseWriter, r *http.Request) {
|
||||
videoID := chi.URLParam(r, "videoID")
|
||||
// 直接读本地 thumbs 目录中 <videoID>.jpg
|
||||
path := filepath.Join(s.LocalDir, "thumbs", videoID+".jpg")
|
||||
clean := filepath.Clean(path)
|
||||
if !strings.HasPrefix(clean, filepath.Clean(s.LocalDir)) {
|
||||
http.Error(w, "invalid path", http.StatusForbidden)
|
||||
return
|
||||
videoID := routeParam(r, "videoID")
|
||||
var clean string
|
||||
for _, path := range mediaasset.ThumbnailPathCandidates(s.LocalDir, videoID) {
|
||||
candidate := filepath.Clean(path)
|
||||
if !strings.HasPrefix(candidate, filepath.Clean(s.LocalDir)) {
|
||||
http.Error(w, "invalid path", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
if _, err := os.Stat(candidate); err == nil {
|
||||
clean = candidate
|
||||
break
|
||||
}
|
||||
}
|
||||
if _, err := os.Stat(clean); err != nil {
|
||||
if clean == "" {
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
@@ -856,7 +890,7 @@ func mapVideo(v *catalog.Video) VideoDTO {
|
||||
}
|
||||
return VideoDTO{
|
||||
ID: v.ID,
|
||||
Href: "/video/" + v.ID,
|
||||
Href: "/video/" + pathSegment(v.ID),
|
||||
Title: v.Title,
|
||||
Thumbnail: thumbnailURL(v),
|
||||
PreviewSrc: previewURL(v),
|
||||
@@ -873,12 +907,11 @@ func mapVideo(v *catalog.Video) VideoDTO {
|
||||
Dislikes: v.Dislikes,
|
||||
PublishedAt: v.PublishedAt.Format("2006-01-02"),
|
||||
Tags: tags,
|
||||
Category: v.Category,
|
||||
}
|
||||
}
|
||||
|
||||
func previewURL(v *catalog.Video) string {
|
||||
base := "/p/preview/" + v.ID
|
||||
base := "/p/preview/" + pathSegment(v.ID)
|
||||
if v.UpdatedAt.IsZero() {
|
||||
return base
|
||||
}
|
||||
@@ -886,31 +919,90 @@ func previewURL(v *catalog.Video) string {
|
||||
}
|
||||
|
||||
func thumbnailURL(v *catalog.Video) string {
|
||||
base := "/p/thumb/" + pathSegment(v.ID)
|
||||
if v.ThumbnailURL != "" {
|
||||
return v.ThumbnailURL
|
||||
base = v.ThumbnailURL
|
||||
if thumbnailURLMatchesVideoID(base, v.ID) {
|
||||
base = "/p/thumb/" + pathSegment(v.ID)
|
||||
}
|
||||
}
|
||||
return "/p/thumb/" + v.ID
|
||||
if !strings.HasPrefix(base, "/p/thumb/") || v.UpdatedAt.IsZero() {
|
||||
return base
|
||||
}
|
||||
return base + "?v=" + strconv.FormatInt(v.UpdatedAt.UnixMilli(), 10)
|
||||
}
|
||||
|
||||
// transcodedSource 在视频有就绪的浏览器兼容性转码产物时返回产物的播放地址。
|
||||
// 产物和原始文件在同一个 drive 上,走同一条 /p/stream 代理/302 链路。
|
||||
func transcodedSource(v *catalog.Video) (string, bool) {
|
||||
if v.TranscodeStatus == "ready" && v.TranscodedFileID != "" && v.DriveID != localUploadDriveID {
|
||||
return fmt.Sprintf("/p/stream/%s/%s", pathSegment(v.DriveID), pathSegment(v.TranscodedFileID)), true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (s *Server) videoSource(v *catalog.Video) string {
|
||||
if v.DriveID == localUploadDriveID {
|
||||
return "/p/upload/" + v.ID
|
||||
return "/p/upload/" + pathSegment(v.ID)
|
||||
}
|
||||
if s.Proxy != nil && s.Proxy.Registry != nil {
|
||||
if d, ok := s.Proxy.Registry.Get(v.DriveID); ok && d.Kind() == spider91.Kind {
|
||||
return "/p/spider91/" + v.ID
|
||||
}
|
||||
if src, ok := transcodedSource(v); ok {
|
||||
return src
|
||||
}
|
||||
return fmt.Sprintf("/p/stream/%s/%s", v.DriveID, v.FileID)
|
||||
return fmt.Sprintf("/p/stream/%s/%s", pathSegment(v.DriveID), pathSegment(v.FileID))
|
||||
}
|
||||
|
||||
// videoSource 兼容旧调用点,没有 server context 时按之前逻辑回退到 /p/stream。
|
||||
// 内部新增的代码请使用 (*Server).videoSource。
|
||||
func videoSource(v *catalog.Video) string {
|
||||
if v.DriveID == localUploadDriveID {
|
||||
return "/p/upload/" + v.ID
|
||||
return "/p/upload/" + pathSegment(v.ID)
|
||||
}
|
||||
return fmt.Sprintf("/p/stream/%s/%s", v.DriveID, v.FileID)
|
||||
if src, ok := transcodedSource(v); ok {
|
||||
return src
|
||||
}
|
||||
return fmt.Sprintf("/p/stream/%s/%s", pathSegment(v.DriveID), pathSegment(v.FileID))
|
||||
}
|
||||
|
||||
// pathSegment escapes value with url.PathEscape so it can be embedded as a
// single URL path segment (for example "/" becomes "%2F" and a space "%20").
func pathSegment(value string) string {
	escaped := url.PathEscape(value)
	return escaped
}
|
||||
|
||||
func routeParam(r *http.Request, key string) string {
|
||||
value := chi.URLParam(r, key)
|
||||
if value == "" {
|
||||
return ""
|
||||
}
|
||||
if decoded, err := url.PathUnescape(value); err == nil {
|
||||
return decoded
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
func routeWildcardParam(r *http.Request, key string) string {
|
||||
value := chi.URLParam(r, key)
|
||||
if value == "" {
|
||||
return ""
|
||||
}
|
||||
value = strings.TrimPrefix(value, "/")
|
||||
if decoded, err := url.PathUnescape(value); err == nil {
|
||||
return decoded
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
// thumbnailURLMatchesVideoID reports whether value is a local "/p/thumb/..."
// URL whose path tail (ignoring anything from the first "?") equals videoID,
// either literally or after percent-decoding the tail.
func thumbnailURLMatchesVideoID(value, videoID string) bool {
	const localPrefix = "/p/thumb/"
	if !strings.HasPrefix(value, localPrefix) {
		return false
	}

	segment := value[len(localPrefix):]
	if q := strings.IndexByte(segment, '?'); q >= 0 {
		// Drop the query string (e.g. a "?v=..." cache-busting suffix).
		segment = segment[:q]
	}
	if segment == videoID {
		return true
	}

	unescaped, err := url.PathUnescape(segment)
	if err != nil {
		return false
	}
	return unescaped == videoID
}
|
||||
|
||||
func driveKindLabel(kind string) string {
|
||||
@@ -919,18 +1011,20 @@ func driveKindLabel(kind string) string {
|
||||
return "夸克网盘"
|
||||
case "p115":
|
||||
return "115 网盘"
|
||||
case "p123":
|
||||
return "123网盘"
|
||||
case "pikpak":
|
||||
return "PikPak"
|
||||
case "wopan":
|
||||
return "联通沃盘"
|
||||
return "联通网盘"
|
||||
case "guangyapan":
|
||||
return "光鸭网盘"
|
||||
case "onedrive":
|
||||
return "OneDrive"
|
||||
case "googledrive":
|
||||
return "Google Drive"
|
||||
case localstorage.Kind:
|
||||
return "本地存储"
|
||||
case spider91.Kind:
|
||||
return "91 爬虫"
|
||||
default:
|
||||
return kind
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
@@ -17,6 +18,8 @@ import (
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
"github.com/video-site/backend/internal/proxy"
|
||||
)
|
||||
|
||||
@@ -65,6 +68,68 @@ func TestVideoSourceKeepsDirectStreamForMp4(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoURLsEscapePathSegments(t *testing.T) {
|
||||
updated := time.UnixMilli(1778863000123)
|
||||
v := &catalog.Video{
|
||||
ID: "wopan-drive-fid/with space",
|
||||
DriveID: "drive-1",
|
||||
FileID: "fid/with space",
|
||||
Title: "Video",
|
||||
UpdatedAt: updated,
|
||||
}
|
||||
|
||||
dto := mapVideo(v)
|
||||
if dto.Href != "/video/wopan-drive-fid%2Fwith%20space" {
|
||||
t.Fatalf("href = %q, want escaped video id", dto.Href)
|
||||
}
|
||||
if dto.PreviewSrc != "/p/preview/wopan-drive-fid%2Fwith%20space?v=1778863000123" {
|
||||
t.Fatalf("preview = %q, want escaped video id", dto.PreviewSrc)
|
||||
}
|
||||
if dto.Thumbnail != "/p/thumb/wopan-drive-fid%2Fwith%20space?v=1778863000123" {
|
||||
t.Fatalf("thumbnail = %q, want escaped video id", dto.Thumbnail)
|
||||
}
|
||||
if got := videoSource(v); got != "/p/stream/drive-1/fid%2Fwith%20space" {
|
||||
t.Fatalf("video source = %q, want escaped file id", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbnailURLRewritesStoredLocalURLForUnsafeVideoID(t *testing.T) {
|
||||
got := thumbnailURL(&catalog.Video{
|
||||
ID: "wopan-drive-fid/with space",
|
||||
ThumbnailURL: "/p/thumb/wopan-drive-fid/with space",
|
||||
UpdatedAt: time.UnixMilli(1778863000123),
|
||||
})
|
||||
|
||||
if got != "/p/thumb/wopan-drive-fid%2Fwith%20space?v=1778863000123" {
|
||||
t.Fatalf("thumbnail URL = %q, want escaped local URL", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleStreamDecodesEscapedWildcardFileID(t *testing.T) {
|
||||
local := filepath.Join(t.TempDir(), "video.mp4")
|
||||
if err := os.WriteFile(local, []byte("ok"), 0o644); err != nil {
|
||||
t.Fatalf("write local video: %v", err)
|
||||
}
|
||||
drv := &apiStreamFakeDrive{localPath: local}
|
||||
reg := proxy.NewRegistry()
|
||||
reg.Set("drive-1", drv)
|
||||
srv := &Server{Proxy: proxy.New(reg)}
|
||||
|
||||
router := chi.NewRouter()
|
||||
router.Get("/p/stream/{driveID}/*", srv.handleStream)
|
||||
req := httptest.NewRequest(http.MethodGet, "/p/stream/drive-1/fid%2Fwith%20space", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
router.ServeHTTP(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
if drv.fileID != "fid/with space" {
|
||||
t.Fatalf("fileID = %q, want decoded original", drv.fileID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoSourceUsesLocalUploadRoute(t *testing.T) {
|
||||
v := &catalog.Video{
|
||||
ID: "video-1",
|
||||
@@ -99,6 +164,70 @@ func TestPreviewURLFallsBackWithoutUpdatedAt(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleVideoDetailDecodesEscapedVideoID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: "wopan-drive-fid/with space",
|
||||
DriveID: "drive-1",
|
||||
FileID: "fid/with space",
|
||||
Title: "Video",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
router := chi.NewRouter()
|
||||
router.Get("/api/video/{id}", (&Server{Catalog: cat}).handleVideoDetail)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/video/wopan-drive-fid%2Fwith%20space", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
router.ServeHTTP(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got VideoDetailDTO
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if got.ID != "wopan-drive-fid/with space" {
|
||||
t.Fatalf("id = %q, want original video id", got.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbnailURLVersionsLocalGeneratedThumbnails(t *testing.T) {
|
||||
got := thumbnailURL(&catalog.Video{
|
||||
ID: "video-1",
|
||||
ThumbnailURL: "/p/thumb/video-1",
|
||||
UpdatedAt: time.UnixMilli(1778863000123),
|
||||
})
|
||||
if got != "/p/thumb/video-1?v=1778863000123" {
|
||||
t.Fatalf("thumbnail URL = %q, want versioned local URL", got)
|
||||
}
|
||||
|
||||
remote := "https://thumb.example/video-1.jpg"
|
||||
got = thumbnailURL(&catalog.Video{
|
||||
ID: "video-1",
|
||||
ThumbnailURL: remote,
|
||||
UpdatedAt: time.UnixMilli(1778863000123),
|
||||
})
|
||||
if got != remote {
|
||||
t.Fatalf("remote thumbnail URL = %q, want unchanged %q", got, remote)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleHomePrioritizesVideosWithReadyThumbnails(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
@@ -219,6 +348,63 @@ func TestHandleHomeExcludesRecentlyShownVideos(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleHomeStartsNewRoundWhenRecentExcludesAllVisibleVideos(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
excludes := make([]string, 0, homePageSize+2)
|
||||
for i := 0; i < homePageSize+2; i++ {
|
||||
id := "ready-video-" + strconv.Itoa(i)
|
||||
excludes = append(excludes, "exclude="+id)
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: id,
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: id,
|
||||
ThumbnailURL: "https://thumb.example/" + id + ".jpg",
|
||||
PublishedAt: now.Add(time.Duration(i) * time.Minute),
|
||||
CreatedAt: now.Add(time.Duration(i) * time.Minute),
|
||||
UpdatedAt: now.Add(time.Duration(i) * time.Minute),
|
||||
}); err != nil {
|
||||
t.Fatalf("seed ready video %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
rr := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/home?"+strings.Join(excludes, "&"), nil)
|
||||
(&Server{Catalog: cat}).handleHome(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got []VideoDTO
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if len(got) != homePageSize {
|
||||
t.Fatalf("home items = %d, want %d; body=%s", len(got), homePageSize, rr.Body.String())
|
||||
}
|
||||
seen := map[string]bool{}
|
||||
for _, item := range got {
|
||||
if seen[item.ID] {
|
||||
t.Fatalf("home returned duplicate video %q; items=%#v", item.ID, got)
|
||||
}
|
||||
seen[item.ID] = true
|
||||
if !strings.HasPrefix(item.ID, "ready-video-") {
|
||||
t.Fatalf("home returned unexpected video %q; items=%#v", item.ID, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleListLatestPrefersReadyThumbnails(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
@@ -290,6 +476,88 @@ func TestHandleListLatestPrefersReadyThumbnails(t *testing.T) {
|
||||
t.Fatalf("thumbnail for %q = %q, want ready thumbnail URL", item.ID, item.Thumbnail)
|
||||
}
|
||||
}
|
||||
|
||||
rr = httptest.NewRecorder()
|
||||
req = httptest.NewRequest(http.MethodGet, "/api/list?page=1&size=12&sort=latest&count=false", nil)
|
||||
(&Server{Catalog: cat}).handleList(rr, req)
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("count=false status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
got = struct {
|
||||
Items []VideoDTO `json:"items"`
|
||||
Total int `json:"total"`
|
||||
}{}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode count=false response: %v", err)
|
||||
}
|
||||
if got.Total != 0 {
|
||||
t.Fatalf("count=false total = %d, want 0", got.Total)
|
||||
}
|
||||
if len(got.Items) != 12 {
|
||||
t.Fatalf("count=false items = %d, want 12", len(got.Items))
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleListIgnoresCategoryQueryAndDoesNotExposeCategory(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*catalog.Video{
|
||||
{
|
||||
ID: "video-a",
|
||||
DriveID: "drive",
|
||||
FileID: "file-a",
|
||||
Title: "A",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
{
|
||||
ID: "video-b",
|
||||
DriveID: "drive",
|
||||
FileID: "file-b",
|
||||
Title: "B",
|
||||
PublishedAt: now.Add(-time.Hour),
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed video %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
rr := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/list?page=1&size=24&cat=alpha", nil)
|
||||
(&Server{Catalog: cat}).handleList(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got struct {
|
||||
Items []map[string]any `json:"items"`
|
||||
Total int `json:"total"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if got.Total != 2 || len(got.Items) != 2 {
|
||||
t.Fatalf("response total/items = %d/%d, want 2/2", got.Total, len(got.Items))
|
||||
}
|
||||
for _, item := range got.Items {
|
||||
if _, ok := item["category"]; ok {
|
||||
t.Fatalf("list response exposed category: %#v", item)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUploadVideoSavesFileVideoTagsAndQueuesPreview(t *testing.T) {
|
||||
@@ -511,6 +779,34 @@ func TestHandlePreviewIgnoresRemotePreviewFileIDAndServesLocalFile(t *testing.T)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleThumbServesHashedPathForLongVideoID(t *testing.T) {
|
||||
localDir := t.TempDir()
|
||||
longID := "localstorage-" + strings.Repeat("x", 240)
|
||||
thumbPath := mediaasset.ThumbnailPath(localDir, longID)
|
||||
if err := os.MkdirAll(filepath.Dir(thumbPath), 0o755); err != nil {
|
||||
t.Fatalf("mkdir thumb dir: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(thumbPath, []byte("thumb-bytes"), 0o644); err != nil {
|
||||
t.Fatalf("write thumb: %v", err)
|
||||
}
|
||||
|
||||
server := &Server{
|
||||
LocalDir: localDir,
|
||||
Proxy: proxy.New(proxy.NewRegistry()),
|
||||
}
|
||||
req := requestWithRouteParam(http.MethodGet, "/p/thumb/"+longID, "videoID", longID, strings.NewReader(``))
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
server.handleThumb(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
if rr.Body.String() != "thumb-bytes" {
|
||||
t.Fatalf("body = %q, want thumb bytes", rr.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleTagsReturnsUnifiedTagPool(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
@@ -529,7 +825,6 @@ func TestHandleTagsReturnsUnifiedTagPool(t *testing.T) {
|
||||
FileID: "file-1",
|
||||
Title: "清纯女大后入",
|
||||
Tags: []string{"后入", "女大"},
|
||||
Category: "random-category",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -576,7 +871,7 @@ func TestHandleTagsReturnsUnifiedTagPool(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
func TestHandleShortsNextReturnsRandomBatchExcludingSeen(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -600,7 +895,7 @@ func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/shorts/next", strings.NewReader(`{"seenIds":["current"],"count":3,"preferredFromVideoId":"current"}`))
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/shorts/next", strings.NewReader(`{"seenIds":["current"],"count":3}`))
|
||||
rr := httptest.NewRecorder()
|
||||
(&Server{Catalog: cat}).handleShortsNext(rr, req)
|
||||
|
||||
@@ -623,10 +918,7 @@ func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
t.Fatalf("total = %d, want 4", got.Total)
|
||||
}
|
||||
if got.RoundComplete {
|
||||
t.Fatalf("roundComplete = true, want false with fallback-filled batch")
|
||||
}
|
||||
if !containsString(ids, "rare-1") {
|
||||
t.Fatalf("ids = %#v, want rare-1 from least populated tag", ids)
|
||||
t.Fatalf("roundComplete = true, want false with a full remaining batch")
|
||||
}
|
||||
if containsString(ids, "current") {
|
||||
t.Fatalf("ids = %#v, should exclude current", ids)
|
||||
@@ -634,6 +926,76 @@ func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
if len(ids) != 3 {
|
||||
t.Fatalf("ids = %#v, want 3 items", ids)
|
||||
}
|
||||
for _, want := range []string{"common-1", "common-2", "rare-1"} {
|
||||
if !containsString(ids, want) {
|
||||
t.Fatalf("ids = %#v, want remaining id %s", ids, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleShortsNextDoesNotResetForStaleSeenIDs(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*catalog.Video{
|
||||
{ID: "seen-1", DriveID: "drive", FileID: "f-seen-1", Title: "seen 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "fresh-1", DriveID: "drive", FileID: "f-fresh-1", Title: "fresh 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "fresh-2", DriveID: "drive", FileID: "f-fresh-2", Title: "fresh 2", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "hidden-1", DriveID: "drive", FileID: "f-hidden-1", Title: "hidden 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
if err := cat.HideVideo(ctx, "hidden-1"); err != nil {
|
||||
t.Fatalf("hide hidden-1: %v", err)
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/shorts/next", strings.NewReader(`{"seenIds":["seen-1","hidden-1","deleted-stale"],"count":3}`))
|
||||
rr := httptest.NewRecorder()
|
||||
(&Server{Catalog: cat}).handleShortsNext(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got struct {
|
||||
Items []ShortsItemDTO `json:"items"`
|
||||
Total int `json:"total"`
|
||||
RoundComplete bool `json:"roundComplete"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
ids := make([]string, 0, len(got.Items))
|
||||
for _, item := range got.Items {
|
||||
ids = append(ids, item.ID)
|
||||
}
|
||||
if got.Total != 3 {
|
||||
t.Fatalf("total = %d, want 3", got.Total)
|
||||
}
|
||||
if !got.RoundComplete {
|
||||
t.Fatalf("roundComplete = false, want true after returning all unviewed visible videos")
|
||||
}
|
||||
if containsString(ids, "seen-1") || containsString(ids, "hidden-1") {
|
||||
t.Fatalf("ids = %#v, should not reset and return seen or hidden videos", ids)
|
||||
}
|
||||
for _, want := range []string{"fresh-1", "fresh-2"} {
|
||||
if !containsString(ids, want) {
|
||||
t.Fatalf("ids = %#v, want %s", ids, want)
|
||||
}
|
||||
}
|
||||
if len(ids) != 2 {
|
||||
t.Fatalf("ids = %#v, want exactly the two unviewed visible videos", ids)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUpdateVideoTagsRejectsUnknownTags(t *testing.T) {
|
||||
@@ -957,6 +1319,37 @@ func sameStringSet(a, b []string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
type apiStreamFakeDrive struct {
|
||||
localPath string
|
||||
fileID string
|
||||
}
|
||||
|
||||
func (d *apiStreamFakeDrive) Kind() string { return "fake" }
|
||||
func (d *apiStreamFakeDrive) ID() string { return "drive-1" }
|
||||
func (d *apiStreamFakeDrive) Init(context.Context) error {
|
||||
return nil
|
||||
}
|
||||
func (d *apiStreamFakeDrive) List(context.Context, string) ([]drives.Entry, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *apiStreamFakeDrive) Stat(context.Context, string) (*drives.Entry, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *apiStreamFakeDrive) StreamURL(_ context.Context, fileID string) (*drives.StreamLink, error) {
|
||||
d.fileID = fileID
|
||||
return &drives.StreamLink{
|
||||
URL: d.localPath,
|
||||
Expires: time.Now().Add(time.Minute),
|
||||
}, nil
|
||||
}
|
||||
func (d *apiStreamFakeDrive) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *apiStreamFakeDrive) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *apiStreamFakeDrive) RootID() string { return "root" }
|
||||
|
||||
func requestWithVideoID(method, target, videoID string, body *strings.Reader) *http.Request {
|
||||
return requestWithRouteParam(method, target, "id", videoID, body)
|
||||
}
|
||||
|
||||
+877
-291
File diff suppressed because it is too large
Load Diff
@@ -58,10 +58,11 @@ func TestUpsertDriveDefaultsRootIDByKind(t *testing.T) {
|
||||
}{
|
||||
{id: "p115", kind: "p115", want: "0"},
|
||||
{id: "pikpak", kind: "pikpak", want: ""},
|
||||
{id: "guangyapan", kind: "guangyapan", want: ""},
|
||||
{id: "onedrive", kind: "onedrive", want: "root"},
|
||||
{id: "googledrive", kind: "googledrive", want: "root"},
|
||||
{id: "localstorage", kind: "localstorage", want: "/"},
|
||||
{id: "spider91", kind: "spider91", want: "/"},
|
||||
{id: "scriptcrawler", kind: "scriptcrawler", want: "/"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
@@ -84,7 +85,7 @@ func TestUpsertDriveDefaultsRootIDByKind(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpsertDriveIgnoresRootIDForLocalStorageAndSpider91(t *testing.T) {
|
||||
func TestUpsertDriveIgnoresRootIDForLocalStorageAndScriptCrawler(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -101,7 +102,7 @@ func TestUpsertDriveIgnoresRootIDForLocalStorageAndSpider91(t *testing.T) {
|
||||
kind string
|
||||
}{
|
||||
{id: "localstorage", kind: "localstorage"},
|
||||
{id: "spider91", kind: "spider91"},
|
||||
{id: "scriptcrawler", kind: "scriptcrawler"},
|
||||
} {
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
ID: tc.id,
|
||||
|
||||
@@ -2,12 +2,13 @@ package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"sort"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestListVideoFileIDsByDrive 校验 spider91 crawler 用到的轻量 file_id 查询:
|
||||
// TestListVideoFileIDsByDrive 校验上传 worker 用到的轻量 file_id 查询:
|
||||
// - 只返回指定 drive 的 file_id;不返回其它 drive 的
|
||||
// - 跳过 file_id 为空的视频
|
||||
// - 返回顺序无要求,但每个 file_id 只出现一次
|
||||
@@ -32,20 +33,20 @@ func TestListVideoFileIDsByDrive(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
insert("spider91-A-vk001", "spider-a", "vk001.mp4")
|
||||
insert("spider91-A-vk002", "spider-a", "vk002.flv")
|
||||
insert("spider91-A-vk003", "spider-a", "vk003.mp4")
|
||||
insert("scriptcrawler-A-source001", "crawler-a", "source001.mp4")
|
||||
insert("scriptcrawler-A-source002", "crawler-a", "source002.flv")
|
||||
insert("scriptcrawler-A-source003", "crawler-a", "source003.mp4")
|
||||
// 不同 drive 的视频不应出现
|
||||
insert("quark-other-fid", "drive-quark", "abcdef")
|
||||
// 空 file_id 应被过滤
|
||||
insert("spider91-A-empty", "spider-a", "")
|
||||
insert("scriptcrawler-A-empty", "crawler-a", "")
|
||||
|
||||
got, err := cat.ListVideoFileIDsByDrive(ctx, "spider-a")
|
||||
got, err := cat.ListVideoFileIDsByDrive(ctx, "crawler-a")
|
||||
if err != nil {
|
||||
t.Fatalf("ListVideoFileIDsByDrive: %v", err)
|
||||
}
|
||||
sort.Strings(got)
|
||||
want := []string{"vk001.mp4", "vk002.flv", "vk003.mp4"}
|
||||
want := []string{"source001.mp4", "source002.flv", "source003.mp4"}
|
||||
sort.Strings(want)
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("got %d ids, want %d: got=%v", len(got), len(want), got)
|
||||
@@ -66,11 +67,11 @@ func TestListVideoFileIDsByDrive(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestListSpider91ViewkeysFindsMigratedVideos 校验:即使 spider91 视频
|
||||
// 被迁移到 PikPak(drive_id 改了),ListSpider91Viewkeys 仍能通过 video.id
|
||||
// 前缀找到这些 viewkey。这是 crawler 写 seen 文件的关键不变量,
|
||||
// 否则下一次爬取会把已爬过的 viewkey 当作"新"的再爬一遍。
|
||||
func TestListSpider91ViewkeysFindsMigratedVideos(t *testing.T) {
|
||||
// TestListCrawlerSourceIDsFindsMigratedVideos 校验:即使爬虫视频被上传迁移
|
||||
// 到目标网盘(drive_id 改了),ListCrawlerSourceIDs 仍能通过 video.id 前缀
|
||||
// 找到这些 source_id。这是 crawler 写 seen 文件的关键不变量,否则下一次
|
||||
// 爬取会把已爬过的 source_id 当作"新"的再爬一遍。
|
||||
func TestListCrawlerSourceIDsFindsMigratedVideos(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -91,25 +92,25 @@ func TestListSpider91ViewkeysFindsMigratedVideos(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// 1) 仍在 spider91 drive 下的视频(未迁移)
|
||||
insert("spider91-91Spider-vk001", "91Spider", "vk001.mp4")
|
||||
// 2) 已迁移到 PikPak 的视频:drive_id 变了,但 id 仍是 spider91-91Spider-...
|
||||
insert("spider91-91Spider-vk002", "PikPak", "PIKPAK-FILE-ID-2")
|
||||
insert("spider91-91Spider-vk003", "PikPak", "PIKPAK-FILE-ID-3")
|
||||
// 3) 别的 spider91 drive 的视频,不应混进来
|
||||
insert("spider91-OtherDrive-vk999", "OtherDrive", "vk999.mp4")
|
||||
// 1) 仍在本地爬虫 drive 下的视频(未上传)
|
||||
insert("scriptcrawler-crawler-a-source001", "crawler-a", "source001.mp4")
|
||||
// 2) 已上传到目标盘的视频:drive_id 变了,但 id 仍保留 crawler 来源前缀。
|
||||
insert("scriptcrawler-crawler-a-source002", "target-drive", "TARGET-FILE-ID-2")
|
||||
insert("scriptcrawler-crawler-a-source003", "target-drive", "TARGET-FILE-ID-3")
|
||||
// 3) 别的爬虫 drive 的视频,不应混进来
|
||||
insert("scriptcrawler-other-source999", "other-crawler", "source999.mp4")
|
||||
// 4) 完全无关的视频
|
||||
insert("quark-some-fid", "drive-quark", "abc")
|
||||
|
||||
got, err := cat.ListSpider91Viewkeys(ctx, "91Spider")
|
||||
got, err := cat.ListCrawlerSourceIDs(ctx, "scriptcrawler", "crawler-a")
|
||||
if err != nil {
|
||||
t.Fatalf("ListSpider91Viewkeys: %v", err)
|
||||
t.Fatalf("ListCrawlerSourceIDs: %v", err)
|
||||
}
|
||||
sort.Strings(got)
|
||||
want := []string{"vk001", "vk002", "vk003"}
|
||||
want := []string{"source001", "source002", "source003"}
|
||||
sort.Strings(want)
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("got %d viewkeys, want %d: got=%v", len(got), len(want), got)
|
||||
t.Fatalf("got %d source ids, want %d: got=%v", len(got), len(want), got)
|
||||
}
|
||||
for i := range got {
|
||||
if got[i] != want[i] {
|
||||
@@ -118,11 +119,58 @@ func TestListSpider91ViewkeysFindsMigratedVideos(t *testing.T) {
|
||||
}
|
||||
|
||||
// 不存在的 drive 返回空列表
|
||||
other, err := cat.ListSpider91Viewkeys(ctx, "no-such-drive")
|
||||
other, err := cat.ListCrawlerSourceIDs(ctx, "scriptcrawler", "no-such-drive")
|
||||
if err != nil {
|
||||
t.Fatalf("ListSpider91Viewkeys empty: %v", err)
|
||||
t.Fatalf("ListCrawlerSourceIDs empty: %v", err)
|
||||
}
|
||||
if len(other) != 0 {
|
||||
t.Fatalf("non-existent drive: got %v, want empty", other)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteVideoWithTombstonePreventsReimport(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "scriptcrawler-crawler-a-source004",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "source004.mp4",
|
||||
FileName: "source004.mp4",
|
||||
ContentHash: "ABCDEF",
|
||||
Title: "Deleted Source",
|
||||
Size: 2048,
|
||||
PreviewStatus: "ready",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert: %v", err)
|
||||
}
|
||||
|
||||
if err := cat.DeleteVideoWithTombstone(ctx, "scriptcrawler-crawler-a-source004"); err != nil {
|
||||
t.Fatalf("delete with tombstone: %v", err)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, "scriptcrawler-crawler-a-source004"); err != sql.ErrNoRows {
|
||||
t.Fatalf("get deleted video error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
deleted, err := cat.IsDeletedVideoCandidate(ctx, "scriptcrawler-crawler-a-source004", "crawler-a", "source004.mp4", "abcdef", "source004.mp4", 2048)
|
||||
if err != nil {
|
||||
t.Fatalf("check deleted candidate: %v", err)
|
||||
}
|
||||
if !deleted {
|
||||
t.Fatal("deleted candidate was not recognized")
|
||||
}
|
||||
sourceIDs, err := cat.ListCrawlerSourceIDs(ctx, "scriptcrawler", "crawler-a")
|
||||
if err != nil {
|
||||
t.Fatalf("ListCrawlerSourceIDs: %v", err)
|
||||
}
|
||||
if len(sourceIDs) != 1 || sourceIDs[0] != "source004" {
|
||||
t.Fatalf("source ids = %#v, want [source004]", sourceIDs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestListVideosKeywordMatchesFileName(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "p115-115-sone-089-4k",
|
||||
DriveID: "drive",
|
||||
FileID: "file-sone-089-4k",
|
||||
FileName: "www.98T.la@sone-089-4k.mp4",
|
||||
Title: "www.98T.la@sone-089",
|
||||
Author: "4k",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
items, total, err := cat.ListVideos(ctx, ListParams{
|
||||
Keyword: "www.98T.la@sone-089-4k.mp4",
|
||||
Page: 1,
|
||||
PageSize: 10,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("list videos: %v", err)
|
||||
}
|
||||
if total != 1 {
|
||||
t.Fatalf("total = %d, want 1", total)
|
||||
}
|
||||
if len(items) != 1 || items[0].ID != "p115-115-sone-089-4k" {
|
||||
t.Fatalf("items = %#v, want seeded video", items)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestIncrementViewStoresLastViewedAt(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "video-1",
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
Title: "Video 1",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
if _, err := cat.IncrementView(ctx, "video-1"); err != nil {
|
||||
t.Fatalf("increment view: %v", err)
|
||||
}
|
||||
got, err := cat.GetVideo(ctx, "video-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.Views != 1 {
|
||||
t.Fatalf("views = %d, want 1", got.Views)
|
||||
}
|
||||
if got.LastViewedAt.IsZero() {
|
||||
t.Fatal("last viewed time was not stored")
|
||||
}
|
||||
}
|
||||
|
||||
func TestListVideosRecentSortUsesLastViewedAt(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "old-view", DriveID: "drive", FileID: "old-view", Title: "Old View", PublishedAt: now.Add(3 * time.Hour), CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "recent-view", DriveID: "drive", FileID: "recent-view", Title: "Recent View", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "unviewed", DriveID: "drive", FileID: "unviewed", Title: "Unviewed", PublishedAt: now.Add(4 * time.Hour), CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`UPDATE videos SET last_viewed_at = CASE id
|
||||
WHEN 'old-view' THEN ?
|
||||
WHEN 'recent-view' THEN ?
|
||||
ELSE 0
|
||||
END`,
|
||||
now.Add(-time.Hour).UnixMilli(),
|
||||
now.Add(time.Hour).UnixMilli(),
|
||||
); err != nil {
|
||||
t.Fatalf("seed last_viewed_at: %v", err)
|
||||
}
|
||||
|
||||
items, _, err := cat.ListVideos(ctx, ListParams{Sort: "recent", Page: 1, PageSize: 3})
|
||||
if err != nil {
|
||||
t.Fatalf("list recent videos: %v", err)
|
||||
}
|
||||
if len(items) != 3 {
|
||||
t.Fatalf("items = %d, want 3", len(items))
|
||||
}
|
||||
got := []string{items[0].ID, items[1].ID, items[2].ID}
|
||||
want := []string{"recent-view", "old-view", "unviewed"}
|
||||
for i := range want {
|
||||
if got[i] != want[i] {
|
||||
t.Fatalf("recent order = %#v, want %#v", got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestListVideosHidesMissingDriveVideosWhenDrivesExist(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
ID: "active-drive",
|
||||
Kind: "pikpak",
|
||||
Name: "Active",
|
||||
RootID: "root",
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed drive: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{
|
||||
ID: "visible-video",
|
||||
DriveID: "active-drive",
|
||||
FileID: "visible-file",
|
||||
Title: "Visible",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
{
|
||||
ID: "orphan-video",
|
||||
DriveID: "deleted-drive",
|
||||
FileID: "orphan-file",
|
||||
Title: "Orphan",
|
||||
PublishedAt: now.Add(time.Second),
|
||||
CreatedAt: now.Add(time.Second),
|
||||
UpdatedAt: now.Add(time.Second),
|
||||
},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed video %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
items, total, err := cat.ListVideos(ctx, ListParams{Page: 1, PageSize: 10, Sort: "latest"})
|
||||
if err != nil {
|
||||
t.Fatalf("list videos: %v", err)
|
||||
}
|
||||
if total != 1 || len(items) != 1 || items[0].ID != "visible-video" {
|
||||
t.Fatalf("items total=%d items=%v, want only visible-video", total, items)
|
||||
}
|
||||
}
|
||||
@@ -19,15 +19,19 @@ CREATE TABLE IF NOT EXISTS videos (
|
||||
thumbnail_url TEXT,
|
||||
thumbnail_status TEXT DEFAULT 'pending', -- pending / ready / failed / skipped
|
||||
thumbnail_failures INTEGER DEFAULT 0, -- consecutive transient thumbnail generation failures
|
||||
preview_file_id TEXT, -- deprecated: 旧版回写网盘后的 teaser file id
|
||||
preview_local TEXT, -- 本地 teaser 路径(兜底)
|
||||
preview_status TEXT DEFAULT 'pending', -- pending / ready / failed
|
||||
preview_file_id TEXT, -- deprecated: 旧版回写网盘后的预览视频 file id
|
||||
preview_local TEXT, -- 本地预览视频路径(兜底)
|
||||
preview_status TEXT DEFAULT 'pending', -- pending / ready / failed / disabled
|
||||
transcode_status TEXT DEFAULT '', -- '' / pending / ready / skipped / failed(浏览器兼容性转码)
|
||||
transcode_error TEXT DEFAULT '',
|
||||
transcoded_file_id TEXT DEFAULT '', -- 转码产物在同一 drive 上的 fileID,播放源优先用它
|
||||
transcoded_size INTEGER DEFAULT 0,
|
||||
views INTEGER DEFAULT 0,
|
||||
last_viewed_at INTEGER DEFAULT 0,
|
||||
favorites INTEGER DEFAULT 0,
|
||||
comments INTEGER DEFAULT 0,
|
||||
likes INTEGER DEFAULT 0,
|
||||
dislikes INTEGER DEFAULT 0,
|
||||
category TEXT,
|
||||
hidden INTEGER DEFAULT 0, -- 1 = hidden from public display
|
||||
tags_manual INTEGER DEFAULT 0, -- 1 = user explicitly curated tags
|
||||
badges TEXT, -- JSON array
|
||||
@@ -70,17 +74,55 @@ CREATE TABLE IF NOT EXISTS deleted_tags (
|
||||
deleted_at INTEGER NOT NULL
|
||||
);
|
||||
|
||||
-- 管理员显式删除过的视频。用于防止后续扫描 / 爬虫把同一个源文件
|
||||
-- 再次入库;不代表原始云盘文件已被删除。
|
||||
CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
id TEXT PRIMARY KEY,
|
||||
drive_id TEXT NOT NULL DEFAULT '',
|
||||
file_id TEXT NOT NULL DEFAULT '',
|
||||
content_hash TEXT NOT NULL DEFAULT '',
|
||||
file_name TEXT NOT NULL DEFAULT '',
|
||||
size_bytes INTEGER NOT NULL DEFAULT 0,
|
||||
reason TEXT NOT NULL DEFAULT '',
|
||||
deleted_at INTEGER NOT NULL
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_deleted_videos_drive_file
|
||||
ON deleted_videos(drive_id, file_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_deleted_videos_drive_hash
|
||||
ON deleted_videos(drive_id, content_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_deleted_videos_drive_signature
|
||||
ON deleted_videos(drive_id, file_name, size_bytes);
|
||||
|
||||
-- 爬虫来源记录。用于把已确认重复的 source_id 写回 seen 列表,
|
||||
-- 避免后续爬虫反复下载同一个候选视频。
|
||||
CREATE TABLE IF NOT EXISTS crawler_seen_sources (
|
||||
kind TEXT NOT NULL,
|
||||
drive_id TEXT NOT NULL,
|
||||
source_id TEXT NOT NULL,
|
||||
status TEXT NOT NULL DEFAULT 'imported', -- imported / duplicate
|
||||
canonical_video_id TEXT NOT NULL DEFAULT '',
|
||||
sampled_sha256 TEXT NOT NULL DEFAULT '',
|
||||
size_bytes INTEGER NOT NULL DEFAULT 0,
|
||||
first_seen_at INTEGER NOT NULL,
|
||||
last_seen_at INTEGER NOT NULL,
|
||||
PRIMARY KEY (kind, drive_id, source_id)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_crawler_seen_sources_drive
|
||||
ON crawler_seen_sources(kind, drive_id, status);
|
||||
|
||||
-- 网盘账户
|
||||
CREATE TABLE IF NOT EXISTS drives (
|
||||
id TEXT PRIMARY KEY,
|
||||
kind TEXT NOT NULL, -- quark / p115 / pikpak / wopan / onedrive / googledrive / localstorage / spider91
|
||||
kind TEXT NOT NULL, -- quark / p115 / p123 / pikpak / wopan / guangyapan / onedrive / googledrive / localstorage / scriptcrawler
|
||||
name TEXT NOT NULL,
|
||||
root_id TEXT NOT NULL DEFAULT '0',
|
||||
scan_root_id TEXT, -- deprecated: 扫描起点固定等于 root_id
|
||||
credentials TEXT, -- JSON: cookie / refresh_token 等
|
||||
status TEXT DEFAULT 'disconnected', -- disconnected / ok / error
|
||||
last_error TEXT,
|
||||
-- 是否给该盘生成 teaser/封面:1 开 / 0 关。
|
||||
-- 是否给该盘生成预览视频:1 开 / 0 关。封面生成不受影响。
|
||||
-- 替代了早期的全局 preview.enabled 设置(保留旧 setting 行不再读)。
|
||||
teaser_enabled INTEGER NOT NULL DEFAULT 1,
|
||||
-- 扫描时要跳过的目录 ID 集合(JSON array of string)。命中其中任意一个的目录及其
|
||||
|
||||
@@ -165,171 +165,3 @@ func TestRandomVideosWithReadyThumbnailsExcluding(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandomVideosForPreferredVideoChoosesLeastPopulatedTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "current", DriveID: "drive", FileID: "f-current", Title: "current", Tags: []string{"common", "rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-1", DriveID: "drive", FileID: "f-common-1", Title: "common 1", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-2", DriveID: "drive", FileID: "f-common-2", Title: "common 2", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "rare-1", DriveID: "drive", FileID: "f-rare-1", Title: "rare 1", Tags: []string{"rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
tag, err := cat.LeastPopulatedVisibleUniqueTag(ctx, []string{"common", "rare"})
|
||||
if err != nil {
|
||||
t.Fatalf("least populated tag: %v", err)
|
||||
}
|
||||
if tag != "rare" {
|
||||
t.Fatalf("least populated tag = %q, want rare", tag)
|
||||
}
|
||||
|
||||
got, err := cat.RandomVideosForPreferredVideoExcluding(ctx, "current", []string{"current"}, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("random preferred: %v", err)
|
||||
}
|
||||
if len(got) != 1 || got[0].ID != "rare-1" {
|
||||
t.Fatalf("preferred result = %#v, want rare-1", videoIDs(got))
|
||||
}
|
||||
|
||||
got, err = cat.RandomVideosForPreferredVideoExcluding(ctx, "current", nil, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("random preferred without explicit exclude: %v", err)
|
||||
}
|
||||
if len(got) != 1 || got[0].ID == "current" {
|
||||
t.Fatalf("preferred result without explicit exclude = %#v, should not return current", videoIDs(got))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandomVideosForPreferredVideoFallsBackToFillBatch(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "current", DriveID: "drive", FileID: "f-current", Title: "current", Tags: []string{"common", "rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-1", DriveID: "drive", FileID: "f-common-1", Title: "common 1", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-2", DriveID: "drive", FileID: "f-common-2", Title: "common 2", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "rare-1", DriveID: "drive", FileID: "f-rare-1", Title: "rare 1", Tags: []string{"rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "hidden-rare", DriveID: "drive", FileID: "f-hidden-rare", Title: "hidden rare", Tags: []string{"rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
if err := cat.HideVideo(ctx, "hidden-rare"); err != nil {
|
||||
t.Fatalf("hide hidden-rare: %v", err)
|
||||
}
|
||||
|
||||
got, err := cat.RandomVideosForPreferredVideoExcluding(ctx, "current", []string{"current"}, 3)
|
||||
if err != nil {
|
||||
t.Fatalf("random preferred: %v", err)
|
||||
}
|
||||
ids := videoIDs(got)
|
||||
if len(ids) != 3 {
|
||||
t.Fatalf("result ids = %#v, want 3 items", ids)
|
||||
}
|
||||
for _, excluded := range []string{"current", "hidden-rare"} {
|
||||
if hasVideoID(ids, excluded) {
|
||||
t.Fatalf("result ids = %#v, should not include %s", ids, excluded)
|
||||
}
|
||||
}
|
||||
if !hasVideoID(ids, "rare-1") {
|
||||
t.Fatalf("result ids = %#v, want rare-1 from least populated tag", ids)
|
||||
}
|
||||
if len(uniqueVideoIDs(ids)) != len(ids) {
|
||||
t.Fatalf("result ids = %#v, want no duplicates", ids)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandomVideosForPreferredVideoFallbacksWhenPreferenceUnavailable(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "untagged", DriveID: "drive", FileID: "f-untagged", Title: "untagged", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "visible-1", DriveID: "drive", FileID: "f-visible-1", Title: "visible 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "visible-2", DriveID: "drive", FileID: "f-visible-2", Title: "visible 2", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
got, err := cat.RandomVideosForPreferredVideoExcluding(ctx, "missing", []string{"untagged"}, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("random missing preferred: %v", err)
|
||||
}
|
||||
if !sameVideoIDSet(videoIDs(got), []string{"visible-1", "visible-2"}) {
|
||||
t.Fatalf("missing preferred ids = %#v, want visible fallback videos", videoIDs(got))
|
||||
}
|
||||
|
||||
got, err = cat.RandomVideosForPreferredVideoExcluding(ctx, "untagged", []string{"untagged"}, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("random untagged preferred: %v", err)
|
||||
}
|
||||
if !sameVideoIDSet(videoIDs(got), []string{"visible-1", "visible-2"}) {
|
||||
t.Fatalf("untagged preferred ids = %#v, want visible fallback videos", videoIDs(got))
|
||||
}
|
||||
}
|
||||
|
||||
func videoIDs(videos []*Video) []string {
|
||||
ids := make([]string, 0, len(videos))
|
||||
for _, v := range videos {
|
||||
ids = append(ids, v.ID)
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
// hasVideoID reports whether want occurs anywhere in ids.
func hasVideoID(ids []string, want string) bool {
	for i := 0; i < len(ids); i++ {
		if ids[i] == want {
			return true
		}
	}
	return false
}
|
||||
|
||||
// uniqueVideoIDs collapses ids into a set: the returned map has one key per
// distinct ID. Callers compare its length with len(ids) to detect duplicates.
func uniqueVideoIDs(ids []string) map[string]struct{} {
	var present struct{}
	set := make(map[string]struct{}, len(ids))
	for i := range ids {
		set[ids[i]] = present
	}
	return set
}
|
||||
|
||||
// sameVideoIDSet reports whether a and b hold the same IDs with the same
// multiplicities, ignoring order.
func sameVideoIDSet(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	// Count every ID in a, then consume one count per element of b.
	remaining := make(map[string]int, len(a))
	for i := range a {
		remaining[a[i]]++
	}
	for i := range b {
		left := remaining[b[i]]
		if left == 0 {
			return false
		}
		remaining[b[i]] = left - 1
	}
	return true
}
|
||||
|
||||
+338
-199
@@ -66,10 +66,34 @@ func (c *Catalog) migrate(ctx context.Context) error {
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "thumbnail_failures", "INTEGER DEFAULT 0"); err != nil {
|
||||
return err
|
||||
}
|
||||
// drives.teaser_enabled:每盘 teaser 开关,替代旧的全局 preview.enabled。
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "last_viewed_at", "INTEGER DEFAULT 0"); err != nil {
|
||||
return err
|
||||
}
|
||||
// videos.transcode_*:浏览器兼容性转码状态。
|
||||
// status:''=未检测 / pending=已入队 / ready=已转码 / skipped=检测后无需转码 / failed=失败。
|
||||
// transcoded_file_id 指向转码产物在同一 drive 上的 fileID,播放源优先使用它。
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcode_status", "TEXT DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcode_error", "TEXT DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcoded_file_id", "TEXT DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcoded_size", "INTEGER DEFAULT 0"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.dropColumnIfExists(ctx, "videos", "category"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.ensureBaseVideoIndexes(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
// drives.teaser_enabled:每盘预览视频开关,替代旧的全局 preview.enabled。
|
||||
// 升级路径:直接让 ALTER TABLE 的 DEFAULT 1 兜底 —— 每个现存 drive 都默认开启,
|
||||
// 不读旧的 settings.preview.enabled 字段。这样老用户即便之前关过全局开关,
|
||||
// 升级后所有盘也都恢复"默认生成 teaser",跟新建保持一致。
|
||||
// 升级后所有盘也都恢复"默认生成预览视频",跟新建保持一致。
|
||||
if _, err := c.addColumnIfMissingReportNew(ctx, "drives", "teaser_enabled", "INTEGER NOT NULL DEFAULT 1"); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -79,6 +103,22 @@ func (c *Catalog) migrate(ctx context.Context) error {
|
||||
if err := c.addColumnIfMissing(ctx, "drives", "skip_dir_ids", "TEXT NOT NULL DEFAULT '[]'"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `
|
||||
CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
id TEXT PRIMARY KEY,
|
||||
drive_id TEXT NOT NULL DEFAULT '',
|
||||
file_id TEXT NOT NULL DEFAULT '',
|
||||
content_hash TEXT NOT NULL DEFAULT '',
|
||||
file_name TEXT NOT NULL DEFAULT '',
|
||||
size_bytes INTEGER NOT NULL DEFAULT 0,
|
||||
reason TEXT NOT NULL DEFAULT '',
|
||||
deleted_at INTEGER NOT NULL
|
||||
)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "deleted_videos", "reason", "TEXT NOT NULL DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.syncDriveScanRootIDToRootID(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -97,18 +137,45 @@ func (c *Catalog) migrate(ctx context.Context) error {
|
||||
if err := c.reconcileThumbnailStatusOnce(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.requeueSkippedPreviews(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_content_hash ON videos(content_hash)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_content_hash_created ON videos(content_hash, created_at, id)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_sampled_sha256 ON videos(size_bytes, sampled_sha256)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_sampled_sha256_created ON videos(size_bytes, sampled_sha256, created_at, id)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_hidden ON videos(hidden)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_visible_pub ON videos(COALESCE(hidden, 0), published_at DESC)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_last_viewed ON videos(last_viewed_at DESC)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_file_name_size ON videos(file_name, size_bytes)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_file_name_size_created ON videos(file_name, size_bytes, created_at, id)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_deleted_videos_drive_file ON deleted_videos(drive_id, file_id)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_deleted_videos_drive_hash ON deleted_videos(drive_id, content_hash)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_deleted_videos_drive_signature ON deleted_videos(drive_id, file_name, size_bytes)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.seedSystemTags(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -118,15 +185,18 @@ func (c *Catalog) migrate(ctx context.Context) error {
|
||||
if err := c.collapseAVCodeTags(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.createCollectionTagsFromCategories(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.classifySystemTags(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.clearVolatileOneDriveThumbnails(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.clearRemoteP123ThumbnailsOnce(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.clearRemoteThumbnails(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.hideZeroSizeVideosFromKnownDrives(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -141,6 +211,172 @@ func (c *Catalog) addColumnIfMissing(ctx context.Context, table, column, definit
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) dropColumnIfExists(ctx context.Context, table, column string) error {
|
||||
rows, err := c.db.QueryContext(ctx, `PRAGMA table_info(`+table+`)`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rows.Close()
|
||||
found := false
|
||||
for rows.Next() {
|
||||
var cid int
|
||||
var name, typ string
|
||||
var notNull int
|
||||
var defaultValue any
|
||||
var pk int
|
||||
if err := rows.Scan(&cid, &name, &typ, ¬Null, &defaultValue, &pk); err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.EqualFold(name, column) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
_ = rows.Close()
|
||||
return err
|
||||
}
|
||||
if err := rows.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
if !found {
|
||||
return nil
|
||||
}
|
||||
if _, err = c.db.ExecContext(ctx, `ALTER TABLE `+table+` DROP COLUMN `+column); err == nil {
|
||||
return nil
|
||||
}
|
||||
if table == "videos" && strings.EqualFold(column, "category") {
|
||||
log.Printf("[catalog] native drop column videos.category failed, rebuilding videos table without category: %v", err)
|
||||
return c.rebuildVideosTableWithoutCategory(ctx)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) ensureBaseVideoIndexes(ctx context.Context) error {
|
||||
for _, stmt := range []string{
|
||||
`CREATE INDEX IF NOT EXISTS idx_videos_drive ON videos(drive_id, file_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_videos_pub ON videos(published_at DESC)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_videos_views ON videos(views DESC)`,
|
||||
} {
|
||||
if _, err := c.db.ExecContext(ctx, stmt); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// currentVideoColumnNames lists the columns copied when the videos table is
// rebuilt without the legacy category column. The names and their order match
// the column definitions in createVideosWithoutCategorySQL.
var currentVideoColumnNames = []string{
	// identity and file fingerprinting
	"id", "drive_id", "file_id", "file_name",
	"content_hash", "sampled_sha256", "fingerprint_status", "fingerprint_error",
	"parent_id",
	// descriptive metadata
	"title", "author", "tags",
	"duration_seconds", "size_bytes", "ext", "quality",
	// thumbnail, preview and transcode state
	"thumbnail_url", "thumbnail_status", "thumbnail_failures",
	"preview_file_id", "preview_local", "preview_status",
	"transcode_status", "transcode_error", "transcoded_file_id", "transcoded_size",
	// counters and flags
	"views", "last_viewed_at", "favorites", "comments", "likes", "dislikes",
	"hidden", "tags_manual",
	// presentation and timestamps
	"badges", "description",
	"published_at", "created_at", "updated_at",
}
|
||||
|
||||
// createVideosWithoutCategorySQL creates the temporary table
// videos_category_drop_new, which has the videos columns named in
// currentVideoColumnNames and no category column.
// rebuildVideosTableWithoutCategory fills it from videos and then renames it
// to videos.
const createVideosWithoutCategorySQL = `
CREATE TABLE videos_category_drop_new (
id TEXT PRIMARY KEY,
drive_id TEXT NOT NULL,
file_id TEXT NOT NULL,
file_name TEXT DEFAULT '',
content_hash TEXT DEFAULT '',
sampled_sha256 TEXT DEFAULT '',
fingerprint_status TEXT DEFAULT 'pending',
fingerprint_error TEXT DEFAULT '',
parent_id TEXT,
title TEXT NOT NULL,
author TEXT,
tags TEXT,
duration_seconds INTEGER DEFAULT 0,
size_bytes INTEGER DEFAULT 0,
ext TEXT,
quality TEXT,
thumbnail_url TEXT,
thumbnail_status TEXT DEFAULT 'pending',
thumbnail_failures INTEGER DEFAULT 0,
preview_file_id TEXT,
preview_local TEXT,
preview_status TEXT DEFAULT 'pending',
transcode_status TEXT DEFAULT '',
transcode_error TEXT DEFAULT '',
transcoded_file_id TEXT DEFAULT '',
transcoded_size INTEGER DEFAULT 0,
views INTEGER DEFAULT 0,
last_viewed_at INTEGER DEFAULT 0,
favorites INTEGER DEFAULT 0,
comments INTEGER DEFAULT 0,
likes INTEGER DEFAULT 0,
dislikes INTEGER DEFAULT 0,
hidden INTEGER DEFAULT 0,
tags_manual INTEGER DEFAULT 0,
badges TEXT,
description TEXT,
published_at INTEGER NOT NULL,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
)`
|
||||
|
||||
func (c *Catalog) rebuildVideosTableWithoutCategory(ctx context.Context) error {
|
||||
tx, err := c.db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
if _, err := tx.ExecContext(ctx, `DROP TABLE IF EXISTS videos_category_drop_new`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, createVideosWithoutCategorySQL); err != nil {
|
||||
return err
|
||||
}
|
||||
cols := strings.Join(currentVideoColumnNames, ", ")
|
||||
if _, err := tx.ExecContext(ctx,
|
||||
`INSERT INTO videos_category_drop_new (`+cols+`) SELECT `+cols+` FROM videos`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, `DROP TABLE videos`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, `ALTER TABLE videos_category_drop_new RENAME TO videos`); err != nil {
|
||||
return err
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
// addColumnIfMissingReportNew 与 addColumnIfMissing 同步,但额外返回 added=true 表示
|
||||
// 本次确实创建了新列(即旧 schema 缺这列),方便调用方仅在迁移路径里补做一次性
|
||||
// 数据初始化(如把全局 setting 同步到新 per-drive 字段)。
|
||||
@@ -175,7 +411,7 @@ func (c *Catalog) addColumnIfMissingReportNew(ctx context.Context, table, column
|
||||
// 设为 1(开启),但仅在历史上没跑过这条迁移时执行(用 marker setting 记号)。
|
||||
//
|
||||
// 为什么需要:早期短暂存在过的版本会从旧的全局 preview.enabled = "0" 同步到
|
||||
// 所有 drive 的 teaser_enabled = 0;用户报告升级后页面全显示"Teaser 关"。新版
|
||||
// 所有 drive 的 teaser_enabled = 0;用户报告升级后页面全显示"预览视频关"。新版
|
||||
// 约定 per-drive 默认开启,所以这里跑一次性修正。
|
||||
//
|
||||
// 幂等保证:marker setting 设过了就不再跑,确保用户在 UI 里把某盘关了不会被
|
||||
@@ -242,6 +478,24 @@ UPDATE videos
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) requeueSkippedPreviews(ctx context.Context) error {
|
||||
res, err := c.db.ExecContext(ctx, `
|
||||
UPDATE videos
|
||||
SET preview_file_id = '',
|
||||
preview_local = '',
|
||||
preview_status = 'pending',
|
||||
updated_at = ?
|
||||
WHERE COALESCE(preview_status, 'pending') = 'skipped'
|
||||
`, time.Now().UnixMilli())
|
||||
if err != nil {
|
||||
return fmt.Errorf("requeue skipped previews: %w", err)
|
||||
}
|
||||
if affected, err := res.RowsAffected(); err == nil && affected > 0 {
|
||||
log.Printf("[catalog] requeued %d skipped preview(s) for generation", affected)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) clearVolatileOneDriveThumbnails(ctx context.Context) error {
|
||||
// 把 OneDrive 过期的 mediap.svc.ms thumb URL 清空,让 worker 重新抽帧生成本地封面。
|
||||
// 同步把 thumbnail_status 重置为 'pending':清空后 url 是空的,本应进 worker 重做,
|
||||
@@ -257,6 +511,78 @@ UPDATE videos
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) clearRemoteP123ThumbnailsOnce(ctx context.Context) error {
|
||||
// 123网盘列表返回的缩略图尺寸和稳定性都不适合作为站内封面;清空历史写入的
|
||||
// 远程 URL,让封面 worker 统一从视频直链抽帧生成本地 /p/thumb/<id>。
|
||||
const markerKey = "videos.p123.remote_thumbnails_cleared"
|
||||
marker, err := c.GetSetting(ctx, markerKey, "")
|
||||
if err != nil {
|
||||
return fmt.Errorf("read %s marker: %w", markerKey, err)
|
||||
}
|
||||
if strings.TrimSpace(marker) == "1" {
|
||||
return nil
|
||||
}
|
||||
|
||||
var p123Drives int
|
||||
if err := c.db.QueryRowContext(ctx, `SELECT COUNT(*) FROM drives WHERE kind = 'p123'`).Scan(&p123Drives); err != nil {
|
||||
return fmt.Errorf("count p123 drives: %w", err)
|
||||
}
|
||||
if p123Drives == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
res, err := c.db.ExecContext(ctx, `
|
||||
UPDATE videos
|
||||
SET thumbnail_url = '',
|
||||
thumbnail_status = 'pending',
|
||||
thumbnail_failures = 0,
|
||||
updated_at = ?
|
||||
WHERE EXISTS (
|
||||
SELECT 1
|
||||
FROM drives
|
||||
WHERE drives.id = videos.drive_id
|
||||
AND drives.kind = 'p123'
|
||||
)
|
||||
AND (
|
||||
lower(COALESCE(thumbnail_url, '')) LIKE 'http://%'
|
||||
OR lower(COALESCE(thumbnail_url, '')) LIKE 'https://%'
|
||||
)
|
||||
`, time.Now().UnixMilli())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if affected, err := res.RowsAffected(); err == nil && affected > 0 {
|
||||
log.Printf("[catalog] cleared %d remote 123pan thumbnail(s) for local regeneration", affected)
|
||||
}
|
||||
if err := c.SetSetting(ctx, markerKey, "1"); err != nil {
|
||||
return fmt.Errorf("write %s marker: %w", markerKey, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) clearRemoteThumbnails(ctx context.Context) error {
|
||||
// 不再使用网盘侧返回的远程缩略图。清空历史 http/https thumbnail_url 后,
|
||||
// 封面 worker 会重新从视频中间帧生成本地 /p/thumb/<id>。
|
||||
res, err := c.db.ExecContext(ctx, `
|
||||
UPDATE videos
|
||||
SET thumbnail_url = '',
|
||||
thumbnail_status = 'pending',
|
||||
thumbnail_failures = 0,
|
||||
updated_at = ?
|
||||
WHERE (
|
||||
lower(COALESCE(thumbnail_url, '')) LIKE 'http://%'
|
||||
OR lower(COALESCE(thumbnail_url, '')) LIKE 'https://%'
|
||||
)
|
||||
`, time.Now().UnixMilli())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if affected, err := res.RowsAffected(); err == nil && affected > 0 {
|
||||
log.Printf("[catalog] cleared %d remote thumbnail(s) for local regeneration", affected)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) hideZeroSizeVideosFromKnownDrives(ctx context.Context) error {
|
||||
_, err := c.db.ExecContext(ctx, `
|
||||
UPDATE videos
|
||||
@@ -340,61 +666,6 @@ WHERE COALESCE(tags, '') NOT IN ('', '[]', 'null')
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) createCollectionTagsFromCategories(ctx context.Context) error {
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT category, COUNT(*) FROM videos
|
||||
WHERE COALESCE(category, '') != ''
|
||||
GROUP BY category`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
type categoryStat struct {
|
||||
category string
|
||||
count int
|
||||
}
|
||||
var categories []categoryStat
|
||||
for rows.Next() {
|
||||
var stat categoryStat
|
||||
if err := rows.Scan(&stat.category, &stat.count); err != nil {
|
||||
return err
|
||||
}
|
||||
categories = append(categories, stat)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rows.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, stat := range categories {
|
||||
if isAVCodePollutedLabel(stat.category) {
|
||||
if _, err := c.ensureTag(ctx, avTagLabel, fixedtags.AliasesFor(avTagLabel), "system"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addTagToVideosByCategory(ctx, stat.category, avTagLabel, "auto"); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
if stat.count < 3 {
|
||||
continue
|
||||
}
|
||||
if !LooksLikeCollectionTag(stat.category) {
|
||||
continue
|
||||
}
|
||||
if c.tagDeleted(ctx, stat.category) {
|
||||
continue
|
||||
}
|
||||
if _, err := c.ensureTag(ctx, stat.category, nil, "collection"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addCollectionTagToVideos(ctx, stat.category); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) CreateTagAndClassify(ctx context.Context, label string, aliases []string, source string) (int, error) {
|
||||
tag, err := c.ensureTag(ctx, label, aliases, source)
|
||||
if err != nil {
|
||||
@@ -688,41 +959,6 @@ func (c *Catalog) MatchTags(ctx context.Context, text string) ([]string, error)
|
||||
return sortLabelsByTagOrder(tags, uniqueStrings(out)), nil
|
||||
}
|
||||
|
||||
func (c *Catalog) EnsureCollectionTag(ctx context.Context, label string) (string, bool, error) {
|
||||
label = cleanTagLabel(label)
|
||||
if isAVCodePollutedLabel(label) {
|
||||
if _, err := c.ensureTag(ctx, avTagLabel, fixedtags.AliasesFor(avTagLabel), "system"); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
if err := c.addTagToVideosByCategory(ctx, label, avTagLabel, "auto"); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
return avTagLabel, true, nil
|
||||
}
|
||||
if !LooksLikeCollectionTag(label) {
|
||||
return "", false, nil
|
||||
}
|
||||
if c.tagDeleted(ctx, label) {
|
||||
return "", false, nil
|
||||
}
|
||||
if !c.tagExists(ctx, label) {
|
||||
count, err := c.categoryVideoCount(ctx, label)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
if count < 2 {
|
||||
return "", false, nil
|
||||
}
|
||||
}
|
||||
if _, err := c.ensureTag(ctx, label, nil, "collection"); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
if err := c.addCollectionTagToVideos(ctx, label); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
return label, true, nil
|
||||
}
|
||||
|
||||
func (c *Catalog) ensureTag(ctx context.Context, label string, aliases []string, source string) (Tag, error) {
|
||||
label = cleanTagLabel(label)
|
||||
if label == "" {
|
||||
@@ -775,7 +1011,7 @@ func (c *Catalog) classifyTag(ctx context.Context, tag Tag) (int, error) {
|
||||
return 0, err
|
||||
}
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT id, title, COALESCE(author, ''), COALESCE(category, ''), COALESCE(tags_manual, 0)
|
||||
SELECT id, title, COALESCE(author, ''), COALESCE(tags_manual, 0)
|
||||
FROM videos`)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
@@ -784,15 +1020,15 @@ FROM videos`)
|
||||
|
||||
classified := 0
|
||||
for rows.Next() {
|
||||
var videoID, title, author, category string
|
||||
var videoID, title, author string
|
||||
var manual int
|
||||
if err := rows.Scan(&videoID, &title, &author, &category, &manual); err != nil {
|
||||
if err := rows.Scan(&videoID, &title, &author, &manual); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if manual == 1 {
|
||||
continue
|
||||
}
|
||||
matcher := normalizeTagText(title + " " + author + " " + category)
|
||||
matcher := normalizeTagText(title + " " + author)
|
||||
if !matcher.contains(tag.Label) {
|
||||
matchedAlias := false
|
||||
for _, alias := range tag.Aliases {
|
||||
@@ -924,54 +1160,6 @@ func (c *Catalog) insertVideoTag(ctx context.Context, videoID string, tagID int6
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) addCollectionTagToVideos(ctx context.Context, category string) error {
|
||||
return c.addTagToVideosByCategory(ctx, category, category, "auto")
|
||||
}
|
||||
|
||||
func (c *Catalog) addTagToVideosByCategory(ctx context.Context, category, label, source string) error {
|
||||
tag, err := c.getTagByLabel(ctx, label)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT v.id
|
||||
FROM videos v
|
||||
WHERE v.category = ?
|
||||
AND COALESCE(v.tags_manual, 0) = 0
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM video_tags vt
|
||||
WHERE vt.video_id = v.id
|
||||
AND vt.tag_id = ?
|
||||
)`, category, tag.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var videoIDs []string
|
||||
for rows.Next() {
|
||||
var videoID string
|
||||
if err := rows.Scan(&videoID); err != nil {
|
||||
return err
|
||||
}
|
||||
videoIDs = append(videoIDs, videoID)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rows.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, videoID := range videoIDs {
|
||||
if err := c.insertVideoTag(ctx, videoID, tag.ID, source); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.syncVideoTagsJSON(ctx, videoID, false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) collapseAVCodeTags(ctx context.Context) error {
|
||||
if _, err := c.ensureTag(ctx, avTagLabel, fixedtags.AliasesFor(avTagLabel), "system"); err != nil {
|
||||
return err
|
||||
@@ -1161,12 +1349,6 @@ func (c *Catalog) restoreDeletedTag(ctx context.Context, label string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) categoryVideoCount(ctx context.Context, category string) (int, error) {
|
||||
var count int
|
||||
err := c.db.QueryRowContext(ctx, `SELECT COUNT(*) FROM videos WHERE category = ?`, category).Scan(&count)
|
||||
return count, err
|
||||
}
|
||||
|
||||
func (c *Catalog) getTagByLabelTx(ctx context.Context, tx *sql.Tx, label string) (Tag, error) {
|
||||
row := tx.QueryRowContext(ctx,
|
||||
`SELECT id, label, aliases, source, 0 FROM tags WHERE label = ? COLLATE NOCASE`,
|
||||
@@ -1316,46 +1498,6 @@ func isShortASCIIWord(s string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func LooksLikeCollectionTag(label string) bool {
|
||||
label = cleanTagLabel(label)
|
||||
if label == "" {
|
||||
return false
|
||||
}
|
||||
if isAVCodePollutedLabel(label) {
|
||||
return false
|
||||
}
|
||||
runes := []rune(label)
|
||||
if len(runes) < 2 || len(runes) > 24 {
|
||||
return false
|
||||
}
|
||||
lower := strings.ToLower(label)
|
||||
blocked := map[string]bool{
|
||||
"v": true, "pv": true, "my pack": true, "my upload": true,
|
||||
"视频": true, "视频1": true, "第一直播": true, "男人必备": true,
|
||||
"瑟女聚集地": true, "成人色游": true, "ai女友": true,
|
||||
}
|
||||
if blocked[lower] {
|
||||
return false
|
||||
}
|
||||
hasLetter := false
|
||||
for _, r := range label {
|
||||
if unicode.IsLetter(r) {
|
||||
hasLetter = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasLetter {
|
||||
return false
|
||||
}
|
||||
for _, r := range label {
|
||||
switch r {
|
||||
case ',', '。', '!', '?', ';', '、', ':', '~', '~':
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func IsAVCode(label string) bool {
|
||||
label = cleanTagLabel(label)
|
||||
if label == "" {
|
||||
@@ -1437,9 +1579,7 @@ func sortLabelsByTagOrder(tags []Tag, labels []string) []string {
|
||||
return labels
|
||||
}
|
||||
|
||||
// pruneOrphanCollectionTags 删除所有 source='collection' 且不再被任何 video_tags 引用的标签。
|
||||
// 在 migrate 末尾调用,相当于启动时自愈:之前 DeleteVideo 没顺带清理留下的孤儿,会在重启时被收回。
|
||||
// 只动 collection:system 是固定标签需保留;user 是管理员手动建的;auto/legacy 默认有视频在引用。
|
||||
// pruneOrphanCollectionTags 删除旧版本生成的 source='collection' 孤儿标签。
|
||||
func (c *Catalog) pruneOrphanCollectionTags(ctx context.Context) error {
|
||||
_, err := c.db.ExecContext(ctx, `
|
||||
DELETE FROM tags
|
||||
@@ -1448,8 +1588,7 @@ DELETE FROM tags
|
||||
return err
|
||||
}
|
||||
|
||||
// pruneOrphanCollectionTagsByID 在事务里检查一组候选 tag_id,删除其中
|
||||
// source='collection' 且已经没有视频引用的标签。供 DeleteVideo 调用。
|
||||
// pruneOrphanCollectionTagsByID 在事务里检查并删除旧版本生成的孤儿 collection 标签。
|
||||
func pruneOrphanCollectionTagsByID(ctx context.Context, tx *sql.Tx, tagIDs []int64) error {
|
||||
for _, tagID := range tagIDs {
|
||||
var src string
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -136,7 +137,6 @@ func TestCreateTagAndClassifyAddsTagToMatchingExistingVideos(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
Title: "清纯短发合集",
|
||||
Category: "普通目录",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -148,7 +148,6 @@ func TestCreateTagAndClassifyAddsTagToMatchingExistingVideos(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: "file-2",
|
||||
Title: "普通标题",
|
||||
Category: "普通目录",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -232,52 +231,6 @@ func TestDeleteTagRemovesTagFromVideos(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteTagSuppressesAutomaticCollectionRecreation(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, id := range []string{"video-1", "video-2"} {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: id,
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "合集视频",
|
||||
Category: "sunny",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
if label, ok, err := cat.EnsureCollectionTag(ctx, "sunny"); err != nil || !ok || label != "sunny" {
|
||||
t.Fatalf("ensure collection = %q, %v, %v; want sunny true nil", label, ok, err)
|
||||
}
|
||||
tag := mustTagByLabel(t, ctx, cat, "sunny")
|
||||
if _, err := cat.DeleteTag(ctx, tag.ID); err != nil {
|
||||
t.Fatalf("delete tag: %v", err)
|
||||
}
|
||||
|
||||
if label, ok, err := cat.EnsureCollectionTag(ctx, "sunny"); err != nil || ok || label != "" {
|
||||
t.Fatalf("ensure deleted collection = %q, %v, %v; want empty false nil", label, ok, err)
|
||||
}
|
||||
for _, tag := range mustListTags(t, ctx, cat) {
|
||||
if tag.Label == "sunny" {
|
||||
t.Fatal("deleted collection tag was recreated automatically")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateTagAndClassifyRestoresDeletedTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
@@ -343,13 +296,13 @@ func TestEnsureTagForVideoIDPrefixBackfillsSourceTag(t *testing.T) {
|
||||
id string
|
||||
manual bool
|
||||
}{
|
||||
{id: "spider91-91-spider-1200001"},
|
||||
{id: "spider91-91-spider-1200002", manual: true},
|
||||
{id: "spider91-other-1200003"},
|
||||
{id: "scriptcrawler-crawler-a-source001"},
|
||||
{id: "scriptcrawler-crawler-a-source002", manual: true},
|
||||
{id: "scriptcrawler-other-source003"},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: seed.id,
|
||||
DriveID: "91-spider",
|
||||
DriveID: "crawler-a",
|
||||
FileID: seed.id + ".mp4",
|
||||
Title: "legacy title without source text",
|
||||
PublishedAt: now,
|
||||
@@ -365,28 +318,28 @@ func TestEnsureTagForVideoIDPrefixBackfillsSourceTag(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
added, err := cat.EnsureTagForVideoIDPrefix(ctx, "spider91-91-spider-", "91porn", nil, "system")
|
||||
added, err := cat.EnsureTagForVideoIDPrefix(ctx, "scriptcrawler-crawler-a-", "crawler-tag", nil, "system")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure prefix tag: %v", err)
|
||||
}
|
||||
if added != 1 {
|
||||
t.Fatalf("added = %d, want 1", added)
|
||||
}
|
||||
got, err := cat.GetVideo(ctx, "spider91-91-spider-1200001")
|
||||
got, err := cat.GetVideo(ctx, "scriptcrawler-crawler-a-source001")
|
||||
if err != nil {
|
||||
t.Fatalf("get tagged video: %v", err)
|
||||
}
|
||||
if !sameStrings(got.Tags, []string{"91porn"}) {
|
||||
t.Fatalf("tagged video tags = %#v, want 91porn", got.Tags)
|
||||
if !sameStrings(got.Tags, []string{"crawler-tag"}) {
|
||||
t.Fatalf("tagged video tags = %#v, want crawler-tag", got.Tags)
|
||||
}
|
||||
manual, err := cat.GetVideo(ctx, "spider91-91-spider-1200002")
|
||||
manual, err := cat.GetVideo(ctx, "scriptcrawler-crawler-a-source002")
|
||||
if err != nil {
|
||||
t.Fatalf("get manual video: %v", err)
|
||||
}
|
||||
if len(manual.Tags) != 0 {
|
||||
t.Fatalf("manual video tags = %#v, want unchanged", manual.Tags)
|
||||
}
|
||||
other, err := cat.GetVideo(ctx, "spider91-other-1200003")
|
||||
other, err := cat.GetVideo(ctx, "scriptcrawler-other-source003")
|
||||
if err != nil {
|
||||
t.Fatalf("get other prefix video: %v", err)
|
||||
}
|
||||
@@ -486,7 +439,6 @@ func TestMigrateDoesNotRewriteAlreadySyncedVideoTags(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "巨乳后入合集",
|
||||
Category: "Better Call Saul S03",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -585,6 +537,25 @@ CREATE TABLE videos (
|
||||
)`); err != nil {
|
||||
t.Fatalf("create legacy videos table: %v", err)
|
||||
}
|
||||
nowMillis := time.Now().UnixMilli()
|
||||
if _, err := db.Exec(`
|
||||
INSERT INTO videos (
|
||||
id, drive_id, file_id, content_hash, parent_id, title, author, tags,
|
||||
duration_seconds, size_bytes, ext, quality, thumbnail_url, preview_file_id,
|
||||
preview_local, preview_status, views, favorites, comments, likes, dislikes,
|
||||
category, hidden, tags_manual, badges, description, published_at, created_at, updated_at
|
||||
) VALUES (
|
||||
'legacy-video', 'drive', 'file-legacy', 'hash-legacy', 'parent-1', 'Legacy Video', 'Legacy Author', '["旧标签"]',
|
||||
180, 1024, 'mp4', 'HD', '/thumb.jpg', 'preview-file',
|
||||
'/preview.mp4', 'ready', 7, 1, 2, 3, 4,
|
||||
'legacy-category', 0, 0, '["精选"]', 'legacy description', ?, ?, ?
|
||||
)`,
|
||||
nowMillis, nowMillis, nowMillis); err != nil {
|
||||
t.Fatalf("insert legacy video: %v", err)
|
||||
}
|
||||
if _, err := db.Exec(`CREATE INDEX idx_legacy_videos_category ON videos(category)`); err != nil {
|
||||
t.Fatalf("create legacy category index: %v", err)
|
||||
}
|
||||
if err := db.Close(); err != nil {
|
||||
t.Fatalf("close raw db: %v", err)
|
||||
}
|
||||
@@ -603,6 +574,45 @@ CREATE TABLE videos (
|
||||
if err := cat.db.QueryRow(`SELECT COALESCE(file_name, '') FROM videos LIMIT 1`).Scan(&fileNameDefault); err != nil && err != sql.ErrNoRows {
|
||||
t.Fatalf("query migrated file_name column: %v", err)
|
||||
}
|
||||
if fileNameDefault != "" {
|
||||
t.Fatalf("file_name default = %q, want empty", fileNameDefault)
|
||||
}
|
||||
if hasColumn(t, cat, "videos", "category") {
|
||||
t.Fatal("legacy category column was not dropped")
|
||||
}
|
||||
if indexExists(t, cat, "idx_legacy_videos_category") {
|
||||
t.Fatal("legacy category index was not dropped")
|
||||
}
|
||||
for _, index := range []string{"idx_videos_drive", "idx_videos_pub", "idx_videos_views"} {
|
||||
if !indexExists(t, cat, index) {
|
||||
t.Fatalf("base video index %s was not recreated", index)
|
||||
}
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
got, err := cat.GetVideo(ctx, "legacy-video")
|
||||
if err != nil {
|
||||
t.Fatalf("get migrated legacy video: %v", err)
|
||||
}
|
||||
if got.Title != "Legacy Video" || got.Author != "Legacy Author" || got.Views != 7 {
|
||||
t.Fatalf("migrated video lost data: %#v", got)
|
||||
}
|
||||
if !sameStrings(got.Tags, []string{"旧标签"}) {
|
||||
t.Fatalf("migrated video tags = %#v, want legacy tag preserved", got.Tags)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "new-video",
|
||||
DriveID: "drive",
|
||||
FileID: "file-new",
|
||||
Title: "New Video",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert after migration: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetManualVideoTagsRejectsUnknownLabels(t *testing.T) {
|
||||
@@ -706,31 +716,6 @@ func TestCreateTagAndClassifyMapsAVCodeLabelToAV(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLooksLikeCollectionTagRejectsAVCodes(t *testing.T) {
|
||||
cases := []string{
|
||||
"DASS-499-C",
|
||||
"dass-499-c",
|
||||
"ADN-778",
|
||||
"SONE-247-C",
|
||||
"JUQ-502-UC",
|
||||
"ABF-032",
|
||||
"SSIS-233",
|
||||
"MIDA-607",
|
||||
"cc-1750027",
|
||||
"FC2-PPV-74663555",
|
||||
"ADN-778-FHD(1)",
|
||||
"ADN-778-中文字幕",
|
||||
"[44x.me]idbd-786",
|
||||
"NTRH-018_FHD_CH",
|
||||
"390JAC-233",
|
||||
}
|
||||
for _, label := range cases {
|
||||
if LooksLikeCollectionTag(label) {
|
||||
t.Fatalf("LooksLikeCollectionTag(%q) = true, want false", label)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateCollapsesAVCodeTagsIntoAV(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
@@ -759,7 +744,6 @@ func TestMigrateCollapsesAVCodeTagsIntoAV(t *testing.T) {
|
||||
FileID: seed.id,
|
||||
Title: seed.label + " sample",
|
||||
Tags: []string{seed.label},
|
||||
Category: seed.label,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -804,7 +788,7 @@ func TestMigrateCollapsesAVCodeTagsIntoAV(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateClearsVolatileOneDriveThumbnailURLs(t *testing.T) {
|
||||
func TestMigrateClearsRemoteThumbnailURLs(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -827,6 +811,36 @@ func TestMigrateClearsVolatileOneDriveThumbnailURLs(t *testing.T) {
|
||||
}); err != nil {
|
||||
t.Fatalf("seed onedrive: %v", err)
|
||||
}
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
ID: "p123-main",
|
||||
Kind: "p123",
|
||||
Name: "123Pan",
|
||||
RootID: "root",
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed p123: %v", err)
|
||||
}
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
ID: "pikpak-main",
|
||||
Kind: "pikpak",
|
||||
Name: "PikPak",
|
||||
RootID: "root",
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed pikpak: %v", err)
|
||||
}
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
ID: "crawler-main",
|
||||
Kind: "scriptcrawler",
|
||||
Name: "Crawler",
|
||||
RootID: "/",
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed crawler: %v", err)
|
||||
}
|
||||
|
||||
videos := []*Video{
|
||||
{
|
||||
@@ -850,6 +864,34 @@ func TestMigrateClearsVolatileOneDriveThumbnailURLs(t *testing.T) {
|
||||
Title: "PikPak",
|
||||
ThumbnailURL: "https://sg-thumbnail-drive.mypikpak.net/v0/screenshot-thumbnails/demo",
|
||||
},
|
||||
{
|
||||
ID: "p123-remote-thumb-video",
|
||||
DriveID: "p123-main",
|
||||
FileID: "file-4",
|
||||
Title: "123Pan remote thumb",
|
||||
ThumbnailURL: "https://download.123pan.com/thumb/file_70_70?w=70&h=70",
|
||||
},
|
||||
{
|
||||
ID: "p123-local-thumb-video",
|
||||
DriveID: "p123-main",
|
||||
FileID: "file-5",
|
||||
Title: "123Pan local thumb",
|
||||
ThumbnailURL: "/p/thumb/p123-local-thumb-video",
|
||||
},
|
||||
{
|
||||
ID: "scriptcrawler-crawler-main-local-thumb",
|
||||
DriveID: "crawler-main",
|
||||
FileID: "file-6",
|
||||
Title: "Crawler local thumb",
|
||||
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-main-local-thumb",
|
||||
},
|
||||
{
|
||||
ID: "scriptcrawler-crawler-main-remote-thumb",
|
||||
DriveID: "crawler-main",
|
||||
FileID: "file-7",
|
||||
Title: "Crawler remote thumb",
|
||||
ThumbnailURL: "https://example.invalid/crawler-thumb.jpg",
|
||||
},
|
||||
}
|
||||
for _, v := range videos {
|
||||
v.PublishedAt = now
|
||||
@@ -884,8 +926,47 @@ func TestMigrateClearsVolatileOneDriveThumbnailURLs(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("get pikpak video: %v", err)
|
||||
}
|
||||
if pikpak.ThumbnailURL == "" {
|
||||
t.Fatal("pikpak thumbnail was cleared")
|
||||
if pikpak.ThumbnailURL != "" {
|
||||
t.Fatalf("pikpak thumbnail = %q, want cleared", pikpak.ThumbnailURL)
|
||||
}
|
||||
|
||||
p123Remote, err := cat.GetVideo(ctx, "p123-remote-thumb-video")
|
||||
if err != nil {
|
||||
t.Fatalf("get p123 remote thumb video: %v", err)
|
||||
}
|
||||
if p123Remote.ThumbnailURL != "" {
|
||||
t.Fatalf("p123 remote thumbnail = %q, want cleared", p123Remote.ThumbnailURL)
|
||||
}
|
||||
var p123Status string
|
||||
if err := cat.db.QueryRowContext(ctx, `SELECT thumbnail_status FROM videos WHERE id = ?`, "p123-remote-thumb-video").Scan(&p123Status); err != nil {
|
||||
t.Fatalf("read p123 thumbnail status: %v", err)
|
||||
}
|
||||
if p123Status != "pending" {
|
||||
t.Fatalf("p123 remote thumbnail_status = %q, want pending", p123Status)
|
||||
}
|
||||
|
||||
p123Local, err := cat.GetVideo(ctx, "p123-local-thumb-video")
|
||||
if err != nil {
|
||||
t.Fatalf("get p123 local thumb video: %v", err)
|
||||
}
|
||||
if p123Local.ThumbnailURL != "/p/thumb/p123-local-thumb-video" {
|
||||
t.Fatalf("p123 local thumbnail = %q, want preserved", p123Local.ThumbnailURL)
|
||||
}
|
||||
|
||||
crawlerLocal, err := cat.GetVideo(ctx, "scriptcrawler-crawler-main-local-thumb")
|
||||
if err != nil {
|
||||
t.Fatalf("get crawler local thumb video: %v", err)
|
||||
}
|
||||
if crawlerLocal.ThumbnailURL != "/p/thumb/scriptcrawler-crawler-main-local-thumb" {
|
||||
t.Fatalf("crawler local thumbnail = %q, want preserved", crawlerLocal.ThumbnailURL)
|
||||
}
|
||||
|
||||
crawlerRemote, err := cat.GetVideo(ctx, "scriptcrawler-crawler-main-remote-thumb")
|
||||
if err != nil {
|
||||
t.Fatalf("get crawler remote thumb video: %v", err)
|
||||
}
|
||||
if crawlerRemote.ThumbnailURL != "" {
|
||||
t.Fatalf("crawler remote thumbnail = %q, want cleared", crawlerRemote.ThumbnailURL)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1031,33 +1112,33 @@ func TestTagFilterMatchesCanonicalDuplicateVideo(t *testing.T) {
|
||||
UpdatedAt: now,
|
||||
},
|
||||
{
|
||||
ID: "spider91-dup-1",
|
||||
DriveID: "91-spider",
|
||||
ID: "scriptcrawler-crawler-a-dup-1",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "dup-1.mp4",
|
||||
Title: "Spider duplicate 1",
|
||||
Tags: []string{"91porn"},
|
||||
Title: "Crawler duplicate 1",
|
||||
Tags: []string{"crawler-tag"},
|
||||
Size: 1024,
|
||||
PublishedAt: now.Add(time.Second),
|
||||
CreatedAt: now.Add(time.Second),
|
||||
UpdatedAt: now.Add(time.Second),
|
||||
},
|
||||
{
|
||||
ID: "spider91-dup-2",
|
||||
DriveID: "91-spider",
|
||||
ID: "scriptcrawler-crawler-a-dup-2",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "dup-2.mp4",
|
||||
Title: "Spider duplicate 2",
|
||||
Tags: []string{"91porn"},
|
||||
Title: "Crawler duplicate 2",
|
||||
Tags: []string{"crawler-tag"},
|
||||
Size: 1024,
|
||||
PublishedAt: now.Add(2 * time.Second),
|
||||
CreatedAt: now.Add(2 * time.Second),
|
||||
UpdatedAt: now.Add(2 * time.Second),
|
||||
},
|
||||
{
|
||||
ID: "spider91-visible",
|
||||
DriveID: "91-spider",
|
||||
ID: "scriptcrawler-crawler-a-visible",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "visible.mp4",
|
||||
Title: "Spider visible",
|
||||
Tags: []string{"91porn"},
|
||||
Title: "Crawler visible",
|
||||
Tags: []string{"crawler-tag"},
|
||||
Size: 2048,
|
||||
PublishedAt: now.Add(3 * time.Second),
|
||||
CreatedAt: now.Add(3 * time.Second),
|
||||
@@ -1068,16 +1149,16 @@ func TestTagFilterMatchesCanonicalDuplicateVideo(t *testing.T) {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
for _, id := range []string{"pikpak-canonical", "spider91-dup-1", "spider91-dup-2"} {
|
||||
for _, id := range []string{"pikpak-canonical", "scriptcrawler-crawler-a-dup-1", "scriptcrawler-crawler-a-dup-2"} {
|
||||
if err := cat.UpdateVideoFingerprint(ctx, id, "same-sampled-sha256", "ready", ""); err != nil {
|
||||
t.Fatalf("fingerprint %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
if err := cat.UpdateVideoFingerprint(ctx, "spider91-visible", "unique-sampled-sha256", "ready", ""); err != nil {
|
||||
if err := cat.UpdateVideoFingerprint(ctx, "scriptcrawler-crawler-a-visible", "unique-sampled-sha256", "ready", ""); err != nil {
|
||||
t.Fatalf("fingerprint visible: %v", err)
|
||||
}
|
||||
|
||||
items, total, err := cat.ListVideos(ctx, ListParams{Tag: "91porn", Page: 1, PageSize: 10})
|
||||
items, total, err := cat.ListVideos(ctx, ListParams{Tag: "crawler-tag", Page: 1, PageSize: 10})
|
||||
if err != nil {
|
||||
t.Fatalf("list videos by tag: %v", err)
|
||||
}
|
||||
@@ -1088,13 +1169,13 @@ func TestTagFilterMatchesCanonicalDuplicateVideo(t *testing.T) {
|
||||
for _, item := range items {
|
||||
gotIDs[item.ID] = true
|
||||
}
|
||||
for _, want := range []string{"pikpak-canonical", "spider91-visible"} {
|
||||
for _, want := range []string{"pikpak-canonical", "scriptcrawler-crawler-a-visible"} {
|
||||
if !gotIDs[want] {
|
||||
t.Fatalf("tagged video ids = %#v, want %s", gotIDs, want)
|
||||
}
|
||||
}
|
||||
if got := mustTagByLabel(t, ctx, cat, "91porn").Count; got != 2 {
|
||||
t.Fatalf("91porn count = %d, want 2 visible canonical videos", got)
|
||||
if got := mustTagByLabel(t, ctx, cat, "crawler-tag").Count; got != 2 {
|
||||
t.Fatalf("crawler-tag count = %d, want 2 visible canonical videos", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1183,6 +1264,41 @@ func mustTagByLabel(t *testing.T, ctx context.Context, cat *Catalog, label strin
|
||||
return Tag{}
|
||||
}
|
||||
|
||||
func hasColumn(t *testing.T, cat *Catalog, table, column string) bool {
|
||||
t.Helper()
|
||||
rows, err := cat.db.Query(`PRAGMA table_info(` + table + `)`)
|
||||
if err != nil {
|
||||
t.Fatalf("query table info for %s: %v", table, err)
|
||||
}
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var cid int
|
||||
var name, typ string
|
||||
var notNull int
|
||||
var defaultValue any
|
||||
var pk int
|
||||
if err := rows.Scan(&cid, &name, &typ, ¬Null, &defaultValue, &pk); err != nil {
|
||||
t.Fatalf("scan table info for %s: %v", table, err)
|
||||
}
|
||||
if strings.EqualFold(name, column) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
t.Fatalf("iterate table info for %s: %v", table, err)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func indexExists(t *testing.T, cat *Catalog, name string) bool {
|
||||
t.Helper()
|
||||
var count int
|
||||
if err := cat.db.QueryRow(`SELECT COUNT(*) FROM sqlite_schema WHERE type = 'index' AND name = ?`, name).Scan(&count); err != nil {
|
||||
t.Fatalf("query index %s: %v", name, err)
|
||||
}
|
||||
return count > 0
|
||||
}
|
||||
|
||||
func videoUpdatedAtByID(t *testing.T, ctx context.Context, cat *Catalog, ids ...string) map[string]int64 {
|
||||
t.Helper()
|
||||
out := make(map[string]int64, len(ids))
|
||||
@@ -1196,9 +1312,9 @@ func videoUpdatedAtByID(t *testing.T, ctx context.Context, cat *Catalog, ids ...
|
||||
return out
|
||||
}
|
||||
|
||||
// 删除 collection 标签的最后一个引用视频后,标签应当自动从 tags 表里消失。
|
||||
// 删除旧版本 collection 标签的最后一个引用视频后,标签应当自动从 tags 表里消失。
|
||||
// user/system 标签不受影响:用户/系统标签的语义由人维护,孤儿状态保留。
|
||||
func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
func TestDeleteVideoPrunesLegacyOrphanCollectionTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -1217,7 +1333,6 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: id,
|
||||
Category: "Better Call Saul S02",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -1226,20 +1341,28 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
label, ok, err := cat.EnsureCollectionTag(ctx, "Better Call Saul S02")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure collection tag: %v", err)
|
||||
nowMillis := now.UnixMilli()
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`INSERT INTO tags (label, aliases, source, created_at, updated_at) VALUES (?, '[]', 'collection', ?, ?)`,
|
||||
"Better Call Saul S02", nowMillis, nowMillis); err != nil {
|
||||
t.Fatalf("insert legacy collection tag: %v", err)
|
||||
}
|
||||
if !ok || label != "Better Call Saul S02" {
|
||||
t.Fatalf("ensure collection tag = %q ok=%v, want collection tag created", label, ok)
|
||||
var collectionTagID int64
|
||||
if err := cat.db.QueryRowContext(ctx, `SELECT id FROM tags WHERE label = ?`, "Better Call Saul S02").Scan(&collectionTagID); err != nil {
|
||||
t.Fatalf("lookup legacy collection tag: %v", err)
|
||||
}
|
||||
for _, id := range []string{"video-a", "video-b"} {
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`INSERT INTO video_tags (video_id, tag_id, source, created_at) VALUES (?, ?, 'auto', ?)`,
|
||||
id, collectionTagID, nowMillis); err != nil {
|
||||
t.Fatalf("attach legacy collection tag to %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
// 用户标签:手动建出来,让它和 video-a 关联,验证 user 标签不会被孤儿清理流程误删。
|
||||
if _, err := cat.CreateTagAndClassify(ctx, "用户标签", nil, "user"); err != nil {
|
||||
t.Fatalf("create user tag: %v", err)
|
||||
}
|
||||
if err := cat.SetManualVideoTags(ctx, "video-a", []string{"用户标签"}); err != nil {
|
||||
t.Fatalf("attach user tag: %v", err)
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`INSERT INTO tags (label, aliases, source, created_at, updated_at) VALUES (?, '[]', 'user', ?, ?)`,
|
||||
"用户标签", nowMillis, nowMillis); err != nil {
|
||||
t.Fatalf("insert user orphan tag: %v", err)
|
||||
}
|
||||
|
||||
collectionExists := func() bool {
|
||||
@@ -1255,7 +1378,7 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
t.Fatal("collection tag missing right after creation")
|
||||
}
|
||||
|
||||
// 删第一个视频:还有 video-b 在引用 collection 标签,应保留。
|
||||
// 删第一个视频:还有 video-b 在引用旧 collection 标签,应保留。
|
||||
if err := cat.DeleteVideo(ctx, "video-a"); err != nil {
|
||||
t.Fatalf("delete video-a: %v", err)
|
||||
}
|
||||
@@ -1263,7 +1386,7 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
t.Fatal("collection tag was pruned while another video still references it")
|
||||
}
|
||||
|
||||
// 删最后一个引用视频,collection 标签应当被同步清掉。
|
||||
// 删最后一个引用视频,旧 collection 标签应当被同步清掉。
|
||||
if err := cat.DeleteVideo(ctx, "video-b"); err != nil {
|
||||
t.Fatalf("delete video-b: %v", err)
|
||||
}
|
||||
@@ -1271,7 +1394,7 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
t.Fatal("orphan collection tag was not pruned after deleting the last referencing video")
|
||||
}
|
||||
|
||||
// 用户手动建的标签即使变成孤儿(已经因为 video-a 删除而失去引用)也必须保留。
|
||||
// 用户标签即使是孤儿也必须保留。
|
||||
var userCount int
|
||||
if err := cat.db.QueryRowContext(ctx,
|
||||
`SELECT COUNT(*) FROM tags WHERE label = ? AND source = 'user'`,
|
||||
@@ -1457,6 +1580,70 @@ func TestReconcileThumbnailStatusOnce(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRequeueSkippedPreviews(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
cases := []struct {
|
||||
id string
|
||||
status string
|
||||
local string
|
||||
fileID string
|
||||
wantStatus string
|
||||
wantLocal string
|
||||
wantFileID string
|
||||
}{
|
||||
{"preview-skipped", "skipped", "/tmp/old-preview.mp4", "old-preview-file", "pending", "", ""},
|
||||
{"preview-ready", "ready", "/tmp/ready-preview.mp4", "ready-preview-file", "ready", "/tmp/ready-preview.mp4", "ready-preview-file"},
|
||||
{"preview-failed", "failed", "/tmp/failed-preview.mp4", "failed-preview-file", "failed", "/tmp/failed-preview.mp4", "failed-preview-file"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: c.id, DriveID: "d", FileID: "source-" + c.id, Title: c.id,
|
||||
PreviewStatus: c.status, PreviewLocal: c.local, PreviewFileID: c.fileID,
|
||||
PublishedAt: now, CreatedAt: now, UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed %s: %v", c.id, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := cat.requeueSkippedPreviews(ctx); err != nil {
|
||||
t.Fatalf("requeue skipped previews: %v", err)
|
||||
}
|
||||
if err := cat.requeueSkippedPreviews(ctx); err != nil {
|
||||
t.Fatalf("second requeue skipped previews: %v", err)
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
got, err := cat.GetVideo(ctx, c.id)
|
||||
if err != nil {
|
||||
t.Fatalf("get %s: %v", c.id, err)
|
||||
}
|
||||
if got.PreviewStatus != c.wantStatus {
|
||||
t.Errorf("%s: preview status = %q, want %q", c.id, got.PreviewStatus, c.wantStatus)
|
||||
}
|
||||
if got.PreviewLocal != c.wantLocal {
|
||||
t.Errorf("%s: preview local = %q, want %q", c.id, got.PreviewLocal, c.wantLocal)
|
||||
}
|
||||
if got.PreviewFileID != c.wantFileID {
|
||||
t.Errorf("%s: preview file id = %q, want %q", c.id, got.PreviewFileID, c.wantFileID)
|
||||
}
|
||||
}
|
||||
|
||||
pending, err := cat.ListVideosByPreviewStatus(ctx, "d", "pending", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending previews: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != "preview-skipped" {
|
||||
t.Fatalf("pending previews = %#v, want only preview-skipped", pending)
|
||||
}
|
||||
}
|
||||
|
||||
// TestUpsertVideoSyncsThumbnailStatus 验证 scanner 创建/补回视频时
|
||||
// thumbnail_status 跟随 thumbnail_url 自动设。这是历史 bug 的修复回归测试 ——
|
||||
// 之前 UpsertVideo 的 SQL 不带 thumbnail_status 列,所有新视频都依赖
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestListHiddenVideosForMigration 验证:隐藏的视频不进可见列表,
|
||||
// 但能被 ListHiddenVideos 拿到(供一次性迁移为墓碑)。
|
||||
func TestListHiddenVideosForMigration(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, id := range []string{"v1", "v2", "v3"} {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: id, DriveID: "drive", FileID: "f-" + id, Title: id,
|
||||
PublishedAt: now, CreatedAt: now, UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
if err := cat.HideVideo(ctx, "v2"); err != nil {
|
||||
t.Fatalf("hide v2: %v", err)
|
||||
}
|
||||
|
||||
visible, total, err := cat.ListVideos(ctx, ListParams{Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list visible: %v", err)
|
||||
}
|
||||
if total != 2 || len(visible) != 2 {
|
||||
t.Fatalf("visible total/len = %d/%d, want 2/2", total, len(visible))
|
||||
}
|
||||
for _, v := range visible {
|
||||
if v.ID == "v2" {
|
||||
t.Fatalf("hidden v2 leaked into visible list")
|
||||
}
|
||||
}
|
||||
|
||||
hidden, err := cat.ListHiddenVideos(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("list hidden: %v", err)
|
||||
}
|
||||
if len(hidden) != 1 || hidden[0].ID != "v2" {
|
||||
t.Fatalf("ListHiddenVideos = %v, want only v2", hidden)
|
||||
}
|
||||
|
||||
current, blacklisted, err := cat.VideoManagementCounts(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("counts: %v", err)
|
||||
}
|
||||
if current != 2 || blacklisted != 0 {
|
||||
t.Fatalf("counts = current %d blacklisted %d, want 2/0", current, blacklisted)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBlacklistListAndRemove 验证墓碑表的列出、关键字过滤和移除。
|
||||
func TestBlacklistListAndRemove(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
seed := []struct{ id, drive, file string }{
|
||||
{"d1", "drive", "movie-alpha.avi"},
|
||||
{"d2", "drive", "movie-beta.mp4"},
|
||||
{"d3", "archive", "clip-gamma.wmv"},
|
||||
}
|
||||
for _, s := range seed {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: s.id, DriveID: s.drive, FileID: "f-" + s.id, FileName: s.file,
|
||||
Title: s.id, PublishedAt: now, CreatedAt: now, UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed %s: %v", s.id, err)
|
||||
}
|
||||
var err error
|
||||
if s.id == "d2" {
|
||||
err = cat.DeleteVideoWithTombstoneReason(ctx, s.id, DeletedVideoReasonDuplicate)
|
||||
} else {
|
||||
err = cat.DeleteVideoWithTombstone(ctx, s.id)
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("tombstone %s: %v", s.id, err)
|
||||
}
|
||||
}
|
||||
|
||||
items, total, err := cat.ListDeletedVideos(ctx, ListParams{Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted: %v", err)
|
||||
}
|
||||
if total != 3 || len(items) != 3 {
|
||||
t.Fatalf("deleted total/len = %d/%d, want 3/3", total, len(items))
|
||||
}
|
||||
reasons := map[string]string{}
|
||||
for _, item := range items {
|
||||
reasons[item.ID] = item.Reason
|
||||
}
|
||||
if reasons["d1"] != "" || reasons["d3"] != "" {
|
||||
t.Fatalf("manual tombstone reasons = %#v, want empty", reasons)
|
||||
}
|
||||
if reasons["d2"] != DeletedVideoReasonDuplicate {
|
||||
t.Fatalf("duplicate tombstone reason = %q, want %q", reasons["d2"], DeletedVideoReasonDuplicate)
|
||||
}
|
||||
|
||||
// 关键字过滤
|
||||
filtered, ftotal, err := cat.ListDeletedVideos(ctx, ListParams{Keyword: "movie", Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted filtered: %v", err)
|
||||
}
|
||||
if ftotal != 2 || len(filtered) != 2 {
|
||||
t.Fatalf("filtered total/len = %d/%d, want 2/2", ftotal, len(filtered))
|
||||
}
|
||||
|
||||
// 网盘过滤
|
||||
driveFiltered, driveTotal, err := cat.ListDeletedVideos(ctx, ListParams{DriveID: "archive", Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted drive filtered: %v", err)
|
||||
}
|
||||
if driveTotal != 1 || len(driveFiltered) != 1 || driveFiltered[0].ID != "d3" {
|
||||
t.Fatalf("drive filtered = total %d items %#v, want only d3", driveTotal, driveFiltered)
|
||||
}
|
||||
|
||||
combined, combinedTotal, err := cat.ListDeletedVideos(ctx, ListParams{Keyword: "movie", DriveID: "archive", Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted combined filtered: %v", err)
|
||||
}
|
||||
if combinedTotal != 0 || len(combined) != 0 {
|
||||
t.Fatalf("combined filtered total/len = %d/%d, want 0/0", combinedTotal, len(combined))
|
||||
}
|
||||
|
||||
// 移出黑名单
|
||||
if err := cat.RemoveDeletedVideo(ctx, "d1"); err != nil {
|
||||
t.Fatalf("remove d1: %v", err)
|
||||
}
|
||||
if deleted, err := cat.IsVideoDeleted(ctx, "d1"); err != nil || deleted {
|
||||
t.Fatalf("d1 should no longer be blacklisted (deleted=%v err=%v)", deleted, err)
|
||||
}
|
||||
_, total, err = cat.ListDeletedVideos(ctx, ListParams{Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted after remove: %v", err)
|
||||
}
|
||||
if total != 2 {
|
||||
t.Fatalf("deleted total after remove = %d, want 2", total)
|
||||
}
|
||||
|
||||
if err := cat.RemoveDeletedVideo(ctx, "does-not-exist"); err == nil {
|
||||
t.Fatalf("remove missing id should return error")
|
||||
}
|
||||
|
||||
// counts: 删完一个还剩 2 个黑名单;可见视频已全部被墓碑删除
|
||||
current, blacklisted, err := cat.VideoManagementCounts(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("counts: %v", err)
|
||||
}
|
||||
if current != 0 || blacklisted != 2 {
|
||||
t.Fatalf("counts = current %d blacklisted %d, want 0/2", current, blacklisted)
|
||||
}
|
||||
}
|
||||
@@ -16,6 +16,11 @@ const (
|
||||
DefaultAdminPassword = "admin123"
|
||||
)
|
||||
|
||||
// legacyDefaultVideoExtensions is the scanner extension list without .strm.
var legacyDefaultVideoExtensions = []string{".mp4", ".mkv", ".mov", ".webm", ".avi"}

// defaultVideoExtensions is the scanner extension list including .strm.
var defaultVideoExtensions = []string{".mp4", ".mkv", ".mov", ".webm", ".avi", ".strm"}
|
||||
|
||||
type Config struct {
|
||||
Server Server `yaml:"server"`
|
||||
Storage Storage `yaml:"storage"`
|
||||
@@ -202,7 +207,7 @@ type Nightly struct {
|
||||
// 这里保留 yaml 中的静态定义,用于启动时预置盘。生产建议只在 DB 里维护。
|
||||
type Drive struct {
|
||||
ID string `yaml:"id"`
|
||||
Kind string `yaml:"kind"` // quark / p115 / pikpak / wopan / onedrive / googledrive / localstorage
|
||||
Kind string `yaml:"kind"` // quark / p115 / p123 / pikpak / wopan / guangyapan / onedrive / googledrive / localstorage
|
||||
Name string `yaml:"name"`
|
||||
RootID string `yaml:"root_id"`
|
||||
Params map[string]string `yaml:"params,omitempty"`
|
||||
@@ -247,7 +252,9 @@ func (c *Config) applyDefaults() {
|
||||
c.Scanner.MaxDepth = 5
|
||||
}
|
||||
if len(c.Scanner.VideoExtensions) == 0 {
|
||||
c.Scanner.VideoExtensions = []string{".mp4", ".mkv", ".mov", ".webm", ".avi"}
|
||||
c.Scanner.VideoExtensions = append([]string{}, defaultVideoExtensions...)
|
||||
} else if isLegacyDefaultVideoExtensions(c.Scanner.VideoExtensions) {
|
||||
c.Scanner.VideoExtensions = append(c.Scanner.VideoExtensions, ".strm")
|
||||
}
|
||||
if c.Preview.FFmpegPath == "" {
|
||||
c.Preview.FFmpegPath = "ffmpeg"
|
||||
@@ -276,3 +283,19 @@ func (c *Config) applyDefaults() {
|
||||
c.Nightly.CronHour = 1
|
||||
}
|
||||
}
|
||||
|
||||
func isLegacyDefaultVideoExtensions(exts []string) bool {
|
||||
if len(exts) != len(legacyDefaultVideoExtensions) {
|
||||
return false
|
||||
}
|
||||
seen := make(map[string]struct{}, len(exts))
|
||||
for _, ext := range exts {
|
||||
seen[strings.ToLower(strings.TrimSpace(ext))] = struct{}{}
|
||||
}
|
||||
for _, ext := range legacyDefaultVideoExtensions {
|
||||
if _, ok := seen[ext]; !ok {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package config
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -50,3 +51,64 @@ storage:
|
||||
t.Fatalf("db path = %q, want preserved value", cfg.Storage.DBPath)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadDefaultScannerVideoExtensionsIncludeSTRM(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "config.yaml")
|
||||
if err := os.WriteFile(path, []byte(`{}`), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
|
||||
cfg, err := Load(path)
|
||||
if err != nil {
|
||||
t.Fatalf("load config: %v", err)
|
||||
}
|
||||
if !hasVideoExtension(cfg.Scanner.VideoExtensions, ".strm") {
|
||||
t.Fatalf("video extensions = %#v, want .strm", cfg.Scanner.VideoExtensions)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadLegacyDefaultScannerVideoExtensionsIncludeSTRM(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "config.yaml")
|
||||
if err := os.WriteFile(path, []byte(`
|
||||
scanner:
|
||||
video_extensions: [".mp4", ".mkv", ".mov", ".webm", ".avi"]
|
||||
`), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
|
||||
cfg, err := Load(path)
|
||||
if err != nil {
|
||||
t.Fatalf("load config: %v", err)
|
||||
}
|
||||
if !hasVideoExtension(cfg.Scanner.VideoExtensions, ".strm") {
|
||||
t.Fatalf("video extensions = %#v, want .strm appended for legacy default list", cfg.Scanner.VideoExtensions)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadCustomScannerVideoExtensionsArePreserved(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "config.yaml")
|
||||
if err := os.WriteFile(path, []byte(`
|
||||
scanner:
|
||||
video_extensions: [".mp4"]
|
||||
`), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
|
||||
cfg, err := Load(path)
|
||||
if err != nil {
|
||||
t.Fatalf("load config: %v", err)
|
||||
}
|
||||
if len(cfg.Scanner.VideoExtensions) != 1 || cfg.Scanner.VideoExtensions[0] != ".mp4" {
|
||||
t.Fatalf("video extensions = %#v, want custom list preserved", cfg.Scanner.VideoExtensions)
|
||||
}
|
||||
}
|
||||
|
||||
// hasVideoExtension reports whether exts contains want, ignoring surrounding
// whitespace and letter case on both sides of the comparison.
func hasVideoExtension(exts []string, want string) bool {
	normalize := func(s string) string {
		return strings.ToLower(strings.TrimSpace(s))
	}
	target := normalize(want)
	for i := range exts {
		if normalize(exts[i]) == target {
			return true
		}
	}
	return false
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,280 @@
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/drives/scriptcrawler"
|
||||
)
|
||||
|
||||
type fakeRegistry struct {
|
||||
byID map[string]drives.Drive
|
||||
}
|
||||
|
||||
func newFakeRegistry() *fakeRegistry {
|
||||
return &fakeRegistry{byID: make(map[string]drives.Drive)}
|
||||
}
|
||||
|
||||
func (r *fakeRegistry) Add(d drives.Drive) {
|
||||
r.byID[d.ID()] = d
|
||||
}
|
||||
|
||||
func (r *fakeRegistry) Get(id string) (drives.Drive, bool) {
|
||||
d, ok := r.byID[id]
|
||||
return d, ok
|
||||
}
|
||||
|
||||
func (r *fakeRegistry) All() []drives.Drive {
|
||||
out := make([]drives.Drive, 0, len(r.byID))
|
||||
for _, d := range r.byID {
|
||||
out = append(out, d)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
type fakeUploadDrive struct {
|
||||
id string
|
||||
kind string
|
||||
rootID string
|
||||
mu sync.Mutex
|
||||
uploadCalls int
|
||||
gotBodies map[string][]byte
|
||||
gotParents map[string]string
|
||||
ensureCalls []string
|
||||
}
|
||||
|
||||
func newFakeUploadDrive(id, kind, rootID string) *fakeUploadDrive {
|
||||
return &fakeUploadDrive{
|
||||
id: id,
|
||||
kind: kind,
|
||||
rootID: rootID,
|
||||
gotBodies: make(map[string][]byte),
|
||||
gotParents: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *fakeUploadDrive) Kind() string { return d.kind }
|
||||
func (d *fakeUploadDrive) ID() string { return d.id }
|
||||
func (d *fakeUploadDrive) RootID() string {
|
||||
return d.rootID
|
||||
}
|
||||
func (d *fakeUploadDrive) Init(context.Context) error { return nil }
|
||||
func (d *fakeUploadDrive) List(context.Context, string) ([]drives.Entry, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (d *fakeUploadDrive) Stat(context.Context, string) (*drives.Entry, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *fakeUploadDrive) StreamURL(context.Context, string) (*drives.StreamLink, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *fakeUploadDrive) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *fakeUploadDrive) EnsureDir(_ context.Context, pathFromRoot string) (string, error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
d.ensureCalls = append(d.ensureCalls, pathFromRoot)
|
||||
return d.rootID + "/" + pathFromRoot, nil
|
||||
}
|
||||
func (d *fakeUploadDrive) Rename(context.Context, string, string) error {
|
||||
return nil
|
||||
}
|
||||
func (d *fakeUploadDrive) UploadAndReportHash(_ context.Context, parentID, name string, r io.Reader, _ int64) (UploadResult, error) {
|
||||
body, _ := io.ReadAll(r)
|
||||
d.mu.Lock()
|
||||
d.uploadCalls++
|
||||
d.gotBodies[name] = body
|
||||
d.gotParents[name] = parentID
|
||||
d.mu.Unlock()
|
||||
return UploadResult{FileID: "remote-" + name, Hash: strings.Repeat("a", 40), Size: int64(len(body))}, nil
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*fakeUploadDrive)(nil)
|
||||
var _ uploadTarget = (*fakeUploadDrive)(nil)
|
||||
|
||||
func TestRunOnceUploadsScriptCrawlerLocalVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat := setupCatalog(t)
|
||||
src := setupScriptCrawler(t, "crawler-one")
|
||||
target := newFakeUploadDrive("target-drive", "pikpak", "target-root")
|
||||
reg := newFakeRegistry()
|
||||
reg.Add(src)
|
||||
reg.Add(target)
|
||||
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: src.ID(),
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Example Crawler",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{"script_path": "/tmp/example.py", "upload_drive_id": target.ID()},
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert crawler drive: %v", err)
|
||||
}
|
||||
|
||||
videoID := writeCrawlerVideo(t, cat, src, "source-001", ".mp4", []byte("video payload"), true)
|
||||
commonThumbDir := filepath.Join(t.TempDir(), "thumbs")
|
||||
m := New(Config{Catalog: cat, Registry: reg, CommonThumbDir: commonThumbDir})
|
||||
|
||||
if err := m.RunOnce(ctx); err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
|
||||
wantName := desiredUploadName("Sample source-001", "source-001", "mp4")
|
||||
if target.uploadCalls != 1 {
|
||||
t.Fatalf("upload calls = %d, want 1", target.uploadCalls)
|
||||
}
|
||||
if got := string(target.gotBodies[wantName]); got != "video payload" {
|
||||
t.Fatalf("uploaded body = %q, want payload", got)
|
||||
}
|
||||
if got := target.gotParents[wantName]; got != "target-root/Script Crawlers/crawler-one" {
|
||||
t.Fatalf("upload parent = %q, want crawler folder", got)
|
||||
}
|
||||
if len(target.ensureCalls) != 1 || target.ensureCalls[0] != "Script Crawlers/crawler-one" {
|
||||
t.Fatalf("ensure calls = %#v, want crawler upload folder", target.ensureCalls)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, videoID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.DriveID != target.ID() || !strings.HasPrefix(got.FileID, "remote-") {
|
||||
t.Fatalf("catalog target = drive %q file %q, want target drive", got.DriveID, got.FileID)
|
||||
}
|
||||
if got.FileName != wantName {
|
||||
t.Fatalf("file_name = %q, want %q", got.FileName, wantName)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(src.VideosDir(), "source-001.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("local video still exists or stat failed: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(src.ThumbsDir(), "source-001.jpg")); !os.IsNotExist(err) {
|
||||
t.Fatalf("local thumb still exists or stat failed: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(commonThumbDir, videoID+".jpg")); err != nil {
|
||||
t.Fatalf("common thumbnail missing: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOnceRequiresPerCrawlerUploadTarget(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat := setupCatalog(t)
|
||||
src := setupScriptCrawler(t, "crawler-local-only")
|
||||
target := newFakeUploadDrive("target-drive", "pikpak", "target-root")
|
||||
reg := newFakeRegistry()
|
||||
reg.Add(src)
|
||||
reg.Add(target)
|
||||
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: src.ID(),
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Local Only",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{"script_path": "/tmp/example.py"},
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert crawler drive: %v", err)
|
||||
}
|
||||
videoID := writeCrawlerVideo(t, cat, src, "source-002", ".mp4", []byte("video payload"), true)
|
||||
|
||||
m := New(Config{Catalog: cat, Registry: reg})
|
||||
if err := m.RunOnce(ctx); err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if target.uploadCalls != 0 {
|
||||
t.Fatalf("upload calls = %d, want 0", target.uploadCalls)
|
||||
}
|
||||
got, err := cat.GetVideo(ctx, videoID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.DriveID != src.ID() {
|
||||
t.Fatalf("drive_id = %q, want local crawler drive", got.DriveID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdaptUploadTargetRejectsUnsupportedTarget(t *testing.T) {
|
||||
src := scriptcrawler.New(scriptcrawler.Config{ID: "crawler", RootDir: t.TempDir()})
|
||||
_, err := adaptUploadTarget(src)
|
||||
if err == nil || !strings.Contains(err.Error(), "does not support crawler upload") {
|
||||
t.Fatalf("err = %v, want unsupported crawler upload target", err)
|
||||
}
|
||||
}
|
||||
|
||||
func setupCatalog(t *testing.T) *catalog.Catalog {
|
||||
t.Helper()
|
||||
cat, err := catalog.Open(filepath.Join(t.TempDir(), "video-site.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
return cat
|
||||
}
|
||||
|
||||
func setupScriptCrawler(t *testing.T, id string) *scriptcrawler.Driver {
|
||||
t.Helper()
|
||||
d := scriptcrawler.New(scriptcrawler.Config{ID: id, RootDir: t.TempDir()})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("scriptcrawler init: %v", err)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func writeCrawlerVideo(t *testing.T, cat *catalog.Catalog, d *scriptcrawler.Driver, sourceID, ext string, content []byte, readyAssets bool) string {
|
||||
t.Helper()
|
||||
ctx := context.Background()
|
||||
fileID := sourceID + ext
|
||||
videoPath, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
t.Fatalf("video path: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(videoPath, content, 0o644); err != nil {
|
||||
t.Fatalf("write video: %v", err)
|
||||
}
|
||||
thumbPath, err := d.ThumbPath(sourceID + ".jpg")
|
||||
if err != nil {
|
||||
t.Fatalf("thumb path: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(thumbPath, []byte("thumb"), 0o644); err != nil {
|
||||
t.Fatalf("write thumb: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
videoID := scriptcrawler.BuildVideoID(d.ID(), sourceID)
|
||||
previewStatus := "pending"
|
||||
fingerprintStatus := "pending"
|
||||
sampled := ""
|
||||
if readyAssets {
|
||||
previewStatus = "ready"
|
||||
fingerprintStatus = "ready"
|
||||
sampled = strings.Repeat("b", 64)
|
||||
}
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: videoID,
|
||||
DriveID: d.ID(),
|
||||
FileID: fileID,
|
||||
FileName: fileID,
|
||||
Title: "Sample " + sourceID,
|
||||
Author: "tester",
|
||||
Ext: strings.TrimPrefix(ext, "."),
|
||||
Quality: "HD",
|
||||
Size: int64(len(content)),
|
||||
PreviewStatus: previewStatus,
|
||||
FingerprintStatus: fingerprintStatus,
|
||||
SampledSHA256: sampled,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert video: %v", err)
|
||||
}
|
||||
return videoID
|
||||
}
|
||||
+21
-21
@@ -1,13 +1,13 @@
|
||||
package spider91migrate
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// 期望的 PikPak 文件名格式(方案 B):
|
||||
// 期望的上传文件名格式:
|
||||
//
|
||||
// <sanitized-title>-<viewkey-后8位>.<ext>
|
||||
// <sanitized-title>-<sourceID-后8位>.<ext>
|
||||
//
|
||||
// 例如:
|
||||
//
|
||||
@@ -15,8 +15,8 @@ import (
|
||||
//
|
||||
// 设计目标:
|
||||
// - 文件名一眼能看出视频内容(用 catalog 里的 title)
|
||||
// - 后缀的 viewkey 8 字符保证同标题不会撞名
|
||||
// - 全部字符在常见文件系统、PikPak、HTTP/Aliyun OSS Key 编码里都安全
|
||||
// - 后缀的 sourceID 8 字符保证同标题不会撞名
|
||||
// - 全部字符在常见文件系统、网盘 API、HTTP/Aliyun OSS Key 编码里都安全
|
||||
//
|
||||
// 字符清洗规则(sanitizeTitle):
|
||||
// - 去除控制字符(< 0x20 或 0x7F)
|
||||
@@ -85,47 +85,47 @@ func truncateRunes(s string, maxRunes int) string {
|
||||
return s
|
||||
}
|
||||
|
||||
// extractViewKey 从 video.ID("spider91-<driveID>-<viewkey>")里
|
||||
// 取出最后一段 viewkey。
|
||||
// extractSourceID 从 video.ID("<kind>-<driveID>-<sourceID>")里
|
||||
// 取出最后一段 sourceID。
|
||||
//
|
||||
// driveID 中如果有 "-" 不影响(用 LastIndex),viewkey 本身(91 网站的
|
||||
// view 标识)目前都是纯 hex 或纯数字,不包含 "-"。
|
||||
func extractViewKey(videoID string) string {
|
||||
// driveID 中如果有 "-" 不影响(用 LastIndex)。爬虫脚本应提供不包含 "-"
|
||||
// 的稳定 source_id;如果包含 "-",这里会取最后一段作为文件名后缀。
|
||||
func extractSourceID(videoID string) string {
|
||||
if i := strings.LastIndex(videoID, "-"); i >= 0 {
|
||||
return videoID[i+1:]
|
||||
}
|
||||
return videoID
|
||||
}
|
||||
|
||||
// viewKeySuffix 取 viewkey 的最后 N 个字符;不足 N 返回原字符串。
|
||||
// sourceIDSuffix 取 sourceID 的最后 N 个字符;不足 N 返回原字符串。
|
||||
//
|
||||
// 默认 N=8(足够稀疏避免标题撞名时的同名冲突)。
|
||||
const viewKeySuffixLen = 8
|
||||
const sourceIDSuffixLen = 8
|
||||
|
||||
func viewKeySuffix(viewkey string) string {
|
||||
r := []rune(viewkey)
|
||||
if len(r) <= viewKeySuffixLen {
|
||||
func sourceIDSuffix(sourceID string) string {
|
||||
r := []rune(sourceID)
|
||||
if len(r) <= sourceIDSuffixLen {
|
||||
return string(r)
|
||||
}
|
||||
return string(r[len(r)-viewKeySuffixLen:])
|
||||
return string(r[len(r)-sourceIDSuffixLen:])
|
||||
}
|
||||
|
||||
// desiredPikPakName 构造 spider91 视频在 PikPak 上的期望文件名。
|
||||
// desiredUploadName 构造爬虫视频上传到目标网盘时的期望文件名。
|
||||
//
|
||||
// desiredPikPakName("超白大奶律师约炮", "476fa8bf4b47e672d2fa", "mp4")
|
||||
// desiredUploadName("超白大奶律师约炮", "476fa8bf4b47e672d2fa", "mp4")
|
||||
// → "超白大奶律师约炮-e672d2fa.mp4" (the suffix is the last 8 characters of the sourceID)
|
||||
//
|
||||
// ext 不带前导点;空时默认 mp4。
|
||||
func desiredPikPakName(title, viewkey, ext string) string {
|
||||
func desiredUploadName(title, sourceID, ext string) string {
|
||||
clean := sanitizeTitle(title)
|
||||
suffix := viewKeySuffix(strings.TrimSpace(viewkey))
|
||||
suffix := sourceIDSuffix(strings.TrimSpace(sourceID))
|
||||
ext = strings.TrimSpace(ext)
|
||||
ext = strings.TrimPrefix(ext, ".")
|
||||
if ext == "" {
|
||||
ext = "mp4"
|
||||
}
|
||||
if suffix == "" {
|
||||
// viewkey 缺失时退化成 "<title>.<ext>"
|
||||
// sourceID 缺失时退化成 "<title>.<ext>"
|
||||
return clean + "." + ext
|
||||
}
|
||||
return clean + "-" + suffix + "." + ext
|
||||
+18
-18
@@ -1,4 +1,4 @@
|
||||
package spider91migrate
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"strings"
|
||||
@@ -13,11 +13,11 @@ func TestSanitizeTitleHandlesCommonCases(t *testing.T) {
|
||||
{"hello", "hello"},
|
||||
{" hello ", "hello"},
|
||||
{"hello\nworld", "hello world"},
|
||||
{"hello / world", "hello world"}, // 单 forbidden 折叠成空格
|
||||
{"hello / world", "hello world"}, // 单 forbidden 折叠成空格
|
||||
{"a/b\\c:d*e?f\"g<h>i|j", "a b c d e f g h i j"},
|
||||
{"a b", "a b"}, // 多空格折叠
|
||||
{"a b", "a b"}, // 多空格折叠
|
||||
{"a\t\nb", "a b"},
|
||||
{"...trim.dots...", "trim.dots"}, // 首尾点号被 trim 掉
|
||||
{"...trim.dots...", "trim.dots"}, // 首尾点号被 trim 掉
|
||||
{"control\x01char\x1f\x7f", "controlchar"}, // 控制字符直接丢弃
|
||||
{"", "video"}, // 空串回退
|
||||
{" / ", "video"}, // 全是 forbidden+空白 → 回退
|
||||
@@ -51,22 +51,22 @@ func TestSanitizeTitleKeepsCJKAndUnicode(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractViewKey(t *testing.T) {
|
||||
func TestExtractSourceID(t *testing.T) {
|
||||
cases := []struct{ in, want string }{
|
||||
{"spider91-91Spider-476fa8bf4b47e672d2fa", "476fa8bf4b47e672d2fa"},
|
||||
{"spider91-91Spider-1587338723", "1587338723"},
|
||||
{"spider91-some-drive-with-dashes-vk001", "vk001"}, // LastIndex 拿尾段
|
||||
{"scriptcrawler-demo-476fa8bf4b47e672d2fa", "476fa8bf4b47e672d2fa"},
|
||||
{"scriptcrawler-demo-1587338723", "1587338723"},
|
||||
{"scriptcrawler-some-drive-with-dashes-vk001", "vk001"}, // LastIndex 拿尾段
|
||||
{"no-dashes-after-prefix", "prefix"},
|
||||
{"single", "single"}, // 没 dash → 原样返回
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := extractViewKey(c.in); got != c.want {
|
||||
t.Errorf("extractViewKey(%q) = %q, want %q", c.in, got, c.want)
|
||||
if got := extractSourceID(c.in); got != c.want {
|
||||
t.Errorf("extractSourceID(%q) = %q, want %q", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestViewKeySuffix(t *testing.T) {
|
||||
func TestSourceIDSuffix(t *testing.T) {
|
||||
cases := []struct{ in, want string }{
|
||||
{"476fa8bf4b47e672d2fa", "e672d2fa"},
|
||||
{"1587338723", "87338723"},
|
||||
@@ -76,15 +76,15 @@ func TestViewKeySuffix(t *testing.T) {
|
||||
{"123456789", "23456789"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := viewKeySuffix(c.in); got != c.want {
|
||||
t.Errorf("viewKeySuffix(%q) = %q, want %q", c.in, got, c.want)
|
||||
if got := sourceIDSuffix(c.in); got != c.want {
|
||||
t.Errorf("sourceIDSuffix(%q) = %q, want %q", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDesiredPikPakName(t *testing.T) {
|
||||
func TestDesiredUploadName(t *testing.T) {
|
||||
cases := []struct {
|
||||
title, viewkey, ext, want string
|
||||
title, sourceID, ext, want string
|
||||
}{
|
||||
{
|
||||
"超白大奶律师约炮第一季",
|
||||
@@ -112,7 +112,7 @@ func TestDesiredPikPakName(t *testing.T) {
|
||||
},
|
||||
{
|
||||
"title",
|
||||
"", // 空 viewkey → 退化成 "<title>.<ext>"
|
||||
"", // 空 sourceID → 退化成 "<title>.<ext>"
|
||||
"webm",
|
||||
"title.webm",
|
||||
},
|
||||
@@ -130,9 +130,9 @@ func TestDesiredPikPakName(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := desiredPikPakName(c.title, c.viewkey, c.ext)
|
||||
got := desiredUploadName(c.title, c.sourceID, c.ext)
|
||||
if got != c.want {
|
||||
t.Errorf("desiredPikPakName(%q,%q,%q) = %q, want %q", c.title, c.viewkey, c.ext, got, c.want)
|
||||
t.Errorf("desiredUploadName(%q,%q,%q) = %q, want %q", c.title, c.sourceID, c.ext, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,17 @@
|
||||
package googledrive
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"log"
|
||||
"math"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
@@ -21,10 +28,13 @@ import (
|
||||
const (
|
||||
Kind = "googledrive"
|
||||
defaultAPIBaseURL = "https://www.googleapis.com/drive/v3"
|
||||
defaultUploadAPIURL = "https://www.googleapis.com/upload/drive/v3"
|
||||
defaultOAuthURL = "https://www.googleapis.com/oauth2/v4/token"
|
||||
defaultRenewAPIURL = "https://api.oplist.org/googleui/renewapi"
|
||||
defaultListInterval = 1 * time.Second
|
||||
defaultListCooldown = 5 * time.Minute
|
||||
defaultLinkCooldown = 5 * time.Minute
|
||||
uploadChunkSize = int64(8 * 1024 * 1024)
|
||||
|
||||
filesListFields = "files(id,name,mimeType,size,modifiedTime,createdTime,thumbnailLink,shortcutDetails,md5Checksum,sha1Checksum,sha256Checksum),nextPageToken"
|
||||
fileInfoFields = "id,name,mimeType,size,modifiedTime,createdTime,thumbnailLink,shortcutDetails,md5Checksum,sha1Checksum,sha256Checksum"
|
||||
@@ -41,13 +51,19 @@ type Driver struct {
|
||||
renewAPIURL string
|
||||
oauthURL string
|
||||
apiBaseURL string
|
||||
uploadBaseURL string
|
||||
client *resty.Client
|
||||
httpClient *http.Client
|
||||
onTokenUpdate func(access, refresh string)
|
||||
|
||||
listMu sync.Mutex
|
||||
lastListAt time.Time
|
||||
listInterval time.Duration
|
||||
listCooldown time.Duration
|
||||
|
||||
linkCooldownMu sync.Mutex
|
||||
linkCooldownUntil time.Time
|
||||
linkCooldownDuration time.Duration
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
@@ -61,6 +77,7 @@ type Config struct {
|
||||
RenewAPIURL string
|
||||
OAuthURL string
|
||||
APIBaseURL string
|
||||
UploadAPIURL string
|
||||
|
||||
OnTokenUpdate func(access, refresh string)
|
||||
}
|
||||
@@ -82,6 +99,10 @@ func New(c Config) *Driver {
|
||||
if apiBaseURL == "" {
|
||||
apiBaseURL = defaultAPIBaseURL
|
||||
}
|
||||
uploadBaseURL := strings.TrimRight(strings.TrimSpace(c.UploadAPIURL), "/")
|
||||
if uploadBaseURL == "" {
|
||||
uploadBaseURL = deriveUploadBaseURL(apiBaseURL)
|
||||
}
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
rootID: rootID,
|
||||
@@ -93,15 +114,34 @@ func New(c Config) *Driver {
|
||||
renewAPIURL: renewAPIURL,
|
||||
oauthURL: oauthURL,
|
||||
apiBaseURL: apiBaseURL,
|
||||
uploadBaseURL: uploadBaseURL,
|
||||
onTokenUpdate: c.OnTokenUpdate,
|
||||
client: resty.New().
|
||||
SetTimeout(30*time.Second).
|
||||
SetHeader("Accept", "application/json, text/plain, */*"),
|
||||
listInterval: defaultListInterval,
|
||||
listCooldown: defaultListCooldown,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 0,
|
||||
CheckRedirect: func(*http.Request, []*http.Request) error {
|
||||
return http.ErrUseLastResponse
|
||||
},
|
||||
},
|
||||
listInterval: defaultListInterval,
|
||||
listCooldown: defaultListCooldown,
|
||||
linkCooldownDuration: defaultLinkCooldown,
|
||||
}
|
||||
}
|
||||
|
||||
func deriveUploadBaseURL(apiBaseURL string) string {
|
||||
apiBaseURL = strings.TrimRight(strings.TrimSpace(apiBaseURL), "/")
|
||||
if apiBaseURL == "" || apiBaseURL == defaultAPIBaseURL {
|
||||
return defaultUploadAPIURL
|
||||
}
|
||||
if strings.HasSuffix(apiBaseURL, "/drive/v3") {
|
||||
return strings.TrimSuffix(apiBaseURL, "/drive/v3") + "/upload/drive/v3"
|
||||
}
|
||||
return apiBaseURL
|
||||
}
|
||||
|
||||
func (d *Driver) Kind() string { return Kind }
|
||||
func (d *Driver) ID() string { return d.id }
|
||||
func (d *Driver) RootID() string { return d.rootID }
|
||||
@@ -209,8 +249,19 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
if fileID == "" {
|
||||
return nil, errors.New("googledrive stream: empty file id")
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := d.linkCooldownError(time.Now()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := d.Stat(ctx, fileID); err != nil {
|
||||
return nil, fmt.Errorf("googledrive stream: %w", err)
|
||||
err = fmt.Errorf("googledrive stream: %w", err)
|
||||
if wait, ok := drives.RateLimitRetryAfter(err); ok {
|
||||
until := d.pauseLinkCooldown(wait)
|
||||
log.Printf("[googledrive] stream link cooling down drive=%s until=%s err=%v", d.id, until.Format(time.RFC3339), err)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
u := d.fileURL(fileID) + "?alt=media&acknowledgeAbuse=true&supportsAllDrives=true"
|
||||
return &drives.StreamLink{
|
||||
@@ -222,12 +273,396 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
func (d *Driver) linkCooldownError(now time.Time) error {
|
||||
d.linkCooldownMu.Lock()
|
||||
defer d.linkCooldownMu.Unlock()
|
||||
if d.linkCooldownUntil.IsZero() {
|
||||
return nil
|
||||
}
|
||||
if !now.Before(d.linkCooldownUntil) {
|
||||
d.linkCooldownUntil = time.Time{}
|
||||
return nil
|
||||
}
|
||||
wait := d.linkCooldownUntil.Sub(now)
|
||||
if wait <= 0 {
|
||||
return nil
|
||||
}
|
||||
return &drives.RateLimitError{
|
||||
Provider: Kind,
|
||||
RetryAfter: wait,
|
||||
Err: fmt.Errorf("googledrive stream link cooling down until %s", d.linkCooldownUntil.Format(time.RFC3339)),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
func (d *Driver) pauseLinkCooldown(wait time.Duration) time.Time {
|
||||
if wait <= 0 {
|
||||
wait = d.linkCooldownDuration
|
||||
}
|
||||
if wait <= 0 {
|
||||
wait = defaultLinkCooldown
|
||||
}
|
||||
until := time.Now().Add(wait)
|
||||
d.linkCooldownMu.Lock()
|
||||
if until.After(d.linkCooldownUntil) {
|
||||
d.linkCooldownUntil = until
|
||||
} else {
|
||||
until = d.linkCooldownUntil
|
||||
}
|
||||
d.linkCooldownMu.Unlock()
|
||||
return until
|
||||
}
|
||||
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
res, err := d.UploadAndReportHash(ctx, parentID, name, r, size)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return res.FileID, nil
|
||||
}
|
||||
|
||||
func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
parentID, name, err := d.normalizeUploadArgs(parentID, name, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
sessionURL, err := d.createUploadSession(ctx, parentID, name, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
if strings.TrimSpace(sessionURL) == "" {
|
||||
return UploadResult{}, errors.New("googledrive upload session: empty upload url")
|
||||
}
|
||||
|
||||
hasher := md5.New()
|
||||
var item driveFile
|
||||
var copied int64
|
||||
if size == 0 {
|
||||
completed, err := d.putUploadSessionChunkWithRetry(ctx, sessionURL, 0, 0, nil, hasher)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
if completed != nil {
|
||||
item = *completed
|
||||
}
|
||||
} else {
|
||||
chunkSize := uploadChunkSize
|
||||
if chunkSize <= 0 {
|
||||
chunkSize = 8 * 1024 * 1024
|
||||
}
|
||||
if chunkSize > int64(math.MaxInt32) {
|
||||
chunkSize = int64(math.MaxInt32)
|
||||
}
|
||||
buf := make([]byte, int(chunkSize))
|
||||
for copied < size {
|
||||
partSize := minInt64(chunkSize, size-copied)
|
||||
chunk := buf[:int(partSize)]
|
||||
n, err := io.ReadFull(r, chunk)
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return UploadResult{}, fmt.Errorf("googledrive upload: size mismatch: declared %d, copied %d", size, copied+int64(n))
|
||||
}
|
||||
return UploadResult{}, fmt.Errorf("googledrive upload: read body: %w", err)
|
||||
}
|
||||
chunk = chunk[:n]
|
||||
completed, err := d.putUploadSessionChunkWithRetry(ctx, sessionURL, copied, size, chunk, hasher)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
if completed != nil {
|
||||
item = *completed
|
||||
}
|
||||
copied += int64(n)
|
||||
}
|
||||
}
|
||||
|
||||
hashHex := hex.EncodeToString(hasher.Sum(nil))
|
||||
if item.ID == "" {
|
||||
fileID, err := d.findUploadedFileID(ctx, parentID, name, hashHex)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
item.ID = fileID
|
||||
}
|
||||
return UploadResult{FileID: item.ID, Hash: hashHex, Size: copied}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) normalizeUploadArgs(parentID, name string, r io.Reader, size int64) (string, string, error) {
|
||||
if r == nil {
|
||||
return "", "", errors.New("googledrive upload: body is required")
|
||||
}
|
||||
if size < 0 {
|
||||
return "", "", fmt.Errorf("googledrive upload: invalid size %d", size)
|
||||
}
|
||||
parentID = strings.TrimSpace(parentID)
|
||||
if parentID == "" || parentID == "/" {
|
||||
parentID = d.rootID
|
||||
}
|
||||
name = strings.TrimSpace(name)
|
||||
if name == "" {
|
||||
return "", "", errors.New("googledrive upload: empty file name")
|
||||
}
|
||||
return parentID, name, nil
|
||||
}
|
||||
|
||||
func (d *Driver) createUploadSession(ctx context.Context, parentID, name string, size int64) (string, error) {
|
||||
return d.createUploadSessionOnce(ctx, parentID, name, size, true)
|
||||
}
|
||||
|
||||
func (d *Driver) createUploadSessionOnce(ctx context.Context, parentID, name string, size int64, retry bool) (string, error) {
|
||||
var apiErr apiErrorResp
|
||||
res, err := d.client.R().
|
||||
SetContext(ctx).
|
||||
SetHeader("Authorization", "Bearer "+d.accessToken).
|
||||
SetHeader("X-Upload-Content-Type", mimeType(driveFile{Name: name})).
|
||||
SetHeader("X-Upload-Content-Length", strconv.FormatInt(size, 10)).
|
||||
SetQueryParams(map[string]string{
|
||||
"uploadType": "resumable",
|
||||
"supportsAllDrives": "true",
|
||||
"fields": fileInfoFields,
|
||||
}).
|
||||
SetBody(map[string]any{
|
||||
"name": name,
|
||||
"parents": []string{parentID},
|
||||
}).
|
||||
SetError(&apiErr).
|
||||
Post(d.uploadFilesURL())
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("googledrive upload session: %w", err)
|
||||
}
|
||||
if isGoogleRateLimit(res, apiErr.Error) {
|
||||
return "", googleRateLimitError(res, apiErr.Error.Message)
|
||||
}
|
||||
if apiErr.Error.Code != 0 {
|
||||
if apiErr.Error.Code == http.StatusUnauthorized && retry {
|
||||
if err := d.refresh(ctx); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return d.createUploadSessionOnce(ctx, parentID, name, size, false)
|
||||
}
|
||||
return "", googleAPIError(apiErr.Error)
|
||||
}
|
||||
if res.IsError() {
|
||||
return "", fmt.Errorf("googledrive upload session: status=%d body=%s", res.StatusCode(), strings.TrimSpace(res.String()))
|
||||
}
|
||||
return strings.TrimSpace(res.Header().Get("Location")), nil
|
||||
}
|
||||
|
||||
func (d *Driver) putUploadSessionChunkWithRetry(ctx context.Context, uploadURL string, start, total int64, data []byte, hasher hash.Hash) (*driveFile, error) {
|
||||
var last error
|
||||
for attempt := 0; attempt < 3; attempt++ {
|
||||
if attempt > 0 {
|
||||
if err := sleepContext(ctx, time.Duration(attempt)*time.Second); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
item, retryable, err := d.putUploadSessionChunk(ctx, uploadURL, start, total, data)
|
||||
if err == nil {
|
||||
if hasher != nil && len(data) > 0 {
|
||||
_, _ = hasher.Write(data)
|
||||
}
|
||||
return item, nil
|
||||
}
|
||||
last = err
|
||||
if !retryable {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if last == nil {
|
||||
last = errors.New("googledrive upload session: retry attempts exhausted")
|
||||
}
|
||||
return nil, last
|
||||
}
|
||||
|
||||
func (d *Driver) putUploadSessionChunk(ctx context.Context, uploadURL string, start, total int64, data []byte) (*driveFile, bool, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPut, uploadURL, bytes.NewReader(data))
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
req.ContentLength = int64(len(data))
|
||||
req.Header.Set("Authorization", "Bearer "+d.accessToken)
|
||||
req.Header.Set("Content-Length", strconv.Itoa(len(data)))
|
||||
if total == 0 {
|
||||
req.Header.Set("Content-Range", "bytes */0")
|
||||
} else {
|
||||
end := start + int64(len(data)) - 1
|
||||
req.Header.Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, end, total))
|
||||
}
|
||||
hc := d.httpClient
|
||||
if hc == nil {
|
||||
hc = http.DefaultClient
|
||||
}
|
||||
res, err := hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, true, fmt.Errorf("googledrive upload session: put chunk: %w", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
switch res.StatusCode {
|
||||
case http.StatusOK, http.StatusCreated:
|
||||
var item driveFile
|
||||
if err := json.NewDecoder(res.Body).Decode(&item); err != nil {
|
||||
return nil, false, fmt.Errorf("googledrive upload session: decode completed file: %w", err)
|
||||
}
|
||||
return &item, false, nil
|
||||
case http.StatusPermanentRedirect:
|
||||
return nil, false, nil
|
||||
case http.StatusUnauthorized:
|
||||
if err := d.refresh(ctx); err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
return nil, true, fmt.Errorf("googledrive upload session: unauthorized")
|
||||
default:
|
||||
body, _ := io.ReadAll(io.LimitReader(res.Body, 64*1024))
|
||||
var apiErr apiErrorResp
|
||||
_ = json.Unmarshal(body, &apiErr)
|
||||
if isGoogleUploadHTTPRateLimit(res.StatusCode, res.Header, body, apiErr.Error) {
|
||||
return nil, false, googleUploadRateLimitError(res.StatusCode, res.Header, body, apiErr.Error.Message)
|
||||
}
|
||||
retryable := res.StatusCode == http.StatusTooManyRequests || (res.StatusCode >= 500 && res.StatusCode <= 504)
|
||||
return nil, retryable, fmt.Errorf("googledrive upload session: status=%d body=%s", res.StatusCode, strings.TrimSpace(string(body)))
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
currentID := d.rootID
|
||||
for _, name := range splitPath(pathFromRoot) {
|
||||
childID, err := d.findChildDir(ctx, currentID, name)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if childID == "" {
|
||||
childID, err = d.makeDir(ctx, currentID, name)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
currentID = childID
|
||||
}
|
||||
return currentID, nil
|
||||
}
|
||||
|
||||
func (d *Driver) findChildDir(ctx context.Context, parentID, name string) (string, error) {
|
||||
entries, err := d.List(ctx, parentID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
for _, e := range entries {
|
||||
if e.IsDir && e.Name == name {
|
||||
return e.ID, nil
|
||||
}
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func (d *Driver) makeDir(ctx context.Context, parentID, name string) (string, error) {
|
||||
var item driveFile
|
||||
err := d.request(ctx, d.filesURL(), http.MethodPost, func(req *resty.Request) {
|
||||
req.SetQueryParam("fields", fileInfoFields)
|
||||
req.SetBody(map[string]any{
|
||||
"name": name,
|
||||
"parents": []string{parentID},
|
||||
"mimeType": "application/vnd.google-apps.folder",
|
||||
})
|
||||
}, &item)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("googledrive mkdir %s: %w", name, err)
|
||||
}
|
||||
if item.ID == "" {
|
||||
return "", fmt.Errorf("googledrive mkdir %s: empty file id", name)
|
||||
}
|
||||
return item.ID, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("googledrive rename: empty file id")
|
||||
}
|
||||
newName = strings.TrimSpace(newName)
|
||||
if newName == "" {
|
||||
return errors.New("googledrive rename: empty new name")
|
||||
}
|
||||
var item driveFile
|
||||
err := d.request(ctx, d.fileURL(fileID), http.MethodPatch, func(req *resty.Request) {
|
||||
req.SetQueryParam("fields", fileInfoFields)
|
||||
req.SetBody(map[string]string{"name": newName})
|
||||
}, &item)
|
||||
if err != nil {
|
||||
return fmt.Errorf("googledrive rename: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("googledrive remove: empty file id")
|
||||
}
|
||||
if err := d.request(ctx, d.fileURL(fileID), http.MethodDelete, nil, nil); err != nil {
|
||||
return fmt.Errorf("googledrive remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) findUploadedFileID(ctx context.Context, parentID, name, md5Hex string) (string, error) {
|
||||
entries, err := d.List(ctx, parentID)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("googledrive upload verify: %w", err)
|
||||
}
|
||||
var hashHit string
|
||||
for _, e := range entries {
|
||||
if e.IsDir {
|
||||
continue
|
||||
}
|
||||
if !strings.EqualFold(e.Hash, md5Hex) {
|
||||
continue
|
||||
}
|
||||
if e.Name == name {
|
||||
return e.ID, nil
|
||||
}
|
||||
if hashHit == "" {
|
||||
hashHit = e.ID
|
||||
}
|
||||
}
|
||||
if hashHit != "" {
|
||||
return hashHit, nil
|
||||
}
|
||||
for _, e := range entries {
|
||||
if !e.IsDir && e.Name == name {
|
||||
return e.ID, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("googledrive upload: uploaded file %q not found in parent %q", name, parentID)
|
||||
}
|
||||
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
func isGoogleUploadHTTPRateLimit(status int, header http.Header, body []byte, apiErr apiErrorBody) bool {
|
||||
if status == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
if status == http.StatusForbidden && strings.TrimSpace(header.Get("Retry-After")) != "" {
|
||||
return true
|
||||
}
|
||||
if isGoogleRateLimit(nil, apiErr) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func googleUploadRateLimitError(status int, header http.Header, body []byte, message string) error {
|
||||
if strings.TrimSpace(message) == "" {
|
||||
message = "google drive upload rate limited"
|
||||
}
|
||||
bodyText := strings.TrimSpace(string(body))
|
||||
if bodyText != "" {
|
||||
message = fmt.Sprintf("%s: status=%d body=%s", message, status, bodyText)
|
||||
}
|
||||
return &drives.RateLimitError{
|
||||
Provider: Kind,
|
||||
RetryAfter: parseRetryAfterHeader(header.Get("Retry-After")),
|
||||
Err: errors.New(message),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Driver) refresh(ctx context.Context) error {
|
||||
@@ -288,6 +723,26 @@ func (d *Driver) applyToken(out tokenResp) {
|
||||
}
|
||||
|
||||
func tokenResponseError(prefix string, res *resty.Response, out tokenResp, requireRefresh bool) error {
|
||||
if isGoogleTokenRateLimit(res, out) {
|
||||
message := strings.TrimSpace(out.Text)
|
||||
if message == "" {
|
||||
message = strings.TrimSpace(out.ErrorDescription)
|
||||
}
|
||||
if message == "" {
|
||||
message = strings.TrimSpace(out.Error)
|
||||
}
|
||||
if message == "" {
|
||||
message = "google drive token refresh rate limited"
|
||||
}
|
||||
if res != nil && strings.TrimSpace(res.String()) != "" {
|
||||
message = fmt.Sprintf("%s: status=%d body=%s", message, res.StatusCode(), strings.TrimSpace(res.String()))
|
||||
}
|
||||
return &drives.RateLimitError{
|
||||
Provider: Kind,
|
||||
RetryAfter: parseRetryAfter(res),
|
||||
Err: fmt.Errorf("%s: %s", prefix, message),
|
||||
}
|
||||
}
|
||||
if out.Text != "" {
|
||||
return fmt.Errorf("%s: %s", prefix, out.Text)
|
||||
}
|
||||
@@ -380,6 +835,10 @@ func (d *Driver) filesURL() string {
|
||||
return d.apiBaseURL + "/files"
|
||||
}
|
||||
|
||||
func (d *Driver) uploadFilesURL() string {
|
||||
return d.uploadBaseURL + "/files"
|
||||
}
|
||||
|
||||
func (d *Driver) fileURL(fileID string) string {
|
||||
return d.filesURL() + "/" + url.PathEscape(fileID)
|
||||
}
|
||||
@@ -444,18 +903,58 @@ func isGoogleRateLimit(res *resty.Response, body apiErrorBody) bool {
|
||||
if res != nil && res.StatusCode() == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
if res != nil && res.StatusCode() == http.StatusForbidden && strings.TrimSpace(res.Header().Get("Retry-After")) != "" {
|
||||
return true
|
||||
}
|
||||
if body.Code == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
for _, e := range body.Errors {
|
||||
reason := strings.ToLower(strings.TrimSpace(e.Reason))
|
||||
switch reason {
|
||||
case "ratelimitexceeded", "userratelimitexceeded", "downloadquotaexceeded", "sharingratelimitexceeded":
|
||||
if googleLimitReason(e.Reason) {
|
||||
return true
|
||||
}
|
||||
domain := compactGoogleLimitText(e.Domain)
|
||||
if domain == "usagelimits" && (body.Code == http.StatusForbidden || body.Code == http.StatusTooManyRequests) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
msg := strings.ToLower(body.Message)
|
||||
return strings.Contains(msg, "rate limit") || strings.Contains(msg, "too many requests") || strings.Contains(msg, "quota exceeded")
|
||||
return false
|
||||
}
|
||||
|
||||
func isGoogleTokenRateLimit(res *resty.Response, out tokenResp) bool {
|
||||
if res != nil {
|
||||
if res.StatusCode() == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
if res.StatusCode() == http.StatusForbidden && strings.TrimSpace(res.Header().Get("Retry-After")) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return googleLimitReason(out.Error)
|
||||
}
|
||||
|
||||
func googleLimitReason(reason string) bool {
|
||||
switch compactGoogleLimitText(reason) {
|
||||
case "ratelimitexceeded",
|
||||
"userratelimitexceeded",
|
||||
"dailylimitexceeded",
|
||||
"dailylimitexceededunreg",
|
||||
"downloadquotaexceeded",
|
||||
"sharingratelimitexceeded",
|
||||
"quotaexceeded",
|
||||
"uploadlimitexceeded",
|
||||
"storagelimitexceeded",
|
||||
"storagequotaexceeded":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// compactGoogleLimitText normalises a reason/domain string for comparison:
// it trims surrounding whitespace, lower-cases the text, and removes every
// underscore, hyphen, space, dot and colon.
func compactGoogleLimitText(text string) string {
	compact := strings.ToLower(strings.TrimSpace(text))
	for _, separator := range []string{"_", "-", " ", ".", ":"} {
		compact = strings.ReplaceAll(compact, separator, "")
	}
	return compact
}
|
||||
|
||||
func googleRateLimitError(res *resty.Response, message string) error {
|
||||
@@ -486,7 +985,11 @@ func parseRetryAfter(res *resty.Response) time.Duration {
|
||||
if res == nil {
|
||||
return 0
|
||||
}
|
||||
raw := strings.TrimSpace(res.Header().Get("Retry-After"))
|
||||
return parseRetryAfterHeader(res.Header().Get("Retry-After"))
|
||||
}
|
||||
|
||||
func parseRetryAfterHeader(raw string) time.Duration {
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
return 0
|
||||
}
|
||||
@@ -502,4 +1005,19 @@ func parseRetryAfter(res *resty.Response) time.Duration {
|
||||
return 0
|
||||
}
|
||||
|
||||
func splitPath(p string) []string {
|
||||
p = strings.Trim(p, "/")
|
||||
if p == "" {
|
||||
return nil
|
||||
}
|
||||
return strings.Split(p, "/")
|
||||
}
|
||||
|
||||
// minInt64 returns the smaller of a and b.
func minInt64(a, b int64) int64 {
	if b <= a {
		return b
	}
	return a
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
|
||||
@@ -2,11 +2,18 @@ package googledrive
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
func TestInitUsesOnlineRenewAPI(t *testing.T) {
|
||||
@@ -131,6 +138,134 @@ func TestStreamURLReturnsAuthenticatedMediaLinkWithoutRedirectRequirement(t *tes
|
||||
}
|
||||
}
|
||||
|
||||
// TestUploadAndReportHashUsesResumableSession verifies that UploadAndReportHash
// first opens a resumable upload session and then sends the whole payload to
// the session URL advertised in the Location header, returning the file ID,
// size and MD5 reported by the server.
//
// NOTE(review): t.Fatalf is invoked from the httptest handler goroutine; the
// testing package documents FailNow as valid only on the goroutine running the
// test, so t.Errorf followed by return may be the safer form here.
func TestUploadAndReportHashUsesResumableSession(t *testing.T) {
	body := "hello google drive"
	wantHash := md5.Sum([]byte(body))
	var sawSession bool
	var sawUpload bool
	// Declared before assignment so the handler closure can read srv.URL.
	var srv *httptest.Server
	srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/upload/drive/v3/files":
			// Step 1: session creation request carrying only the file metadata.
			sawSession = true
			if got := r.Header.Get("Authorization"); got != "Bearer access" {
				t.Fatalf("session Authorization = %q", got)
			}
			if got := r.URL.Query().Get("uploadType"); got != "resumable" {
				t.Fatalf("uploadType = %q", got)
			}
			// 18 is len(body).
			if got := r.Header.Get("X-Upload-Content-Length"); got != "18" {
				t.Fatalf("X-Upload-Content-Length = %q", got)
			}
			var meta struct {
				Name    string   `json:"name"`
				Parents []string `json:"parents"`
			}
			if err := json.NewDecoder(r.Body).Decode(&meta); err != nil {
				t.Fatalf("decode session metadata: %v", err)
			}
			if meta.Name != "clip.mp4" || len(meta.Parents) != 1 || meta.Parents[0] != "parent-1" {
				t.Fatalf("metadata = %+v", meta)
			}
			// The session URL is handed back through the Location header.
			w.Header().Set("Location", srv.URL+"/upload/session/1")
			w.WriteHeader(http.StatusOK)
		case "/upload/session/1":
			// Step 2: the payload is sent to the session URL in a single range.
			sawUpload = true
			if got := r.Header.Get("Authorization"); got != "Bearer access" {
				t.Fatalf("upload Authorization = %q", got)
			}
			if got := r.Header.Get("Content-Range"); got != "bytes 0-17/18" {
				t.Fatalf("Content-Range = %q", got)
			}
			gotBody, err := io.ReadAll(r.Body)
			if err != nil {
				t.Fatalf("read upload body: %v", err)
			}
			if string(gotBody) != body {
				t.Fatalf("upload body = %q", string(gotBody))
			}
			writeTestJSONStatus(w, http.StatusCreated, driveFile{
				ID:          "file-uploaded",
				Name:        "clip.mp4",
				Size:        "18",
				MD5Checksum: hex.EncodeToString(wantHash[:]),
			})
		default:
			t.Fatalf("unexpected path %s", r.URL.Path)
		}
	}))
	defer srv.Close()

	d := New(Config{ID: "g", APIBaseURL: srv.URL + "/drive/v3"})
	// Pre-seed the token so no refresh request is needed.
	d.accessToken = "access"
	res, err := d.UploadAndReportHash(context.Background(), "parent-1", "clip.mp4", strings.NewReader(body), int64(len(body)))
	if err != nil {
		t.Fatalf("UploadAndReportHash() error = %v", err)
	}
	if !sawSession || !sawUpload {
		t.Fatalf("saw session/upload = %v/%v, want both", sawSession, sawUpload)
	}
	if res.FileID != "file-uploaded" || res.Size != int64(len(body)) || res.Hash != hex.EncodeToString(wantHash[:]) {
		t.Fatalf("upload result = %+v", res)
	}
}
|
||||
|
||||
// TestEnsureDirAndRenameUseGoogleDriveFileAPI verifies that EnsureDir creates a
// missing folder with a POST to the files endpoint and that Rename issues a
// PATCH carrying the new name.
//
// NOTE(review): t.Fatalf is invoked from the httptest handler goroutine; the
// testing package documents FailNow as valid only on the goroutine running the
// test.
func TestEnsureDirAndRenameUseGoogleDriveFileAPI(t *testing.T) {
	var madeDir bool
	var renamed bool
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.Method == http.MethodGet && r.URL.Path == "/drive/v3/files":
			// Empty listing: the folder does not exist yet.
			writeTestJSON(w, filesResp{})
		case r.Method == http.MethodPost && r.URL.Path == "/drive/v3/files":
			madeDir = true
			var meta struct {
				Name     string   `json:"name"`
				Parents  []string `json:"parents"`
				MimeType string   `json:"mimeType"`
			}
			if err := json.NewDecoder(r.Body).Decode(&meta); err != nil {
				t.Fatalf("decode mkdir body: %v", err)
			}
			if meta.Name != "Crawler Uploads" || len(meta.Parents) != 1 || meta.Parents[0] != "root" || meta.MimeType != "application/vnd.google-apps.folder" {
				t.Fatalf("mkdir body = %+v", meta)
			}
			writeTestJSON(w, driveFile{ID: "folder-crawler", Name: "Crawler Uploads", MimeType: "application/vnd.google-apps.folder"})
		case r.Method == http.MethodPatch && r.URL.Path == "/drive/v3/files/file-1":
			renamed = true
			var body map[string]string
			if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
				t.Fatalf("decode rename body: %v", err)
			}
			if body["name"] != "new-name.mp4" {
				t.Fatalf("rename body = %+v", body)
			}
			writeTestJSON(w, driveFile{ID: "file-1", Name: "new-name.mp4"})
		default:
			t.Fatalf("unexpected %s %s", r.Method, r.URL.Path)
		}
	}))
	defer srv.Close()

	d := New(Config{ID: "g", RootID: "root", APIBaseURL: srv.URL + "/drive/v3"})
	d.accessToken = "access"
	// NOTE(review): presumably a negative interval disables list throttling in
	// tests; confirm against the driver implementation.
	d.listInterval = -1

	dirID, err := d.EnsureDir(context.Background(), "Crawler Uploads")
	if err != nil {
		t.Fatalf("EnsureDir() error = %v", err)
	}
	if dirID != "folder-crawler" || !madeDir {
		t.Fatalf("dirID/madeDir = %q/%v, want folder-crawler/true", dirID, madeDir)
	}
	if err := d.Rename(context.Background(), "file-1", "new-name.mp4"); err != nil {
		t.Fatalf("Rename() error = %v", err)
	}
	if !renamed {
		t.Fatal("rename endpoint was not called")
	}
}
|
||||
|
||||
func TestRequestRefreshesOnUnauthorized(t *testing.T) {
|
||||
var fileCalls int
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -179,6 +314,88 @@ func TestRequestRefreshesOnUnauthorized(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestRateLimitReasonsFollowGoogleDriveErrorShape verifies that isGoogleRateLimit
// classifies each listed error reason as a rate limit when it is the only entry
// in apiErrorBody.Errors and no HTTP response is supplied.
func TestRateLimitReasonsFollowGoogleDriveErrorShape(t *testing.T) {
	reasons := []string{
		"rateLimitExceeded",
		"userRateLimitExceeded",
		"dailyLimitExceeded",
		"dailyLimitExceededUnreg",
		"downloadQuotaExceeded",
		"sharingRateLimitExceeded",
		"quotaExceeded",
	}
	for _, reason := range reasons {
		body := apiErrorBody{
			Code:    http.StatusForbidden,
			Message: "google drive quota or rate limited",
			// The anonymous struct must match the element type of
			// apiErrorBody.Errors field for field, tags included.
			Errors: []struct {
				Domain       string `json:"domain"`
				Reason       string `json:"reason"`
				Message      string `json:"message"`
				LocationType string `json:"location_type"`
				Location     string `json:"location"`
			}{
				{Domain: "usageLimits", Reason: reason, Message: reason},
			},
		}
		// nil response: classification has to come from the body alone.
		if !isGoogleRateLimit(nil, body) {
			t.Fatalf("reason %q not treated as rate limit", reason)
		}
	}
}
|
||||
|
||||
// TestStreamURLRateLimitStartsSharedLinkCooldown verifies that a rate-limited
// StreamURL call returns a drives.RateLimitError carrying the server's
// Retry-After value, and that a second call during the cooldown fails with a
// RateLimitError without contacting the server again.
func TestStreamURLRateLimitStartsSharedLinkCooldown(t *testing.T) {
	// calls counts how many requests actually reach the fake server.
	var calls int
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		calls++
		w.Header().Set("Retry-After", "120")
		writeTestJSONStatus(w, http.StatusForbidden, apiErrorResp{Error: apiErrorBody{
			Code:    http.StatusForbidden,
			Message: "User rate limit exceeded.",
			Errors: []struct {
				Domain       string `json:"domain"`
				Reason       string `json:"reason"`
				Message      string `json:"message"`
				LocationType string `json:"location_type"`
				Location     string `json:"location"`
			}{
				{Domain: "usageLimits", Reason: "userRateLimitExceeded", Message: "User rate limit exceeded."},
			},
		}})
	}))
	defer srv.Close()

	d := New(Config{ID: "g", APIBaseURL: srv.URL})
	d.accessToken = "access"
	// Configured cooldown is longer than Retry-After; the assertions below
	// expect the 120s from the header to win.
	d.linkCooldownDuration = time.Hour

	_, err := d.StreamURL(context.Background(), "file-1")
	if err == nil {
		t.Fatal("first StreamURL succeeded, want rate limit")
	}
	var rateLimit *drives.RateLimitError
	if !errors.As(err, &rateLimit) {
		t.Fatalf("first error = %T %[1]v, want RateLimitError", err)
	}
	if rateLimit.RetryAfter != 2*time.Minute {
		t.Fatalf("retry after = %s, want 2m", rateLimit.RetryAfter)
	}

	// Second call: must be rejected locally while the cooldown is active.
	_, err = d.StreamURL(context.Background(), "file-1")
	if err == nil {
		t.Fatal("second StreamURL succeeded during cooldown")
	}
	if !errors.As(err, &rateLimit) {
		t.Fatalf("second error = %T %[1]v, want RateLimitError", err)
	}
	if calls != 1 {
		t.Fatalf("remote calls = %d, want 1; second call should use shared cooldown", calls)
	}
	// The remaining cooldown must be positive and no longer than the original.
	if rateLimit.RetryAfter <= 0 || rateLimit.RetryAfter > 2*time.Minute {
		t.Fatalf("second retry after = %s, want remaining cooldown", rateLimit.RetryAfter)
	}
}
|
||||
|
||||
// writeTestJSON writes v through writeTestJSONStatus with HTTP status 200.
func writeTestJSON(w http.ResponseWriter, v any) {
	writeTestJSONStatus(w, http.StatusOK, v)
}
|
||||
|
||||
@@ -42,8 +42,16 @@ type apiErrorBody struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
Errors []struct {
|
||||
Domain string `json:"domain"`
|
||||
Reason string `json:"reason"`
|
||||
Message string `json:"message"`
|
||||
Domain string `json:"domain"`
|
||||
Reason string `json:"reason"`
|
||||
Message string `json:"message"`
|
||||
LocationType string `json:"location_type"`
|
||||
Location string `json:"location"`
|
||||
} `json:"errors"`
|
||||
}
|
||||
|
||||
// UploadResult describes a file produced by an upload.
type UploadResult struct {
	// FileID is the identifier of the uploaded file.
	FileID string
	// Hash is the checksum of the uploaded content; the Google Drive upload
	// test expects the hex MD5 reported by the server.
	Hash string
	// Size is the file size (the upload test compares it to the body length in bytes).
	Size int64
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,300 @@
|
||||
package guangyapan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
// TestDriverRefreshListAndStream walks the happy path of the driver: Init
// refreshes the token and reports the new credentials through
// OnCredentialsUpdate, List returns the root entries, and StreamURL returns the
// signed download URL.
//
// NOTE(review): t.Fatalf is invoked from the httptest handler goroutine; the
// testing package documents FailNow as valid only on the goroutine running the
// test.
func TestDriverRefreshListAndStream(t *testing.T) {
	var refreshed bool
	var listedRoot bool
	// updates accumulates every key/value passed to OnCredentialsUpdate.
	updates := map[string]string{}
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/v1/auth/token":
			refreshed = true
			writeTestJSON(w, map[string]any{
				"access_token":  "new-access",
				"refresh_token": "new-refresh",
			})
		case "/v1/user/me":
			// Must already use the refreshed token.
			if got := r.Header.Get("Authorization"); got != "Bearer new-access" {
				t.Fatalf("auth header = %q, want new access token", got)
			}
			writeTestJSON(w, map[string]any{"sub": "user-1"})
		case "/userres/v1/file/get_file_list":
			if got := r.Header.Get("Authorization"); got != "Bearer new-access" {
				t.Fatalf("api auth header = %q, want new access token", got)
			}
			var body map[string]any
			if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
				t.Fatalf("decode list body: %v", err)
			}
			// The root directory is addressed with an empty parentId.
			if body["parentId"] != "" {
				t.Fatalf("parentId = %#v, want root empty string", body["parentId"])
			}
			listedRoot = true
			writeTestJSON(w, map[string]any{
				"code": 0,
				"msg":  "success",
				"data": map[string]any{
					"total": 2,
					"list": []map[string]any{
						{"fileId": "dir-1", "parentId": "", "fileName": "Movies", "resType": 2},
						{"fileId": "file-1", "parentId": "", "fileName": "clip.mp4", "fileSize": 123, "resType": 1, "utime": 1700000000},
					},
				},
			})
		case "/nd.bizuserres.s/v1/get_res_download_url":
			writeTestJSON(w, map[string]any{
				"code": 0,
				"msg":  "success",
				"data": map[string]any{"signedURL": "https://cdn.example.test/clip.mp4"},
			})
		default:
			t.Fatalf("unexpected path %s", r.URL.Path)
		}
	}))
	defer srv.Close()

	// Only a refresh token is configured, so Init has to obtain an access token.
	d := New(Config{
		ID:             "gy",
		RefreshToken:   "old-refresh",
		AccountBaseURL: srv.URL,
		APIBaseURL:     srv.URL,
		OnCredentialsUpdate: func(values map[string]string) {
			for k, v := range values {
				updates[k] = v
			}
		},
	})
	if err := d.Init(context.Background()); err != nil {
		t.Fatalf("init: %v", err)
	}
	if !refreshed {
		t.Fatal("refresh token endpoint was not called")
	}
	if updates["access_token"] != "new-access" || updates["refresh_token"] != "new-refresh" {
		t.Fatalf("updates = %#v, want refreshed tokens", updates)
	}

	entries, err := d.List(context.Background(), "")
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	if !listedRoot || len(entries) != 2 {
		t.Fatalf("listedRoot=%v entries=%#v", listedRoot, entries)
	}
	// resType 2 is expected to surface as a directory, resType 1 as a file.
	if !entries[0].IsDir || entries[1].ID != "file-1" || entries[1].Size != 123 {
		t.Fatalf("entries = %#v", entries)
	}

	link, err := d.StreamURL(context.Background(), "file-1")
	if err != nil {
		t.Fatalf("stream url: %v", err)
	}
	if link.URL != "https://cdn.example.test/clip.mp4" {
		t.Fatalf("stream url = %q", link.URL)
	}
}
|
||||
|
||||
// TestDriverResolvesRootPath verifies that Init resolves Config.RootPath one
// folder name at a time starting from the account root, that the resolved
// folder ID replaces the configured RootID, and that List("") then lists the
// resolved folder.
//
// NOTE(review): t.Fatalf is invoked from the httptest handler goroutine; the
// testing package documents FailNow as valid only on the goroutine running the
// test.
func TestDriverResolvesRootPath(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/v1/user/me":
			writeTestJSON(w, map[string]any{"sub": "user-1"})
		case "/userres/v1/file/get_file_list":
			var body map[string]any
			if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
				t.Fatalf("decode list body: %v", err)
			}
			parent, _ := body["parentId"].(string)
			// Fake tree: root -> folder-a -> folder-b -> file-1.
			switch parent {
			case "":
				writeTestJSON(w, listTestResponse([]map[string]any{
					{"fileId": "folder-a", "parentId": "", "fileName": "影视", "resType": 2},
				}))
			case "folder-a":
				writeTestJSON(w, listTestResponse([]map[string]any{
					{"fileId": "folder-b", "parentId": "folder-a", "fileName": "电影", "resType": 2},
				}))
			case "folder-b":
				writeTestJSON(w, listTestResponse([]map[string]any{
					{"fileId": "file-1", "parentId": "folder-b", "fileName": "movie.mp4", "fileSize": 456, "resType": 1},
				}))
			default:
				// "configured-root" must never be requested: RootPath wins.
				t.Fatalf("unexpected parent %q", parent)
			}
		default:
			t.Fatalf("unexpected path %s", r.URL.Path)
		}
	}))
	defer srv.Close()

	d := New(Config{
		ID:             "gy",
		RootID:         "configured-root",
		RootPath:       "影视/电影",
		AccessToken:    "access",
		AccountBaseURL: srv.URL,
		APIBaseURL:     srv.URL,
	})
	if err := d.Init(context.Background()); err != nil {
		t.Fatalf("init: %v", err)
	}
	if d.RootID() != "folder-b" {
		t.Fatalf("root id = %q, want folder-b", d.RootID())
	}
	entries, err := d.List(context.Background(), "")
	if err != nil {
		t.Fatalf("list resolved root: %v", err)
	}
	if len(entries) != 1 || entries[0].ID != "file-1" {
		t.Fatalf("entries = %#v", entries)
	}
}
|
||||
|
||||
// TestDriverSendSMSCodeUpdatesVerificationState verifies that Init with
// SendCode enabled requests a captcha token and a verification ID, persists
// them (plus send_code=false and a generated device_id) through
// OnCredentialsUpdate, and returns an error prompting for the SMS code.
func TestDriverSendSMSCodeUpdatesVerificationState(t *testing.T) {
	updates := map[string]string{}
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/v1/shield/captcha/init":
			writeTestJSON(w, map[string]any{"captcha_token": "captcha-1"})
		case "/v1/auth/verification":
			writeTestJSON(w, map[string]any{"verification_id": "verify-1"})
		default:
			t.Fatalf("unexpected path %s", r.URL.Path)
		}
	}))
	defer srv.Close()

	d := New(Config{
		ID:             "gy",
		PhoneNumber:    "13800000000",
		SendCode:       true,
		AccountBaseURL: srv.URL,
		APIBaseURL:     srv.URL,
		OnCredentialsUpdate: func(values map[string]string) {
			for k, v := range values {
				updates[k] = v
			}
		},
	})
	err := d.Init(context.Background())
	// Init is expected to fail with a message telling the user the code was sent.
	if err == nil || !strings.Contains(err.Error(), "验证码已发送") {
		t.Fatalf("init err = %v, want verification prompt", err)
	}
	if updates["captcha_token"] != "captcha-1" || updates["verification_id"] != "verify-1" || updates["send_code"] != "false" {
		t.Fatalf("updates = %#v, want sms state saved", updates)
	}
	// No device ID was configured, so one must have been generated and saved.
	if updates["device_id"] == "" {
		t.Fatalf("updates = %#v, want generated device id saved", updates)
	}
}
|
||||
|
||||
// TestListHTTP429ReturnsRateLimitError verifies that an HTTP 429 response with
// a Retry-After header makes List return a drives.RateLimitError whose
// RetryAfter matches the header.
func TestListHTTP429ReturnsRateLimitError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/userres/v1/file/get_file_list" {
			t.Fatalf("unexpected path %s", r.URL.Path)
		}
		w.Header().Set("Retry-After", "120")
		w.WriteHeader(http.StatusTooManyRequests)
		// NOTE(review): writeTestJSON sets Content-Type after WriteHeader; per
		// net/http, header changes made after WriteHeader are not sent, so this
		// response does not carry "application/json".
		writeTestJSON(w, map[string]any{"code": 429, "msg": "操作频繁,请稍后重试"})
	}))
	defer srv.Close()

	d := New(Config{
		ID:             "gy",
		AccessToken:    "access",
		AccountBaseURL: srv.URL,
		APIBaseURL:     srv.URL,
	})
	_, err := d.List(context.Background(), "")
	if err == nil {
		t.Fatal("list succeeded, want rate limit error")
	}
	var rateLimit *drives.RateLimitError
	if !errors.As(err, &rateLimit) {
		t.Fatalf("error = %T %[1]v, want RateLimitError", err)
	}
	// Retry-After: 120 (seconds) is expected to surface as two minutes.
	if rateLimit.RetryAfter != 2*time.Minute {
		t.Fatalf("retry after = %s, want 2m", rateLimit.RetryAfter)
	}
}
|
||||
|
||||
// TestListCode429ReturnsRateLimitError verifies that an HTTP 200 response whose
// JSON body carries code 429 still makes List return a drives.RateLimitError.
func TestListCode429ReturnsRateLimitError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/userres/v1/file/get_file_list" {
			t.Fatalf("unexpected path %s", r.URL.Path)
		}
		// No WriteHeader call: the status is 200 and only the body signals the limit.
		writeTestJSON(w, map[string]any{"code": 429, "msg": "操作频繁,请稍后再试"})
	}))
	defer srv.Close()

	d := New(Config{
		ID:             "gy",
		AccessToken:    "access",
		AccountBaseURL: srv.URL,
		APIBaseURL:     srv.URL,
	})
	_, err := d.List(context.Background(), "")
	if err == nil {
		t.Fatal("list succeeded, want rate limit error")
	}
	var rateLimit *drives.RateLimitError
	if !errors.As(err, &rateLimit) {
		t.Fatalf("error = %T %[1]v, want RateLimitError", err)
	}
}
|
||||
|
||||
// TestListInvalidToken403DoesNotReturnRateLimitError verifies that an HTTP 403
// response reporting an invalid access token makes List fail with an error
// that is not a drives.RateLimitError.
func TestListInvalidToken403DoesNotReturnRateLimitError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/userres/v1/file/get_file_list" {
			t.Fatalf("unexpected path %s", r.URL.Path)
		}
		w.WriteHeader(http.StatusForbidden)
		// NOTE(review): writeTestJSON sets Content-Type after WriteHeader; per
		// net/http, header changes made after WriteHeader are not sent, so this
		// response does not carry "application/json".
		writeTestJSON(w, map[string]any{"code": 401, "msg": "invalid access token"})
	}))
	defer srv.Close()

	d := New(Config{
		ID:             "gy",
		AccessToken:    "access",
		AccountBaseURL: srv.URL,
		APIBaseURL:     srv.URL,
	})
	_, err := d.List(context.Background(), "")
	if err == nil {
		t.Fatal("list succeeded, want auth error")
	}
	var rateLimit *drives.RateLimitError
	// Inverted check: matching RateLimitError here is the failure case.
	if errors.As(err, &rateLimit) {
		t.Fatalf("error = %T %[1]v, want non-rate-limit error", err)
	}
}
|
||||
|
||||
// listTestResponse wraps items in the success envelope used by the fake
// file-list endpoint: code 0, msg "success", and a data object holding the
// item count and the items themselves.
func listTestResponse(items []map[string]any) map[string]any {
	payload := map[string]any{}
	payload["total"] = len(items)
	payload["list"] = items

	envelope := map[string]any{}
	envelope["code"] = 0
	envelope["msg"] = "success"
	envelope["data"] = payload
	return envelope
}
|
||||
|
||||
func writeTestJSON(w http.ResponseWriter, v any) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(w).Encode(v); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,244 @@
|
||||
package guangyapan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-resty/resty/v2"
|
||||
"github.com/skip2/go-qrcode"
|
||||
)
|
||||
|
||||
const (
	// defaultQRScope is the "scope" value sent when requesting a device code.
	defaultQRScope = "user"
	// deviceCodeGrantType is the "grant_type" value sent when polling the
	// token endpoint for a device code.
	deviceCodeGrantType = "urn:ietf:params:oauth:grant-type:device_code"
	// defaultQRUserAgent is the User-Agent header set on every QRClient request.
	defaultQRUserAgent = "GuangYaPan-Login/1.0"
)
|
||||
|
||||
// QRConfig holds the optional settings accepted by NewQRClient. Every field
// may be left at its zero value.
type QRConfig struct {
	// AccountBaseURL is the base URL of the account service; when blank,
	// defaultAccountBaseURL is used.
	AccountBaseURL string
	// HTTPClient is the underlying HTTP client; when nil, a client with a
	// 20-second timeout is created.
	HTTPClient *http.Client
	// Now supplies the current time (used to compute ExpiresAt); when nil,
	// time.Now is used.
	Now func() time.Time
}
|
||||
|
||||
// QRClient drives the QR-code (device code) login flow: Generate creates a
// session to display, Poll checks whether it has been confirmed.
type QRClient struct {
	// accountBaseURL is the normalized base URL of the account service.
	accountBaseURL string
	// client is the resty client preconfigured with base URL and headers.
	client *resty.Client
	// now returns the current time; injectable for tests.
	now func() time.Time
}
|
||||
|
||||
// QRCodeSession is the result of QRClient.Generate: everything needed to show
// a QR code and to poll for its confirmation.
type QRCodeSession struct {
	// DeviceCode identifies the session and is passed to Poll.
	DeviceCode string `json:"deviceCode"`
	// QRCodeURL is the verification URL encoded in the QR image.
	QRCodeURL string `json:"qrCodeUrl"`
	// QRImageDataURL is the QR code as a "data:image/png;base64,..." URL.
	QRImageDataURL string `json:"qrImageDataUrl"`
	// IntervalSeconds is the polling interval; Generate falls back to 5 when
	// the server value is not positive.
	IntervalSeconds int `json:"intervalSeconds"`
	// ExpiresAt is the expiry time formatted as RFC 3339.
	ExpiresAt string `json:"expiresAt,omitempty"`
}
|
||||
|
||||
// QRCodeStatus is the result of QRClient.Poll.
type QRCodeStatus struct {
	// State is one of "success", "pending", "expired", "denied" or "error".
	State string `json:"state"`
	// StatusText is a human-readable description of State.
	StatusText string `json:"statusText"`
	// IntervalSeconds, when non-zero, is a new polling interval to adopt
	// (set to 10 for the "slow_down" error).
	IntervalSeconds int `json:"intervalSeconds,omitempty"`
	// AccessToken, RefreshToken, TokenType and ExpiresIn are only populated
	// when State is "success".
	AccessToken  string `json:"accessToken,omitempty"`
	RefreshToken string `json:"refreshToken,omitempty"`
	TokenType    string `json:"tokenType,omitempty"`
	ExpiresIn    int64  `json:"expiresIn,omitempty"`
}
|
||||
|
||||
// deviceCodeResp is the JSON body decoded from POST /v1/auth/device/code, for
// both success and error responses.
type deviceCodeResp struct {
	DeviceCode string `json:"device_code"`
	// VerificationURIComplete is preferred for the QR code; ShortURIComplete
	// is the fallback when it is blank.
	VerificationURIComplete string `json:"verification_uri_complete"`
	ShortURIComplete        string `json:"short_uri_complete"`
	// Interval and ExpiresIn are in seconds.
	Interval  int `json:"interval"`
	ExpiresIn int `json:"expires_in"`
	// A non-empty Error marks the response as a failure.
	Error     string `json:"error"`
	ErrorCode int    `json:"error_code"`
	ErrorDesc string `json:"error_description"`
}
|
||||
|
||||
// deviceTokenResp is the JSON body decoded from POST /v1/auth/token while
// polling a device code, for both success and error responses.
type deviceTokenResp struct {
	AccessToken  string `json:"access_token"`
	RefreshToken string `json:"refresh_token"`
	TokenType    string `json:"token_type"`
	ExpiresIn    int64  `json:"expires_in"`
	Scope        string `json:"scope"`
	// Error carries codes such as "authorization_pending", "slow_down",
	// "expired_token" and "access_denied".
	Error     string `json:"error"`
	ErrorCode int    `json:"error_code"`
	ErrorDesc string `json:"error_description"`
}
|
||||
|
||||
func NewQRClient(c QRConfig) *QRClient {
|
||||
accountBaseURL := strings.TrimRight(strings.TrimSpace(c.AccountBaseURL), "/")
|
||||
if accountBaseURL == "" {
|
||||
accountBaseURL = defaultAccountBaseURL
|
||||
}
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = &http.Client{Timeout: 20 * time.Second}
|
||||
}
|
||||
now := c.Now
|
||||
if now == nil {
|
||||
now = time.Now
|
||||
}
|
||||
return &QRClient{
|
||||
accountBaseURL: accountBaseURL,
|
||||
client: resty.NewWithClient(httpClient).
|
||||
SetTimeout(20*time.Second).
|
||||
SetBaseURL(accountBaseURL).
|
||||
SetHeader("User-Agent", defaultQRUserAgent).
|
||||
SetHeader("Accept", "application/json").
|
||||
SetHeader("Content-Type", "application/json"),
|
||||
now: now,
|
||||
}
|
||||
}
|
||||
|
||||
// Generate requests a new device code from the account service and returns a
// session containing the verification URL rendered as a PNG QR code.
//
// It returns an error when the request fails, when the server reports an
// error, when the response lacks a device code or verification URL, or when
// the QR image cannot be encoded.
func (c *QRClient) Generate(ctx context.Context) (QRCodeSession, error) {
	// out receives the body on success, errOut on an HTTP error status.
	var out deviceCodeResp
	var errOut deviceCodeResp
	resp, err := c.client.R().
		SetContext(ctx).
		SetBody(map[string]any{
			"client_id": defaultClientID,
			"scope":     defaultQRScope,
		}).
		SetResult(&out).
		SetError(&errOut).
		Post("/v1/auth/device/code")
	if err != nil {
		return QRCodeSession{}, err
	}
	// A failure is either an HTTP error status or an "error" field in the body.
	if resp.IsError() || out.Error != "" {
		if out.Error == "" {
			out = errOut
		}
		return QRCodeSession{}, fmt.Errorf("guangyapan qr: %s", deviceAPIError(out.ErrorDesc, out.Error, resp))
	}

	deviceCode := strings.TrimSpace(out.DeviceCode)
	if deviceCode == "" {
		return QRCodeSession{}, errors.New("guangyapan qr: empty device_code")
	}
	// Prefer the complete verification URI; fall back to the short one.
	qrURL := strings.TrimSpace(out.VerificationURIComplete)
	if qrURL == "" {
		qrURL = strings.TrimSpace(out.ShortURIComplete)
	}
	if qrURL == "" {
		return QRCodeSession{}, errors.New("guangyapan qr: empty verification uri")
	}
	// Defaults when the server omits values: poll every 5s, expire after 300s.
	interval := out.Interval
	if interval <= 0 {
		interval = 5
	}
	expiresIn := out.ExpiresIn
	if expiresIn <= 0 {
		expiresIn = 300
	}
	png, err := qrcode.Encode(qrURL, qrcode.Medium, 220)
	if err != nil {
		return QRCodeSession{}, err
	}
	return QRCodeSession{
		DeviceCode:      deviceCode,
		QRCodeURL:       qrURL,
		QRImageDataURL:  "data:image/png;base64," + base64.StdEncoding.EncodeToString(png),
		IntervalSeconds: interval,
		ExpiresAt:       c.now().Add(time.Duration(expiresIn) * time.Second).Format(time.RFC3339),
	}, nil
}
|
||||
|
||||
// Poll asks the token endpoint whether the QR code identified by deviceCode
// has been confirmed.
//
// Device-flow errors reported by the server (pending, slow down, expired,
// denied, or any other "error" value) are returned as a QRCodeStatus with a
// nil error. A non-nil error is returned for a blank deviceCode, a transport
// failure, an HTTP error without a recognizable error body, or a success
// response missing either token.
func (c *QRClient) Poll(ctx context.Context, deviceCode string) (QRCodeStatus, error) {
	deviceCode = strings.TrimSpace(deviceCode)
	if deviceCode == "" {
		return QRCodeStatus{}, errors.New("deviceCode is required")
	}

	// out receives the body on success, errOut on an HTTP error status.
	var out deviceTokenResp
	var errOut deviceTokenResp
	resp, err := c.client.R().
		SetContext(ctx).
		SetBody(map[string]any{
			"client_id":   defaultClientID,
			"grant_type":  deviceCodeGrantType,
			"device_code": deviceCode,
		}).
		SetResult(&out).
		SetError(&errOut).
		Post("/v1/auth/token")
	if err != nil {
		return QRCodeStatus{}, err
	}
	// First fallback: use the body decoded into the error struct.
	if resp.IsError() && out.Error == "" {
		out = errOut
	}
	// Second fallback: decode the raw body ourselves, presumably for
	// responses the client did not decode automatically (e.g. without a JSON
	// content type). A decode failure is ignored on purpose; the generic
	// HTTP error below then applies.
	if resp.IsError() && out.Error == "" {
		_ = json.Unmarshal(resp.Body(), &out)
	}
	if out.Error != "" {
		return qrStatusForDeviceError(out), nil
	}
	if resp.IsError() {
		return QRCodeStatus{}, fmt.Errorf("guangyapan qr: status=%d body=%s", resp.StatusCode(), resp.String())
	}
	access := strings.TrimSpace(out.AccessToken)
	refresh := strings.TrimSpace(out.RefreshToken)
	if access == "" || refresh == "" {
		return QRCodeStatus{}, errors.New("guangyapan qr: login succeeded but token response is incomplete")
	}
	tokenType := strings.TrimSpace(out.TokenType)
	if tokenType == "" {
		tokenType = "Bearer"
	}
	return QRCodeStatus{
		State:        "success",
		StatusText:   "登录成功",
		AccessToken:  access,
		RefreshToken: refresh,
		TokenType:    tokenType,
		ExpiresIn:    out.ExpiresIn,
	}, nil
}
|
||||
|
||||
func qrStatusForDeviceError(out deviceTokenResp) QRCodeStatus {
|
||||
errCode := strings.TrimSpace(out.Error)
|
||||
switch errCode {
|
||||
case "authorization_pending":
|
||||
return QRCodeStatus{State: "pending", StatusText: "等待扫码确认"}
|
||||
case "slow_down":
|
||||
return QRCodeStatus{State: "pending", StatusText: "等待扫码确认,已降低查询频率", IntervalSeconds: 10}
|
||||
case "expired_token":
|
||||
return QRCodeStatus{State: "expired", StatusText: "二维码已过期"}
|
||||
case "access_denied":
|
||||
return QRCodeStatus{State: "denied", StatusText: "用户拒绝了授权"}
|
||||
default:
|
||||
msg := strings.TrimSpace(out.ErrorDesc)
|
||||
if msg == "" {
|
||||
msg = errCode
|
||||
}
|
||||
if msg == "" {
|
||||
msg = "未知错误"
|
||||
}
|
||||
return QRCodeStatus{State: "error", StatusText: msg}
|
||||
}
|
||||
}
|
||||
|
||||
func deviceAPIError(desc, short string, resp *resty.Response) string {
|
||||
msg := strings.TrimSpace(desc)
|
||||
if msg == "" {
|
||||
msg = strings.TrimSpace(short)
|
||||
}
|
||||
if msg == "" && resp != nil {
|
||||
msg = strings.TrimSpace(resp.String())
|
||||
}
|
||||
if msg == "" && resp != nil {
|
||||
msg = fmt.Sprintf("status=%d", resp.StatusCode())
|
||||
}
|
||||
if msg == "" {
|
||||
msg = "unknown error"
|
||||
}
|
||||
return msg
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
package guangyapan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestQRClientGenerate verifies that Generate posts the client ID and scope to
// the device code endpoint and maps the response into a QRCodeSession, with
// the expiry computed from the injected clock.
//
// NOTE(review): t.Fatalf is invoked from the httptest handler goroutine; the
// testing package documents FailNow as valid only on the goroutine running the
// test.
func TestQRClientGenerate(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/v1/auth/device/code" {
			t.Fatalf("path = %s, want device code endpoint", r.URL.Path)
		}
		var body map[string]any
		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
			t.Fatalf("decode body: %v", err)
		}
		if body["client_id"] != defaultClientID || body["scope"] != defaultQRScope {
			t.Fatalf("body = %#v", body)
		}
		writeTestJSON(w, map[string]any{
			"device_code":               "device-1",
			"verification_uri_complete": "https://account.guangyapan.com/device?code=abc",
			"interval":                  7,
			"expires_in":                180,
		})
	}))
	defer srv.Close()

	// Fixed clock so ExpiresAt is deterministic.
	client := NewQRClient(QRConfig{
		AccountBaseURL: srv.URL,
		Now:            func() time.Time { return time.Unix(1700000000, 0) },
	})
	session, err := client.Generate(context.Background())
	if err != nil {
		t.Fatalf("generate: %v", err)
	}
	if session.DeviceCode != "device-1" || session.QRCodeURL != "https://account.guangyapan.com/device?code=abc" {
		t.Fatalf("session = %#v", session)
	}
	if session.IntervalSeconds != 7 {
		t.Fatalf("interval = %d, want 7", session.IntervalSeconds)
	}
	// 1700000000 + expires_in (180s).
	if session.ExpiresAt != time.Unix(1700000180, 0).Format(time.RFC3339) {
		t.Fatalf("expiresAt = %q", session.ExpiresAt)
	}
	if !strings.HasPrefix(session.QRImageDataURL, "data:image/png;base64,") {
		t.Fatalf("qr image = %q", session.QRImageDataURL)
	}
}
|
||||
|
||||
// TestQRClientPollPendingAndSuccess verifies both Poll outcomes against the
// same endpoint: the first request answers 400 "authorization_pending" and
// must yield state "pending" without an error; the second returns tokens and
// must yield state "success".
//
// NOTE(review): t.Fatalf is invoked from the httptest handler goroutine; the
// testing package documents FailNow as valid only on the goroutine running the
// test.
func TestQRClientPollPendingAndSuccess(t *testing.T) {
	var calls int
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/v1/auth/token" {
			t.Fatalf("path = %s, want token endpoint", r.URL.Path)
		}
		var body map[string]any
		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
			t.Fatalf("decode body: %v", err)
		}
		if body["client_id"] != defaultClientID ||
			body["grant_type"] != deviceCodeGrantType ||
			body["device_code"] != "device-1" {
			t.Fatalf("body = %#v", body)
		}
		calls++
		if calls == 1 {
			w.WriteHeader(http.StatusBadRequest)
			// writeTestJSON sets Content-Type after WriteHeader; per net/http
			// that header is not sent, so this error body reaches the client
			// without "application/json".
			writeTestJSON(w, map[string]any{"error": "authorization_pending"})
			return
		}
		writeTestJSON(w, map[string]any{
			"access_token":  "access-1",
			"refresh_token": "refresh-1",
			"token_type":    "Bearer",
			"expires_in":    7200,
		})
	}))
	defer srv.Close()

	client := NewQRClient(QRConfig{AccountBaseURL: srv.URL})
	pending, err := client.Poll(context.Background(), "device-1")
	if err != nil {
		t.Fatalf("poll pending: %v", err)
	}
	if pending.State != "pending" || pending.AccessToken != "" {
		t.Fatalf("pending = %#v", pending)
	}

	success, err := client.Poll(context.Background(), "device-1")
	if err != nil {
		t.Fatalf("poll success: %v", err)
	}
	if success.State != "success" || success.AccessToken != "access-1" || success.RefreshToken != "refresh-1" {
		t.Fatalf("success = %#v", success)
	}
}
|
||||
@@ -0,0 +1,129 @@
|
||||
package guangyapan
|
||||
|
||||
import "time"
|
||||
|
||||
// tokenResp is the JSON shape of a token response: an access/refresh token
// pair, or the "error", "error_code" and "error_description" fields.
// NOTE(review): presumably decoded from /v1/auth/token; confirm at the call site.
type tokenResp struct {
	AccessToken  string `json:"access_token"`
	RefreshToken string `json:"refresh_token"`
	Error        string `json:"error"`
	ErrorCode    int    `json:"error_code"`
	ErrorDesc    string `json:"error_description"`
}
|
||||
|
||||
// verificationResp is the JSON shape of a response carrying a
// "verification_id", or the "error", "error_code" and "error_description"
// fields.
type verificationResp struct {
	VerificationID string `json:"verification_id"`
	Error          string `json:"error"`
	ErrorCode      int    `json:"error_code"`
	ErrorDesc      string `json:"error_description"`
}
|
||||
|
||||
// captchaInitResp is the JSON shape of a response carrying a "captcha_token",
// or the "error", "error_code" and "error_description" fields.
type captchaInitResp struct {
	CaptchaToken string `json:"captcha_token"`
	Error        string `json:"error"`
	ErrorCode    int    `json:"error_code"`
	ErrorDesc    string `json:"error_description"`
}
|
||||
|
||||
// verifyResp is the JSON shape of a response carrying a "verification_token",
// or the "error", "error_code" and "error_description" fields.
type verifyResp struct {
	VerificationToken string `json:"verification_token"`
	Error             string `json:"error"`
	ErrorCode         int    `json:"error_code"`
	ErrorDesc         string `json:"error_description"`
}
|
||||
|
||||
// userMeResp decodes only the "sub" field of a user-info reply.
type userMeResp struct {
	Sub string `json:"sub"`
}
|
||||
|
||||
type listResp struct {
|
||||
Code int `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data struct {
|
||||
Total int `json:"total"`
|
||||
List []fileItem `json:"list"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
// fileItem is one entry of listResp.Data.List, decoded from the camelCase
// JSON keys of the payload.
type fileItem struct {
	FileID   string `json:"fileId"`
	ParentID string `json:"parentId"`
	FileName string `json:"fileName"`
	FileSize int64  `json:"fileSize"`
	ResType  int    `json:"resType"`
	// NOTE(review): ctime/utime are presumably Unix timestamps — confirm the
	// unit against the API before converting.
	CTime int64 `json:"ctime"`
	UTime int64 `json:"utime"`
}
|
||||
|
||||
// downloadResp is the code/msg/data envelope of a download reply; data may
// carry a signedURL and/or a downloadUrl.
type downloadResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		SignedURL   string `json:"signedURL"`
		DownloadURL string `json:"downloadUrl"`
	} `json:"data"`
}
|
||||
|
||||
// createDirResp is the code/msg/data envelope of a create-directory reply;
// data describes the resulting entry (id, name, resType and timestamps).
type createDirResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		FileID   string `json:"fileId"`
		FileName string `json:"fileName"`
		ResType  int    `json:"resType"`
		CTime    int64  `json:"ctime"`
		UTime    int64  `json:"utime"`
	} `json:"data"`
}
|
||||
|
||||
// deleteResp is the code/msg/data envelope of a delete reply; data carries
// the taskId returned by the call.
type deleteResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		TaskID string `json:"taskId"`
	} `json:"data"`
}
|
||||
|
||||
// taskStatusResp is the code/msg/data envelope of a task-status reply; data
// carries the numeric status.
type taskStatusResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		Status int `json:"status"`
	} `json:"data"`
}
|
||||
|
||||
type uploadTokenResp struct {
|
||||
Code int `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data uploadTokenData `json:"data"`
|
||||
}
|
||||
|
||||
// uploadTokenData is the data payload of uploadTokenResp: the task id, the
// object location (path, bucket, endpoints) and temporary credentials.
//
// The access key, secret and session token are declared both at the top level
// and under "creds".
// NOTE(review): presumably the API fills one of the two places — confirm
// which one the caller should prefer.
type uploadTokenData struct {
	TaskID          string `json:"taskId"`
	ObjectPath      string `json:"objectPath"`
	BucketName      string `json:"bucketName"`
	EndPoint        string `json:"endPoint"`
	FullEndPoint    string `json:"fullEndPoint"`
	AccessKeyID     string `json:"accessKeyID"`
	SecretAccessKey string `json:"secretAccessKey"`
	SessionToken    string `json:"sessionToken"`
	Creds           struct {
		AccessKeyID     string `json:"accessKeyID"`
		SecretAccessKey string `json:"secretAccessKey"`
		SessionToken    string `json:"sessionToken"`
	} `json:"creds"`
}
|
||||
|
||||
// taskInfoResp is the code/msg/data envelope of a task-info reply; data
// carries the fileId associated with the task.
type taskInfoResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		FileID string `json:"fileId"`
	} `json:"data"`
}
|
||||
|
||||
// unixOrZero converts a Unix timestamp in seconds to a time.Time. Zero and
// negative inputs yield the zero time.Time rather than a date at or before
// the epoch.
func unixOrZero(v int64) time.Time {
	var ts time.Time
	if v > 0 {
		ts = time.Unix(v, 0)
	}
	return ts
}
|
||||
@@ -5,12 +5,14 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Drive 是多家网盘统一抽象。上层不区分盘,只区分 Kind。
|
||||
type Drive interface {
|
||||
// Kind 返回驱动代号:"quark" / "p115" / "pikpak" / "wopan" / "onedrive" / "googledrive" / "localstorage"
|
||||
// Kind 返回驱动代号:"quark" / "p115" / "p123" / "pikpak" / "wopan" / "guangyapan" / "onedrive" / "googledrive" / "localstorage"
|
||||
Kind() string
|
||||
|
||||
// ID 返回该盘在 catalog 中的唯一标识
|
||||
@@ -30,7 +32,7 @@ type Drive interface {
|
||||
StreamURL(ctx context.Context, fileID string) (*StreamLink, error)
|
||||
|
||||
// Upload 把本地流写入指定目录,返回新文件 fileID。
|
||||
// 当前 teaser 和封面只保存在本地,不再通过该方法写回网盘。
|
||||
// 当前预览视频和封面只保存在本地,不再通过该方法写回网盘。
|
||||
Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error)
|
||||
|
||||
// EnsureDir 保证指定路径存在(相对根目录),返回最终目录 fileID。
|
||||
@@ -40,6 +42,27 @@ type Drive interface {
|
||||
RootID() string
|
||||
}
|
||||
|
||||
// Remover is an optional capability a drive may provide. As with OpenList's
// optional Remove interface, it is discovered by type assertion: callers must
// check that a drive implements it before deleting a source file.
type Remover interface {
	// Remove deletes the file identified by fileID.
	Remove(ctx context.Context, fileID string) error
}
|
||||
|
||||
// SourceFile bundles the catalog metadata that is available when an
// administrator asks for the original source file to be deleted.
type SourceFile struct {
	FileID   string
	ParentID string
	Name     string
	Size     int64
}
|
||||
|
||||
// SourceRemover is an optional, richer removal capability for providers whose
|
||||
// playback ID is not the same ID required by their delete API.
|
||||
type SourceRemover interface {
|
||||
RemoveSource(ctx context.Context, source SourceFile) error
|
||||
}
|
||||
|
||||
type Entry struct {
|
||||
ID string
|
||||
Name string
|
||||
@@ -98,3 +121,42 @@ func RateLimitRetryAfter(err error) (time.Duration, bool) {
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// httpStatusContextPrefixes are the lower-case phrases that, when directly
// followed by a status code, count as an explicit HTTP status context.
//
// Longer variants such as "http status=", "http status: ", "http status ",
// "error_code=" and "error_code: " are intentionally absent: each of them
// ends with one of the entries below, so they are already covered.
var httpStatusContextPrefixes = []string{
	"status=",
	"status: ",
	"status ",
	"status code ",
	"http ",
	"server returned ",
	"code=",
	"code: ",
}

// containsStatusToken reports whether token occurs in text at a position where
// it is not immediately followed by another ASCII digit. The digit check keeps
// "status=429" from matching inside "status=4290".
func containsStatusToken(text, token string) bool {
	for from := 0; from < len(text); {
		idx := strings.Index(text[from:], token)
		if idx < 0 {
			return false
		}
		end := from + idx + len(token)
		if end == len(text) || text[end] < '0' || text[end] > '9' {
			return true
		}
		// This occurrence is the prefix of a longer number; keep looking.
		from += idx + 1
	}
	return false
}

// TextMentionsHTTPStatus only looks for explicit numeric HTTP status contexts
// in errors from tools that do not expose structured response metadata.
//
// text is matched case-insensitively after trimming surrounding whitespace.
// It reports true when, for any positive entry of statuses, the text either
// starts with "<code> " or contains one of httpStatusContextPrefixes directly
// followed by the code as a complete number. A bare number without such
// context (e.g. "generated 429 bytes") does not match, and neither does a
// longer number that merely starts with the code (e.g. "status=4290").
// Non-positive statuses are ignored.
func TextMentionsHTTPStatus(text string, statuses ...int) bool {
	text = strings.ToLower(strings.TrimSpace(text))
	if text == "" {
		return false
	}
	for _, status := range statuses {
		if status <= 0 {
			continue
		}
		code := strconv.Itoa(status)
		if strings.HasPrefix(text, code+" ") {
			return true
		}
		for _, prefix := range httpStatusContextPrefixes {
			if containsStatusToken(text, prefix+code) {
				return true
			}
		}
	}
	return false
}
|
||||
|
||||
func ErrorMentionsHTTPStatus(err error, statuses ...int) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
return TextMentionsHTTPStatus(err.Error(), statuses...)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
package drives
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTextMentionsHTTPStatus(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
want bool
|
||||
}{
|
||||
{name: "status context", text: "request failed with status: 429 Too Many Requests", want: true},
|
||||
{name: "http context", text: "http 503 service unavailable", want: true},
|
||||
{name: "server returned context", text: "Server returned 403 Forbidden", want: true},
|
||||
{name: "message only", text: "操作频繁,请稍后重试", want: false},
|
||||
{name: "unrelated number", text: "generated 429 bytes", want: false},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := TextMentionsHTTPStatus(tc.text, 403, 429, 503); got != tc.want {
|
||||
t.Fatalf("TextMentionsHTTPStatus(%q) = %v, want %v", tc.text, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -17,20 +18,29 @@ import (
|
||||
|
||||
const Kind = "localstorage"
|
||||
|
||||
const maxSTRMBytes = 64 * 1024
|
||||
|
||||
// Config holds the settings used to construct a local storage Driver.
type Config struct {
	ID       string
	RootPath string

	// STRMAllowOutsideRoot allows a .strm file to point at a local path
	// outside the storage root.
	// It is off by default: with it enabled, a strm file can effectively make
	// /p/stream read any file on the server, so it should only be turned on
	// when the administrator knows exactly what they are doing (for example
	// when the strm library lives apart from the rclone mount directory).
	STRMAllowOutsideRoot bool
}
|
||||
|
||||
type Driver struct {
|
||||
id string
|
||||
rootPath string
|
||||
id string
|
||||
rootPath string
|
||||
strmAllowOutsideRoot bool
|
||||
}
|
||||
|
||||
func New(c Config) *Driver {
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
rootPath: c.RootPath,
|
||||
id: c.ID,
|
||||
rootPath: c.RootPath,
|
||||
strmAllowOutsideRoot: c.STRMAllowOutsideRoot,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,7 +57,7 @@ func (d *Driver) Init(context.Context) error {
|
||||
}
|
||||
info, err := os.Stat(root)
|
||||
if err != nil {
|
||||
return fmt.Errorf("localstorage: stat root: %w", err)
|
||||
return fmt.Errorf("localstorage: stat root %q: %w%s", root, err, localStoragePathHint(d.rootPath))
|
||||
}
|
||||
if !info.IsDir() {
|
||||
return fmt.Errorf("localstorage: root is not a directory: %s", root)
|
||||
@@ -122,7 +132,13 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if info.IsDir() || !info.Mode().IsRegular() || info.Size() <= 0 {
|
||||
if info.IsDir() || !info.Mode().IsRegular() {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
if strings.EqualFold(filepath.Ext(p), ".strm") {
|
||||
return d.streamURLFromSTRM(ctx, p)
|
||||
}
|
||||
if info.Size() <= 0 {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
@@ -131,6 +147,115 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) streamURLFromSTRM(ctx context.Context, strmPath string) (*drives.StreamLink, error) {
|
||||
target, err := readSTRMTarget(strmPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if filepath.IsAbs(target) {
|
||||
return d.localSTRMLink(strmPath, target)
|
||||
}
|
||||
u, err := url.Parse(target)
|
||||
if err == nil {
|
||||
switch strings.ToLower(u.Scheme) {
|
||||
case "http", "https":
|
||||
if u.Host == "" {
|
||||
return nil, fmt.Errorf("localstorage: invalid strm url %q", target)
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
URL: target,
|
||||
Expires: time.Now().Add(24 * time.Hour),
|
||||
}, nil
|
||||
case "file":
|
||||
if u.Host != "" && !strings.EqualFold(u.Host, "localhost") {
|
||||
return nil, fmt.Errorf("localstorage: unsupported strm file url host %q", u.Host)
|
||||
}
|
||||
return d.localSTRMLink(strmPath, u.Path)
|
||||
case "":
|
||||
// Local path below.
|
||||
default:
|
||||
return nil, fmt.Errorf("localstorage: unsupported strm target scheme %q", u.Scheme)
|
||||
}
|
||||
} else if strings.Contains(target, "://") {
|
||||
return nil, fmt.Errorf("localstorage: invalid strm url %q: %w", target, err)
|
||||
}
|
||||
return d.localSTRMLink(strmPath, target)
|
||||
}
|
||||
|
||||
func readSTRMTarget(path string) (string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
data, err := io.ReadAll(io.LimitReader(f, maxSTRMBytes+1))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(data) > maxSTRMBytes {
|
||||
return "", errors.New("localstorage: strm file is too large")
|
||||
}
|
||||
lines := strings.Split(string(data), "\n")
|
||||
for i, line := range lines {
|
||||
if i == 0 {
|
||||
line = strings.TrimPrefix(line, "\ufeff")
|
||||
}
|
||||
line = strings.TrimSpace(line)
|
||||
if line != "" {
|
||||
return line, nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("localstorage: empty strm target")
|
||||
}
|
||||
|
||||
func (d *Driver) localSTRMLink(strmPath, target string) (*drives.StreamLink, error) {
|
||||
target = strings.TrimSpace(target)
|
||||
if target == "" {
|
||||
return nil, errors.New("localstorage: empty strm target")
|
||||
}
|
||||
|
||||
var p string
|
||||
if filepath.IsAbs(target) {
|
||||
p = filepath.Clean(target)
|
||||
} else {
|
||||
p = filepath.Join(filepath.Dir(strmPath), filepath.FromSlash(target))
|
||||
}
|
||||
p, err := filepath.Abs(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
root, err := d.root()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
realPath, within, err := realPathWithinRoot(root, p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !within && !d.strmAllowOutsideRoot {
|
||||
return nil, errors.New("localstorage: strm target escapes root (enable strm_allow_outside_root to allow)")
|
||||
}
|
||||
if strings.EqualFold(filepath.Ext(p), ".strm") || strings.EqualFold(filepath.Ext(realPath), ".strm") {
|
||||
return nil, errors.New("localstorage: nested strm target is not supported")
|
||||
}
|
||||
info, err := os.Stat(realPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if info.IsDir() || !info.Mode().IsRegular() || info.Size() <= 0 {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
URL: realPath,
|
||||
Expires: time.Now().Add(24 * time.Hour),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
@@ -139,6 +264,39 @@ func (d *Driver) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
p, rel, err := d.pathForID(fileID)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if rel == "" {
|
||||
return errors.New("localstorage: refusing to remove root")
|
||||
}
|
||||
info, err := os.Stat(p)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return errors.New("localstorage: refusing to remove directory")
|
||||
}
|
||||
if !info.Mode().IsRegular() {
|
||||
return errors.New("localstorage: refusing to remove non-regular file")
|
||||
}
|
||||
if err := os.Remove(p); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) root() (string, error) {
|
||||
raw := strings.TrimSpace(d.rootPath)
|
||||
if raw == "" {
|
||||
@@ -158,6 +316,8 @@ func (d *Driver) root() (string, error) {
|
||||
return filepath.Abs(raw)
|
||||
}
|
||||
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
func (d *Driver) pathForID(id string) (string, string, error) {
|
||||
root, err := d.root()
|
||||
if err != nil {
|
||||
@@ -174,12 +334,63 @@ func (d *Driver) pathForID(id string) (string, string, error) {
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
if p != root && !strings.HasPrefix(p, root+string(os.PathSeparator)) {
|
||||
if !pathWithinRoot(root, p) {
|
||||
return "", "", errors.New("localstorage: path escapes root")
|
||||
}
|
||||
if _, within, err := realPathWithinRoot(root, p); err != nil {
|
||||
return "", "", err
|
||||
} else if !within {
|
||||
return "", "", errors.New("localstorage: path escapes root")
|
||||
}
|
||||
return p, rel, nil
|
||||
}
|
||||
|
||||
func pathWithinRoot(root, path string) bool {
|
||||
rel, err := filepath.Rel(root, path)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return rel == "." || (rel != ".." && !strings.HasPrefix(rel, ".."+string(os.PathSeparator)))
|
||||
}
|
||||
|
||||
func realPathWithinRoot(root, path string) (string, bool, error) {
|
||||
realRoot, err := filepath.EvalSymlinks(root)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
realRoot, err = filepath.Abs(realRoot)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
realPath, err := filepath.EvalSymlinks(path)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
realPath, err = filepath.Abs(realPath)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
return realPath, pathWithinRoot(realRoot, realPath), nil
|
||||
}
|
||||
|
||||
func localStoragePathHint(configured string) string {
|
||||
cwd, _ := os.Getwd()
|
||||
parts := []string{}
|
||||
if strings.TrimSpace(configured) != "" {
|
||||
parts = append(parts, fmt.Sprintf("configured=%q", strings.TrimSpace(configured)))
|
||||
}
|
||||
if cwd != "" {
|
||||
parts = append(parts, fmt.Sprintf("cwd=%q", cwd))
|
||||
}
|
||||
if _, err := os.Stat("/.dockerenv"); err == nil {
|
||||
parts = append(parts, "docker=host paths must be bind-mounted into the container")
|
||||
}
|
||||
if len(parts) == 0 {
|
||||
return ""
|
||||
}
|
||||
return " (" + strings.Join(parts, ", ") + ")"
|
||||
}
|
||||
|
||||
func decodeRel(id string) (string, error) {
|
||||
id = strings.TrimSpace(id)
|
||||
if id == "" || id == "/" {
|
||||
|
||||
@@ -58,6 +58,199 @@ func TestListEncodesRelativePathsAndStreamURLResolvesFile(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLResolvesHTTPSTRM(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
strmPath := filepath.Join(root, "movie.strm")
|
||||
target := "https://media.example/clip.mp4?token=abc"
|
||||
if err := os.WriteFile(strmPath, []byte("\ufeff\n "+target+"\n"), 0o644); err != nil {
|
||||
t.Fatalf("write strm: %v", err)
|
||||
}
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
|
||||
link, err := drv.StreamURL(context.Background(), encodeRel("movie.strm"))
|
||||
if err != nil {
|
||||
t.Fatalf("stream url: %v", err)
|
||||
}
|
||||
if link.URL != target {
|
||||
t.Fatalf("url = %q, want %q", link.URL, target)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLResolvesRelativeLocalSTRM(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
if err := os.MkdirAll(filepath.Join(root, "links"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir links: %v", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Join(root, "media"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir media: %v", err)
|
||||
}
|
||||
videoPath := filepath.Join(root, "media", "clip.mp4")
|
||||
if err := os.WriteFile(videoPath, []byte("video"), 0o644); err != nil {
|
||||
t.Fatalf("write video: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(root, "links", "movie.strm"), []byte("../media/clip.mp4\n"), 0o644); err != nil {
|
||||
t.Fatalf("write strm: %v", err)
|
||||
}
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
|
||||
link, err := drv.StreamURL(context.Background(), encodeRel("links/movie.strm"))
|
||||
if err != nil {
|
||||
t.Fatalf("stream url: %v", err)
|
||||
}
|
||||
if link.URL != videoPath {
|
||||
t.Fatalf("url = %q, want %q", link.URL, videoPath)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLRejectsInvalidSTRMTargets(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
setup func(t *testing.T, root string) string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "empty",
|
||||
setup: func(t *testing.T, root string) string {
|
||||
t.Helper()
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "empty.strm"), []byte("\n \r\n"))
|
||||
return "empty.strm"
|
||||
},
|
||||
want: "empty strm target",
|
||||
},
|
||||
{
|
||||
name: "escapes root",
|
||||
setup: func(t *testing.T, root string) string {
|
||||
t.Helper()
|
||||
writeLocalStorageTestFile(t, filepath.Join(filepath.Dir(root), "outside.mp4"), []byte("video"))
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "escape.strm"), []byte("../outside.mp4\n"))
|
||||
return "escape.strm"
|
||||
},
|
||||
want: "escapes root",
|
||||
},
|
||||
{
|
||||
name: "nested",
|
||||
setup: func(t *testing.T, root string) string {
|
||||
t.Helper()
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "nested.strm"), []byte("https://media.example/clip.mp4\n"))
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "outer.strm"), []byte("nested.strm\n"))
|
||||
return "outer.strm"
|
||||
},
|
||||
want: "nested strm target",
|
||||
},
|
||||
{
|
||||
name: "unsupported scheme",
|
||||
setup: func(t *testing.T, root string) string {
|
||||
t.Helper()
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "ftp.strm"), []byte("ftp://media.example/clip.mp4\n"))
|
||||
return "ftp.strm"
|
||||
},
|
||||
want: "unsupported strm target scheme",
|
||||
},
|
||||
{
|
||||
name: "too large",
|
||||
setup: func(t *testing.T, root string) string {
|
||||
t.Helper()
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "large.strm"), []byte(strings.Repeat("x", maxSTRMBytes+1)))
|
||||
return "large.strm"
|
||||
},
|
||||
want: "strm file is too large",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
rel := tt.setup(t, root)
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
|
||||
_, err := drv.StreamURL(context.Background(), encodeRel(rel))
|
||||
|
||||
if err == nil || !strings.Contains(err.Error(), tt.want) {
|
||||
t.Fatalf("error = %v, want contain %q", err, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLRejectsSTRMTargetEscapingRootThroughSymlink(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
writeLocalStorageTestFile(t, filepath.Join(outside, "secret.mp4"), []byte("secret"))
|
||||
if err := os.MkdirAll(filepath.Join(root, "links"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir links: %v", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Join(root, "real"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir real: %v", err)
|
||||
}
|
||||
if err := os.Symlink(outside, filepath.Join(root, "real", "outside")); err != nil {
|
||||
t.Fatalf("symlink: %v", err)
|
||||
}
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "links", "movie.strm"), []byte("../real/outside/secret.mp4\n"))
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
|
||||
_, err := drv.StreamURL(context.Background(), encodeRel("links/movie.strm"))
|
||||
|
||||
if err == nil || !strings.Contains(err.Error(), "strm target escapes root") {
|
||||
t.Fatalf("error = %v, want strm target escapes root", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLAllowsSTRMTargetOutsideRootWhenEnabled(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
target := filepath.Join(outside, "movie.mp4")
|
||||
writeLocalStorageTestFile(t, target, []byte("movie-data"))
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "movie.strm"), []byte(target+"\n"))
|
||||
|
||||
// 默认关闭:根目录外的目标仍被拒绝
|
||||
strict := New(Config{ID: "local", RootPath: root})
|
||||
if _, err := strict.StreamURL(context.Background(), encodeRel("movie.strm")); err == nil || !strings.Contains(err.Error(), "strm target escapes root") {
|
||||
t.Fatalf("default error = %v, want strm target escapes root", err)
|
||||
}
|
||||
|
||||
// 开启 strm_allow_outside_root 后放行
|
||||
relaxed := New(Config{ID: "local", RootPath: root, STRMAllowOutsideRoot: true})
|
||||
link, err := relaxed.StreamURL(context.Background(), encodeRel("movie.strm"))
|
||||
if err != nil {
|
||||
t.Fatalf("StreamURL with allow-outside-root: %v", err)
|
||||
}
|
||||
resolved, err := filepath.EvalSymlinks(target)
|
||||
if err != nil {
|
||||
t.Fatalf("eval target: %v", err)
|
||||
}
|
||||
if link.URL != resolved {
|
||||
t.Fatalf("link url = %q, want %q", link.URL, resolved)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLAllowOutsideRootStillRejectsNestedSTRM(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
writeLocalStorageTestFile(t, filepath.Join(outside, "inner.strm"), []byte("http://example.com/v.mp4\n"))
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "movie.strm"), []byte(filepath.Join(outside, "inner.strm")+"\n"))
|
||||
|
||||
drv := New(Config{ID: "local", RootPath: root, STRMAllowOutsideRoot: true})
|
||||
if _, err := drv.StreamURL(context.Background(), encodeRel("movie.strm")); err == nil || !strings.Contains(err.Error(), "nested strm") {
|
||||
t.Fatalf("error = %v, want nested strm rejection", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLRejectsSymlinkFileIDEscapingRoot(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
writeLocalStorageTestFile(t, filepath.Join(outside, "secret.mp4"), []byte("secret"))
|
||||
if err := os.Symlink(filepath.Join(outside, "secret.mp4"), filepath.Join(root, "link.mp4")); err != nil {
|
||||
t.Fatalf("symlink: %v", err)
|
||||
}
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
|
||||
_, err := drv.StreamURL(context.Background(), encodeRel("link.mp4"))
|
||||
|
||||
if err == nil || !strings.Contains(err.Error(), "path escapes root") {
|
||||
t.Fatalf("error = %v, want path escapes root", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLRejectsEscapingID(t *testing.T) {
|
||||
drv := New(Config{ID: "local", RootPath: t.TempDir()})
|
||||
escaped := base64.RawURLEncoding.EncodeToString([]byte("../secret.mp4"))
|
||||
@@ -70,13 +263,73 @@ func TestStreamURLRejectsEscapingID(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestInitRequiresExistingDirectory(t *testing.T) {
|
||||
drv := New(Config{ID: "local", RootPath: filepath.Join(t.TempDir(), "missing")})
|
||||
missing := filepath.Join(t.TempDir(), "missing")
|
||||
drv := New(Config{ID: "local", RootPath: missing})
|
||||
|
||||
err := drv.Init(context.Background())
|
||||
|
||||
if err == nil || !strings.Contains(err.Error(), "stat root") {
|
||||
t.Fatalf("error = %v, want stat root failure", err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), missing) || !strings.Contains(err.Error(), "configured=") {
|
||||
t.Fatalf("error = %v, want diagnostic path details", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPathForIDAllowsRootPathSlash(t *testing.T) {
|
||||
drv := New(Config{ID: "local", RootPath: string(os.PathSeparator)})
|
||||
childID := encodeRel("tmp")
|
||||
|
||||
path, rel, err := drv.pathForID(childID)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("pathForID: %v", err)
|
||||
}
|
||||
if rel != "tmp" {
|
||||
t.Fatalf("rel = %q, want tmp", rel)
|
||||
}
|
||||
if path != filepath.Join(string(os.PathSeparator), "tmp") {
|
||||
t.Fatalf("path = %q, want /tmp", path)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScannerPersistsLocalStorageSTRM(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
root := t.TempDir()
|
||||
if err := os.MkdirAll(filepath.Join(root, "collection"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir collection: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(root, "collection", "clip.strm"), []byte("https://media.example/clip.mp4\n"), 0o644); err != nil {
|
||||
t.Fatalf("write strm: %v", err)
|
||||
}
|
||||
cat, err := catalog.Open(filepath.Join(t.TempDir(), "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
sc := scanner.New(cat, drv, []string{".strm"}, nil, nil)
|
||||
stats, err := sc.Run(ctx, drv.RootID())
|
||||
if err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
if stats.Added != 1 {
|
||||
t.Fatalf("added = %d, want 1", stats.Added)
|
||||
}
|
||||
|
||||
fileID := encodeRel("collection/clip.strm")
|
||||
got, err := cat.GetVideo(ctx, Kind+"-local-"+fileID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.Ext != "strm" || got.FileID != fileID || got.ParentID != encodeRel("collection") {
|
||||
t.Fatalf("video = %#v, want local strm video under collection", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScannerPersistsLocalStorageVideo(t *testing.T) {
|
||||
@@ -113,7 +366,14 @@ func TestScannerPersistsLocalStorageVideo(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.DriveID != "local" || got.FileID != fileID || got.Category != "collection" {
|
||||
t.Fatalf("video = %#v, want local drive video in collection", got)
|
||||
if got.DriveID != "local" || got.FileID != fileID || got.ParentID != encodeRel("collection") {
|
||||
t.Fatalf("video = %#v, want local drive video under collection", got)
|
||||
}
|
||||
}
|
||||
|
||||
func writeLocalStorageTestFile(t *testing.T, path string, data []byte) {
|
||||
t.Helper()
|
||||
if err := os.WriteFile(path, data, 0o644); err != nil {
|
||||
t.Fatalf("write %s: %v", path, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,12 +78,38 @@ func (d *Driver) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
path, err := d.uploadPath(fileID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return errors.New("localupload: refusing to remove directory")
|
||||
}
|
||||
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RootID returns the upload directory, which serves as this driver's root ID.
func (d *Driver) RootID() string {
	return d.uploadDir()
}
|
||||
|
||||
func (d *Driver) uploadDir() string {
|
||||
return d.uploadDirPath
|
||||
}
|
||||
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
func (d *Driver) uploadPath(fileID string) (string, error) {
|
||||
if strings.TrimSpace(fileID) == "" || filepath.Base(fileID) != fileID {
|
||||
return "", errors.New("invalid upload file id")
|
||||
|
||||
@@ -501,6 +501,17 @@ func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("onedrive remove: empty file id")
|
||||
}
|
||||
if err := d.request(ctx, d.itemURL(fileID), http.MethodDelete, nil, nil); err != nil {
|
||||
return fmt.Errorf("onedrive remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) request(ctx context.Context, rawURL, method string, configure func(*resty.Request), out any) error {
|
||||
return d.requestOnce(ctx, rawURL, method, configure, out, true)
|
||||
}
|
||||
@@ -583,8 +594,8 @@ func (d *Driver) refresh(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func isRateLimitResponse(res *resty.Response, code, message string) bool {
|
||||
if isRateLimitCode(code) || isRateLimitMessage(message) {
|
||||
func isRateLimitResponse(res *resty.Response, code, _ string) bool {
|
||||
if isRateLimitCode(code) {
|
||||
return true
|
||||
}
|
||||
if res == nil {
|
||||
@@ -621,18 +632,6 @@ func isRateLimitCode(code string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func isRateLimitMessage(message string) bool {
|
||||
text := strings.ToLower(strings.TrimSpace(message))
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "throttl") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "activity limit") ||
|
||||
strings.Contains(text, "temporarily blocked")
|
||||
}
|
||||
|
||||
func onedriveRateLimitError(res *resty.Response, message string) error {
|
||||
if strings.TrimSpace(message) == "" {
|
||||
message = "onedrive rate limited"
|
||||
@@ -741,3 +740,4 @@ func guessMime(name string) string {
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
@@ -214,7 +214,7 @@ func TestGraph429ReturnsRateLimitErrorWithRetryAfter(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGraphThrottleMessageReturnsRateLimitError(t *testing.T) {
|
||||
func TestGraphThrottleMessageDoesNotReturnRateLimitError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusForbidden)
|
||||
@@ -238,11 +238,11 @@ func TestGraphThrottleMessageReturnsRateLimitError(t *testing.T) {
|
||||
|
||||
_, err := d.StreamURL(context.Background(), "file-id")
|
||||
if err == nil {
|
||||
t.Fatal("list succeeded, want rate limit error")
|
||||
t.Fatal("list succeeded, want graph error")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
if errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want non-rate-limit error", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,11 +20,12 @@ import (
|
||||
)
|
||||
|
||||
type Driver struct {
|
||||
id string
|
||||
cookie string
|
||||
rootID string
|
||||
client *sdk.Pan115Client
|
||||
ua string
|
||||
id string
|
||||
cookie string
|
||||
rootID string
|
||||
client *sdk.Pan115Client
|
||||
ua string
|
||||
uploadTempDir string
|
||||
|
||||
listMu sync.Mutex
|
||||
lastListAt time.Time
|
||||
@@ -32,10 +33,11 @@ type Driver struct {
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
ID string
|
||||
Cookie string // 形如 "UID=xxx; CID=xxx; SEID=xxx; KID=xxx"
|
||||
RootID string // 默认 "0"
|
||||
UA string // 默认 UA115Browser
|
||||
ID string
|
||||
Cookie string // 形如 "UID=xxx; CID=xxx; SEID=xxx; KID=xxx"
|
||||
RootID string // 默认 "0"
|
||||
UA string // 默认 UA115Browser
|
||||
UploadTempDir string
|
||||
}
|
||||
|
||||
func New(c Config) *Driver {
|
||||
@@ -48,11 +50,12 @@ func New(c Config) *Driver {
|
||||
ua = sdk.UA115Browser
|
||||
}
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
cookie: c.Cookie,
|
||||
rootID: rootID,
|
||||
ua: ua,
|
||||
listInterval: 2 * time.Second,
|
||||
id: c.ID,
|
||||
cookie: c.Cookie,
|
||||
rootID: rootID,
|
||||
ua: ua,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
listInterval: 2 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,7 +90,7 @@ func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error)
|
||||
// p115ListCooldown 是列目录触发疑似风控错误时的冷却时长。
|
||||
//
|
||||
// 历史上是 [30min × 3],3 次都失败就放弃;新策略改为 10 分钟无限重试 ——
|
||||
// 只要错误仍属 transient(429 / 405 / WAF / blocked / 安全威胁 / unexpected),
|
||||
// 只要错误仍属明确 HTTP transient 状态(429 / 405),
|
||||
// 就持续等 10 分钟再发一次列目录请求,直到成功或 ctx 取消。这样即使 115
|
||||
// 风控持续较长时间,扫描会自然延后到风控结束,不再丢半棵子树。
|
||||
const p115ListCooldown = 10 * time.Minute
|
||||
@@ -149,20 +152,14 @@ func sleepContext(ctx context.Context, d time.Duration) error {
|
||||
}
|
||||
|
||||
func isTransient115ListError(err error) bool {
|
||||
return isTransient115UpstreamError(err)
|
||||
}
|
||||
|
||||
func isTransient115UpstreamError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "405") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "security") ||
|
||||
strings.Contains(text, "waf") ||
|
||||
strings.Contains(text, "unexpected error") ||
|
||||
strings.Contains(text, "访问被阻断") ||
|
||||
strings.Contains(text, "安全威胁")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusMethodNotAllowed, http.StatusTooManyRequests)
|
||||
}
|
||||
|
||||
// ListDirsOnly 只列指定目录的直接**子目录**,不返回文件条目。专为 admin 后台
|
||||
@@ -248,11 +245,11 @@ func (d *Driver) streamURLWithUA(ctx context.Context, fileID string, ua string)
|
||||
// 需要先拿到 pickCode
|
||||
f, err := d.client.GetFile(fileID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("115 get file: %w", err)
|
||||
return nil, wrap115StreamTransientError("115 get file", err)
|
||||
}
|
||||
info, ua, err := d.downloadInfo(f.PickCode, ua)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("115 download url: %w", err)
|
||||
return nil, wrap115StreamTransientError("115 download url", err)
|
||||
}
|
||||
if info == nil || info.Url.Url == "" {
|
||||
return nil, errors.New("115 download url: empty")
|
||||
@@ -288,6 +285,18 @@ func (d *Driver) downloadInfo(pickCode string, ua string) (*sdk.DownloadInfo, st
|
||||
return info, ua, nil
|
||||
}
|
||||
|
||||
func wrap115StreamTransientError(op string, err error) error {
|
||||
wrapped := fmt.Errorf("%s: %w", op, err)
|
||||
if !isTransient115UpstreamError(err) {
|
||||
return wrapped
|
||||
}
|
||||
return &drives.RateLimitError{
|
||||
Provider: "p115",
|
||||
RetryAfter: p115ListCooldown,
|
||||
Err: wrapped,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
res, err := d.UploadAndReportSha1(ctx, parentID, name, r, size)
|
||||
if err != nil {
|
||||
@@ -341,7 +350,7 @@ func (d *Driver) UploadAndReportSha1(ctx context.Context, parentID, name string,
|
||||
parentID = d.rootID
|
||||
}
|
||||
|
||||
tmp, sha1Hex, written, err := bufferAndHashSha1(r, size)
|
||||
tmp, sha1Hex, written, err := bufferAndHashSha1(d.uploadTempDir, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
@@ -445,12 +454,35 @@ func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if d.client == nil {
|
||||
return errors.New("p115 remove: driver not initialized")
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("p115 remove: empty fileID")
|
||||
}
|
||||
if err := d.client.Delete(fileID); err != nil {
|
||||
return fmt.Errorf("p115 remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// bufferAndHashSha1 把 r 全量复制到一个临时文件,同时计算 SHA1。
|
||||
// 返回临时文件(位置在末尾,需调用方 Seek 回 0)、SHA1 hex 大写、实际字节数。
|
||||
//
|
||||
// 调用方负责 Close + Remove 临时文件。
|
||||
func bufferAndHashSha1(r io.Reader, declaredSize int64) (*os.File, string, int64, error) {
|
||||
tmp, err := os.CreateTemp("", "p115-upload-*.bin")
|
||||
func bufferAndHashSha1(tempDir string, r io.Reader, declaredSize int64) (*os.File, string, int64, error) {
|
||||
tempDir = strings.TrimSpace(tempDir)
|
||||
if tempDir != "" {
|
||||
if err := os.MkdirAll(tempDir, 0o755); err != nil {
|
||||
return nil, "", 0, fmt.Errorf("p115 upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(tempDir, "p115-upload-*.bin")
|
||||
if err != nil {
|
||||
return nil, "", 0, fmt.Errorf("p115 upload: create tmp: %w", err)
|
||||
}
|
||||
@@ -547,3 +579,4 @@ func guessMime(name string) string {
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
@@ -8,8 +8,12 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
func TestIsTransient115ListError(t *testing.T) {
|
||||
@@ -19,8 +23,9 @@ func TestIsTransient115ListError(t *testing.T) {
|
||||
want bool
|
||||
}{
|
||||
{name: "nil", err: nil, want: false},
|
||||
{name: "blocked html", err: errors.New(`<!doctype html><title>405</title>Sorry, your request has been blocked as it may cause potential threats to the server's security.`), want: true},
|
||||
{name: "chinese waf", err: errors.New("很抱歉,由于您访问的URL有可能对网站造成安全威胁,您的访问被阻断。"), want: true},
|
||||
{name: "blocked html without status context", err: errors.New(`<!doctype html><title>405</title>Sorry, your request has been blocked as it may cause potential threats to the server's security.`), want: false},
|
||||
{name: "chinese waf", err: errors.New("很抱歉,由于您访问的URL有可能对网站造成安全威胁,您的访问被阻断。"), want: false},
|
||||
{name: "status 405", err: errors.New("request failed with status: 405"), want: true},
|
||||
{name: "rate limit", err: errors.New("429 too many requests"), want: true},
|
||||
{name: "regular auth error", err: errors.New("invalid credential"), want: false},
|
||||
}
|
||||
@@ -34,6 +39,42 @@ func TestIsTransient115ListError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestWrap115StreamTransientError(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
err error
|
||||
wantRateLimit bool
|
||||
}{
|
||||
{name: "unexpected", err: errors.New("unexpected error"), wantRateLimit: false},
|
||||
{name: "405 blocked", err: errors.New("405 request has been blocked"), wantRateLimit: true},
|
||||
{name: "429", err: errors.New("429 too many requests"), wantRateLimit: true},
|
||||
{name: "blocked", err: errors.New("blocked by waf"), wantRateLimit: false},
|
||||
{name: "auth", err: errors.New("invalid credential"), wantRateLimit: false},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := wrap115StreamTransientError("115 get file", tc.err)
|
||||
var rateLimit *drives.RateLimitError
|
||||
isRateLimit := errors.As(got, &rateLimit)
|
||||
if isRateLimit != tc.wantRateLimit {
|
||||
t.Fatalf("rate limit = %v, want %v; err=%v", isRateLimit, tc.wantRateLimit, got)
|
||||
}
|
||||
if !strings.Contains(got.Error(), "115 get file") {
|
||||
t.Fatalf("err = %v, want operation prefix", got)
|
||||
}
|
||||
if tc.wantRateLimit {
|
||||
if rateLimit.Provider != "p115" {
|
||||
t.Fatalf("provider = %q, want p115", rateLimit.Provider)
|
||||
}
|
||||
if rateLimit.RetryAfter != 10*time.Minute {
|
||||
t.Fatalf("retry after = %s, want 10m", rateLimit.RetryAfter)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestBufferAndHashSha1 验证 bufferAndHashSha1:
|
||||
//
|
||||
// - 把 reader 的全部字节落到 tmp 文件
|
||||
@@ -46,7 +87,7 @@ func TestBufferAndHashSha1(t *testing.T) {
|
||||
wantHex := strings.ToUpper(hex.EncodeToString(want[:]))
|
||||
|
||||
t.Run("declared size matches", func(t *testing.T) {
|
||||
tmp, gotHex, n, err := bufferAndHashSha1(bytes.NewReader(body), int64(len(body)))
|
||||
tmp, gotHex, n, err := bufferAndHashSha1("", bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashSha1 returned error: %v", err)
|
||||
}
|
||||
@@ -71,14 +112,14 @@ func TestBufferAndHashSha1(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("declared size mismatch returns error", func(t *testing.T) {
|
||||
_, _, _, err := bufferAndHashSha1(bytes.NewReader(body), int64(len(body))+1)
|
||||
_, _, _, err := bufferAndHashSha1("", bytes.NewReader(body), int64(len(body))+1)
|
||||
if err == nil {
|
||||
t.Fatal("expected size mismatch error, got nil")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("declared size zero is unchecked", func(t *testing.T) {
|
||||
tmp, gotHex, n, err := bufferAndHashSha1(bytes.NewReader(body), 0)
|
||||
tmp, gotHex, n, err := bufferAndHashSha1("", bytes.NewReader(body), 0)
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashSha1 returned error: %v", err)
|
||||
}
|
||||
@@ -90,6 +131,18 @@ func TestBufferAndHashSha1(t *testing.T) {
|
||||
t.Errorf("written = %d, want %d", n, len(body))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("uses configured temp dir", func(t *testing.T) {
|
||||
tempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
tmp, _, _, err := bufferAndHashSha1(tempDir, bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashSha1 returned error: %v", err)
|
||||
}
|
||||
defer cleanup(tmp)
|
||||
if gotDir := filepath.Dir(tmp.Name()); gotDir != tempDir {
|
||||
t.Fatalf("tmp dir = %q, want %q", gotDir, tempDir)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestUploadAndReportSha1RejectsInvalidArgs 检查空 reader / 空 name / 负 size 在
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,512 @@
|
||||
package p123
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
func TestStreamURLResolvesDownloadInfoRedirect(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
var downloadReferer string
|
||||
var download *httptest.Server
|
||||
download = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/resolve":
|
||||
downloadReferer = r.Header.Get("Referer")
|
||||
http.Redirect(w, r, download.URL+"/cdn/video.mp4", http.StatusFound)
|
||||
case "/cdn/video.mp4":
|
||||
t.Fatalf("driver followed redirect unexpectedly")
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer download.Close()
|
||||
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
switch r.URL.Path {
|
||||
case "/api/user/sign_in":
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 200,
|
||||
"data": map[string]string{"token": "token-1"},
|
||||
})
|
||||
case "/b/api/user/info":
|
||||
if got := r.Header.Get("Authorization"); got != "Bearer token-1" {
|
||||
t.Fatalf("Authorization = %q, want bearer token", got)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"code": 0, "data": map[string]any{}})
|
||||
case "/b/api/file/list/new":
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]any{
|
||||
"Next": "-1",
|
||||
"Total": 1,
|
||||
"InfoList": []map[string]any{
|
||||
{
|
||||
"FileName": "video.mp4",
|
||||
"Size": 1234,
|
||||
"UpdateAt": "2026-01-02 03:04:05",
|
||||
"FileId": 100,
|
||||
"Type": 0,
|
||||
"Etag": "ABCDEF",
|
||||
"S3KeyFlag": "flag-1",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
case "/b/api/file/download_info":
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode download_info body: %v", err)
|
||||
}
|
||||
if got := body["fileName"]; got != "video.mp4" {
|
||||
t.Fatalf("fileName = %#v, want cached file metadata", got)
|
||||
}
|
||||
if got := body["etag"]; got != "ABCDEF" {
|
||||
t.Fatalf("etag = %#v, want cached etag", got)
|
||||
}
|
||||
entryURL := download.URL + "/entry?params=" + base64.StdEncoding.EncodeToString([]byte(download.URL+"/resolve"))
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]string{"DownloadUrl": entryURL},
|
||||
})
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer api.Close()
|
||||
|
||||
var savedToken string
|
||||
d := New(Config{
|
||||
ID: "123-main",
|
||||
Username: "user@example.com",
|
||||
Password: "secret",
|
||||
MainAPIBaseURL: api.URL + "/b/api",
|
||||
LoginAPIBaseURL: api.URL + "/api",
|
||||
OnTokenUpdate: func(access string) {
|
||||
savedToken = access
|
||||
},
|
||||
})
|
||||
if err := d.Init(ctx); err != nil {
|
||||
t.Fatalf("Init() error = %v", err)
|
||||
}
|
||||
if savedToken != "token-1" {
|
||||
t.Fatalf("saved token = %q, want token-1", savedToken)
|
||||
}
|
||||
if _, err := d.List(ctx, d.RootID()); err != nil {
|
||||
t.Fatalf("List() error = %v", err)
|
||||
}
|
||||
|
||||
link, err := d.StreamURL(ctx, "100")
|
||||
if err != nil {
|
||||
t.Fatalf("StreamURL() error = %v", err)
|
||||
}
|
||||
if got := link.URL; got != download.URL+"/cdn/video.mp4" {
|
||||
t.Fatalf("URL = %q, want final CDN URL", got)
|
||||
}
|
||||
if got := link.Headers.Get("Referer"); !strings.HasPrefix(got, download.URL) {
|
||||
t.Fatalf("Referer = %q, want original download host", got)
|
||||
}
|
||||
if downloadReferer != defaultReferer {
|
||||
t.Fatalf("resolve Referer = %q, want %q", downloadReferer, defaultReferer)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInitUsesAccessTokenWithoutLogin(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
switch r.URL.Path {
|
||||
case "/api/user/sign_in":
|
||||
t.Fatalf("driver should not password-login when access_token is configured")
|
||||
case "/b/api/user/info":
|
||||
if got := r.Header.Get("Authorization"); got != "Bearer token-1" {
|
||||
t.Fatalf("Authorization = %q, want bearer token", got)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"code": 0, "data": map[string]any{}})
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer api.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "123-main",
|
||||
AccessToken: "Bearer token-1",
|
||||
MainAPIBaseURL: api.URL + "/b/api",
|
||||
LoginAPIBaseURL: api.URL + "/api",
|
||||
})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("Init() error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoginRiskErrorSuggestsAccessToken(t *testing.T) {
|
||||
err := loginError("当前账号存在境外登录风险,请使用短信验证码或者微信进行登录。")
|
||||
if err == nil || !strings.Contains(err.Error(), "access_token") {
|
||||
t.Fatalf("loginError() = %v, want access_token guidance", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRequestCode429ReturnsRateLimitError(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Header().Set("Retry-After", "2")
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 429,
|
||||
"message": "请求太频繁",
|
||||
})
|
||||
}))
|
||||
defer api.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "123-main",
|
||||
AccessToken: "token-1",
|
||||
MainAPIBaseURL: api.URL,
|
||||
})
|
||||
_, err := d.request(context.Background(), endpointFileList, http.MethodGet, nil, nil)
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if rateLimit.RetryAfter != 2*time.Second {
|
||||
t.Fatalf("RetryAfter = %s, want 2s", rateLimit.RetryAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListCoolsDownAndRetriesRateLimit(t *testing.T) {
|
||||
var listCalls int
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/file/list/new" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
listCalls++
|
||||
if listCalls == 1 {
|
||||
w.Header().Set("Retry-After", "1")
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 429,
|
||||
"message": "请求太频繁",
|
||||
})
|
||||
return
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]any{
|
||||
"Next": "-1",
|
||||
"Total": 1,
|
||||
"InfoList": []map[string]any{
|
||||
{
|
||||
"FileName": "video.mp4",
|
||||
"Size": 1234,
|
||||
"UpdateAt": "2026-01-02 03:04:05",
|
||||
"FileId": 100,
|
||||
"Type": 0,
|
||||
"Etag": "ABCDEF",
|
||||
"S3KeyFlag": "flag-1",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer api.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "123-main",
|
||||
AccessToken: "token-1",
|
||||
MainAPIBaseURL: api.URL,
|
||||
})
|
||||
entries, err := d.List(context.Background(), d.RootID())
|
||||
if err != nil {
|
||||
t.Fatalf("List() error = %v", err)
|
||||
}
|
||||
if listCalls != 2 {
|
||||
t.Fatalf("list calls = %d, want 2", listCalls)
|
||||
}
|
||||
if len(entries) != 1 || entries[0].ID != "100" {
|
||||
t.Fatalf("entries = %#v, want one file", entries)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveDownloadURL429ReturnsRateLimitError(t *testing.T) {
|
||||
download := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Retry-After", "3")
|
||||
http.Error(w, "too many requests", http.StatusTooManyRequests)
|
||||
}))
|
||||
defer download.Close()
|
||||
|
||||
d := New(Config{ID: "123-main"})
|
||||
_, err := d.resolveDownloadURL(context.Background(), download.URL)
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if rateLimit.RetryAfter != 3*time.Second {
|
||||
t.Fatalf("RetryAfter = %s, want 3s", rateLimit.RetryAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadAndReportHashUsesPresignedPUTAndComplete(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
body := []byte("video bytes for 123 upload")
|
||||
wantMD5 := fmt.Sprintf("%x", md5.Sum(body))
|
||||
|
||||
var putBody []byte
|
||||
upload := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPut {
|
||||
t.Fatalf("upload method = %s, want PUT", r.Method)
|
||||
}
|
||||
if r.ContentLength != int64(len(body)) {
|
||||
t.Fatalf("ContentLength = %d, want %d", r.ContentLength, len(body))
|
||||
}
|
||||
got, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("read upload body: %v", err)
|
||||
}
|
||||
putBody = got
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer upload.Close()
|
||||
|
||||
var uploadRequest map[string]any
|
||||
var uploadURLRequest map[string]any
|
||||
var completeRequest map[string]any
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
switch r.URL.Path {
|
||||
case "/file/upload_request":
|
||||
if err := json.NewDecoder(r.Body).Decode(&uploadRequest); err != nil {
|
||||
t.Fatalf("decode upload_request: %v", err)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]any{
|
||||
"FileId": 9001,
|
||||
"Bucket": "bucket-1",
|
||||
"Key": "key-1",
|
||||
"StorageNode": "node-1",
|
||||
"UploadId": "upload-1",
|
||||
},
|
||||
})
|
||||
case "/file/s3_upload_object/auth":
|
||||
if err := json.NewDecoder(r.Body).Decode(&uploadURLRequest); err != nil {
|
||||
t.Fatalf("decode s3 auth: %v", err)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]any{
|
||||
"presignedUrls": map[string]string{
|
||||
"1": upload.URL + "/part-1",
|
||||
},
|
||||
},
|
||||
})
|
||||
case "/file/upload_complete/v2":
|
||||
if err := json.NewDecoder(r.Body).Decode(&completeRequest); err != nil {
|
||||
t.Fatalf("decode complete: %v", err)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"code": 0, "data": map[string]any{}})
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer api.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "123-main",
|
||||
AccessToken: "token-1",
|
||||
MainAPIBaseURL: api.URL,
|
||||
})
|
||||
res, err := d.UploadAndReportHash(ctx, "parent-1", "video.mp4", bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("UploadAndReportHash() error = %v", err)
|
||||
}
|
||||
if res.FileID != "9001" {
|
||||
t.Fatalf("FileID = %q, want 9001", res.FileID)
|
||||
}
|
||||
if res.Hash != wantMD5 {
|
||||
t.Fatalf("Hash = %q, want %q", res.Hash, wantMD5)
|
||||
}
|
||||
if res.Size != int64(len(body)) {
|
||||
t.Fatalf("Size = %d, want %d", res.Size, len(body))
|
||||
}
|
||||
if !bytes.Equal(putBody, body) {
|
||||
t.Fatalf("PUT body = %q, want %q", putBody, body)
|
||||
}
|
||||
if uploadRequest["etag"] != wantMD5 {
|
||||
t.Fatalf("upload etag = %#v, want %q", uploadRequest["etag"], wantMD5)
|
||||
}
|
||||
if uploadRequest["fileName"] != "video.mp4" || uploadRequest["parentFileId"] != "parent-1" {
|
||||
t.Fatalf("upload request = %#v, want fileName and parentFileId", uploadRequest)
|
||||
}
|
||||
if uploadURLRequest["partNumberStart"].(float64) != 1 || uploadURLRequest["partNumberEnd"].(float64) != 2 {
|
||||
t.Fatalf("s3 auth request = %#v, want part range 1..2", uploadURLRequest)
|
||||
}
|
||||
if completeRequest["fileId"].(float64) != 9001 || completeRequest["fileSize"].(float64) != float64(len(body)) {
|
||||
t.Fatalf("complete request = %#v, want file id and size", completeRequest)
|
||||
}
|
||||
if completeRequest["isMultipart"].(bool) {
|
||||
t.Fatalf("complete isMultipart = true, want false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadAndReportHashReuseSkipsPUTAndComplete(t *testing.T) {
|
||||
body := []byte("reused body")
|
||||
var presignedCalled bool
|
||||
var completeCalled bool
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
switch r.URL.Path {
|
||||
case "/file/upload_request":
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]any{
|
||||
"FileId": 7001,
|
||||
"Reuse": true,
|
||||
},
|
||||
})
|
||||
case "/file/s3_upload_object/auth", "/file/s3_repare_upload_parts_batch":
|
||||
presignedCalled = true
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"code": 0})
|
||||
case "/file/upload_complete/v2":
|
||||
completeCalled = true
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"code": 0})
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer api.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "123-main",
|
||||
AccessToken: "token-1",
|
||||
MainAPIBaseURL: api.URL,
|
||||
})
|
||||
res, err := d.UploadAndReportHash(context.Background(), "parent-1", "reused.mp4", bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("UploadAndReportHash() error = %v", err)
|
||||
}
|
||||
if res.FileID != "7001" {
|
||||
t.Fatalf("FileID = %q, want 7001", res.FileID)
|
||||
}
|
||||
if presignedCalled {
|
||||
t.Fatal("reuse upload should not request presigned URLs")
|
||||
}
|
||||
if completeCalled {
|
||||
t.Fatal("reuse upload should not call upload_complete")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadPresignedPUT429ReturnsRateLimitError(t *testing.T) {
|
||||
upload := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Retry-After", "4")
|
||||
http.Error(w, "too many requests", http.StatusTooManyRequests)
|
||||
}))
|
||||
defer upload.Close()
|
||||
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
switch r.URL.Path {
|
||||
case "/file/upload_request":
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]any{
|
||||
"FileId": 9001,
|
||||
"Bucket": "bucket-1",
|
||||
"Key": "key-1",
|
||||
"StorageNode": "node-1",
|
||||
"UploadId": "upload-1",
|
||||
},
|
||||
})
|
||||
case "/file/s3_upload_object/auth":
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]any{
|
||||
"presignedUrls": map[string]string{"1": upload.URL},
|
||||
},
|
||||
})
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer api.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "123-main",
|
||||
AccessToken: "token-1",
|
||||
MainAPIBaseURL: api.URL,
|
||||
})
|
||||
_, err := d.UploadAndReportHash(context.Background(), "parent-1", "limited.mp4", strings.NewReader("limited"), int64(len("limited")))
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if rateLimit.RetryAfter != 4*time.Second {
|
||||
t.Fatalf("RetryAfter = %s, want 4s", rateLimit.RetryAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBufferAndHashMD5UsesConfiguredTempDir(t *testing.T) {
|
||||
body := []byte("hello-123-upload-test")
|
||||
tempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
tmp, gotHex, n, err := bufferAndHashMD5(tempDir, bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashMD5 returned error: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmp.Name())
|
||||
}()
|
||||
if gotDir := filepath.Dir(tmp.Name()); gotDir != tempDir {
|
||||
t.Fatalf("tmp dir = %q, want %q", gotDir, tempDir)
|
||||
}
|
||||
want := md5.Sum(body)
|
||||
if gotHex != fmt.Sprintf("%x", want) {
|
||||
t.Fatalf("md5 = %s, want %x", gotHex, want)
|
||||
}
|
||||
if n != int64(len(body)) {
|
||||
t.Fatalf("written = %d, want %d", n, len(body))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenameSendsExpectedBody(t *testing.T) {
|
||||
var renameRequest map[string]any
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/file/rename" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&renameRequest); err != nil {
|
||||
t.Fatalf("decode rename: %v", err)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"code": 0, "data": map[string]any{}})
|
||||
}))
|
||||
defer api.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "123-main",
|
||||
AccessToken: "token-1",
|
||||
MainAPIBaseURL: api.URL,
|
||||
})
|
||||
if err := d.Rename(context.Background(), "9001", "new name.mp4"); err != nil {
|
||||
t.Fatalf("Rename() error = %v", err)
|
||||
}
|
||||
if renameRequest["driveId"].(float64) != 0 || renameRequest["fileId"] != "9001" || renameRequest["fileName"] != "new name.mp4" {
|
||||
t.Fatalf("rename request = %#v, want driveId/fileId/fileName", renameRequest)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,285 @@
|
||||
package p123
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-resty/resty/v2"
|
||||
"github.com/skip2/go-qrcode"
|
||||
)
|
||||
|
||||
// Defaults for the 123pan QR-code login flow.
const (
	// defaultUserAPIBase is the user API base URL used when QRConfig leaves
	// UserAPIBaseURL blank.
	defaultUserAPIBase = "https://user.123pan.cn/api"
	// Browser-like page, referer, origin and user-agent values.
	// NOTE(review): presumably sent with each QR request; confirm in request().
	defaultQRLoginPage = "https://www.123pan.com/wx-app-login.html"
	defaultQRReferer   = "https://user.123pan.com/centerlogin"
	defaultQROrigin    = "https://user.123pan.com"
	defaultQRUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36"

	// Endpoint paths, appended to the user API base URL.
	endpointQRCodeGenerate = "/user/qr-code/generate"
	endpointQRCodeResult   = "/user/qr-code/result"
	endpointQRCodeWXCode   = "/user/qr-code/wx_code"
)
|
||||
|
||||
// QRConfig carries the optional settings accepted by NewQRClient. Every field
// may be left at its zero value.
type QRConfig struct {
	// UserAPIBaseURL overrides the user API base URL; surrounding whitespace
	// and trailing slashes are stripped, and blank selects the default.
	UserAPIBaseURL string
	// HTTPClient is the underlying HTTP client; nil selects a client with a
	// 20 second timeout.
	HTTPClient *http.Client
	// Now supplies the current time; nil selects time.Now.
	Now func() time.Time
}
|
||||
|
||||
type QRClient struct {
|
||||
userAPIBase string
|
||||
client *resty.Client
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// QRCodeSession describes a freshly generated QR login attempt.
type QRCodeSession struct {
	// LoginUUID is the locally generated identifier for this attempt.
	LoginUUID string `json:"loginUuid"`
	// UniID is the identifier returned by the generate endpoint.
	UniID string `json:"uniID"`
	// QRCodeURL is the URL encoded into the QR image.
	QRCodeURL string `json:"qrCodeUrl"`
	// QRImageDataURL is the QR image as a "data:image/png;base64," URL.
	QRImageDataURL string `json:"qrImageDataUrl"`
	// ExpiresAt is an RFC 3339 timestamp; omitted from JSON when empty.
	ExpiresAt string `json:"expiresAt,omitempty"`
}
|
||||
|
||||
// QRCodeStatus is the result of polling a QR login attempt.
type QRCodeStatus struct {
	// LoginStatus is the numeric login state.
	// NOTE(review): the meaning of each value is defined elsewhere; confirm
	// against qrLoginStatusText.
	LoginStatus int `json:"loginStatus"`
	// StatusText is the human-readable form of LoginStatus.
	StatusText string `json:"statusText"`
	// ScanPlatform is the numeric scanning platform; omitted when zero.
	ScanPlatform int `json:"scanPlatform,omitempty"`
	// PlatformText is the human-readable form of ScanPlatform; omitted when
	// empty.
	PlatformText string `json:"platformText,omitempty"`
	// AccessToken is omitted from JSON when empty.
	// NOTE(review): presumably set only once login has completed; confirm.
	AccessToken string `json:"accessToken,omitempty"`
}
|
||||
|
||||
func NewQRClient(c QRConfig) *QRClient {
|
||||
userAPIBase := strings.TrimRight(strings.TrimSpace(c.UserAPIBaseURL), "/")
|
||||
if userAPIBase == "" {
|
||||
userAPIBase = defaultUserAPIBase
|
||||
}
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = &http.Client{Timeout: 20 * time.Second}
|
||||
}
|
||||
now := c.Now
|
||||
if now == nil {
|
||||
now = time.Now
|
||||
}
|
||||
return &QRClient{
|
||||
userAPIBase: userAPIBase,
|
||||
client: resty.NewWithClient(httpClient).
|
||||
SetTimeout(20*time.Second).
|
||||
SetHeader("Accept", "application/json, text/plain, */*"),
|
||||
now: now,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *QRClient) Generate(ctx context.Context) (QRCodeSession, error) {
|
||||
loginUUID, err := newLoginUUID()
|
||||
if err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
var resp qrGenerateResp
|
||||
res, err := c.request(ctx, loginUUID).
|
||||
SetResult(&resp).
|
||||
Get(c.userAPIBase + endpointQRCodeGenerate)
|
||||
if err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
if resp.Code != 0 {
|
||||
return QRCodeSession{}, qrAPIError(resp.Message, res.StatusCode(), resp.Code)
|
||||
}
|
||||
uniID := strings.TrimSpace(resp.Data.UniID)
|
||||
if uniID == "" {
|
||||
return QRCodeSession{}, errors.New("123pan qr: empty uniID")
|
||||
}
|
||||
qrURL := buildQRLoginURL(resp.Data.URL, uniID)
|
||||
png, err := qrcode.Encode(qrURL, qrcode.Medium, 220)
|
||||
if err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
return QRCodeSession{
|
||||
LoginUUID: loginUUID,
|
||||
UniID: uniID,
|
||||
QRCodeURL: qrURL,
|
||||
QRImageDataURL: "data:image/png;base64," + base64.StdEncoding.EncodeToString(png),
|
||||
ExpiresAt: c.now().Add(5 * time.Minute).Format(time.RFC3339),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *QRClient) Poll(ctx context.Context, loginUUID, uniID string) (QRCodeStatus, error) {
|
||||
loginUUID = strings.TrimSpace(loginUUID)
|
||||
uniID = strings.TrimSpace(uniID)
|
||||
if loginUUID == "" {
|
||||
return QRCodeStatus{}, errors.New("loginUuid is required")
|
||||
}
|
||||
if uniID == "" {
|
||||
return QRCodeStatus{}, errors.New("uniID is required")
|
||||
}
|
||||
var resp qrResultResp
|
||||
res, err := c.request(ctx, loginUUID).
|
||||
SetQueryParam("uniID", uniID).
|
||||
SetResult(&resp).
|
||||
Get(c.userAPIBase + endpointQRCodeResult)
|
||||
if err != nil {
|
||||
return QRCodeStatus{}, err
|
||||
}
|
||||
if resp.Code != 0 && resp.Code != 200 {
|
||||
return QRCodeStatus{}, qrAPIError(resp.Message, res.StatusCode(), resp.Code)
|
||||
}
|
||||
if resp.Code == 200 {
|
||||
resp.Data.LoginStatus = 3
|
||||
if resp.Data.ScanPlatform == 0 {
|
||||
resp.Data.ScanPlatform = resp.Data.LoginType
|
||||
}
|
||||
}
|
||||
status := QRCodeStatus{
|
||||
LoginStatus: resp.Data.LoginStatus,
|
||||
StatusText: qrLoginStatusText(resp.Data.LoginStatus),
|
||||
ScanPlatform: resp.Data.ScanPlatform,
|
||||
PlatformText: qrScanPlatformText(resp.Data.ScanPlatform),
|
||||
}
|
||||
if status.LoginStatus != 3 {
|
||||
return status, nil
|
||||
}
|
||||
if token := resp.TokenValue(); token != "" {
|
||||
status.AccessToken = normalizeAccessToken(token)
|
||||
return status, nil
|
||||
}
|
||||
if resp.Data.ScanPlatform == 4 {
|
||||
token, err := c.finishWechatLogin(ctx, loginUUID, uniID)
|
||||
if err != nil {
|
||||
return QRCodeStatus{}, err
|
||||
}
|
||||
status.AccessToken = normalizeAccessToken(token)
|
||||
return status, nil
|
||||
}
|
||||
return QRCodeStatus{}, errors.New("123pan qr: confirmed login returned empty token")
|
||||
}
|
||||
|
||||
func (c *QRClient) finishWechatLogin(ctx context.Context, loginUUID, uniID string) (string, error) {
|
||||
var wxResp qrWXCodeResp
|
||||
res, err := c.request(ctx, loginUUID).
|
||||
SetBody(map[string]string{"uniID": uniID}).
|
||||
SetResult(&wxResp).
|
||||
Post(c.userAPIBase + endpointQRCodeWXCode)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if wxResp.Code != 0 {
|
||||
return "", qrAPIError(wxResp.Message, res.StatusCode(), wxResp.Code)
|
||||
}
|
||||
wxCode := strings.TrimSpace(wxResp.WXCode())
|
||||
if wxCode == "" {
|
||||
return "", errors.New("123pan qr: empty wechat code")
|
||||
}
|
||||
var signIn loginResp
|
||||
res, err = c.request(ctx, loginUUID).
|
||||
SetBody(map[string]any{
|
||||
"from": "web",
|
||||
"wechat_code": wxCode,
|
||||
"type": 4,
|
||||
}).
|
||||
SetResult(&signIn).
|
||||
Post(c.userAPIBase + endpointSignIn)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if signIn.Code != 200 && signIn.Code != 0 {
|
||||
return "", qrAPIError(signIn.Message, res.StatusCode(), signIn.Code)
|
||||
}
|
||||
token := strings.TrimSpace(signIn.Data.Token)
|
||||
if token == "" {
|
||||
return "", errors.New("123pan qr: empty token")
|
||||
}
|
||||
return token, nil
|
||||
}
|
||||
|
||||
func (c *QRClient) request(ctx context.Context, loginUUID string) *resty.Request {
|
||||
return c.client.R().
|
||||
SetContext(ctx).
|
||||
SetHeaders(map[string]string{
|
||||
"Content-Type": "application/json;charset=UTF-8",
|
||||
"platform": defaultPlatform,
|
||||
"App-Version": defaultAppVersion,
|
||||
"LoginUuid": loginUUID,
|
||||
"Referer": defaultQRReferer,
|
||||
"Origin": defaultQROrigin,
|
||||
"User-Agent": defaultQRUserAgent,
|
||||
})
|
||||
}
|
||||
|
||||
func buildQRLoginURL(raw, uniID string) string {
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
raw = defaultQRLoginPage
|
||||
}
|
||||
u, err := url.Parse(raw)
|
||||
if err != nil {
|
||||
return defaultQRLoginPage + "?env=production&uniID=" + url.QueryEscape(uniID) + "&source=123pan&type=login"
|
||||
}
|
||||
q := u.Query()
|
||||
q.Set("env", "production")
|
||||
q.Set("uniID", uniID)
|
||||
q.Set("source", "123pan")
|
||||
q.Set("type", "login")
|
||||
u.RawQuery = q.Encode()
|
||||
return u.String()
|
||||
}
|
||||
|
||||
// newLoginUUID returns a random identifier in the canonical 8-4-4-4-12
// lowercase-hex UUID layout, with the version nibble set to 4 and the
// variant bits set to 10. It fails only if the random source does.
func newLoginUUID() (string, error) {
	raw := make([]byte, 16)
	if _, err := rand.Read(raw); err != nil {
		return "", err
	}
	raw[6] = raw[6]&0x0f | 0x40 // version 4
	raw[8] = raw[8]&0x3f | 0x80 // variant 10xx

	// Byte offsets delimiting the five dash-separated groups.
	bounds := [...]int{0, 4, 6, 8, 10, 16}
	groups := make([]string, 0, len(bounds)-1)
	for i := 1; i < len(bounds); i++ {
		groups = append(groups, hex.EncodeToString(raw[bounds[i-1]:bounds[i]]))
	}
	return strings.Join(groups, "-"), nil
}
|
||||
|
||||
// qrAPIError converts a failed API response into an error. The server's
// message (trimmed) is used verbatim when present; otherwise the error text
// is synthesized from the HTTP status and the API code.
func qrAPIError(message string, httpStatus, apiCode int) error {
	if trimmed := strings.TrimSpace(message); trimmed != "" {
		return errors.New(trimmed)
	}
	return errors.New(fmt.Sprintf("HTTP %d code=%d", httpStatus, apiCode))
}
|
||||
|
||||
// qrLoginStatusText maps a numeric login status to its display label:
// 0 waiting for scan, 1 scanned and awaiting confirmation, 2 rejected,
// 3 confirmed, 4 expired. Any other value yields the "unknown" label.
func qrLoginStatusText(status int) string {
	labels := [...]string{
		"等待扫码",
		"已扫码,等待确认",
		"已拒绝",
		"已确认",
		"已过期",
	}
	if status < 0 || status >= len(labels) {
		return "未知状态"
	}
	return labels[status]
}
|
||||
|
||||
// qrScanPlatformText maps a scan platform code to its display label:
// 4 is WeChat and 7 is the 123pan app. Unknown codes yield "".
func qrScanPlatformText(platform int) string {
	if platform == 4 {
		return "微信"
	}
	if platform == 7 {
		return "123网盘 App"
	}
	return ""
}
|
||||
@@ -0,0 +1,182 @@
|
||||
package p123
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestQRCodeGenerateBuildsImage(t *testing.T) {
|
||||
var seenLoginUUID string
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/api/user/qr-code/generate" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
seenLoginUUID = r.Header.Get("LoginUuid")
|
||||
if seenLoginUUID == "" {
|
||||
t.Fatalf("missing LoginUuid header")
|
||||
}
|
||||
if r.Header.Get("platform") != defaultPlatform {
|
||||
t.Fatalf("platform header = %q, want %q", r.Header.Get("platform"), defaultPlatform)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"message": "ok",
|
||||
"data": map[string]string{
|
||||
"uniID": "uni-1",
|
||||
"url": "https://www.123pan.com/wx-app-login.html",
|
||||
},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
got, err := NewQRClient(QRConfig{UserAPIBaseURL: api.URL + "/api"}).Generate(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("Generate() error = %v", err)
|
||||
}
|
||||
if got.LoginUUID != seenLoginUUID {
|
||||
t.Fatalf("loginUuid = %q, want header %q", got.LoginUUID, seenLoginUUID)
|
||||
}
|
||||
if got.UniID != "uni-1" {
|
||||
t.Fatalf("uniID = %q, want uni-1", got.UniID)
|
||||
}
|
||||
if !strings.Contains(got.QRCodeURL, "uniID=uni-1") || !strings.Contains(got.QRCodeURL, "type=login") {
|
||||
t.Fatalf("qrCodeUrl = %q, want login params", got.QRCodeURL)
|
||||
}
|
||||
if !strings.HasPrefix(got.QRImageDataURL, "data:image/png;base64,") {
|
||||
t.Fatalf("qrImageDataUrl missing png data url prefix")
|
||||
}
|
||||
if got.ExpiresAt == "" {
|
||||
t.Fatalf("expiresAt is empty")
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRCodePollCompletesWechatLogin(t *testing.T) {
|
||||
var wxCodeRequested bool
|
||||
var signInRequested bool
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.Header.Get("LoginUuid") != "login-1" {
|
||||
t.Fatalf("LoginUuid = %q, want login-1", r.Header.Get("LoginUuid"))
|
||||
}
|
||||
switch r.URL.Path {
|
||||
case "/api/user/qr-code/result":
|
||||
if r.URL.Query().Get("uniID") != "uni-1" {
|
||||
t.Fatalf("uniID = %q, want uni-1", r.URL.Query().Get("uniID"))
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]any{
|
||||
"loginStatus": 3,
|
||||
"scanPlatform": 4,
|
||||
},
|
||||
})
|
||||
case "/api/user/qr-code/wx_code":
|
||||
wxCodeRequested = true
|
||||
var body map[string]string
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode wx_code body: %v", err)
|
||||
}
|
||||
if body["uniID"] != "uni-1" {
|
||||
t.Fatalf("wx_code uniID = %q, want uni-1", body["uniID"])
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]string{"wxCode": "wx-code-1"},
|
||||
})
|
||||
case "/api/user/sign_in":
|
||||
signInRequested = true
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode sign_in body: %v", err)
|
||||
}
|
||||
if body["wechat_code"] != "wx-code-1" {
|
||||
t.Fatalf("wechat_code = %#v, want wx-code-1", body["wechat_code"])
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 200,
|
||||
"data": map[string]string{"token": "Bearer token-1"},
|
||||
})
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
got, err := NewQRClient(QRConfig{UserAPIBaseURL: api.URL + "/api"}).Poll(context.Background(), "login-1", "uni-1")
|
||||
if err != nil {
|
||||
t.Fatalf("Poll() error = %v", err)
|
||||
}
|
||||
if !wxCodeRequested || !signInRequested {
|
||||
t.Fatalf("wechat completion calls wx=%v signIn=%v, want both", wxCodeRequested, signInRequested)
|
||||
}
|
||||
if got.LoginStatus != 3 || got.AccessToken != "token-1" || got.PlatformText != "微信" {
|
||||
t.Fatalf("status = %#v, want confirmed wechat token", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRCodePollUsesAppToken(t *testing.T) {
|
||||
var wxCodeRequested bool
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
switch r.URL.Path {
|
||||
case "/api/user/qr-code/result":
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 0,
|
||||
"data": map[string]any{
|
||||
"loginStatus": 3,
|
||||
"scanPlatform": 7,
|
||||
"token": "app-token",
|
||||
},
|
||||
})
|
||||
case "/api/user/qr-code/wx_code":
|
||||
wxCodeRequested = true
|
||||
http.Error(w, "unexpected wx_code", http.StatusInternalServerError)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
got, err := NewQRClient(QRConfig{UserAPIBaseURL: api.URL + "/api"}).Poll(context.Background(), "login-1", "uni-1")
|
||||
if err != nil {
|
||||
t.Fatalf("Poll() error = %v", err)
|
||||
}
|
||||
if wxCodeRequested {
|
||||
t.Fatalf("wx_code should not be called when app token is already returned")
|
||||
}
|
||||
if got.AccessToken != "app-token" || got.PlatformText != "123网盘 App" {
|
||||
t.Fatalf("status = %#v, want app token", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRCodePollUsesOfficialAppSuccessCode(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/api/user/qr-code/result" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"code": 200,
|
||||
"data": map[string]any{
|
||||
"login_type": 7,
|
||||
"token": "app-token",
|
||||
},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
got, err := NewQRClient(QRConfig{UserAPIBaseURL: api.URL + "/api"}).Poll(context.Background(), "login-1", "uni-1")
|
||||
if err != nil {
|
||||
t.Fatalf("Poll() error = %v", err)
|
||||
}
|
||||
if got.LoginStatus != 3 || got.ScanPlatform != 7 || got.AccessToken != "app-token" {
|
||||
t.Fatalf("status = %#v, want official app success token", got)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,204 @@
|
||||
package p123
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type (
	// apiEnvelope is the bare code/message pair of an API response, without
	// any payload.
	apiEnvelope struct {
		Code    int    `json:"code"`
		Message string `json:"message"`
	}

	// loginResp is the sign-in response; Data.Token is the issued token.
	loginResp struct {
		Code    int    `json:"code"`
		Message string `json:"message"`
		Data    struct {
			Token string `json:"token"`
		} `json:"data"`
	}

	// qrGenerateResp is the response of the QR code generate endpoint.
	qrGenerateResp struct {
		Code    int    `json:"code"`
		Message string `json:"message"`
		Data    struct {
			// UniID identifies the generated QR code.
			UniID string `json:"uniID"`
			// URL is the login page the QR code should point at.
			URL string `json:"url"`
		} `json:"data"`
	}
)
|
||||
|
||||
// qrResultResp is the response of the QR code result (poll) endpoint.
type qrResultResp struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
	Data    struct {
		LoginStatus  int `json:"loginStatus"`
		ScanPlatform int `json:"scanPlatform"`
		// LoginType is the snake_case platform field; Poll uses it as a
		// fallback when ScanPlatform is absent.
		LoginType int `json:"login_type"`
		// Token and AccessToken are alternative names for the same value.
		Token       string `json:"token"`
		AccessToken string `json:"accessToken"`
	} `json:"data"`
}

// TokenValue returns the token carried by the response with surrounding
// whitespace removed, preferring Data.Token over Data.AccessToken. It
// returns "" when neither field holds a non-blank value, so callers can
// rely on a plain emptiness check.
func (r qrResultResp) TokenValue() string {
	if token := strings.TrimSpace(r.Data.Token); token != "" {
		return token
	}
	return strings.TrimSpace(r.Data.AccessToken)
}
|
||||
|
||||
// qrWXCodeResp is the response of the wx_code endpoint. The WeChat code may
// arrive under any of three keys, so all of them are captured.
type qrWXCodeResp struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
	Data    struct {
		WXCodeLower string `json:"wxCode"`
		WXCodeTitle string `json:"WxCode"`
		Code        string `json:"code"`
	} `json:"data"`
}

// WXCode returns the first non-empty WeChat code, checking wxCode, then
// WxCode, then code. The value is returned as received (not trimmed).
func (r qrWXCodeResp) WXCode() string {
	for _, candidate := range []string{r.Data.WXCodeLower, r.Data.WXCodeTitle} {
		if candidate != "" {
			return candidate
		}
	}
	return r.Data.Code
}
|
||||
|
||||
type fileListResp struct {
|
||||
Data struct {
|
||||
Next string `json:"Next"`
|
||||
Total int `json:"Total"`
|
||||
InfoList []panFile `json:"InfoList"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
type panFile struct {
|
||||
FileName string `json:"FileName"`
|
||||
Size int64 `json:"Size"`
|
||||
UpdateAt flexibleTime `json:"UpdateAt"`
|
||||
FileID int64 `json:"FileId"`
|
||||
Type int `json:"Type"`
|
||||
Etag string `json:"Etag"`
|
||||
S3KeyFlag string `json:"S3KeyFlag"`
|
||||
}
|
||||
|
||||
type cachedFile struct {
|
||||
file panFile
|
||||
parentID string
|
||||
}
|
||||
|
||||
// downloadInfoResp is the download-info response. The URL may arrive under
// either key spelling, so both are captured.
type downloadInfoResp struct {
	Data struct {
		DownloadURL      string `json:"DownloadUrl"`
		DownloadURLLower string `json:"downloadUrl"`
	} `json:"data"`
}

// URL returns the download URL, preferring the DownloadUrl spelling and
// falling back to downloadUrl (which may itself be empty).
func (r downloadInfoResp) URL() string {
	if primary := r.Data.DownloadURL; primary != "" {
		return primary
	}
	return r.Data.DownloadURLLower
}
|
||||
|
||||
// redirectResp is a response carrying a redirect target. The URL may arrive
// under any of three key spellings, so all of them are captured.
type redirectResp struct {
	Data struct {
		RedirectURL      string `json:"redirect_url"`
		RedirectURLCamel string `json:"redirectUrl"`
		RedirectURLTitle string `json:"RedirectUrl"`
	} `json:"data"`
}

// URL returns the first non-empty redirect URL, checking redirect_url, then
// redirectUrl, then RedirectUrl (which may itself be empty).
func (r redirectResp) URL() string {
	for _, candidate := range []string{r.Data.RedirectURL, r.Data.RedirectURLCamel} {
		if candidate != "" {
			return candidate
		}
	}
	return r.Data.RedirectURLTitle
}
|
||||
|
||||
type (
	// mkdirResp is the create-directory response; Data.FileID is the ID of
	// the directory.
	mkdirResp struct {
		Data struct {
			FileID int64 `json:"FileId"`
		} `json:"data"`
	}

	// uploadResp is the upload-request response.
	// NOTE(review): the AccessKeyId/SecretAccessKey/SessionToken/Bucket/Key
	// fields look like temporary S3-style credentials for the upload target;
	// confirm against the upload code path.
	uploadResp struct {
		Data struct {
			AccessKeyID     string `json:"AccessKeyId"`
			Bucket          string `json:"Bucket"`
			Key             string `json:"Key"`
			SecretAccessKey string `json:"SecretAccessKey"`
			SessionToken    string `json:"SessionToken"`
			FileID          int64  `json:"FileId"`
			Reuse           bool   `json:"Reuse"`
			EndPoint        string `json:"EndPoint"`
			StorageNode     string `json:"StorageNode"`
			UploadID        string `json:"UploadId"`
		} `json:"data"`
	}

	// s3PreSignedURLsResp carries the pre-signed URL map returned under the
	// presignedUrls key.
	// NOTE(review): the meaning of the map keys is not visible here.
	s3PreSignedURLsResp struct {
		Data struct {
			PreSignedURLs map[string]string `json:"presignedUrls"`
		} `json:"data"`
	}
)
|
||||
|
||||
type flexibleTime struct {
|
||||
t time.Time
|
||||
}
|
||||
|
||||
func (t *flexibleTime) UnmarshalJSON(data []byte) error {
|
||||
if string(data) == "null" || string(data) == `""` {
|
||||
return nil
|
||||
}
|
||||
var s string
|
||||
if err := json.Unmarshal(data, &s); err == nil {
|
||||
t.t = parseTimeString(s)
|
||||
return nil
|
||||
}
|
||||
var n int64
|
||||
if err := json.Unmarshal(data, &n); err == nil {
|
||||
if n > 1_000_000_000_000 {
|
||||
t.t = time.UnixMilli(n)
|
||||
} else {
|
||||
t.t = time.Unix(n, 0)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t flexibleTime) Time() time.Time {
|
||||
return t.t
|
||||
}
|
||||
|
||||
func parseTimeString(s string) time.Time {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return time.Time{}
|
||||
}
|
||||
for _, layout := range []string{
|
||||
time.RFC3339Nano,
|
||||
time.RFC3339,
|
||||
"2006-01-02 15:04:05",
|
||||
"2006-01-02T15:04:05",
|
||||
} {
|
||||
if parsed, err := time.ParseInLocation(layout, s, time.FixedZone("UTC+8", 8*3600)); err == nil {
|
||||
return parsed
|
||||
}
|
||||
}
|
||||
if n, err := strconv.ParseInt(s, 10, 64); err == nil {
|
||||
if n > 1_000_000_000_000 {
|
||||
return time.UnixMilli(n)
|
||||
}
|
||||
return time.Unix(n, 0)
|
||||
}
|
||||
return time.Time{}
|
||||
}
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"path"
|
||||
@@ -43,8 +44,10 @@ type Driver struct {
|
||||
algorithms []string
|
||||
userAgent string
|
||||
|
||||
client *resty.Client
|
||||
onTokenUpdate func(access, refresh, captcha, deviceID string)
|
||||
client *resty.Client
|
||||
onTokenUpdate func(access, refresh, captcha, deviceID string)
|
||||
uploadToOSSFunc func(context.Context, *s3Params, io.Reader) error
|
||||
uploadTempDir string
|
||||
|
||||
// captchaMu serializes captcha-token refreshes triggered by 4002 / 9
|
||||
// recovery in requestOnce. Without it, N concurrent callers all hitting
|
||||
@@ -75,6 +78,7 @@ type Config struct {
|
||||
DeviceID string
|
||||
RootID string
|
||||
DisableMediaLink bool
|
||||
UploadTempDir string
|
||||
OnTokenUpdate func(access, refresh, captcha, deviceID string)
|
||||
}
|
||||
|
||||
@@ -107,6 +111,7 @@ func New(c Config) *Driver {
|
||||
deviceID: deviceID,
|
||||
disableMediaLink: c.DisableMediaLink,
|
||||
onTokenUpdate: c.OnTokenUpdate,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
client: resty.New().
|
||||
SetTimeout(30*time.Second).
|
||||
SetHeader("Accept", "application/json, text/plain, */*"),
|
||||
@@ -173,8 +178,8 @@ func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error)
|
||||
|
||||
// pikpakListCooldown 是列目录触发疑似限流错误时的冷却时长。
|
||||
//
|
||||
// 与 p115 driver 的 listCooldown 同语义:只要错误属 transient
|
||||
// (error_code=10 / HTTP 429 / 5xx / 通用 "rate limit" 文本),就持续
|
||||
// 与 p115 driver 的 listCooldown 同语义:只要错误属明确限流/临时状态
|
||||
// (结构化 error_code=10 / HTTP 429 / 5xx),就持续
|
||||
// 等 10 分钟再发一次列目录请求,直到成功或 ctx 取消。这样即使 PikPak
|
||||
// 风控持续较长时间,扫描会自然延后到风控结束,不再丢半棵子树。
|
||||
const pikpakListCooldown = 10 * time.Minute
|
||||
@@ -240,7 +245,6 @@ func pikpakSleepContext(ctx context.Context, d time.Duration) error {
|
||||
//
|
||||
// - PikPak 业务码 error_code=10 ("操作频繁",见 OpenList drivers/pikpak/util.go)
|
||||
// - HTTP 429 / 500 / 502 / 503 / 504 / 509(rclone 也把这些归为 retry)
|
||||
// - 通用文本:rate limit / too many requests / blocked / temporarily unavailable
|
||||
//
|
||||
// 不包含 4122/4121/16(access_token 过期)和 9/4002(captcha 过期)—— 这些
|
||||
// 由 requestOnce 内部已经做过一次自动恢复重试;如果恢复后仍然报这类错误,
|
||||
@@ -257,22 +261,14 @@ func isTransientPikPakListError(err error) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "error_code=10") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "http 509") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "operation frequent") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "temporarily unavailable") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
return drives.ErrorMentionsHTTPStatus(err,
|
||||
http.StatusTooManyRequests,
|
||||
http.StatusInternalServerError,
|
||||
http.StatusBadGateway,
|
||||
http.StatusServiceUnavailable,
|
||||
http.StatusGatewayTimeout,
|
||||
509,
|
||||
)
|
||||
}
|
||||
|
||||
func (d *Driver) Stat(ctx context.Context, fileID string) (*drives.Entry, error) {
|
||||
@@ -354,6 +350,19 @@ func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("pikpak remove: empty file id")
|
||||
}
|
||||
if err := d.request(ctx, filesURL+":batchTrash", http.MethodPost, func(req *resty.Request) {
|
||||
req.SetBody(map[string]any{"ids": []string{fileID}})
|
||||
}, nil); err != nil {
|
||||
return fmt.Errorf("pikpak remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
currentID := d.rootID
|
||||
for _, name := range splitPath(pathFromRoot) {
|
||||
@@ -563,3 +572,4 @@ func ParseBoolDefault(raw string, def bool) bool {
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
@@ -110,7 +110,7 @@ func TestEnsureDirReusesExistingFolder(t *testing.T) {
|
||||
"files": []map[string]any{{
|
||||
"id": "existing-folder-id",
|
||||
"kind": "drive#folder",
|
||||
"name": "91 Spider",
|
||||
"name": "Crawler Uploads",
|
||||
}},
|
||||
})
|
||||
case http.MethodPost:
|
||||
@@ -124,7 +124,7 @@ func TestEnsureDirReusesExistingFolder(t *testing.T) {
|
||||
defer srv.Close()
|
||||
|
||||
d := newTestDriver(t, srv)
|
||||
got, err := d.EnsureDir(context.Background(), "91 Spider")
|
||||
got, err := d.EnsureDir(context.Background(), "Crawler Uploads")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure dir: %v", err)
|
||||
}
|
||||
@@ -150,7 +150,7 @@ func TestEnsureDirCreatesMissingFolder(t *testing.T) {
|
||||
writePikPakJSON(t, w, map[string]any{
|
||||
"id": "new-folder-id",
|
||||
"kind": "drive#folder",
|
||||
"name": "91 Spider",
|
||||
"name": "Crawler Uploads",
|
||||
})
|
||||
default:
|
||||
t.Fatalf("unexpected method %s", r.Method)
|
||||
@@ -160,14 +160,14 @@ func TestEnsureDirCreatesMissingFolder(t *testing.T) {
|
||||
defer srv.Close()
|
||||
|
||||
d := newTestDriver(t, srv)
|
||||
id, err := d.EnsureDir(context.Background(), "91 Spider")
|
||||
id, err := d.EnsureDir(context.Background(), "Crawler Uploads")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure dir: %v", err)
|
||||
}
|
||||
if id != "new-folder-id" {
|
||||
t.Fatalf("dir id = %q, want new-folder-id", id)
|
||||
}
|
||||
if got.Kind != "drive#folder" || got.ParentID != "root-id" || got.Name != "91 Spider" {
|
||||
if got.Kind != "drive#folder" || got.ParentID != "root-id" || got.Name != "Crawler Uploads" {
|
||||
t.Fatalf("create folder body = %#v", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@ func isCaptchaTokenRejectedCode(code int64) bool {
|
||||
}
|
||||
|
||||
// APIError is the public alias for the PikPak API error response. Callers
|
||||
// outside this package (e.g. the spider91→PikPak migrator, tests) can either
|
||||
// outside this package (e.g. crawler upload workers and tests) can either
|
||||
// construct it for fakes or unwrap it via errors.As. Prefer IsCaptchaError
|
||||
// over hard-coding the numeric error codes.
|
||||
type APIError = errResp
|
||||
|
||||
@@ -6,7 +6,10 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -26,7 +29,7 @@ import (
|
||||
// - 未命中:resumable.params 含 S3 兼容凭证(access_key / secret /
|
||||
// bucket / endpoint / key / security_token)
|
||||
//
|
||||
// 3. 用 Aliyun OSS SDK PutObject 把字节传到 endpoint+bucket+key
|
||||
// 3. 用 Aliyun OSS SDK PutObject 把字节传到 PikPak 返回的临时 OSS endpoint
|
||||
//
|
||||
// 4. PikPak 服务端轮询 OSS,发现完成后把 resp.File.ID 标记为可用;
|
||||
// 所以 Upload 完成后直接返回 resp.File.ID 即可(一开始就有,
|
||||
@@ -36,9 +39,11 @@ const (
|
||||
ossSecurityTokenHeaderName = "X-OSS-Security-Token"
|
||||
ossUserAgent = "aliyun-sdk-android/2.9.13(Linux/Android 14/M2004j7ac;UKQ1.231108.001)"
|
||||
// 单次 PutObject 的硬上限(OSS 文档限制 5GiB;保守用 5GiB-1)。
|
||||
// spider91 视频通常 ~100MiB,远低于该值。超过则需走 multipart,
|
||||
// 当前未实现,遇到会显式报错。
|
||||
// 超过该值需走 multipart;当前未实现,遇到会显式报错。
|
||||
maxSinglePutSize = 5*1024*1024*1024 - 1
|
||||
// 首次上传失败后最多再重试 3 次。每次重试都会重新申请 PikPak
|
||||
// upload session,以避开偶发不可解析/不可达的临时上传 endpoint。
|
||||
pikpakUploadMaxAttempts = 4
|
||||
)
|
||||
|
||||
// uploadTaskData 是 POST /drive/v1/files 的响应结构。
|
||||
@@ -73,6 +78,20 @@ type UploadResult struct {
|
||||
Size int64
|
||||
}
|
||||
|
||||
// preparedUploadBody is an upload payload that can be read more than once,
// which is what allows an upload attempt to be retried.
type preparedUploadBody struct {
	// reader yields the payload bytes.
	reader io.ReadSeeker
	// start is the absolute offset within reader where the payload begins.
	start int64
	// cleanup, when non-nil, releases whatever backs reader.
	cleanup func()
}

// rewind seeks the reader back to the start of the payload so it can be
// sent again. It fails if no reader is set or the seek fails.
func (b preparedUploadBody) rewind() error {
	if b.reader != nil {
		_, seekErr := b.reader.Seek(b.start, io.SeekStart)
		return seekErr
	}
	return errors.New("pikpak upload: nil upload body")
}
|
||||
|
||||
// Upload 实现 drives.Drive 接口;只返回 fileID。
|
||||
// 完整上传元数据见 UploadAndReportHash。
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
@@ -85,7 +104,7 @@ func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader,
|
||||
|
||||
// UploadAndReportHash 上传并返回 file ID + GCID + 实际字节数。
|
||||
//
|
||||
// 用于 spider91 → PikPak 迁移 worker:上传完后直接把 hash 写回 catalog
|
||||
// 用于 crawler upload worker:上传完后直接把 hash 写回 catalog
|
||||
// 的 content_hash 字段,避免再读一次本地文件做 hash。
|
||||
//
|
||||
// 参数:
|
||||
@@ -98,8 +117,7 @@ func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader,
|
||||
// - 必须先算 GCID 再申请上传会话(PikPak API 要求 hash 字段),
|
||||
// 所以这里先 io.Copy 到临时文件并同步算 GCID。
|
||||
// - 命中秒传时不发任何字节;否则用 OSS PutObject 上传。
|
||||
// - 单次 PutObject 上限保守用 5GiB-1。spider91 视频远小于此值,
|
||||
// 超出该值会报错(暂不实现 multipart)。
|
||||
// - 单次 PutObject 上限保守用 5GiB-1,超出该值会报错(暂不实现 multipart)。
|
||||
func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
if r == nil {
|
||||
return UploadResult{}, errors.New("pikpak upload: nil reader")
|
||||
@@ -119,23 +137,59 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
parentID = d.rootID
|
||||
}
|
||||
|
||||
// 1) 把 r 全量缓冲到临时文件,同时算 GCID。
|
||||
tmp, gcidHex, actualSize, err := bufferAndHashGCID(r, size)
|
||||
// 1) 算 GCID,并准备一个可重试读取的 body。爬虫迁移传入的是
|
||||
// *os.File,可直接复用原文件,避免再占用一份视频大小的临时空间。
|
||||
body, gcidHex, actualSize, err := d.prepareUploadBody(r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
defer func() {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmp.Name())
|
||||
}()
|
||||
if body.cleanup != nil {
|
||||
defer body.cleanup()
|
||||
}
|
||||
|
||||
// 2) 申请上传会话。
|
||||
result := UploadResult{Hash: gcidHex, Size: actualSize}
|
||||
var lastErr error
|
||||
for attempt := 1; attempt <= pikpakUploadMaxAttempts; attempt++ {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
|
||||
resp, err := d.requestUploadSession(ctx, parentID, name, actualSize, gcidHex)
|
||||
if err != nil {
|
||||
lastErr = fmt.Errorf("pikpak upload: request session: %w", err)
|
||||
if !shouldRetryPikPakUploadAttempt(lastErr, attempt) {
|
||||
return UploadResult{}, lastErr
|
||||
}
|
||||
d.logUploadRetry(name, attempt, lastErr)
|
||||
if err := pikpakSleepContext(ctx, pikpakUploadRetryDelay(attempt)); err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
out, err := d.completeUploadAttempt(ctx, body, parentID, name, result, resp)
|
||||
if err == nil {
|
||||
return out, nil
|
||||
}
|
||||
lastErr = err
|
||||
if !shouldRetryPikPakUploadAttempt(lastErr, attempt) {
|
||||
return UploadResult{}, lastErr
|
||||
}
|
||||
d.logUploadRetry(name, attempt, lastErr)
|
||||
if err := pikpakSleepContext(ctx, pikpakUploadRetryDelay(attempt)); err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
}
|
||||
return UploadResult{}, lastErr
|
||||
}
|
||||
|
||||
func (d *Driver) requestUploadSession(ctx context.Context, parentID, name string, size int64, gcidHex string) (uploadTaskData, error) {
|
||||
var resp uploadTaskData
|
||||
if err := d.request(ctx, filesURL, http.MethodPost, func(req *resty.Request) {
|
||||
req.SetBody(map[string]any{
|
||||
"kind": "drive#file",
|
||||
"name": name,
|
||||
"size": actualSize,
|
||||
"size": size,
|
||||
"hash": gcidHex,
|
||||
"upload_type": "UPLOAD_TYPE_RESUMABLE",
|
||||
"objProvider": map[string]any{"provider": "UPLOAD_TYPE_UNKNOWN"},
|
||||
@@ -143,12 +197,13 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
"folder_type": "NORMAL",
|
||||
})
|
||||
}, &resp); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: request session: %w", err)
|
||||
return uploadTaskData{}, err
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
result := UploadResult{Hash: gcidHex, Size: actualSize}
|
||||
|
||||
// 3) 命中秒传:服务端已经知道这个 hash,直接返回新文件 ID。
|
||||
func (d *Driver) completeUploadAttempt(ctx context.Context, body preparedUploadBody, parentID, name string, result UploadResult, resp uploadTaskData) (UploadResult, error) {
|
||||
// 命中秒传:服务端已经知道这个 hash,直接返回新文件 ID。
|
||||
if resp.Resumable == nil {
|
||||
if resp.File.ID != "" {
|
||||
result.FileID = resp.File.ID
|
||||
@@ -163,15 +218,15 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// 4) 未命中秒传:把字节传到 S3 兼容存储。
|
||||
if _, err := tmp.Seek(0, io.SeekStart); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: seek tmp: %w", err)
|
||||
// 未命中秒传:把字节传到 S3 兼容存储。
|
||||
if err := body.rewind(); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: rewind body: %w", err)
|
||||
}
|
||||
if err := d.uploadToOSS(ctx, &resp.Resumable.Params, tmp); err != nil {
|
||||
if err := d.uploadToOSS(ctx, &resp.Resumable.Params, body.reader); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: oss put: %w", err)
|
||||
}
|
||||
|
||||
// 5) 拿到 fileID。优先走响应里的预分配 ID;为空就回查目录。
|
||||
// 拿到 fileID。优先走响应里的预分配 ID;为空就回查目录。
|
||||
if resp.File.ID != "" {
|
||||
result.FileID = resp.File.ID
|
||||
return result, nil
|
||||
@@ -184,12 +239,114 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func shouldRetryPikPakUploadAttempt(err error, attempt int) bool {
|
||||
return attempt < pikpakUploadMaxAttempts && isRetryablePikPakUploadError(err)
|
||||
}
|
||||
|
||||
// pikpakUploadRetryDelay returns the delay associated with the given attempt
// number. The delay grows linearly, attempt seconds; a zero or negative
// attempt yields no delay.
func pikpakUploadRetryDelay(attempt int) time.Duration {
	if attempt > 0 {
		return time.Duration(attempt) * time.Second
	}
	return 0
}
|
||||
|
||||
func (d *Driver) logUploadRetry(name string, attempt int, err error) {
|
||||
log.Printf("[pikpak] upload retry drive=%s name=%q next_attempt=%d/%d err=%v",
|
||||
d.id, name, attempt+1, pikpakUploadMaxAttempts, err)
|
||||
}
|
||||
|
||||
func isRetryablePikPakUploadError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
return false
|
||||
}
|
||||
var serviceErr oss.ServiceError
|
||||
if errors.As(err, &serviceErr) {
|
||||
return serviceErr.StatusCode == http.StatusTooManyRequests || serviceErr.StatusCode >= 500
|
||||
}
|
||||
var netErr net.Error
|
||||
if errors.As(err, &netErr) {
|
||||
return true
|
||||
}
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "no such host") ||
|
||||
strings.Contains(text, "temporary failure in name resolution") ||
|
||||
strings.Contains(text, "server misbehaving") ||
|
||||
strings.Contains(text, "connection reset") ||
|
||||
strings.Contains(text, "connection refused") ||
|
||||
strings.Contains(text, "broken pipe") ||
|
||||
strings.Contains(text, "eof") ||
|
||||
strings.Contains(text, "i/o timeout") ||
|
||||
strings.Contains(text, "tls handshake timeout") ||
|
||||
strings.Contains(text, "http 429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "http 509") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "temporarily unavailable") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
}
|
||||
|
||||
func (d *Driver) prepareUploadBody(r io.Reader, size int64) (preparedUploadBody, string, int64, error) {
|
||||
if rs, ok := r.(io.ReadSeeker); ok {
|
||||
gcidHex, actualSize, start, err := hashGCIDFromReadSeeker(rs, size)
|
||||
if err != nil {
|
||||
return preparedUploadBody{}, "", 0, err
|
||||
}
|
||||
return preparedUploadBody{reader: rs, start: start, cleanup: func() {}}, gcidHex, actualSize, nil
|
||||
}
|
||||
|
||||
tmp, gcidHex, actualSize, err := bufferAndHashGCID(d.uploadTempDir, r, size)
|
||||
if err != nil {
|
||||
return preparedUploadBody{}, "", 0, err
|
||||
}
|
||||
return preparedUploadBody{
|
||||
reader: tmp,
|
||||
start: 0,
|
||||
cleanup: func() {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmp.Name())
|
||||
},
|
||||
}, gcidHex, actualSize, nil
|
||||
}
|
||||
|
||||
func hashGCIDFromReadSeeker(r io.ReadSeeker, size int64) (string, int64, int64, error) {
|
||||
start, err := r.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return "", 0, 0, fmt.Errorf("pikpak upload: seek body: %w", err)
|
||||
}
|
||||
|
||||
h := NewGCID(size)
|
||||
written, copyErr := io.Copy(h, r)
|
||||
_, seekErr := r.Seek(start, io.SeekStart)
|
||||
if copyErr != nil {
|
||||
return "", 0, start, fmt.Errorf("pikpak upload: hash body: %w", copyErr)
|
||||
}
|
||||
if seekErr != nil {
|
||||
return "", 0, start, fmt.Errorf("pikpak upload: rewind body: %w", seekErr)
|
||||
}
|
||||
if size > 0 && written != size {
|
||||
return "", 0, start, fmt.Errorf("pikpak upload: size mismatch: declared %d, copied %d", size, written)
|
||||
}
|
||||
return strings.ToUpper(hex.EncodeToString(h.Sum(nil))), written, start, nil
|
||||
}
|
||||
|
||||
// bufferAndHashGCID 把 r 复制到一个临时文件,同时计算 GCID。
|
||||
// 返回临时文件(位置在末尾,需要调用方 Seek 回 0)、GCID hex 大写、实际写入字节数。
|
||||
// 返回临时文件(位置在末尾,需要调用方 Seek 回 start)、GCID hex 大写、实际写入字节数。
|
||||
//
|
||||
// 调用方负责 Close + Remove 临时文件。
|
||||
func bufferAndHashGCID(r io.Reader, size int64) (*os.File, string, int64, error) {
|
||||
tmp, err := os.CreateTemp("", "pikpak-upload-*.bin")
|
||||
func bufferAndHashGCID(tempDir string, r io.Reader, size int64) (*os.File, string, int64, error) {
|
||||
tempDir = strings.TrimSpace(tempDir)
|
||||
if tempDir != "" {
|
||||
if err := os.MkdirAll(tempDir, 0o755); err != nil {
|
||||
return nil, "", 0, fmt.Errorf("pikpak upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(tempDir, "pikpak-upload-*.bin")
|
||||
if err != nil {
|
||||
return nil, "", 0, fmt.Errorf("pikpak upload: create tmp: %w", err)
|
||||
}
|
||||
@@ -215,10 +372,13 @@ func bufferAndHashGCID(r io.Reader, size int64) (*os.File, string, int64, error)
|
||||
//
|
||||
// 参数复用 PikPak 的临时凭证;必须带 Security Token 头部 + UserAgent,与 OpenList 一致。
|
||||
func (d *Driver) uploadToOSS(ctx context.Context, p *s3Params, body io.Reader) error {
|
||||
if d.uploadToOSSFunc != nil {
|
||||
return d.uploadToOSSFunc(ctx, p, body)
|
||||
}
|
||||
if p == nil {
|
||||
return errors.New("pikpak upload: nil s3 params")
|
||||
}
|
||||
client, err := oss.New(p.Endpoint, p.AccessKeyID, p.AccessKeySecret)
|
||||
client, err := newPikPakOSSClient(p)
|
||||
if err != nil {
|
||||
return fmt.Errorf("oss client: %w", err)
|
||||
}
|
||||
@@ -235,6 +395,44 @@ func (d *Driver) uploadToOSS(ctx context.Context, p *s3Params, body io.Reader) e
|
||||
)
|
||||
}
|
||||
|
||||
func newPikPakOSSClient(p *s3Params, options ...oss.ClientOption) (*oss.Client, error) {
|
||||
if p == nil {
|
||||
return nil, errors.New("pikpak upload: nil s3 params")
|
||||
}
|
||||
clientOptions := make([]oss.ClientOption, 0, len(options)+1)
|
||||
if isPikPakCNAMEEndpoint(p.Endpoint) {
|
||||
clientOptions = append(clientOptions, oss.UseCname(true))
|
||||
}
|
||||
clientOptions = append(clientOptions, options...)
|
||||
return oss.New(p.Endpoint, p.AccessKeyID, p.AccessKeySecret, clientOptions...)
|
||||
}
|
||||
|
||||
func isPikPakCNAMEEndpoint(endpoint string) bool {
|
||||
host := endpointHost(endpoint)
|
||||
if host == "" {
|
||||
return false
|
||||
}
|
||||
host = strings.TrimSuffix(strings.ToLower(host), ".")
|
||||
return host != "mypikpak.com" && host != "mypikpak.net" &&
|
||||
(strings.HasSuffix(host, ".mypikpak.com") || strings.HasSuffix(host, ".mypikpak.net"))
|
||||
}
|
||||
|
||||
// endpointHost extracts the bare host name from an endpoint string.
//
// The endpoint may be a full URL ("https://host:port/path"), a scheme-less
// "host[:port][/path]" value, or a bracketed IPv6 literal. Surrounding
// whitespace, any port, any path and IPv6 brackets are removed. An empty or
// whitespace-only endpoint yields "".
func endpointHost(endpoint string) string {
	trimmed := strings.TrimSpace(endpoint)
	if trimmed == "" {
		return ""
	}

	authority := trimmed
	if parsed, err := url.Parse(trimmed); err == nil && parsed.Host != "" {
		authority = parsed.Host
	} else if slash := strings.IndexByte(trimmed, '/'); slash >= 0 {
		// No usable URL host (e.g. scheme-less input): drop everything from
		// the first slash so only "host[:port]" remains.
		authority = trimmed[:slash]
	}

	// SplitHostPort fails when there is no port; in that case the value is
	// already a plain host and is kept unchanged.
	if hostOnly, _, err := net.SplitHostPort(authority); err == nil {
		authority = hostOnly
	}
	return strings.Trim(authority, "[]")
}
|
||||
|
||||
type readerWithCtx struct {
|
||||
ctx context.Context
|
||||
r io.Reader
|
||||
|
||||
@@ -6,12 +6,17 @@ import (
|
||||
"crypto/sha1"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/aliyun/aliyun-oss-go-sdk/oss"
|
||||
"github.com/go-resty/resty/v2"
|
||||
)
|
||||
|
||||
@@ -139,6 +144,80 @@ func TestUploadInstantSuccessReturnsFileID(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadUsesReadSeekerWithoutTempCopy(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/drive/v1/files", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{
|
||||
"upload_type": "UPLOAD_TYPE_RESUMABLE",
|
||||
"resumable": null,
|
||||
"file": {"id": "instant-file-id", "name": "test.mp4", "kind": "drive#file"}
|
||||
}`))
|
||||
})
|
||||
server := httptest.NewServer(mux)
|
||||
defer server.Close()
|
||||
|
||||
d := newTestDriver(t, server)
|
||||
uploadTempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
d.uploadTempDir = uploadTempDir
|
||||
|
||||
data := bytes.Repeat([]byte{0x31}, 1024)
|
||||
path := filepath.Join(t.TempDir(), "video.bin")
|
||||
if err := os.WriteFile(path, data, 0o644); err != nil {
|
||||
t.Fatalf("write source: %v", err)
|
||||
}
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
t.Fatalf("open source: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
id, err := d.Upload(context.Background(), "parent-id", "test.mp4", f, int64(len(data)))
|
||||
if err != nil {
|
||||
t.Fatalf("upload: %v", err)
|
||||
}
|
||||
if id != "instant-file-id" {
|
||||
t.Fatalf("file id = %q, want instant-file-id", id)
|
||||
}
|
||||
if _, err := os.Stat(uploadTempDir); !os.IsNotExist(err) {
|
||||
t.Fatalf("upload temp dir stat err = %v, want not created for read seeker input", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadBuffersNonSeekReaderInConfiguredTempDir(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/drive/v1/files", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{
|
||||
"upload_type": "UPLOAD_TYPE_RESUMABLE",
|
||||
"resumable": null,
|
||||
"file": {"id": "instant-file-id", "name": "test.mp4", "kind": "drive#file"}
|
||||
}`))
|
||||
})
|
||||
server := httptest.NewServer(mux)
|
||||
defer server.Close()
|
||||
|
||||
d := newTestDriver(t, server)
|
||||
uploadTempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
d.uploadTempDir = uploadTempDir
|
||||
|
||||
data := bytes.Repeat([]byte{0x42}, 1024)
|
||||
id, err := d.Upload(context.Background(), "parent-id", "test.mp4", bytes.NewBuffer(data), int64(len(data)))
|
||||
if err != nil {
|
||||
t.Fatalf("upload: %v", err)
|
||||
}
|
||||
if id != "instant-file-id" {
|
||||
t.Fatalf("file id = %q, want instant-file-id", id)
|
||||
}
|
||||
entries, err := os.ReadDir(uploadTempDir)
|
||||
if err != nil {
|
||||
t.Fatalf("read upload temp dir: %v", err)
|
||||
}
|
||||
if len(entries) != 0 {
|
||||
t.Fatalf("upload temp dir entries = %d, want cleaned", len(entries))
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadInstantSuccessFallsBackToListWhenFileIDMissing(t *testing.T) {
|
||||
listCalled := false
|
||||
mux := http.NewServeMux()
|
||||
@@ -181,6 +260,95 @@ func TestUploadInstantSuccessFallsBackToListWhenFileIDMissing(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadRetriesWithNewSessionWhenOSSEndpointDNSFails(t *testing.T) {
|
||||
sessionRequests := 0
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/drive/v1/files", func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
t.Errorf("method = %q, want POST", r.Method)
|
||||
}
|
||||
sessionRequests++
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(fmt.Sprintf(`{
|
||||
"upload_type": "UPLOAD_TYPE_RESUMABLE",
|
||||
"resumable": {
|
||||
"kind": "drive#resumable",
|
||||
"provider": "UPLOAD_TYPE_UNKNOWN",
|
||||
"params": {
|
||||
"access_key_id": "ak",
|
||||
"access_key_secret": "sk",
|
||||
"bucket": "bucket",
|
||||
"endpoint": "https://vip-lixian-%02d.upload-a10b.mypikpak.com",
|
||||
"key": "object-key-%02d",
|
||||
"security_token": "token"
|
||||
}
|
||||
},
|
||||
"file": {"id": "retry-file-%02d", "name": "retry.mp4", "kind": "drive#file"}
|
||||
}`, sessionRequests, sessionRequests, sessionRequests)))
|
||||
})
|
||||
server := httptest.NewServer(mux)
|
||||
defer server.Close()
|
||||
|
||||
d := newTestDriver(t, server)
|
||||
uploadAttempts := 0
|
||||
var uploaded []byte
|
||||
d.uploadToOSSFunc = func(_ context.Context, _ *s3Params, body io.Reader) error {
|
||||
uploadAttempts++
|
||||
if uploadAttempts == 1 {
|
||||
return &net.DNSError{Err: "no such host", Name: "vip-lixian-01.upload-a10b.mypikpak.com"}
|
||||
}
|
||||
var err error
|
||||
uploaded, err = io.ReadAll(body)
|
||||
return err
|
||||
}
|
||||
|
||||
payload := []byte("retry payload body")
|
||||
id, err := d.Upload(context.Background(), "parent-id", "retry.mp4", bytes.NewReader(payload), int64(len(payload)))
|
||||
if err != nil {
|
||||
t.Fatalf("upload: %v", err)
|
||||
}
|
||||
if id != "retry-file-02" {
|
||||
t.Fatalf("file id = %q, want retry-file-02 from the second session", id)
|
||||
}
|
||||
if sessionRequests != 2 {
|
||||
t.Fatalf("session requests = %d, want 2", sessionRequests)
|
||||
}
|
||||
if uploadAttempts != 2 {
|
||||
t.Fatalf("upload attempts = %d, want 2", uploadAttempts)
|
||||
}
|
||||
if !bytes.Equal(uploaded, payload) {
|
||||
t.Fatalf("uploaded body = %q, want %q", string(uploaded), string(payload))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPikPakOSSClientUsesCNAMEForPikPakUploadEndpoint(t *testing.T) {
|
||||
params := &s3Params{
|
||||
AccessKeyID: "ak",
|
||||
AccessKeySecret: "sk",
|
||||
Bucket: "vip-lixian-07",
|
||||
Endpoint: "http://upload-a10b.mypikpak.com",
|
||||
Key: "upload_tmp/object-key",
|
||||
}
|
||||
client, err := newPikPakOSSClient(params)
|
||||
if err != nil {
|
||||
t.Fatalf("new oss client: %v", err)
|
||||
}
|
||||
bucket, err := client.Bucket(params.Bucket)
|
||||
if err != nil {
|
||||
t.Fatalf("bucket: %v", err)
|
||||
}
|
||||
signed, err := bucket.SignURL(params.Key, oss.HTTPPut, 60)
|
||||
if err != nil {
|
||||
t.Fatalf("sign url: %v", err)
|
||||
}
|
||||
if strings.Contains(signed, "vip-lixian-07.upload-a10b.mypikpak.com") {
|
||||
t.Fatalf("signed url uses invalid bucket-prefixed PikPak host: %s", signed)
|
||||
}
|
||||
if !strings.Contains(signed, "http://upload-a10b.mypikpak.com/upload_tmp%2Fobject-key") {
|
||||
t.Fatalf("signed url = %s, want PikPak endpoint host with object key path", signed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadRejectsInvalidArguments(t *testing.T) {
|
||||
d := New(Config{ID: "x", Username: "u", Password: "p", Platform: "web"})
|
||||
cases := []struct {
|
||||
@@ -212,7 +380,7 @@ func TestUploadRejectsInvalidArguments(t *testing.T) {
|
||||
func TestBufferAndHashGCIDDetectsSizeMismatch(t *testing.T) {
|
||||
src := bytes.NewReader([]byte("hello"))
|
||||
// 声明 size=10 但实际只有 5 字节
|
||||
_, _, _, err := bufferAndHashGCID(src, 10)
|
||||
_, _, _, err := bufferAndHashGCID("", src, 10)
|
||||
if err == nil {
|
||||
t.Fatal("expected size mismatch error")
|
||||
}
|
||||
@@ -223,7 +391,7 @@ func TestBufferAndHashGCIDDetectsSizeMismatch(t *testing.T) {
|
||||
|
||||
func TestBufferAndHashGCIDComputesCorrectHash(t *testing.T) {
|
||||
data := bytes.Repeat([]byte{0x55}, 1024)
|
||||
tmp, hex, written, err := bufferAndHashGCID(bytes.NewReader(data), int64(len(data)))
|
||||
tmp, hex, written, err := bufferAndHashGCID("", bytes.NewReader(data), int64(len(data)))
|
||||
if err != nil {
|
||||
t.Fatalf("buffer: %v", err)
|
||||
}
|
||||
|
||||
@@ -16,23 +16,23 @@ import (
|
||||
)
|
||||
|
||||
// Default request settings for the Quark driver.
// NOTE(review): presumably used when the driver Config leaves the matching
// field empty — confirm against New.
const (
	// defaultUA is the default User-Agent string.
	defaultUA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) quark-cloud-drive/2.5.20 Chrome/100.0.4896.160 Electron/18.3.5.4-b478491100 Safari/537.36 Channel/pckk_other_ch"
	// defaultReferer is the default Referer value.
	defaultReferer = "https://pan.quark.cn"
	// defaultAPI is the default API base URL.
	defaultAPI = "https://drive.quark.cn/1/clouddrive"
	// defaultPR is the default "pr" value.
	defaultPR = "ucpro"
)
|
||||
|
||||
type Driver struct {
|
||||
id string
|
||||
cookie string
|
||||
rootID string
|
||||
ua string
|
||||
referer string
|
||||
apiBase string
|
||||
pr string
|
||||
client *resty.Client
|
||||
onCookieUpdate func(string)
|
||||
useTranscodingAddress bool
|
||||
id string
|
||||
cookie string
|
||||
rootID string
|
||||
ua string
|
||||
referer string
|
||||
apiBase string
|
||||
pr string
|
||||
client *resty.Client
|
||||
onCookieUpdate func(string)
|
||||
useTranscodingAddress bool
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
@@ -60,7 +60,7 @@ func New(c Config) *Driver {
|
||||
onCookieUpdate: c.OnCookieUpdate,
|
||||
}
|
||||
d.client = resty.New().
|
||||
SetTimeout(30 * time.Second).
|
||||
SetTimeout(30*time.Second).
|
||||
SetHeader("Accept", "application/json, text/plain, */*").
|
||||
SetHeader("Referer", d.referer).
|
||||
SetHeader("User-Agent", d.ua)
|
||||
@@ -263,12 +263,28 @@ func (d *Driver) findChildDir(ctx context.Context, parent, name string) (string,
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// ---------- 上传(第一版不实现,走本地 teaser 兜底) ----------
|
||||
// ---------- 上传(第一版不实现,走本地预览视频兜底) ----------
|
||||
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("quark remove: empty file id")
|
||||
}
|
||||
body := map[string]any{
|
||||
"action_type": 1,
|
||||
"exclude_fids": []string{},
|
||||
"filelist": []string{fileID},
|
||||
}
|
||||
if err := d.request(ctx, "/file/delete", http.MethodPost, nil, body, nil); err != nil {
|
||||
return fmt.Errorf("quark remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ---------- helpers ----------
|
||||
|
||||
func fileToEntry(f *file, parentID string) drives.Entry {
|
||||
@@ -343,3 +359,4 @@ func setCookieValue(cookie, key, value string) string {
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,986 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/jpeg"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/fingerprint"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
)
|
||||
|
||||
// Fixed payloads used by the script crawler tests.
// NOTE(review): presumably served as fake video content to exercise duplicate
// versus unique detection — confirm against the tests that reference them.
const (
	scriptCrawlerDuplicateBytes = "duplicate-video-bytes"
	scriptCrawlerUniqueBytes    = "unique-video-bytes"
)
|
||||
|
||||
// writeScriptCrawlerFFprobeStub writes an executable shell script into dir
// that stands in for ffprobe, and returns its path.
//
// With ok=true the script ("ffprobe-ok.sh") prints "video" and exits 0. With
// ok=false the script ("ffprobe-fail.sh") prints "moov atom not found" to
// stderr and exits 1. The test fails immediately if the file cannot be
// written.
func writeScriptCrawlerFFprobeStub(t *testing.T, dir string, ok bool) string {
	t.Helper()

	name, script := "ffprobe-fail.sh", "#!/bin/sh\necho 'moov atom not found' >&2\nexit 1\n"
	if ok {
		name, script = "ffprobe-ok.sh", "#!/bin/sh\necho video\nexit 0\n"
	}

	path := filepath.Join(dir, name)
	if err := os.WriteFile(path, []byte(script), 0o755); err != nil {
		t.Fatalf("write ffprobe stub: %v", err)
	}
	return path
}
|
||||
|
||||
// writeScriptCrawlerFFmpegStub writes an executable shell script
// ("ffmpeg-hls.sh") into dir that stands in for ffmpeg, and returns its path.
//
// The script records its arguments, one per line, into the file named by
// $GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE when that variable is set, then writes
// the literal "hls-video-bytes" to the path given as its last argument.
func writeScriptCrawlerFFmpegStub(t *testing.T, dir string) string {
	t.Helper()

	path := filepath.Join(dir, "ffmpeg-hls.sh")
	script := "#!/bin/sh\n" +
		"if [ -n \"$GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE\" ]; then printf '%s\\n' \"$@\" > \"$GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE\"; fi\n" +
		"out=\"\"\n" +
		"for arg do out=\"$arg\"; done\n" +
		"printf 'hls-video-bytes' > \"$out\"\n"
	if err := os.WriteFile(path, []byte(script), 0o755); err != nil {
		t.Fatalf("write ffmpeg stub: %v", err)
	}
	return path
}
|
||||
|
||||
// writeScriptCrawlerJPEG writes a 48x48 JPEG (quality 95) filled with the
// single colour c to path. The test fails immediately if the file cannot be
// created, encoded, or closed.
func writeScriptCrawlerJPEG(t *testing.T, path string, c color.RGBA) {
	t.Helper()

	const side = 48
	img := image.NewRGBA(image.Rect(0, 0, side, side))
	for y := 0; y < side; y++ {
		for x := 0; x < side; x++ {
			img.SetRGBA(x, y, c)
		}
	}

	f, err := os.Create(path)
	if err != nil {
		t.Fatalf("create jpeg: %v", err)
	}
	if err := jpeg.Encode(f, img, &jpeg.Options{Quality: 95}); err != nil {
		_ = f.Close()
		t.Fatalf("encode jpeg: %v", err)
	}
	// Close explicitly rather than via defer: a failed close can mean the
	// image was not fully written, which must fail the test.
	if err := f.Close(); err != nil {
		t.Fatalf("close jpeg: %v", err)
	}
}
|
||||
|
||||
func TestCrawlerRunOnceImportsLocalFileAndSkipsExisting(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
CrawlerName: "Demo Crawler",
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Skipped != 0 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d skipped:%d failed:%d, want 1/0/0", res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123"))
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if v.Title != "Imported From Helper" || v.FileID != "abc-123.mp4" || v.Size == 0 {
|
||||
t.Fatalf("video = title:%q file:%q size:%d", v.Title, v.FileID, v.Size)
|
||||
}
|
||||
if !hasString(v.Tags, "Demo Crawler") {
|
||||
t.Fatalf("video tags = %#v, want crawler name tag", v.Tags)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), "abc-123.mp4")); err != nil {
|
||||
t.Fatalf("video file not copied: %v", err)
|
||||
}
|
||||
|
||||
res, err = c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("second run: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Skipped != 1 {
|
||||
t.Fatalf("second result = new:%d skipped:%d, want 0/1", res.NewVideos, res.Skipped)
|
||||
}
|
||||
if res.SeenSnapshot != 1 {
|
||||
t.Fatalf("seen snapshot = %d, want 1", res.SeenSnapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceMarksPreviewDisabledWhenConfigured(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
DisablePreview: true,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d failed:%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123"))
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if v.PreviewStatus != "disabled" {
|
||||
t.Fatalf("preview status = %q, want disabled", v.PreviewStatus)
|
||||
}
|
||||
if v.FingerprintStatus != "ready" || v.SampledSHA256 == "" {
|
||||
t.Fatalf("fingerprint status=%q sampled=%q, want ready and sampled hash", v.FingerprintStatus, v.SampledSHA256)
|
||||
}
|
||||
pending, err := cat.ListVideosByPreviewStatus(ctx, "demo", "pending", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending previews: %v", err)
|
||||
}
|
||||
if len(pending) != 0 {
|
||||
t.Fatalf("pending previews = %d, want 0", len(pending))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceUsesCurrentDrivePreviewSwitch(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: drv.ID(),
|
||||
Kind: Kind,
|
||||
Name: "Demo",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{"script_path": "/tmp/crawler.py"},
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed drive: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
DisablePreview: true,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d failed:%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123"))
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if v.PreviewStatus != "pending" {
|
||||
t.Fatalf("preview status = %q, want pending from current drive switch", v.PreviewStatus)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceUsesDefaultCrawlerNamespace(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.SeenSnapshot != 0 {
|
||||
t.Fatalf("result = new:%d seen:%d, want 1/0", res.NewVideos, res.SeenSnapshot)
|
||||
}
|
||||
videoID := BuildVideoID("demo", "abc-123")
|
||||
if _, err := cat.GetVideo(ctx, videoID); err != nil {
|
||||
t.Fatalf("get crawler video: %v", err)
|
||||
}
|
||||
|
||||
res, err = c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("second run: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Skipped != 1 || res.SeenSnapshot != 1 {
|
||||
t.Fatalf("second result = new:%d skipped:%d seen:%d, want 0/1/1", res.NewVideos, res.Skipped, res.SeenSnapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOncePassesAbsoluteJobPathsWhenWorkDirDiffers(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
t.Chdir(tmp)
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join("data", "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
scriptDir := filepath.Join(tmp, "scripts")
|
||||
if err := os.MkdirAll(scriptDir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir script dir: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(scriptDir, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_ASSERT_ABS", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
WorkDir: scriptDir,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Skipped != 0 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d skipped:%d failed:%d, want 1/0/0", res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
if !filepath.IsAbs(res.JobFile) || !filepath.IsAbs(res.SeenFile) {
|
||||
t.Fatalf("result paths should be absolute: job=%q seen=%q", res.JobFile, res.SeenFile)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceImportsSimpleMediaURLWithoutSourceID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/video.mp4" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
_, _ = w.Write([]byte("simple-video-bytes"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_SIMPLE", "1")
|
||||
t.Setenv("GO_SCRIPTCRAWLER_MEDIA_URL", srv.URL+"/video.mp4?token=first")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
HTTPClient: srv.Client(),
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Skipped != 0 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d skipped:%d failed:%d, want 1/0/0", res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
videos, err := cat.ListVideosByDrive(ctx, "demo")
|
||||
if err != nil {
|
||||
t.Fatalf("list videos: %v", err)
|
||||
}
|
||||
if len(videos) != 1 {
|
||||
t.Fatalf("videos = %d, want 1", len(videos))
|
||||
}
|
||||
v := videos[0]
|
||||
if !strings.HasPrefix(v.ID, BuildVideoID("demo", "auto-")) {
|
||||
t.Fatalf("video id = %q, want generated auto source id", v.ID)
|
||||
}
|
||||
if v.Title != "Simple Protocol Video" || v.Ext != "mp4" || v.ThumbnailURL != "" || v.Size == 0 {
|
||||
t.Fatalf("video = title:%q ext:%q thumb:%q size:%d", v.Title, v.Ext, v.ThumbnailURL, v.Size)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), v.FileID)); err != nil {
|
||||
t.Fatalf("video file not downloaded: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_SCRIPTCRAWLER_MEDIA_URL", srv.URL+"/video.mp4?token=second")
|
||||
res, err = c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("second run: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Skipped != 1 {
|
||||
t.Fatalf("second result = new:%d skipped:%d, want 0/1", res.NewVideos, res.Skipped)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceSkipsFingerprintDuplicateAndContinues(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
|
||||
seedFile := "seed-canonical.mp4"
|
||||
if err := os.WriteFile(filepath.Join(drv.VideosDir(), seedFile), []byte(scriptCrawlerDuplicateBytes), 0o644); err != nil {
|
||||
t.Fatalf("write seed video: %v", err)
|
||||
}
|
||||
seed := &catalog.Video{
|
||||
ID: "seed-for-hash",
|
||||
DriveID: drv.ID(),
|
||||
FileID: seedFile,
|
||||
Title: "Seed",
|
||||
Size: int64(len(scriptCrawlerDuplicateBytes)),
|
||||
PublishedAt: time.Now(),
|
||||
}
|
||||
sampled, err := fingerprint.Compute(ctx, drv, seed, fingerprint.Config{}, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("compute seed fingerprint: %v", err)
|
||||
}
|
||||
_ = os.Remove(filepath.Join(drv.VideosDir(), seedFile))
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: "existing-canonical",
|
||||
DriveID: "other-drive",
|
||||
FileID: "existing.mp4",
|
||||
FileName: "existing.mp4",
|
||||
Title: "Existing Canonical",
|
||||
Size: int64(len(scriptCrawlerDuplicateBytes)),
|
||||
Ext: "mp4",
|
||||
SampledSHA256: sampled,
|
||||
FingerprintStatus: "ready",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed canonical video: %v", err)
|
||||
}
|
||||
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_DUP_UNIQUE", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Skipped != 1 || res.Failed != 0 || res.TotalEntries != 2 {
|
||||
t.Fatalf("result = total:%d new:%d skipped:%d failed:%d, want 2/1/1/0", res.TotalEntries, res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
if res.CandidateBudget <= res.TargetNew {
|
||||
t.Fatalf("candidate budget = %d, target = %d; want expanded budget", res.CandidateBudget, res.TargetNew)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, BuildVideoID("demo", "dup-source")); err == nil {
|
||||
t.Fatal("duplicate candidate should not be imported")
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), "dup-source.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("duplicate local file stat = %v, want removed", err)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "unique-source"))
|
||||
if err != nil {
|
||||
t.Fatalf("unique video should be imported: %v", err)
|
||||
}
|
||||
if v.SampledSHA256 == "" || v.FingerprintStatus != "ready" {
|
||||
t.Fatalf("unique fingerprint = %q status=%q, want ready sampled fingerprint", v.SampledSHA256, v.FingerprintStatus)
|
||||
}
|
||||
seen, err := cat.ListCrawlerSourceIDs(ctx, Kind, "demo")
|
||||
if err != nil {
|
||||
t.Fatalf("list seen source ids: %v", err)
|
||||
}
|
||||
seenSet := map[string]bool{}
|
||||
for _, id := range seen {
|
||||
seenSet[id] = true
|
||||
}
|
||||
if !seenSet["dup-source"] || !seenSet["unique-source"] {
|
||||
t.Fatalf("seen ids = %#v, want duplicate and imported source ids", seen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerProcessItemSkipsNearDuplicateByTitleDurationAndThumbnail(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
commonThumbDir := filepath.Join(tmp, "common-thumbs")
|
||||
if err := os.MkdirAll(commonThumbDir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir common thumbs: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
canonicalID := "existing-canonical"
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: canonicalID,
|
||||
DriveID: "other-drive",
|
||||
FileID: "existing.mp4",
|
||||
FileName: "existing.mp4",
|
||||
Title: "91 Test Similar Title 1215516",
|
||||
DurationSeconds: 257,
|
||||
Size: 12345,
|
||||
Ext: "mp4",
|
||||
ThumbnailURL: "/p/thumb/" + canonicalID,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed canonical video: %v", err)
|
||||
}
|
||||
writeScriptCrawlerJPEG(t, mediaasset.ThumbnailPathInDir(commonThumbDir, canonicalID), color.RGBA{R: 210, G: 40, B: 40, A: 255})
|
||||
|
||||
outputDir := drv.OutputDir()
|
||||
mediaPath := filepath.Join(outputDir, "near-video.mp4")
|
||||
if err := os.WriteFile(mediaPath, []byte("near-duplicate-but-different-bytes"), 0o644); err != nil {
|
||||
t.Fatalf("write media: %v", err)
|
||||
}
|
||||
thumbPath := filepath.Join(outputDir, "near-thumb.jpg")
|
||||
writeScriptCrawlerJPEG(t, thumbPath, color.RGBA{R: 211, G: 41, B: 41, A: 255})
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
CommonThumbDir: commonThumbDir,
|
||||
})
|
||||
imported, err := c.processItem(ctx, Item{
|
||||
SourceID: "near-source",
|
||||
Title: "91 Test Similar Title 1215516 - source suffix",
|
||||
Author: "helper",
|
||||
DurationSeconds: 257,
|
||||
Media: MediaRef{LocalFile: mediaPath},
|
||||
Thumbnail: MediaRef{LocalFile: thumbPath},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("process item: %v", err)
|
||||
}
|
||||
if imported {
|
||||
t.Fatal("near duplicate imported, want skipped")
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, BuildVideoID("demo", "near-source")); err == nil {
|
||||
t.Fatal("near duplicate should not be inserted into catalog")
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), "near-source.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("near duplicate video stat = %v, want removed", err)
|
||||
}
|
||||
if sourceThumb, err := drv.ThumbPath("near-source.jpg"); err != nil {
|
||||
t.Fatalf("source thumb path: %v", err)
|
||||
} else if _, err := os.Stat(sourceThumb); !os.IsNotExist(err) {
|
||||
t.Fatalf("source thumb stat = %v, want removed", err)
|
||||
}
|
||||
if _, err := os.Stat(mediaasset.ThumbnailPathInDir(commonThumbDir, BuildVideoID("demo", "near-source"))); !os.IsNotExist(err) {
|
||||
t.Fatalf("common thumb stat = %v, want removed", err)
|
||||
}
|
||||
seen, err := cat.ListCrawlerSourceIDs(ctx, Kind, "demo")
|
||||
if err != nil {
|
||||
t.Fatalf("list seen source ids: %v", err)
|
||||
}
|
||||
if !hasString(seen, "near-source") {
|
||||
t.Fatalf("seen ids = %#v, want near-source", seen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerProcessItemKeepsLargerNearDuplicate(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
commonThumbDir := filepath.Join(tmp, "common-thumbs")
|
||||
if err := os.MkdirAll(commonThumbDir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir common thumbs: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
smallerID := "smaller-canonical"
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: smallerID,
|
||||
DriveID: "other-drive",
|
||||
FileID: "smaller.mp4",
|
||||
FileName: "smaller.mp4",
|
||||
Title: "91 Test Larger Candidate 1215516",
|
||||
DurationSeconds: 257,
|
||||
Size: 5,
|
||||
Ext: "mp4",
|
||||
ThumbnailURL: "/p/thumb/" + smallerID,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed smaller video: %v", err)
|
||||
}
|
||||
writeScriptCrawlerJPEG(t, mediaasset.ThumbnailPathInDir(commonThumbDir, smallerID), color.RGBA{R: 80, G: 160, B: 80, A: 255})
|
||||
|
||||
outputDir := drv.OutputDir()
|
||||
mediaPath := filepath.Join(outputDir, "larger-video.mp4")
|
||||
if err := os.WriteFile(mediaPath, []byte("near-duplicate-larger-candidate-bytes"), 0o644); err != nil {
|
||||
t.Fatalf("write media: %v", err)
|
||||
}
|
||||
thumbPath := filepath.Join(outputDir, "larger-thumb.jpg")
|
||||
writeScriptCrawlerJPEG(t, thumbPath, color.RGBA{R: 81, G: 161, B: 81, A: 255})
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
CommonThumbDir: commonThumbDir,
|
||||
})
|
||||
imported, err := c.processItem(ctx, Item{
|
||||
SourceID: "larger-source",
|
||||
Title: "91 Test Larger Candidate 1215516 - source suffix",
|
||||
Author: "helper",
|
||||
DurationSeconds: 257,
|
||||
Media: MediaRef{LocalFile: mediaPath},
|
||||
Thumbnail: MediaRef{LocalFile: thumbPath},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("process item: %v", err)
|
||||
}
|
||||
if !imported {
|
||||
t.Fatal("larger near duplicate was skipped, want imported")
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, smallerID); err == nil {
|
||||
t.Fatal("smaller near duplicate should be deleted from catalog")
|
||||
}
|
||||
if deleted, err := cat.IsVideoDeleted(ctx, smallerID); err != nil || !deleted {
|
||||
t.Fatalf("smaller tombstone = %v, %v; want deleted tombstone", deleted, err)
|
||||
}
|
||||
larger, err := cat.GetVideo(ctx, BuildVideoID("demo", "larger-source"))
|
||||
if err != nil {
|
||||
t.Fatalf("larger video should be imported: %v", err)
|
||||
}
|
||||
if larger.Size <= 5 {
|
||||
t.Fatalf("larger size = %d, want > 5", larger.Size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceRejectsInvalidDownloadedVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
CrawlerName: "Demo Crawler",
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, false),
|
||||
ScriptPath: dummyScript,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Skipped != 0 || res.Failed != 1 || res.TotalEntries != 1 {
|
||||
t.Fatalf("result = total:%d new:%d skipped:%d failed:%d, want 1/0/0/1", res.TotalEntries, res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123")); err == nil {
|
||||
t.Fatal("invalid video should not be imported")
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), "abc-123.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("invalid local video stat = %v, want removed", err)
|
||||
}
|
||||
seen, err := cat.ListCrawlerSourceIDs(ctx, Kind, "demo")
|
||||
if err != nil {
|
||||
t.Fatalf("list seen source ids: %v", err)
|
||||
}
|
||||
if len(seen) != 0 {
|
||||
t.Fatalf("seen ids = %#v, want none for invalid video", seen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceDownloadsHLSMediaURL(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HLS", "1")
|
||||
ffmpegArgsFile := filepath.Join(tmp, "ffmpeg-args.txt")
|
||||
t.Setenv("GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE", ffmpegArgsFile)
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
CrawlerName: "Demo Crawler",
|
||||
PythonPath: wrapper,
|
||||
FFmpegPath: writeScriptCrawlerFFmpegStub(t, tmp),
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Skipped != 0 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d skipped:%d failed:%d, want 1/0/0", res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "hls-source"))
|
||||
if err != nil {
|
||||
t.Fatalf("get hls video: %v", err)
|
||||
}
|
||||
if v.FileID != "hls-source.mp4" || v.Size != int64(len("hls-video-bytes")) {
|
||||
t.Fatalf("video file=%q size=%d, want hls-source.mp4 size %d", v.FileID, v.Size, len("hls-video-bytes"))
|
||||
}
|
||||
data, err := os.ReadFile(filepath.Join(drv.VideosDir(), "hls-source.mp4"))
|
||||
if err != nil {
|
||||
t.Fatalf("read hls output: %v", err)
|
||||
}
|
||||
if string(data) != "hls-video-bytes" {
|
||||
t.Fatalf("hls output = %q", string(data))
|
||||
}
|
||||
argsData, err := os.ReadFile(ffmpegArgsFile)
|
||||
if err != nil {
|
||||
t.Fatalf("read ffmpeg args: %v", err)
|
||||
}
|
||||
argsText := "\n" + string(argsData) + "\n"
|
||||
for _, want := range []string{
|
||||
"\n-protocol_whitelist\nhttp,https,tcp,tls,crypto\n",
|
||||
"\n-allowed_extensions\nALL\n",
|
||||
"\n-allowed_segment_extensions\nALL\n",
|
||||
"\n-extension_picky\n0\n",
|
||||
} {
|
||||
if !strings.Contains(argsText, want) {
|
||||
t.Fatalf("ffmpeg args missing %q in:\n%s", strings.TrimSpace(want), string(argsData))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestScriptCrawlerHelperProcess(t *testing.T) {
|
||||
if os.Getenv("GO_WANT_SCRIPTCRAWLER_HELPER") != "1" {
|
||||
return
|
||||
}
|
||||
args := os.Args
|
||||
jobPath := ""
|
||||
for i := 0; i < len(args)-1; i++ {
|
||||
if args[i] == "--job" {
|
||||
jobPath = args[i+1]
|
||||
break
|
||||
}
|
||||
}
|
||||
if jobPath == "" {
|
||||
fmt.Fprintln(os.Stderr, "missing --job")
|
||||
os.Exit(2)
|
||||
}
|
||||
data, err := os.ReadFile(jobPath)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(2)
|
||||
}
|
||||
var job Job
|
||||
if err := json.Unmarshal(data, &job); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(2)
|
||||
}
|
||||
if os.Getenv("GO_WANT_SCRIPTCRAWLER_ASSERT_ABS") == "1" {
|
||||
if !filepath.IsAbs(jobPath) || !filepath.IsAbs(job.SeenSourceIDsFile) || !filepath.IsAbs(job.OutputDir) {
|
||||
fmt.Fprintf(os.Stderr, "expected absolute paths, got job=%q seen=%q output=%q\n", jobPath, job.SeenSourceIDsFile, job.OutputDir)
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
||||
if os.Getenv("GO_WANT_SCRIPTCRAWLER_SIMPLE") == "1" {
|
||||
event := map[string]any{
|
||||
"title": "Simple Protocol Video",
|
||||
"media_url": os.Getenv("GO_SCRIPTCRAWLER_MEDIA_URL"),
|
||||
}
|
||||
_ = json.NewEncoder(os.Stdout).Encode(event)
|
||||
os.Exit(0)
|
||||
}
|
||||
if os.Getenv("GO_WANT_SCRIPTCRAWLER_HLS") == "1" {
|
||||
event := Event{
|
||||
Type: "item",
|
||||
Item: Item{
|
||||
SourceID: "hls-source",
|
||||
Title: "HLS Protocol Video",
|
||||
Author: "helper",
|
||||
Media: MediaRef{
|
||||
URL: "https://media.example.test/video.m3u8",
|
||||
Headers: map[string]string{
|
||||
"Referer": "https://example.test/",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
_ = json.NewEncoder(os.Stdout).Encode(event)
|
||||
os.Exit(0)
|
||||
}
|
||||
if os.Getenv("GO_WANT_SCRIPTCRAWLER_DUP_UNIQUE") == "1" {
|
||||
duplicateFile := filepath.Join(job.OutputDir, "duplicate.mp4")
|
||||
if err := os.WriteFile(duplicateFile, []byte(scriptCrawlerDuplicateBytes), 0o644); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(2)
|
||||
}
|
||||
uniqueFile := filepath.Join(job.OutputDir, "unique.mp4")
|
||||
if err := os.WriteFile(uniqueFile, []byte(scriptCrawlerUniqueBytes), 0o644); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(2)
|
||||
}
|
||||
for _, event := range []Event{
|
||||
{
|
||||
Type: "item",
|
||||
Item: Item{
|
||||
SourceID: "dup-source",
|
||||
Title: "Duplicate Candidate",
|
||||
Author: "helper",
|
||||
Media: MediaRef{LocalFile: duplicateFile},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: "item",
|
||||
Item: Item{
|
||||
SourceID: "unique-source",
|
||||
Title: "Unique Candidate",
|
||||
Author: "helper",
|
||||
Media: MediaRef{LocalFile: uniqueFile},
|
||||
},
|
||||
},
|
||||
} {
|
||||
_ = json.NewEncoder(os.Stdout).Encode(event)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
localFile := filepath.Join(job.OutputDir, "helper.mp4")
|
||||
if err := os.WriteFile(localFile, []byte("helper-video"), 0o644); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(2)
|
||||
}
|
||||
event := Event{
|
||||
Type: "item",
|
||||
Item: Item{
|
||||
SourceID: "abc-123",
|
||||
Title: "Imported From Helper",
|
||||
Author: "helper",
|
||||
Media: MediaRef{LocalFile: localFile},
|
||||
},
|
||||
}
|
||||
_ = json.NewEncoder(os.Stdout).Encode(event)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// hasString reports whether want occurs in values. A nil or empty slice
// yields false.
func hasString(values []string, want string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == want {
			return true
		}
	}
	return false
}
|
||||
@@ -0,0 +1,213 @@
|
||||
// Package scriptcrawler provides a generic local drive for script-based
|
||||
// crawlers. A crawler script discovers videos; the Go runner downloads them
|
||||
// into this drive and the existing preview/fingerprint workers consume them
|
||||
// through the normal drives.Drive interface.
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
// Kind is the drive kind identifier returned by (*Driver).Kind.
const Kind = "scriptcrawler"
|
||||
|
||||
// Config carries the settings needed to construct a Driver with New.
type Config struct {
	// ID is the drive identifier; Init rejects a blank value.
	ID string
	// RootDir is the directory under which Init creates the drive's
	// sub-directories; Init rejects a blank value.
	RootDir string
}
|
||||
|
||||
// Driver is a local, directory-backed drive for script crawlers. All of its
// paths are derived from rootDir.
type Driver struct {
	// id is the drive identifier reported by ID.
	id string
	// rootDir is the base directory holding the videos, thumbs, output and
	// .crawl sub-directories.
	rootDir string
}
|
||||
|
||||
func New(c Config) *Driver {
|
||||
return &Driver{id: c.ID, rootDir: c.RootDir}
|
||||
}
|
||||
|
||||
// Kind returns the package-level Kind constant.
func (d *Driver) Kind() string {
	return Kind
}
|
||||
|
||||
// ID returns the drive identifier the Driver was configured with.
func (d *Driver) ID() string {
	return d.id
}
|
||||
|
||||
// RootID returns the fixed root identifier "/".
func (d *Driver) RootID() string {
	return "/"
}
|
||||
|
||||
func (d *Driver) Init(context.Context) error {
|
||||
if strings.TrimSpace(d.id) == "" {
|
||||
return errors.New("scriptcrawler: empty drive id")
|
||||
}
|
||||
if strings.TrimSpace(d.rootDir) == "" {
|
||||
return errors.New("scriptcrawler: empty root dir")
|
||||
}
|
||||
for _, sub := range []string{"videos", "thumbs", "output", ".crawl"} {
|
||||
if err := os.MkdirAll(filepath.Join(d.rootDir, sub), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RootDir returns the root directory the Driver was configured with.
func (d *Driver) RootDir() string {
	return d.rootDir
}
|
||||
|
||||
// VideosDir returns the "videos" directory below the root directory.
func (d *Driver) VideosDir() string {
	return filepath.Join(d.rootDir, "videos")
}
|
||||
|
||||
// ThumbsDir returns the "thumbs" directory below the root directory.
func (d *Driver) ThumbsDir() string {
	return filepath.Join(d.rootDir, "thumbs")
}
|
||||
|
||||
// OutputDir returns the "output" directory below the root directory.
func (d *Driver) OutputDir() string {
	return filepath.Join(d.rootDir, "output")
}
|
||||
|
||||
// CrawlDir returns the ".crawl" directory below the root directory.
func (d *Driver) CrawlDir() string {
	return filepath.Join(d.rootDir, ".crawl")
}
|
||||
|
||||
func (d *Driver) VideoPath(fileID string) (string, error) {
|
||||
return safeJoin(d.VideosDir(), fileID)
|
||||
}
|
||||
|
||||
func (d *Driver) ThumbPath(fileID string) (string, error) {
|
||||
return safeJoin(d.ThumbsDir(), fileID)
|
||||
}
|
||||
|
||||
func (d *Driver) OutputPath(fileName string) (string, error) {
|
||||
return safeJoin(d.OutputDir(), fileName)
|
||||
}
|
||||
|
||||
func (d *Driver) List(context.Context, string) ([]drives.Entry, error) {
|
||||
entries, err := os.ReadDir(d.VideosDir())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
out := make([]drives.Entry, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
info, err := e.Info()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
out = append(out, drives.Entry{
|
||||
ID: e.Name(),
|
||||
Name: e.Name(),
|
||||
Size: info.Size(),
|
||||
IsDir: false,
|
||||
ModTime: info.ModTime(),
|
||||
})
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Stat(ctx context.Context, fileID string) (*drives.Entry, error) {
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &drives.Entry{
|
||||
ID: fileID,
|
||||
Name: fileID,
|
||||
Size: info.Size(),
|
||||
IsDir: info.IsDir(),
|
||||
ModTime: info.ModTime(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLink, error) {
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if info.IsDir() || info.Size() == 0 {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
URL: path,
|
||||
Expires: time.Now().Add(24 * time.Hour),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
videoPath, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
info, err := os.Stat(videoPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
removeThumbCandidates(d.ThumbPath, strings.TrimSuffix(fileID, filepath.Ext(fileID)))
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return errors.New("scriptcrawler: refusing to remove directory")
|
||||
}
|
||||
if err := os.Remove(videoPath); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
removeThumbCandidates(d.ThumbPath, strings.TrimSuffix(fileID, filepath.Ext(fileID)))
|
||||
return nil
|
||||
}
|
||||
|
||||
func removeThumbCandidates(pathFor func(string) (string, error), stem string) {
|
||||
stem = strings.TrimSpace(stem)
|
||||
if stem == "" {
|
||||
return
|
||||
}
|
||||
for _, ext := range []string{".jpg", ".jpeg", ".png", ".webp"} {
|
||||
path, err := pathFor(stem + ext)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
_ = os.Remove(path)
|
||||
}
|
||||
}
|
||||
|
||||
func safeJoin(root, fileID string) (string, error) {
|
||||
id := strings.TrimSpace(fileID)
|
||||
if id == "" || filepath.Base(id) != id {
|
||||
return "", errors.New("scriptcrawler: invalid file id")
|
||||
}
|
||||
if strings.TrimSpace(root) == "" {
|
||||
return "", errors.New("scriptcrawler: empty root")
|
||||
}
|
||||
rootAbs, err := filepath.Abs(root)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
pathAbs, err := filepath.Abs(filepath.Join(rootAbs, id))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if pathAbs != rootAbs && !strings.HasPrefix(pathAbs, rootAbs+string(os.PathSeparator)) {
|
||||
return "", errors.New("scriptcrawler: file id escapes root")
|
||||
}
|
||||
return pathAbs, nil
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
@@ -0,0 +1,405 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
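// DryRun trial-runs a crawler script without writing anything to the catalog:
// it generates job.json in a temporary directory, starts the script process,
// and stops it as soon as the first item event (or the first MaxItems item
// events) has arrived. It then performs a small probe of the direct video URL
// to verify that the script can actually crawl a video.
// It backs the "test script" button shown in the admin UI after a script has
// been imported.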
|
||||
|
||||
const (
|
||||
defaultDryRunTimeout = 2 * time.Minute
|
||||
dryRunLogTailLines = 60
|
||||
dryRunMediaProbeLimit = 20 * time.Second
|
||||
dryRunStopGrace = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
type DryRunConfig struct {
|
||||
PythonPath string
|
||||
ScriptPath string
|
||||
ProxyURL string
|
||||
ConfigJSON string
|
||||
// MaxItems 收到多少条 item 后停止脚本,默认 1。
|
||||
MaxItems int
|
||||
// Timeout 整个试跑的硬上限,默认 2 分钟。
|
||||
Timeout time.Duration
|
||||
// SkipMediaProbe 跳过视频直链可达性探测(单测注入用)。
|
||||
SkipMediaProbe bool
|
||||
HTTPClient *http.Client
|
||||
}
|
||||
|
||||
// DryRunItem is the JSON shape of a single item reported in DryRunResult.Items.
// Every field except Title is omitted from the JSON when empty.
type DryRunItem struct {
	Title          string `json:"title"`
	SourceID       string `json:"sourceId,omitempty"`
	MediaURL       string `json:"mediaUrl,omitempty"`
	MediaLocalFile string `json:"mediaLocalFile,omitempty"`
	ThumbnailURL   string `json:"thumbnailUrl,omitempty"`
	DetailURL      string `json:"detailUrl,omitempty"`
}
|
||||
|
||||
// DryRunMediaCheck is the JSON shape of the media probe result stored in
// DryRunResult.MediaCheck. Every field except OK is omitted from the JSON when
// it holds its zero value.
type DryRunMediaCheck struct {
	OK            bool   `json:"ok"`
	Status        int    `json:"status,omitempty"`
	ContentType   string `json:"contentType,omitempty"`
	ContentLength int64  `json:"contentLengthBytes,omitempty"`
	Error         string `json:"error,omitempty"`
}
|
||||
|
||||
type DryRunResult struct {
|
||||
OK bool `json:"ok"`
|
||||
Items []DryRunItem `json:"items"`
|
||||
MediaCheck *DryRunMediaCheck `json:"mediaCheck,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Log []string `json:"log,omitempty"`
|
||||
DurationMs int64 `json:"durationMs"`
|
||||
}
|
||||
|
||||
// dryRunLogTail collects written bytes as text lines and keeps only the most
// recent ones. Its Write and snapshot methods hold mu while they work.
type dryRunLogTail struct {
	mu sync.Mutex
	// lines holds the completed lines that have been kept.
	lines []string
	// partial holds the text written after the last newline seen so far.
	partial string
}
|
||||
|
||||
func newDryRunLogTail() *dryRunLogTail {
|
||||
return &dryRunLogTail{lines: make([]string, 0, dryRunLogTailLines)}
|
||||
}
|
||||
|
||||
func (t *dryRunLogTail) Write(p []byte) (int, error) {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
|
||||
chunk := strings.ReplaceAll(string(p), "\r\n", "\n")
|
||||
parts := strings.Split(t.partial+chunk, "\n")
|
||||
t.partial = parts[len(parts)-1]
|
||||
for _, line := range parts[:len(parts)-1] {
|
||||
t.appendLocked(line)
|
||||
}
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
func (t *dryRunLogTail) snapshot() []string {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
|
||||
lines := append([]string{}, t.lines...)
|
||||
if partial := strings.TrimSpace(t.partial); partial != "" {
|
||||
lines = appendDryRunLogLine(lines, partial)
|
||||
}
|
||||
return lines
|
||||
}
|
||||
|
||||
// appendLocked records one complete line in the tail. The caller must hold t.mu.
func (t *dryRunLogTail) appendLocked(line string) {
	t.lines = appendDryRunLogLine(t.lines, line)
}
|
||||
|
||||
func appendDryRunLogLine(lines []string, line string) []string {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
return lines
|
||||
}
|
||||
if len(lines) >= dryRunLogTailLines {
|
||||
lines = lines[1:]
|
||||
}
|
||||
return append(lines, line)
|
||||
}
|
||||
|
||||
func DryRun(ctx context.Context, cfg DryRunConfig) *DryRunResult {
|
||||
started := time.Now()
|
||||
result := &DryRunResult{Items: []DryRunItem{}}
|
||||
defer func() { result.DurationMs = time.Since(started).Milliseconds() }()
|
||||
|
||||
scriptPath := strings.TrimSpace(cfg.ScriptPath)
|
||||
if scriptPath == "" {
|
||||
result.Error = "脚本路径为空,请先导入脚本"
|
||||
return result
|
||||
}
|
||||
if _, err := os.Stat(scriptPath); err != nil {
|
||||
result.Error = fmt.Sprintf("脚本不存在: %v", err)
|
||||
return result
|
||||
}
|
||||
pythonPath := strings.TrimSpace(cfg.PythonPath)
|
||||
if pythonPath == "" {
|
||||
pythonPath = "python3"
|
||||
}
|
||||
maxItems := cfg.MaxItems
|
||||
if maxItems <= 0 {
|
||||
maxItems = 1
|
||||
}
|
||||
timeout := cfg.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = defaultDryRunTimeout
|
||||
}
|
||||
|
||||
tmpDir, err := os.MkdirTemp("", "crawler-dryrun-")
|
||||
if err != nil {
|
||||
result.Error = fmt.Sprintf("创建临时目录失败: %v", err)
|
||||
return result
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
outputDir := filepath.Join(tmpDir, "output")
|
||||
if err := os.MkdirAll(outputDir, 0o755); err != nil {
|
||||
result.Error = fmt.Sprintf("创建输出目录失败: %v", err)
|
||||
return result
|
||||
}
|
||||
seenPath := filepath.Join(tmpDir, "seen.txt")
|
||||
if err := os.WriteFile(seenPath, nil, 0o644); err != nil {
|
||||
result.Error = fmt.Sprintf("写入 seen 文件失败: %v", err)
|
||||
return result
|
||||
}
|
||||
|
||||
configJSON := json.RawMessage([]byte("{}"))
|
||||
if raw := strings.TrimSpace(cfg.ConfigJSON); raw != "" {
|
||||
if !json.Valid([]byte(raw)) {
|
||||
result.Error = "自定义配置必须是合法 JSON"
|
||||
return result
|
||||
}
|
||||
configJSON = json.RawMessage(raw)
|
||||
}
|
||||
job := Job{
|
||||
Protocol: "crawler.v1",
|
||||
Mode: "crawl",
|
||||
RunID: "dryrun-" + started.UTC().Format("20060102T150405Z"),
|
||||
CrawlerID: "dryrun",
|
||||
TargetNew: maxItems,
|
||||
SeenSourceIDsFile: seenPath,
|
||||
OutputDir: outputDir,
|
||||
Config: configJSON,
|
||||
Network: JobNetwork{ProxyURL: strings.TrimSpace(cfg.ProxyURL)},
|
||||
}
|
||||
jobPath := filepath.Join(tmpDir, "job.json")
|
||||
jobData, err := json.MarshalIndent(job, "", " ")
|
||||
if err != nil {
|
||||
result.Error = fmt.Sprintf("生成 job 文件失败: %v", err)
|
||||
return result
|
||||
}
|
||||
if err := os.WriteFile(jobPath, jobData, 0o600); err != nil {
|
||||
result.Error = fmt.Sprintf("写入 job 文件失败: %v", err)
|
||||
return result
|
||||
}
|
||||
|
||||
runCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(runCtx, pythonPath, scriptPath, "--job", jobPath)
|
||||
cmd.Dir = filepath.Dir(scriptPath)
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||
cmd.Cancel = func() error {
|
||||
return killDryRunProcess(cmd)
|
||||
}
|
||||
// 超时或提前 kill 后,脚本派生的子进程可能仍持有 stdout/stderr 管道;
|
||||
// WaitDelay 强制在宽限期后关闭管道,避免读取端永久阻塞。
|
||||
cmd.WaitDelay = 3 * time.Second
|
||||
if proxyURL := strings.TrimSpace(cfg.ProxyURL); proxyURL != "" {
|
||||
cmd.Env = append(os.Environ(),
|
||||
"HTTP_PROXY="+proxyURL,
|
||||
"HTTPS_PROXY="+proxyURL,
|
||||
"http_proxy="+proxyURL,
|
||||
"https_proxy="+proxyURL,
|
||||
"NO_PROXY=",
|
||||
"no_proxy=",
|
||||
)
|
||||
}
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
result.Error = fmt.Sprintf("启动脚本失败: %v", err)
|
||||
return result
|
||||
}
|
||||
logTail := newDryRunLogTail()
|
||||
cmd.Stderr = logTail
|
||||
if err := cmd.Start(); err != nil {
|
||||
_ = stdout.Close()
|
||||
result.Error = fmt.Sprintf("启动脚本失败: %v", err)
|
||||
return result
|
||||
}
|
||||
|
||||
items := []DryRunItem{}
|
||||
var firstMediaHeaders map[string]string
|
||||
parseFailures := 0
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
scanner.Buffer(make([]byte, 64*1024), 4*1024*1024)
|
||||
for scanner.Scan() {
|
||||
if runCtx.Err() != nil {
|
||||
break
|
||||
}
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
var event Event
|
||||
if err := json.Unmarshal([]byte(line), &event); err != nil {
|
||||
parseFailures++
|
||||
continue
|
||||
}
|
||||
eventType := strings.ToLower(strings.TrimSpace(event.Type))
|
||||
item := event.normalizedItem()
|
||||
if eventType == "" && item.hasPayload() {
|
||||
eventType = "item"
|
||||
}
|
||||
if eventType != "item" {
|
||||
continue
|
||||
}
|
||||
normalized, _, err := normalizeItemForImport(item)
|
||||
if err != nil {
|
||||
result.Error = fmt.Sprintf("item 字段不完整: %v", err)
|
||||
continue
|
||||
}
|
||||
mediaURL := strings.TrimSpace(normalized.Media.URL)
|
||||
if len(items) == 0 {
|
||||
firstMediaHeaders = normalized.Media.Headers
|
||||
}
|
||||
items = append(items, DryRunItem{
|
||||
Title: strings.TrimSpace(normalized.Title),
|
||||
SourceID: strings.TrimSpace(item.SourceID),
|
||||
MediaURL: mediaURL,
|
||||
MediaLocalFile: strings.TrimSpace(normalized.Media.LocalFile),
|
||||
ThumbnailURL: strings.TrimSpace(normalized.Thumbnail.URL),
|
||||
DetailURL: strings.TrimSpace(normalized.DetailURL),
|
||||
})
|
||||
if len(items) >= maxItems {
|
||||
break
|
||||
}
|
||||
}
|
||||
// 拿够了就停掉脚本,避免它继续翻页。给已经自然结束的脚本一个很短
|
||||
// 的宽限期,让 stderr 日志先被管道读完,避免 dry-run 回显偶发为空。
|
||||
waitDone := make(chan struct{})
|
||||
go func() {
|
||||
_ = cmd.Wait()
|
||||
close(waitDone)
|
||||
}()
|
||||
select {
|
||||
case <-waitDone:
|
||||
case <-time.After(dryRunStopGrace):
|
||||
_ = killDryRunProcess(cmd)
|
||||
<-waitDone
|
||||
}
|
||||
|
||||
result.Log = logTail.snapshot()
|
||||
result.Items = items
|
||||
|
||||
if len(items) == 0 {
|
||||
if result.Error == "" {
|
||||
switch {
|
||||
case runCtx.Err() != nil && ctx.Err() == nil:
|
||||
result.Error = fmt.Sprintf("测试超时(%s),脚本没有输出任何视频", timeout)
|
||||
case parseFailures > 0:
|
||||
result.Error = "脚本 stdout 不是合法的 crawler.v1 JSON Lines(日志应输出到 stderr)"
|
||||
default:
|
||||
result.Error = "脚本退出但没有输出任何视频"
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
result.Error = ""
|
||||
|
||||
first := items[0]
|
||||
switch {
|
||||
case cfg.SkipMediaProbe:
|
||||
result.OK = true
|
||||
case first.MediaLocalFile != "":
|
||||
// 脚本自己下载到 output_dir 的模式:试跑用的是临时目录,
|
||||
// 文件已随目录清理,能输出合法 local_file 即视为通过。
|
||||
result.OK = true
|
||||
default:
|
||||
check := probeMediaURL(ctx, cfg, first, firstMediaHeaders)
|
||||
result.MediaCheck = check
|
||||
result.OK = check.OK
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func killDryRunProcess(cmd *exec.Cmd) error {
|
||||
if cmd == nil || cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil {
|
||||
if err == syscall.ESRCH {
|
||||
return nil
|
||||
}
|
||||
return cmd.Process.Kill()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// probeMediaURL 对视频直链发一个 Range: bytes=0-0 的小请求,
|
||||
// 验证直链可达(带上脚本给的防盗链 headers 和代理)。
|
||||
func probeMediaURL(ctx context.Context, cfg DryRunConfig, item DryRunItem, mediaHeaders map[string]string) *DryRunMediaCheck {
|
||||
check := &DryRunMediaCheck{}
|
||||
if item.MediaURL == "" {
|
||||
check.Error = "item 没有视频直链"
|
||||
return check
|
||||
}
|
||||
|
||||
client := cfg.HTTPClient
|
||||
if client == nil {
|
||||
transport := &http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
ResponseHeaderTimeout: dryRunMediaProbeLimit,
|
||||
}
|
||||
if err := configureExplicitProxy(transport, cfg.ProxyURL); err != nil {
|
||||
check.Error = fmt.Sprintf("代理配置无效: %v", err)
|
||||
return check
|
||||
}
|
||||
client = &http.Client{Transport: transport}
|
||||
}
|
||||
|
||||
probeCtx, cancel := context.WithTimeout(ctx, dryRunMediaProbeLimit)
|
||||
defer cancel()
|
||||
req, err := http.NewRequestWithContext(probeCtx, http.MethodGet, item.MediaURL, nil)
|
||||
if err != nil {
|
||||
check.Error = fmt.Sprintf("视频直链无效: %v", err)
|
||||
return check
|
||||
}
|
||||
req.Header.Set("User-Agent", defaultUserAgent)
|
||||
req.Header.Set("Range", "bytes=0-0")
|
||||
if item.DetailURL != "" {
|
||||
req.Header.Set("Referer", item.DetailURL)
|
||||
}
|
||||
for k, v := range mediaHeaders {
|
||||
k = strings.TrimSpace(k)
|
||||
if k == "" {
|
||||
continue
|
||||
}
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
check.Error = fmt.Sprintf("视频直链请求失败: %v", err)
|
||||
return check
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
check.Status = resp.StatusCode
|
||||
check.ContentType = resp.Header.Get("Content-Type")
|
||||
if cr := resp.Header.Get("Content-Range"); cr != "" {
|
||||
// Content-Range: bytes 0-0/12345 → 取总大小
|
||||
if idx := strings.LastIndex(cr, "/"); idx >= 0 {
|
||||
var total int64
|
||||
if _, err := fmt.Sscanf(cr[idx+1:], "%d", &total); err == nil {
|
||||
check.ContentLength = total
|
||||
}
|
||||
}
|
||||
}
|
||||
if check.ContentLength == 0 && resp.StatusCode == http.StatusOK {
|
||||
check.ContentLength = resp.ContentLength
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusPartialContent {
|
||||
check.Error = fmt.Sprintf("视频直链返回 HTTP %d", resp.StatusCode)
|
||||
return check
|
||||
}
|
||||
check.OK = true
|
||||
return check
|
||||
}
|
||||
@@ -0,0 +1,176 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// writeDryRunScript writes body behind a "#!/bin/sh" shebang into an
// executable crawler.sh inside a fresh temporary directory and returns the
// script's path. The test fails immediately if the file cannot be written.
func writeDryRunScript(t *testing.T, body string) string {
	t.Helper()
	scriptFile := filepath.Join(t.TempDir(), "crawler.sh")
	content := []byte("#!/bin/sh\n" + body)
	if err := os.WriteFile(scriptFile, content, 0o755); err != nil {
		t.Fatalf("write script: %v", err)
	}
	return scriptFile
}
|
||||
|
||||
func TestDryRunCollectsFirstItem(t *testing.T) {
|
||||
script := writeDryRunScript(t, `
|
||||
echo '[log] fetching list page' >&2
|
||||
echo '{"type":"item","item":{"title":"Test Video","media_url":"https://cdn.example.test/v.mp4","source_id":"123","thumbnail_url":"https://cdn.example.test/t.jpg"}}'
|
||||
echo '{"type":"done","stats":{"emitted":1}}'
|
||||
`)
|
||||
result := DryRun(context.Background(), DryRunConfig{
|
||||
PythonPath: "/bin/sh",
|
||||
ScriptPath: script,
|
||||
SkipMediaProbe: true,
|
||||
})
|
||||
if !result.OK {
|
||||
t.Fatalf("ok = false, error = %q, log = %v", result.Error, result.Log)
|
||||
}
|
||||
if len(result.Items) != 1 {
|
||||
t.Fatalf("items = %d, want 1", len(result.Items))
|
||||
}
|
||||
item := result.Items[0]
|
||||
if item.Title != "Test Video" || item.MediaURL != "https://cdn.example.test/v.mp4" || item.SourceID != "123" {
|
||||
t.Fatalf("item = %+v", item)
|
||||
}
|
||||
if len(result.Log) == 0 || !strings.Contains(result.Log[0], "fetching list page") {
|
||||
t.Fatalf("log tail = %v, want stderr captured", result.Log)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDryRunCapturesStderrWhenStoppingAfterFirstItem(t *testing.T) {
|
||||
script := writeDryRunScript(t, `
|
||||
echo '[log] first item ready' >&2
|
||||
echo '{"type":"item","item":{"title":"Early Stop Video","media_url":"https://cdn.example.test/v.mp4","source_id":"early-stop"}}'
|
||||
sleep 30
|
||||
`)
|
||||
start := time.Now()
|
||||
result := DryRun(context.Background(), DryRunConfig{
|
||||
PythonPath: "/bin/sh",
|
||||
ScriptPath: script,
|
||||
SkipMediaProbe: true,
|
||||
})
|
||||
if !result.OK {
|
||||
t.Fatalf("ok = false, error = %q, log = %v", result.Error, result.Log)
|
||||
}
|
||||
if elapsed := time.Since(start); elapsed > 5*time.Second {
|
||||
t.Fatalf("dry run took %s, script was not stopped after first item", elapsed)
|
||||
}
|
||||
if len(result.Log) == 0 || !strings.Contains(result.Log[0], "first item ready") {
|
||||
t.Fatalf("log tail = %v, want stderr captured before early stop", result.Log)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDryRunProbesMediaURL(t *testing.T) {
|
||||
var gotRange, gotReferer string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
gotRange = r.Header.Get("Range")
|
||||
gotReferer = r.Header.Get("Referer")
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
w.Header().Set("Content-Range", "bytes 0-0/4096")
|
||||
w.WriteHeader(http.StatusPartialContent)
|
||||
_, _ = w.Write([]byte("x"))
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
script := writeDryRunScript(t, fmt.Sprintf(
|
||||
`echo '{"type":"item","title":"Probe Video","media_url":"%s/v.mp4","detail_url":"https://example.test/view"}'`,
|
||||
srv.URL,
|
||||
))
|
||||
result := DryRun(context.Background(), DryRunConfig{
|
||||
PythonPath: "/bin/sh",
|
||||
ScriptPath: script,
|
||||
})
|
||||
if !result.OK {
|
||||
t.Fatalf("ok = false, error = %q, mediaCheck = %+v", result.Error, result.MediaCheck)
|
||||
}
|
||||
if result.MediaCheck == nil || !result.MediaCheck.OK {
|
||||
t.Fatalf("mediaCheck = %+v, want ok", result.MediaCheck)
|
||||
}
|
||||
if result.MediaCheck.Status != http.StatusPartialContent || result.MediaCheck.ContentLength != 4096 {
|
||||
t.Fatalf("mediaCheck = %+v, want 206 with total 4096", result.MediaCheck)
|
||||
}
|
||||
if gotRange != "bytes=0-0" || gotReferer != "https://example.test/view" {
|
||||
t.Fatalf("probe headers range=%q referer=%q", gotRange, gotReferer)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDryRunReportsBrokenMediaURL(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
http.Error(w, "forbidden", http.StatusForbidden)
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
script := writeDryRunScript(t, fmt.Sprintf(
|
||||
`echo '{"type":"item","title":"Dead Link","media_url":"%s/v.mp4"}'`,
|
||||
srv.URL,
|
||||
))
|
||||
result := DryRun(context.Background(), DryRunConfig{
|
||||
PythonPath: "/bin/sh",
|
||||
ScriptPath: script,
|
||||
})
|
||||
if result.OK {
|
||||
t.Fatal("ok = true, want false for HTTP 403 media url")
|
||||
}
|
||||
if result.MediaCheck == nil || result.MediaCheck.OK || result.MediaCheck.Status != http.StatusForbidden {
|
||||
t.Fatalf("mediaCheck = %+v, want failed 403", result.MediaCheck)
|
||||
}
|
||||
if len(result.Items) != 1 {
|
||||
t.Fatalf("items = %d, want item still returned for debugging", len(result.Items))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDryRunRejectsNonJSONStdout(t *testing.T) {
|
||||
script := writeDryRunScript(t, `echo 'plain text progress output'`)
|
||||
result := DryRun(context.Background(), DryRunConfig{
|
||||
PythonPath: "/bin/sh",
|
||||
ScriptPath: script,
|
||||
SkipMediaProbe: true,
|
||||
})
|
||||
if result.OK {
|
||||
t.Fatal("ok = true, want false for non-JSON stdout")
|
||||
}
|
||||
if !strings.Contains(result.Error, "JSON Lines") {
|
||||
t.Fatalf("error = %q, want JSON Lines hint", result.Error)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDryRunTimesOut(t *testing.T) {
|
||||
script := writeDryRunScript(t, `sleep 30`)
|
||||
start := time.Now()
|
||||
result := DryRun(context.Background(), DryRunConfig{
|
||||
PythonPath: "/bin/sh",
|
||||
ScriptPath: script,
|
||||
Timeout: 2 * time.Second,
|
||||
SkipMediaProbe: true,
|
||||
})
|
||||
if result.OK {
|
||||
t.Fatal("ok = true, want false on timeout")
|
||||
}
|
||||
if !strings.Contains(result.Error, "超时") {
|
||||
t.Fatalf("error = %q, want timeout message", result.Error)
|
||||
}
|
||||
if elapsed := time.Since(start); elapsed > 10*time.Second {
|
||||
t.Fatalf("dry run took %s, script was not killed", elapsed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDryRunMissingScript(t *testing.T) {
|
||||
result := DryRun(context.Background(), DryRunConfig{
|
||||
PythonPath: "/bin/sh",
|
||||
ScriptPath: filepath.Join(t.TempDir(), "missing.py"),
|
||||
})
|
||||
if result.OK || result.Error == "" {
|
||||
t.Fatalf("result = %+v, want error for missing script", result)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// maxCrawlerNameRunes is the maximum length, counted in runes, that
// ExtractMetadata accepts for CRAWLER_NAME.
const maxCrawlerNameRunes = 80
|
||||
|
||||
// Metadata is the information extracted from a crawler script's source.
type Metadata struct {
	// Name is the trimmed value of the script's CRAWLER_NAME declaration.
	Name string `json:"name"`
}
|
||||
|
||||
func ReadMetadata(scriptPath string) (Metadata, error) {
|
||||
scriptPath = strings.TrimSpace(scriptPath)
|
||||
if scriptPath == "" {
|
||||
return Metadata{}, errors.New("脚本路径为空")
|
||||
}
|
||||
if filepath.Ext(scriptPath) != ".py" {
|
||||
return Metadata{}, errors.New("目前只支持 .py 爬虫脚本")
|
||||
}
|
||||
data, err := os.ReadFile(scriptPath)
|
||||
if err != nil {
|
||||
return Metadata{}, fmt.Errorf("读取脚本失败: %w", err)
|
||||
}
|
||||
return ExtractMetadata(string(data))
|
||||
}
|
||||
|
||||
func ExtractMetadata(source string) (Metadata, error) {
|
||||
for _, line := range strings.Split(source, "\n") {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if trimmed == "" || strings.HasPrefix(trimmed, "#") {
|
||||
continue
|
||||
}
|
||||
if !strings.HasPrefix(trimmed, "CRAWLER_NAME") {
|
||||
continue
|
||||
}
|
||||
left, right, ok := strings.Cut(trimmed, "=")
|
||||
if !ok || strings.TrimSpace(left) != "CRAWLER_NAME" {
|
||||
continue
|
||||
}
|
||||
name, ok := parsePythonStringLiteral(right)
|
||||
if !ok {
|
||||
return Metadata{}, errors.New(`CRAWLER_NAME 必须是字符串字面量,例如 CRAWLER_NAME = "示例爬虫"`)
|
||||
}
|
||||
name = strings.TrimSpace(name)
|
||||
if name == "" {
|
||||
return Metadata{}, errors.New("CRAWLER_NAME 不能为空")
|
||||
}
|
||||
if len([]rune(name)) > maxCrawlerNameRunes {
|
||||
return Metadata{}, fmt.Errorf("CRAWLER_NAME 不能超过 %d 个字符", maxCrawlerNameRunes)
|
||||
}
|
||||
return Metadata{Name: name}, nil
|
||||
}
|
||||
return Metadata{}, errors.New(`脚本必须声明 CRAWLER_NAME,例如 CRAWLER_NAME = "示例爬虫"`)
|
||||
}
|
||||
|
||||
// parsePythonStringLiteral decodes a single-line Python string literal found
// at the start of raw (surrounding whitespace is ignored) and reports whether
// one was found. Anything after the closing quote, such as a trailing
// comment, is ignored.
//
// Supported: single or double quotes; the r/R, u/U and b/B prefixes; the
// escapes \n, \r, \t, \\, \', \" as well as \xHH, \uXXXX and \UXXXXXXXX.
// As in Python, raw strings keep every backslash, and unrecognised escapes
// keep their backslash too. A malformed hex escape, a code point above
// U+10FFFF or a missing closing quote yields ("", false). Surrogate code
// points are written as U+FFFD. Triple-quoted strings are not supported.
func parsePythonStringLiteral(raw string) (string, bool) {
	s := strings.TrimSpace(raw)
	if s == "" {
		return "", false
	}

	// Consume string prefixes; only "r" changes how the body is decoded.
	rawString := false
prefixes:
	for len(s) > 0 {
		switch s[0] {
		case 'r', 'R':
			rawString = true
			s = strings.TrimSpace(s[1:])
		case 'u', 'U', 'b', 'B':
			s = strings.TrimSpace(s[1:])
		default:
			break prefixes
		}
	}
	if len(s) < 2 || (s[0] != '"' && s[0] != '\'') {
		return "", false
	}
	quote := s[0]

	// decodeHex converts a run of hex digits into a code point; it fails on
	// any non-hex byte or a value beyond the Unicode range.
	decodeHex := func(digits string) (rune, bool) {
		var value uint32
		for i := 0; i < len(digits); i++ {
			c := digits[i]
			var nibble byte
			switch {
			case c >= '0' && c <= '9':
				nibble = c - '0'
			case c >= 'a' && c <= 'f':
				nibble = c - 'a' + 10
			case c >= 'A' && c <= 'F':
				nibble = c - 'A' + 10
			default:
				return 0, false
			}
			value = value<<4 | uint32(nibble)
		}
		if value > 0x10FFFF {
			return 0, false
		}
		return rune(value), true
	}

	var b strings.Builder
	for i := 1; i < len(s); i++ {
		ch := s[i]
		if ch == quote {
			return b.String(), true
		}
		if ch != '\\' {
			b.WriteByte(ch)
			continue
		}

		// Backslash: look at the escaped byte.
		i++
		if i >= len(s) {
			break // dangling backslash, the literal is unterminated
		}
		esc := s[i]
		if rawString {
			b.WriteByte('\\')
			b.WriteByte(esc)
			continue
		}
		switch esc {
		case 'n':
			b.WriteByte('\n')
		case 'r':
			b.WriteByte('\r')
		case 't':
			b.WriteByte('\t')
		case '\\', '"', '\'':
			b.WriteByte(esc)
		case 'x', 'u', 'U':
			width := 2
			if esc == 'u' {
				width = 4
			} else if esc == 'U' {
				width = 8
			}
			end := i + 1 + width
			if end > len(s) {
				return "", false
			}
			r, ok := decodeHex(s[i+1 : end])
			if !ok {
				return "", false
			}
			b.WriteRune(r)
			i = end - 1 // skip the digits just consumed
		default:
			// Python leaves unrecognised escapes in the string unchanged.
			b.WriteByte('\\')
			b.WriteByte(esc)
		}
	}
	return "", false
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExtractMetadataReadsCrawlerName(t *testing.T) {
|
||||
meta, err := ExtractMetadata(`
|
||||
# comment
|
||||
CRAWLER_NAME = "示例爬虫"
|
||||
`)
|
||||
if err != nil {
|
||||
t.Fatalf("extract metadata: %v", err)
|
||||
}
|
||||
if meta.Name != "示例爬虫" {
|
||||
t.Fatalf("name = %q", meta.Name)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMetadataRejectsMissingCrawlerName(t *testing.T) {
|
||||
_, err := ExtractMetadata(`print("hello")`)
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "CRAWLER_NAME") {
|
||||
t.Fatalf("error = %v, want CRAWLER_NAME guidance", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMetadataRejectsEmptyCrawlerName(t *testing.T) {
|
||||
_, err := ExtractMetadata(`CRAWLER_NAME = " "`)
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "不能为空") {
|
||||
t.Fatalf("error = %v, want empty-name error", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
"github.com/video-site/backend/internal/mediasim"
|
||||
)
|
||||
|
||||
const (
	// nearDuplicateTitleThreshold is the minimum mediasim.TitleSimilarity
	// score a candidate needs before its thumbnail is compared.
	nearDuplicateTitleThreshold = 0.90
	// nearDuplicateSSIMThreshold is the minimum mediasim.ImageSSIM score for
	// two thumbnails to count as the same video.
	nearDuplicateSSIMThreshold = 0.95
	// nearDuplicateDurationToleranceSeconds is the duration tolerance passed
	// to the catalog's candidate query.
	nearDuplicateDurationToleranceSeconds = 2
	// nearDuplicateCandidateLimit is the candidate limit passed to the
	// catalog's candidate query.
	nearDuplicateCandidateLimit = 200
)
|
||||
|
||||
// nearDuplicateMatch describes an existing catalog video that was judged to
// be a near duplicate, together with the scores that led to the verdict.
type nearDuplicateMatch struct {
	// video is the matching catalog entry.
	video           *catalog.Video
	// titleSimilarity is the title similarity score of the match.
	titleSimilarity float64
	// thumbnailSSIM is the thumbnail SSIM score of the match.
	thumbnailSSIM   float64
}
|
||||
|
||||
func (c *Crawler) findNearDuplicateVideo(ctx context.Context, source *catalog.Video, sourceThumbPath string) (*nearDuplicateMatch, error) {
|
||||
if c == nil || c.cfg.Catalog == nil || source == nil {
|
||||
return nil, nil
|
||||
}
|
||||
sourceThumbPath = strings.TrimSpace(sourceThumbPath)
|
||||
commonThumbDir := strings.TrimSpace(c.cfg.CommonThumbDir)
|
||||
if sourceThumbPath == "" || commonThumbDir == "" || strings.TrimSpace(source.Title) == "" || source.DurationSeconds <= 0 {
|
||||
return nil, nil
|
||||
}
|
||||
if _, err := os.Stat(sourceThumbPath); err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
candidates, err := c.cfg.Catalog.ListNearDuplicateVideoCandidates(ctx, source, nearDuplicateDurationToleranceSeconds, nearDuplicateCandidateLimit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, candidate := range candidates {
|
||||
if candidate == nil || candidate.ID == source.ID {
|
||||
continue
|
||||
}
|
||||
titleScore := mediasim.TitleSimilarity(source.Title, candidate.Title)
|
||||
if titleScore < nearDuplicateTitleThreshold {
|
||||
continue
|
||||
}
|
||||
candidateThumbPath := mediaasset.ThumbnailPathInDir(commonThumbDir, candidate.ID)
|
||||
if _, err := os.Stat(candidateThumbPath); err != nil {
|
||||
continue
|
||||
}
|
||||
ssimScore, err := mediasim.ImageSSIM(sourceThumbPath, candidateThumbPath)
|
||||
if err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s candidate=%s thumbnail ssim failed: %v", c.cfg.Driver.ID(), source.ID, candidate.ID, err)
|
||||
continue
|
||||
}
|
||||
if ssimScore >= nearDuplicateSSIMThreshold {
|
||||
return &nearDuplicateMatch{
|
||||
video: candidate,
|
||||
titleSimilarity: titleScore,
|
||||
thumbnailSSIM: ssimScore,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,787 +0,0 @@
|
||||
package spider91
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
)
|
||||
|
||||
// TestCrawlerRunOnceFullFlow drives the complete Crawler.RunOnce flow with a
// fake "python" script and an httptest server: the script produces JSON, the
// videos and covers are downloaded and stored in the catalog, and a repeated
// run skips the 91 source video IDs that already exist.
func TestCrawlerRunOnceFullFlow(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("shell-based fake script only on unix")
	}

	tmp := t.TempDir()

	// 1. Fake HTTP server: serves video or cover bytes depending on the path.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case strings.Contains(r.URL.Path, "120001.mp4"):
			w.Header().Set("Content-Type", "video/mp4")
			_, _ = w.Write([]byte("FAKEVIDEO1"))
		case strings.Contains(r.URL.Path, "120002.mp4"):
			w.Header().Set("Content-Type", "video/mp4")
			_, _ = w.Write([]byte("FAKEVIDEO2BYTES"))
		case strings.Contains(r.URL.Path, "/thumb/120001.jpg"):
			w.Header().Set("Content-Type", "image/jpeg")
			_, _ = w.Write([]byte("\xff\xd8\xff\xe0fakejpg1"))
		case strings.Contains(r.URL.Path, "/thumb/120002.jpg"):
			w.Header().Set("Content-Type", "image/jpeg")
			_, _ = w.Write([]byte("\xff\xd8\xff\xe0fakejpg2"))
		default:
			http.NotFound(w, r)
		}
	}))
	defer srv.Close()

	// 2. Fake python script: parses the --output / --stream-output arguments,
	// in stream mode echoes each video's JSON line by line to stdout
	// (simulating the Python side's streaming), and still writes the --output
	// file as an archive.
	videoEntries := []map[string]string{
		{
			"title":      "Video One 口交",
			"thumb_url":  srv.URL + "/thumb/not-120001.jpg",
			"video_url":  srv.URL + "/videos/120001.mp4",
			"viewkey":    "vk-001",
			"detail_url": srv.URL + "/v.php?viewkey=vk-001",
		},
		{
			"title":      "Video Two",
			"thumb_url":  srv.URL + "/thumb/not-120002.jpg",
			"video_url":  srv.URL + "/videos/120002.mp4",
			"viewkey":    "vk-002",
			"detail_url": srv.URL + "/v.php?viewkey=vk-002",
		},
	}
	scriptPath := filepath.Join(tmp, "fake_spider.sh")
	scriptBody := buildFakeSpiderScript(videoEntries)
	if err := os.WriteFile(scriptPath, []byte(scriptBody), 0o755); err != nil {
		t.Fatalf("write script: %v", err)
	}

	// 3. Prepare catalog + driver + crawler.
	dbPath := filepath.Join(tmp, "test.db")
	cat, err := catalog.Open(dbPath)
	if err != nil {
		t.Fatalf("open catalog: %v", err)
	}
	defer cat.Close()

	driveID := "spider91-test"
	rootDir := filepath.Join(tmp, "spider91", driveID)
	commonThumbs := filepath.Join(tmp, "previews", "thumbs")
	drv := New(Config{ID: driveID, RootDir: rootDir})

	// Also register the drive in the catalog (the Crawler does not read it
	// directly, but the real main flow writes it).
	if err := cat.UpsertDrive(context.Background(), &catalog.Drive{
		ID:   driveID,
		Kind: Kind,
		Name: "test crawler",
	}); err != nil {
		t.Fatalf("upsert drive: %v", err)
	}
	if _, err := cat.CreateTagAndClassify(context.Background(), "Video One", nil, "user"); err != nil {
		t.Fatalf("create user tag: %v", err)
	}

	var newVideos []*catalog.Video
	c := NewCrawler(CrawlerConfig{
		Driver:          drv,
		Catalog:         cat,
		PythonPath:      "sh",
		ScriptPath:      scriptPath,
		CommonThumbDir:  commonThumbs,
		SpiderTimeout:   10 * time.Second,
		DownloadTimeout: 10 * time.Second,
		OnNewVideo: func(v *catalog.Video) {
			newVideos = append(newVideos, v)
		},
	})

	// 4. First RunOnce: two videos should be newly imported.
	res, err := c.RunOnce(context.Background(), 15)
	if err != nil {
		t.Fatalf("RunOnce: %v", err)
	}
	if res.NewVideos != 2 || res.Skipped != 0 || res.Failed != 0 {
		t.Fatalf("first run result: new=%d skipped=%d failed=%d, want 2/0/0",
			res.NewVideos, res.Skipped, res.Failed)
	}
	if res.TargetNew != 15 {
		t.Fatalf("first run TargetNew = %d, want 15", res.TargetNew)
	}
	if res.SeenSnapshot != 0 {
		t.Fatalf("first run SeenSnapshot = %d, want 0 (catalog empty before first run)", res.SeenSnapshot)
	}
	if len(newVideos) != 2 {
		t.Fatalf("OnNewVideo called %d times, want 2", len(newVideos))
	}

	// 5. Check that the files landed on disk.
	for _, item := range []struct {
		sourceID string
		size     int64
	}{
		{"120001", 10},
		{"120002", 15},
	} {
		videoPath := filepath.Join(rootDir, "videos", item.sourceID+".mp4")
		info, err := os.Stat(videoPath)
		if err != nil {
			t.Fatalf("video %s missing: %v", item.sourceID, err)
		}
		if info.Size() != item.size {
			t.Fatalf("video %s size = %d, want %d", item.sourceID, info.Size(), item.size)
		}

		thumbPath := filepath.Join(rootDir, "thumbs", item.sourceID+".jpg")
		if _, err := os.Stat(thumbPath); err != nil {
			t.Fatalf("thumb %s missing: %v", item.sourceID, err)
		}

		// The copy placed in the common thumbs directory is named after the videoID.
		videoID := BuildVideoID(driveID, item.sourceID)
		commonThumb := filepath.Join(commonThumbs, videoID+".jpg")
		if _, err := os.Stat(commonThumb); err != nil {
			t.Fatalf("common thumb %s missing: %v", commonThumb, err)
		}
	}

	// 6. Check the catalog entries.
	for _, sourceID := range []string{"120001", "120002"} {
		videoID := BuildVideoID(driveID, sourceID)
		v, err := cat.GetVideo(context.Background(), videoID)
		if err != nil {
			t.Fatalf("GetVideo %s: %v", videoID, err)
		}
		if v.DriveID != driveID {
			t.Fatalf("video %s drive_id = %q want %q", videoID, v.DriveID, driveID)
		}
		if v.FileID != sourceID+".mp4" {
			t.Fatalf("video %s file_id = %q want %q", videoID, v.FileID, sourceID+".mp4")
		}
		if v.ThumbnailURL == "" {
			t.Fatalf("video %s ThumbnailURL empty (cover should be ready)", videoID)
		}
		if v.Author != DefaultAuthor {
			t.Fatalf("video %s author = %q want %q", videoID, v.Author, DefaultAuthor)
		}
		// Every video should carry the "91porn" tag (the UpsertVideo path
		// syncs the tags table automatically).
		hasDefaultTag := false
		for _, tag := range v.Tags {
			if tag == DefaultTag {
				hasDefaultTag = true
				break
			}
		}
		if !hasDefaultTag {
			t.Fatalf("video %s tags = %v, want contain %q", videoID, v.Tags, DefaultTag)
		}
		if sourceID == "120001" {
			if !containsString(v.Tags, "口交") {
				t.Fatalf("video %s tags = %v, want contain built-in tag 口交", videoID, v.Tags)
			}
			if !containsString(v.Tags, "Video One") {
				t.Fatalf("video %s tags = %v, want contain user tag Video One", videoID, v.Tags)
			}
		}
		if sourceID == "120002" && (containsString(v.Tags, "口交") || containsString(v.Tags, "Video One")) {
			t.Fatalf("video %s tags = %v, should not inherit tags from other spider91 videos", videoID, v.Tags)
		}
	}

	// 7. Second RunOnce: the source video IDs already exist, so everything is
	// skipped and no new files are downloaded.
	newVideos = nil
	res2, err := c.RunOnce(context.Background(), 15)
	if err != nil {
		t.Fatalf("second RunOnce: %v", err)
	}
	if res2.NewVideos != 0 {
		t.Fatalf("second run NewVideos = %d, want 0", res2.NewVideos)
	}
	if res2.Skipped != 2 {
		t.Fatalf("second run Skipped = %d, want 2", res2.Skipped)
	}
	// By the second run the catalog already holds two entries, so the seen
	// snapshot should contain two source video IDs.
	if res2.SeenSnapshot != 2 {
		t.Fatalf("second run SeenSnapshot = %d, want 2", res2.SeenSnapshot)
	}
	if len(newVideos) != 0 {
		t.Fatalf("second run OnNewVideo fired %d times, want 0", len(newVideos))
	}
}
|
||||
|
||||
// TestCrawlerRunOnceMissingScript 报错而不是 panic。
|
||||
func TestCrawlerRunOnceMissingScript(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "x.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("catalog: %v", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
drv := New(Config{ID: "x", RootDir: filepath.Join(tmp, "x")})
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "python3",
|
||||
ScriptPath: filepath.Join(tmp, "does-not-exist.py"),
|
||||
})
|
||||
|
||||
if _, err := c.RunOnce(context.Background(), 1); err == nil {
|
||||
t.Fatalf("expected error for missing script")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerPassesProxyToSpiderProcess(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
|
||||
tmp := t.TempDir()
|
||||
scriptPath := filepath.Join(tmp, "print_proxy_env.sh")
|
||||
script := `#!/bin/sh
|
||||
printf 'HTTP_PROXY=%s\n' "$HTTP_PROXY"
|
||||
printf 'HTTPS_PROXY=%s\n' "$HTTPS_PROXY"
|
||||
printf 'http_proxy=%s\n' "$http_proxy"
|
||||
printf 'https_proxy=%s\n' "$https_proxy"
|
||||
printf 'NO_PROXY=%s\n' "$NO_PROXY"
|
||||
printf 'no_proxy=%s\n' "$no_proxy"
|
||||
`
|
||||
if err := os.WriteFile(scriptPath, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
|
||||
proxyURL := "socks5h://proxy.local:1080"
|
||||
drv := New(Config{ID: "proxy-drive", RootDir: filepath.Join(tmp, "proxy-drive")})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
ProxyURL: proxyURL,
|
||||
})
|
||||
cmd, stdout, err := c.startSpiderTargetNew(
|
||||
context.Background(),
|
||||
1,
|
||||
filepath.Join(tmp, "seen.txt"),
|
||||
filepath.Join(tmp, "out.json"),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("startSpiderTargetNew: %v", err)
|
||||
}
|
||||
raw, err := io.ReadAll(stdout)
|
||||
if err != nil {
|
||||
t.Fatalf("read stdout: %v", err)
|
||||
}
|
||||
if err := cmd.Wait(); err != nil {
|
||||
t.Fatalf("wait: %v", err)
|
||||
}
|
||||
|
||||
want := strings.Join([]string{
|
||||
"HTTP_PROXY=" + proxyURL,
|
||||
"HTTPS_PROXY=" + proxyURL,
|
||||
"http_proxy=" + proxyURL,
|
||||
"https_proxy=" + proxyURL,
|
||||
"NO_PROXY=",
|
||||
"no_proxy=",
|
||||
}, "\n") + "\n"
|
||||
if string(raw) != want {
|
||||
t.Fatalf("proxy env = %q, want %q", string(raw), want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfigureExplicitProxySupportsSocksSchemes(t *testing.T) {
|
||||
for _, raw := range []string{
|
||||
"socks5://127.0.0.1:1080",
|
||||
"socks5h://proxy-user:proxy-pass@127.0.0.1:1080",
|
||||
} {
|
||||
t.Run(raw, func(t *testing.T) {
|
||||
transport := &http.Transport{Proxy: http.ProxyFromEnvironment}
|
||||
if err := configureExplicitProxy(transport, raw); err != nil {
|
||||
t.Fatalf("configureExplicitProxy: %v", err)
|
||||
}
|
||||
if transport.Proxy != nil {
|
||||
t.Fatalf("Transport.Proxy should be nil for SOCKS proxy")
|
||||
}
|
||||
if transport.DialContext == nil {
|
||||
t.Fatalf("Transport.DialContext should be set for SOCKS proxy")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
transport := &http.Transport{Proxy: http.ProxyFromEnvironment}
|
||||
if err := configureExplicitProxy(transport, "http://127.0.0.1:7890"); err != nil {
|
||||
t.Fatalf("configureExplicitProxy http: %v", err)
|
||||
}
|
||||
if transport.Proxy == nil {
|
||||
t.Fatalf("Transport.Proxy should be set for HTTP proxy")
|
||||
}
|
||||
if transport.DialContext != nil {
|
||||
t.Fatalf("Transport.DialContext should not be set for HTTP proxy")
|
||||
}
|
||||
|
||||
if err := configureExplicitProxy(&http.Transport{}, "ftp://127.0.0.1:21"); err == nil {
|
||||
t.Fatalf("expected unsupported proxy scheme error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectSocksTargetIPPrefersIPv4(t *testing.T) {
|
||||
got := selectSocksTargetIP([]net.IPAddr{
|
||||
{IP: net.ParseIP("2606:4700:20::681a:229")},
|
||||
{IP: net.ParseIP("104.26.3.41")},
|
||||
})
|
||||
if got == nil || got.String() != "104.26.3.41" {
|
||||
t.Fatalf("selectSocksTargetIP = %v, want IPv4 104.26.3.41", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCrawlerThumbDownloadFailureMarksStatusFailed 验证:网站封面下载失败时
|
||||
// crawler 把 thumbnail_status 显式标 'failed',避免后续封面补队列一直重复
|
||||
// 捞到这条 spider91 视频。
|
||||
//
|
||||
// 历史 bug:之前 thumb 下载失败仅打 log,url=”, status 走 schema DEFAULT 'pending'。
|
||||
// CountVideosNeedingThumbnail 条件是 url=” AND status != 'failed' → count=1。
|
||||
// spider91 drive 的 thumb worker 按设计不处理 spider91 视频 → 没人会改 status,
|
||||
// 后续补队列会一直认为它还缺封面。
|
||||
func TestCrawlerThumbDownloadFailureMarksStatusFailed(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
// 假 HTTP 服务器:thumb 路径返回 500,video 正常返回字节。
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "120101.mp4"):
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("FAKEVIDEO"))
|
||||
case strings.Contains(r.URL.Path, "120101.jpg"):
|
||||
http.Error(w, "broken", http.StatusInternalServerError)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
videoEntries := []map[string]string{
|
||||
{
|
||||
"title": "Thumb Failure Video",
|
||||
"thumb_url": srv.URL + "/thumb/120101.jpg",
|
||||
"video_url": srv.URL + "/videos/120101.mp4",
|
||||
"viewkey": "vk-thumb-fail",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-thumb-fail",
|
||||
},
|
||||
}
|
||||
scriptPath := filepath.Join(tmp, "fake.sh")
|
||||
if err := os.WriteFile(scriptPath, []byte(buildFakeSpiderScript(videoEntries)), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "test.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("catalog: %v", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
driveID := "thumbfail-drive"
|
||||
drv := New(Config{ID: driveID, RootDir: filepath.Join(tmp, "spider91", driveID)})
|
||||
if err := cat.UpsertDrive(context.Background(), &catalog.Drive{
|
||||
ID: driveID, Kind: Kind, Name: "thumbfail",
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert drive: %v", err)
|
||||
}
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 5)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 {
|
||||
t.Fatalf("expected 1 new video, got %d (failed=%d)", res.NewVideos, res.Failed)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(context.Background(), "spider91-"+driveID+"-120101")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Errorf("ThumbnailURL = %q, want empty (download failed)", got.ThumbnailURL)
|
||||
}
|
||||
|
||||
// 关键断言:CountVideosNeedingThumbnail 应该返回 0。
|
||||
// 该函数的 SQL 条件是 `url = '' AND status != 'failed'`;如果 crawler 没把
|
||||
// status 标 'failed'(schema DEFAULT 'pending'),count 就会是 1。
|
||||
count, err := cat.CountVideosNeedingThumbnail(context.Background(), driveID)
|
||||
if err != nil {
|
||||
t.Fatalf("count: %v", err)
|
||||
}
|
||||
if count != 0 {
|
||||
t.Fatalf("CountVideosNeedingThumbnail = %d, want 0 (status should be 'failed' to unblock teaser worker)", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerUsesCrawlerVideoURLForFirstDownload(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
var detailRequests int32
|
||||
var originalRequests int32
|
||||
var wrongRequests int32
|
||||
var srv *httptest.Server
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.URL.Path == "/v.php":
|
||||
atomic.AddInt32(&detailRequests, 1)
|
||||
_, _ = w.Write([]byte(spider91DetailHTML(srv.URL + "/videos/856305.mp4?token=wrong")))
|
||||
case r.URL.Path == "/videos/120201.mp4" && r.URL.Query().Get("token") == "original":
|
||||
atomic.AddInt32(&originalRequests, 1)
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("ORIGINALVIDEO"))
|
||||
case r.URL.Path == "/videos/856305.mp4":
|
||||
atomic.AddInt32(&wrongRequests, 1)
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("WRONGVIDEO"))
|
||||
case r.URL.Path == "/thumb/120201.jpg":
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
_, _ = w.Write([]byte("\xff\xd8\xff\xe0thumb"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
entry := map[string]string{
|
||||
"title": "Use Original URL First",
|
||||
"thumb_url": srv.URL + "/thumb/wrong-thumb.jpg",
|
||||
"video_url": srv.URL + "/videos/120201.mp4?token=original",
|
||||
"viewkey": "vk-use-original",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-use-original",
|
||||
}
|
||||
cat, drv, scriptPath := seedCrawlerTestDeps(t, tmp, "use-original-drive", []map[string]string{entry})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result new=%d failed=%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
if got := atomic.LoadInt32(&detailRequests); got != 0 {
|
||||
t.Fatalf("detail requests = %d, want 0 (first download should use crawler URL)", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&originalRequests); got != 1 {
|
||||
t.Fatalf("original URL requests = %d, want 1", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&wrongRequests); got != 0 {
|
||||
t.Fatalf("wrong source URL requests = %d, want 0", got)
|
||||
}
|
||||
info, err := os.Stat(filepath.Join(drv.RootDir(), "videos", "120201.mp4"))
|
||||
if err != nil {
|
||||
t.Fatalf("original video missing: %v", err)
|
||||
}
|
||||
if info.Size() != int64(len("ORIGINALVIDEO")) {
|
||||
t.Fatalf("original video size = %d, want %d", info.Size(), len("ORIGINALVIDEO"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRefreshesVideoURLAfterExpiredDownload(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
var detailRequests int32
|
||||
var staleRequests int32
|
||||
var freshRequests int32
|
||||
var srv *httptest.Server
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.URL.Path == "/v.php":
|
||||
n := atomic.AddInt32(&detailRequests, 1)
|
||||
videoURL := srv.URL + "/videos/120202.mp4?token=stale"
|
||||
if n > 1 {
|
||||
videoURL = srv.URL + "/videos/120202.mp4?token=fresh"
|
||||
}
|
||||
_, _ = w.Write([]byte(spider91DetailHTML(videoURL)))
|
||||
case r.URL.Path == "/videos/120202.mp4" && r.URL.Query().Get("token") == "stale":
|
||||
atomic.AddInt32(&staleRequests, 1)
|
||||
http.Error(w, "expired", http.StatusForbidden)
|
||||
case r.URL.Path == "/videos/120202.mp4" && r.URL.Query().Get("token") == "fresh":
|
||||
atomic.AddInt32(&freshRequests, 1)
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("REFRESHEDVIDEO"))
|
||||
case r.URL.Path == "/thumb/120202.jpg":
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
_, _ = w.Write([]byte("\xff\xd8\xff\xe0thumb"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
entry := map[string]string{
|
||||
"title": "Refresh After Expired Download",
|
||||
"thumb_url": srv.URL + "/thumb/wrong-thumb.jpg",
|
||||
"video_url": srv.URL + "/videos/120202.mp4?token=old",
|
||||
"viewkey": "vk-refresh-after",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-refresh-after",
|
||||
}
|
||||
cat, drv, scriptPath := seedCrawlerTestDeps(t, tmp, "refresh-after-drive", []map[string]string{entry})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result new=%d failed=%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
if got := atomic.LoadInt32(&detailRequests); got < 2 {
|
||||
t.Fatalf("detail requests = %d, want at least 2 (initial refresh + retry refresh)", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&staleRequests); got != 1 {
|
||||
t.Fatalf("stale URL requests = %d, want 1", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&freshRequests); got != 1 {
|
||||
t.Fatalf("fresh URL requests = %d, want 1", got)
|
||||
}
|
||||
info, err := os.Stat(filepath.Join(drv.RootDir(), "videos", "120202.mp4"))
|
||||
if err != nil {
|
||||
t.Fatalf("refreshed video missing: %v", err)
|
||||
}
|
||||
if info.Size() != int64(len("REFRESHEDVIDEO")) {
|
||||
t.Fatalf("refreshed video size = %d, want %d", info.Size(), len("REFRESHEDVIDEO"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRejectsRefreshedSourceIDMismatch(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
var srv *httptest.Server
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.URL.Path == "/v.php":
|
||||
_, _ = w.Write([]byte(spider91DetailHTML(srv.URL + "/videos/856305.mp4?token=fresh")))
|
||||
case r.URL.Path == "/videos/1203058.mp4":
|
||||
http.Error(w, "expired", http.StatusForbidden)
|
||||
case r.URL.Path == "/videos/856305.mp4":
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("WRONGVIDEO"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
entry := map[string]string{
|
||||
"title": "Source ID Mismatch",
|
||||
"thumb_url": srv.URL + "/thumb/1203058.jpg",
|
||||
"video_url": srv.URL + "/videos/1203058.mp4?token=old",
|
||||
"viewkey": "86fd91cce1f2e1a154cc",
|
||||
"source_id": "1203058",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=86fd91cce1f2e1a154cc",
|
||||
}
|
||||
cat, drv, scriptPath := seedCrawlerTestDeps(t, tmp, "mismatch-drive", []map[string]string{entry})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Failed != 1 {
|
||||
t.Fatalf("result new=%d failed=%d, want 0/1", res.NewVideos, res.Failed)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.RootDir(), "videos", "1203058.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("mismatched source file should not be written, stat err=%v", err)
|
||||
}
|
||||
if v, _ := cat.GetVideo(context.Background(), BuildVideoID(drv.ID(), "1203058")); v != nil {
|
||||
t.Fatalf("mismatched video should not be inserted: %+v", v)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSourceIDForItemRequiresNumericSourceID(t *testing.T) {
|
||||
if got := sourceIDForItem(spiderVideoEntry{
|
||||
Viewkey: "86fd91cce1f2e1a154cc",
|
||||
VideoURL: "https://cdn.example/videos/1203058.mp4?token=x",
|
||||
}); got != "1203058" {
|
||||
t.Fatalf("sourceIDForItem(video url) = %q, want 1203058", got)
|
||||
}
|
||||
if got := sourceIDForItem(spiderVideoEntry{
|
||||
Viewkey: "86fd91cce1f2e1a154cc",
|
||||
ThumbURL: "https://img.example/thumb/1203058.jpg",
|
||||
}); got != "1203058" {
|
||||
t.Fatalf("sourceIDForItem(thumb url) = %q, want 1203058", got)
|
||||
}
|
||||
if got := sourceIDForItem(spiderVideoEntry{
|
||||
Viewkey: "86fd91cce1f2e1a154cc",
|
||||
SourceID: "not-numeric",
|
||||
VideoURL: "https://cdn.example/videos/video.mp4",
|
||||
}); got != "" {
|
||||
t.Fatalf("sourceIDForItem(non numeric) = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeThumbURLForSource(t *testing.T) {
|
||||
got := normalizeThumbURLForSource("https://img.example/thumb/856305.jpg?x=1#frag", "1203058")
|
||||
want := "https://img.example/thumb/1203058.jpg"
|
||||
if got != want {
|
||||
t.Fatalf("normalizeThumbURLForSource = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpider91ListURLForDetail(t *testing.T) {
|
||||
got := spider91ListURLForDetail("https://www.91porn.com/view_video.php?viewkey=abc&page=5&c=furum&viewtype=basic&category=top")
|
||||
want := "https://www.91porn.com/v.php?category=top&page=5&viewtype=basic"
|
||||
if got != want {
|
||||
t.Fatalf("spider91ListURLForDetail = %q, want %q", got, want)
|
||||
}
|
||||
if got := spider91ListURLForDetail("http://127.0.0.1/v.php?viewkey=abc&page=5&viewtype=basic&category=top"); got != "" {
|
||||
t.Fatalf("spider91ListURLForDetail(localhost) = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpider91CookieHeader(t *testing.T) {
|
||||
got := spider91CookieHeader([]*http.Cookie{
|
||||
{Name: "CLIPSHARE", Value: "abc"},
|
||||
{Name: "ga", Value: "def"},
|
||||
{Name: "mode", Value: "m"},
|
||||
})
|
||||
want := "mode=d; CLIPSHARE=abc; ga=def"
|
||||
if got != want {
|
||||
t.Fatalf("spider91CookieHeader = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func spider91DetailHTML(videoURL string) string {
|
||||
fragment := `<video><source src="` + videoURL + `" type="video/mp4"></video>`
|
||||
return `document.write(strencode2("` + url.PathEscape(fragment) + `"));`
|
||||
}
|
||||
|
||||
func seedCrawlerTestDeps(t *testing.T, tmp, driveID string, entries []map[string]string) (*catalog.Catalog, *Driver, string) {
|
||||
t.Helper()
|
||||
scriptPath := filepath.Join(tmp, driveID+"-fake.sh")
|
||||
if err := os.WriteFile(scriptPath, []byte(buildFakeSpiderScript(entries)), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
cat, err := catalog.Open(filepath.Join(tmp, driveID+".db"))
|
||||
if err != nil {
|
||||
t.Fatalf("catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: driveID, RootDir: filepath.Join(tmp, "spider91", driveID)})
|
||||
if err := cat.UpsertDrive(context.Background(), &catalog.Drive{
|
||||
ID: driveID, Kind: Kind, Name: driveID,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert drive: %v", err)
|
||||
}
|
||||
return cat, drv, scriptPath
|
||||
}
|
||||
|
||||
// buildFakeSpiderScript 生成一个伪 python 脚本(其实是 sh)。
|
||||
//
|
||||
// 行为:
|
||||
// - 解析 --output FILE / --stream-output 两个 flag
|
||||
// - --stream-output 时:逐行输出每个 entry 的 JSON 到 stdout 并 flush
|
||||
// - --output 时:把完整 JSON 数据写到 FILE(向后兼容,且作归档)
|
||||
//
|
||||
// 用 sh 来写是为了避免 Python 依赖。每条 entry 的 JSON 用 Go marshal 出来后嵌入。
|
||||
func buildFakeSpiderScript(entries []map[string]string) string {
|
||||
var sb strings.Builder
|
||||
sb.WriteString("#!/bin/sh\n")
|
||||
sb.WriteString("out=\"\"; stream=0\n")
|
||||
sb.WriteString("while [ $# -gt 0 ]; do case \"$1\" in --output) out=\"$2\"; shift 2;; --stream-output) stream=1; shift;; *) shift;; esac; done\n")
|
||||
|
||||
// stream 模式:逐行 echo
|
||||
sb.WriteString("if [ \"$stream\" = \"1\" ]; then\n")
|
||||
for _, e := range entries {
|
||||
raw, _ := json.Marshal(e)
|
||||
// 用单引号 here-string 形式确保 JSON 中的双引号原样出来
|
||||
sb.WriteString(" cat <<'STREAM_EOF'\n")
|
||||
sb.Write(raw)
|
||||
sb.WriteString("\nSTREAM_EOF\n")
|
||||
}
|
||||
sb.WriteString("fi\n")
|
||||
|
||||
// 写 --output 文件(带完整 wrapper)
|
||||
sb.WriteString("if [ -n \"$out\" ]; then\n")
|
||||
sb.WriteString(" mkdir -p \"$(dirname \"$out\")\" 2>/dev/null\n")
|
||||
sb.WriteString(" cat > \"$out\" <<'OUT_EOF'\n")
|
||||
wrapper := map[string]any{
|
||||
"crawl_time": "2026-01-01T00:00:00",
|
||||
"total_videos": len(entries),
|
||||
"videos": entries,
|
||||
}
|
||||
wrapped, _ := json.MarshalIndent(wrapper, "", " ")
|
||||
sb.Write(wrapped)
|
||||
sb.WriteString("\nOUT_EOF\n")
|
||||
sb.WriteString("fi\n")
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// containsString reports whether want occurs in values.
func containsString(values []string, want string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == want {
			return true
		}
	}
	return false
}
|
||||
@@ -1,194 +0,0 @@
|
||||
// Package spider91 把 91porn 爬虫的产物(本地下载好的视频和封面)
|
||||
// 包装成一个 drives.Drive 实现,让它跟其它网盘一样可以挂载到 catalog 上。
|
||||
//
|
||||
// 与其它 drive 不同的是:
|
||||
// - 数据来源不是云盘 API,而是 Python 子进程跑 spider_91porn.py 后下载到本地
|
||||
// - StreamURL 直接返回本地文件路径,由 api.handleSpider91Video 用 http.ServeFile 服务
|
||||
// - List/Stat 用于 GC 兜底(按本地文件名列出 videos/ 目录)
|
||||
package spider91
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
// Kind is this drive's type code; it is written to catalog.drives.kind.
const Kind = "spider91"
|
||||
|
||||
// Config holds the settings needed to create a Driver.
type Config struct {
	// ID is the drive id in the catalog; the driver uses it to keep each
	// spider91 instance's local directory separate.
	ID string
	// RootDir is this drive's root directory on disk; the driver creates
	// videos/ and thumbs/ beneath it. The backend normally assembles it as
	// <data_dir>/spider91/<driveID>/.
	RootDir string
}
|
||||
|
||||
// Driver implements drives.Drive on top of a local directory.
type Driver struct {
	id      string // catalog drive id, returned by ID()
	rootDir string // storage root that holds videos/ and thumbs/
}
|
||||
|
||||
// New 构造一个 Driver。
|
||||
func New(c Config) *Driver {
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
rootDir: c.RootDir,
|
||||
}
|
||||
}
|
||||
|
||||
// Kind 返回 "spider91"。
|
||||
func (d *Driver) Kind() string { return Kind }
|
||||
|
||||
// ID 返回 catalog 中的 drive id。
|
||||
func (d *Driver) ID() string { return d.id }
|
||||
|
||||
// RootID 返回根目录的逻辑 ID。spider91 没有真正的目录结构,
|
||||
// 这里固定返回 "/" 占位,调用方实际不会用它去 List 子目录。
|
||||
func (d *Driver) RootID() string { return "/" }
|
||||
|
||||
// Init 确保 rootDir/videos 和 rootDir/thumbs 存在。
|
||||
func (d *Driver) Init(ctx context.Context) error {
|
||||
if strings.TrimSpace(d.rootDir) == "" {
|
||||
return errors.New("spider91: empty rootDir")
|
||||
}
|
||||
for _, sub := range []string{"videos", "thumbs"} {
|
||||
if err := os.MkdirAll(filepath.Join(d.rootDir, sub), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// VideosDir 返回视频文件存放目录的绝对路径。
|
||||
func (d *Driver) VideosDir() string { return filepath.Join(d.rootDir, "videos") }
|
||||
|
||||
// ThumbsDir 返回封面文件存放目录的绝对路径。
|
||||
func (d *Driver) ThumbsDir() string { return filepath.Join(d.rootDir, "thumbs") }
|
||||
|
||||
// RootDir 返回 driver 的存储根。
|
||||
func (d *Driver) RootDir() string { return d.rootDir }
|
||||
|
||||
// VideoPath 返回某个视频文件的绝对路径,并校验路径不会逃出 videos/ 目录。
|
||||
func (d *Driver) VideoPath(fileID string) (string, error) {
|
||||
return safeJoin(d.VideosDir(), fileID)
|
||||
}
|
||||
|
||||
// ThumbPath 返回某个封面文件的绝对路径。
|
||||
func (d *Driver) ThumbPath(fileID string) (string, error) {
|
||||
return safeJoin(d.ThumbsDir(), fileID)
|
||||
}
|
||||
|
||||
// List 列出 videos/ 目录下的视频文件,便于上层做 GC 兜底;
|
||||
// dirID 当前会被忽略,spider91 没有目录树。
|
||||
func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error) {
|
||||
entries, err := os.ReadDir(d.VideosDir())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
out := make([]drives.Entry, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
info, err := e.Info()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
out = append(out, drives.Entry{
|
||||
ID: e.Name(),
|
||||
Name: e.Name(),
|
||||
Size: info.Size(),
|
||||
IsDir: false,
|
||||
ModTime: info.ModTime(),
|
||||
})
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// Stat 查询单个视频文件的元数据。
|
||||
func (d *Driver) Stat(ctx context.Context, fileID string) (*drives.Entry, error) {
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &drives.Entry{
|
||||
ID: fileID,
|
||||
Name: fileID,
|
||||
Size: info.Size(),
|
||||
IsDir: info.IsDir(),
|
||||
ModTime: info.ModTime(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// StreamURL 返回本地视频文件路径,给 ffmpeg / 上层服务使用。
|
||||
// 注意:proxy.serve 不能直接处理本地路径,回放要走 api.handleSpider91Video。
|
||||
// teaser/封面 worker 通过 localPreviewLink 兜底走本地文件,刚好兼容 path 形式的 URL。
|
||||
func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLink, error) {
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if info.IsDir() || info.Size() == 0 {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
URL: path,
|
||||
Expires: time.Now().Add(24 * time.Hour),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Upload 不支持:上传由 crawler 自己完成,不通过 Drive 接口。
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
// EnsureDir 不支持。
|
||||
func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
// safeJoin 把 fileID 拼到 root 下,保证最终路径不会逃出 root。
|
||||
// fileID 必须是单纯的文件名(不含 / 或 .. 等组件)。
|
||||
func safeJoin(root, fileID string) (string, error) {
|
||||
id := strings.TrimSpace(fileID)
|
||||
if id == "" || filepath.Base(id) != id {
|
||||
return "", errors.New("spider91: invalid file id")
|
||||
}
|
||||
if root == "" {
|
||||
return "", errors.New("spider91: empty root dir")
|
||||
}
|
||||
rootAbs, err := filepath.Abs(root)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
pathAbs, err := filepath.Abs(filepath.Join(rootAbs, id))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if pathAbs != rootAbs && !strings.HasPrefix(pathAbs, rootAbs+string(os.PathSeparator)) {
|
||||
return "", errors.New("spider91: file id escapes root")
|
||||
}
|
||||
return pathAbs, nil
|
||||
}
|
||||
|
||||
// Compile-time check that *Driver satisfies the drives.Drive interface.
var _ drives.Drive = (*Driver)(nil)
|
||||
@@ -1,149 +0,0 @@
|
||||
package spider91
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDriverInitCreatesSubdirs(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: filepath.Join(dir, "drive1")})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
for _, sub := range []string{"videos", "thumbs"} {
|
||||
info, err := os.Stat(filepath.Join(dir, "drive1", sub))
|
||||
if err != nil {
|
||||
t.Fatalf("stat %s: %v", sub, err)
|
||||
}
|
||||
if !info.IsDir() {
|
||||
t.Fatalf("%s is not a dir", sub)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDriverInitRejectsEmptyRoot(t *testing.T) {
|
||||
d := New(Config{ID: "test", RootDir: ""})
|
||||
if err := d.Init(context.Background()); err == nil {
|
||||
t.Fatalf("expected error for empty root")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoPathRejectsTraversal(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
cases := []string{
|
||||
"",
|
||||
" ",
|
||||
"../etc/passwd",
|
||||
"sub/dir.mp4",
|
||||
"./abc.mp4",
|
||||
}
|
||||
for _, c := range cases {
|
||||
if _, err := d.VideoPath(c); err == nil {
|
||||
t.Fatalf("VideoPath(%q) accepted, want error", c)
|
||||
}
|
||||
if _, err := d.ThumbPath(c); err == nil {
|
||||
t.Fatalf("ThumbPath(%q) accepted, want error", c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoPathHappy(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
got, err := d.VideoPath("abc.mp4")
|
||||
if err != nil {
|
||||
t.Fatalf("VideoPath: %v", err)
|
||||
}
|
||||
want := filepath.Join(dir, "videos", "abc.mp4")
|
||||
wantAbs, _ := filepath.Abs(want)
|
||||
if got != wantAbs {
|
||||
t.Fatalf("VideoPath: got %q want %q", got, wantAbs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListReturnsFiles(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "abc.mp4"), "data")
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "def.mp4"), "x")
|
||||
|
||||
entries, err := d.List(context.Background(), "/")
|
||||
if err != nil {
|
||||
t.Fatalf("List: %v", err)
|
||||
}
|
||||
if len(entries) != 2 {
|
||||
t.Fatalf("List len = %d, want 2", len(entries))
|
||||
}
|
||||
names := map[string]int64{}
|
||||
for _, e := range entries {
|
||||
names[e.Name] = e.Size
|
||||
}
|
||||
if names["abc.mp4"] != 4 || names["def.mp4"] != 1 {
|
||||
t.Fatalf("unexpected entries: %+v", names)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLReturnsLocalPath(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "abc.mp4"), "videodata")
|
||||
|
||||
link, err := d.StreamURL(context.Background(), "abc.mp4")
|
||||
if err != nil {
|
||||
t.Fatalf("StreamURL: %v", err)
|
||||
}
|
||||
if !strings.HasSuffix(link.URL, "videos/abc.mp4") {
|
||||
t.Fatalf("StreamURL.URL = %q, want suffix videos/abc.mp4", link.URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLEmptyFile(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "abc.mp4"), "")
|
||||
if _, err := d.StreamURL(context.Background(), "abc.mp4"); !errors.Is(err, os.ErrNotExist) {
|
||||
t.Fatalf("empty file should return os.ErrNotExist, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildVideoIDStable(t *testing.T) {
|
||||
id1 := BuildVideoID("crawler1", "abc")
|
||||
id2 := BuildVideoID("crawler1", "abc")
|
||||
if id1 != id2 {
|
||||
t.Fatalf("BuildVideoID not deterministic")
|
||||
}
|
||||
if id1 != "spider91-crawler1-abc" {
|
||||
t.Fatalf("BuildVideoID format unexpected: %q", id1)
|
||||
}
|
||||
}
|
||||
|
||||
// mustWrite creates the parent directory of path (mode 0o755) and writes
// content to path (mode 0o644), failing the test immediately on any error.
func mustWrite(t *testing.T, path, content string) {
	t.Helper()
	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
		t.Fatalf("mkdir: %v", mkErr)
	}
	writeErr := os.WriteFile(path, []byte(content), 0o644)
	if writeErr != nil {
		t.Fatalf("write: %v", writeErr)
	}
}
|
||||
@@ -1,55 +0,0 @@
|
||||
package spider91
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestDetectVideoExt(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
want string
|
||||
}{
|
||||
{"mp4 with token", "https://cdn.example.com/mp43/abc.mp4?st=xyz&e=12345", ".mp4"},
|
||||
{"webm", "https://cdn.example.com/path/video.webm?token=1", ".webm"},
|
||||
{"mkv", "https://cdn.example.com/path/foo.mkv", ".mkv"},
|
||||
{"mov", "https://cdn.example.com/path/foo.mov?x=1", ".mov"},
|
||||
{"flv", "https://cdn.example.com/path/foo.flv", ".flv"},
|
||||
{"m4v", "https://cdn.example.com/path/foo.m4v", ".m4v"},
|
||||
{"avi", "https://cdn.example.com/path/foo.avi", ".avi"},
|
||||
{"m3u8 fallback to mp4", "https://cdn.example.com/path/playlist.m3u8", ".mp4"},
|
||||
{"ts fallback to mp4", "https://cdn.example.com/path/seg001.ts", ".mp4"},
|
||||
{"unknown ext fallback", "https://cdn.example.com/path/foo.weird", ".mp4"},
|
||||
{"no ext fallback", "https://cdn.example.com/v.php?id=12345", ".mp4"},
|
||||
{"empty url", "", ".mp4"},
|
||||
{"uppercase", "https://cdn.example.com/path/FOO.MP4?token=1", ".mp4"},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := detectVideoExt(tc.url)
|
||||
if got != tc.want {
|
||||
t.Fatalf("detectVideoExt(%q) = %q, want %q", tc.url, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectThumbExt(t *testing.T) {
|
||||
tests := []struct {
|
||||
url string
|
||||
want string
|
||||
}{
|
||||
{"https://cdn.example.com/thumb/foo.jpg", ".jpg"},
|
||||
{"https://cdn.example.com/thumb/foo.jpeg", ".jpeg"},
|
||||
{"https://cdn.example.com/thumb/foo.png", ".png"},
|
||||
{"https://cdn.example.com/thumb/foo.webp", ".webp"},
|
||||
{"https://cdn.example.com/thumb/foo.gif", ".gif"},
|
||||
{"https://cdn.example.com/thumb/foo.svg", ".jpg"}, // not in whitelist
|
||||
{"https://cdn.example.com/thumb/no-ext", ".jpg"},
|
||||
{"", ".jpg"},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
got := detectThumbExt(tc.url)
|
||||
if got != tc.want {
|
||||
t.Fatalf("detectThumbExt(%q) = %q, want %q", tc.url, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,19 +2,23 @@ package wopan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
sdk "github.com/OpenListTeam/wopan-sdk-go"
|
||||
"github.com/go-resty/resty/v2"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
// Driver 封装联通沃盘
|
||||
// Driver 封装联通网盘
|
||||
type Driver struct {
|
||||
id string
|
||||
rootID string
|
||||
@@ -23,14 +27,24 @@ type Driver struct {
|
||||
refreshToken string
|
||||
client *sdk.WoClient
|
||||
onTokenUpdate func(access, refresh string)
|
||||
uploadTempDir string
|
||||
|
||||
listMu sync.Mutex
|
||||
lastListAt time.Time
|
||||
listInterval time.Duration
|
||||
listCooldown time.Duration
|
||||
|
||||
fileIDMu sync.RWMutex
|
||||
fidToID map[string]string
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
ID string
|
||||
AccessToken string
|
||||
RefreshToken string
|
||||
FamilyID string // 空则走个人空间,有值则走家庭空间
|
||||
RootID string // 根目录 ID,默认 "0"
|
||||
ID string
|
||||
AccessToken string
|
||||
RefreshToken string
|
||||
FamilyID string // 空则走个人空间,有值则走家庭空间
|
||||
RootID string // 根目录 ID,默认 "0"
|
||||
UploadTempDir string
|
||||
// 当 SDK 刷新 token 时回调,便于持久化
|
||||
OnTokenUpdate func(access, refresh string)
|
||||
}
|
||||
@@ -47,6 +61,10 @@ func New(c Config) *Driver {
|
||||
accessToken: c.AccessToken,
|
||||
refreshToken: c.RefreshToken,
|
||||
onTokenUpdate: c.OnTokenUpdate,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
listInterval: 800 * time.Millisecond,
|
||||
listCooldown: 5 * time.Minute,
|
||||
fidToID: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,15 +96,41 @@ func (d *Driver) spaceType() string {
|
||||
}
|
||||
|
||||
func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error) {
|
||||
d.listMu.Lock()
|
||||
defer d.listMu.Unlock()
|
||||
|
||||
var result []drives.Entry
|
||||
pageNum := 0
|
||||
pageSize := 100
|
||||
for {
|
||||
data, err := d.client.QueryAllFiles(d.spaceType(), dirID, pageNum, pageSize, 0, d.familyID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("wopan list: %w", err)
|
||||
var data *sdk.QueryAllFilesData
|
||||
for attempt := 0; ; attempt++ {
|
||||
if err := d.waitForListSlotLocked(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var err error
|
||||
data, err = d.client.QueryAllFiles(d.spaceType(), dirID, pageNum, pageSize, 0, d.familyID, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
})
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
err = wopanRequestError("list", err)
|
||||
wait, ok := drives.RateLimitRetryAfter(err)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
if wait <= 0 {
|
||||
wait = d.listCooldown
|
||||
}
|
||||
log.Printf("[wopan] list cooling down drive=%s dir=%s page=%d cooldown=%s attempt=%d err=%v",
|
||||
d.id, dirID, pageNum, wait, attempt+1, err)
|
||||
if err := sleepContext(ctx, wait); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
for _, f := range data.Files {
|
||||
d.rememberFileID(f)
|
||||
result = append(result, fileToEntry(f, dirID))
|
||||
}
|
||||
if len(data.Files) < pageSize {
|
||||
@@ -103,9 +147,11 @@ func (d *Driver) Stat(ctx context.Context, fileID string) (*drives.Entry, error)
|
||||
}
|
||||
|
||||
func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLink, error) {
|
||||
data, err := d.client.GetDownloadUrlV2([]string{fileID})
|
||||
data, err := d.client.GetDownloadUrlV2([]string{fileID}, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("wopan download url: %w", err)
|
||||
return nil, wopanRequestError("download url", err)
|
||||
}
|
||||
if len(data.List) == 0 {
|
||||
return nil, fmt.Errorf("wopan download url: empty response")
|
||||
@@ -119,7 +165,12 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
// wopan SDK 要求 *os.File,先把流落到临时文件再上传
|
||||
tmp, err := os.CreateTemp("", "wopan-upload-*.tmp")
|
||||
if d.uploadTempDir != "" {
|
||||
if err := os.MkdirAll(d.uploadTempDir, 0o755); err != nil {
|
||||
return "", fmt.Errorf("wopan upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(d.uploadTempDir, "wopan-upload-*.tmp")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -142,9 +193,151 @@ func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader,
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("wopan upload: %w", err)
|
||||
}
|
||||
if fid != "" {
|
||||
if objectID, err := d.findDeleteFileIDInParent(ctx, parentID, drives.SourceFile{
|
||||
FileID: fid,
|
||||
Name: name,
|
||||
Size: size,
|
||||
}); err == nil {
|
||||
d.rememberFIDMapping(fid, objectID)
|
||||
} else {
|
||||
log.Printf("[wopan] upload drive=%s parent=%s fid=%s resolve object id: %v", d.id, parentID, fid, err)
|
||||
}
|
||||
}
|
||||
return fid, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
if d.client == nil {
|
||||
return fmt.Errorf("wopan rename: driver not initialized")
|
||||
}
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return fmt.Errorf("wopan rename: empty file id")
|
||||
}
|
||||
newName = strings.TrimSpace(newName)
|
||||
if newName == "" {
|
||||
return fmt.Errorf("wopan rename: empty new name")
|
||||
}
|
||||
renameID := fileID
|
||||
if cached := d.cachedDeleteFileID(fileID); cached != "" {
|
||||
renameID = cached
|
||||
}
|
||||
if err := d.client.RenameFileOrDirectory(d.spaceType(), 1, renameID, newName, d.familyID, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
}); err != nil {
|
||||
return wopanRequestError("rename", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if d.client == nil {
|
||||
return fmt.Errorf("wopan remove: driver not initialized")
|
||||
}
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return fmt.Errorf("wopan remove: empty file id")
|
||||
}
|
||||
deleteID := fileID
|
||||
if cached := d.cachedDeleteFileID(fileID); cached != "" {
|
||||
deleteID = cached
|
||||
}
|
||||
if err := d.deleteFileByObjectID(ctx, deleteID); err != nil {
|
||||
return fmt.Errorf("wopan remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) RemoveSource(ctx context.Context, source drives.SourceFile) error {
|
||||
if d.client == nil {
|
||||
return fmt.Errorf("wopan remove: driver not initialized")
|
||||
}
|
||||
fileID := strings.TrimSpace(source.FileID)
|
||||
if fileID == "" {
|
||||
return fmt.Errorf("wopan remove: empty file id")
|
||||
}
|
||||
deleteID, err := d.resolveDeleteFileID(ctx, source)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := d.deleteFileByObjectID(ctx, deleteID); err != nil {
|
||||
return fmt.Errorf("wopan remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) deleteFileByObjectID(ctx context.Context, fileID string) error {
|
||||
if err := d.client.DeleteFile(d.spaceType(), nil, []string{fileID}, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) resolveDeleteFileID(ctx context.Context, source drives.SourceFile) (string, error) {
|
||||
fileID := strings.TrimSpace(source.FileID)
|
||||
if fileID == "" {
|
||||
return "", fmt.Errorf("wopan remove: empty file id")
|
||||
}
|
||||
if cached := d.cachedDeleteFileID(fileID); cached != "" {
|
||||
return cached, nil
|
||||
}
|
||||
parentID := strings.TrimSpace(source.ParentID)
|
||||
if parentID == "" {
|
||||
return fileID, nil
|
||||
}
|
||||
return d.findDeleteFileIDInParent(ctx, parentID, source)
|
||||
}
|
||||
|
||||
func (d *Driver) findDeleteFileIDInParent(ctx context.Context, parentID string, source drives.SourceFile) (string, error) {
|
||||
d.listMu.Lock()
|
||||
defer d.listMu.Unlock()
|
||||
|
||||
pageNum := 0
|
||||
pageSize := 100
|
||||
for {
|
||||
var data *sdk.QueryAllFilesData
|
||||
for attempt := 0; ; attempt++ {
|
||||
if err := d.waitForListSlotLocked(ctx); err != nil {
|
||||
return "", err
|
||||
}
|
||||
var err error
|
||||
data, err = d.client.QueryAllFiles(d.spaceType(), parentID, pageNum, pageSize, 0, d.familyID, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
})
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
err = wopanRequestError("resolve delete id", err)
|
||||
wait, ok := drives.RateLimitRetryAfter(err)
|
||||
if !ok {
|
||||
return "", err
|
||||
}
|
||||
if wait <= 0 {
|
||||
wait = d.listCooldown
|
||||
}
|
||||
log.Printf("[wopan] resolve delete id cooling down drive=%s parent=%s page=%d cooldown=%s attempt=%d err=%v",
|
||||
d.id, parentID, pageNum, wait, attempt+1, err)
|
||||
if err := sleepContext(ctx, wait); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
for _, f := range data.Files {
|
||||
d.rememberFileID(f)
|
||||
if id, ok := deleteFileIDFromWopanFile(f, source); ok {
|
||||
return id, nil
|
||||
}
|
||||
}
|
||||
if len(data.Files) < pageSize {
|
||||
break
|
||||
}
|
||||
pageNum++
|
||||
}
|
||||
return "", fmt.Errorf("wopan remove: source file %q not found under parent %q", source.FileID, parentID)
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
parts := splitPath(pathFromRoot)
|
||||
currentID := d.rootID
|
||||
@@ -154,9 +347,11 @@ func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, er
|
||||
return "", err
|
||||
}
|
||||
if childID == "" {
|
||||
resp, err := d.client.CreateDirectory(d.spaceType(), currentID, name, d.familyID)
|
||||
resp, err := d.client.CreateDirectory(d.spaceType(), currentID, name, d.familyID, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("wopan mkdir %s: %w", name, err)
|
||||
return "", wopanRequestError("mkdir "+name, err)
|
||||
}
|
||||
childID = resp.Id
|
||||
}
|
||||
@@ -190,9 +385,12 @@ func fileToEntry(f *sdk.File, parentID string) drives.Entry {
|
||||
mod, _ := time.Parse("2006-01-02 15:04:05", f.CreateTime)
|
||||
name := f.Name
|
||||
isDir := f.Type == 0
|
||||
id := f.Fid
|
||||
id := f.Id
|
||||
if !isDir && f.Fid != "" {
|
||||
id = f.Fid
|
||||
}
|
||||
if id == "" {
|
||||
id = f.Id
|
||||
id = f.Fid
|
||||
}
|
||||
if isDir && !strings.HasSuffix(name, "/") {
|
||||
// 不改 name,只标志
|
||||
@@ -208,6 +406,128 @@ func fileToEntry(f *sdk.File, parentID string) drives.Entry {
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Driver) rememberFileID(f *sdk.File) {
|
||||
if f == nil || f.Type == 0 {
|
||||
return
|
||||
}
|
||||
objectID := strings.TrimSpace(f.Id)
|
||||
fid := strings.TrimSpace(f.Fid)
|
||||
if objectID == "" {
|
||||
return
|
||||
}
|
||||
d.fileIDMu.Lock()
|
||||
if d.fidToID == nil {
|
||||
d.fidToID = make(map[string]string)
|
||||
}
|
||||
d.fidToID[objectID] = objectID
|
||||
if fid != "" {
|
||||
d.fidToID[fid] = objectID
|
||||
}
|
||||
d.fileIDMu.Unlock()
|
||||
}
|
||||
|
||||
func (d *Driver) rememberFIDMapping(fid, objectID string) {
|
||||
fid = strings.TrimSpace(fid)
|
||||
objectID = strings.TrimSpace(objectID)
|
||||
if fid == "" || objectID == "" {
|
||||
return
|
||||
}
|
||||
d.fileIDMu.Lock()
|
||||
if d.fidToID == nil {
|
||||
d.fidToID = make(map[string]string)
|
||||
}
|
||||
d.fidToID[fid] = objectID
|
||||
d.fidToID[objectID] = objectID
|
||||
d.fileIDMu.Unlock()
|
||||
}
|
||||
|
||||
func (d *Driver) cachedDeleteFileID(fileID string) string {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return ""
|
||||
}
|
||||
d.fileIDMu.RLock()
|
||||
defer d.fileIDMu.RUnlock()
|
||||
return strings.TrimSpace(d.fidToID[fileID])
|
||||
}
|
||||
|
||||
func deleteFileIDFromWopanFile(f *sdk.File, source drives.SourceFile) (string, bool) {
|
||||
if f == nil || f.Type == 0 {
|
||||
return "", false
|
||||
}
|
||||
sourceID := strings.TrimSpace(source.FileID)
|
||||
if sourceID == "" {
|
||||
return "", false
|
||||
}
|
||||
objectID := strings.TrimSpace(f.Id)
|
||||
fid := strings.TrimSpace(f.Fid)
|
||||
if objectID == "" {
|
||||
return "", false
|
||||
}
|
||||
if sourceID != objectID && sourceID != fid {
|
||||
return "", false
|
||||
}
|
||||
return objectID, true
|
||||
}
|
||||
|
||||
func (d *Driver) waitForListSlotLocked(ctx context.Context) error {
|
||||
if d.listInterval <= 0 || d.lastListAt.IsZero() {
|
||||
d.lastListAt = time.Now()
|
||||
return ctx.Err()
|
||||
}
|
||||
next := d.lastListAt.Add(d.listInterval)
|
||||
now := time.Now()
|
||||
if now.Before(next) {
|
||||
if err := sleepContext(ctx, next.Sub(now)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
d.lastListAt = time.Now()
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
func sleepContext(ctx context.Context, d time.Duration) error {
|
||||
if d <= 0 {
|
||||
return ctx.Err()
|
||||
}
|
||||
timer := time.NewTimer(d)
|
||||
defer timer.Stop()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-timer.C:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func wopanRequestError(step string, err error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
wrapped := fmt.Errorf("wopan %s: %w", step, err)
|
||||
if isWopanRateLimitError(err) {
|
||||
return &drives.RateLimitError{
|
||||
Provider: "wopan",
|
||||
Err: wrapped,
|
||||
}
|
||||
}
|
||||
return wrapped
|
||||
}
|
||||
|
||||
func isWopanRateLimitError(err error) bool {
|
||||
if err == nil || errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
return false
|
||||
}
|
||||
return drives.ErrorMentionsHTTPStatus(err,
|
||||
http.StatusTooManyRequests,
|
||||
http.StatusInternalServerError,
|
||||
http.StatusBadGateway,
|
||||
http.StatusServiceUnavailable,
|
||||
http.StatusGatewayTimeout,
|
||||
509,
|
||||
)
|
||||
}
|
||||
|
||||
func guessMime(name string) string {
|
||||
ext := strings.ToLower(path.Ext(name))
|
||||
switch ext {
|
||||
@@ -229,3 +549,5 @@ func guessMime(name string) string {
|
||||
|
||||
// 确保实现接口
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
var _ drives.SourceRemover = (*Driver)(nil)
|
||||
|
||||
@@ -0,0 +1,113 @@
|
||||
package wopan
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
sdk "github.com/OpenListTeam/wopan-sdk-go"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
func TestFileToEntryUsesDirectoryIDAndFileFID(t *testing.T) {
|
||||
dir := fileToEntry(&sdk.File{
|
||||
Id: "dir-object-id",
|
||||
Fid: "0",
|
||||
Type: 0,
|
||||
Name: "collection",
|
||||
}, "root")
|
||||
if !dir.IsDir {
|
||||
t.Fatal("directory entry IsDir = false")
|
||||
}
|
||||
if dir.ID != "dir-object-id" {
|
||||
t.Fatalf("directory id = %q, want object id", dir.ID)
|
||||
}
|
||||
|
||||
file := fileToEntry(&sdk.File{
|
||||
Id: "file-object-id",
|
||||
Fid: "fid/with/slash",
|
||||
Type: 1,
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}, "dir-object-id")
|
||||
if file.IsDir {
|
||||
t.Fatal("file entry IsDir = true")
|
||||
}
|
||||
if file.ID != "fid/with/slash" {
|
||||
t.Fatalf("file id = %q, want fid for download", file.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteFileIDFromWopanFileUsesObjectIDForFID(t *testing.T) {
|
||||
got, ok := deleteFileIDFromWopanFile(&sdk.File{
|
||||
Id: "file-object-id",
|
||||
Fid: "fid/with/slash",
|
||||
Type: 1,
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}, drives.SourceFile{
|
||||
FileID: "fid/with/slash",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
})
|
||||
if !ok {
|
||||
t.Fatal("delete file id not resolved")
|
||||
}
|
||||
if got != "file-object-id" {
|
||||
t.Fatalf("delete file id = %q, want object id", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteFileIDFromWopanFileAcceptsObjectID(t *testing.T) {
|
||||
got, ok := deleteFileIDFromWopanFile(&sdk.File{
|
||||
Id: "file-object-id",
|
||||
Fid: "fid-1",
|
||||
Type: 1,
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}, drives.SourceFile{
|
||||
FileID: "file-object-id",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
})
|
||||
if !ok {
|
||||
t.Fatal("delete file id not resolved")
|
||||
}
|
||||
if got != "file-object-id" {
|
||||
t.Fatalf("delete file id = %q, want object id", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteFileIDFromWopanFileRejectsIDMismatch(t *testing.T) {
|
||||
if _, ok := deleteFileIDFromWopanFile(&sdk.File{
|
||||
Id: "file-object-id",
|
||||
Fid: "fid-1",
|
||||
Type: 1,
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}, drives.SourceFile{
|
||||
FileID: "other-fid",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}); ok {
|
||||
t.Fatal("delete file id resolved despite id mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWopanRequestErrorWrapsRateLimit(t *testing.T) {
|
||||
err := wopanRequestError("list", errors.New("request failed with status: 429 Too Many Requests"))
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if rateLimit.Provider != "wopan" {
|
||||
t.Fatalf("provider = %q, want wopan", rateLimit.Provider)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWopanRequestErrorLeavesNormalErrors(t *testing.T) {
|
||||
err := wopanRequestError("download url", errors.New("invalid access token"))
|
||||
var rateLimit *drives.RateLimitError
|
||||
if errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want non-rate-limit error", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,349 @@
|
||||
package wopan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-resty/resty/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultQRCodeAPIBase = "https://panservice.mail.wo.cn/wohome/open/v1/QRCode"
|
||||
defaultQRCodeClient = "1001000021"
|
||||
)
|
||||
|
||||
type QRConfig struct {
|
||||
APIBaseURL string
|
||||
HTTPClient *http.Client
|
||||
Now func() time.Time
|
||||
}
|
||||
|
||||
type QRClient struct {
|
||||
apiBase string
|
||||
client *resty.Client
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
type QRCodeSession struct {
|
||||
UUID string `json:"uuid"`
|
||||
QRImageDataURL string `json:"qrImageDataUrl"`
|
||||
ExpiresAt string `json:"expiresAt,omitempty"`
|
||||
}
|
||||
|
||||
type QRCodeStatus struct {
|
||||
State int `json:"state"`
|
||||
StatusText string `json:"statusText"`
|
||||
AccessToken string `json:"accessToken,omitempty"`
|
||||
RefreshToken string `json:"refreshToken,omitempty"`
|
||||
FamilyID string `json:"familyID,omitempty"`
|
||||
}
|
||||
|
||||
func NewQRClient(c QRConfig) *QRClient {
|
||||
apiBase := strings.TrimRight(strings.TrimSpace(c.APIBaseURL), "/")
|
||||
if apiBase == "" {
|
||||
apiBase = defaultQRCodeAPIBase
|
||||
}
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = &http.Client{Timeout: 20 * time.Second}
|
||||
}
|
||||
now := c.Now
|
||||
if now == nil {
|
||||
now = time.Now
|
||||
}
|
||||
return &QRClient{
|
||||
apiBase: apiBase,
|
||||
client: resty.NewWithClient(httpClient).
|
||||
SetTimeout(20*time.Second).
|
||||
SetHeader("Accept", "application/json"),
|
||||
now: now,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *QRClient) Generate(ctx context.Context) (QRCodeSession, error) {
|
||||
var envelope qrEnvelope
|
||||
res, err := c.request(ctx).
|
||||
SetResult(&envelope).
|
||||
Get(c.apiBase + "/generate")
|
||||
if err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
if res.IsError() {
|
||||
return QRCodeSession{}, qrAPIError(envelope.message(), res.StatusCode())
|
||||
}
|
||||
|
||||
var result qrGenerateResult
|
||||
if err := decodeResult(envelope.Result, &result); err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
result.UUID = strings.TrimSpace(result.UUID)
|
||||
result.Image = strings.TrimSpace(result.Image)
|
||||
if result.UUID == "" {
|
||||
return QRCodeSession{}, errors.New("wopan qr: empty uuid")
|
||||
}
|
||||
if result.Image == "" {
|
||||
return QRCodeSession{}, errors.New("wopan qr: empty image")
|
||||
}
|
||||
return QRCodeSession{
|
||||
UUID: result.UUID,
|
||||
QRImageDataURL: qrImageDataURL(result.Image),
|
||||
ExpiresAt: c.now().Add(60 * time.Second).Format(time.RFC3339),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *QRClient) Poll(ctx context.Context, uuid string) (QRCodeStatus, error) {
|
||||
uuid = strings.TrimSpace(uuid)
|
||||
if uuid == "" {
|
||||
return QRCodeStatus{}, errors.New("uuid is required")
|
||||
}
|
||||
|
||||
var envelope qrEnvelope
|
||||
res, err := c.request(ctx).
|
||||
SetQueryParam("uuid", uuid).
|
||||
SetResult(&envelope).
|
||||
Get(c.apiBase + "/query")
|
||||
if err != nil {
|
||||
return QRCodeStatus{}, err
|
||||
}
|
||||
if res.IsError() {
|
||||
return QRCodeStatus{}, qrAPIError(envelope.message(), res.StatusCode())
|
||||
}
|
||||
|
||||
result, err := decodeResultMap(envelope.Result)
|
||||
if err != nil {
|
||||
return QRCodeStatus{}, err
|
||||
}
|
||||
state := intValue(result["state"])
|
||||
status := QRCodeStatus{
|
||||
State: state,
|
||||
StatusText: qrStateText(state),
|
||||
}
|
||||
if state != 3 {
|
||||
return status, nil
|
||||
}
|
||||
|
||||
status.AccessToken = findStringByKeys(result, "access_token", "accessToken", "token", "tokenValue")
|
||||
status.RefreshToken = findStringByKeys(result, "refresh_token", "refreshToken")
|
||||
status.FamilyID = findStringByKeys(result, "family_id", "familyId", "familyID", "defaultFamilyId", "defaultHomeId", "homeId")
|
||||
if status.AccessToken == "" || status.RefreshToken == "" {
|
||||
missing := make([]string, 0, 2)
|
||||
if status.AccessToken == "" {
|
||||
missing = append(missing, "access_token")
|
||||
}
|
||||
if status.RefreshToken == "" {
|
||||
missing = append(missing, "refresh_token")
|
||||
}
|
||||
return QRCodeStatus{}, fmt.Errorf("wopan qr: login succeeded but missing %s; available keys: %s",
|
||||
strings.Join(missing, ", "), strings.Join(collectJSONKeys(result), ", "))
|
||||
}
|
||||
return status, nil
|
||||
}
|
||||
|
||||
func (c *QRClient) request(ctx context.Context) *resty.Request {
|
||||
return c.client.R().
|
||||
SetContext(ctx).
|
||||
SetHeaders(map[string]string{
|
||||
"client-id": defaultQRCodeClient,
|
||||
"x-yp-client-id": defaultQRCodeClient,
|
||||
"Accept": "application/json",
|
||||
"Accept-Language": "zh-CN,zh;q=0.9",
|
||||
})
|
||||
}
|
||||
|
||||
type (
	// qrEnvelope is the outer JSON object of a QR endpoint response. The
	// payload is kept raw in Result so each caller can decode it as needed;
	// the message may appear at the top level or inside Meta.
	qrEnvelope struct {
		Meta    qrMeta          `json:"meta"`
		Result  json.RawMessage `json:"result"`
		Code    any             `json:"code,omitempty"`
		Message string          `json:"message,omitempty"`
		Msg     string          `json:"msg,omitempty"`
	}

	// qrMeta is the "meta" object of a response envelope.
	qrMeta struct {
		Code    any    `json:"code,omitempty"`
		Message string `json:"message,omitempty"`
		Msg     string `json:"msg,omitempty"`
	}

	// qrGenerateResult is the "result" payload of the generate endpoint.
	qrGenerateResult struct {
		UUID  string `json:"uuid"`
		Image string `json:"image"`
	}
)

// message returns the first non-blank message in the envelope, trimmed,
// checking Message, Msg, Meta.Message and Meta.Msg in that order. It returns
// "" when all four are blank.
func (e qrEnvelope) message() string {
	candidates := [...]string{e.Message, e.Msg, e.Meta.Message, e.Meta.Msg}
	for i := range candidates {
		if trimmed := strings.TrimSpace(candidates[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
|
||||
|
||||
func decodeResult(raw json.RawMessage, dst any) error {
|
||||
if len(raw) == 0 || string(raw) == "null" {
|
||||
return errors.New("wopan qr: empty result")
|
||||
}
|
||||
if err := json.Unmarshal(raw, dst); err != nil {
|
||||
return fmt.Errorf("wopan qr: decode result: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func decodeResultMap(raw json.RawMessage) (map[string]any, error) {
|
||||
var result map[string]any
|
||||
if err := decodeResult(raw, &result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if result == nil {
|
||||
return nil, errors.New("wopan qr: empty result")
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// qrImageDataURL turns the image field of a generate response into a data
// URL. Input that already starts with "data:image/" (case-insensitive) is
// returned trimmed; anything else is treated as base64 PNG data and gets the
// "data:image/png;base64," prefix.
func qrImageDataURL(image string) string {
	trimmed := strings.TrimSpace(image)
	if !strings.HasPrefix(strings.ToLower(trimmed), "data:image/") {
		return "data:image/png;base64," + trimmed
	}
	return trimmed
}
|
||||
|
||||
// qrAPIError builds the error for a failed QR endpoint call. The trimmed
// server message is used as the error text; when it is blank the text is
// "HTTP <status>" instead.
func qrAPIError(message string, httpStatus int) error {
	if text := strings.TrimSpace(message); text != "" {
		return errors.New(text)
	}
	return errors.New(fmt.Sprintf("HTTP %d", httpStatus))
}
|
||||
|
||||
// qrStateText maps a QR login state code to its user-facing (Chinese) label:
// 1 waiting for scan, 2 scanned and awaiting confirmation in the app,
// 3 login succeeded, 4 QR code expired. Any other code yields the
// "unknown state" label.
func qrStateText(state int) string {
	labels := map[int]string{
		1: "等待扫码",
		2: "已扫码,请在联通网盘 App 确认",
		3: "登录成功",
		4: "二维码已过期",
	}
	if label, known := labels[state]; known {
		return label
	}
	return "未知状态"
}
|
||||
|
||||
func intValue(v any) int {
|
||||
switch x := v.(type) {
|
||||
case int:
|
||||
return x
|
||||
case int64:
|
||||
return int(x)
|
||||
case float64:
|
||||
return int(x)
|
||||
case json.Number:
|
||||
n, _ := x.Int64()
|
||||
return int(n)
|
||||
case string:
|
||||
n, _ := strconv.Atoi(strings.TrimSpace(x))
|
||||
return n
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// findStringByKeys searches the decoded JSON value v for the first non-empty
// string-like value stored under any of keys. Key comparison ignores case and
// the separators "_", "-" and " " (see normalizeJSONKey). It returns "" when
// nothing matches.
func findStringByKeys(v any, keys ...string) string {
	targets := make(map[string]struct{}, len(keys))
	for _, key := range keys {
		targets[normalizeJSONKey(key)] = struct{}{}
	}
	return findStringByNormalizedKeys(v, targets)
}

// findStringByNormalizedKeys is the recursive worker behind findStringByKeys.
//
// For an object it first checks the object's own keys against targets and
// only then descends into child values; arrays are searched element by
// element. Object keys are visited in sorted order: Go randomises map
// iteration, so without sorting the result would vary from call to call
// whenever more than one candidate key is present (for example both "token"
// and "accessToken").
func findStringByNormalizedKeys(v any, targets map[string]struct{}) string {
	switch x := v.(type) {
	case map[string]any:
		names := make([]string, 0, len(x))
		for name := range x {
			names = append(names, name)
		}
		sort.Strings(names)

		for _, name := range names {
			if _, ok := targets[normalizeJSONKey(name)]; !ok {
				continue
			}
			if s := stringValue(x[name]); s != "" {
				return s
			}
		}
		for _, name := range names {
			if s := findStringByNormalizedKeys(x[name], targets); s != "" {
				return s
			}
		}
	case []any:
		for _, value := range x {
			if s := findStringByNormalizedKeys(value, targets); s != "" {
				return s
			}
		}
	}
	return ""
}

// stringValue renders a scalar JSON value as a string: strings are trimmed,
// integers and json.Number are formatted in decimal, and floats with an
// integral value are printed without a fractional part. Any other type
// yields "".
func stringValue(v any) string {
	switch x := v.(type) {
	case string:
		return strings.TrimSpace(x)
	case int:
		return strconv.Itoa(x)
	case int64:
		return strconv.FormatInt(x, 10)
	case float64:
		if x == float64(int64(x)) {
			return strconv.FormatInt(int64(x), 10)
		}
		return strconv.FormatFloat(x, 'f', -1, 64)
	case json.Number:
		return strings.TrimSpace(x.String())
	default:
		return ""
	}
}

// normalizeJSONKey lower-cases key, trims surrounding whitespace and removes
// "_", "-" and " " so that spellings such as "access_token" and "accessToken"
// compare equal.
func normalizeJSONKey(key string) string {
	key = strings.ToLower(strings.TrimSpace(key))
	key = strings.ReplaceAll(key, "_", "")
	key = strings.ReplaceAll(key, "-", "")
	key = strings.ReplaceAll(key, " ", "")
	return key
}
|
||||
|
||||
func collectJSONKeys(v any) []string {
|
||||
seen := map[string]struct{}{}
|
||||
var walk func(any)
|
||||
walk = func(value any) {
|
||||
switch x := value.(type) {
|
||||
case map[string]any:
|
||||
for key, child := range x {
|
||||
if strings.TrimSpace(key) != "" {
|
||||
seen[key] = struct{}{}
|
||||
}
|
||||
walk(child)
|
||||
}
|
||||
case []any:
|
||||
for _, child := range x {
|
||||
walk(child)
|
||||
}
|
||||
}
|
||||
}
|
||||
walk(v)
|
||||
|
||||
keys := make([]string, 0, len(seen))
|
||||
for key := range seen {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
if len(keys) > 16 {
|
||||
keys = append(keys[:16], "...")
|
||||
}
|
||||
return keys
|
||||
}
|
||||
@@ -0,0 +1,128 @@
|
||||
package wopan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestQRCodeGenerateUsesServiceImage(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/QRCode/generate" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
if r.Header.Get("client-id") != defaultQRCodeClient {
|
||||
t.Fatalf("client-id = %q, want %q", r.Header.Get("client-id"), defaultQRCodeClient)
|
||||
}
|
||||
if r.Header.Get("x-yp-client-id") != defaultQRCodeClient {
|
||||
t.Fatalf("x-yp-client-id = %q, want %q", r.Header.Get("x-yp-client-id"), defaultQRCodeClient)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"meta": map[string]string{"code": "0000", "message": "ok"},
|
||||
"result": map[string]string{
|
||||
"uuid": "uuid-1",
|
||||
"image": "iVBORw0KGgo=",
|
||||
},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
got, err := NewQRClient(QRConfig{APIBaseURL: api.URL + "/QRCode"}).Generate(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("Generate() error = %v", err)
|
||||
}
|
||||
if got.UUID != "uuid-1" {
|
||||
t.Fatalf("uuid = %q, want uuid-1", got.UUID)
|
||||
}
|
||||
if got.QRImageDataURL != "data:image/png;base64,iVBORw0KGgo=" {
|
||||
t.Fatalf("qrImageDataUrl = %q, want PNG data URL", got.QRImageDataURL)
|
||||
}
|
||||
if got.ExpiresAt == "" {
|
||||
t.Fatalf("expiresAt is empty")
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRCodePollPending(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/QRCode/query" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
if r.URL.Query().Get("uuid") != "uuid-1" {
|
||||
t.Fatalf("uuid query = %q, want uuid-1", r.URL.Query().Get("uuid"))
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"meta": map[string]string{"code": "0000", "message": "ok"},
|
||||
"result": map[string]any{
|
||||
"state": 1,
|
||||
"token": nil,
|
||||
"refreshToken": nil,
|
||||
},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
got, err := NewQRClient(QRConfig{APIBaseURL: api.URL + "/QRCode"}).Poll(context.Background(), "uuid-1")
|
||||
if err != nil {
|
||||
t.Fatalf("Poll() error = %v", err)
|
||||
}
|
||||
if got.State != 1 || got.StatusText != "等待扫码" || got.AccessToken != "" || got.RefreshToken != "" {
|
||||
t.Fatalf("status = %#v, want pending without tokens", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRCodePollSuccessMapsTokenFields(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/QRCode/query" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"meta": map[string]string{"code": "0000", "message": "ok"},
|
||||
"result": map[string]any{
|
||||
"state": 3,
|
||||
"token": "access-1",
|
||||
"refreshToken": "refresh-1",
|
||||
},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
got, err := NewQRClient(QRConfig{APIBaseURL: api.URL + "/QRCode"}).Poll(context.Background(), "uuid-1")
|
||||
if err != nil {
|
||||
t.Fatalf("Poll() error = %v", err)
|
||||
}
|
||||
if got.State != 3 || got.AccessToken != "access-1" || got.RefreshToken != "refresh-1" {
|
||||
t.Fatalf("status = %#v, want token and refreshToken mapped", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRCodePollSuccessReportsMissingTokenKeys(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"meta": map[string]string{"code": "0000", "message": "ok"},
|
||||
"result": map[string]any{
|
||||
"state": 3,
|
||||
"user": map[string]string{"name": "demo"},
|
||||
},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
_, err := NewQRClient(QRConfig{APIBaseURL: api.URL + "/QRCode"}).Poll(context.Background(), "uuid-1")
|
||||
if err == nil {
|
||||
t.Fatal("Poll() error is nil, want missing token error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "missing access_token, refresh_token") ||
|
||||
!strings.Contains(err.Error(), "available keys") {
|
||||
t.Fatalf("error = %q, want missing token keys", err.Error())
|
||||
}
|
||||
}
|
||||
@@ -149,6 +149,28 @@ func (w *Worker) Status() TaskStatus {
|
||||
return status
|
||||
}
|
||||
|
||||
// WaitIdle blocks until the fingerprint queue is empty and no item is being processed.
|
||||
func (w *Worker) WaitIdle(ctx context.Context) error {
|
||||
if w == nil {
|
||||
return nil
|
||||
}
|
||||
if w.queue.lengthExcluding("") == 0 {
|
||||
return nil
|
||||
}
|
||||
ticker := time.NewTicker(200 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-ticker.C:
|
||||
if w.queue.lengthExcluding("") == 0 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Worker) processQueued(ctx context.Context, v *catalog.Video) {
|
||||
defer w.queue.release(v.ID)
|
||||
if w.Catalog == nil || w.Drive == nil || v == nil || v.ID == "" {
|
||||
@@ -327,11 +349,74 @@ func readHTTPRange(ctx context.Context, hc *http.Client, link *drives.StreamLink
|
||||
return data, nil
|
||||
}
|
||||
}
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
||||
if remoteRangeResponseLooksRateLimited(link.URL, resp.StatusCode, body) {
|
||||
return nil, &drives.RateLimitError{
|
||||
Provider: "fingerprint",
|
||||
RetryAfter: parseRetryAfter(resp.Header.Get("Retry-After")),
|
||||
Err: fmt.Errorf("remote sample rate limited: status=%d body=%s", resp.StatusCode, strings.TrimSpace(string(body))),
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("fingerprint: range request got status=%d for bytes=%d-%d", resp.StatusCode, r.start, end)
|
||||
}
|
||||
return io.ReadAll(io.LimitReader(resp.Body, r.length))
|
||||
}
|
||||
|
||||
func remoteRangeResponseLooksRateLimited(rawURL string, status int, body []byte) bool {
|
||||
if status == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
if isWopanMediaURL(rawURL) && (status == http.StatusForbidden || status == http.StatusTooManyRequests ||
|
||||
status == http.StatusInternalServerError || status == http.StatusBadGateway ||
|
||||
status == http.StatusServiceUnavailable || status == http.StatusGatewayTimeout ||
|
||||
status == 509) {
|
||||
return true
|
||||
}
|
||||
if isGuangYaPanMediaURL(rawURL) && (status == http.StatusForbidden || status == http.StatusTooManyRequests ||
|
||||
status == http.StatusInternalServerError || status == http.StatusBadGateway ||
|
||||
status == http.StatusServiceUnavailable || status == http.StatusGatewayTimeout ||
|
||||
status == 509) {
|
||||
return true
|
||||
}
|
||||
if status == http.StatusForbidden && isGoogleDriveMediaURL(rawURL) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isWopanMediaURL reports whether rawURL parses, has a path containing
// "/openapi/download" (case-insensitive), and has a host that ends in
// "pan.wo.cn" or "smartont.net" or contains "wo.cn".
//
// NOTE(review): the "wo.cn" substring test already covers the "pan.wo.cn"
// suffix and also matches unrelated hosts that merely contain that text;
// confirm this breadth is intended.
func isWopanMediaURL(rawURL string) bool {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return false
	}
	if !strings.Contains(strings.ToLower(parsed.Path), "/openapi/download") {
		return false
	}
	host := strings.ToLower(parsed.Hostname())
	for _, suffix := range []string{"pan.wo.cn", "smartont.net"} {
		if strings.HasSuffix(host, suffix) {
			return true
		}
	}
	return strings.Contains(host, "wo.cn")
}
|
||||
|
||||
// isGuangYaPanMediaURL reports whether rawURL parses and its host name
// (lower-cased, port stripped) ends in "guangyacdn.com" or "guangyapan.com".
func isGuangYaPanMediaURL(rawURL string) bool {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return false
	}
	host := strings.ToLower(parsed.Hostname())
	for _, suffix := range []string{"guangyacdn.com", "guangyapan.com"} {
		if strings.HasSuffix(host, suffix) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// isGoogleDriveMediaURL reports whether rawURL parses, its host contains
// "googleapis.com" and its path contains "/drive/" (both case-insensitive).
func isGoogleDriveMediaURL(rawURL string) bool {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return false
	}
	if !strings.Contains(strings.ToLower(parsed.Host), "googleapis.com") {
		return false
	}
	return strings.Contains(strings.ToLower(parsed.Path), "/drive/")
}
|
||||
|
||||
func parseRetryAfter(raw string) time.Duration {
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
|
||||
@@ -2,6 +2,7 @@ package fingerprint
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -85,6 +86,75 @@ func TestComputeRemoteUsesRangeSamples(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeRemote429ReturnsRateLimit(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Retry-After", "60")
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
_, _ = w.Write([]byte(`{"error":{"code":429}}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
drv := &fakeDrive{paths: map[string]string{"remote": srv.URL + "/video.mp4"}}
|
||||
_, err := Compute(ctx, drv, &catalog.Video{ID: "remote", FileID: "remote", Size: 1024 * 1024}, Config{
|
||||
SampleSizeBytes: 4,
|
||||
FullHashMaxSize: 8,
|
||||
HTTPClient: srv.Client(),
|
||||
}, srv.Client())
|
||||
if err == nil {
|
||||
t.Fatal("compute succeeded, want rate limit")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if rateLimit.RetryAfter != time.Minute {
|
||||
t.Fatalf("retry after = %s, want 1m", rateLimit.RetryAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWopanRemoteRangeErrorsLookRateLimited(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
rawURL string
|
||||
status int
|
||||
}{
|
||||
{rawURL: "https://gxdownload.pan.wo.cn:8445/openapi/download?fid=encoded", status: http.StatusForbidden},
|
||||
{rawURL: "https://du.smartont.net:8445/openapi/download?fid=encoded", status: http.StatusServiceUnavailable},
|
||||
{rawURL: "https://du.smartont.net:8445/openapi/download?fid=encoded", status: 509},
|
||||
} {
|
||||
if !remoteRangeResponseLooksRateLimited(tc.rawURL, tc.status, nil) {
|
||||
t.Fatalf("remoteRangeResponseLooksRateLimited(%q, %d) = false, want true", tc.rawURL, tc.status)
|
||||
}
|
||||
}
|
||||
if remoteRangeResponseLooksRateLimited("https://example.com/video.mp4", http.StatusForbidden, nil) {
|
||||
t.Fatal("generic 403 should not be treated as wopan rate limit")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGuangYaPanRemoteRangeErrorsLookRateLimited(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
rawURL string
|
||||
status int
|
||||
}{
|
||||
{rawURL: "https://txgz02-httpdown.guangyacdn.com/download/?fid=encoded", status: http.StatusForbidden},
|
||||
{rawURL: "https://txgz02-httpdown.guangyacdn.com/download/?fid=encoded", status: http.StatusServiceUnavailable},
|
||||
{rawURL: "https://txgz02-httpdown.guangyacdn.com/download/?fid=encoded", status: 509},
|
||||
} {
|
||||
if !remoteRangeResponseLooksRateLimited(tc.rawURL, tc.status, nil) {
|
||||
t.Fatalf("remoteRangeResponseLooksRateLimited(%q, %d) = false, want true", tc.rawURL, tc.status)
|
||||
}
|
||||
}
|
||||
if remoteRangeResponseLooksRateLimited("https://example.com/video.mp4", http.StatusForbidden, nil) {
|
||||
t.Fatal("generic 403 should not be treated as guangyapan rate limit")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoogleDriveRemoteRangeForbiddenLooksRateLimitedByURL(t *testing.T) {
|
||||
if !remoteRangeResponseLooksRateLimited("https://www.googleapis.com/drive/v3/files/file-1?alt=media", http.StatusForbidden, nil) {
|
||||
t.Fatal("google drive media 403 should be treated as rate limit by URL and status")
|
||||
}
|
||||
}
|
||||
|
||||
type fakeDrive struct {
|
||||
paths map[string]string
|
||||
}
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
package mediaasset
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const maxPlainStemBytes = 180
|
||||
const maxLegacyFilenameBytes = 255
|
||||
|
||||
func PreviewPath(localDir, videoID string) string {
|
||||
return filepath.Join(localDir, PreviewFilename(videoID))
|
||||
}
|
||||
|
||||
func ThumbnailPath(localDir, videoID string) string {
|
||||
return ThumbnailPathInDir(filepath.Join(localDir, "thumbs"), videoID)
|
||||
}
|
||||
|
||||
func ThumbnailPathInDir(thumbDir, videoID string) string {
|
||||
return filepath.Join(thumbDir, ThumbnailFilename(videoID))
|
||||
}
|
||||
|
||||
func PreviewPathCandidates(localDir, videoID string) []string {
|
||||
return pathCandidates(localDir, videoID, ".mp4", "")
|
||||
}
|
||||
|
||||
func ThumbnailPathCandidates(localDir, videoID string) []string {
|
||||
return pathCandidates(localDir, videoID, ".jpg", "thumbs")
|
||||
}
|
||||
|
||||
func PreviewFilename(videoID string) string {
|
||||
return safeFilename(videoID, ".mp4")
|
||||
}
|
||||
|
||||
func ThumbnailFilename(videoID string) string {
|
||||
return safeFilename(videoID, ".jpg")
|
||||
}
|
||||
|
||||
func pathCandidates(localDir, videoID, ext, subdir string) []string {
|
||||
safe := safeFilename(videoID, ext)
|
||||
legacy := videoID + ext
|
||||
base := localDir
|
||||
if subdir != "" {
|
||||
base = filepath.Join(base, subdir)
|
||||
}
|
||||
out := []string{filepath.Join(base, safe)}
|
||||
if legacy != safe && isPlainSafeStem(videoID) && len([]byte(legacy)) <= maxLegacyFilenameBytes {
|
||||
out = append(out, filepath.Join(base, legacy))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func safeFilename(videoID, ext string) string {
|
||||
if isPlainSafeStem(videoID) && len([]byte(videoID))+len(ext) <= maxPlainStemBytes {
|
||||
return videoID + ext
|
||||
}
|
||||
sum := sha256.Sum256([]byte(videoID))
|
||||
return "v-" + hex.EncodeToString(sum[:]) + ext
|
||||
}
|
||||
|
||||
// isPlainSafeStem reports whether value, after trimming surrounding
// whitespace, is non-empty, is not "." or "..", and contains no slash,
// backslash or NUL byte.
func isPlainSafeStem(value string) bool {
	trimmed := strings.TrimSpace(value)
	switch trimmed {
	case "", ".", "..":
		return false
	}
	return !strings.ContainsAny(trimmed, `/\`+"\x00")
}
|
||||
@@ -0,0 +1,56 @@
|
||||
package mediaasset
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFilenamesKeepShortSafeIDs(t *testing.T) {
|
||||
if got := ThumbnailFilename("video-1"); got != "video-1.jpg" {
|
||||
t.Fatalf("thumbnail filename = %q, want video-1.jpg", got)
|
||||
}
|
||||
if got := PreviewFilename("video-1"); got != "video-1.mp4" {
|
||||
t.Fatalf("preview filename = %q, want video-1.mp4", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilenamesHashLongOrUnsafeIDs(t *testing.T) {
|
||||
longID := "localstorage-" + strings.Repeat("x", 240)
|
||||
got := ThumbnailFilename(longID)
|
||||
if !strings.HasPrefix(got, "v-") || !strings.HasSuffix(got, ".jpg") {
|
||||
t.Fatalf("thumbnail filename = %q, want hashed jpg", got)
|
||||
}
|
||||
if len([]byte(got)) >= len([]byte(longID+".jpg")) {
|
||||
t.Fatalf("thumbnail filename = %q should be shorter than original id", got)
|
||||
}
|
||||
|
||||
unsafe := ThumbnailFilename("dir/video")
|
||||
if unsafe == "dir/video.jpg" || strings.ContainsAny(unsafe, `/\`) {
|
||||
t.Fatalf("unsafe thumbnail filename = %q, want hashed single filename", unsafe)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbnailPathCandidatesIncludeLegacyForHashedIDs(t *testing.T) {
|
||||
localDir := t.TempDir()
|
||||
mediumID := "localstorage-" + strings.Repeat("x", 190)
|
||||
got := ThumbnailPathCandidates(localDir, mediumID)
|
||||
if len(got) != 2 {
|
||||
t.Fatalf("candidates = %#v, want hashed and legacy paths", got)
|
||||
}
|
||||
if got[0] != ThumbnailPath(localDir, mediumID) {
|
||||
t.Fatalf("first candidate = %q, want safe path %q", got[0], ThumbnailPath(localDir, mediumID))
|
||||
}
|
||||
if filepath.Base(got[1]) != mediumID+".jpg" {
|
||||
t.Fatalf("legacy candidate = %q, want original id jpg", got[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbnailPathCandidatesSkipOverlongLegacy(t *testing.T) {
|
||||
localDir := t.TempDir()
|
||||
longID := "localstorage-" + strings.Repeat("x", 240)
|
||||
got := ThumbnailPathCandidates(localDir, longID)
|
||||
if len(got) != 1 {
|
||||
t.Fatalf("candidates = %#v, want only hashed path for overlong id", got)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,280 @@
|
||||
package mediasim
|
||||
|
||||
import (
|
||||
"image"
|
||||
_ "image/gif"
|
||||
_ "image/jpeg"
|
||||
_ "image/png"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
const (
|
||||
ssimSampleSize = 96
|
||||
minCoreTitleRunes = 12
|
||||
)
|
||||
|
||||
// titleCoreSeparators lists the delimiters normalizeTitleCore tries, in
// priority order, when cutting a title down to its leading segment. The
// space-padded forms come first so they win over their bare counterparts.
//
// NOTE(review): several entries are identical as written here (" | ", "_",
// "-"); presumably the repeats were meant to be full-width variants — confirm
// against the original file encoding.
var titleCoreSeparators = []string{
	" - ",
	" -- ",
	" — ",
	" – ",
	" | ",
	" | ",
	"_",
	"_",
	"-",
	"—",
	"–",
	"-",
	"|",
}
|
||||
|
||||
// TitleSimilarity returns the best normalized Levenshtein similarity in [0, 1]
|
||||
// between the full titles and their leading core title segments.
|
||||
func TitleSimilarity(a, b string) float64 {
|
||||
leftVariants := titleVariants(a)
|
||||
rightVariants := titleVariants(b)
|
||||
if len(leftVariants) == 0 && len(rightVariants) == 0 {
|
||||
return 1
|
||||
}
|
||||
if len(leftVariants) == 0 || len(rightVariants) == 0 {
|
||||
return 0
|
||||
}
|
||||
best := 0.0
|
||||
for _, left := range leftVariants {
|
||||
for _, right := range rightVariants {
|
||||
score := normalizedLevenshteinSimilarity(left, right)
|
||||
if score > best {
|
||||
best = score
|
||||
}
|
||||
}
|
||||
}
|
||||
return best
|
||||
}
|
||||
|
||||
// TitleKeys returns the normalized full title and core-title variants used by
|
||||
// TitleSimilarity. It is intended for cheap caller-side prefiltering before
|
||||
// running the heavier Levenshtein comparison.
|
||||
func TitleKeys(value string) []string {
|
||||
return append([]string(nil), titleVariants(value)...)
|
||||
}
|
||||
|
||||
func normalizedLevenshteinSimilarity(left, right string) float64 {
|
||||
leftRunes := []rune(left)
|
||||
rightRunes := []rune(right)
|
||||
if len(leftRunes) == 0 && len(rightRunes) == 0 {
|
||||
return 1
|
||||
}
|
||||
if len(leftRunes) == 0 || len(rightRunes) == 0 {
|
||||
return 0
|
||||
}
|
||||
maxLen := len(leftRunes)
|
||||
if len(rightRunes) > maxLen {
|
||||
maxLen = len(rightRunes)
|
||||
}
|
||||
return 1 - float64(levenshtein(leftRunes, rightRunes))/float64(maxLen)
|
||||
}
|
||||
|
||||
func titleVariants(value string) []string {
|
||||
full := normalizeTitle(value)
|
||||
if full == "" {
|
||||
return nil
|
||||
}
|
||||
out := appendTitleVariant(nil, full)
|
||||
if core := normalizeTitleCore(value); core != "" && core != full {
|
||||
out = appendTitleVariant(out, core)
|
||||
}
|
||||
for _, tail := range titleTailVariants(value) {
|
||||
normalized := normalizeTitle(tail)
|
||||
if len([]rune(normalized)) >= minCoreTitleRunes {
|
||||
out = appendTitleVariant(out, normalized)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// appendTitleVariant appends value to out unless out already contains it, and
// returns the resulting slice.
func appendTitleVariant(out []string, value string) []string {
	for i := range out {
		if out[i] == value {
			return out
		}
	}
	return append(out, value)
}
|
||||
|
||||
// titleTailVariants returns, for each marker, the trimmed text that follows
// the last occurrence of that marker in the trimmed title. Markers that are
// absent or sit at the very end contribute nothing; an empty title yields nil.
//
// NOTE(review): both markers are identical as written here, so a match is
// returned twice; presumably one was meant to be a full-width variant —
// confirm against the original file encoding.
func titleTailVariants(value string) []string {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return nil
	}
	var tails []string
	for _, marker := range []string{"@", "@"} {
		at := strings.LastIndex(trimmed, marker)
		if at < 0 {
			continue
		}
		rest := trimmed[at+len(marker):]
		if rest == "" {
			continue
		}
		tails = append(tails, strings.TrimSpace(rest))
	}
	return tails
}
|
||||
|
||||
func normalizeTitleCore(value string) string {
|
||||
head := strings.TrimSpace(value)
|
||||
for _, sep := range titleCoreSeparators {
|
||||
if idx := strings.Index(head, sep); idx > 0 {
|
||||
head = strings.TrimSpace(head[:idx])
|
||||
break
|
||||
}
|
||||
}
|
||||
normalized := normalizeTitle(head)
|
||||
if len([]rune(normalized)) < minCoreTitleRunes {
|
||||
return ""
|
||||
}
|
||||
return normalized
|
||||
}
|
||||
|
||||
// normalizeTitle lower-cases and trims value, strips at most one known video
// extension from the end, and keeps only letters and digits. When no letter
// or digit remains, it falls back to the text with all whitespace removed.
func normalizeTitle(value string) string {
	value = strings.ToLower(strings.TrimSpace(value))
	for _, ext := range []string{".mp4", ".m4v", ".mkv", ".mov", ".avi", ".webm", ".ts", ".m3u8"} {
		if stripped := strings.TrimSuffix(value, ext); stripped != value {
			value = stripped
			break
		}
	}
	alnum := strings.Map(func(r rune) rune {
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			return r
		}
		return -1 // drop punctuation, whitespace and symbols
	}, value)
	if alnum != "" {
		return alnum
	}
	return strings.Join(strings.Fields(value), "")
}
|
||||
|
||||
// levenshtein returns the edit distance (insertions, deletions and
// substitutions, each costing 1) between two rune slices. It keeps only two
// rows of the DP table, sized by the shorter input.
func levenshtein(a, b []rune) int {
	longer, shorter := a, b
	if len(longer) < len(shorter) {
		longer, shorter = shorter, longer
	}
	width := len(shorter) + 1
	prev := make([]int, width)
	curr := make([]int, width)
	for col := range prev {
		prev[col] = col
	}
	for row, lr := range longer {
		curr[0] = row + 1
		for col, sr := range shorter {
			// Substitution (free when the runes match).
			best := prev[col]
			if lr != sr {
				best++
			}
			// Deletion from the longer input.
			if del := prev[col+1] + 1; del < best {
				best = del
			}
			// Insertion into the longer input.
			if ins := curr[col] + 1; ins < best {
				best = ins
			}
			curr[col+1] = best
		}
		prev, curr = curr, prev
	}
	return prev[len(shorter)]
}
|
||||
|
||||
// minInt returns the smallest of values. It panics when called with no
// arguments.
func minInt(values ...int) int {
	smallest := values[0]
	for i := 1; i < len(values); i++ {
		if values[i] < smallest {
			smallest = values[i]
		}
	}
	return smallest
}
|
||||
|
||||
// ImageSSIM compares two local images using luminance SSIM over a fixed grid.
|
||||
func ImageSSIM(leftPath, rightPath string) (float64, error) {
|
||||
left, err := decodeImage(leftPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
right, err := decodeImage(rightPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return SSIM(left, right), nil
|
||||
}
|
||||
|
||||
// decodeImage opens path and decodes it with whichever image format decoders
// are registered. It returns the open or decode error unchanged.
func decodeImage(path string) (image.Image, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	decoded, _, decodeErr := image.Decode(file)
	if decodeErr != nil {
		return nil, decodeErr
	}
	return decoded, nil
}
|
||||
|
||||
// SSIM compares two images after nearest-neighbor sampling onto the same grid.
|
||||
func SSIM(left, right image.Image) float64 {
|
||||
if left == nil || right == nil {
|
||||
return 0
|
||||
}
|
||||
leftSamples := grayscaleSamples(left, ssimSampleSize, ssimSampleSize)
|
||||
rightSamples := grayscaleSamples(right, ssimSampleSize, ssimSampleSize)
|
||||
if len(leftSamples) == 0 || len(leftSamples) != len(rightSamples) {
|
||||
return 0
|
||||
}
|
||||
|
||||
var leftMean, rightMean float64
|
||||
for i := range leftSamples {
|
||||
leftMean += leftSamples[i]
|
||||
rightMean += rightSamples[i]
|
||||
}
|
||||
n := float64(len(leftSamples))
|
||||
leftMean /= n
|
||||
rightMean /= n
|
||||
|
||||
var leftVariance, rightVariance, covariance float64
|
||||
for i := range leftSamples {
|
||||
leftDelta := leftSamples[i] - leftMean
|
||||
rightDelta := rightSamples[i] - rightMean
|
||||
leftVariance += leftDelta * leftDelta
|
||||
rightVariance += rightDelta * rightDelta
|
||||
covariance += leftDelta * rightDelta
|
||||
}
|
||||
leftVariance /= n
|
||||
rightVariance /= n
|
||||
covariance /= n
|
||||
|
||||
const c1 = 6.5025 // (0.01 * 255)^2
|
||||
const c2 = 58.5225 // (0.03 * 255)^2
|
||||
denominator := (leftMean*leftMean + rightMean*rightMean + c1) * (leftVariance + rightVariance + c2)
|
||||
if denominator == 0 {
|
||||
return 0
|
||||
}
|
||||
score := ((2*leftMean*rightMean + c1) * (2*covariance + c2)) / denominator
|
||||
if math.IsNaN(score) || math.IsInf(score, 0) {
|
||||
return 0
|
||||
}
|
||||
return score
|
||||
}
|
||||
|
||||
func grayscaleSamples(img image.Image, width, height int) []float64 {
|
||||
bounds := img.Bounds()
|
||||
if bounds.Dx() <= 0 || bounds.Dy() <= 0 || width <= 0 || height <= 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]float64, 0, width*height)
|
||||
for y := 0; y < height; y++ {
|
||||
sourceY := bounds.Min.Y + y*bounds.Dy()/height
|
||||
for x := 0; x < width; x++ {
|
||||
sourceX := bounds.Min.X + x*bounds.Dx()/width
|
||||
r, g, b, _ := img.At(sourceX, sourceY).RGBA()
|
||||
out = append(out, 0.299*float64(r>>8)+0.587*float64(g>>8)+0.114*float64(b>>8))
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package mediasim
|
||||
|
||||
import (
|
||||
"image"
|
||||
"image/color"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTitleSimilarityNormalizesPunctuationAndWhitespace(t *testing.T) {
|
||||
score := TitleSimilarity("AB-123 测试视频.mp4", "ab123测试视频")
|
||||
if score < 0.90 {
|
||||
t.Fatalf("similarity = %.3f, want >= 0.90", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTitleSimilarityUsesLeadingCoreTitle(t *testing.T) {
|
||||
score := TitleSimilarity(
|
||||
"反差极品大二女友,叫声可射~,“射进小骚逼里面~” - 性感小皮鞭",
|
||||
"反差极品大二女友,叫声可射~,“射进小骚逼里面~”",
|
||||
)
|
||||
if score < 0.99 {
|
||||
t.Fatalf("similarity = %.3f, want core-title match", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTitleSimilarityDoesNotMatchBySharedSuffixOnly(t *testing.T) {
|
||||
score := TitleSimilarity(
|
||||
"高颜值大学生宿舍自拍视频完整流出 - 同一个来源",
|
||||
"户外旅行风景记录城市夜景合集 - 同一个来源",
|
||||
)
|
||||
if score >= 0.90 {
|
||||
t.Fatalf("similarity = %.3f, want < 0.90", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTitleSimilarityRejectsDifferentTitles(t *testing.T) {
|
||||
score := TitleSimilarity("完全不同的视频标题", "another unrelated movie")
|
||||
if score >= 0.90 {
|
||||
t.Fatalf("similarity = %.3f, want < 0.90", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSSIMScoresIdenticalAndDifferentImages(t *testing.T) {
|
||||
red := solidImage(color.RGBA{R: 220, G: 20, B: 20, A: 255})
|
||||
redAgain := solidImage(color.RGBA{R: 220, G: 20, B: 20, A: 255})
|
||||
blue := solidImage(color.RGBA{R: 20, G: 20, B: 220, A: 255})
|
||||
|
||||
if score := SSIM(red, redAgain); score < 0.999 {
|
||||
t.Fatalf("identical SSIM = %.6f, want close to 1", score)
|
||||
}
|
||||
if score := SSIM(red, blue); score >= 0.95 {
|
||||
t.Fatalf("different SSIM = %.6f, want < 0.95", score)
|
||||
}
|
||||
}
|
||||
|
||||
func solidImage(c color.RGBA) image.Image {
|
||||
img := image.NewRGBA(image.Rect(0, 0, 32, 32))
|
||||
for y := 0; y < 32; y++ {
|
||||
for x := 0; x < 32; x++ {
|
||||
img.SetRGBA(x, y, c)
|
||||
}
|
||||
}
|
||||
return img
|
||||
}
|
||||
@@ -1,19 +1,19 @@
|
||||
// Package nightly orchestrates the single nightly maintenance pipeline that
|
||||
// replaces the legacy scanLoop / crawlerLoop / spider91 migrator periodic loop.
|
||||
// replaces the legacy scanLoop / crawlerLoop / crawler upload periodic loop.
|
||||
//
|
||||
// Pipeline (fired once per day at cron_hour, also via TriggerNow for admin
|
||||
// "扫描所有网盘"):
|
||||
//
|
||||
// Phase 1: for each non-spider91 cloud drive
|
||||
// scan + delete-detection + enqueue thumb + enqueue teaser
|
||||
// wait until all thumb / teaser queues are idle
|
||||
// Phase 2: if any spider91 drive configured
|
||||
// crawl + enqueue teaser for new videos
|
||||
// wait until teaser queues are idle
|
||||
// Phase 3: spider91 → cloud migration (single sweep, captcha cooldown still
|
||||
// Phase 1: for each non-crawler cloud drive
|
||||
// scan + delete-detection + enqueue thumb + enqueue preview video
|
||||
// wait until all thumb / preview-video queues are idle
|
||||
// Phase 2: if any script crawler configured
|
||||
// crawl + enqueue preview video for new videos
|
||||
// wait until preview-video queues are idle
|
||||
// Phase 3: crawler local video → cloud upload (single sweep, captcha cooldown still
|
||||
// honored within this call)
|
||||
// Phase 4: cleanup duplicate local preview/thumbnail assets after sampled
|
||||
// fingerprints have identified canonical videos
|
||||
// Phase 4: full-library duplicate video maintenance:
|
||||
// exact size+sampled_sha256 dedupe, then title/duration/thumbnail dedupe
|
||||
//
|
||||
// A 6h soft deadline guards each pipeline run; phases check deadline at their
|
||||
// boundaries and exit cleanly if exceeded (no in-flight ffmpeg / upload is
|
||||
@@ -64,32 +64,32 @@ type Config struct {
|
||||
MaxDuration time.Duration
|
||||
|
||||
// ListScanTargets returns the drive IDs to run Phase 1 on, in deterministic
|
||||
// order. Should exclude spider91 and localupload drives.
|
||||
// order. Should exclude crawler and localupload drives.
|
||||
ListScanTargets func(ctx context.Context) []string
|
||||
|
||||
// RunScan synchronously runs scan + cleanup + enqueueDriveGeneration for
|
||||
// one drive. Errors are expected to be logged inside, not surfaced.
|
||||
RunScan func(ctx context.Context, driveID string)
|
||||
|
||||
// ListSpider91Drives returns spider91 drive IDs to crawl in Phase 2.
|
||||
// Returns empty slice when no spider91 drive is configured.
|
||||
ListSpider91Drives func(ctx context.Context) []string
|
||||
// ListCrawlerDrives returns script crawler drive IDs to crawl in Phase 2.
|
||||
// Returns empty slice when no crawler is configured.
|
||||
ListCrawlerDrives func(ctx context.Context) []string
|
||||
|
||||
// RunSpider91Crawl synchronously runs one crawl cycle (downloads + thumbs +
|
||||
// teaser enqueue) for a single spider91 drive.
|
||||
RunSpider91Crawl func(ctx context.Context, driveID string)
|
||||
// RunCrawlerCrawl synchronously runs one crawl cycle (downloads + thumbs +
|
||||
// preview-video enqueue) for a single crawler drive.
|
||||
RunCrawlerCrawl func(ctx context.Context, driveID string)
|
||||
|
||||
// WaitPreviewQueuesIdle blocks until both the thumbnail and teaser queues
|
||||
// WaitPreviewQueuesIdle blocks until both the thumbnail and preview-video queues
|
||||
// across all drives are drained (queue empty + no in-flight task). It must
|
||||
// honor ctx cancellation.
|
||||
WaitPreviewQueuesIdle func(ctx context.Context) error
|
||||
|
||||
// RunMigration runs spider91migrate.Migrator.RunOnce for Phase 3.
|
||||
// RunMigration runs crawlerupload.Migrator.RunOnce for Phase 3.
|
||||
RunMigration func(ctx context.Context) error
|
||||
|
||||
// RunDedupeAssetCleanup removes generated local assets from non-canonical
|
||||
// videos in size+sampled_sha256 duplicate groups. It must not delete cloud
|
||||
// files or catalog rows.
|
||||
// RunDedupeAssetCleanup runs full-library duplicate video maintenance. It
|
||||
// removes duplicate catalog rows and local generated assets, but never
|
||||
// deletes cloud source files.
|
||||
RunDedupeAssetCleanup func(ctx context.Context) error
|
||||
|
||||
// Now is injected for tests; nil → time.Now.
|
||||
@@ -115,6 +115,7 @@ type Runner struct {
|
||||
queued bool
|
||||
startedAt time.Time
|
||||
lastFinishedAt time.Time
|
||||
currentCancel context.CancelFunc
|
||||
}
|
||||
|
||||
// New constructs a Runner. cfg is shallow-copied; defaults are applied.
|
||||
@@ -175,6 +176,28 @@ func (r *Runner) TriggerNow() bool {
|
||||
}
|
||||
}
|
||||
|
||||
// StopCurrent cancels the currently running pipeline and drops one queued
|
||||
// manual trigger, if present. It returns true when there was something to stop.
|
||||
func (r *Runner) StopCurrent() bool {
|
||||
r.stateMu.Lock()
|
||||
wasRunning := r.running
|
||||
wasQueued := r.queued
|
||||
cancel := r.currentCancel
|
||||
r.queued = false
|
||||
r.stateMu.Unlock()
|
||||
|
||||
if wasQueued {
|
||||
select {
|
||||
case <-r.trigger:
|
||||
default:
|
||||
}
|
||||
}
|
||||
if cancel != nil {
|
||||
cancel()
|
||||
}
|
||||
return wasRunning || wasQueued || cancel != nil
|
||||
}
|
||||
|
||||
func (r *Runner) Status() Status {
|
||||
r.stateMu.Lock()
|
||||
running := r.running
|
||||
@@ -232,14 +255,25 @@ func shouldRun(now time.Time, lastRunDate string) bool {
|
||||
//
|
||||
// 流水线没有总耗时上限:一直跑到 ctx 取消(进程退出)或所有 phase 完成。
|
||||
func (r *Runner) runPipelineLocked(ctx context.Context, manual bool) {
|
||||
if manual {
|
||||
r.stateMu.Lock()
|
||||
queued := r.queued
|
||||
r.stateMu.Unlock()
|
||||
if !queued {
|
||||
log.Printf("[nightly] manual trigger was canceled before start")
|
||||
return
|
||||
}
|
||||
}
|
||||
if !r.runMu.TryLock() {
|
||||
log.Printf("[nightly] another pipeline is already running, skipping this trigger")
|
||||
return
|
||||
}
|
||||
|
||||
started := r.cfg.Now()
|
||||
r.markStarted(started)
|
||||
runCtx, cancel := context.WithCancel(ctx)
|
||||
r.markStarted(started, cancel)
|
||||
defer func() {
|
||||
cancel()
|
||||
r.markFinished(r.cfg.Now())
|
||||
r.runMu.Unlock()
|
||||
}()
|
||||
@@ -250,7 +284,7 @@ func (r *Runner) runPipelineLocked(ctx context.Context, manual bool) {
|
||||
}
|
||||
log.Printf("[nightly] pipeline (%s) start", mode)
|
||||
|
||||
r.runPipeline(ctx)
|
||||
r.runPipeline(runCtx)
|
||||
|
||||
finished := r.cfg.Now()
|
||||
log.Printf("[nightly] pipeline (%s) finish; took=%s", mode, finished.Sub(started).Round(time.Second))
|
||||
@@ -264,12 +298,13 @@ func (r *Runner) runPipelineLocked(ctx context.Context, manual bool) {
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Runner) markStarted(started time.Time) {
|
||||
func (r *Runner) markStarted(started time.Time, cancel context.CancelFunc) {
|
||||
r.stateMu.Lock()
|
||||
defer r.stateMu.Unlock()
|
||||
r.running = true
|
||||
r.queued = false
|
||||
r.startedAt = started
|
||||
r.currentCancel = cancel
|
||||
}
|
||||
|
||||
func (r *Runner) markFinished(finished time.Time) {
|
||||
@@ -278,6 +313,7 @@ func (r *Runner) markFinished(finished time.Time) {
|
||||
r.running = false
|
||||
r.startedAt = time.Time{}
|
||||
r.lastFinishedAt = finished
|
||||
r.currentCancel = nil
|
||||
}
|
||||
|
||||
// runPipeline executes the three phases. It returns when the pipeline finishes
|
||||
@@ -315,23 +351,23 @@ func (r *Runner) runPipeline(ctx context.Context) {
|
||||
if r.checkDeadline(ctx, "phase 2") {
|
||||
return
|
||||
}
|
||||
spiderIDs := []string{}
|
||||
if r.cfg.ListSpider91Drives != nil {
|
||||
spiderIDs = r.cfg.ListSpider91Drives(ctx)
|
||||
crawlerIDs := []string{}
|
||||
if r.cfg.ListCrawlerDrives != nil {
|
||||
crawlerIDs = r.cfg.ListCrawlerDrives(ctx)
|
||||
}
|
||||
if len(spiderIDs) == 0 {
|
||||
log.Printf("[nightly] phase 2/3 skipped: no spider91 drive configured")
|
||||
if len(crawlerIDs) == 0 {
|
||||
log.Printf("[nightly] phase 2/3 skipped: no crawler configured")
|
||||
r.runDedupeAssetCleanupPhase(ctx)
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 2: crawling %d spider91 drive(s)", len(spiderIDs))
|
||||
for _, id := range spiderIDs {
|
||||
log.Printf("[nightly] phase 2: crawling %d crawler drive(s)", len(crawlerIDs))
|
||||
for _, id := range crawlerIDs {
|
||||
if ctx.Err() != nil {
|
||||
log.Printf("[nightly] phase 2 aborted by ctx: %v", ctx.Err())
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 2: crawling drive=%s", id)
|
||||
r.cfg.RunSpider91Crawl(ctx, id)
|
||||
r.cfg.RunCrawlerCrawl(ctx, id)
|
||||
}
|
||||
log.Printf("[nightly] phase 2: waiting for teaser queue to drain")
|
||||
if err := r.waitIdle(ctx, "phase 2"); err != nil {
|
||||
@@ -342,7 +378,7 @@ func (r *Runner) runPipeline(ctx context.Context) {
|
||||
if r.checkDeadline(ctx, "phase 3") {
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 3: spider91 migration")
|
||||
log.Printf("[nightly] phase 3: crawler upload")
|
||||
if r.cfg.RunMigration != nil {
|
||||
if err := r.cfg.RunMigration(ctx); err != nil {
|
||||
log.Printf("[nightly] phase 3 migration: %v", err)
|
||||
@@ -382,9 +418,9 @@ func (r *Runner) runDedupeAssetCleanupPhase(ctx context.Context) {
|
||||
if r.cfg.RunDedupeAssetCleanup == nil {
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 4: duplicate asset cleanup")
|
||||
log.Printf("[nightly] phase 4: duplicate video maintenance")
|
||||
if err := r.cfg.RunDedupeAssetCleanup(ctx); err != nil {
|
||||
log.Printf("[nightly] phase 4 duplicate asset cleanup: %v", err)
|
||||
log.Printf("[nightly] phase 4 duplicate video maintenance: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -99,11 +99,11 @@ func TestRunPipelineHonoursPhaseOrder(t *testing.T) {
|
||||
RunScan: func(_ context.Context, id string) {
|
||||
rec.push("scan:" + id)
|
||||
},
|
||||
ListSpider91Drives: func(context.Context) []string {
|
||||
rec.push("list-spider")
|
||||
ListCrawlerDrives: func(context.Context) []string {
|
||||
rec.push("list-crawler")
|
||||
return []string{"sp-1"}
|
||||
},
|
||||
RunSpider91Crawl: func(_ context.Context, id string) {
|
||||
RunCrawlerCrawl: func(_ context.Context, id string) {
|
||||
rec.push("crawl:" + id)
|
||||
},
|
||||
WaitPreviewQueuesIdle: func(context.Context) error {
|
||||
@@ -128,7 +128,7 @@ func TestRunPipelineHonoursPhaseOrder(t *testing.T) {
|
||||
"scan:drive-a",
|
||||
"scan:drive-b",
|
||||
"wait-idle", // after phase 1
|
||||
"list-spider",
|
||||
"list-crawler",
|
||||
"crawl:sp-1",
|
||||
"wait-idle", // after phase 2
|
||||
"migrate",
|
||||
@@ -144,15 +144,15 @@ func TestRunPipelineHonoursPhaseOrder(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunPipelineSkipsMigrationWhenNoSpider91(t *testing.T) {
|
||||
func TestRunPipelineSkipsMigrationWhenNoCrawler(t *testing.T) {
|
||||
rec := &recorder{}
|
||||
|
||||
r := New(Config{
|
||||
Settings: newStubSettings(),
|
||||
ListScanTargets: func(context.Context) []string { return []string{"drive-a"} },
|
||||
RunScan: func(_ context.Context, id string) { rec.push("scan:" + id) },
|
||||
ListSpider91Drives: func(context.Context) []string { return nil },
|
||||
RunSpider91Crawl: func(_ context.Context, id string) { rec.push("crawl:" + id) },
|
||||
Settings: newStubSettings(),
|
||||
ListScanTargets: func(context.Context) []string { return []string{"drive-a"} },
|
||||
RunScan: func(_ context.Context, id string) { rec.push("scan:" + id) },
|
||||
ListCrawlerDrives: func(context.Context) []string { return nil },
|
||||
RunCrawlerCrawl: func(_ context.Context, id string) { rec.push("crawl:" + id) },
|
||||
WaitPreviewQueuesIdle: func(context.Context) error {
|
||||
rec.push("wait-idle")
|
||||
return nil
|
||||
@@ -171,7 +171,7 @@ func TestRunPipelineSkipsMigrationWhenNoSpider91(t *testing.T) {
|
||||
|
||||
for _, c := range rec.snapshot() {
|
||||
if c == "migrate" || c == "crawl:sp-1" {
|
||||
t.Fatalf("phase 2/3 should be skipped when no spider91 drive, got call %q", c)
|
||||
t.Fatalf("phase 2/3 should be skipped when no crawler, got call %q", c)
|
||||
}
|
||||
}
|
||||
foundCleanup := false
|
||||
@@ -181,7 +181,7 @@ func TestRunPipelineSkipsMigrationWhenNoSpider91(t *testing.T) {
|
||||
}
|
||||
}
|
||||
if !foundCleanup {
|
||||
t.Fatalf("dedupe cleanup should still run when spider91 is absent; calls=%v", rec.snapshot())
|
||||
t.Fatalf("dedupe cleanup should still run when crawler is absent; calls=%v", rec.snapshot())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -200,8 +200,8 @@ func TestRunPipelineExitsWhenContextCancelledMidPhase(t *testing.T) {
|
||||
cancel()
|
||||
}
|
||||
},
|
||||
ListSpider91Drives: func(context.Context) []string { return []string{"x"} },
|
||||
RunSpider91Crawl: func(context.Context, string) { rec.push("crawl") },
|
||||
ListCrawlerDrives: func(context.Context) []string { return []string{"x"} },
|
||||
RunCrawlerCrawl: func(context.Context, string) { rec.push("crawl") },
|
||||
WaitPreviewQueuesIdle: func(context.Context) error { rec.push("wait-idle"); return nil },
|
||||
RunMigration: func(context.Context) error { rec.push("migrate"); return nil },
|
||||
RunDedupeAssetCleanup: func(context.Context) error { rec.push("dedupe-cleanup"); return nil },
|
||||
@@ -289,12 +289,12 @@ func TestCtxCancelPreventsLaterPhases(t *testing.T) {
|
||||
WaitPreviewQueuesIdle: func(ctx context.Context) error {
|
||||
return ctx.Err()
|
||||
},
|
||||
ListSpider91Drives: func(context.Context) []string {
|
||||
rec.push("list-spider")
|
||||
ListCrawlerDrives: func(context.Context) []string {
|
||||
rec.push("list-crawler")
|
||||
return []string{"x"}
|
||||
},
|
||||
RunSpider91Crawl: func(context.Context, string) { rec.push("crawl") },
|
||||
RunMigration: func(context.Context) error { rec.push("migrate"); return nil },
|
||||
RunCrawlerCrawl: func(context.Context, string) { rec.push("crawl") },
|
||||
RunMigration: func(context.Context) error { rec.push("migrate"); return nil },
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
@@ -303,7 +303,7 @@ func TestCtxCancelPreventsLaterPhases(t *testing.T) {
|
||||
r.runPipeline(ctx)
|
||||
|
||||
for _, c := range rec.snapshot() {
|
||||
if c == "crawl" || c == "migrate" || c == "list-spider" {
|
||||
if c == "crawl" || c == "migrate" || c == "list-crawler" {
|
||||
t.Fatalf("later phase should not run after ctx done; got %q", c)
|
||||
}
|
||||
}
|
||||
@@ -395,6 +395,61 @@ func TestStatusTracksQueuedRunningAndFinished(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStopCurrentCancelsRunningPipeline(t *testing.T) {
|
||||
scanStarted := make(chan struct{})
|
||||
scanCanceled := make(chan struct{})
|
||||
var startedOnce sync.Once
|
||||
r := New(Config{
|
||||
Settings: newStubSettings(),
|
||||
ListScanTargets: func(context.Context) []string {
|
||||
return []string{"drive"}
|
||||
},
|
||||
RunScan: func(ctx context.Context, _ string) {
|
||||
startedOnce.Do(func() { close(scanStarted) })
|
||||
<-ctx.Done()
|
||||
close(scanCanceled)
|
||||
},
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
go r.Run(ctx)
|
||||
|
||||
if !r.TriggerNow() {
|
||||
t.Fatal("TriggerNow should queue a manual run")
|
||||
}
|
||||
select {
|
||||
case <-scanStarted:
|
||||
case <-time.After(time.Second):
|
||||
t.Fatal("pipeline did not start")
|
||||
}
|
||||
|
||||
if !r.StopCurrent() {
|
||||
t.Fatal("StopCurrent should report a running pipeline")
|
||||
}
|
||||
select {
|
||||
case <-scanCanceled:
|
||||
case <-time.After(time.Second):
|
||||
t.Fatal("StopCurrent did not cancel pipeline context")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStopCurrentDropsQueuedTrigger(t *testing.T) {
|
||||
r := New(Config{Settings: newStubSettings()})
|
||||
if !r.TriggerNow() {
|
||||
t.Fatal("TriggerNow should queue a manual run")
|
||||
}
|
||||
if !r.StopCurrent() {
|
||||
t.Fatal("StopCurrent should report a queued pipeline")
|
||||
}
|
||||
if got := r.Status(); got.State != "idle" || got.Running || got.Queued {
|
||||
t.Fatalf("status = %#v, want idle after dropping queued trigger", got)
|
||||
}
|
||||
if !r.TriggerNow() {
|
||||
t.Fatal("TriggerNow should accept a new request after queued stop")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTriggerNowAcceptsOnlyOneConcurrentRequest(t *testing.T) {
|
||||
r := New(Config{Settings: newStubSettings()})
|
||||
|
||||
|
||||
@@ -21,15 +21,16 @@ import (
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
FFmpegPath string
|
||||
FFprobePath string
|
||||
DurationSeconds int // 兼容旧配置;当前 teaser 每段固定 3 秒
|
||||
DurationSeconds int // 兼容旧配置;当前预览视频每段固定 3 秒
|
||||
Width int
|
||||
Segments int // 兼容旧配置;当前 30 秒及以上视频固定使用 4 段
|
||||
LocalDir string // 本地 teaser 和封面目录
|
||||
LocalDir string // 本地预览视频和封面目录
|
||||
}
|
||||
|
||||
type Generator struct {
|
||||
@@ -236,23 +237,43 @@ func appendUniqueStart(starts []float64, start, eachSec float64) []float64 {
|
||||
return append(starts, start)
|
||||
}
|
||||
|
||||
// thumbnailOffsets 选封面抽帧的时间点(秒)。独立于 teaser。
|
||||
func thumbnailOffsets() []float64 {
|
||||
return []float64{5, 1, 0}
|
||||
// thumbnailOffsets 选封面抽帧的时间点(秒)。独立于预览视频。
|
||||
// 默认取视频中间帧;时长未知时退回早期帧。
|
||||
func thumbnailOffsets(duration float64) []float64 {
|
||||
if duration <= 0 {
|
||||
return []float64{5, 1, 0}
|
||||
}
|
||||
mid := duration / 2
|
||||
out := []float64{mid}
|
||||
for _, fallback := range []float64{5, 1, 0} {
|
||||
if !containsOffset(out, fallback) {
|
||||
out = append(out, fallback)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// containsOffset reports whether offsets already holds a value within 0.01
// (exclusive) of target. A nil or empty slice never matches.
func containsOffset(offsets []float64, target float64) bool {
	const tolerance = 0.01
	for i := 0; i < len(offsets); i++ {
		if delta := offsets[i] - target; math.Abs(delta) < tolerance {
			return true
		}
	}
	return false
}
|
||||
|
||||
// --- 封面 ---
|
||||
|
||||
// GenerateThumbnail 抽一张 jpg 封面。默认从第 5 秒抽帧,失败时回退到更早时间点。
|
||||
// GenerateThumbnail 抽一张 jpg 封面。默认从视频中间抽帧,失败时回退到更早时间点。
|
||||
func (g *Generator) GenerateThumbnail(ctx context.Context, link *drives.StreamLink, videoID string, duration float64) (string, error) {
|
||||
dir := filepath.Join(g.cfg.LocalDir, "thumbs")
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
dst := filepath.Join(dir, videoID+".jpg")
|
||||
dst := mediaasset.ThumbnailPath(g.cfg.LocalDir, videoID)
|
||||
|
||||
var lastErr error
|
||||
offsets := thumbnailOffsets()
|
||||
offsets := thumbnailOffsets(duration)
|
||||
for i, offset := range offsets {
|
||||
if i > 0 {
|
||||
_ = os.Remove(dst)
|
||||
@@ -363,9 +384,9 @@ func (g *Generator) Probe(ctx context.Context, link *drives.StreamLink) (float64
|
||||
return strconv.ParseFloat(raw, 64)
|
||||
}
|
||||
|
||||
// --- Teaser ---
|
||||
// --- 预览视频 ---
|
||||
|
||||
// Generate 拉取 teaser 到本地临时文件,返回路径。
|
||||
// Generate 拉取预览视频到本地临时文件,返回路径。
|
||||
// 根据 Config.Segments 和视频时长决定是单段还是多段拼接。
|
||||
func (g *Generator) Generate(ctx context.Context, link *drives.StreamLink, duration float64) (string, error) {
|
||||
return g.generate(ctx, duration, func(int) (*drives.StreamLink, error) {
|
||||
@@ -931,22 +952,17 @@ func redactURLs(text string) string {
|
||||
}
|
||||
|
||||
func ffmpegOutputLooksRateLimited(output []byte) bool {
|
||||
text := strings.ToLower(string(output))
|
||||
if !strings.Contains(text, "429") {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "throttl") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "rate-limit") ||
|
||||
strings.Contains(text, "server returned 429")
|
||||
return drives.TextMentionsHTTPStatus(string(output), http.StatusTooManyRequests)
|
||||
}
|
||||
|
||||
// --- 本地落盘 ---
|
||||
|
||||
// MoveToLocal 把临时文件改名到稳定位置,返回最终路径
|
||||
func (g *Generator) MoveToLocal(tmpPath, videoID string) (string, error) {
|
||||
dst := filepath.Join(g.cfg.LocalDir, videoID+".mp4")
|
||||
if err := os.MkdirAll(g.cfg.LocalDir, 0o755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
dst := mediaasset.PreviewPath(g.cfg.LocalDir, videoID)
|
||||
if err := os.Rename(tmpPath, dst); err != nil {
|
||||
// 跨盘 rename 可能失败,fallback 到 copy
|
||||
if cerr := copyFile(tmpPath, dst); cerr != nil {
|
||||
@@ -1040,12 +1056,10 @@ type ThumbWorker struct {
|
||||
}
|
||||
|
||||
const (
|
||||
defaultTransientMediaCooldown = 5 * time.Minute
|
||||
defaultGenerationRateLimitCooldown = 5 * time.Minute
|
||||
defaultThumbTransientMediaMaxFailures = 3
|
||||
defaultWorkerQueueSize = 10000
|
||||
maxPreviewTeaserSizeBytes int64 = 5 * 1024 * 1024 * 1024
|
||||
previewStatusSkipped = "skipped"
|
||||
defaultTransientMediaCooldown = 5 * time.Minute
|
||||
defaultGenerationRateLimitCooldown = 5 * time.Minute
|
||||
defaultThumbTransientMediaMaxFailures = 3
|
||||
defaultWorkerQueueSize = 10000
|
||||
)
|
||||
|
||||
type rateLimitState struct {
|
||||
@@ -1100,6 +1114,19 @@ func (q *videoQueue) release(v *catalog.Video) {
|
||||
q.mu.Unlock()
|
||||
}
|
||||
|
||||
func (q *videoQueue) idsSnapshot() []string {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
if len(q.ids) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]string, 0, len(q.ids))
|
||||
for id := range q.ids {
|
||||
out = append(out, id)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (q *videoQueue) lengthExcluding(currentID string) int {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
@@ -1227,6 +1254,13 @@ func (w *Worker) Status() TaskStatus {
|
||||
return taskStatus(&w.activity, &w.rateLimit, w.queue.lengthExcluding(currentID))
|
||||
}
|
||||
|
||||
func (w *Worker) ActiveVideoIDs() []string {
|
||||
if w == nil {
|
||||
return nil
|
||||
}
|
||||
return w.queue.idsSnapshot()
|
||||
}
|
||||
|
||||
func (w *ThumbWorker) Status() TaskStatus {
|
||||
if w == nil {
|
||||
return TaskStatus{State: "idle"}
|
||||
@@ -1338,12 +1372,19 @@ func (w *ThumbWorker) Run(ctx context.Context) {
|
||||
|
||||
func (w *Worker) processQueued(ctx context.Context, v *catalog.Video) {
|
||||
defer w.queue.release(v)
|
||||
w.activity.start(v)
|
||||
if w.Catalog == nil || v == nil || v.ID == "" {
|
||||
return
|
||||
}
|
||||
current, err := w.Catalog.GetVideo(ctx, v.ID)
|
||||
if err != nil || current.Hidden {
|
||||
return
|
||||
}
|
||||
w.activity.start(current)
|
||||
defer w.activity.done()
|
||||
if !waitForRateLimitCooldown(ctx, &w.rateLimit, "preview", w.Drive) {
|
||||
return
|
||||
}
|
||||
w.process(ctx, v)
|
||||
w.process(ctx, current)
|
||||
}
|
||||
|
||||
func (w *ThumbWorker) processQueued(ctx context.Context, v *catalog.Video) {
|
||||
@@ -1396,11 +1437,17 @@ func (w *Worker) skipIfRateLimited(v *catalog.Video) bool {
|
||||
}
|
||||
|
||||
func (w *Worker) pauseForRateLimit(err error, step, title string) bool {
|
||||
_, ok := drives.RateLimitRetryAfter(err)
|
||||
wait, ok := drives.RateLimitRetryAfter(err)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
until := w.rateLimit.pause(time.Now(), defaultGenerationRateLimitCooldown)
|
||||
if wait <= 0 {
|
||||
wait = w.RateLimitCooldown
|
||||
if wait <= 0 {
|
||||
wait = defaultGenerationRateLimitCooldown
|
||||
}
|
||||
}
|
||||
until := w.rateLimit.pause(time.Now(), wait)
|
||||
log.Printf("[preview] drive=%s rate-limited until=%s step=%s video=%s: %v", w.Drive.ID(), until.Format(time.RFC3339), step, title, err)
|
||||
return true
|
||||
}
|
||||
@@ -1429,11 +1476,17 @@ func (w *ThumbWorker) skipIfRateLimited(v *catalog.Video) bool {
|
||||
}
|
||||
|
||||
func (w *ThumbWorker) pauseForRateLimit(err error, step, title string) bool {
|
||||
_, ok := drives.RateLimitRetryAfter(err)
|
||||
wait, ok := drives.RateLimitRetryAfter(err)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
until := w.rateLimit.pause(time.Now(), defaultGenerationRateLimitCooldown)
|
||||
if wait <= 0 {
|
||||
wait = w.RateLimitCooldown
|
||||
if wait <= 0 {
|
||||
wait = defaultGenerationRateLimitCooldown
|
||||
}
|
||||
}
|
||||
until := w.rateLimit.pause(time.Now(), wait)
|
||||
log.Printf("[thumb] drive=%s rate-limited until=%s step=%s video=%s: %v", w.Drive.ID(), until.Format(time.RFC3339), step, title, err)
|
||||
return true
|
||||
}
|
||||
@@ -1475,38 +1528,17 @@ func driveErrorShouldCooldown(d drives.Drive, err error) bool {
|
||||
}
|
||||
switch d.Kind() {
|
||||
case "p115":
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "server returned 403") ||
|
||||
strings.Contains(text, "403 forbidden") ||
|
||||
strings.Contains(text, "server returned 405") ||
|
||||
strings.Contains(text, "405 method") ||
|
||||
strings.Contains(text, "access denied") ||
|
||||
strings.Contains(text, "moov atom not found") ||
|
||||
strings.Contains(text, "partial file") ||
|
||||
strings.Contains(text, "request has been blocked") ||
|
||||
strings.Contains(text, "访问被阻断")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusMethodNotAllowed, http.StatusTooManyRequests)
|
||||
case "pikpak":
|
||||
// PikPak 在 teaser / 封面生成阶段(取链或拉直链字节)可能命中:
|
||||
// - error_code=10 操作频繁
|
||||
// - HTTP 429 / 5xx / 509 限流和服务端不可用
|
||||
// - 通用文本:rate limit / too many requests / blocked
|
||||
// 命中时让 worker 冷却 5 分钟,避免连续请求加重风控。
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "error_code=10") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "http 509") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "moov atom not found") ||
|
||||
strings.Contains(text, "partial file") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout, 509)
|
||||
case "p123":
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout)
|
||||
case "wopan":
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout, 509)
|
||||
case "guangyapan":
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout, 509)
|
||||
case "googledrive":
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout)
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -1515,18 +1547,22 @@ func (w *ThumbWorker) process(ctx context.Context, v *catalog.Video) bool {
|
||||
if w.skipIfRateLimited(v) {
|
||||
return false
|
||||
}
|
||||
if w.Catalog == nil || v == nil || v.ID == "" {
|
||||
return false
|
||||
}
|
||||
queued := v
|
||||
current := v
|
||||
if loaded, err := w.Catalog.GetVideo(ctx, v.ID); err == nil {
|
||||
if loaded.PreviewLocal == "" {
|
||||
loaded.PreviewLocal = queued.PreviewLocal
|
||||
}
|
||||
current = loaded
|
||||
v = loaded
|
||||
if loaded.ThumbnailURL != "" && loaded.DurationSeconds > 0 {
|
||||
_ = w.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{ThumbnailStatus: "ready"})
|
||||
return false
|
||||
}
|
||||
loaded, err := w.Catalog.GetVideo(ctx, v.ID)
|
||||
if err != nil || loaded.Hidden {
|
||||
return false
|
||||
}
|
||||
if loaded.PreviewLocal == "" {
|
||||
loaded.PreviewLocal = queued.PreviewLocal
|
||||
}
|
||||
current := loaded
|
||||
v = loaded
|
||||
if loaded.ThumbnailURL != "" && loaded.DurationSeconds > 0 {
|
||||
_ = w.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{ThumbnailStatus: "ready"})
|
||||
return false
|
||||
}
|
||||
if current.ThumbnailURL != "" {
|
||||
durationBackfillFailed := false
|
||||
@@ -1618,13 +1654,18 @@ func (w *ThumbWorker) probeDuration(ctx context.Context, v *catalog.Video, link
|
||||
}
|
||||
|
||||
func (w *ThumbWorker) generateThumbnailFromLink(ctx context.Context, v *catalog.Video, link *drives.StreamLink) error {
|
||||
if _, err := w.Gen.GenerateThumbnail(ctx, link, v.ID, 0); err != nil {
|
||||
local, err := w.Gen.GenerateThumbnail(ctx, link, v.ID, float64(v.DurationSeconds))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_ = w.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{
|
||||
if err := w.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{
|
||||
ThumbnailURL: "/p/thumb/" + v.ID,
|
||||
ThumbnailStatus: "ready",
|
||||
})
|
||||
}); err != nil {
|
||||
_ = os.Remove(local)
|
||||
log.Printf("[thumb] update %s after generate: %v", v.Title, err)
|
||||
return nil
|
||||
}
|
||||
log.Printf("[thumb] ready %s", v.Title)
|
||||
return nil
|
||||
}
|
||||
@@ -1642,15 +1683,6 @@ func localPreviewLink(v *catalog.Video) (*drives.StreamLink, bool) {
|
||||
}
|
||||
|
||||
func (w *Worker) process(ctx context.Context, v *catalog.Video) {
|
||||
if shouldSkipTeaser(v) {
|
||||
removePreviousLocalTeaser(v.PreviewLocal, "")
|
||||
if err := w.Catalog.UpdatePreview(ctx, v.ID, "", previewStatusSkipped); err != nil {
|
||||
log.Printf("[preview] skip %s: update status: %v", v.Title, err)
|
||||
return
|
||||
}
|
||||
log.Printf("[preview] skip %s: size=%d exceeds 5GiB teaser limit", v.Title, v.Size)
|
||||
return
|
||||
}
|
||||
if w.skipIfRateLimited(v) {
|
||||
return
|
||||
}
|
||||
@@ -1677,7 +1709,7 @@ func (w *Worker) process(ctx context.Context, v *catalog.Video) {
|
||||
}
|
||||
}
|
||||
|
||||
// 2) teaser
|
||||
// 2) 预览视频
|
||||
tmp, err := w.generateTeaser(ctx, v, link, duration)
|
||||
if err != nil {
|
||||
if w.pauseForRecoverableError(err, "generate", v.Title) {
|
||||
@@ -1695,14 +1727,14 @@ func (w *Worker) process(ctx context.Context, v *catalog.Video) {
|
||||
}
|
||||
|
||||
removePreviousLocalTeaser(v.PreviewLocal, local)
|
||||
w.Catalog.UpdatePreview(ctx, v.ID, local, "ready")
|
||||
if err := w.Catalog.UpdatePreview(ctx, v.ID, local, "ready"); err != nil {
|
||||
removePreviousLocalTeaser(local, "")
|
||||
log.Printf("[preview] update %s after generate: %v", v.Title, err)
|
||||
return
|
||||
}
|
||||
log.Printf("[preview] ready %s (duration=%.1fs)", v.Title, duration)
|
||||
}
|
||||
|
||||
func shouldSkipTeaser(v *catalog.Video) bool {
|
||||
return v != nil && v.Size > maxPreviewTeaserSizeBytes
|
||||
}
|
||||
|
||||
func (w *Worker) generateTeaser(ctx context.Context, v *catalog.Video, link *drives.StreamLink, duration float64) (string, error) {
|
||||
gen, ok := w.Gen.(refreshingTeaserGenerator)
|
||||
if !ok || w.Drive == nil || w.Drive.Kind() != "p115" {
|
||||
|
||||
@@ -168,16 +168,29 @@ func TestMediumAndLongVideosStillRequirePlannedTeaserSegments(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbnailOffsetsUseFiveSecondsWithEarlyFallbacks(t *testing.T) {
|
||||
got := thumbnailOffsets()
|
||||
want := []float64{5, 1, 0}
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("offsets = %#v, want %#v", got, want)
|
||||
func TestThumbnailOffsetsPreferMiddleFrame(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
duration float64
|
||||
want []float64
|
||||
}{
|
||||
{name: "unknown duration", duration: 0, want: []float64{5, 1, 0}},
|
||||
{name: "long video", duration: 2804.9, want: []float64{1402.45, 5, 1, 0}},
|
||||
{name: "short video", duration: 8.9, want: []float64{4.45, 5, 1, 0}},
|
||||
{name: "middle equals fallback", duration: 10, want: []float64{5, 1, 0}},
|
||||
}
|
||||
for i := range want {
|
||||
if got[i] != want[i] {
|
||||
t.Fatalf("offset[%d] = %.2f, want %.2f", i, got[i], want[i])
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := thumbnailOffsets(tt.duration)
|
||||
if len(got) != len(tt.want) {
|
||||
t.Fatalf("offsets = %#v, want %#v", got, tt.want)
|
||||
}
|
||||
for i := range tt.want {
|
||||
if math.Abs(got[i]-tt.want[i]) > 0.001 {
|
||||
t.Fatalf("offset[%d] = %.2f, want %.2f", i, got[i], tt.want[i])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -39,8 +39,8 @@ func TestThumbWorkerUpdatesThumbnailAndDurationWithoutChangingPreviewStatus(t *t
|
||||
if gen.thumbnailVideoID != video.ID {
|
||||
t.Fatalf("thumbnail video id = %q, want %q", gen.thumbnailVideoID, video.ID)
|
||||
}
|
||||
if gen.thumbnailDuration != 0 {
|
||||
t.Fatalf("thumbnail duration = %.1f, want fixed-offset thumbnail generation", gen.thumbnailDuration)
|
||||
if gen.thumbnailDuration != 42 {
|
||||
t.Fatalf("thumbnail duration = %.1f, want probed duration", gen.thumbnailDuration)
|
||||
}
|
||||
if gen.probeCalls != 1 {
|
||||
t.Fatalf("probe calls = %d, want 1 for thumbnail generation", gen.probeCalls)
|
||||
@@ -89,6 +89,35 @@ func TestThumbWorkerBackfillsDurationWhenThumbnailAlreadyExists(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbWorkerGeneratesThumbnailForCrawlerLikeVideoID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "scriptcrawler-crawler-main-source001")
|
||||
|
||||
gen := &fakeThumbGenerator{probeDuration: 42}
|
||||
drv := &previewFakeDrive{kind: "pikpak"}
|
||||
worker := NewThumbWorker(gen, cat, drv)
|
||||
|
||||
worker.process(ctx, video)
|
||||
|
||||
got, err := cat.GetVideo(ctx, video.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "/p/thumb/"+video.ID {
|
||||
t.Fatalf("thumbnail = %q, want generated thumb URL", got.ThumbnailURL)
|
||||
}
|
||||
ready, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "ready", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list ready thumbnails: %v", err)
|
||||
}
|
||||
if len(ready) != 1 || ready[0].ID != video.ID {
|
||||
t.Fatalf("ready thumbnails = %#v, want only %s", ready, video.ID)
|
||||
}
|
||||
if gen.probeCalls != 1 || gen.generateCalls != 1 {
|
||||
t.Fatalf("generator calls probe=%d generate=%d, want one thumbnail generation", gen.probeCalls, gen.generateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbWorkerSkipsDurationBackfillWhenExistingThumbnailCannotBeProbed(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-worker-existing-thumbnail-probe-fails")
|
||||
@@ -320,42 +349,10 @@ func TestPreviewWorkerNeverCallsDriveUploadOrEnsureDir(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreviewWorkerSkipsTeaserForVideoLargerThanFiveGiB(t *testing.T) {
|
||||
func TestPreviewWorkerGeneratesTeaserForLargeVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "preview-large-video")
|
||||
video.Size = maxPreviewTeaserSizeBytes + 1
|
||||
if err := cat.UpsertVideo(ctx, video); err != nil {
|
||||
t.Fatalf("update video: %v", err)
|
||||
}
|
||||
|
||||
gen := &fakeTeaserGenerator{}
|
||||
drv := &previewFakeDrive{}
|
||||
worker := NewWorker(gen, cat, drv)
|
||||
|
||||
worker.process(ctx, video)
|
||||
|
||||
got, err := cat.GetVideo(ctx, video.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.PreviewStatus != previewStatusSkipped {
|
||||
t.Fatalf("preview status = %q, want skipped", got.PreviewStatus)
|
||||
}
|
||||
if got.PreviewLocal != "" {
|
||||
t.Fatalf("preview local = %q, want empty", got.PreviewLocal)
|
||||
}
|
||||
if drv.streamCalls != 0 {
|
||||
t.Fatalf("stream calls = %d, want 0", drv.streamCalls)
|
||||
}
|
||||
if gen.generateCalls != 0 {
|
||||
t.Fatalf("generate calls = %d, want 0", gen.generateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreviewWorkerGeneratesTeaserAtFiveGiBBoundary(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "preview-five-gib-video")
|
||||
video.Size = maxPreviewTeaserSizeBytes
|
||||
video.Size = 6 * 1024 * 1024 * 1024
|
||||
if err := cat.UpsertVideo(ctx, video); err != nil {
|
||||
t.Fatalf("update video: %v", err)
|
||||
}
|
||||
@@ -413,7 +410,7 @@ func TestPreviewWorkerRateLimitLeavesCurrentPendingAndSkipsNextVideo(t *testing.
|
||||
if gen.generateCalls != 1 {
|
||||
t.Fatalf("generate calls = %d, want 1", gen.generateCalls)
|
||||
}
|
||||
assertCooldownAround(t, worker.Status().CooldownUntil, before, 5*time.Minute)
|
||||
assertCooldownAround(t, worker.Status().CooldownUntil, before, 2*time.Hour)
|
||||
|
||||
gen.generateErr = nil
|
||||
worker.process(ctx, &second)
|
||||
@@ -429,7 +426,7 @@ func TestPreviewWorkerRateLimitLeavesCurrentPendingAndSkipsNextVideo(t *testing.
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbWorkerRateLimitCoolsDownFiveMinutes(t *testing.T) {
|
||||
func TestThumbWorkerRateLimitHonorsRetryAfter(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-rate-limit")
|
||||
|
||||
@@ -453,12 +450,12 @@ func TestThumbWorkerRateLimitCoolsDownFiveMinutes(t *testing.T) {
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Fatalf("thumbnail = %q, want unchanged after rate limit", got.ThumbnailURL)
|
||||
}
|
||||
assertCooldownAround(t, worker.Status().CooldownUntil, before, 5*time.Minute)
|
||||
assertCooldownAround(t, worker.Status().CooldownUntil, before, 2*time.Hour)
|
||||
}
|
||||
|
||||
func TestThumbWorkerP115TransientErrorFailsAfterRetryLimit(t *testing.T) {
|
||||
func TestThumbWorkerP115MessageOnlyErrorFailsWithoutCooldown(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-transient")
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-message-only")
|
||||
|
||||
gen := &fakeThumbGenerator{
|
||||
generateErr: errors.New("ffmpeg thumb: exit status 183, stderr: partial file Cannot determine format of input 0:0 after EOF"),
|
||||
@@ -466,69 +463,26 @@ func TestThumbWorkerP115TransientErrorFailsAfterRetryLimit(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "p115"}
|
||||
worker := NewThumbWorker(gen, cat, drv)
|
||||
|
||||
for attempt := 1; attempt <= defaultThumbTransientMediaMaxFailures; attempt++ {
|
||||
worker.rateLimit = rateLimitState{}
|
||||
worker.process(ctx, video)
|
||||
|
||||
if attempt < defaultThumbTransientMediaMaxFailures {
|
||||
pending, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "pending", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending thumbnails: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != video.ID {
|
||||
t.Fatalf("attempt %d pending thumbnails = %#v, want only %s", attempt, pending, video.ID)
|
||||
}
|
||||
missing, err := cat.CountVideosNeedingThumbnail(ctx, video.DriveID)
|
||||
if err != nil {
|
||||
t.Fatalf("count missing thumbnails: %v", err)
|
||||
}
|
||||
if missing != 1 {
|
||||
t.Fatalf("attempt %d missing thumbnails = %d, want 1 before retry limit", attempt, missing)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
}
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
}
|
||||
missing, err := cat.CountVideosNeedingThumbnail(ctx, video.DriveID)
|
||||
if err != nil {
|
||||
t.Fatalf("count missing thumbnails: %v", err)
|
||||
}
|
||||
if missing != 0 {
|
||||
t.Fatalf("missing thumbnails = %d, want 0 after retry limit marks failed", missing)
|
||||
}
|
||||
}
|
||||
|
||||
if gen.generateCalls != defaultThumbTransientMediaMaxFailures {
|
||||
t.Fatalf("generate calls = %d, want %d", gen.generateCalls, defaultThumbTransientMediaMaxFailures)
|
||||
}
|
||||
|
||||
if err := cat.UpdateVideoMeta(ctx, video.ID, catalog.VideoMetaPatch{
|
||||
ThumbnailStatus: "pending",
|
||||
ResetThumbnailFailures: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("reset thumbnail status: %v", err)
|
||||
}
|
||||
worker.rateLimit = rateLimitState{}
|
||||
worker.process(ctx, video)
|
||||
|
||||
pending, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "pending", 0)
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending thumbnails after reset: %v", err)
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != video.ID {
|
||||
t.Fatalf("pending thumbnails after reset = %#v, want only %s", pending, video.ID)
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
}
|
||||
if !worker.Status().CooldownUntil.IsZero() {
|
||||
t.Fatalf("cooldown until = %s, want no cooldown for message-only media error", worker.Status().CooldownUntil)
|
||||
}
|
||||
if gen.generateCalls != 1 {
|
||||
t.Fatalf("generate calls = %d, want 1", gen.generateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbWorkerRequeuesP115TransientErrorBeforeRetryLimit(t *testing.T) {
|
||||
func TestThumbWorkerDoesNotRequeueP115MessageOnlyError(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-requeue")
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-no-requeue")
|
||||
|
||||
gen := &fakeThumbGenerator{
|
||||
generateErr: errors.New("ffmpeg thumb: partial file Cannot determine format of input 0:0 after EOF"),
|
||||
@@ -540,11 +494,8 @@ func TestThumbWorkerRequeuesP115TransientErrorBeforeRetryLimit(t *testing.T) {
|
||||
|
||||
select {
|
||||
case queued := <-worker.ch:
|
||||
if queued.ID != video.ID {
|
||||
t.Fatalf("requeued video id = %q, want %q", queued.ID, video.ID)
|
||||
}
|
||||
t.Fatalf("unexpected requeued video id = %q", queued.ID)
|
||||
default:
|
||||
t.Fatal("expected transient thumbnail failure to requeue the same video")
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, video.ID)
|
||||
@@ -552,14 +503,43 @@ func TestThumbWorkerRequeuesP115TransientErrorBeforeRetryLimit(t *testing.T) {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Fatalf("thumbnail = %q, want empty after transient failure", got.ThumbnailURL)
|
||||
t.Fatalf("thumbnail = %q, want empty after message-only failure", got.ThumbnailURL)
|
||||
}
|
||||
pending, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "pending", 0)
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending thumbnails: %v", err)
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != video.ID {
|
||||
t.Fatalf("pending thumbnails = %#v, want only %s", pending, video.ID)
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbWorkerPikPakMoovAtomErrorFailsWithoutCooldown(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-pikpak-missing-moov")
|
||||
|
||||
mediaErr := errors.New("ffprobe: exit status 1, stderr: moov atom not found Invalid data found when processing input")
|
||||
gen := &fakeThumbGenerator{
|
||||
probeErr: mediaErr,
|
||||
generateErr: mediaErr,
|
||||
}
|
||||
drv := &previewFakeDrive{kind: "pikpak"}
|
||||
worker := NewThumbWorker(gen, cat, drv)
|
||||
|
||||
worker.process(ctx, video)
|
||||
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
}
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
}
|
||||
if !worker.Status().CooldownUntil.IsZero() {
|
||||
t.Fatalf("cooldown until = %s, want no cooldown for invalid PikPak MP4", worker.Status().CooldownUntil)
|
||||
}
|
||||
if gen.generateCalls != 1 {
|
||||
t.Fatalf("generate calls = %d, want 1", gen.generateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -587,6 +567,86 @@ func TestPreviewWorkerP115TransientErrorKeepsVideoPending(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestP123TransientErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "p123"}
|
||||
for _, err := range []error{
|
||||
errors.New("Server returned 403 Forbidden"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("请求太频繁")) {
|
||||
t.Fatal("message-only throttling text should not trigger p123 cooldown")
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid credential")) {
|
||||
t.Fatal("invalid credential should not trigger p123 cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWopanTransientErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "wopan"}
|
||||
for _, err := range []error{
|
||||
errors.New("ffmpeg: Server returned 403 Forbidden"),
|
||||
errors.New("wopan download url: request failed with status: 429 Too Many Requests"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("操作频繁,请稍后重试")) {
|
||||
t.Fatal("message-only throttling text should not trigger wopan cooldown")
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid access token")) {
|
||||
t.Fatal("invalid access token should not trigger wopan cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGuangYaPanTransientErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "guangyapan"}
|
||||
for _, err := range []error{
|
||||
errors.New("ffmpeg: Server returned 403 Forbidden"),
|
||||
errors.New("guangyapan api rate limited: status=429 msg=操作频繁,请稍后重试"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("操作频繁,请稍后重试")) {
|
||||
t.Fatal("message-only throttling text should not trigger guangyapan cooldown")
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid access token")) {
|
||||
t.Fatal("invalid access token should not trigger guangyapan cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoogleDriveMediaErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "googledrive"}
|
||||
for _, err := range []error{
|
||||
errors.New("ffmpeg: Server returned 403 Forbidden"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
for _, err := range []error{
|
||||
errors.New("google drive api error: usageLimits userRateLimitExceeded"),
|
||||
errors.New("downloadQuotaExceeded: The download quota for this file has been exceeded"),
|
||||
errors.New("sharingRateLimitExceeded"),
|
||||
} {
|
||||
if driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("message-only google drive error %v should not trigger cooldown", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid credentials")) {
|
||||
t.Fatal("invalid credentials should not trigger googledrive cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
func assertCooldownAround(t *testing.T, until time.Time, before time.Time, want time.Duration) {
|
||||
t.Helper()
|
||||
if until.IsZero() {
|
||||
|
||||
@@ -147,13 +147,19 @@ func (p *Proxy) ServeStream(w http.ResponseWriter, r *http.Request, driveID, fil
|
||||
// CDN 不校验请求头,直连可获得最佳带宽并避免占用 backend 出站
|
||||
// - onedrive:Microsoft Graph 返回的 @microsoft.graph.downloadUrl 是短期
|
||||
// 免鉴权下载 URL,不需要后端继续代传视频字节
|
||||
// - p123:123网盘 download_info 返回的下载页会再跳 CDN;driver 已在后端
|
||||
// 先解出最终 Location,浏览器可直接 302 到该短期地址
|
||||
// - wopan:联通网盘 GetDownloadUrlV2 返回的是短期直链,OpenList 也是直接
|
||||
// 将该 URL 交给客户端使用;不需要后端持续代传视频字节
|
||||
// - guangyapan:光鸭 get_res_download_url 返回 signedURL / downloadUrl,
|
||||
// 浏览器可直接访问,不需要后端持续代传视频字节
|
||||
//
|
||||
// 其余网盘(如沃盘 / 夸克等)仍走反代,因为它们的下载
|
||||
// 其余网盘(如夸克等)仍走反代,因为它们的下载
|
||||
// 链接通常需要随请求带上后端持有的 Cookie / Authorization / Range
|
||||
// 的特殊处理,浏览器拿不到这些上下文。
|
||||
func shouldRedirect(d drives.Drive) bool {
|
||||
switch d.Kind() {
|
||||
case "p115", "pikpak", "onedrive":
|
||||
case "p115", "pikpak", "onedrive", "p123", "wopan", "guangyapan":
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@@ -214,7 +220,7 @@ func (p *Proxy) serve(w http.ResponseWriter, r *http.Request, link *drives.Strea
|
||||
_, _ = io.Copy(w, resp.Body)
|
||||
}
|
||||
|
||||
// ServeLocal 服务本地 teaser 文件
|
||||
// ServeLocal 服务本地预览视频文件
|
||||
func (p *Proxy) ServeLocal(w http.ResponseWriter, r *http.Request, path string) {
|
||||
http.ServeFile(w, r, path)
|
||||
}
|
||||
|
||||
@@ -176,6 +176,81 @@ func TestServeStreamRedirectsOneDrive(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeStreamRedirectsP123(t *testing.T) {
|
||||
reg := NewRegistry()
|
||||
drv := &proxyFakeSimpleDrive{
|
||||
kind: "p123",
|
||||
url: "https://cdn.123pan.example/video.mp4",
|
||||
}
|
||||
reg.Set("p123", drv)
|
||||
|
||||
p := New(reg)
|
||||
req := httptest.NewRequest(http.MethodGet, "/p/stream/p123/file-1", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
p.ServeStream(rr, req, "p123", "file-1")
|
||||
|
||||
if rr.Code != http.StatusFound {
|
||||
t.Fatalf("status = %d, want %d", rr.Code, http.StatusFound)
|
||||
}
|
||||
if got := rr.Header().Get("Location"); got != "https://cdn.123pan.example/video.mp4" {
|
||||
t.Fatalf("Location = %q", got)
|
||||
}
|
||||
if drv.calls != 1 {
|
||||
t.Fatalf("link calls = %d, want 1", drv.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeStreamRedirectsWopan(t *testing.T) {
|
||||
reg := NewRegistry()
|
||||
drv := &proxyFakeSimpleDrive{
|
||||
kind: "wopan",
|
||||
url: "https://du.smartont.net:8445/openapi/download?fid=encoded",
|
||||
}
|
||||
reg.Set("wopan", drv)
|
||||
|
||||
p := New(reg)
|
||||
req := httptest.NewRequest(http.MethodGet, "/p/stream/wopan/file-1", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
p.ServeStream(rr, req, "wopan", "file-1")
|
||||
|
||||
if rr.Code != http.StatusFound {
|
||||
t.Fatalf("status = %d, want %d", rr.Code, http.StatusFound)
|
||||
}
|
||||
if got := rr.Header().Get("Location"); got != "https://du.smartont.net:8445/openapi/download?fid=encoded" {
|
||||
t.Fatalf("Location = %q", got)
|
||||
}
|
||||
if drv.calls != 1 {
|
||||
t.Fatalf("link calls = %d, want 1", drv.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeStreamRedirectsGuangYaPan(t *testing.T) {
|
||||
reg := NewRegistry()
|
||||
drv := &proxyFakeSimpleDrive{
|
||||
kind: "guangyapan",
|
||||
url: "https://cdn.guangyapan.example/video.mp4?sign=encoded",
|
||||
}
|
||||
reg.Set("guangyapan", drv)
|
||||
|
||||
p := New(reg)
|
||||
req := httptest.NewRequest(http.MethodGet, "/p/stream/guangyapan/file-1", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
p.ServeStream(rr, req, "guangyapan", "file-1")
|
||||
|
||||
if rr.Code != http.StatusFound {
|
||||
t.Fatalf("status = %d, want %d", rr.Code, http.StatusFound)
|
||||
}
|
||||
if got := rr.Header().Get("Location"); got != "https://cdn.guangyapan.example/video.mp4?sign=encoded" {
|
||||
t.Fatalf("Location = %q", got)
|
||||
}
|
||||
if drv.calls != 1 {
|
||||
t.Fatalf("link calls = %d, want 1", drv.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeStreamServesLocalFilePath(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "video.mp4")
|
||||
if err := os.WriteFile(path, []byte("0123456789"), 0o644); err != nil {
|
||||
|
||||
@@ -2,6 +2,7 @@ package scanner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"log"
|
||||
"path"
|
||||
@@ -23,8 +24,10 @@ type Scanner struct {
|
||||
//
|
||||
// nil / 空集合 → 行为等同于不跳过任何目录。
|
||||
SkipDirIDs map[string]struct{}
|
||||
// 回调:新视频被加入后触发 teaser 生成
|
||||
// 回调:新视频被加入后触发预览视频生成
|
||||
OnNewVideo func(v *catalog.Video)
|
||||
// OnProgress 在扫描进度变化时触发。回调只应读取 Stats 里的计数,不应修改 map 字段。
|
||||
OnProgress func(stats Stats)
|
||||
// ProgressInterval 控制扫描内部 heartbeat 的最小输出间隔。
|
||||
// 0 → 默认 30s;< 0 → 关闭 heartbeat(仅留外层 start / done 两行)。
|
||||
// heartbeat 单行格式:
|
||||
@@ -91,6 +94,9 @@ func (s *Scanner) Run(ctx context.Context, startDirID string) (Stats, error) {
|
||||
driveID = s.Drive.ID()
|
||||
}
|
||||
progress := func(currentDir string) {
|
||||
if s.OnProgress != nil {
|
||||
s.OnProgress(stats)
|
||||
}
|
||||
if interval < 0 {
|
||||
return
|
||||
}
|
||||
@@ -127,8 +133,11 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
}
|
||||
|
||||
for _, e := range entries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if e.IsDir {
|
||||
// 跳过 previews 目录,避免扫到自己生成的 teaser
|
||||
// 跳过 previews 目录,避免扫到自己生成的预览视频
|
||||
if strings.EqualFold(e.Name, "previews") {
|
||||
continue
|
||||
}
|
||||
@@ -137,13 +146,15 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
continue
|
||||
}
|
||||
if err := s.walk(ctx, e.ID, e.Name, stats, progress); err != nil {
|
||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||
return ctxErr
|
||||
}
|
||||
stats.Errors++
|
||||
log.Printf("[scanner] walk %s error: %v", e.Name, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
stats.Scanned++
|
||||
ext := strings.ToLower(path.Ext(e.Name))
|
||||
if !s.Exts[ext] {
|
||||
continue
|
||||
@@ -151,9 +162,22 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
if e.Size <= 0 {
|
||||
continue
|
||||
}
|
||||
stats.Scanned++
|
||||
progress(dirName)
|
||||
stats.SeenFileIDs[e.ID] = struct{}{}
|
||||
|
||||
id := s.Drive.Kind() + "-" + s.Drive.ID() + "-" + e.ID
|
||||
id := s.Drive.Kind() + "-" + s.Drive.ID() + "-" + videoIDFilePart(e.ID)
|
||||
if deleted, err := s.Catalog.IsDeletedVideoCandidate(ctx, id, s.Drive.ID(), e.ID, e.Hash, e.Name, e.Size); err != nil {
|
||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||
return ctxErr
|
||||
}
|
||||
stats.Errors++
|
||||
log.Printf("[scanner] check deleted video %s error: %v", id, err)
|
||||
continue
|
||||
} else if deleted {
|
||||
continue
|
||||
}
|
||||
|
||||
parsed := Parse(e.Name)
|
||||
if parsed.Title == "" {
|
||||
parsed.Title = strings.TrimSuffix(e.Name, ext)
|
||||
@@ -162,45 +186,55 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
if matched, err := s.Catalog.MatchTags(ctx, e.Name+" "+dirName+" "+parsed.Author); err == nil {
|
||||
tags = mergeTags(tags, matched)
|
||||
}
|
||||
if label, ok, err := s.Catalog.EnsureCollectionTag(ctx, dirName); err == nil && ok {
|
||||
tags = mergeTags(tags, []string{label})
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
existing, _ := s.Catalog.GetVideo(ctx, id)
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if existing != nil {
|
||||
patch := catalog.VideoMetaPatch{}
|
||||
if e.Hash != "" && existing.ContentHash == "" {
|
||||
patch.ContentHash = e.Hash
|
||||
existing.ContentHash = e.Hash
|
||||
}
|
||||
if e.Name != "" && existing.FileName == "" {
|
||||
if e.Name != "" && existing.FileName != e.Name {
|
||||
patch.FileName = e.Name
|
||||
existing.FileName = e.Name
|
||||
patch.Title = parsed.Title
|
||||
patch.TitleSet = true
|
||||
patch.Author = parsed.Author
|
||||
patch.AuthorSet = true
|
||||
}
|
||||
// 已存在但轻量元数据空缺时,顺便补齐。
|
||||
if existing.Category == "" && dirName != "" {
|
||||
patch.Category = dirName
|
||||
}
|
||||
if existing.ThumbnailURL == "" && e.ThumbnailURL != "" {
|
||||
patch.ThumbnailURL = e.ThumbnailURL
|
||||
}
|
||||
if patch.Category != "" || patch.ThumbnailURL != "" || patch.ContentHash != "" || patch.FileName != "" {
|
||||
if patch.ContentHash != "" || patch.FileName != "" || patch.TitleSet || patch.AuthorSet {
|
||||
_ = s.Catalog.UpdateVideoMeta(ctx, id, patch)
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if dup := s.findDuplicate(ctx, e.Hash, e.Name, e.Size, id); dup != nil {
|
||||
s.backfillDuplicateThumbnail(ctx, dup, e.ThumbnailURL)
|
||||
continue
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if !sameTags(existing.Tags, tags) {
|
||||
_ = s.Catalog.SetAutoVideoTags(ctx, id, tags)
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if dup := s.findDuplicate(ctx, e.Hash, e.Name, e.Size, id); dup != nil {
|
||||
s.backfillDuplicateThumbnail(ctx, dup, e.ThumbnailURL)
|
||||
continue
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
v := &catalog.Video{
|
||||
@@ -216,18 +250,23 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
Ext: strings.TrimPrefix(ext, "."),
|
||||
Quality: "HD",
|
||||
Size: e.Size,
|
||||
ThumbnailURL: e.ThumbnailURL,
|
||||
PreviewStatus: "pending",
|
||||
Category: dirName,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}
|
||||
if err := s.Catalog.UpsertVideo(ctx, v); err != nil {
|
||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||
return ctxErr
|
||||
}
|
||||
log.Printf("[scanner] upsert %s error: %v", v.Title, err)
|
||||
continue
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.Added++
|
||||
progress(dirName)
|
||||
if s.OnNewVideo != nil {
|
||||
s.OnNewVideo(v)
|
||||
}
|
||||
@@ -268,13 +307,6 @@ func (s *Scanner) findDuplicateByFileSignature(ctx context.Context, fileName str
|
||||
return dup
|
||||
}
|
||||
|
||||
func (s *Scanner) backfillDuplicateThumbnail(ctx context.Context, canonical *catalog.Video, thumbnailURL string) {
|
||||
if canonical.ThumbnailURL != "" || thumbnailURL == "" {
|
||||
return
|
||||
}
|
||||
_ = s.Catalog.UpdateVideoMeta(ctx, canonical.ID, catalog.VideoMetaPatch{ThumbnailURL: thumbnailURL})
|
||||
}
|
||||
|
||||
func sameTags(a, b []string) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
@@ -301,3 +333,10 @@ func mergeTags(lists ...[]string) []string {
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func videoIDFilePart(fileID string) string {
|
||||
if !strings.ContainsAny(fileID, `/\`+"\x00") {
|
||||
return fileID
|
||||
}
|
||||
return "b64_" + base64.RawURLEncoding.EncodeToString([]byte(fileID))
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package scanner
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
@@ -14,7 +15,7 @@ import (
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
func TestRunPersistsRemoteThumbnailFromDriveEntry(t *testing.T) {
|
||||
func TestRunIgnoresRemoteThumbnailFromDriveEntry(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -50,8 +51,8 @@ func TestRunPersistsRemoteThumbnailFromDriveEntry(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "https://thumbnail.example/clip.jpg" {
|
||||
t.Fatalf("thumbnail = %q, want remote thumbnail", got.ThumbnailURL)
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Fatalf("thumbnail = %q, want empty so local thumbnail worker regenerates it", got.ThumbnailURL)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,7 +91,184 @@ func TestRunIgnoresZeroSizeVideoFiles(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunBackfillsRemoteThumbnailForExistingVideo(t *testing.T) {
|
||||
func TestRunScannedCountsOnlyVideoCandidates(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
drv := &scannerFakeDrive{
|
||||
entries: []drives.Entry{
|
||||
{ID: "file-1", Name: "clip.mp4", Size: 123},
|
||||
{ID: "file-2", Name: "notes.txt", Size: 123},
|
||||
{ID: "file-3", Name: "empty.mp4", Size: 0},
|
||||
},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
stats, err := sc.Run(ctx, "")
|
||||
if err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
if stats.Scanned != 1 {
|
||||
t.Fatalf("scanned = %d, want one non-empty video candidate", stats.Scanned)
|
||||
}
|
||||
if stats.Added != 1 {
|
||||
t.Fatalf("added = %d, want one added video", stats.Added)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunUsesPathSafeVideoIDForUnsafeFileID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
drv := &scannerFakeDrive{
|
||||
entries: []drives.Entry{{
|
||||
ID: "fid/with space",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
stats, err := sc.Run(ctx, "")
|
||||
if err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
if stats.Added != 1 {
|
||||
t.Fatalf("added = %d, want 1", stats.Added)
|
||||
}
|
||||
if _, ok := stats.SeenFileIDs["fid/with space"]; !ok {
|
||||
t.Fatalf("seen file ids = %#v, want original file id", stats.SeenFileIDs)
|
||||
}
|
||||
|
||||
wantID := "fake-drive-b64_ZmlkL3dpdGggc3BhY2U"
|
||||
got, err := cat.GetVideo(ctx, wantID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video %s: %v", wantID, err)
|
||||
}
|
||||
if strings.Contains(got.ID, "/") {
|
||||
t.Fatalf("video id = %q, must not contain slash", got.ID)
|
||||
}
|
||||
if got.FileID != "fid/with space" {
|
||||
t.Fatalf("file id = %q, want original", got.FileID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunStopsWhenContextCanceledDuringFileLoop(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
drv := &scannerFakeDrive{
|
||||
entries: []drives.Entry{
|
||||
{ID: "file-1", Name: "one.mp4", Size: 123},
|
||||
{ID: "file-2", Name: "two.mp4", Size: 123},
|
||||
{ID: "file-3", Name: "three.mp4", Size: 123},
|
||||
},
|
||||
}
|
||||
callbacks := 0
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, func(*catalog.Video) {
|
||||
callbacks++
|
||||
cancel()
|
||||
})
|
||||
|
||||
stats, err := sc.Run(ctx, "")
|
||||
|
||||
if !errors.Is(err, context.Canceled) {
|
||||
t.Fatalf("scan error = %v, want context.Canceled", err)
|
||||
}
|
||||
if stats.Added != 1 || callbacks != 1 {
|
||||
t.Fatalf("added=%d callbacks=%d, want exactly one video before cancellation", stats.Added, callbacks)
|
||||
}
|
||||
if _, err := cat.GetVideo(context.Background(), "fake-drive-file-1"); err != nil {
|
||||
t.Fatalf("first video should be persisted before cancellation: %v", err)
|
||||
}
|
||||
if _, err := cat.GetVideo(context.Background(), "fake-drive-file-2"); err != sql.ErrNoRows {
|
||||
t.Fatalf("second video lookup error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
if _, err := cat.GetVideo(context.Background(), "fake-drive-file-3"); err != sql.ErrNoRows {
|
||||
t.Fatalf("third video lookup error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunSkipsAdminDeletedVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: "fake-drive-file-1",
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
FileName: "clip.mp4",
|
||||
ContentHash: "HASH-1",
|
||||
Title: "Deleted Clip",
|
||||
Size: 123,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
if err := cat.DeleteVideoWithTombstone(ctx, "fake-drive-file-1"); err != nil {
|
||||
t.Fatalf("delete with tombstone: %v", err)
|
||||
}
|
||||
|
||||
drv := &scannerFakeDrive{
|
||||
entries: []drives.Entry{{
|
||||
ID: "file-1",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
Hash: "hash-1",
|
||||
MimeType: "video/mp4",
|
||||
ModTime: now,
|
||||
}},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
stats, err := sc.Run(ctx, "")
|
||||
if err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
if stats.Added != 0 {
|
||||
t.Fatalf("added = %d, want 0", stats.Added)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, "fake-drive-file-1"); err != sql.ErrNoRows {
|
||||
t.Fatalf("deleted video was recreated, get error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunDoesNotBackfillRemoteThumbnailForExistingVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -140,8 +318,69 @@ func TestRunBackfillsRemoteThumbnailForExistingVideo(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "https://thumbnail.example/backfilled.jpg" {
|
||||
t.Fatalf("thumbnail = %q, want backfilled remote thumbnail", got.ThumbnailURL)
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Fatalf("thumbnail = %q, want empty so local thumbnail worker regenerates it", got.ThumbnailURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunSyncsRenamedExistingVideoMetadata(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: "fake-drive-file-1",
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
FileName: "old-name - Old Author.mp4",
|
||||
Title: "old-name",
|
||||
Author: "Old Author",
|
||||
PreviewStatus: "pending",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
drv := &scannerFakeDrive{
|
||||
entries: []drives.Entry{{
|
||||
ID: "file-1",
|
||||
Name: "[4K] renamed clip.mp4",
|
||||
Size: 123,
|
||||
ModTime: now,
|
||||
}},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
stats, err := sc.Run(ctx, "")
|
||||
if err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
if stats.Added != 0 {
|
||||
t.Fatalf("added = %d, want existing video to be updated in place", stats.Added)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, "fake-drive-file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.FileName != "[4K] renamed clip.mp4" {
|
||||
t.Fatalf("file_name = %q, want remote name", got.FileName)
|
||||
}
|
||||
if got.Title != "renamed clip" {
|
||||
t.Fatalf("title = %q, want parsed title from remote name", got.Title)
|
||||
}
|
||||
if got.Author != "" {
|
||||
t.Fatalf("author = %q, want cleared author from remote name without author suffix", got.Author)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -196,7 +435,7 @@ func TestRunReplacesExistingVideoTagsWithFixedFilenameTags(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunAddsShortCollectionDirectoryAsTag(t *testing.T) {
|
||||
func TestRunDoesNotCreateTagFromDirectoryName(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -214,7 +453,6 @@ func TestRunAddsShortCollectionDirectoryAsTag(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "Existing",
|
||||
Category: "sunny",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -245,84 +483,6 @@ func TestRunAddsShortCollectionDirectoryAsTag(t *testing.T) {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, "fake-drive-file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if !sameStrings(got.Tags, []string{"sunny"}) {
|
||||
t.Fatalf("tags = %#v, want sunny", got.Tags)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunDoesNotRecreateDeletedCollectionDirectoryTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
now := time.Now()
|
||||
for _, id := range []string{"existing-1", "existing-2"} {
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: id,
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "Existing",
|
||||
Category: "sunny",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed existing sunny video: %v", err)
|
||||
}
|
||||
}
|
||||
if label, ok, err := cat.EnsureCollectionTag(ctx, "sunny"); err != nil || !ok || label != "sunny" {
|
||||
t.Fatalf("ensure collection = %q, %v, %v; want sunny true nil", label, ok, err)
|
||||
}
|
||||
tags, err := cat.ListTags(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("list tags: %v", err)
|
||||
}
|
||||
var tagID int64
|
||||
for _, tag := range tags {
|
||||
if tag.Label == "sunny" {
|
||||
tagID = tag.ID
|
||||
break
|
||||
}
|
||||
}
|
||||
if tagID == 0 {
|
||||
t.Fatal("sunny tag not found before delete")
|
||||
}
|
||||
if _, err := cat.DeleteTag(ctx, tagID); err != nil {
|
||||
t.Fatalf("delete tag: %v", err)
|
||||
}
|
||||
|
||||
drv := &scannerTreeFakeDrive{
|
||||
entries: map[string][]drives.Entry{
|
||||
"root": {{
|
||||
ID: "dir-1",
|
||||
Name: "sunny",
|
||||
IsDir: true,
|
||||
}},
|
||||
"dir-1": {{
|
||||
ID: "file-1",
|
||||
ParentID: "dir-1",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
ModTime: now,
|
||||
}},
|
||||
},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
if _, err := sc.Run(ctx, ""); err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, "fake-drive-file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
@@ -330,15 +490,6 @@ func TestRunDoesNotRecreateDeletedCollectionDirectoryTag(t *testing.T) {
|
||||
if len(got.Tags) != 0 {
|
||||
t.Fatalf("tags = %#v, want none", got.Tags)
|
||||
}
|
||||
tags, err = cat.ListTags(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("list tags after scan: %v", err)
|
||||
}
|
||||
for _, tag := range tags {
|
||||
if tag.Label == "sunny" {
|
||||
t.Fatal("deleted collection tag was recreated during scan")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunMapsAVCodeDirectoryToAVTag(t *testing.T) {
|
||||
@@ -359,7 +510,6 @@ func TestRunMapsAVCodeDirectoryToAVTag(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "Existing",
|
||||
Category: "cc-1750027",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
|
||||
@@ -1,719 +0,0 @@
|
||||
// Package spider91migrate 周期性把 spider91 drive 下载到本地的视频
|
||||
// 上传到一个指定的目标 drive 目录(PikPak、115 或 OneDrive),上传成功后:
|
||||
//
|
||||
// - 改写 catalog 行:drive_id / file_id / content_hash 改成目标盘的;
|
||||
// 视频自身的 id 不变(仍是 spider91-<driveID>-<viewkey>),video_tags、
|
||||
// 收藏、点赞、views 等关联数据全部保留
|
||||
// - 删除本地 mp4(spider91/<id>/videos/<viewkey>.<ext>)和 thumb(spider91/<id>/thumbs/<viewkey>.jpg)
|
||||
//
|
||||
// 之后回放时,videoSource() 自动落到 /p/stream/<target>/<file_id>,
|
||||
// proxy 层走对应盘的直链 / 302 直连。
|
||||
//
|
||||
// 下次目标盘扫盘时,scanner 通过 (content_hash) / (file_name+size)
|
||||
// 已有的 findDuplicate 兜底逻辑,不会为同一物理文件再建一行。
|
||||
package spider91migrate
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/drives/onedrive"
|
||||
"github.com/video-site/backend/internal/drives/p115"
|
||||
"github.com/video-site/backend/internal/drives/pikpak"
|
||||
"github.com/video-site/backend/internal/drives/spider91"
|
||||
)
|
||||
|
||||
// uploadTarget 是 migrator 调用目标 drive 的最小接口。任何一种"接收 spider91 上传"的
|
||||
// 网盘都要实现它;当前 PikPak 和 115 各自通过适配器满足。
|
||||
//
|
||||
// 这一层抽象把"迁移调用方"和"具体盘的 SDK 协议"解耦:
|
||||
// - PikPak 走 GCID + OSS PutObject(pikpak.UploadResult)
|
||||
// - 115 走 SHA1 + 秒传 / OSS / 分片(p115.UploadResult)
|
||||
// - OneDrive 走 SHA1 + 小文件 PUT / 大文件 upload session
|
||||
//
|
||||
// 各家返回值都被归一成本地的 UploadResult,并在 catalog 改写阶段统一处理。
|
||||
type uploadTarget interface {
|
||||
ID() string
|
||||
Kind() string
|
||||
RootID() string
|
||||
EnsureDir(ctx context.Context, pathFromRoot string) (string, error)
|
||||
UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error)
|
||||
Rename(ctx context.Context, fileID, newName string) error
|
||||
}
|
||||
|
||||
// UploadResult is the normalised return value of
// uploadTarget.UploadAndReportHash.
type UploadResult struct {
	// FileID is the ID of the newly created file on the target drive.
	FileID string
	// Hash is the GCID (PikPak) or hex SHA1 (115 / OneDrive); it is written
	// to catalog.content_hash and used for cross-drive de-duplication.
	Hash string
	// Size is the number of bytes actually uploaded.
	Size int64
}
|
||||
|
||||
// spider91UploadDirName is the directory path handed to uploadTarget.EnsureDir
// to obtain the parent folder for migrated spider91 videos.
const spider91UploadDirName = "91 Spider"
|
||||
|
||||
// pikpakAdapter / p115Adapter / onedriveAdapter 把具体 driver 包装成 uploadTarget。
|
||||
//
|
||||
// 之所以不让 driver 直接实现 uploadTarget:
|
||||
//
|
||||
// 1. 各 driver 的 UploadAndReportXxx 返回的是各自包内的 UploadResult 类型,
|
||||
// 直接共用同名同签名方法会引入循环依赖;
|
||||
// 2. driver 包不应该感知 spider91migrate 这一层业务定义。
|
||||
type pikpakAdapter struct {
|
||||
d *pikpak.Driver
|
||||
}
|
||||
|
||||
func (a *pikpakAdapter) ID() string { return a.d.ID() }
|
||||
func (a *pikpakAdapter) Kind() string { return a.d.Kind() }
|
||||
func (a *pikpakAdapter) RootID() string { return a.d.RootID() }
|
||||
func (a *pikpakAdapter) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
return a.d.EnsureDir(ctx, pathFromRoot)
|
||||
}
|
||||
func (a *pikpakAdapter) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
res, err := a.d.UploadAndReportHash(ctx, parentID, name, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
return UploadResult{FileID: res.FileID, Hash: res.Hash, Size: res.Size}, nil
|
||||
}
|
||||
func (a *pikpakAdapter) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return a.d.Rename(ctx, fileID, newName)
|
||||
}
|
||||
|
||||
type p115Adapter struct {
|
||||
d *p115.Driver
|
||||
}
|
||||
|
||||
func (a *p115Adapter) ID() string { return a.d.ID() }
|
||||
func (a *p115Adapter) Kind() string { return a.d.Kind() }
|
||||
func (a *p115Adapter) RootID() string { return a.d.RootID() }
|
||||
func (a *p115Adapter) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
return a.d.EnsureDir(ctx, pathFromRoot)
|
||||
}
|
||||
func (a *p115Adapter) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
res, err := a.d.UploadAndReportSha1(ctx, parentID, name, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
return UploadResult{FileID: res.FileID, Hash: res.Sha1, Size: res.Size}, nil
|
||||
}
|
||||
func (a *p115Adapter) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return a.d.Rename(ctx, fileID, newName)
|
||||
}
|
||||
|
||||
type onedriveAdapter struct {
|
||||
d *onedrive.Driver
|
||||
}
|
||||
|
||||
func (a *onedriveAdapter) ID() string { return a.d.ID() }
|
||||
func (a *onedriveAdapter) Kind() string { return a.d.Kind() }
|
||||
func (a *onedriveAdapter) RootID() string { return a.d.RootID() }
|
||||
func (a *onedriveAdapter) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
return a.d.EnsureDir(ctx, pathFromRoot)
|
||||
}
|
||||
func (a *onedriveAdapter) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
res, err := a.d.UploadAndReportHash(ctx, parentID, name, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
return UploadResult{FileID: res.FileID, Hash: res.Hash, Size: res.Size}, nil
|
||||
}
|
||||
func (a *onedriveAdapter) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return a.d.Rename(ctx, fileID, newName)
|
||||
}
|
||||
|
||||
// adaptUploadTarget 把通用 drive 包装成 uploadTarget。
|
||||
// 不支持的盘 kind 返回 error;调用方静默跳过。
|
||||
func adaptUploadTarget(d drives.Drive) (uploadTarget, error) {
|
||||
switch v := d.(type) {
|
||||
case *pikpak.Driver:
|
||||
return &pikpakAdapter{d: v}, nil
|
||||
case *p115.Driver:
|
||||
return &p115Adapter{d: v}, nil
|
||||
case *onedrive.Driver:
|
||||
return &onedriveAdapter{d: v}, nil
|
||||
case uploadTarget:
|
||||
// 测试或自定义实现可以直接传入;优先使用具体类型分支以拿到适配器。
|
||||
return v, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("drive %q kind=%s does not support spider91 upload", d.ID(), d.Kind())
|
||||
}
|
||||
}
|
||||
|
||||
// Registry 是 worker 用来按 driveID 取 driver 的最小依赖。
|
||||
type Registry interface {
|
||||
Get(id string) (drives.Drive, bool)
|
||||
All() []drives.Drive
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Catalog *catalog.Catalog
|
||||
Registry Registry
|
||||
GetTargetDriveID func() string // 通常对应 App.Spider91UploadDriveID()
|
||||
// Interval 已废弃 —— 旧版迁移 worker 是周期 ticker,新版只通过 nightly
|
||||
// pipeline 调用 RunOnce,不再有内置定时器。保留字段不删是为了兼容外
|
||||
// 部 yaml / 测试代码里仍传值的场景。
|
||||
Interval time.Duration
|
||||
BatchLimit int // 单轮最多迁多少个,0 时默认 50
|
||||
// KeepLatestN 是每个 spider91 drive 在本地保留的最新视频数。
|
||||
// 超过的部分中"已迁移"的会被清理;未迁移的不动。0 时默认 15;< 0 关闭清理。
|
||||
KeepLatestN int
|
||||
// CaptchaCooldown 是迁移 worker 在遇到 PikPak captcha 错误(error_code
|
||||
// 4002 / 9)后整体进入冷却的时长。冷却期间 runOnce 直接返回,不再发起任何
|
||||
// PikPak API 请求,避免被进一步风控。0 时默认 5 分钟;< 0 关闭冷却(仅用于测试)。
|
||||
CaptchaCooldown time.Duration
|
||||
OnMigrated func(videoID string)
|
||||
}
|
||||
|
||||
type Migrator struct {
|
||||
cfg Config
|
||||
mu sync.Mutex
|
||||
running bool
|
||||
|
||||
// cooldownMu 保护 cooldownUntil。captcha 冷却的语义:
|
||||
// - migrateDrive 遇到上传失败且 pikpak.IsCaptchaError(err) == true 时
|
||||
// 调 setCooldown,未来 cfg.CaptchaCooldown 内 runOnce 直接 noop
|
||||
// - 一次冷却期内只打印一行进入日志和一行恢复日志,避免之前那种
|
||||
// "每秒一条 4002" 的刷屏
|
||||
cooldownMu sync.Mutex
|
||||
cooldownUntil time.Time
|
||||
cooldownLogged bool
|
||||
}
|
||||
|
||||
func New(cfg Config) *Migrator {
|
||||
if cfg.BatchLimit == 0 {
|
||||
cfg.BatchLimit = 50
|
||||
}
|
||||
if cfg.KeepLatestN == 0 {
|
||||
cfg.KeepLatestN = 15
|
||||
}
|
||||
if cfg.CaptchaCooldown == 0 {
|
||||
cfg.CaptchaCooldown = 5 * time.Minute
|
||||
}
|
||||
return &Migrator{
|
||||
cfg: cfg,
|
||||
}
|
||||
}
|
||||
|
||||
// inCooldown 返回当前是否处于 captcha 冷却期,以及冷却结束时间。
|
||||
// 冷却期间应该跳过整个 runOnce —— 不要列盘、不要尝试上传,
|
||||
// 让 PikPak 喘口气。
|
||||
func (m *Migrator) inCooldown() (bool, time.Time) {
|
||||
m.cooldownMu.Lock()
|
||||
defer m.cooldownMu.Unlock()
|
||||
return time.Now().Before(m.cooldownUntil), m.cooldownUntil
|
||||
}
|
||||
|
||||
// cooldownState 返回当前冷却状态。若发现冷却已经过期,会清掉状态并让
|
||||
// 调用方打印一次恢复日志。
|
||||
func (m *Migrator) cooldownState() (active bool, until time.Time, resumed bool) {
|
||||
m.cooldownMu.Lock()
|
||||
defer m.cooldownMu.Unlock()
|
||||
if m.cooldownUntil.IsZero() {
|
||||
return false, time.Time{}, false
|
||||
}
|
||||
until = m.cooldownUntil
|
||||
if time.Now().Before(until) {
|
||||
return true, until, false
|
||||
}
|
||||
m.cooldownUntil = time.Time{}
|
||||
m.cooldownLogged = false
|
||||
return false, until, true
|
||||
}
|
||||
|
||||
// setCooldown 把冷却结束时间往后推 cfg.CaptchaCooldown,并返回结束时间。
|
||||
// 当 cfg.CaptchaCooldown < 0(仅测试用)时不改任何状态、返回零值。
|
||||
func (m *Migrator) setCooldown() time.Time {
|
||||
if m.cfg.CaptchaCooldown < 0 {
|
||||
return time.Time{}
|
||||
}
|
||||
m.cooldownMu.Lock()
|
||||
defer m.cooldownMu.Unlock()
|
||||
m.cooldownUntil = time.Now().Add(m.cfg.CaptchaCooldown)
|
||||
m.cooldownLogged = false
|
||||
return m.cooldownUntil
|
||||
}
|
||||
|
||||
// markCooldownLogged 是 runOnce 用来只打一次"在冷却中"日志的小工具。
|
||||
// 第一次返回 false(应该打),第二次起返回 true(不再打),冷却到期 / 重新设置时复位。
|
||||
func (m *Migrator) markCooldownLogged() bool {
|
||||
m.cooldownMu.Lock()
|
||||
defer m.cooldownMu.Unlock()
|
||||
if m.cooldownLogged {
|
||||
return true
|
||||
}
|
||||
m.cooldownLogged = true
|
||||
return false
|
||||
}
|
||||
|
||||
// Trigger 安排一次"立即跑"。多次调用会被合并成一次(channel buffer=1)。
|
||||
// RunOnce 跑一次完整迁移:列出所有 spider91 drive,对每个超过 KeepLatestN 的旧
|
||||
// 视频上传到目标 drive,事务性改写 catalog 行,删本地文件。
|
||||
//
|
||||
// 这是上层 nightly 流水线 Phase 3 的入口;不再有周期 ticker / Trigger 通道。
|
||||
// captcha cooldown 状态在单次 RunOnce 内仍生效(多 drive 时遇到 4002 立即停整轮);
|
||||
// 跨调用持久 5 分钟,下次 RunOnce 命中冷却期会直接 noop。
|
||||
//
|
||||
// 当前实现不会向调用方返回 error —— 单条迁移失败已在内部记日志并跳过;
|
||||
// 整轮被 cooldown / context 取消时也通过日志可观测。保留 error 返回签名是为
|
||||
// 给未来需要把 nightly 失败状态展示给 admin 用。
|
||||
func (m *Migrator) RunOnce(ctx context.Context) error {
|
||||
m.runOnce(ctx)
|
||||
return nil
|
||||
}
|
||||
|
||||
// runOnce 单轮:扫所有 spider91 drive,对每条还有本地文件的视频做迁移。
|
||||
//
|
||||
// 互斥保证:同一 Migrator 内不会并发跑两轮(避免重复上传)。
|
||||
func (m *Migrator) runOnce(ctx context.Context) {
|
||||
m.mu.Lock()
|
||||
if m.running {
|
||||
m.mu.Unlock()
|
||||
return
|
||||
}
|
||||
m.running = true
|
||||
m.mu.Unlock()
|
||||
defer func() {
|
||||
m.mu.Lock()
|
||||
m.running = false
|
||||
m.mu.Unlock()
|
||||
}()
|
||||
|
||||
// captcha 冷却期间整轮跳过 —— 不做任何 PikPak API 调用、不做本地清理,
|
||||
// 等冷却结束。这样从用户视角看:进入冷却 → 一行日志 → 完全静默 → 冷却
|
||||
// 结束自然恢复。避免之前每秒一条 4002 的日志雪崩。
|
||||
if active, until, resumed := m.cooldownState(); active {
|
||||
if !m.markCooldownLogged() {
|
||||
log.Printf("[spider91migrate] captcha cooldown active until %s, skipping run", until.Format(time.RFC3339))
|
||||
}
|
||||
return
|
||||
} else if resumed {
|
||||
log.Printf("[spider91migrate] captcha cooldown ended at %s, resuming migration", until.Format(time.RFC3339))
|
||||
}
|
||||
|
||||
target, pp, err := m.resolveTarget()
|
||||
if err != nil {
|
||||
// 没目标就静默 —— 用户选择了本地保存,或还没配 115/PikPak drive。
|
||||
return
|
||||
}
|
||||
|
||||
migrated := 0
|
||||
for _, src := range m.spider91Drives() {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
n, err := m.migrateDrive(ctx, src, target, pp)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] drive=%s migrate batch error: %v", src.ID(), err)
|
||||
}
|
||||
migrated += n
|
||||
if active, _ := m.inCooldown(); active {
|
||||
if migrated > 0 {
|
||||
log.Printf("[spider91migrate] migrated %d video(s) to drive=%s", migrated, target)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
if migrated > 0 {
|
||||
log.Printf("[spider91migrate] migrated %d video(s) to drive=%s", migrated, target)
|
||||
}
|
||||
|
||||
// 收尾:扫每个 spider91 drive 的本地目录,把 catalog 已经迁到别处但本地
|
||||
// 仍有残留的孤儿文件清掉。这是纯防御性兜底——正常路径下 migrateDrive
|
||||
// 已经在迁移成功后立刻 CleanupSpider91Local,不会留孤儿。
|
||||
for _, src := range m.spider91Drives() {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
deleted, err := m.cleanupOldLocalVideos(ctx, src)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] cleanup drive=%s: %v", src.ID(), err)
|
||||
}
|
||||
if deleted > 0 {
|
||||
log.Printf("[spider91migrate] cleanup drive=%s deleted %d orphan local file(s)", src.ID(), deleted)
|
||||
}
|
||||
}
|
||||
|
||||
// 回填:把已迁移到 PikPak 的 spider91-* 视频里文件名仍是旧格式
|
||||
// (比如刚迁完没改、或人工导入)的统一改成方案 B 期望的格式。
|
||||
// 这一步幂等:已经是期望格式的不会再调 Rename。
|
||||
if renamed, err := m.backfillFileNames(ctx, target, pp); err != nil {
|
||||
log.Printf("[spider91migrate] backfill names: %v", err)
|
||||
} else if renamed > 0 {
|
||||
log.Printf("[spider91migrate] backfilled %d %s file name(s) to desired format", renamed, m.targetKindForLog())
|
||||
}
|
||||
}
|
||||
|
||||
// targetKindForLog 把当前目标盘 kind 转成对人友好的简称,用于日志。
|
||||
// 解析失败时回退 "target"。
|
||||
func (m *Migrator) targetKindForLog() string {
|
||||
if m.cfg.GetTargetDriveID == nil || m.cfg.Registry == nil {
|
||||
return "target"
|
||||
}
|
||||
id := m.cfg.GetTargetDriveID()
|
||||
if id == "" {
|
||||
return "target"
|
||||
}
|
||||
d, ok := m.cfg.Registry.Get(id)
|
||||
if !ok {
|
||||
return "target"
|
||||
}
|
||||
return d.Kind()
|
||||
}
|
||||
|
||||
// resolveTarget 返回 (target drive ID, target uploadTarget, err)。
|
||||
// 没设置、drive 找不到,或 drive 类型不支持上传时返回 err(调用方静默跳过)。
|
||||
func (m *Migrator) resolveTarget() (string, uploadTarget, error) {
|
||||
if m.cfg.GetTargetDriveID == nil {
|
||||
return "", nil, errors.New("no target getter")
|
||||
}
|
||||
id := m.cfg.GetTargetDriveID()
|
||||
if id == "" {
|
||||
return "", nil, errors.New("target drive not configured")
|
||||
}
|
||||
d, ok := m.cfg.Registry.Get(id)
|
||||
if !ok {
|
||||
return "", nil, fmt.Errorf("target drive %q not in registry", id)
|
||||
}
|
||||
t, err := adaptUploadTarget(d)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
return id, t, nil
|
||||
}
|
||||
|
||||
// spider91Drives 返回当前注册的所有 spider91 driver。
|
||||
func (m *Migrator) spider91Drives() []*spider91.Driver {
|
||||
all := m.cfg.Registry.All()
|
||||
out := make([]*spider91.Driver, 0, len(all))
|
||||
for _, d := range all {
|
||||
if d.Kind() != spider91.Kind {
|
||||
continue
|
||||
}
|
||||
if sd, ok := d.(*spider91.Driver); ok {
|
||||
out = append(out, sd)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// migrateDrive 对单个 spider91 drive 跑一批迁移;返回成功迁移的条数。
|
||||
//
|
||||
// 策略(与"本地缓存最新 N 个"语义一致):
|
||||
// - 列出 spider91 drive 本地 videos/ 目录所有 mp4 文件,按 mtime 降序排
|
||||
// - 跳过最新 KeepLatestN 个:这些是用户希望保留在本地的最新爬取
|
||||
// - 对剩下的(更旧)逐个处理:
|
||||
// - 还没迁移(drive_id 仍是 src.ID())→ 上传到目标盘 + 改 catalog + 删本地
|
||||
// - 已经迁移过但本地还有残留 → 仅删本地(兜底)
|
||||
//
|
||||
// KeepLatestN < 0 时不保护任何本地文件,全部尝试迁移(旧行为,主要给测试用)。
|
||||
func (m *Migrator) migrateDrive(ctx context.Context, src *spider91.Driver, targetDriveID string, pp uploadTarget) (int, error) {
|
||||
keepN := m.cfg.KeepLatestN
|
||||
if keepN < 0 {
|
||||
keepN = 0
|
||||
}
|
||||
|
||||
type localFile struct {
|
||||
name string
|
||||
modTime time.Time
|
||||
}
|
||||
|
||||
entries, err := os.ReadDir(src.VideosDir())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return 0, nil
|
||||
}
|
||||
return 0, fmt.Errorf("read videos dir: %w", err)
|
||||
}
|
||||
|
||||
files := make([]localFile, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
info, err := e.Info()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
files = append(files, localFile{name: e.Name(), modTime: info.ModTime()})
|
||||
}
|
||||
|
||||
// 本地数量没超过 keepN 时不动任何文件 —— 这条是 KeepLatestN 语义的核心
|
||||
if m.cfg.KeepLatestN >= 0 && len(files) <= keepN {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// 按 mtime 降序:最新的排前面,保留前 keepN 个
|
||||
sort.Slice(files, func(i, j int) bool { return files[i].modTime.After(files[j].modTime) })
|
||||
|
||||
// 候选 = 跳过最新 keepN 个之外的(更旧的)。KeepLatestN < 0 时 candidates=files。
|
||||
skip := keepN
|
||||
if m.cfg.KeepLatestN < 0 {
|
||||
skip = 0
|
||||
}
|
||||
candidates := files
|
||||
if skip < len(files) {
|
||||
candidates = files[skip:]
|
||||
} else {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
migrated := 0
|
||||
for _, f := range candidates {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return migrated, err
|
||||
}
|
||||
if migrated >= m.cfg.BatchLimit {
|
||||
break
|
||||
}
|
||||
|
||||
viewkey := stripExt(f.name)
|
||||
videoID := "spider91-" + src.ID() + "-" + viewkey
|
||||
v, err := m.cfg.Catalog.GetVideo(ctx, videoID)
|
||||
if err != nil || v == nil {
|
||||
// 找不到 catalog 行:保险起见保留本地,让管理员可见
|
||||
continue
|
||||
}
|
||||
|
||||
if v.DriveID != src.ID() {
|
||||
// catalog 已迁移到别的 drive,但本地还有残留 → 兜底删本地
|
||||
CleanupSpider91Local(src, v.FileID)
|
||||
continue
|
||||
}
|
||||
|
||||
ok, err := m.migrateOne(ctx, v, src, targetDriveID, pp)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] %s: %v", v.ID, err)
|
||||
// captcha 错误(4002 / 9)说明 PikPak 当前正拒绝我们;继续在
|
||||
// 同一轮里尝试其它文件大概率会拿到同样的 4002,并且每多一次
|
||||
// 失败就多一份"被风控加深"的风险。立即中止当前 batch 并
|
||||
// 打开冷却窗口,等 cfg.CaptchaCooldown 之后再重试。
|
||||
if pikpak.IsCaptchaError(err) {
|
||||
until := m.setCooldown()
|
||||
log.Printf("[spider91migrate] drive=%s captcha-blocked, cooling down until %s", src.ID(), until.Format(time.RFC3339))
|
||||
return migrated, nil
|
||||
}
|
||||
continue
|
||||
}
|
||||
if ok {
|
||||
migrated++
|
||||
if m.cfg.OnMigrated != nil {
|
||||
m.cfg.OnMigrated(v.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return migrated, nil
|
||||
}
|
||||
|
||||
// migrateOne 把单条 spider91 视频上传到目标盘并改写 catalog。
|
||||
// 返回 (true, nil) 表示真的迁了一条;(false, nil) 表示跳过(本地文件已不在等);
|
||||
// (false, err) 表示真出错。
|
||||
func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src *spider91.Driver, targetDriveID string, pp uploadTarget) (bool, error) {
|
||||
path, err := src.VideoPath(v.FileID)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("resolve local path: %w", err)
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
// 本地文件被人手动删了,但 catalog 还显示 spider91 drive;
|
||||
// 这种状态没法迁移。跳过即可(保留行让管理员可见,避免数据丢失)。
|
||||
return false, nil
|
||||
}
|
||||
return false, fmt.Errorf("stat local: %w", err)
|
||||
}
|
||||
if info.IsDir() || info.Size() == 0 {
|
||||
return false, fmt.Errorf("local file invalid: dir=%v size=%d", info.IsDir(), info.Size())
|
||||
}
|
||||
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("open local: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// 上传到目标盘 rootID 下的固定 "91 Spider" 子目录。若用户把目标盘 rootID
|
||||
// 配成某个自定义目录,这里会在该自定义目录下查找/创建 "91 Spider"。
|
||||
// 上传名走 desiredPikPakName 算出来的方案 B 格式:
|
||||
//
|
||||
// <sanitized title>-<viewkey 后 8 位>.<ext>
|
||||
//
|
||||
// 这样网盘 Web 端列出来的文件名能直接看出是哪个视频,
|
||||
// 又用 viewkey 后 8 位避免同标题撞名。所有目标盘共用同一格式,
|
||||
// 简化前端 / catalog 的认知。
|
||||
parent, err := pp.EnsureDir(ctx, spider91UploadDirName)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("%s ensure %q dir: %w", pp.Kind(), spider91UploadDirName, err)
|
||||
}
|
||||
uploadName := desiredPikPakName(v.Title, extractViewKey(v.ID), v.Ext)
|
||||
res, err := pp.UploadAndReportHash(ctx, parent, uploadName, f, info.Size())
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("%s upload: %w", pp.Kind(), err)
|
||||
}
|
||||
if res.FileID == "" {
|
||||
return false, fmt.Errorf("%s returned empty file id", pp.Kind())
|
||||
}
|
||||
|
||||
// 事务性改写 catalog 行:drive_id / file_id / content_hash
|
||||
if err := m.cfg.Catalog.MigrateVideoToDrive(ctx, v.ID, targetDriveID, res.FileID, res.Hash); err != nil {
|
||||
return false, fmt.Errorf("catalog migrate: %w", err)
|
||||
}
|
||||
// 同步 catalog 里的 file_name,让下次目标盘扫盘时 (file_name, size) 也能匹配上
|
||||
if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{FileName: uploadName}); err != nil {
|
||||
log.Printf("[spider91migrate] %s update file_name after migrate: %v", v.ID, err)
|
||||
}
|
||||
|
||||
// 删除本地 mp4 和 thumb(thumb 在 previews/thumbs/ 还有副本,不影响展示)
|
||||
CleanupSpider91Local(src, v.FileID)
|
||||
|
||||
log.Printf("[spider91migrate] %s migrated to drive=%s(kind=%s) file=%s name=%q", v.ID, targetDriveID, pp.Kind(), res.FileID, uploadName)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// CleanupSpider91Local 删除已迁移视频的本地 mp4 和 thumb。
|
||||
//
|
||||
// thumb 删除是 best-effort —— 找不到就算了(spider91 thumb 文件名带后缀,
|
||||
// 我们不知道具体是 .jpg 还是别的,逐个尝试常见后缀)。
|
||||
//
|
||||
// 暴露成包级函数方便 cleanup 模块复用(任务 6)。
|
||||
func CleanupSpider91Local(src *spider91.Driver, fileID string) {
|
||||
videoPath, err := src.VideoPath(fileID)
|
||||
if err == nil {
|
||||
if err := os.Remove(videoPath); err != nil && !os.IsNotExist(err) {
|
||||
log.Printf("[spider91migrate] remove local mp4 %s: %v", videoPath, err)
|
||||
}
|
||||
}
|
||||
// thumb 文件名是 <viewkey>.<ext>;fileID 是 <viewkey>.<videoExt>,
|
||||
// 不一定相同。尝试用 fileID 去掉视频扩展名后拼 thumb 常见后缀。
|
||||
thumbBase := stripExt(fileID)
|
||||
for _, ext := range []string{".jpg", ".jpeg", ".png", ".webp"} {
|
||||
thumbPath, err := src.ThumbPath(thumbBase + ext)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
_ = os.Remove(thumbPath) // 忽略错误:找不到很正常
|
||||
}
|
||||
}
|
||||
|
||||
// stripExt returns name without its final extension, as determined by
// filepath.Ext. Names without an extension are returned unchanged.
func stripExt(name string) string {
	return strings.TrimSuffix(name, filepath.Ext(name))
}
|
||||
|
||||
// cleanupOldLocalVideos 是防御性兜底:扫 spider91 drive 本地 videos/ 目录,
|
||||
// 删除所有 catalog 中已经迁移到别处(drive_id != src.ID())的本地残留。
|
||||
//
|
||||
// 与 migrateDrive 的区别:
|
||||
// - 不上传任何东西
|
||||
// - 不依赖 KeepLatestN —— 哪怕这个孤儿在"最新 N"窗口内,已迁移就该删
|
||||
// - 只看 catalog 状态,不看 mtime
|
||||
//
|
||||
// 正常路径下 migrateDrive 迁移成功后立刻 CleanupSpider91Local,所以这里
|
||||
// 应该不会有任何工作。极端情况(手工改 catalog、迁移过程中 crash)才会
|
||||
// 找到孤儿。
|
||||
//
|
||||
// 返回实际删除的文件个数。
|
||||
func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, src *spider91.Driver) (int, error) {
|
||||
entries, err := os.ReadDir(src.VideosDir())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return 0, nil
|
||||
}
|
||||
return 0, err
|
||||
}
|
||||
|
||||
deleted := 0
|
||||
for _, e := range entries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return deleted, err
|
||||
}
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
viewkey := stripExt(e.Name())
|
||||
videoID := "spider91-" + src.ID() + "-" + viewkey
|
||||
v, err := m.cfg.Catalog.GetVideo(ctx, videoID)
|
||||
if err != nil || v == nil {
|
||||
// 找不到 catalog 行:保险起见保留,等管理员处理
|
||||
continue
|
||||
}
|
||||
if v.DriveID == src.ID() {
|
||||
// 还没迁移,归 migrateDrive 管,不在这里动
|
||||
continue
|
||||
}
|
||||
// 已迁移到别的 drive 但本地还有 → 删
|
||||
path, perr := src.VideoPath(e.Name())
|
||||
if perr != nil {
|
||||
continue
|
||||
}
|
||||
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
|
||||
log.Printf("[spider91migrate] cleanup remove %s: %v", path, err)
|
||||
continue
|
||||
}
|
||||
// thumb 一并删(best-effort)
|
||||
thumbBase := stripExt(e.Name())
|
||||
for _, ext := range []string{".jpg", ".jpeg", ".png", ".webp"} {
|
||||
tp, terr := src.ThumbPath(thumbBase + ext)
|
||||
if terr != nil {
|
||||
continue
|
||||
}
|
||||
_ = os.Remove(tp)
|
||||
}
|
||||
deleted++
|
||||
}
|
||||
return deleted, nil
|
||||
}
|
||||
|
||||
// backfillFileNames 扫描目标 drive(PikPak、115 或 OneDrive)下所有 spider91-* 起始 ID 的视频,
|
||||
// 对文件名不是 desiredPikPakName(...) 期望格式的,调 target.Rename 修正,
|
||||
// 并把 catalog.file_name 同步到新名字。
|
||||
//
|
||||
// 幂等:已经是期望格式的视频不会触发任何调用。
|
||||
//
|
||||
// 返回成功改名的条数。
|
||||
func (m *Migrator) backfillFileNames(ctx context.Context, targetDriveID string, pp uploadTarget) (int, error) {
|
||||
videos, err := m.cfg.Catalog.ListVideosByDriveID(ctx, targetDriveID, 10000)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("list videos: %w", err)
|
||||
}
|
||||
renamed := 0
|
||||
for _, v := range videos {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return renamed, err
|
||||
}
|
||||
if !strings.HasPrefix(v.ID, "spider91-") {
|
||||
continue
|
||||
}
|
||||
want := desiredPikPakName(v.Title, extractViewKey(v.ID), v.Ext)
|
||||
if v.FileName == want {
|
||||
continue
|
||||
}
|
||||
if v.FileID == "" {
|
||||
continue
|
||||
}
|
||||
if err := pp.Rename(ctx, v.FileID, want); err != nil {
|
||||
log.Printf("[spider91migrate] rename %s -> %q: %v", v.ID, want, err)
|
||||
continue
|
||||
}
|
||||
if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{FileName: want}); err != nil {
|
||||
log.Printf("[spider91migrate] %s update file_name after rename: %v", v.ID, err)
|
||||
// 目标盘已经改名成功,但 catalog 更新失败 —— 下轮会重试。继续。
|
||||
}
|
||||
log.Printf("[spider91migrate] renamed %s on %s: %q -> %q", v.ID, pp.Kind(), v.FileName, want)
|
||||
renamed++
|
||||
}
|
||||
return renamed, nil
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,6 +5,8 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
)
|
||||
|
||||
type VideoAssetRef struct {
|
||||
@@ -71,14 +73,15 @@ func Compute(
|
||||
continue
|
||||
}
|
||||
driveUsage := out.Drives[ref.DriveID]
|
||||
thumbPath := filepath.Join(localDir, "thumbs", ref.ID+".jpg")
|
||||
if size, exists, err := regularFileSize(thumbPath); err != nil {
|
||||
return Usage{}, err
|
||||
} else if exists {
|
||||
key := ref.DriveID + "\x00thumb\x00" + thumbPath
|
||||
if !seen[key] {
|
||||
driveUsage.ThumbnailBytes += size
|
||||
seen[key] = true
|
||||
for _, thumbPath := range mediaasset.ThumbnailPathCandidates(localDir, ref.ID) {
|
||||
if size, exists, err := regularFileSize(thumbPath); err != nil {
|
||||
return Usage{}, err
|
||||
} else if exists {
|
||||
key := ref.DriveID + "\x00thumb\x00" + thumbPath
|
||||
if !seen[key] {
|
||||
driveUsage.ThumbnailBytes += size
|
||||
seen[key] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,10 @@ package storageusage
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
)
|
||||
|
||||
func TestComputeCountsLocalThumbnailsAndTeasersByDrive(t *testing.T) {
|
||||
@@ -13,6 +16,8 @@ func TestComputeCountsLocalThumbnailsAndTeasersByDrive(t *testing.T) {
|
||||
}
|
||||
writeSizedFile(t, filepath.Join(localDir, "thumbs", "video-a.jpg"), 3)
|
||||
writeSizedFile(t, filepath.Join(localDir, "thumbs", "video-b.jpg"), 5)
|
||||
longID := "localstorage-" + strings.Repeat("x", 240)
|
||||
writeSizedFile(t, mediaasset.ThumbnailPath(localDir, longID), 13)
|
||||
teaserA := filepath.Join(localDir, "video-a.mp4")
|
||||
teaserB := filepath.Join(localDir, "video-b.mp4")
|
||||
writeSizedFile(t, teaserA, 7)
|
||||
@@ -24,6 +29,7 @@ func TestComputeCountsLocalThumbnailsAndTeasersByDrive(t *testing.T) {
|
||||
{ID: "video-a", DriveID: "drive-a", PreviewLocal: teaserA},
|
||||
{ID: "video-a-copy", DriveID: "drive-a", PreviewLocal: teaserA},
|
||||
{ID: "video-b", DriveID: "drive-b", PreviewLocal: teaserB},
|
||||
{ID: longID, DriveID: "drive-b"},
|
||||
{ID: "outside", DriveID: "drive-b", PreviewLocal: outside},
|
||||
{ID: "unknown-drive-video", DriveID: "missing", PreviewLocal: teaserB},
|
||||
}, []string{"drive-a", "drive-b"}, func(string) (DiskStats, error) {
|
||||
@@ -41,11 +47,11 @@ func TestComputeCountsLocalThumbnailsAndTeasersByDrive(t *testing.T) {
|
||||
t.Fatalf("drive-a usage = %#v, want thumbnails=3 teaser=7 total=10", driveA)
|
||||
}
|
||||
driveB := got.Drives["drive-b"]
|
||||
if driveB.ThumbnailBytes != 5 || driveB.TeaserBytes != 11 || driveB.TotalBytes != 16 {
|
||||
t.Fatalf("drive-b usage = %#v, want thumbnails=5 teaser=11 total=16", driveB)
|
||||
if driveB.ThumbnailBytes != 18 || driveB.TeaserBytes != 11 || driveB.TotalBytes != 29 {
|
||||
t.Fatalf("drive-b usage = %#v, want thumbnails=18 teaser=11 total=29", driveB)
|
||||
}
|
||||
if got.ThumbnailBytes != 8 || got.TeaserBytes != 18 || got.TotalBytes != 26 {
|
||||
t.Fatalf("totals = %#v, want thumbnails=8 teaser=18 total=26", got)
|
||||
if got.ThumbnailBytes != 21 || got.TeaserBytes != 18 || got.TotalBytes != 39 {
|
||||
t.Fatalf("totals = %#v, want thumbnails=21 teaser=18 total=39", got)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,178 @@
|
||||
// Package transcode 实现"浏览器兼容性转码":把网盘/本地存储中浏览器
|
||||
// <video> 播不动的视频(AVI/WMV/FLV、MPEG-4 Part 2、RMVB 等)转成
|
||||
// H.264 + AAC 的 MP4,并把产物上传回同一存储,播放源切到产物文件。
|
||||
//
|
||||
// 与封面/预览生成不同,转码不会自动运行——只能由管理员在网盘管理页
|
||||
// 手动开启,也可以随时手动停止。
|
||||
package transcode
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MediaInfo holds the minimal ffprobe findings needed to decide browser
// compatibility.
type MediaInfo struct {
	// FormatName is ffprobe's format_name: a comma-separated list of demuxer
	// aliases, e.g. "mov,mp4,m4a,3gp,3g2,mj2", "avi" or "matroska,webm".
	FormatName string
	// VideoCodecs and AudioCodecs hold the codec names checked by
	// NeedsTranscode.
	VideoCodecs []string
	AudioCodecs []string
}
|
||||
|
||||
// browserCompatibleVideoCodecs lists the video codecs treated as decodable by
// mainstream browsers' <video> element. HEVC/H.265 is only supported on some
// platforms, so it is conservatively left out.
var browserCompatibleVideoCodecs = map[string]bool{
	"h264": true,
	"vp8":  true,
	"vp9":  true,
	"av1":  true,
}
|
||||
|
||||
// browserCompatibleAudioCodecs lists the audio codecs treated as decodable by
// mainstream browsers.
var browserCompatibleAudioCodecs = map[string]bool{
	"aac":    true,
	"mp3":    true,
	"opus":   true,
	"vorbis": true,
	"flac":   true,
}
|
||||
|
||||
// NeedsTranscode 判断这个文件是否需要转码才能在浏览器里播放。
|
||||
// ext 是 catalog 里记录的扩展名(小写、不带点),用来区分 mkv 和 webm
|
||||
// (两者的 format_name 都是 "matroska,webm")。
|
||||
func NeedsTranscode(info MediaInfo, ext string) bool {
|
||||
if !containerCompatible(info.FormatName, ext) {
|
||||
return true
|
||||
}
|
||||
for _, codec := range info.VideoCodecs {
|
||||
if !browserCompatibleVideoCodecs[strings.ToLower(codec)] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
for _, codec := range info.AudioCodecs {
|
||||
if !browserCompatibleAudioCodecs[strings.ToLower(codec)] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// containerCompatible reports whether the container itself is one a browser
// can play, judged from ffprobe's format_name plus the catalog extension.
//
//   - Any format_name whose comma-separated aliases include "mp4" is accepted.
//   - "matroska,webm" is accepted only for files that really carry the .webm
//     extension; .mkv is conservatively sent to transcoding.
//
// Both formatName and ext are matched case-insensitively.
func containerCompatible(formatName, ext string) bool {
	lowered := strings.ToLower(formatName)

	aliases := strings.Split(lowered, ",")
	for i := 0; i < len(aliases); i++ {
		if aliases[i] == "mp4" {
			return true
		}
	}

	isWebmFile := strings.EqualFold(ext, "webm")
	return isWebmFile && strings.Contains(lowered, "webm")
}
|
||||
|
||||
// ProbeFile 用 ffprobe 探测本地文件的容器与音视频编码。
|
||||
func ProbeFile(ctx context.Context, ffprobePath, path string) (MediaInfo, error) {
|
||||
ctx2, cancel := context.WithTimeout(ctx, 60*time.Second)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx2, ffprobePath,
|
||||
"-v", "error",
|
||||
"-show_entries", "format=format_name",
|
||||
"-show_entries", "stream=codec_type,codec_name",
|
||||
"-of", "json",
|
||||
path,
|
||||
)
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return MediaInfo{}, fmt.Errorf("transcode: ffprobe: %w", err)
|
||||
}
|
||||
var parsed struct {
|
||||
Format struct {
|
||||
FormatName string `json:"format_name"`
|
||||
} `json:"format"`
|
||||
Streams []struct {
|
||||
CodecType string `json:"codec_type"`
|
||||
CodecName string `json:"codec_name"`
|
||||
} `json:"streams"`
|
||||
}
|
||||
if err := json.Unmarshal(out, &parsed); err != nil {
|
||||
return MediaInfo{}, fmt.Errorf("transcode: parse ffprobe output: %w", err)
|
||||
}
|
||||
info := MediaInfo{FormatName: parsed.Format.FormatName}
|
||||
for _, s := range parsed.Streams {
|
||||
switch s.CodecType {
|
||||
case "video":
|
||||
info.VideoCodecs = append(info.VideoCodecs, s.CodecName)
|
||||
case "audio":
|
||||
info.AudioCodecs = append(info.AudioCodecs, s.CodecName)
|
||||
}
|
||||
}
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// buildFFmpegArgs 按探测结果生成转码参数:
|
||||
// - 编码本就兼容、只是容器不行(如 AVI 里装 H.264)→ 流拷贝 remux,零质量损失;
|
||||
// - 否则视频转 H.264(裁到偶数尺寸 + yuv420p 保证兼容性)、音频转 AAC。
|
||||
//
|
||||
// 两种情况都加 +faststart 把 moov 提前,便于边下边播。
|
||||
func buildFFmpegArgs(info MediaInfo, inPath, outPath string) []string {
|
||||
args := []string{"-y", "-i", inPath}
|
||||
videoOK := true
|
||||
for _, codec := range info.VideoCodecs {
|
||||
if !browserCompatibleVideoCodecs[strings.ToLower(codec)] {
|
||||
videoOK = false
|
||||
break
|
||||
}
|
||||
}
|
||||
audioOK := true
|
||||
for _, codec := range info.AudioCodecs {
|
||||
if !browserCompatibleAudioCodecs[strings.ToLower(codec)] {
|
||||
audioOK = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if videoOK {
|
||||
args = append(args, "-c:v", "copy")
|
||||
} else {
|
||||
args = append(args,
|
||||
"-c:v", "libx264",
|
||||
"-preset", "veryfast",
|
||||
"-crf", "23",
|
||||
"-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2",
|
||||
"-pix_fmt", "yuv420p",
|
||||
)
|
||||
}
|
||||
if len(info.AudioCodecs) == 0 {
|
||||
args = append(args, "-an")
|
||||
} else if audioOK {
|
||||
args = append(args, "-c:a", "copy")
|
||||
} else {
|
||||
args = append(args, "-c:a", "aac", "-b:a", "128k")
|
||||
}
|
||||
args = append(args, "-movflags", "+faststart", "-f", "mp4", outPath)
|
||||
return args
|
||||
}
|
||||
|
||||
// TranscodeFile 把本地输入文件转成浏览器可播的 MP4 写到 outPath。
|
||||
func TranscodeFile(ctx context.Context, ffmpegPath string, info MediaInfo, inPath, outPath string) error {
|
||||
args := buildFFmpegArgs(info, inPath, outPath)
|
||||
cmd := exec.CommandContext(ctx, ffmpegPath, args...)
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("transcode: ffmpeg: %w: %s", err, tailOf(string(out), 400))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// tailOf returns at most the last n bytes of s after trimming surrounding
// whitespace. It is used to keep tool output embedded in error messages short.
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte offset
// len(s)-n falls on a continuation byte, the start is moved forward to the
// next rune boundary, so the result may be up to three bytes shorter than n
// but stays valid UTF-8 (tool output routinely contains non-ASCII file names).
// A non-positive n yields the empty string.
func tailOf(s string, n int) string {
	s = strings.TrimSpace(s)
	if n <= 0 {
		return ""
	}
	if len(s) <= n {
		return s
	}
	start := len(s) - n
	// UTF-8 continuation bytes look like 10xxxxxx; a rune has at most three of
	// them, so bound the skip to avoid eating arbitrary binary garbage.
	for skipped := 0; skipped < 3 && start < len(s) && s[start]&0xC0 == 0x80; skipped++ {
		start++
	}
	return s[start:]
}
|
||||
@@ -0,0 +1,125 @@
|
||||
package transcode
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
)
|
||||
|
||||
func TestNeedsTranscode(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
info MediaInfo
|
||||
ext string
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "h264 aac mp4 is compatible",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "mp4",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "mpeg4 in avi needs transcode",
|
||||
info: MediaInfo{FormatName: "avi", VideoCodecs: []string{"mpeg4"}, AudioCodecs: []string{"mp3"}},
|
||||
ext: "avi",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "h264 in avi needs remux",
|
||||
info: MediaInfo{FormatName: "avi", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "avi",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "hevc in mp4 needs transcode",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"hevc"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "mp4",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "vp9 opus webm is compatible",
|
||||
info: MediaInfo{FormatName: "matroska,webm", VideoCodecs: []string{"vp9"}, AudioCodecs: []string{"opus"}},
|
||||
ext: "webm",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "h264 in mkv is conservative transcode",
|
||||
info: MediaInfo{FormatName: "matroska,webm", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "mkv",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "pcm audio in mov needs transcode",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"pcm_s16le"}},
|
||||
ext: "mov",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "video only h264 mp4 is compatible",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"h264"}},
|
||||
ext: "mp4",
|
||||
want: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := NeedsTranscode(tc.info, tc.ext); got != tc.want {
|
||||
t.Fatalf("NeedsTranscode(%+v, %q) = %v, want %v", tc.info, tc.ext, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildFFmpegArgsRemuxWhenCodecsCompatible(t *testing.T) {
|
||||
// AVI 里装 H.264+AAC:只需要换容器,应该走流拷贝
|
||||
info := MediaInfo{FormatName: "avi", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}}
|
||||
args := strings.Join(buildFFmpegArgs(info, "in.avi", "out.mp4"), " ")
|
||||
if !strings.Contains(args, "-c:v copy") {
|
||||
t.Fatalf("expected video stream copy, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "-c:a copy") {
|
||||
t.Fatalf("expected audio stream copy, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "+faststart") {
|
||||
t.Fatalf("expected faststart flag, got: %s", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildFFmpegArgsTranscodesIncompatibleCodecs(t *testing.T) {
|
||||
info := MediaInfo{FormatName: "avi", VideoCodecs: []string{"mpeg4"}, AudioCodecs: []string{"wmav2"}}
|
||||
args := strings.Join(buildFFmpegArgs(info, "in.avi", "out.mp4"), " ")
|
||||
if !strings.Contains(args, "-c:v libx264") {
|
||||
t.Fatalf("expected libx264 video encode, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "-c:a aac") {
|
||||
t.Fatalf("expected aac audio encode, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "yuv420p") {
|
||||
t.Fatalf("expected yuv420p pixel format, got: %s", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildFFmpegArgsDropsAudioWhenNoAudioStream(t *testing.T) {
|
||||
info := MediaInfo{FormatName: "avi", VideoCodecs: []string{"mpeg4"}}
|
||||
args := strings.Join(buildFFmpegArgs(info, "in.avi", "out.mp4"), " ")
|
||||
if !strings.Contains(args, "-an") {
|
||||
t.Fatalf("expected -an for video without audio, got: %s", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTranscodedName(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
fileName, title, id, want string
|
||||
}{
|
||||
{"www.98T.la@167.avi", "www.98T.la@167", "p115-1", "www.98T.la@167.mp4"},
|
||||
{"", "标题", "p115-2", "标题.mp4"},
|
||||
{"a/b\\c.wmv", "", "p115-3", "a_b_c.mp4"},
|
||||
} {
|
||||
v := &catalog.Video{FileName: tc.fileName, Title: tc.title, ID: tc.id}
|
||||
if got := transcodedName(v); got != tc.want {
|
||||
t.Fatalf("transcodedName(%q,%q,%q) = %q, want %q", tc.fileName, tc.title, tc.id, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user