mirror of
https://github.com/nianzhibai/91.git
synced 2026-06-25 05:02:39 +08:00
Compare commits
58 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2adaac3d7d | |||
| ee8af315b0 | |||
| 6884473dbf | |||
| f0458f7043 | |||
| e32da9016b | |||
| 2427f58165 | |||
| 00aaeed736 | |||
| 5efbceb205 | |||
| 0faeaf408f | |||
| 1b5eda92b0 | |||
| 840a858dbd | |||
| 1ee5ee35be | |||
| 12b737b6fe | |||
| bd33d26a1f | |||
| 36fe32cb84 | |||
| 194d98895a | |||
| 2437fbd779 | |||
| 4dd66b8120 | |||
| 30b736cf36 | |||
| 57391e0e98 | |||
| 052e142520 | |||
| f9351324c6 | |||
| bb83277d62 | |||
| aa856db1f6 | |||
| 7e5e67697e | |||
| 9cc8e02bec | |||
| 139e63eef2 | |||
| b8388eba59 | |||
| 76782f3801 | |||
| 1ae1408fb6 | |||
| 738406162a | |||
| 0f111b846d | |||
| 4dd9015bd7 | |||
| 84fbb6f51c | |||
| 992b20da93 | |||
| 1770693666 | |||
| 177041633a | |||
| ae324d3752 | |||
| 7f1e4eaa29 | |||
| 811d87cc27 | |||
| e4408f5655 | |||
| e93c906921 | |||
| 96e423b952 | |||
| a8ccc19e9e | |||
| 7ddf33d726 | |||
| c1355385e1 | |||
| ec5a01b6aa | |||
| 71d4a16db1 | |||
| 940e5dd76d | |||
| e826c05d5c | |||
| 3465b9e837 | |||
| d33c1b1b20 | |||
| 5fc8e9ebb7 | |||
| dc7d2a5de3 | |||
| 2f2bfbfcdc | |||
| 9def08b0c5 | |||
| c87208117e | |||
| a770b3af6b |
+15
-3
@@ -30,8 +30,20 @@ tmp/
|
||||
|
||||
# 91 爬虫脚本独立运行时的默认输出文件(backend 跑时会显式 --output 到 backend/data/spider91/,所以不会落在这里)
|
||||
91porn_videos.json
|
||||
91VideoSpider/91porn_videos.json
|
||||
91VideoSpider/data/
|
||||
91VideoSpider/__pycache__/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
|
||||
# Local scratch images
|
||||
/*.png
|
||||
/*.jpg
|
||||
/*.jpeg
|
||||
/*.gif
|
||||
/*.webp
|
||||
/*.bmp
|
||||
/*.ico
|
||||
/image.jpg
|
||||
/image003.jpg
|
||||
/image004.jpg
|
||||
/image005.png
|
||||
/image006.png
|
||||
/image02.png
|
||||
|
||||
@@ -1,859 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
91porn 视频爬虫脚本
|
||||
===================
|
||||
爬取 https://www.91porn.com/v.php?category=top&viewtype=basic 下的所有视频信息:
|
||||
- 视频名称
|
||||
- 封面图直链
|
||||
- 视频直链 (MP4)
|
||||
|
||||
依赖安装:
|
||||
pip install requests beautifulsoup4 lxml PySocks
|
||||
|
||||
使用方法:
|
||||
# 全量爬取(默认行为,从 page=1 一直爬到末尾,写到 OUTPUT_FILE)
|
||||
python spider_91porn.py
|
||||
|
||||
# 只爬指定页(单页模式,手动调试用)
|
||||
python spider_91porn.py --page 1 --output /tmp/spider91_page1.json
|
||||
|
||||
# 凑够 N 个新视频模式(backend 凌晨任务用)
|
||||
python spider_91porn.py --target-new 15 --seen-viewkeys-file /tmp/seen.txt --output /tmp/new.json
|
||||
|
||||
CLI 参数:
|
||||
--page N 只爬第 N 页,配合 --output 用于手动调试
|
||||
--target-new N 从 page 1 起翻页直到凑够 N 个新视频(不在 seen 列表里的)
|
||||
--seen-viewkeys-file FILE 每行一个已知 viewkey 或 mp4 源 ID,命中即跳过;与 --target-new 配合使用
|
||||
--output FILE 输出 JSON 路径,覆盖默认的 OUTPUT_FILE
|
||||
--no-resume 禁用断点续爬(单页/target-new 模式下自动禁用)
|
||||
--quiet 压缩日志,每条视频只输出一行
|
||||
-h / --help 帮助
|
||||
|
||||
配置说明 (编辑脚本内 "配置区域"):
|
||||
- MIN_PAGE_DELAY / MAX_PAGE_DELAY : 列表页请求间隔 (默认 3-6 秒)
|
||||
- MIN_DETAIL_DELAY / MAX_DETAIL_DELAY : 详情页请求间隔 (默认 2-5 秒)
|
||||
- MAX_PAGES : 限制最大爬取页数 (None=不限, 如 5=只爬前5页)
|
||||
- OUTPUT_FILE : 输出文件名
|
||||
|
||||
输出格式 (JSON):
|
||||
{
|
||||
"videos": [
|
||||
{
|
||||
"title": "视频标题",
|
||||
"thumb_url": "https://...thumb/xxxx.jpg",
|
||||
"video_url": "https://...mp43/xxxx.mp4?st=...",
|
||||
"viewkey": "abc123...",
|
||||
"source_id": "xxxx",
|
||||
"detail_url": "https://...view_video.php?viewkey=..."
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
注意:
|
||||
1. 视频直链包含时效性token (e参数为过期时间戳),会过期,需定期重新爬取
|
||||
2. 脚本已内置随机延时,请勿移除,避免对服务器造成压力
|
||||
3. 网站有Cloudflare保护,如遇到403/5xx错误,可能需要使用带cookie的session
|
||||
4. 本脚本仅供学习交流,请遵守当地法律法规
|
||||
|
||||
作者: OpenCode
|
||||
日期: 2026-05-22
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import requests
|
||||
import re
|
||||
import time
|
||||
import random
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import html
|
||||
from urllib.parse import urljoin, unquote, urlparse
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
print("错误: 缺少依赖库 beautifulsoup4")
|
||||
print("请运行: pip install beautifulsoup4 lxml")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def prefer_ipv4_for_plain_socks5_proxy():
    """Make ``socket.getaddrinfo`` return IPv4 results first under socks5:// proxies.

    PySocks may pick IPv6 first for socks5://; some SOCKS5 servers only
    accept IPv4. The patch is installed at most once per process (guarded by
    the ``socket._spider91_ipv4_first`` flag) and only when one of the
    HTTP(S)_PROXY environment variables starts with ``socks5://``.
    """
    env_names = ("HTTPS_PROXY", "HTTP_PROXY", "https_proxy", "http_proxy")
    for env_name in env_names:
        configured = os.environ.get(env_name, "")
        if configured.strip().lower().startswith("socks5://"):
            break
    else:
        # No plain socks5:// proxy configured: leave the resolver untouched.
        return

    if getattr(socket, "_spider91_ipv4_first", False):
        # Already patched earlier in this process.
        return

    resolve = socket.getaddrinfo

    def getaddrinfo_ipv4_first(*args, **kwargs):
        # sorted() is stable, so the relative order inside each family is kept.
        return sorted(
            resolve(*args, **kwargs),
            key=lambda entry: entry[0] != socket.AF_INET,
        )

    socket.getaddrinfo = getaddrinfo_ipv4_first
    socket._spider91_ipv4_first = True
|
||||
|
||||
# ===================== 配置区域 =====================
|
||||
BASE_URL = "https://www.91porn.com/v.php"
|
||||
LIST_PARAMS = {
|
||||
"category": "top",
|
||||
"viewtype": "basic"
|
||||
}
|
||||
|
||||
# 请求头 (模拟真实浏览器)
|
||||
HEADERS = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/125.0.0.0 Safari/537.36"
|
||||
),
|
||||
"Accept": (
|
||||
"text/html,application/xhtml+xml,application/xml;"
|
||||
"q=0.9,image/avif,image/webp,image/apng,*/*;"
|
||||
"q=0.8,application/signed-exchange;v=b3;q=0.7"
|
||||
),
|
||||
"Accept-Language": "zh-CN,zh;q=0.9",
|
||||
# 注意: 不要包含 "br" (brotli),除非安装了 brotli 库
|
||||
# "Accept-Encoding": "gzip, deflate, br",
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"Sec-Fetch-Dest": "document",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "none",
|
||||
"Sec-Fetch-User": "?1",
|
||||
}
|
||||
|
||||
# 延时配置 (秒) - 控制爬取频率,避免被封
|
||||
MIN_PAGE_DELAY = 3.0 # 列表页之间最小延时
|
||||
MAX_PAGE_DELAY = 6.0 # 列表页之间最大延时
|
||||
MIN_DETAIL_DELAY = 2.0 # 详情页之间最小延时
|
||||
MAX_DETAIL_DELAY = 5.0 # 详情页之间最大延时
|
||||
|
||||
# 重试配置
|
||||
MAX_RETRIES = 3
|
||||
RETRY_DELAY = 5.0
|
||||
|
||||
# 输出配置
|
||||
OUTPUT_FILE = "91porn_videos.json"
|
||||
MAX_PAGES = None # 设置为 None 爬取所有页,或设置整数如 5 只爬前5页
|
||||
RESUME = True # 是否跳过输出文件中已存在的 viewkey (断点续爬)
|
||||
MAX_EMPTY_PAGES = 2 # 连续空页数达到此值时停止爬取
|
||||
# ===================================================
|
||||
|
||||
|
||||
class Porn91Spider:
|
||||
def __init__(
    self,
    output_file: str = None,
    start_page: int = 1,
    max_pages: int = None,
    resume: bool = None,
    max_empty_pages: int = None,
    quiet: bool = False,
    target_new: int = None,
    seen_viewkeys: list = None,
    stream_output: bool = False,
):
    """Build a spider; every argument falls back to the module-level config.

    Args:
        output_file: JSON output path; ``None`` means ``OUTPUT_FILE``.
        start_page: First list page to request (values below 1 become 1).
        max_pages: Number of pages to crawl from ``start_page``; ``None`` or
            a non-positive value means unlimited.
        resume: Whether to preload ``output_file`` and skip entries already
            in it; ``None`` means the module-level ``RESUME`` setting.
        max_empty_pages: Consecutive empty pages that end the crawl;
            ``None`` means ``MAX_EMPTY_PAGES``.
        quiet: Suppress the per-delay log lines.
        target_new: Stop after this many newly processed videos; ``None`` or
            a non-positive value disables the limit.
        seen_viewkeys: Known IDs to skip. Entries may be viewkeys or mp4
            source IDs; blank entries are ignored.
        stream_output: When true, each finished video is written to stdout
            as one JSON line and all log output goes to stderr.
    """
    self.session = requests.Session()
    self.session.headers.update(HEADERS)
    # Without a fixed "mode" cookie the first detail request may return a
    # video source that differs from the list card; pinning desktop mode
    # keeps list-page and detail-page parsing consistent.
    self.session.cookies.set("mode", "d")

    # Effective configuration: constructor arguments win over module config.
    self.output_file = output_file if output_file is not None else OUTPUT_FILE
    self.start_page = max(1, int(start_page or 1))
    self.max_pages = max_pages if max_pages is None or max_pages > 0 else None
    self.resume = RESUME if resume is None else bool(resume)
    self.max_empty_pages = (
        MAX_EMPTY_PAGES if max_empty_pages is None else int(max_empty_pages)
    )
    self.target_new = target_new if target_new and target_new > 0 else None
    self.quiet = bool(quiet)
    # Must be set before the first self.log() call below.
    self.stream_output = bool(stream_output)

    # Transport-level retries for transient server errors.
    try:
        from requests.adapters import HTTPAdapter
        from urllib3.util.retry import Retry
        retry_strategy = Retry(
            total=MAX_RETRIES,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("https://", adapter)
        self.session.mount("http://", adapter)
    except ImportError:
        pass  # urllib3 may be too old to provide Retry

    self.results = []
    self.pages_crawled = 0
    self.processed_videos = 0
    self.skipped_videos = 0
    self.failed_videos = 0
    self.skip_viewkeys = set()

    # IDs handed over by the caller (viewkeys or mp4 source IDs).
    if seen_viewkeys:
        for vk in seen_viewkeys:
            if not vk:
                continue
            vk = vk.strip()
            if vk:
                self.skip_viewkeys.add(vk)

    # Resume support: preload earlier results so they are not fetched again.
    if self.resume and os.path.exists(self.output_file):
        try:
            with open(self.output_file, 'r', encoding='utf-8') as f:
                existing_data = json.load(f)
            # Parse into locals first so a malformed file cannot leave the
            # spider in a half-initialised state.
            existing_videos = list(existing_data.get('videos', []))
            known_ids = set()
            for v in existing_videos:
                vk = v.get('viewkey', '')
                if vk:
                    known_ids.add(vk)
                # Mirror _process_video_list: successfully extracted entries
                # are also de-duplicated by their mp4 source ID.
                sid = v.get('source_id', '')
                if sid and v.get('video_url'):
                    known_ids.add(sid)
            successful = existing_data.get('successful', 0)
            failed = existing_data.get('failed', 0)
        except (OSError, ValueError, AttributeError, TypeError) as e:
            # Best effort: an unreadable or malformed file must not abort
            # the crawl, but the operator should know resume did not happen.
            self.log(f"警告: 无法加载已有输出文件 {self.output_file},将从头开始: {e}")
        else:
            self.results = existing_videos
            self.skip_viewkeys.update(known_ids)
            self.processed_videos = successful
            self.failed_videos = failed
            self.log(f"加载已有数据: {len(self.results)} 个视频, 将跳过已处理项")
|
||||
|
||||
def log(self, message: str):
    """Print *message* prefixed with a ``[YYYY-MM-DD HH:MM:SS]`` timestamp.

    In stream_output mode the line is written to stderr and flushed so that
    stdout carries only the JSONL stream; otherwise it goes to stdout.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    if not self.stream_output:
        print(f"[{stamp}] {message}")
        return
    print(f"[{stamp}] {message}", file=sys.stderr, flush=True)
|
||||
|
||||
def emit_stream_video(self, video: dict):
    """Write one video entry to stdout as a single flushed JSON line.

    Does nothing unless stream_output is enabled. Any error raised while
    serialising or writing is reported on stderr and otherwise swallowed.
    """
    if self.stream_output:
        try:
            payload = json.dumps(video, ensure_ascii=False)
            print(payload, flush=True)
        except Exception as exc:
            # Typically the consumer closed the pipe; surface it on stderr.
            print(f"[stream] emit failed: {exc}", file=sys.stderr, flush=True)
|
||||
|
||||
def random_sleep(self, min_sec: float, max_sec: float):
    """Sleep for a uniformly random duration between *min_sec* and *max_sec*.

    The chosen delay is logged first unless quiet mode is on.
    """
    pause = random.uniform(min_sec, max_sec)
    if self.quiet:
        time.sleep(pause)
        return
    self.log(f" 随机延时 {pause:.2f} 秒...")
    time.sleep(pause)
|
||||
|
||||
def fetch_page(self, url: str, description: str = "", referer: str = "") -> str:
    """Download *url* and return its HTML, or ``""`` when every attempt fails.

    Up to MAX_RETRIES attempts are made. A 403 response, a short
    "Just a moment" Cloudflare challenge page, an HTTP error status or any
    other requests exception each trigger a randomised pause and another
    attempt, if one is left.

    Args:
        url: Page address to request.
        description: Label used in log lines instead of the raw URL.
        referer: Optional value for the Referer header.
    """
    extra_headers = {"Referer": referer} if referer else {}
    label = description or url

    for attempt in range(1, MAX_RETRIES + 1):
        may_retry = attempt < MAX_RETRIES
        try:
            self.log(f"正在请求: {label} (尝试 {attempt}/{MAX_RETRIES})")
            resp = self.session.get(url, timeout=30, headers=extra_headers)

            # Handle 403 before raise_for_status() so it gets its own message.
            if resp.status_code == 403:
                self.log("警告: 收到 403 Forbidden,可能被拦截")
                if not may_retry:
                    return ""
                self.random_sleep(RETRY_DELAY, RETRY_DELAY + 3)
                continue

            resp.raise_for_status()

            # Decode as UTF-8 explicitly rather than trusting requests'
            # encoding detection.
            try:
                body = resp.content.decode('utf-8', errors='replace')
            except Exception:
                body = resp.text

            # The site itself loads challenge-platform scripts, so a page only
            # counts as a challenge when it is short AND carries the
            # interstitial title.
            if "Just a moment" in body and len(body) < 8000:
                self.log("警告: 页面被Cloudflare挑战拦截,需要浏览器环境或正确cookie")
                if not may_retry:
                    return ""
                self.random_sleep(RETRY_DELAY, RETRY_DELAY + 5)
                continue

            return body
        except requests.exceptions.HTTPError as exc:
            self.log(f"HTTP错误: {exc}")
            if not may_retry:
                return ""
            self.random_sleep(RETRY_DELAY, RETRY_DELAY + 3)
        except requests.exceptions.RequestException as exc:
            self.log(f"请求失败: {exc}")
            if not may_retry:
                self.log(f"达到最大重试次数,放弃: {url}")
                return ""
            self.random_sleep(RETRY_DELAY, RETRY_DELAY + 3)
    return ""
|
||||
|
||||
def parse_list_page(self, html: str) -> list:
    """Extract the video cards from a list page.

    Returns:
        A list of dicts with the keys ``title``, ``detail_url``,
        ``thumb_url``, ``viewkey`` and ``source_id``. Cards are de-duplicated
        within the page by source ID, or by detail URL when no source ID
        could be determined.
    """
    soup = BeautifulSoup(html, 'lxml')
    entries = []
    emitted = set()

    # Only the regular card layout is parsed. The page also contains
    # oversized col-lg-8 cards whose title/thumb/detail URL get mixed up with
    # other videos, so they are not a usable source.
    for card in soup.select('div.col-xs-12.col-sm-4.col-md-3.col-lg-3'):
        anchor = card.find('a', href=re.compile(r'view_video\.php\?viewkey='))
        href = anchor.get('href', '') if anchor else ''
        if not href:
            continue

        key_match = re.search(r'viewkey=([^&]+)', href)
        if not key_match:
            continue

        detail_url = urljoin(BASE_URL, href)
        title = self._extract_title(anchor)

        # Source ID: taken from the "playvthumb_<digits>" overlay id.
        overlay = anchor.find(id=re.compile(r'^playvthumb_\d+$'))
        source_id = overlay.get('id', '').rsplit('_', 1)[-1] if overlay else ""

        # Cover image, made absolute against BASE_URL.
        thumb_url = ""
        img = anchor.find('img', class_=re.compile(r'img-responsive'))
        if img:
            thumb_url = img.get('src', '') or img.get('data-original', '')
            if thumb_url:
                thumb_url = urljoin(BASE_URL, thumb_url)

        # Fall back to the thumbnail file name when there was no overlay id.
        if thumb_url and not source_id:
            source_id = self._extract_thumb_source_id(thumb_url)

        dedupe_key = source_id or detail_url
        if dedupe_key in emitted:
            continue
        emitted.add(dedupe_key)

        entries.append({
            "title": title,
            "detail_url": detail_url,
            "thumb_url": thumb_url,
            "viewkey": key_match.group(1),
            "source_id": source_id
        })

    return entries
|
||||
|
||||
def _extract_title(self, link) -> str:
    """Return a cleaned-up title for a video link element.

    Sources are tried in order: the text of a ``span`` whose class matches
    ``video-title``, the link's ``title`` attribute, and finally the link
    text with a leading "HD"/"91" marker plus HH:MM:SS duration removed
    (this last form is cut to 120 characters). HTML entities are unescaped.
    """
    span = link.find('span', class_=re.compile(r'video-title'))
    span_text = span.get_text(strip=True) if span else ""
    if span_text:
        return html.unescape(span_text)

    attr_title = link.get('title', '').strip()
    if attr_title:
        return html.unescape(attr_title)

    # Last resort: strip the "HD" / "91" / "HH:MM:SS" prefix from the link text.
    raw = link.get_text(separator=' ', strip=True)
    raw = re.sub(r'^(HD\s+|91\s+)?\d{2}:\d{2}:\d{2}\s*', '', raw)
    collapsed = re.sub(r'\s+', ' ', raw).strip()
    return html.unescape(collapsed)[:120]
|
||||
|
||||
def parse_detail_page(self, html: str) -> dict:
    """Extract the direct video URL (and title) from a detail page.

    Args:
        html: Raw HTML of the detail page.

    Returns:
        A dict that may contain ``title``, ``video_url`` and ``source_id``.
        ``video_url`` is absent when no usable link was found; the dict is
        empty when *html* is empty.
    """
    result = {}

    if not html:
        return result

    title = self._extract_detail_title(html)
    if title:
        result["title"] = title

    # Method 1 (primary): the page writes the <video> tag through
    # document.write(strencode2("%3c%73%6f...")); the argument is a
    # percent-encoded HTML fragment.
    strencode_match = re.search(r'strencode2\(["\']([^"\']+)["\']\)', html)
    if strencode_match:
        try:
            decoded = unquote(strencode_match.group(1))
            src_match = re.search(r"src=['\"]([^'\"]+)['\"]", decoded)
            if src_match:
                # Collapse a doubled slash right after the host
                # (https://host//path -> https://host/path).
                video_url = re.sub(
                    r'(https?://[^/]+)//+', r'\1/', src_match.group(1)
                )
                result["video_url"] = video_url
                result["source_id"] = self._extract_source_id(video_url)
                return result
        except Exception as e:
            self.log(f" 解码 strencode2 失败: {e}")

    # Method 2 (fallback): scan for bare .mp4 links. Every candidate is
    # examined, so an ad link that appears first no longer hides the real
    # video URL further down the page.
    for mp4_match in re.finditer(r'https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*', html):
        url = mp4_match.group(0)
        if 'kwai' in url or 'ad-' in url.lower():
            continue  # advertisement link
        result["video_url"] = url
        result["source_id"] = self._extract_source_id(url)
        return result

    return result
|
||||
|
||||
def _extract_detail_title(self, html_text: str) -> str:
    """Return the page ``<title>`` without its " - 91porn..." suffix.

    The result is HTML-unescaped and cut to 160 characters; an empty string
    is returned when the page has no ``<title>`` element.
    """
    tag = BeautifulSoup(html_text, 'lxml').find('title')
    if not tag:
        return ""
    text = tag.get_text(" ", strip=True)
    trimmed = re.sub(r'\s*-\s*91porn.*$', '', text, flags=re.IGNORECASE).strip()
    return html.unescape(trimmed)[:160]
|
||||
|
||||
def _extract_source_id(self, video_url: str) -> str:
|
||||
path = urlparse(video_url or "").path
|
||||
name = os.path.basename(path)
|
||||
stem, ext = os.path.splitext(name)
|
||||
if ext.lower() not in {".mp4", ".m4v", ".mov", ".webm", ".mkv", ".avi"}:
|
||||
return ""
|
||||
source_id = re.sub(r'[^0-9]+', '', stem)
|
||||
if not source_id or source_id != stem:
|
||||
return ""
|
||||
return source_id
|
||||
|
||||
def _extract_thumb_source_id(self, thumb_url: str) -> str:
|
||||
path = urlparse(thumb_url or "").path
|
||||
match = re.search(r'/thumb/(\d+)\.[A-Za-z0-9]+$', path)
|
||||
return match.group(1) if match else ""
|
||||
|
||||
def _thumb_url_for_source(self, thumb_url: str, source_id: str) -> str:
|
||||
if not thumb_url or not source_id:
|
||||
return thumb_url
|
||||
parsed = urlparse(thumb_url)
|
||||
match = re.search(r'/thumb/([^/?#]+)\.[A-Za-z0-9]+$', parsed.path)
|
||||
if not match:
|
||||
return thumb_url
|
||||
current = match.group(1)
|
||||
if current == source_id:
|
||||
return thumb_url
|
||||
path = re.sub(
|
||||
r'/thumb/[^/?#]+\.[A-Za-z0-9]+$',
|
||||
f'/thumb/{source_id}.jpg',
|
||||
parsed.path,
|
||||
)
|
||||
return parsed._replace(path=path, query="", fragment="").geturl()
|
||||
|
||||
def crawl(self):
    """Run the main crawl loop, then save the results and print a summary.

    The loop ends as soon as one of these holds:
      - max_pages pages have been visited in this run;
      - max_empty_pages consecutive pages failed to load or had no videos;
      - in target_new mode, processed_videos has reached target_new.
    """
    rule = "=" * 60
    self.log(rule)
    self.log("91porn 视频爬虫启动")
    self.log(rule)
    self.log(f"配置: 列表页延时 {MIN_PAGE_DELAY}-{MAX_PAGE_DELAY}s, 详情页延时 {MIN_DETAIL_DELAY}-{MAX_DETAIL_DELAY}s")
    self.log(f"配置: 最大重试 {MAX_RETRIES} 次, 连续空页上限 {self.max_empty_pages}")
    self.log(f"配置: 起始页 {self.start_page}, 最大爬取页数 {self.max_pages if self.max_pages else '不限'}")
    if self.target_new:
        self.log(f"配置: 目标新增视频数 {self.target_new}")
    self.log(f"配置: 输出文件 {os.path.abspath(self.output_file)}")
    if self.skip_viewkeys:
        self.log(f"配置: 已跳过 {len(self.skip_viewkeys)} 个已知 viewkey")
    self.log("")

    page_num = self.start_page
    empty_streak = 0   # consecutive pages without any video
    visited = 0        # pages attempted in this run, including failures

    while True:
        if self.max_pages is not None and visited >= self.max_pages:
            self.log(f"达到配置的页数上限 {self.max_pages},停止")
            break
        if empty_streak >= self.max_empty_pages:
            self.log(f"连续 {self.max_empty_pages} 页无结果,已达到末尾")
            break
        if self.target_new is not None and self.processed_videos >= self.target_new:
            self.log(f"已累计 {self.processed_videos} 个新视频,达到目标 {self.target_new},停止")
            break

        # Page 1 is requested without an explicit page parameter.
        page_url = f"{BASE_URL}?category=top&viewtype=basic"
        if page_num != 1:
            page_url += f"&page={page_num}"

        # Pause between list pages, but not before the first one.
        if visited > 0:
            self.log("")
            self.random_sleep(MIN_PAGE_DELAY, MAX_PAGE_DELAY)

        self.log(f"[页 {page_num}] 请求: {page_url}")
        page_html = self.fetch_page(page_url, f"列表页 第{page_num}页")

        if not page_html:
            self.log(f"[页 {page_num}] 获取失败,跳过")
            page_videos = []
        else:
            page_videos = self.parse_list_page(page_html)
            if not page_videos:
                # Truly no cards on the page (not "all already processed").
                self.log(f"[页 {page_num}] 页面无视频,可能已到末尾")

        if not page_videos:
            empty_streak += 1
        else:
            empty_streak = 0

            # Keep only cards whose viewkey has not been handled yet.
            fresh = [v for v in page_videos if v['viewkey'] not in self.skip_viewkeys]
            already_done = len(page_videos) - len(fresh)

            if already_done > 0:
                self.log(f"[页 {page_num}] 发现 {len(page_videos)} 个链接, 其中 {already_done} 个已处理, {len(fresh)} 个新视频")
            else:
                self.log(f"[页 {page_num}] 发现 {len(fresh)} 个视频")

            if fresh:
                self._process_video_list(fresh, referer=page_url)
            self.pages_crawled += 1

        page_num += 1
        visited += 1

    self._save_results()
    self._print_summary()
|
||||
|
||||
def _process_video_list(self, videos: list, referer: str = ""):
|
||||
"""
|
||||
处理一批视频列表,逐个获取详情页
|
||||
"""
|
||||
for idx, video in enumerate(videos, 1):
|
||||
# target_new 模式下,凑够后立即停止,不再请求详情页
|
||||
if self.target_new is not None and self.processed_videos >= self.target_new:
|
||||
return
|
||||
# 跳过已处理的 viewkey (断点续爬)
|
||||
if video['viewkey'] in self.skip_viewkeys:
|
||||
self.log(f" [SKIP] 已处理过: {video['viewkey']}")
|
||||
self.skipped_videos += 1
|
||||
continue
|
||||
|
||||
self.log(f" 处理视频 {idx}/{len(videos)}: {video['title'][:40]}...")
|
||||
|
||||
# 延时控制 (同一批次内第一个视频不延时)
|
||||
if idx > 1:
|
||||
self.random_sleep(MIN_DETAIL_DELAY, MAX_DETAIL_DELAY)
|
||||
|
||||
# 获取详情页
|
||||
detail_html = self.fetch_page(video['detail_url'], f"详情页 viewkey={video['viewkey']}", referer=referer)
|
||||
|
||||
if not detail_html:
|
||||
self.log(f" [FAIL] 详情页获取失败: {video['viewkey']}")
|
||||
video["video_url"] = ""
|
||||
self.results.append(video)
|
||||
self.skip_viewkeys.add(video['viewkey'])
|
||||
self.failed_videos += 1
|
||||
continue
|
||||
|
||||
# 解析视频直链
|
||||
detail_info = self.parse_detail_page(detail_html)
|
||||
|
||||
if detail_info.get("video_url"):
|
||||
video["video_url"] = detail_info["video_url"]
|
||||
if detail_info.get("title"):
|
||||
video["title"] = detail_info["title"]
|
||||
list_source_id = video.get("source_id", "")
|
||||
detail_source_id = detail_info.get("source_id", "")
|
||||
if list_source_id and detail_source_id and list_source_id != detail_source_id:
|
||||
self.log(
|
||||
f" [FAIL] 详情页视频源不匹配: list_source_id={list_source_id} "
|
||||
f"detail_source_id={detail_source_id} viewkey={video['viewkey']}"
|
||||
)
|
||||
self.failed_videos += 1
|
||||
self.skip_viewkeys.add(video['viewkey'])
|
||||
continue
|
||||
if not list_source_id and detail_source_id:
|
||||
video["source_id"] = detail_source_id
|
||||
if video.get("source_id"):
|
||||
video["thumb_url"] = self._thumb_url_for_source(
|
||||
video.get("thumb_url", ""),
|
||||
video["source_id"],
|
||||
)
|
||||
if video["source_id"] in self.skip_viewkeys:
|
||||
self.log(f" [SKIP] 已处理过 source_id: {video['source_id']}")
|
||||
self.skipped_videos += 1
|
||||
continue
|
||||
self.results.append(video)
|
||||
self.skip_viewkeys.add(video['viewkey'])
|
||||
if video.get("source_id"):
|
||||
self.skip_viewkeys.add(video["source_id"])
|
||||
self.processed_videos += 1
|
||||
self.log(f" [OK] 成功提取视频直链")
|
||||
# 流式:立刻把这条 entry 交给 Go 端开始下载,不等本批余下视频
|
||||
self.emit_stream_video(video)
|
||||
else:
|
||||
self.log(f" [FAIL] 未找到视频直链: {video['viewkey']}")
|
||||
video["video_url"] = ""
|
||||
self.results.append(video)
|
||||
self.skip_viewkeys.add(video['viewkey'])
|
||||
self.failed_videos += 1
|
||||
|
||||
def _save_results(self):
|
||||
"""
|
||||
保存结果到JSON文件
|
||||
"""
|
||||
output_data = {
|
||||
"crawl_time": datetime.now().isoformat(),
|
||||
"source_url": BASE_URL,
|
||||
"pages_crawled": self.pages_crawled,
|
||||
"total_videos": len(self.results),
|
||||
"successful": self.processed_videos,
|
||||
"skipped": self.skipped_videos,
|
||||
"failed": self.failed_videos,
|
||||
"videos": self.results
|
||||
}
|
||||
|
||||
try:
|
||||
# 保证父目录存在;写入临时文件后原子 rename,避免读到半截 JSON
|
||||
out_path = self.output_file
|
||||
parent = os.path.dirname(os.path.abspath(out_path))
|
||||
if parent:
|
||||
os.makedirs(parent, exist_ok=True)
|
||||
tmp_path = out_path + ".part"
|
||||
with open(tmp_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(output_data, f, ensure_ascii=False, indent=2)
|
||||
os.replace(tmp_path, out_path)
|
||||
self.log(f"结果已保存到: {os.path.abspath(out_path)}")
|
||||
except Exception as e:
|
||||
self.log(f"保存文件失败: {e}")
|
||||
# 尝试输出到控制台作为备份
|
||||
print("\n--- 备份输出 ---")
|
||||
print(json.dumps(output_data, ensure_ascii=False, indent=2))
|
||||
|
||||
def _print_summary(self):
|
||||
"""
|
||||
打印爬取摘要
|
||||
"""
|
||||
self.log("")
|
||||
self.log("=" * 60)
|
||||
self.log("爬取完成!")
|
||||
self.log("=" * 60)
|
||||
self.log(f"爬取页数: {self.pages_crawled}")
|
||||
self.log(f"总视频数: {len(self.results)}")
|
||||
self.log(f"成功提取直链: {self.processed_videos}")
|
||||
self.log(f"跳过(已处理): {self.skipped_videos}")
|
||||
self.log(f"失败/缺失直链: {self.failed_videos}")
|
||||
self.log(f"输出文件: {os.path.abspath(self.output_file)}")
|
||||
self.log("=" * 60)
|
||||
|
||||
|
||||
def print_help():
    """Print the usage text shown for ``-h`` / ``--help`` / ``help``.

    ``main`` builds its argument parser with ``add_help=False`` and routes the
    help flags here, so this text is the only place the command-line options
    can be discovered; it therefore lists every option the parser defines.
    """
    print("""
================================================
91porn 视频爬虫 v1.0
================================================

本脚本将爬取 91porn "本月最热" 分类下的所有视频信息:
- 视频名称
- 封面图直链
- 视频直链 (MP4)

依赖安装:
pip install requests beautifulsoup4 lxml PySocks

使用方法:
python spider_91porn.py
python spider_91porn.py [选项]

CLI 参数:
--page N                  只爬第 N 页(单页模式)
--max-pages N             配合 --page 使用,从该页起最多爬 N 页(默认 1)
--target-new N            从第 1 页起翻页,直到累计处理 N 个新视频后停止
--seen-viewkeys-file FILE 每行一个已处理过的 viewkey 或 mp4 源 ID,命中即跳过
--output FILE             输出 JSON 路径,覆盖默认的 OUTPUT_FILE
--no-resume               禁用断点续爬(全量模式下有效)
--quiet                   压缩日志
--stream-output           每解析出一条视频直链就向 stdout 输出一行 JSON,日志改走 stderr
-h / --help               显示本帮助

配置说明 (编辑脚本内 "配置区域"):
MIN_PAGE_DELAY / MAX_PAGE_DELAY : 列表页请求间隔 (默认 3-6 秒)
MIN_DETAIL_DELAY / MAX_DETAIL_DELAY : 详情页请求间隔 (默认 2-5 秒)
MAX_PAGES : 限制最大爬取页数 (None=不限, 如 5=只爬前5页)
OUTPUT_FILE : 输出文件名 (默认 91porn_videos.json)

按 Ctrl+C 可随时中断并保存已爬取的数据

注意:
1. 视频直链包含时效性token,会过期,需定期重新爬取
2. 脚本已内置随机延时,请勿移除,避免对服务器造成压力
3. 如遇到Cloudflare拦截,需要先通过浏览器获取Cookie
4. 本脚本仅供学习交流,请遵守当地法律法规
================================================
""")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments, build the spider, run it.

    Three modes are selected from the arguments:
      - ``--target-new N``: start at page 1 and stop after N new videos;
      - ``--page N``: crawl from page N for ``--max-pages`` pages (default 1);
      - otherwise: full crawl with the module-level defaults.
    On Ctrl+C the partial results are saved and the process exits with
    status 0; on any other exception they are saved and the error re-raised.
    """
    if len(sys.argv) > 1 and sys.argv[1] in ('-h', '--help', 'help'):
        print_help()
        return

    parser = argparse.ArgumentParser(
        prog="spider_91porn.py",
        description="91porn 视频元数据爬虫",
        add_help=False,  # -h/--help is served by print_help() instead
    )
    parser.add_argument(
        "--page", type=int, default=None,
        help="只爬指定页(单页模式,配合 --output 用于定时任务)",
    )
    parser.add_argument(
        "--output", type=str, default=None,
        help="输出 JSON 路径,覆盖默认 OUTPUT_FILE",
    )
    parser.add_argument(
        "--max-pages", type=int, default=None,
        help="单页模式下,从 --page 起最多再爬几页(默认 1)",
    )
    parser.add_argument(
        "--no-resume", action="store_true",
        help="禁用断点续爬(单页模式默认禁用)",
    )
    parser.add_argument(
        "--quiet", action="store_true",
        help="压缩日志,每条视频只输出关键事件",
    )
    parser.add_argument(
        "--target-new", type=int, default=None,
        help="目标新增模式:从 page 1 起翻页直到累计处理这么多新源视频后停止(backend 凌晨任务用)",
    )
    parser.add_argument(
        "--seen-viewkeys-file", type=str, default=None,
        help="文件路径,每行一个已处理过的 viewkey 或 mp4 源 ID;脚本会跳过这些视频",
    )
    parser.add_argument(
        "--stream-output", action="store_true",
        help="流式模式:每解析一条视频直链就立即把它作为一行 JSON 写到 stdout 并 flush;"
             "日志改走 stderr。配合 backend 边读边下载使用。",
    )

    # Unrecognised arguments are ignored rather than rejected.
    args, _ = parser.parse_known_args()
    # In stream mode stdout is reserved for JSONL, so CLI chatter uses stderr.
    cli_out = sys.stderr if args.stream_output else sys.stdout
    prefer_ipv4_for_plain_socks5_proxy()

    print("""
================================================
91porn 视频爬虫启动中...
================================================
按 Ctrl+C 可随时中断并保存进度
""", file=cli_out)

    # Known IDs supplied by the caller, one per line; blank lines are dropped.
    seen_viewkeys = []
    if args.seen_viewkeys_file:
        try:
            with open(args.seen_viewkeys_file, 'r', encoding='utf-8') as f:
                for raw_line in f:
                    entry = raw_line.strip()
                    if entry:
                        seen_viewkeys.append(entry)
        except FileNotFoundError:
            print(f"警告: --seen-viewkeys-file 不存在: {args.seen_viewkeys_file}", file=cli_out)
        except Exception as e:
            print(f"警告: 读取 --seen-viewkeys-file 失败: {e}", file=cli_out)

    # Options shared by all three modes.
    shared = dict(
        output_file=args.output,
        quiet=args.quiet,
        seen_viewkeys=seen_viewkeys,
        stream_output=args.stream_output,
    )

    if args.target_new is not None:
        # Target mode: de-duplication relies on seen_viewkeys, so the output
        # file is not read back (resume disabled).
        spider = Porn91Spider(
            start_page=1,
            max_pages=None,
            resume=False,
            target_new=args.target_new,
            **shared,
        )
    elif args.page is not None:
        # Single-page mode (kept for manual debugging).
        page_budget = args.max_pages if args.max_pages and args.max_pages > 0 else 1
        spider = Porn91Spider(
            start_page=max(1, args.page),
            max_pages=page_budget,
            resume=False,
            **shared,
        )
    else:
        # Full crawl (backward compatible): page 1 to the end.
        spider = Porn91Spider(
            resume=False if args.no_resume else None,
            **shared,
        )

    try:
        spider.crawl()
    except KeyboardInterrupt:
        spider.log("\n用户中断,正在保存已爬取的数据...")
        spider._save_results()
        spider._print_summary()
        sys.exit(0)
    except Exception as e:
        spider.log(f"发生未预料的错误: {e}")
        import traceback
        traceback.print_exc()
        spider._save_results()
        raise
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -48,7 +48,6 @@ WORKDIR /opt/video-site-91
|
||||
COPY --from=backend /out/server ./server
|
||||
COPY --from=frontend /app/dist ./dist
|
||||
COPY backend/config.example.yaml ./config.example.yaml
|
||||
COPY 91VideoSpider/ ./91VideoSpider/
|
||||
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
|
||||
|
||||
ARG VERSION=dev
|
||||
|
||||
@@ -20,14 +20,11 @@
|
||||
|
||||
## 功能特性
|
||||
|
||||
- **多后端支持** — 兼容 115 云盘、PikPak 云盘、123云盘、OneDrive、Google Drive 和本地存储
|
||||
- **低带宽播放** — 115 云盘、PikPak 云盘、123云盘、OneDrive 都支持302模式,在线播放视频时,不占用服务器带宽,播放体验不受服务器带宽影响;Google Drive 不支持302模式,走服务器中转,观看体验会受服务器带宽影响
|
||||
- **多后端支持** — 兼容 115 云盘、PikPak 云盘、123网盘、联通网盘、光鸭网盘、OneDrive、Google Drive 和本地存储
|
||||
- **低带宽播放** — 115 云盘、PikPak 云盘、123网盘、联通网盘、光鸭网盘、OneDrive 支持302模式,在线播放视频时,不占用服务器带宽,播放体验不受服务器带宽影响;Google Drive 不支持302模式,走服务器中转,观看体验会受服务器带宽影响
|
||||
- **封面 & 预览片段** — 自动为每个视频生成封面图和预览片段,首页快速选片
|
||||
- **91 爬虫** — 内置爬虫,支持抓取 91 本月最热视频
|
||||
- **双主题** — 黑黄经典主题 / 粉白清新主题,随时切换
|
||||
- **爬虫脚本** — 项目不再内置任何爬虫脚本,但支持导入自定义爬虫脚本;脚本需遵循一定的规范,具体可参考 [SpiderFor91](https://github.com/Just-Spider/SpiderFor91)
|
||||
- **短视频模式** — 一键切换抖音风格,沉浸刷片
|
||||
- **低资源占用** — 2C2G 服务器稳定运行,主要性能消耗就是封面图和预览视频的生成
|
||||
|
||||
---
|
||||
|
||||
## 预览图
|
||||
@@ -84,6 +81,14 @@ sudo bash install.sh
|
||||
|
||||
> `video-site-91` 为等效别名,两者可互换使用。
|
||||
|
||||
**已部署用户升级:**
|
||||
|
||||
```bash
|
||||
91 update
|
||||
```
|
||||
|
||||
升级会保留现有 `config.yaml`、数据库、封面、预览、上传文件和爬虫数据。脚本会自动安装或检查 `ffmpeg` / `ffprobe` 等运行依赖,并在新版本启动失败时回滚到升级前文件。
|
||||
|
||||
**自定义端口:**
|
||||
|
||||
```bash
|
||||
@@ -155,6 +160,7 @@ docker compose up -d # 更新并重启
|
||||
```
|
||||
|
||||
> 所有配置、数据库、封面、预览及上传文件均保存在 `./data/` 目录下。
|
||||
> 从旧版本升级 Docker 部署时,执行 `docker compose pull && docker compose up -d` 即可;`./data/` 不会被镜像更新覆盖。
|
||||
|
||||
---
|
||||
|
||||
@@ -180,14 +186,6 @@ docker compose up -d # 更新并重启
|
||||
|
||||
---
|
||||
|
||||
## 更多文档
|
||||
|
||||
| 文档 | 内容 |
|
||||
|------|------|
|
||||
| [backend/README.md](backend/README.md) | 后端实现、接口说明、网盘字段 |
|
||||
|
||||
---
|
||||
|
||||
## 使用须知
|
||||
|
||||
本项目面向**个人私有部署**,请仅接入你有权访问和管理的内容,并遵守对应网盘、站点的服务条款及所在地法律法规。
|
||||
@@ -196,6 +194,14 @@ docker compose up -d # 更新并重启
|
||||
|
||||
---
|
||||
|
||||
## PR提交规范
|
||||
欢迎大家提交PR,一起来完善这个项目。在此说明一下PR提交的规范:
|
||||
1. 一个PR的功能改动要单一,不建议在一个PR中修改大量功能。单个PR只做单个功能的修改,这样也更容易Merge。
|
||||
2. 完善项目的PR比新增功能的PR更容易Merge(例如:你发现开发者没有实现把爬取的视频上传到某个网盘,并且你有这个需求,此时你可以实现一下这个功能然后提交PR,也感谢你为开发者分担工作量)
|
||||
3. 新增功能的PR不容易Merge,因为某些功能并不是所有人都需要的,如果一味地不断增加功能,会让项目变得过于庞大。当然,如果你确信你的新功能和想法很好,并且相信它会对项目有很大的改善,那么热烈欢迎你的PR。
|
||||
|
||||
---
|
||||
|
||||
## 许可证
|
||||
|
||||
本项目基于 [MIT License](LICENSE) 开源。
|
||||
|
||||
+11
-9
@@ -2,7 +2,7 @@
|
||||
|
||||
视频聚合站的 Go 后端。主要提供以下能力:
|
||||
|
||||
1. 多家网盘统一抽象(夸克 / 115 / PikPak / 联通沃盘 / OneDrive / Google Drive / 本地存储)
|
||||
1. 多家网盘统一抽象(夸克 / 115 / PikPak / 联通网盘 / 光鸭网盘 / OneDrive / Google Drive / 本地存储)
|
||||
2. 视频元数据目录(SQLite)+ 扫描 + 预览视频预生成
|
||||
3. REST API(前台)+ 管理后台 + 直链代理
|
||||
4. 标签池、视频隐藏、按网盘统计和详情页来源网盘类型展示能力
|
||||
@@ -19,7 +19,8 @@ internal/
|
||||
quark/ 夸克(自己实现,参考 OpenList quark_uc)
|
||||
p115/ 115(壳子 + SheltonZhu/115driver)
|
||||
pikpak/ PikPak(自己实现,参考 OpenList pikpak)
|
||||
wopan/ 联通沃盘(壳子 + OpenListTeam/wopan-sdk-go)
|
||||
wopan/ 联通网盘(壳子 + OpenListTeam/wopan-sdk-go)
|
||||
guangyapan/ 光鸭网盘(参考 AList GuangYaPan)
|
||||
onedrive/ OneDrive(OpenList 在线续期 + Microsoft Graph 文件接口)
|
||||
googledrive/ Google Drive(OpenList 在线续期 + Google Drive API;播放走后端代理)
|
||||
localstorage/ 本地目录扫描(服务器已有视频目录)
|
||||
@@ -108,8 +109,9 @@ go run ./cmd/server 后端 9192
|
||||
| p115 | `cookie`(形如 `UID=...; CID=...; SEID=...; KID=...`) |
|
||||
| pikpak | `username`、`password`(token、验证码和设备 ID 由服务端自动处理并保存) |
|
||||
| wopan | `access_token`、`refresh_token`,可选 `family_id` |
|
||||
| guangyapan | 推荐后台扫码登录自动写入 `access_token`、`refresh_token`;也可手工填写 token;可选 `root_path` |
|
||||
| onedrive | `refresh_token` |
|
||||
| googledrive | `refresh_token` |
|
||||
| googledrive | 默认只需 `refresh_token`;自建 OAuth 客户端模式还需 `use_online_api=false`、`client_id`、`client_secret` |
|
||||
| localstorage | `path`(服务器上的已有视频目录,如 `/mnt/videos`) |
|
||||
|
||||
### PikPak 速度说明
|
||||
@@ -120,7 +122,7 @@ go run ./cmd/server 后端 9192
|
||||
|
||||
OneDrive 按 OpenList 默认应用方式调用 `https://api.oplist.org/onedrive/renewapi` 在线刷新 token,不需要配置 Azure 应用的 `client_id` / `client_secret` / `redirect_uri`。后台新建 OneDrive 时只需要填 OpenList 代刷得到的 `refresh_token`;服务端会默认挂载根目录并自动回写新 token。
|
||||
|
||||
Google Drive 按 OpenList 在线 API 调用 `https://api.oplist.org/googleui/renewapi` 刷新 token。后台新建 Google Drive 时只需要填 OpenList Google Drive 获取到的 `refresh_token`。Google Drive 下载地址必须携带 `Authorization` 头,浏览器不能直接 302 使用,所以本站会由后端代理 `/p/stream` 播放,不加入零带宽 302 白名单。
|
||||
Google Drive 默认按 OpenList 在线 API 调用 `https://api.oplist.org/googleui/renewapi` 刷新 token。后台新建 Google Drive 时只需要填 OpenList Google Drive 获取到的 `refresh_token`。如果不想依赖 OpenList 在线 API,可以关闭“使用 OpenList 在线续期 API”,并填写同一个 Google OAuth 客户端授权得到的 `refresh_token`、`client_id`、`client_secret`,服务端会直接请求 Google OAuth token 接口续期。Google Drive 下载地址必须携带 `Authorization` 头,浏览器不能直接 302 使用,所以本站会由后端代理 `/p/stream` 播放,不加入零带宽 302 白名单。
|
||||
|
||||
## 文件名约定
|
||||
|
||||
@@ -145,18 +147,18 @@ Google Drive 按 OpenList 在线 API 调用 `https://api.oplist.org/googleui/ren
|
||||
|
||||
1. 同一网盘同一文件按 `(drive_id, file_id)` 形成稳定视频 ID,重复扫描只更新同一行。
|
||||
2. 扫描时优先按网盘侧 `content_hash` 去重;没有 hash 时退化为 `file_name + size_bytes`。
|
||||
3. 扫描、爬虫、本地上传或服务启动挂载网盘后,后台指纹 worker 会异步读取视频的少量 Range 片段,生成 `sampled_sha256`。前台列表、首页、搜索、推荐会按 `size_bytes + sampled_sha256` 只展示最早入库的 canonical 视频。
|
||||
3. 扫描、本地上传或服务启动挂载网盘后,后台指纹 worker 会异步读取视频的少量 Range 片段,生成 `sampled_sha256`。前台列表、首页、搜索、推荐会按 `size_bytes + sampled_sha256` 只展示最早入库的 canonical 视频。
|
||||
|
||||
`sampled_sha256` 是文件级去重:适合识别同一个视频文件被复制到 115 / PikPak / OneDrive 等不同网盘的情况。它不会删除任何网盘文件,也不用于识别转码、裁剪、加水印后的同源视频。
|
||||
`sampled_sha256` 是文件级去重:适合识别同一个视频文件被复制到 115 / PikPak / OneDrive / Google Drive 等不同网盘的情况。它不会删除任何网盘文件,也不用于识别转码、裁剪、加水印后的同源视频。
|
||||
|
||||
封面和预览视频仍然优先生成,不等待指纹完成。夜间流水线最后会做一次重复资产清理:对 `size_bytes + sampled_sha256` 命中的非 canonical 视频,只删除本机生成的重复封面和预览视频,并把对应字段重置为 `pending`。网盘原文件和视频元数据记录不会被删除;如果 canonical 视频以后被移除,这些重复项会重新进入生成队列。
|
||||
|
||||
## 管理能力
|
||||
|
||||
- `/admin/drives`:新增、编辑、删除网盘,触发扫描。
|
||||
- `/admin/videos`:按网盘筛选视频,每页 100 条分页,查看各网盘预览视频统计,编辑标题/作者/分类/标签,单条或全量重生预览视频。
|
||||
- `/admin/videos`:按网盘筛选视频,每页 100 条分页,查看各网盘预览视频统计,编辑标题/作者/分类/标签,单条或全量重生预览视频;拉黑视频页可查看被删除或被隐藏的视频,并支持移出黑名单后在下次扫盘重新入库。
|
||||
- `/admin/tags`:新增标签并用内置规则自动匹配已有视频;删除非系统标签时会从所有视频上同步移除该标签。
|
||||
- 播放页视频信息会展示来源网盘类型;同时提供“不再展示”,点击后会把视频标记为全局隐藏。隐藏视频不会再出现在首页、列表、搜索、相关推荐和详情接口中。目前没有管理后台恢复入口,如需恢复可把数据库里对应视频的 `hidden` 字段改回 `0`。
|
||||
- 播放页视频信息会展示来源网盘类型,并提供删除入口。被删除或被隐藏的视频会进入黑名单,不会再出现在首页、列表、搜索和详情接口中;在后台移出黑名单后,会在下次扫盘时重新发现并入库。
|
||||
|
||||
## 预览视频生成
|
||||
|
||||
@@ -170,7 +172,7 @@ ffmpeg -ss <起点> -headers "UA/Cookie/Referer" -i <直链> \
|
||||
|
||||
当前策略是每段固定 3 秒;30 秒以下最多 3 段,30 秒及以上固定 4 段;长视频在 20% 到 80% 区间均匀取段。生成的预览视频和封面都只保存在本地 `data/previews/`,不会回写到网盘;旧数据中的 `preview_file_id` 会被忽略。
|
||||
|
||||
服务启动或网盘重新挂载时,如果预览视频开关已开启,后端会把历史 `pending` 任务重新入队,避免重启后长期停在“待生成”。OneDrive 扫盘和直链生成预览视频 / 封面时可能触发 Microsoft Graph 429、`TooManyRequests`、`activityLimitReached` 或 throttled 文本;后端会识别这类错误并让当前网盘进入冷却期,保留任务为 `pending`,避免连续请求触发更严重限流。扫盘阶段会按 `Retry-After` 或默认冷却时间等待后继续当前目录。
|
||||
服务启动或网盘重新挂载时,如果预览视频开关已开启,后端会把历史 `pending` 任务重新入队,避免重启后长期停在“待生成”。OneDrive 扫盘和直链生成预览视频 / 封面时可能触发 Microsoft Graph 429、`TooManyRequests`、`activityLimitReached` 或 throttled 文本;Google Drive 可能返回 429、`usageLimits`、`userRateLimitExceeded`、`downloadQuotaExceeded` 等限制标识。后端会识别这类错误并让当前网盘进入冷却期,保留任务为 `pending`,避免连续请求触发更严重限流。扫盘阶段会按 `Retry-After` 或默认冷却时间等待后继续当前目录。
|
||||
|
||||
前端卡片的 `previewSrc` 统一指向 `/p/preview/<videoID>`,后端只从本地 `preview_local` 文件读取。
|
||||
|
||||
|
||||
+1747
-490
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,32 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
)
|
||||
|
||||
func TestCrawlerIntCredFallbacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
d *catalog.Drive
|
||||
key string
|
||||
def int
|
||||
want int
|
||||
}{
|
||||
{"nil drive", nil, "page", 1, 1},
|
||||
{"nil creds", &catalog.Drive{}, "page", 7, 7},
|
||||
{"empty value", &catalog.Drive{Credentials: map[string]string{"page": ""}}, "page", 5, 5},
|
||||
{"non-numeric", &catalog.Drive{Credentials: map[string]string{"page": "abc"}}, "page", 9, 9},
|
||||
{"happy", &catalog.Drive{Credentials: map[string]string{"page": "42"}}, "page", 1, 42},
|
||||
{"missing key", &catalog.Drive{Credentials: map[string]string{"a": "1"}}, "b", 99, 99},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := crawlerIntCred(tc.d, tc.key, tc.def)
|
||||
if got != tc.want {
|
||||
t.Fatalf("crawlerIntCred(%s) = %d, want %d", tc.name, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,95 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/proxy"
|
||||
)
|
||||
|
||||
func TestSpider91IntCredFallbacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
d *catalog.Drive
|
||||
key string
|
||||
def int
|
||||
want int
|
||||
}{
|
||||
{"nil drive", nil, "page", 1, 1},
|
||||
{"nil creds", &catalog.Drive{}, "page", 7, 7},
|
||||
{"empty value", &catalog.Drive{Credentials: map[string]string{"page": ""}}, "page", 5, 5},
|
||||
{"non-numeric", &catalog.Drive{Credentials: map[string]string{"page": "abc"}}, "page", 9, 9},
|
||||
{"happy", &catalog.Drive{Credentials: map[string]string{"page": "42"}}, "page", 1, 42},
|
||||
{"missing key", &catalog.Drive{Credentials: map[string]string{"a": "1"}}, "b", 99, 99},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := spider91IntCred(tc.d, tc.key, tc.def)
|
||||
if got != tc.want {
|
||||
t.Fatalf("spider91IntCred(%s) = %d, want %d", tc.name, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpider91UploadDriveIDDoesNotAutoSelectTarget(t *testing.T) {
|
||||
reg := proxy.NewRegistry()
|
||||
reg.Set("p115-one", &spider91UploadTargetFakeDrive{id: "p115-one", kind: "p115"})
|
||||
reg.Set("p123-one", &spider91UploadTargetFakeDrive{id: "p123-one", kind: "p123"})
|
||||
reg.Set("onedrive-one", &spider91UploadTargetFakeDrive{id: "onedrive-one", kind: "onedrive"})
|
||||
|
||||
app := &App{registry: reg}
|
||||
if got := app.Spider91UploadDriveID(); got != "" {
|
||||
t.Fatalf("empty upload target selected %q, want local-only empty target", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "p115-one"
|
||||
if got := app.Spider91UploadDriveID(); got != "p115-one" {
|
||||
t.Fatalf("explicit upload target = %q, want p115-one", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "p123-one"
|
||||
if got := app.Spider91UploadDriveID(); got != "p123-one" {
|
||||
t.Fatalf("explicit p123 upload target = %q, want p123-one", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "onedrive-one"
|
||||
if got := app.Spider91UploadDriveID(); got != "onedrive-one" {
|
||||
t.Fatalf("explicit onedrive upload target = %q, want onedrive-one", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "missing"
|
||||
if got := app.Spider91UploadDriveID(); got != "" {
|
||||
t.Fatalf("missing upload target = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
type spider91UploadTargetFakeDrive struct {
|
||||
id string
|
||||
kind string
|
||||
}
|
||||
|
||||
func (d *spider91UploadTargetFakeDrive) Kind() string { return d.kind }
|
||||
func (d *spider91UploadTargetFakeDrive) ID() string { return d.id }
|
||||
func (d *spider91UploadTargetFakeDrive) Init(context.Context) error {
|
||||
return nil
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) List(context.Context, string) ([]drives.Entry, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) Stat(context.Context, string) (*drives.Entry, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) StreamURL(context.Context, string) (*drives.StreamLink, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) RootID() string { return "root" }
|
||||
+873
-58
File diff suppressed because it is too large
Load Diff
@@ -33,14 +33,11 @@ scanner:
|
||||
# 单次扫描每家网盘目录递归层数上限
|
||||
max_depth: 5
|
||||
# 被扫描的扩展名
|
||||
video_extensions: [".mp4", ".mkv", ".mov", ".webm", ".avi"]
|
||||
video_extensions: [".mp4", ".mkv", ".mov", ".webm", ".avi", ".strm"]
|
||||
|
||||
nightly:
|
||||
# 凌晨流水线触发整点(0-23),默认 1 即每天 01:00。流程:
|
||||
# Phase 1 扫所有非 spider91 / 非 localupload 网盘 → 检测新增 / 删除
|
||||
# → 入队封面和预览视频 → 等所有队列 idle
|
||||
# Phase 2 spider91 爬虫(如配置)→ 入队预览视频 → 等队列 idle
|
||||
# Phase 3 spider91 → 云盘迁移(一次性 sweep)
|
||||
# 凌晨流水线触发整点(0-23),默认 1 即每天 01:00。
|
||||
# 运行时会统一编排扫描、媒体资产生成和后续清理任务。
|
||||
cron_hour: 1
|
||||
# 单次流水线总耗时上限(软超时);超过后当前 phase 跑完不启动后续 phase。
|
||||
max_duration: 6h
|
||||
@@ -59,7 +56,7 @@ preview:
|
||||
width: 480
|
||||
|
||||
# 盘列表。上线后请通过管理后台添加,本文件可留空。
|
||||
# kind 支持 quark / p115 / p123 / pikpak / wopan / onedrive / googledrive / localstorage。
|
||||
# kind 支持 quark / p115 / p123 / pikpak / wopan / guangyapan / onedrive / googledrive / localstorage。
|
||||
# OneDrive 示例:
|
||||
# - id: "my-onedrive"
|
||||
# kind: "onedrive"
|
||||
@@ -74,11 +71,29 @@ preview:
|
||||
# root_id: "root"
|
||||
# params:
|
||||
# refresh_token: "..."
|
||||
# # 默认 use_online_api=true,会使用 OpenList 在线续期 API。
|
||||
# # 如需使用自己创建的 Google OAuth 客户端,取消下面三行注释:
|
||||
# # use_online_api: "false"
|
||||
# # client_id: "..."
|
||||
# # client_secret: "..."
|
||||
# 光鸭网盘示例:
|
||||
# - id: "my-guangyapan"
|
||||
# kind: "guangyapan"
|
||||
# name: "我的光鸭网盘"
|
||||
# # 留空表示光鸭网盘根目录;也可以填写光鸭目录 fileId
|
||||
# root_id: ""
|
||||
# params:
|
||||
# # 推荐在后台使用扫码登录自动写入 access_token / refresh_token。
|
||||
# refresh_token: "..."
|
||||
# # 可选:按路径解析扫描根目录,优先于 root_id
|
||||
# # root_path: "影视/电影"
|
||||
# 本地存储示例:
|
||||
# - id: "local-media"
|
||||
# kind: "localstorage"
|
||||
# name: "本地视频目录"
|
||||
# root_id: "/"
|
||||
# params:
|
||||
# # Docker 部署时这里和 .strm 里的绝对路径都必须使用容器内路径。
|
||||
# # 例如宿主机 /mnt/videos 挂载为 /media,就填写 /media。
|
||||
# path: "/mnt/videos"
|
||||
drives: []
|
||||
|
||||
+1326
-85
File diff suppressed because it is too large
Load Diff
+1406
-76
File diff suppressed because it is too large
Load Diff
+111
-93
@@ -11,6 +11,7 @@ import (
|
||||
"io"
|
||||
"math/rand/v2"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -24,7 +25,6 @@ import (
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives/localstorage"
|
||||
"github.com/video-site/backend/internal/drives/localupload"
|
||||
"github.com/video-site/backend/internal/drives/spider91"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
"github.com/video-site/backend/internal/proxy"
|
||||
)
|
||||
@@ -54,12 +54,16 @@ type Server struct {
|
||||
LocalDir string
|
||||
UploadDir string
|
||||
OnVideoUploaded func(*catalog.Video)
|
||||
// OnHideVideo 处理前台「不再展示」。隐藏机制已废弃,改走拉黑逻辑:
|
||||
// 删除库中记录 + 本地封面/预览,保留网盘源文件,并写黑名单墓碑
|
||||
// (扫盘不再入库)。未注入时回退为旧的 hidden 标记。
|
||||
OnHideVideo func(ctx context.Context, videoID string) error
|
||||
|
||||
tagCacheMu sync.Mutex
|
||||
tagCacheUntil time.Time
|
||||
tagCache []TagDTO
|
||||
|
||||
// GetTheme 返回当前生效的主题("dark" | "pink")。前台 /api/settings/theme 用,
|
||||
// GetTheme 返回当前生效的主题("dark" | "pink" | "sky")。前台 /api/settings/theme 用,
|
||||
// 不需要登录。无注入时返回 "dark"。
|
||||
GetTheme func() string
|
||||
}
|
||||
@@ -89,7 +93,6 @@ type VideoDTO struct {
|
||||
Dislikes int `json:"dislikes"`
|
||||
PublishedAt string `json:"publishedAt"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
Category string `json:"category,omitempty"`
|
||||
}
|
||||
|
||||
type TagDTO struct {
|
||||
@@ -146,20 +149,19 @@ func (s *Server) RegisterRoutes(r chi.Router, a *auth.Authenticator) {
|
||||
r.Post("/api/shorts/next", s.handleShortsNext)
|
||||
|
||||
// 代理路由同样需要鉴权,防止绕过
|
||||
r.Get("/p/stream/{driveID}/{fileID}", s.handleStream)
|
||||
r.Get("/p/stream/{driveID}/*", s.handleStream)
|
||||
r.Get("/p/upload/{videoID}", s.handleUploadedVideo)
|
||||
r.Get("/p/spider91/{videoID}", s.handleSpider91Video)
|
||||
r.Get("/p/preview/{videoID}", s.handlePreview)
|
||||
r.Get("/p/thumb/{videoID}", s.handleThumb)
|
||||
})
|
||||
}
|
||||
|
||||
// handleGetTheme 返回当前生效的主题。无需登录。响应永远是
|
||||
// {"theme": "dark"} 或 {"theme": "pink"},便于前端无脑解析。
|
||||
// {"theme": "dark" | "pink" | "sky"},便于前端无脑解析。
|
||||
func (s *Server) handleGetTheme(w http.ResponseWriter, r *http.Request) {
|
||||
theme := "dark"
|
||||
if s.GetTheme != nil {
|
||||
if v := s.GetTheme(); v == "pink" || v == "dark" {
|
||||
if v := s.GetTheme(); v == "pink" || v == "dark" || v == "sky" {
|
||||
theme = v
|
||||
}
|
||||
}
|
||||
@@ -290,7 +292,6 @@ func (s *Server) handleList(w http.ResponseWriter, r *http.Request) {
|
||||
params := catalog.ListParams{
|
||||
Keyword: q.Get("q"),
|
||||
Tag: q.Get("tag"),
|
||||
Category: q.Get("cat"),
|
||||
Sort: sort,
|
||||
Page: page,
|
||||
PageSize: size,
|
||||
@@ -313,7 +314,7 @@ func (s *Server) handleList(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleVideoDetail(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
id := routeParam(r, "id")
|
||||
v, err := s.Catalog.GetVideo(r.Context(), id)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusNotFound, err)
|
||||
@@ -343,7 +344,7 @@ func (s *Server) handleVideoDetail(w http.ResponseWriter, r *http.Request) {
|
||||
VideoSrc: s.videoSource(v),
|
||||
Poster: thumbnailURL(v),
|
||||
Description: v.Description,
|
||||
EmbedURL: fmt.Sprintf(`<iframe src="/embed/%s" width="640" height="360" frameborder="0" allowfullscreen></iframe>`, v.ID),
|
||||
EmbedURL: fmt.Sprintf(`<iframe src="/embed/%s" width="640" height="360" frameborder="0" allowfullscreen></iframe>`, pathSegment(v.ID)),
|
||||
AuthorProfile: AuthorProfile{
|
||||
ID: "author-" + v.Author,
|
||||
Name: v.Author,
|
||||
@@ -525,11 +526,9 @@ func (s *Server) handleTags(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// shortsNextReq 客户端把当前轮已看过的 video id 列表传上来。
|
||||
// PreferredFromVideoID 来自短视频页最近一次点赞成功的视频,用于优先推荐相似标签。
|
||||
type shortsNextReq struct {
|
||||
SeenIDs []string `json:"seenIds"`
|
||||
Count int `json:"count"`
|
||||
PreferredFromVideoID string `json:"preferredFromVideoId"`
|
||||
SeenIDs []string `json:"seenIds"`
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
// ShortsItemDTO 是短视频流单条的精简结构。比 VideoDTO 多 videoSrc / poster,
|
||||
@@ -547,8 +546,8 @@ type ShortsItemDTO struct {
|
||||
// - 服务器从未在 seenIds 中的可见视频里随机抽至多 count 条返回
|
||||
// - 当返回数量 < count 且小于全库可见总数时,说明本轮即将结束,
|
||||
// 返回 roundComplete=true,前端应在用户看完返回的这些后清空本地已看记录开新一轮
|
||||
// - 当 seenIds 已经覆盖全库时,本接口直接返回新一轮的随机一批
|
||||
// (传 seenIds=[] 即可让客户端在轮次完成后重新开始)
|
||||
// - 当 seenIds 真实覆盖当前全部可见视频时,本接口直接返回新一轮的随机一批
|
||||
// (不能仅看 seenIds 长度,里面可能有隐藏、删除或历史脏 ID)
|
||||
func (s *Server) handleShortsNext(w http.ResponseWriter, r *http.Request) {
|
||||
var body shortsNextReq
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
|
||||
@@ -569,22 +568,18 @@ func (s *Server) handleShortsNext(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// 如果客户端已看记录已经 ≥ 全库,则视为新一轮,直接忽略 seenIds
|
||||
exclude := body.SeenIDs
|
||||
if total > 0 && len(exclude) >= total {
|
||||
exclude = nil
|
||||
}
|
||||
|
||||
var items []*catalog.Video
|
||||
if strings.TrimSpace(body.PreferredFromVideoID) != "" {
|
||||
items, err = s.Catalog.RandomVideosForPreferredVideoExcluding(r.Context(), body.PreferredFromVideoID, exclude, count)
|
||||
} else {
|
||||
items, err = s.Catalog.RandomVideosExcluding(r.Context(), exclude, count)
|
||||
}
|
||||
items, err := s.Catalog.RandomVideosExcluding(r.Context(), body.SeenIDs, count)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
if total > 0 && len(items) == 0 && len(body.SeenIDs) > 0 {
|
||||
items, err = s.Catalog.RandomVideosExcluding(r.Context(), nil, count)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 注入 sourceLabel 以便前端展示来源网盘
|
||||
driveLabels := make(map[string]string)
|
||||
@@ -622,7 +617,7 @@ type updateVideoTagsReq struct {
|
||||
}
|
||||
|
||||
func (s *Server) handleUpdateVideoTags(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
id := routeParam(r, "id")
|
||||
var body updateVideoTagsReq
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
writeErr(w, http.StatusBadRequest, err)
|
||||
@@ -645,7 +640,7 @@ func (s *Server) handleUpdateVideoTags(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleLike(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
id := routeParam(r, "id")
|
||||
likes, err := s.Catalog.IncrementLike(r.Context(), id)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
@@ -657,7 +652,7 @@ func (s *Server) handleLike(w http.ResponseWriter, r *http.Request) {
|
||||
// handleUnlike 取消点赞:likes - 1(保底 0)。
|
||||
// 短视频模式中爱心按钮点击切换状态时使用。
|
||||
func (s *Server) handleUnlike(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
id := routeParam(r, "id")
|
||||
likes, err := s.Catalog.DecrementLike(r.Context(), id)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
@@ -671,7 +666,7 @@ func (s *Server) handleUnlike(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleView(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
id := routeParam(r, "id")
|
||||
views, err := s.Catalog.IncrementView(r.Context(), id)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
@@ -685,8 +680,15 @@ func (s *Server) handleView(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleHideVideo(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
if err := s.Catalog.HideVideo(r.Context(), id); err != nil {
|
||||
id := routeParam(r, "id")
|
||||
var err error
|
||||
if s.OnHideVideo != nil {
|
||||
// 走拉黑逻辑:删记录 + 删本地封面/预览 + 写墓碑,保留网盘源文件。
|
||||
err = s.OnHideVideo(r.Context(), id)
|
||||
} else {
|
||||
err = s.Catalog.HideVideo(r.Context(), id)
|
||||
}
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
writeErr(w, http.StatusNotFound, err)
|
||||
return
|
||||
@@ -802,12 +804,12 @@ func (s *Server) handleUploadVideo(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleStream(w http.ResponseWriter, r *http.Request) {
|
||||
driveID := chi.URLParam(r, "driveID")
|
||||
fileID := chi.URLParam(r, "fileID")
|
||||
driveID := routeParam(r, "driveID")
|
||||
fileID := routeWildcardParam(r, "*")
|
||||
s.Proxy.ServeStream(w, r, driveID, fileID)
|
||||
}
|
||||
func (s *Server) handleUploadedVideo(w http.ResponseWriter, r *http.Request) {
|
||||
videoID := chi.URLParam(r, "videoID")
|
||||
videoID := routeParam(r, "videoID")
|
||||
v, err := s.Catalog.GetVideo(r.Context(), videoID)
|
||||
if err != nil || v.Hidden || v.DriveID != localUploadDriveID {
|
||||
http.NotFound(w, r)
|
||||
@@ -827,46 +829,8 @@ func (s *Server) handleUploadedVideo(w http.ResponseWriter, r *http.Request) {
|
||||
http.ServeFile(w, r, path)
|
||||
}
|
||||
|
||||
// handleSpider91Video 服务 spider91 drive 下载到本地的视频文件。
|
||||
// 路径形如 /p/spider91/<videoID>,videoID = "spider91-<driveID>-<sourceID>"。
|
||||
// 通过 catalog 拿到 file_id("<sourceID>.mp4"),再让 driver 解析到绝对路径并 ServeFile。
|
||||
func (s *Server) handleSpider91Video(w http.ResponseWriter, r *http.Request) {
|
||||
videoID := chi.URLParam(r, "videoID")
|
||||
v, err := s.Catalog.GetVideo(r.Context(), videoID)
|
||||
if err != nil || v.Hidden {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
if s.Proxy == nil || s.Proxy.Registry == nil {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
d, ok := s.Proxy.Registry.Get(v.DriveID)
|
||||
if !ok || d.Kind() != spider91.Kind {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
sd, ok := d.(*spider91.Driver)
|
||||
if !ok {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
path, err := sd.VideoPath(v.FileID)
|
||||
if err != nil {
|
||||
http.Error(w, "invalid video id", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil || info.IsDir() || info.Size() == 0 {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Cache-Control", "private, max-age=300")
|
||||
http.ServeFile(w, r, path)
|
||||
}
|
||||
|
||||
func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
|
||||
videoID := chi.URLParam(r, "videoID")
|
||||
videoID := routeParam(r, "videoID")
|
||||
v, err := s.Catalog.GetVideo(r.Context(), videoID)
|
||||
if err != nil {
|
||||
http.NotFound(w, r)
|
||||
@@ -891,7 +855,7 @@ func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (s *Server) handleThumb(w http.ResponseWriter, r *http.Request) {
|
||||
videoID := chi.URLParam(r, "videoID")
|
||||
videoID := routeParam(r, "videoID")
|
||||
var clean string
|
||||
for _, path := range mediaasset.ThumbnailPathCandidates(s.LocalDir, videoID) {
|
||||
candidate := filepath.Clean(path)
|
||||
@@ -926,7 +890,7 @@ func mapVideo(v *catalog.Video) VideoDTO {
|
||||
}
|
||||
return VideoDTO{
|
||||
ID: v.ID,
|
||||
Href: "/video/" + v.ID,
|
||||
Href: "/video/" + pathSegment(v.ID),
|
||||
Title: v.Title,
|
||||
Thumbnail: thumbnailURL(v),
|
||||
PreviewSrc: previewURL(v),
|
||||
@@ -943,12 +907,11 @@ func mapVideo(v *catalog.Video) VideoDTO {
|
||||
Dislikes: v.Dislikes,
|
||||
PublishedAt: v.PublishedAt.Format("2006-01-02"),
|
||||
Tags: tags,
|
||||
Category: v.Category,
|
||||
}
|
||||
}
|
||||
|
||||
func previewURL(v *catalog.Video) string {
|
||||
base := "/p/preview/" + v.ID
|
||||
base := "/p/preview/" + pathSegment(v.ID)
|
||||
if v.UpdatedAt.IsZero() {
|
||||
return base
|
||||
}
|
||||
@@ -956,9 +919,12 @@ func previewURL(v *catalog.Video) string {
|
||||
}
|
||||
|
||||
func thumbnailURL(v *catalog.Video) string {
|
||||
base := "/p/thumb/" + v.ID
|
||||
base := "/p/thumb/" + pathSegment(v.ID)
|
||||
if v.ThumbnailURL != "" {
|
||||
base = v.ThumbnailURL
|
||||
if thumbnailURLMatchesVideoID(base, v.ID) {
|
||||
base = "/p/thumb/" + pathSegment(v.ID)
|
||||
}
|
||||
}
|
||||
if !strings.HasPrefix(base, "/p/thumb/") || v.UpdatedAt.IsZero() {
|
||||
return base
|
||||
@@ -966,25 +932,77 @@ func thumbnailURL(v *catalog.Video) string {
|
||||
return base + "?v=" + strconv.FormatInt(v.UpdatedAt.UnixMilli(), 10)
|
||||
}
|
||||
|
||||
// transcodedSource 在视频有就绪的浏览器兼容性转码产物时返回产物的播放地址。
|
||||
// 产物和原始文件在同一个 drive 上,走同一条 /p/stream 代理/302 链路。
|
||||
func transcodedSource(v *catalog.Video) (string, bool) {
|
||||
if v.TranscodeStatus == "ready" && v.TranscodedFileID != "" && v.DriveID != localUploadDriveID {
|
||||
return fmt.Sprintf("/p/stream/%s/%s", pathSegment(v.DriveID), pathSegment(v.TranscodedFileID)), true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (s *Server) videoSource(v *catalog.Video) string {
|
||||
if v.DriveID == localUploadDriveID {
|
||||
return "/p/upload/" + v.ID
|
||||
return "/p/upload/" + pathSegment(v.ID)
|
||||
}
|
||||
if s.Proxy != nil && s.Proxy.Registry != nil {
|
||||
if d, ok := s.Proxy.Registry.Get(v.DriveID); ok && d.Kind() == spider91.Kind {
|
||||
return "/p/spider91/" + v.ID
|
||||
}
|
||||
if src, ok := transcodedSource(v); ok {
|
||||
return src
|
||||
}
|
||||
return fmt.Sprintf("/p/stream/%s/%s", v.DriveID, v.FileID)
|
||||
return fmt.Sprintf("/p/stream/%s/%s", pathSegment(v.DriveID), pathSegment(v.FileID))
|
||||
}
|
||||
|
||||
// videoSource 兼容旧调用点,没有 server context 时按之前逻辑回退到 /p/stream。
|
||||
// 内部新增的代码请使用 (*Server).videoSource。
|
||||
func videoSource(v *catalog.Video) string {
|
||||
if v.DriveID == localUploadDriveID {
|
||||
return "/p/upload/" + v.ID
|
||||
return "/p/upload/" + pathSegment(v.ID)
|
||||
}
|
||||
return fmt.Sprintf("/p/stream/%s/%s", v.DriveID, v.FileID)
|
||||
if src, ok := transcodedSource(v); ok {
|
||||
return src
|
||||
}
|
||||
return fmt.Sprintf("/p/stream/%s/%s", pathSegment(v.DriveID), pathSegment(v.FileID))
|
||||
}
|
||||
|
||||
func pathSegment(value string) string {
|
||||
return url.PathEscape(value)
|
||||
}
|
||||
|
||||
func routeParam(r *http.Request, key string) string {
|
||||
value := chi.URLParam(r, key)
|
||||
if value == "" {
|
||||
return ""
|
||||
}
|
||||
if decoded, err := url.PathUnescape(value); err == nil {
|
||||
return decoded
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
func routeWildcardParam(r *http.Request, key string) string {
|
||||
value := chi.URLParam(r, key)
|
||||
if value == "" {
|
||||
return ""
|
||||
}
|
||||
value = strings.TrimPrefix(value, "/")
|
||||
if decoded, err := url.PathUnescape(value); err == nil {
|
||||
return decoded
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
func thumbnailURLMatchesVideoID(value, videoID string) bool {
|
||||
if !strings.HasPrefix(value, "/p/thumb/") {
|
||||
return false
|
||||
}
|
||||
tail := strings.TrimPrefix(value, "/p/thumb/")
|
||||
if idx := strings.IndexByte(tail, '?'); idx >= 0 {
|
||||
tail = tail[:idx]
|
||||
}
|
||||
if tail == videoID {
|
||||
return true
|
||||
}
|
||||
decoded, err := url.PathUnescape(tail)
|
||||
return err == nil && decoded == videoID
|
||||
}
|
||||
|
||||
func driveKindLabel(kind string) string {
|
||||
@@ -994,19 +1012,19 @@ func driveKindLabel(kind string) string {
|
||||
case "p115":
|
||||
return "115 网盘"
|
||||
case "p123":
|
||||
return "123 云盘"
|
||||
return "123网盘"
|
||||
case "pikpak":
|
||||
return "PikPak"
|
||||
case "wopan":
|
||||
return "联通沃盘"
|
||||
return "联通网盘"
|
||||
case "guangyapan":
|
||||
return "光鸭网盘"
|
||||
case "onedrive":
|
||||
return "OneDrive"
|
||||
case "googledrive":
|
||||
return "Google Drive"
|
||||
case localstorage.Kind:
|
||||
return "本地存储"
|
||||
case spider91.Kind:
|
||||
return "91 爬虫"
|
||||
default:
|
||||
return kind
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
@@ -17,6 +18,7 @@ import (
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
"github.com/video-site/backend/internal/proxy"
|
||||
)
|
||||
@@ -66,6 +68,68 @@ func TestVideoSourceKeepsDirectStreamForMp4(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoURLsEscapePathSegments(t *testing.T) {
|
||||
updated := time.UnixMilli(1778863000123)
|
||||
v := &catalog.Video{
|
||||
ID: "wopan-drive-fid/with space",
|
||||
DriveID: "drive-1",
|
||||
FileID: "fid/with space",
|
||||
Title: "Video",
|
||||
UpdatedAt: updated,
|
||||
}
|
||||
|
||||
dto := mapVideo(v)
|
||||
if dto.Href != "/video/wopan-drive-fid%2Fwith%20space" {
|
||||
t.Fatalf("href = %q, want escaped video id", dto.Href)
|
||||
}
|
||||
if dto.PreviewSrc != "/p/preview/wopan-drive-fid%2Fwith%20space?v=1778863000123" {
|
||||
t.Fatalf("preview = %q, want escaped video id", dto.PreviewSrc)
|
||||
}
|
||||
if dto.Thumbnail != "/p/thumb/wopan-drive-fid%2Fwith%20space?v=1778863000123" {
|
||||
t.Fatalf("thumbnail = %q, want escaped video id", dto.Thumbnail)
|
||||
}
|
||||
if got := videoSource(v); got != "/p/stream/drive-1/fid%2Fwith%20space" {
|
||||
t.Fatalf("video source = %q, want escaped file id", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbnailURLRewritesStoredLocalURLForUnsafeVideoID(t *testing.T) {
|
||||
got := thumbnailURL(&catalog.Video{
|
||||
ID: "wopan-drive-fid/with space",
|
||||
ThumbnailURL: "/p/thumb/wopan-drive-fid/with space",
|
||||
UpdatedAt: time.UnixMilli(1778863000123),
|
||||
})
|
||||
|
||||
if got != "/p/thumb/wopan-drive-fid%2Fwith%20space?v=1778863000123" {
|
||||
t.Fatalf("thumbnail URL = %q, want escaped local URL", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleStreamDecodesEscapedWildcardFileID(t *testing.T) {
|
||||
local := filepath.Join(t.TempDir(), "video.mp4")
|
||||
if err := os.WriteFile(local, []byte("ok"), 0o644); err != nil {
|
||||
t.Fatalf("write local video: %v", err)
|
||||
}
|
||||
drv := &apiStreamFakeDrive{localPath: local}
|
||||
reg := proxy.NewRegistry()
|
||||
reg.Set("drive-1", drv)
|
||||
srv := &Server{Proxy: proxy.New(reg)}
|
||||
|
||||
router := chi.NewRouter()
|
||||
router.Get("/p/stream/{driveID}/*", srv.handleStream)
|
||||
req := httptest.NewRequest(http.MethodGet, "/p/stream/drive-1/fid%2Fwith%20space", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
router.ServeHTTP(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
if drv.fileID != "fid/with space" {
|
||||
t.Fatalf("fileID = %q, want decoded original", drv.fileID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoSourceUsesLocalUploadRoute(t *testing.T) {
|
||||
v := &catalog.Video{
|
||||
ID: "video-1",
|
||||
@@ -100,6 +164,49 @@ func TestPreviewURLFallsBackWithoutUpdatedAt(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleVideoDetailDecodesEscapedVideoID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: "wopan-drive-fid/with space",
|
||||
DriveID: "drive-1",
|
||||
FileID: "fid/with space",
|
||||
Title: "Video",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
router := chi.NewRouter()
|
||||
router.Get("/api/video/{id}", (&Server{Catalog: cat}).handleVideoDetail)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/video/wopan-drive-fid%2Fwith%20space", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
router.ServeHTTP(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got VideoDetailDTO
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if got.ID != "wopan-drive-fid/with space" {
|
||||
t.Fatalf("id = %q, want original video id", got.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbnailURLVersionsLocalGeneratedThumbnails(t *testing.T) {
|
||||
got := thumbnailURL(&catalog.Video{
|
||||
ID: "video-1",
|
||||
@@ -391,6 +498,68 @@ func TestHandleListLatestPrefersReadyThumbnails(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleListIgnoresCategoryQueryAndDoesNotExposeCategory(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*catalog.Video{
|
||||
{
|
||||
ID: "video-a",
|
||||
DriveID: "drive",
|
||||
FileID: "file-a",
|
||||
Title: "A",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
{
|
||||
ID: "video-b",
|
||||
DriveID: "drive",
|
||||
FileID: "file-b",
|
||||
Title: "B",
|
||||
PublishedAt: now.Add(-time.Hour),
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed video %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
rr := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/list?page=1&size=24&cat=alpha", nil)
|
||||
(&Server{Catalog: cat}).handleList(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got struct {
|
||||
Items []map[string]any `json:"items"`
|
||||
Total int `json:"total"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if got.Total != 2 || len(got.Items) != 2 {
|
||||
t.Fatalf("response total/items = %d/%d, want 2/2", got.Total, len(got.Items))
|
||||
}
|
||||
for _, item := range got.Items {
|
||||
if _, ok := item["category"]; ok {
|
||||
t.Fatalf("list response exposed category: %#v", item)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUploadVideoSavesFileVideoTagsAndQueuesPreview(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
@@ -656,7 +825,6 @@ func TestHandleTagsReturnsUnifiedTagPool(t *testing.T) {
|
||||
FileID: "file-1",
|
||||
Title: "清纯女大后入",
|
||||
Tags: []string{"后入", "女大"},
|
||||
Category: "random-category",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -703,7 +871,7 @@ func TestHandleTagsReturnsUnifiedTagPool(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
func TestHandleShortsNextReturnsRandomBatchExcludingSeen(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -727,7 +895,7 @@ func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/shorts/next", strings.NewReader(`{"seenIds":["current"],"count":3,"preferredFromVideoId":"current"}`))
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/shorts/next", strings.NewReader(`{"seenIds":["current"],"count":3}`))
|
||||
rr := httptest.NewRecorder()
|
||||
(&Server{Catalog: cat}).handleShortsNext(rr, req)
|
||||
|
||||
@@ -750,10 +918,7 @@ func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
t.Fatalf("total = %d, want 4", got.Total)
|
||||
}
|
||||
if got.RoundComplete {
|
||||
t.Fatalf("roundComplete = true, want false with fallback-filled batch")
|
||||
}
|
||||
if !containsString(ids, "rare-1") {
|
||||
t.Fatalf("ids = %#v, want rare-1 from least populated tag", ids)
|
||||
t.Fatalf("roundComplete = true, want false with a full remaining batch")
|
||||
}
|
||||
if containsString(ids, "current") {
|
||||
t.Fatalf("ids = %#v, should exclude current", ids)
|
||||
@@ -761,6 +926,76 @@ func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
if len(ids) != 3 {
|
||||
t.Fatalf("ids = %#v, want 3 items", ids)
|
||||
}
|
||||
for _, want := range []string{"common-1", "common-2", "rare-1"} {
|
||||
if !containsString(ids, want) {
|
||||
t.Fatalf("ids = %#v, want remaining id %s", ids, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleShortsNextDoesNotResetForStaleSeenIDs(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*catalog.Video{
|
||||
{ID: "seen-1", DriveID: "drive", FileID: "f-seen-1", Title: "seen 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "fresh-1", DriveID: "drive", FileID: "f-fresh-1", Title: "fresh 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "fresh-2", DriveID: "drive", FileID: "f-fresh-2", Title: "fresh 2", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "hidden-1", DriveID: "drive", FileID: "f-hidden-1", Title: "hidden 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
if err := cat.HideVideo(ctx, "hidden-1"); err != nil {
|
||||
t.Fatalf("hide hidden-1: %v", err)
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/shorts/next", strings.NewReader(`{"seenIds":["seen-1","hidden-1","deleted-stale"],"count":3}`))
|
||||
rr := httptest.NewRecorder()
|
||||
(&Server{Catalog: cat}).handleShortsNext(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got struct {
|
||||
Items []ShortsItemDTO `json:"items"`
|
||||
Total int `json:"total"`
|
||||
RoundComplete bool `json:"roundComplete"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
ids := make([]string, 0, len(got.Items))
|
||||
for _, item := range got.Items {
|
||||
ids = append(ids, item.ID)
|
||||
}
|
||||
if got.Total != 3 {
|
||||
t.Fatalf("total = %d, want 3", got.Total)
|
||||
}
|
||||
if !got.RoundComplete {
|
||||
t.Fatalf("roundComplete = false, want true after returning all unviewed visible videos")
|
||||
}
|
||||
if containsString(ids, "seen-1") || containsString(ids, "hidden-1") {
|
||||
t.Fatalf("ids = %#v, should not reset and return seen or hidden videos", ids)
|
||||
}
|
||||
for _, want := range []string{"fresh-1", "fresh-2"} {
|
||||
if !containsString(ids, want) {
|
||||
t.Fatalf("ids = %#v, want %s", ids, want)
|
||||
}
|
||||
}
|
||||
if len(ids) != 2 {
|
||||
t.Fatalf("ids = %#v, want exactly the two unviewed visible videos", ids)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUpdateVideoTagsRejectsUnknownTags(t *testing.T) {
|
||||
@@ -1084,6 +1319,37 @@ func sameStringSet(a, b []string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
type apiStreamFakeDrive struct {
|
||||
localPath string
|
||||
fileID string
|
||||
}
|
||||
|
||||
func (d *apiStreamFakeDrive) Kind() string { return "fake" }
|
||||
func (d *apiStreamFakeDrive) ID() string { return "drive-1" }
|
||||
func (d *apiStreamFakeDrive) Init(context.Context) error {
|
||||
return nil
|
||||
}
|
||||
func (d *apiStreamFakeDrive) List(context.Context, string) ([]drives.Entry, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *apiStreamFakeDrive) Stat(context.Context, string) (*drives.Entry, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *apiStreamFakeDrive) StreamURL(_ context.Context, fileID string) (*drives.StreamLink, error) {
|
||||
d.fileID = fileID
|
||||
return &drives.StreamLink{
|
||||
URL: d.localPath,
|
||||
Expires: time.Now().Add(time.Minute),
|
||||
}, nil
|
||||
}
|
||||
func (d *apiStreamFakeDrive) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *apiStreamFakeDrive) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *apiStreamFakeDrive) RootID() string { return "root" }
|
||||
|
||||
func requestWithVideoID(method, target, videoID string, body *strings.Reader) *http.Request {
|
||||
return requestWithRouteParam(method, target, "id", videoID, body)
|
||||
}
|
||||
|
||||
+698
-295
File diff suppressed because it is too large
Load Diff
@@ -58,10 +58,11 @@ func TestUpsertDriveDefaultsRootIDByKind(t *testing.T) {
|
||||
}{
|
||||
{id: "p115", kind: "p115", want: "0"},
|
||||
{id: "pikpak", kind: "pikpak", want: ""},
|
||||
{id: "guangyapan", kind: "guangyapan", want: ""},
|
||||
{id: "onedrive", kind: "onedrive", want: "root"},
|
||||
{id: "googledrive", kind: "googledrive", want: "root"},
|
||||
{id: "localstorage", kind: "localstorage", want: "/"},
|
||||
{id: "spider91", kind: "spider91", want: "/"},
|
||||
{id: "scriptcrawler", kind: "scriptcrawler", want: "/"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
@@ -84,7 +85,7 @@ func TestUpsertDriveDefaultsRootIDByKind(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpsertDriveIgnoresRootIDForLocalStorageAndSpider91(t *testing.T) {
|
||||
func TestUpsertDriveIgnoresRootIDForLocalStorageAndScriptCrawler(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -101,7 +102,7 @@ func TestUpsertDriveIgnoresRootIDForLocalStorageAndSpider91(t *testing.T) {
|
||||
kind string
|
||||
}{
|
||||
{id: "localstorage", kind: "localstorage"},
|
||||
{id: "spider91", kind: "spider91"},
|
||||
{id: "scriptcrawler", kind: "scriptcrawler"},
|
||||
} {
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
ID: tc.id,
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestListVideoFileIDsByDrive 校验 spider91 crawler 用到的轻量 file_id 查询:
|
||||
// TestListVideoFileIDsByDrive 校验上传 worker 用到的轻量 file_id 查询:
|
||||
// - 只返回指定 drive 的 file_id;不返回其它 drive 的
|
||||
// - 跳过 file_id 为空的视频
|
||||
// - 返回顺序无要求,但每个 file_id 只出现一次
|
||||
@@ -33,20 +33,20 @@ func TestListVideoFileIDsByDrive(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
insert("spider91-A-vk001", "spider-a", "vk001.mp4")
|
||||
insert("spider91-A-vk002", "spider-a", "vk002.flv")
|
||||
insert("spider91-A-vk003", "spider-a", "vk003.mp4")
|
||||
insert("scriptcrawler-A-source001", "crawler-a", "source001.mp4")
|
||||
insert("scriptcrawler-A-source002", "crawler-a", "source002.flv")
|
||||
insert("scriptcrawler-A-source003", "crawler-a", "source003.mp4")
|
||||
// 不同 drive 的视频不应出现
|
||||
insert("quark-other-fid", "drive-quark", "abcdef")
|
||||
// 空 file_id 应被过滤
|
||||
insert("spider91-A-empty", "spider-a", "")
|
||||
insert("scriptcrawler-A-empty", "crawler-a", "")
|
||||
|
||||
got, err := cat.ListVideoFileIDsByDrive(ctx, "spider-a")
|
||||
got, err := cat.ListVideoFileIDsByDrive(ctx, "crawler-a")
|
||||
if err != nil {
|
||||
t.Fatalf("ListVideoFileIDsByDrive: %v", err)
|
||||
}
|
||||
sort.Strings(got)
|
||||
want := []string{"vk001.mp4", "vk002.flv", "vk003.mp4"}
|
||||
want := []string{"source001.mp4", "source002.flv", "source003.mp4"}
|
||||
sort.Strings(want)
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("got %d ids, want %d: got=%v", len(got), len(want), got)
|
||||
@@ -67,11 +67,11 @@ func TestListVideoFileIDsByDrive(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestListSpider91ViewkeysFindsMigratedVideos 校验:即使 spider91 视频
|
||||
// 被迁移到 PikPak(drive_id 改了),ListSpider91Viewkeys 仍能通过 video.id
|
||||
// 前缀找到这些 viewkey。这是 crawler 写 seen 文件的关键不变量,
|
||||
// 否则下一次爬取会把已爬过的 viewkey 当作"新"的再爬一遍。
|
||||
func TestListSpider91ViewkeysFindsMigratedVideos(t *testing.T) {
|
||||
// TestListCrawlerSourceIDsFindsMigratedVideos 校验:即使爬虫视频被上传迁移
|
||||
// 到目标网盘(drive_id 改了),ListCrawlerSourceIDs 仍能通过 video.id 前缀
|
||||
// 找到这些 source_id。这是 crawler 写 seen 文件的关键不变量,否则下一次
|
||||
// 爬取会把已爬过的 source_id 当作"新"的再爬一遍。
|
||||
func TestListCrawlerSourceIDsFindsMigratedVideos(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -92,25 +92,25 @@ func TestListSpider91ViewkeysFindsMigratedVideos(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// 1) 仍在 spider91 drive 下的视频(未迁移)
|
||||
insert("spider91-91Spider-vk001", "91Spider", "vk001.mp4")
|
||||
// 2) 已迁移到 PikPak 的视频:drive_id 变了,但 id 仍是 spider91-91Spider-...
|
||||
insert("spider91-91Spider-vk002", "PikPak", "PIKPAK-FILE-ID-2")
|
||||
insert("spider91-91Spider-vk003", "PikPak", "PIKPAK-FILE-ID-3")
|
||||
// 3) 别的 spider91 drive 的视频,不应混进来
|
||||
insert("spider91-OtherDrive-vk999", "OtherDrive", "vk999.mp4")
|
||||
// 1) 仍在本地爬虫 drive 下的视频(未上传)
|
||||
insert("scriptcrawler-crawler-a-source001", "crawler-a", "source001.mp4")
|
||||
// 2) 已上传到目标盘的视频:drive_id 变了,但 id 仍保留 crawler 来源前缀。
|
||||
insert("scriptcrawler-crawler-a-source002", "target-drive", "TARGET-FILE-ID-2")
|
||||
insert("scriptcrawler-crawler-a-source003", "target-drive", "TARGET-FILE-ID-3")
|
||||
// 3) 别的爬虫 drive 的视频,不应混进来
|
||||
insert("scriptcrawler-other-source999", "other-crawler", "source999.mp4")
|
||||
// 4) 完全无关的视频
|
||||
insert("quark-some-fid", "drive-quark", "abc")
|
||||
|
||||
got, err := cat.ListSpider91Viewkeys(ctx, "91Spider")
|
||||
got, err := cat.ListCrawlerSourceIDs(ctx, "scriptcrawler", "crawler-a")
|
||||
if err != nil {
|
||||
t.Fatalf("ListSpider91Viewkeys: %v", err)
|
||||
t.Fatalf("ListCrawlerSourceIDs: %v", err)
|
||||
}
|
||||
sort.Strings(got)
|
||||
want := []string{"vk001", "vk002", "vk003"}
|
||||
want := []string{"source001", "source002", "source003"}
|
||||
sort.Strings(want)
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("got %d viewkeys, want %d: got=%v", len(got), len(want), got)
|
||||
t.Fatalf("got %d source ids, want %d: got=%v", len(got), len(want), got)
|
||||
}
|
||||
for i := range got {
|
||||
if got[i] != want[i] {
|
||||
@@ -119,9 +119,9 @@ func TestListSpider91ViewkeysFindsMigratedVideos(t *testing.T) {
|
||||
}
|
||||
|
||||
// 不存在的 drive 返回空列表
|
||||
other, err := cat.ListSpider91Viewkeys(ctx, "no-such-drive")
|
||||
other, err := cat.ListCrawlerSourceIDs(ctx, "scriptcrawler", "no-such-drive")
|
||||
if err != nil {
|
||||
t.Fatalf("ListSpider91Viewkeys empty: %v", err)
|
||||
t.Fatalf("ListCrawlerSourceIDs empty: %v", err)
|
||||
}
|
||||
if len(other) != 0 {
|
||||
t.Fatalf("non-existent drive: got %v, want empty", other)
|
||||
@@ -138,12 +138,12 @@ func TestDeleteVideoWithTombstonePreventsReimport(t *testing.T) {
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "spider91-91Spider-vk004",
|
||||
DriveID: "91Spider",
|
||||
FileID: "vk004.mp4",
|
||||
FileName: "vk004.mp4",
|
||||
ID: "scriptcrawler-crawler-a-source004",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "source004.mp4",
|
||||
FileName: "source004.mp4",
|
||||
ContentHash: "ABCDEF",
|
||||
Title: "Deleted Spider",
|
||||
Title: "Deleted Source",
|
||||
Size: 2048,
|
||||
PreviewStatus: "ready",
|
||||
PublishedAt: now,
|
||||
@@ -153,24 +153,24 @@ func TestDeleteVideoWithTombstonePreventsReimport(t *testing.T) {
|
||||
t.Fatalf("upsert: %v", err)
|
||||
}
|
||||
|
||||
if err := cat.DeleteVideoWithTombstone(ctx, "spider91-91Spider-vk004"); err != nil {
|
||||
if err := cat.DeleteVideoWithTombstone(ctx, "scriptcrawler-crawler-a-source004"); err != nil {
|
||||
t.Fatalf("delete with tombstone: %v", err)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, "spider91-91Spider-vk004"); err != sql.ErrNoRows {
|
||||
if _, err := cat.GetVideo(ctx, "scriptcrawler-crawler-a-source004"); err != sql.ErrNoRows {
|
||||
t.Fatalf("get deleted video error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
deleted, err := cat.IsDeletedVideoCandidate(ctx, "spider91-91Spider-vk004", "91Spider", "vk004.mp4", "abcdef", "vk004.mp4", 2048)
|
||||
deleted, err := cat.IsDeletedVideoCandidate(ctx, "scriptcrawler-crawler-a-source004", "crawler-a", "source004.mp4", "abcdef", "source004.mp4", 2048)
|
||||
if err != nil {
|
||||
t.Fatalf("check deleted candidate: %v", err)
|
||||
}
|
||||
if !deleted {
|
||||
t.Fatal("deleted candidate was not recognized")
|
||||
}
|
||||
viewkeys, err := cat.ListSpider91Viewkeys(ctx, "91Spider")
|
||||
sourceIDs, err := cat.ListCrawlerSourceIDs(ctx, "scriptcrawler", "crawler-a")
|
||||
if err != nil {
|
||||
t.Fatalf("ListSpider91Viewkeys: %v", err)
|
||||
t.Fatalf("ListCrawlerSourceIDs: %v", err)
|
||||
}
|
||||
if len(viewkeys) != 1 || viewkeys[0] != "vk004" {
|
||||
t.Fatalf("viewkeys = %#v, want [vk004]", viewkeys)
|
||||
if len(sourceIDs) != 1 || sourceIDs[0] != "source004" {
|
||||
t.Fatalf("source ids = %#v, want [source004]", sourceIDs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestListVideosKeywordMatchesFileName(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "p115-115-sone-089-4k",
|
||||
DriveID: "drive",
|
||||
FileID: "file-sone-089-4k",
|
||||
FileName: "www.98T.la@sone-089-4k.mp4",
|
||||
Title: "www.98T.la@sone-089",
|
||||
Author: "4k",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
items, total, err := cat.ListVideos(ctx, ListParams{
|
||||
Keyword: "www.98T.la@sone-089-4k.mp4",
|
||||
Page: 1,
|
||||
PageSize: 10,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("list videos: %v", err)
|
||||
}
|
||||
if total != 1 {
|
||||
t.Fatalf("total = %d, want 1", total)
|
||||
}
|
||||
if len(items) != 1 || items[0].ID != "p115-115-sone-089-4k" {
|
||||
t.Fatalf("items = %#v, want seeded video", items)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestIncrementViewStoresLastViewedAt(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "video-1",
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
Title: "Video 1",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
if _, err := cat.IncrementView(ctx, "video-1"); err != nil {
|
||||
t.Fatalf("increment view: %v", err)
|
||||
}
|
||||
got, err := cat.GetVideo(ctx, "video-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.Views != 1 {
|
||||
t.Fatalf("views = %d, want 1", got.Views)
|
||||
}
|
||||
if got.LastViewedAt.IsZero() {
|
||||
t.Fatal("last viewed time was not stored")
|
||||
}
|
||||
}
|
||||
|
||||
func TestListVideosRecentSortUsesLastViewedAt(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "old-view", DriveID: "drive", FileID: "old-view", Title: "Old View", PublishedAt: now.Add(3 * time.Hour), CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "recent-view", DriveID: "drive", FileID: "recent-view", Title: "Recent View", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "unviewed", DriveID: "drive", FileID: "unviewed", Title: "Unviewed", PublishedAt: now.Add(4 * time.Hour), CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`UPDATE videos SET last_viewed_at = CASE id
|
||||
WHEN 'old-view' THEN ?
|
||||
WHEN 'recent-view' THEN ?
|
||||
ELSE 0
|
||||
END`,
|
||||
now.Add(-time.Hour).UnixMilli(),
|
||||
now.Add(time.Hour).UnixMilli(),
|
||||
); err != nil {
|
||||
t.Fatalf("seed last_viewed_at: %v", err)
|
||||
}
|
||||
|
||||
items, _, err := cat.ListVideos(ctx, ListParams{Sort: "recent", Page: 1, PageSize: 3})
|
||||
if err != nil {
|
||||
t.Fatalf("list recent videos: %v", err)
|
||||
}
|
||||
if len(items) != 3 {
|
||||
t.Fatalf("items = %d, want 3", len(items))
|
||||
}
|
||||
got := []string{items[0].ID, items[1].ID, items[2].ID}
|
||||
want := []string{"recent-view", "old-view", "unviewed"}
|
||||
for i := range want {
|
||||
if got[i] != want[i] {
|
||||
t.Fatalf("recent order = %#v, want %#v", got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -21,13 +21,17 @@ CREATE TABLE IF NOT EXISTS videos (
|
||||
thumbnail_failures INTEGER DEFAULT 0, -- consecutive transient thumbnail generation failures
|
||||
preview_file_id TEXT, -- deprecated: 旧版回写网盘后的预览视频 file id
|
||||
preview_local TEXT, -- 本地预览视频路径(兜底)
|
||||
preview_status TEXT DEFAULT 'pending', -- pending / ready / failed
|
||||
preview_status TEXT DEFAULT 'pending', -- pending / ready / failed / disabled
|
||||
transcode_status TEXT DEFAULT '', -- '' / pending / ready / skipped / failed(浏览器兼容性转码)
|
||||
transcode_error TEXT DEFAULT '',
|
||||
transcoded_file_id TEXT DEFAULT '', -- 转码产物在同一 drive 上的 fileID,播放源优先用它
|
||||
transcoded_size INTEGER DEFAULT 0,
|
||||
views INTEGER DEFAULT 0,
|
||||
last_viewed_at INTEGER DEFAULT 0,
|
||||
favorites INTEGER DEFAULT 0,
|
||||
comments INTEGER DEFAULT 0,
|
||||
likes INTEGER DEFAULT 0,
|
||||
dislikes INTEGER DEFAULT 0,
|
||||
category TEXT,
|
||||
hidden INTEGER DEFAULT 0, -- 1 = hidden from public display
|
||||
tags_manual INTEGER DEFAULT 0, -- 1 = user explicitly curated tags
|
||||
badges TEXT, -- JSON array
|
||||
@@ -70,7 +74,7 @@ CREATE TABLE IF NOT EXISTS deleted_tags (
|
||||
deleted_at INTEGER NOT NULL
|
||||
);
|
||||
|
||||
-- 管理员显式删除过的视频。用于防止后续扫描 / spider91 爬虫把同一个源文件
|
||||
-- 管理员显式删除过的视频。用于防止后续扫描 / 爬虫把同一个源文件
|
||||
-- 再次入库;不代表原始云盘文件已被删除。
|
||||
CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
id TEXT PRIMARY KEY,
|
||||
@@ -79,6 +83,7 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
content_hash TEXT NOT NULL DEFAULT '',
|
||||
file_name TEXT NOT NULL DEFAULT '',
|
||||
size_bytes INTEGER NOT NULL DEFAULT 0,
|
||||
reason TEXT NOT NULL DEFAULT '',
|
||||
deleted_at INTEGER NOT NULL
|
||||
);
|
||||
|
||||
@@ -89,17 +94,35 @@ CREATE INDEX IF NOT EXISTS idx_deleted_videos_drive_hash
|
||||
CREATE INDEX IF NOT EXISTS idx_deleted_videos_drive_signature
|
||||
ON deleted_videos(drive_id, file_name, size_bytes);
|
||||
|
||||
-- Crawler source records. Used to write source_ids that were confirmed as
-- duplicates back into the seen list, so later crawls do not keep
-- re-downloading the same candidate video.
CREATE TABLE IF NOT EXISTS crawler_seen_sources (
    kind               TEXT    NOT NULL,
    drive_id           TEXT    NOT NULL,
    source_id          TEXT    NOT NULL,
    status             TEXT    NOT NULL DEFAULT 'imported', -- imported / duplicate
    canonical_video_id TEXT    NOT NULL DEFAULT '',
    sampled_sha256     TEXT    NOT NULL DEFAULT '',
    size_bytes         INTEGER NOT NULL DEFAULT 0,
    first_seen_at      INTEGER NOT NULL,
    last_seen_at       INTEGER NOT NULL,
    PRIMARY KEY (kind, drive_id, source_id)
);

CREATE INDEX IF NOT EXISTS idx_crawler_seen_sources_drive
    ON crawler_seen_sources(kind, drive_id, status);
|
||||
|
||||
-- 网盘账户
|
||||
CREATE TABLE IF NOT EXISTS drives (
|
||||
id TEXT PRIMARY KEY,
|
||||
kind TEXT NOT NULL, -- quark / p115 / p123 / pikpak / wopan / onedrive / googledrive / localstorage / spider91
|
||||
kind TEXT NOT NULL, -- quark / p115 / p123 / pikpak / wopan / guangyapan / onedrive / googledrive / localstorage / scriptcrawler
|
||||
name TEXT NOT NULL,
|
||||
root_id TEXT NOT NULL DEFAULT '0',
|
||||
scan_root_id TEXT, -- deprecated: 扫描起点固定等于 root_id
|
||||
credentials TEXT, -- JSON: cookie / refresh_token 等
|
||||
status TEXT DEFAULT 'disconnected', -- disconnected / ok / error
|
||||
last_error TEXT,
|
||||
-- 是否给该盘生成预览视频/封面:1 开 / 0 关。
|
||||
-- 是否给该盘生成预览视频:1 开 / 0 关。封面生成不受影响。
|
||||
-- 替代了早期的全局 preview.enabled 设置(保留旧 setting 行不再读)。
|
||||
teaser_enabled INTEGER NOT NULL DEFAULT 1,
|
||||
-- 扫描时要跳过的目录 ID 集合(JSON array of string)。命中其中任意一个的目录及其
|
||||
|
||||
@@ -165,171 +165,3 @@ func TestRandomVideosWithReadyThumbnailsExcluding(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandomVideosForPreferredVideoChoosesLeastPopulatedTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "current", DriveID: "drive", FileID: "f-current", Title: "current", Tags: []string{"common", "rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-1", DriveID: "drive", FileID: "f-common-1", Title: "common 1", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-2", DriveID: "drive", FileID: "f-common-2", Title: "common 2", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "rare-1", DriveID: "drive", FileID: "f-rare-1", Title: "rare 1", Tags: []string{"rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
tag, err := cat.LeastPopulatedVisibleUniqueTag(ctx, []string{"common", "rare"})
|
||||
if err != nil {
|
||||
t.Fatalf("least populated tag: %v", err)
|
||||
}
|
||||
if tag != "rare" {
|
||||
t.Fatalf("least populated tag = %q, want rare", tag)
|
||||
}
|
||||
|
||||
got, err := cat.RandomVideosForPreferredVideoExcluding(ctx, "current", []string{"current"}, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("random preferred: %v", err)
|
||||
}
|
||||
if len(got) != 1 || got[0].ID != "rare-1" {
|
||||
t.Fatalf("preferred result = %#v, want rare-1", videoIDs(got))
|
||||
}
|
||||
|
||||
got, err = cat.RandomVideosForPreferredVideoExcluding(ctx, "current", nil, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("random preferred without explicit exclude: %v", err)
|
||||
}
|
||||
if len(got) != 1 || got[0].ID == "current" {
|
||||
t.Fatalf("preferred result without explicit exclude = %#v, should not return current", videoIDs(got))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandomVideosForPreferredVideoFallsBackToFillBatch(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "current", DriveID: "drive", FileID: "f-current", Title: "current", Tags: []string{"common", "rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-1", DriveID: "drive", FileID: "f-common-1", Title: "common 1", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-2", DriveID: "drive", FileID: "f-common-2", Title: "common 2", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "rare-1", DriveID: "drive", FileID: "f-rare-1", Title: "rare 1", Tags: []string{"rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "hidden-rare", DriveID: "drive", FileID: "f-hidden-rare", Title: "hidden rare", Tags: []string{"rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
if err := cat.HideVideo(ctx, "hidden-rare"); err != nil {
|
||||
t.Fatalf("hide hidden-rare: %v", err)
|
||||
}
|
||||
|
||||
got, err := cat.RandomVideosForPreferredVideoExcluding(ctx, "current", []string{"current"}, 3)
|
||||
if err != nil {
|
||||
t.Fatalf("random preferred: %v", err)
|
||||
}
|
||||
ids := videoIDs(got)
|
||||
if len(ids) != 3 {
|
||||
t.Fatalf("result ids = %#v, want 3 items", ids)
|
||||
}
|
||||
for _, excluded := range []string{"current", "hidden-rare"} {
|
||||
if hasVideoID(ids, excluded) {
|
||||
t.Fatalf("result ids = %#v, should not include %s", ids, excluded)
|
||||
}
|
||||
}
|
||||
if !hasVideoID(ids, "rare-1") {
|
||||
t.Fatalf("result ids = %#v, want rare-1 from least populated tag", ids)
|
||||
}
|
||||
if len(uniqueVideoIDs(ids)) != len(ids) {
|
||||
t.Fatalf("result ids = %#v, want no duplicates", ids)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandomVideosForPreferredVideoFallbacksWhenPreferenceUnavailable(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "untagged", DriveID: "drive", FileID: "f-untagged", Title: "untagged", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "visible-1", DriveID: "drive", FileID: "f-visible-1", Title: "visible 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "visible-2", DriveID: "drive", FileID: "f-visible-2", Title: "visible 2", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
got, err := cat.RandomVideosForPreferredVideoExcluding(ctx, "missing", []string{"untagged"}, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("random missing preferred: %v", err)
|
||||
}
|
||||
if !sameVideoIDSet(videoIDs(got), []string{"visible-1", "visible-2"}) {
|
||||
t.Fatalf("missing preferred ids = %#v, want visible fallback videos", videoIDs(got))
|
||||
}
|
||||
|
||||
got, err = cat.RandomVideosForPreferredVideoExcluding(ctx, "untagged", []string{"untagged"}, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("random untagged preferred: %v", err)
|
||||
}
|
||||
if !sameVideoIDSet(videoIDs(got), []string{"visible-1", "visible-2"}) {
|
||||
t.Fatalf("untagged preferred ids = %#v, want visible fallback videos", videoIDs(got))
|
||||
}
|
||||
}
|
||||
|
||||
func videoIDs(videos []*Video) []string {
|
||||
ids := make([]string, 0, len(videos))
|
||||
for _, v := range videos {
|
||||
ids = append(ids, v.ID)
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
// hasVideoID reports whether want occurs anywhere in ids.
func hasVideoID(ids []string, want string) bool {
	for i := 0; i < len(ids); i++ {
		if ids[i] == want {
			return true
		}
	}
	return false
}
|
||||
|
||||
// uniqueVideoIDs collapses ids into a set keyed by ID; duplicates map to the
// same key, so the length of the result is the number of distinct IDs.
func uniqueVideoIDs(ids []string) map[string]struct{} {
	set := make(map[string]struct{}, len(ids))
	for i := 0; i < len(ids); i++ {
		set[ids[i]] = struct{}{}
	}
	return set
}
|
||||
|
||||
// sameVideoIDSet reports whether a and b hold the same IDs with the same
// multiplicities, ignoring order.
func sameVideoIDSet(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	// Count every ID in a, then consume those counts with the IDs from b.
	remaining := make(map[string]int, len(a))
	for i := range a {
		remaining[a[i]]++
	}
	for i := range b {
		id := b[i]
		if remaining[id] == 0 {
			// b holds an ID that a lacks, or holds it more often than a does.
			return false
		}
		remaining[id]--
	}
	return true
}
|
||||
|
||||
+230
-209
@@ -66,6 +66,30 @@ func (c *Catalog) migrate(ctx context.Context) error {
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "thumbnail_failures", "INTEGER DEFAULT 0"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "last_viewed_at", "INTEGER DEFAULT 0"); err != nil {
|
||||
return err
|
||||
}
|
||||
// videos.transcode_*:浏览器兼容性转码状态。
|
||||
// status:''=未检测 / pending=已入队 / ready=已转码 / skipped=检测后无需转码 / failed=失败。
|
||||
// transcoded_file_id 指向转码产物在同一 drive 上的 fileID,播放源优先使用它。
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcode_status", "TEXT DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcode_error", "TEXT DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcoded_file_id", "TEXT DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcoded_size", "INTEGER DEFAULT 0"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.dropColumnIfExists(ctx, "videos", "category"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.ensureBaseVideoIndexes(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
// drives.teaser_enabled:每盘预览视频开关,替代旧的全局 preview.enabled。
|
||||
// 升级路径:直接让 ALTER TABLE 的 DEFAULT 1 兜底 —— 每个现存 drive 都默认开启,
|
||||
// 不读旧的 settings.preview.enabled 字段。这样老用户即便之前关过全局开关,
|
||||
@@ -87,10 +111,14 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
content_hash TEXT NOT NULL DEFAULT '',
|
||||
file_name TEXT NOT NULL DEFAULT '',
|
||||
size_bytes INTEGER NOT NULL DEFAULT 0,
|
||||
reason TEXT NOT NULL DEFAULT '',
|
||||
deleted_at INTEGER NOT NULL
|
||||
)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "deleted_videos", "reason", "TEXT NOT NULL DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.syncDriveScanRootIDToRootID(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -109,6 +137,9 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
if err := c.reconcileThumbnailStatusOnce(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.requeueSkippedPreviews(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_content_hash ON videos(content_hash)`); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -127,6 +158,9 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_visible_pub ON videos(COALESCE(hidden, 0), published_at DESC)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_last_viewed ON videos(last_viewed_at DESC)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_file_name_size ON videos(file_name, size_bytes)`); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -151,9 +185,6 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
if err := c.collapseAVCodeTags(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.createCollectionTagsFromCategories(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.classifySystemTags(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -163,7 +194,7 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
if err := c.clearRemoteP123ThumbnailsOnce(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.clearRemoteNonSpider91Thumbnails(ctx); err != nil {
|
||||
if err := c.clearRemoteThumbnails(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.hideZeroSizeVideosFromKnownDrives(ctx); err != nil {
|
||||
@@ -180,6 +211,172 @@ func (c *Catalog) addColumnIfMissing(ctx context.Context, table, column, definit
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) dropColumnIfExists(ctx context.Context, table, column string) error {
|
||||
rows, err := c.db.QueryContext(ctx, `PRAGMA table_info(`+table+`)`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rows.Close()
|
||||
found := false
|
||||
for rows.Next() {
|
||||
var cid int
|
||||
var name, typ string
|
||||
var notNull int
|
||||
var defaultValue any
|
||||
var pk int
|
||||
if err := rows.Scan(&cid, &name, &typ, ¬Null, &defaultValue, &pk); err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.EqualFold(name, column) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
_ = rows.Close()
|
||||
return err
|
||||
}
|
||||
if err := rows.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
if !found {
|
||||
return nil
|
||||
}
|
||||
if _, err = c.db.ExecContext(ctx, `ALTER TABLE `+table+` DROP COLUMN `+column); err == nil {
|
||||
return nil
|
||||
}
|
||||
if table == "videos" && strings.EqualFold(column, "category") {
|
||||
log.Printf("[catalog] native drop column videos.category failed, rebuilding videos table without category: %v", err)
|
||||
return c.rebuildVideosTableWithoutCategory(ctx)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) ensureBaseVideoIndexes(ctx context.Context) error {
|
||||
for _, stmt := range []string{
|
||||
`CREATE INDEX IF NOT EXISTS idx_videos_drive ON videos(drive_id, file_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_videos_pub ON videos(published_at DESC)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_videos_views ON videos(views DESC)`,
|
||||
} {
|
||||
if _, err := c.db.ExecContext(ctx, stmt); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// currentVideoColumnNames lists the columns that rebuildVideosTableWithoutCategory
// copies from the old videos table into its replacement. The names and their
// order match the column definitions in createVideosWithoutCategorySQL.
var currentVideoColumnNames = []string{
	// Identity and file metadata.
	"id", "drive_id", "file_id", "file_name",
	"content_hash", "sampled_sha256", "fingerprint_status", "fingerprint_error",
	"parent_id",
	// Descriptive fields.
	"title", "author", "tags",
	"duration_seconds", "size_bytes", "ext", "quality",
	// Thumbnail, preview and transcode state.
	"thumbnail_url", "thumbnail_status", "thumbnail_failures",
	"preview_file_id", "preview_local", "preview_status",
	"transcode_status", "transcode_error", "transcoded_file_id", "transcoded_size",
	// Counters and flags.
	"views", "last_viewed_at", "favorites", "comments", "likes", "dislikes",
	"hidden", "tags_manual", "badges", "description",
	// Timestamps.
	"published_at", "created_at", "updated_at",
}
|
||||
|
||||
// createVideosWithoutCategorySQL creates videos_category_drop_new, the
// replacement table that rebuildVideosTableWithoutCategory fills and then
// renames to videos. It declares the same columns, in the same order, as
// currentVideoColumnNames, and has no category column.
const createVideosWithoutCategorySQL = `
CREATE TABLE videos_category_drop_new (
id TEXT PRIMARY KEY,
drive_id TEXT NOT NULL,
file_id TEXT NOT NULL,
file_name TEXT DEFAULT '',
content_hash TEXT DEFAULT '',
sampled_sha256 TEXT DEFAULT '',
fingerprint_status TEXT DEFAULT 'pending',
fingerprint_error TEXT DEFAULT '',
parent_id TEXT,
title TEXT NOT NULL,
author TEXT,
tags TEXT,
duration_seconds INTEGER DEFAULT 0,
size_bytes INTEGER DEFAULT 0,
ext TEXT,
quality TEXT,
thumbnail_url TEXT,
thumbnail_status TEXT DEFAULT 'pending',
thumbnail_failures INTEGER DEFAULT 0,
preview_file_id TEXT,
preview_local TEXT,
preview_status TEXT DEFAULT 'pending',
transcode_status TEXT DEFAULT '',
transcode_error TEXT DEFAULT '',
transcoded_file_id TEXT DEFAULT '',
transcoded_size INTEGER DEFAULT 0,
views INTEGER DEFAULT 0,
last_viewed_at INTEGER DEFAULT 0,
favorites INTEGER DEFAULT 0,
comments INTEGER DEFAULT 0,
likes INTEGER DEFAULT 0,
dislikes INTEGER DEFAULT 0,
hidden INTEGER DEFAULT 0,
tags_manual INTEGER DEFAULT 0,
badges TEXT,
description TEXT,
published_at INTEGER NOT NULL,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
)`
|
||||
|
||||
func (c *Catalog) rebuildVideosTableWithoutCategory(ctx context.Context) error {
|
||||
tx, err := c.db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
if _, err := tx.ExecContext(ctx, `DROP TABLE IF EXISTS videos_category_drop_new`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, createVideosWithoutCategorySQL); err != nil {
|
||||
return err
|
||||
}
|
||||
cols := strings.Join(currentVideoColumnNames, ", ")
|
||||
if _, err := tx.ExecContext(ctx,
|
||||
`INSERT INTO videos_category_drop_new (`+cols+`) SELECT `+cols+` FROM videos`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, `DROP TABLE videos`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, `ALTER TABLE videos_category_drop_new RENAME TO videos`); err != nil {
|
||||
return err
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
// addColumnIfMissingReportNew 与 addColumnIfMissing 同步,但额外返回 added=true 表示
|
||||
// 本次确实创建了新列(即旧 schema 缺这列),方便调用方仅在迁移路径里补做一次性
|
||||
// 数据初始化(如把全局 setting 同步到新 per-drive 字段)。
|
||||
@@ -281,6 +478,24 @@ UPDATE videos
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) requeueSkippedPreviews(ctx context.Context) error {
|
||||
res, err := c.db.ExecContext(ctx, `
|
||||
UPDATE videos
|
||||
SET preview_file_id = '',
|
||||
preview_local = '',
|
||||
preview_status = 'pending',
|
||||
updated_at = ?
|
||||
WHERE COALESCE(preview_status, 'pending') = 'skipped'
|
||||
`, time.Now().UnixMilli())
|
||||
if err != nil {
|
||||
return fmt.Errorf("requeue skipped previews: %w", err)
|
||||
}
|
||||
if affected, err := res.RowsAffected(); err == nil && affected > 0 {
|
||||
log.Printf("[catalog] requeued %d skipped preview(s) for generation", affected)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) clearVolatileOneDriveThumbnails(ctx context.Context) error {
|
||||
// 把 OneDrive 过期的 mediap.svc.ms thumb URL 清空,让 worker 重新抽帧生成本地封面。
|
||||
// 同步把 thumbnail_status 重置为 'pending':清空后 url 是空的,本应进 worker 重做,
|
||||
@@ -297,7 +512,7 @@ UPDATE videos
|
||||
}
|
||||
|
||||
func (c *Catalog) clearRemoteP123ThumbnailsOnce(ctx context.Context) error {
|
||||
// 123 云盘列表返回的缩略图尺寸和稳定性都不适合作为站内封面;清空历史写入的
|
||||
// 123网盘列表返回的缩略图尺寸和稳定性都不适合作为站内封面;清空历史写入的
|
||||
// 远程 URL,让封面 worker 统一从视频直链抽帧生成本地 /p/thumb/<id>。
|
||||
const markerKey = "videos.p123.remote_thumbnails_cleared"
|
||||
marker, err := c.GetSetting(ctx, markerKey, "")
|
||||
@@ -345,10 +560,9 @@ func (c *Catalog) clearRemoteP123ThumbnailsOnce(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) clearRemoteNonSpider91Thumbnails(ctx context.Context) error {
|
||||
// 非 91Spider 视频不再使用网盘侧返回的远程缩略图。清空历史 http/https
|
||||
// thumbnail_url 后,封面 worker 会重新从视频中间帧生成本地 /p/thumb/<id>。
|
||||
// 91Spider 的封面是爬虫下载后保存到本地 /p/thumb/<id>,不受这条规则影响。
|
||||
func (c *Catalog) clearRemoteThumbnails(ctx context.Context) error {
|
||||
// 不再使用网盘侧返回的远程缩略图。清空历史 http/https thumbnail_url 后,
|
||||
// 封面 worker 会重新从视频中间帧生成本地 /p/thumb/<id>。
|
||||
res, err := c.db.ExecContext(ctx, `
|
||||
UPDATE videos
|
||||
SET thumbnail_url = '',
|
||||
@@ -359,18 +573,12 @@ UPDATE videos
|
||||
lower(COALESCE(thumbnail_url, '')) LIKE 'http://%'
|
||||
OR lower(COALESCE(thumbnail_url, '')) LIKE 'https://%'
|
||||
)
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM drives
|
||||
WHERE drives.id = videos.drive_id
|
||||
AND drives.kind = 'spider91'
|
||||
)
|
||||
`, time.Now().UnixMilli())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if affected, err := res.RowsAffected(); err == nil && affected > 0 {
|
||||
log.Printf("[catalog] cleared %d remote non-91Spider thumbnail(s) for local regeneration", affected)
|
||||
log.Printf("[catalog] cleared %d remote thumbnail(s) for local regeneration", affected)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -458,61 +666,6 @@ WHERE COALESCE(tags, '') NOT IN ('', '[]', 'null')
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) createCollectionTagsFromCategories(ctx context.Context) error {
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT category, COUNT(*) FROM videos
|
||||
WHERE COALESCE(category, '') != ''
|
||||
GROUP BY category`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
type categoryStat struct {
|
||||
category string
|
||||
count int
|
||||
}
|
||||
var categories []categoryStat
|
||||
for rows.Next() {
|
||||
var stat categoryStat
|
||||
if err := rows.Scan(&stat.category, &stat.count); err != nil {
|
||||
return err
|
||||
}
|
||||
categories = append(categories, stat)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rows.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, stat := range categories {
|
||||
if isAVCodePollutedLabel(stat.category) {
|
||||
if _, err := c.ensureTag(ctx, avTagLabel, fixedtags.AliasesFor(avTagLabel), "system"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addTagToVideosByCategory(ctx, stat.category, avTagLabel, "auto"); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
if stat.count < 3 {
|
||||
continue
|
||||
}
|
||||
if !LooksLikeCollectionTag(stat.category) {
|
||||
continue
|
||||
}
|
||||
if c.tagDeleted(ctx, stat.category) {
|
||||
continue
|
||||
}
|
||||
if _, err := c.ensureTag(ctx, stat.category, nil, "collection"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addCollectionTagToVideos(ctx, stat.category); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) CreateTagAndClassify(ctx context.Context, label string, aliases []string, source string) (int, error) {
|
||||
tag, err := c.ensureTag(ctx, label, aliases, source)
|
||||
if err != nil {
|
||||
@@ -806,41 +959,6 @@ func (c *Catalog) MatchTags(ctx context.Context, text string) ([]string, error)
|
||||
return sortLabelsByTagOrder(tags, uniqueStrings(out)), nil
|
||||
}
|
||||
|
||||
func (c *Catalog) EnsureCollectionTag(ctx context.Context, label string) (string, bool, error) {
|
||||
label = cleanTagLabel(label)
|
||||
if isAVCodePollutedLabel(label) {
|
||||
if _, err := c.ensureTag(ctx, avTagLabel, fixedtags.AliasesFor(avTagLabel), "system"); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
if err := c.addTagToVideosByCategory(ctx, label, avTagLabel, "auto"); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
return avTagLabel, true, nil
|
||||
}
|
||||
if !LooksLikeCollectionTag(label) {
|
||||
return "", false, nil
|
||||
}
|
||||
if c.tagDeleted(ctx, label) {
|
||||
return "", false, nil
|
||||
}
|
||||
if !c.tagExists(ctx, label) {
|
||||
count, err := c.categoryVideoCount(ctx, label)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
if count < 2 {
|
||||
return "", false, nil
|
||||
}
|
||||
}
|
||||
if _, err := c.ensureTag(ctx, label, nil, "collection"); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
if err := c.addCollectionTagToVideos(ctx, label); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
return label, true, nil
|
||||
}
|
||||
|
||||
func (c *Catalog) ensureTag(ctx context.Context, label string, aliases []string, source string) (Tag, error) {
|
||||
label = cleanTagLabel(label)
|
||||
if label == "" {
|
||||
@@ -893,7 +1011,7 @@ func (c *Catalog) classifyTag(ctx context.Context, tag Tag) (int, error) {
|
||||
return 0, err
|
||||
}
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT id, title, COALESCE(author, ''), COALESCE(category, ''), COALESCE(tags_manual, 0)
|
||||
SELECT id, title, COALESCE(author, ''), COALESCE(tags_manual, 0)
|
||||
FROM videos`)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
@@ -902,15 +1020,15 @@ FROM videos`)
|
||||
|
||||
classified := 0
|
||||
for rows.Next() {
|
||||
var videoID, title, author, category string
|
||||
var videoID, title, author string
|
||||
var manual int
|
||||
if err := rows.Scan(&videoID, &title, &author, &category, &manual); err != nil {
|
||||
if err := rows.Scan(&videoID, &title, &author, &manual); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if manual == 1 {
|
||||
continue
|
||||
}
|
||||
matcher := normalizeTagText(title + " " + author + " " + category)
|
||||
matcher := normalizeTagText(title + " " + author)
|
||||
if !matcher.contains(tag.Label) {
|
||||
matchedAlias := false
|
||||
for _, alias := range tag.Aliases {
|
||||
@@ -1042,54 +1160,6 @@ func (c *Catalog) insertVideoTag(ctx context.Context, videoID string, tagID int6
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) addCollectionTagToVideos(ctx context.Context, category string) error {
|
||||
return c.addTagToVideosByCategory(ctx, category, category, "auto")
|
||||
}
|
||||
|
||||
func (c *Catalog) addTagToVideosByCategory(ctx context.Context, category, label, source string) error {
|
||||
tag, err := c.getTagByLabel(ctx, label)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT v.id
|
||||
FROM videos v
|
||||
WHERE v.category = ?
|
||||
AND COALESCE(v.tags_manual, 0) = 0
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM video_tags vt
|
||||
WHERE vt.video_id = v.id
|
||||
AND vt.tag_id = ?
|
||||
)`, category, tag.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var videoIDs []string
|
||||
for rows.Next() {
|
||||
var videoID string
|
||||
if err := rows.Scan(&videoID); err != nil {
|
||||
return err
|
||||
}
|
||||
videoIDs = append(videoIDs, videoID)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rows.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, videoID := range videoIDs {
|
||||
if err := c.insertVideoTag(ctx, videoID, tag.ID, source); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.syncVideoTagsJSON(ctx, videoID, false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) collapseAVCodeTags(ctx context.Context) error {
|
||||
if _, err := c.ensureTag(ctx, avTagLabel, fixedtags.AliasesFor(avTagLabel), "system"); err != nil {
|
||||
return err
|
||||
@@ -1279,12 +1349,6 @@ func (c *Catalog) restoreDeletedTag(ctx context.Context, label string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) categoryVideoCount(ctx context.Context, category string) (int, error) {
|
||||
var count int
|
||||
err := c.db.QueryRowContext(ctx, `SELECT COUNT(*) FROM videos WHERE category = ?`, category).Scan(&count)
|
||||
return count, err
|
||||
}
|
||||
|
||||
func (c *Catalog) getTagByLabelTx(ctx context.Context, tx *sql.Tx, label string) (Tag, error) {
|
||||
row := tx.QueryRowContext(ctx,
|
||||
`SELECT id, label, aliases, source, 0 FROM tags WHERE label = ? COLLATE NOCASE`,
|
||||
@@ -1434,46 +1498,6 @@ func isShortASCIIWord(s string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func LooksLikeCollectionTag(label string) bool {
|
||||
label = cleanTagLabel(label)
|
||||
if label == "" {
|
||||
return false
|
||||
}
|
||||
if isAVCodePollutedLabel(label) {
|
||||
return false
|
||||
}
|
||||
runes := []rune(label)
|
||||
if len(runes) < 2 || len(runes) > 24 {
|
||||
return false
|
||||
}
|
||||
lower := strings.ToLower(label)
|
||||
blocked := map[string]bool{
|
||||
"v": true, "pv": true, "my pack": true, "my upload": true,
|
||||
"视频": true, "视频1": true, "第一直播": true, "男人必备": true,
|
||||
"瑟女聚集地": true, "成人色游": true, "ai女友": true,
|
||||
}
|
||||
if blocked[lower] {
|
||||
return false
|
||||
}
|
||||
hasLetter := false
|
||||
for _, r := range label {
|
||||
if unicode.IsLetter(r) {
|
||||
hasLetter = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasLetter {
|
||||
return false
|
||||
}
|
||||
for _, r := range label {
|
||||
switch r {
|
||||
case ',', '。', '!', '?', ';', '、', ':', '~', '~':
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func IsAVCode(label string) bool {
|
||||
label = cleanTagLabel(label)
|
||||
if label == "" {
|
||||
@@ -1555,9 +1579,7 @@ func sortLabelsByTagOrder(tags []Tag, labels []string) []string {
|
||||
return labels
|
||||
}
|
||||
|
||||
// pruneOrphanCollectionTags 删除所有 source='collection' 且不再被任何 video_tags 引用的标签。
|
||||
// 在 migrate 末尾调用,相当于启动时自愈:之前 DeleteVideo 没顺带清理留下的孤儿,会在重启时被收回。
|
||||
// 只动 collection:system 是固定标签需保留;user 是管理员手动建的;auto/legacy 默认有视频在引用。
|
||||
// pruneOrphanCollectionTags 删除旧版本生成的 source='collection' 孤儿标签。
|
||||
func (c *Catalog) pruneOrphanCollectionTags(ctx context.Context) error {
|
||||
_, err := c.db.ExecContext(ctx, `
|
||||
DELETE FROM tags
|
||||
@@ -1566,8 +1588,7 @@ DELETE FROM tags
|
||||
return err
|
||||
}
|
||||
|
||||
// pruneOrphanCollectionTagsByID 在事务里检查一组候选 tag_id,删除其中
|
||||
// source='collection' 且已经没有视频引用的标签。供 DeleteVideo 调用。
|
||||
// pruneOrphanCollectionTagsByID 在事务里检查并删除旧版本生成的孤儿 collection 标签。
|
||||
func pruneOrphanCollectionTagsByID(ctx context.Context, tx *sql.Tx, tagIDs []int64) error {
|
||||
for _, tagID := range tagIDs {
|
||||
var src string
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -136,7 +137,6 @@ func TestCreateTagAndClassifyAddsTagToMatchingExistingVideos(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
Title: "清纯短发合集",
|
||||
Category: "普通目录",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -148,7 +148,6 @@ func TestCreateTagAndClassifyAddsTagToMatchingExistingVideos(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: "file-2",
|
||||
Title: "普通标题",
|
||||
Category: "普通目录",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -232,52 +231,6 @@ func TestDeleteTagRemovesTagFromVideos(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteTagSuppressesAutomaticCollectionRecreation(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, id := range []string{"video-1", "video-2"} {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: id,
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "合集视频",
|
||||
Category: "sunny",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
if label, ok, err := cat.EnsureCollectionTag(ctx, "sunny"); err != nil || !ok || label != "sunny" {
|
||||
t.Fatalf("ensure collection = %q, %v, %v; want sunny true nil", label, ok, err)
|
||||
}
|
||||
tag := mustTagByLabel(t, ctx, cat, "sunny")
|
||||
if _, err := cat.DeleteTag(ctx, tag.ID); err != nil {
|
||||
t.Fatalf("delete tag: %v", err)
|
||||
}
|
||||
|
||||
if label, ok, err := cat.EnsureCollectionTag(ctx, "sunny"); err != nil || ok || label != "" {
|
||||
t.Fatalf("ensure deleted collection = %q, %v, %v; want empty false nil", label, ok, err)
|
||||
}
|
||||
for _, tag := range mustListTags(t, ctx, cat) {
|
||||
if tag.Label == "sunny" {
|
||||
t.Fatal("deleted collection tag was recreated automatically")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateTagAndClassifyRestoresDeletedTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
@@ -343,13 +296,13 @@ func TestEnsureTagForVideoIDPrefixBackfillsSourceTag(t *testing.T) {
|
||||
id string
|
||||
manual bool
|
||||
}{
|
||||
{id: "spider91-91-spider-1200001"},
|
||||
{id: "spider91-91-spider-1200002", manual: true},
|
||||
{id: "spider91-other-1200003"},
|
||||
{id: "scriptcrawler-crawler-a-source001"},
|
||||
{id: "scriptcrawler-crawler-a-source002", manual: true},
|
||||
{id: "scriptcrawler-other-source003"},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: seed.id,
|
||||
DriveID: "91-spider",
|
||||
DriveID: "crawler-a",
|
||||
FileID: seed.id + ".mp4",
|
||||
Title: "legacy title without source text",
|
||||
PublishedAt: now,
|
||||
@@ -365,28 +318,28 @@ func TestEnsureTagForVideoIDPrefixBackfillsSourceTag(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
added, err := cat.EnsureTagForVideoIDPrefix(ctx, "spider91-91-spider-", "91porn", nil, "system")
|
||||
added, err := cat.EnsureTagForVideoIDPrefix(ctx, "scriptcrawler-crawler-a-", "crawler-tag", nil, "system")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure prefix tag: %v", err)
|
||||
}
|
||||
if added != 1 {
|
||||
t.Fatalf("added = %d, want 1", added)
|
||||
}
|
||||
got, err := cat.GetVideo(ctx, "spider91-91-spider-1200001")
|
||||
got, err := cat.GetVideo(ctx, "scriptcrawler-crawler-a-source001")
|
||||
if err != nil {
|
||||
t.Fatalf("get tagged video: %v", err)
|
||||
}
|
||||
if !sameStrings(got.Tags, []string{"91porn"}) {
|
||||
t.Fatalf("tagged video tags = %#v, want 91porn", got.Tags)
|
||||
if !sameStrings(got.Tags, []string{"crawler-tag"}) {
|
||||
t.Fatalf("tagged video tags = %#v, want crawler-tag", got.Tags)
|
||||
}
|
||||
manual, err := cat.GetVideo(ctx, "spider91-91-spider-1200002")
|
||||
manual, err := cat.GetVideo(ctx, "scriptcrawler-crawler-a-source002")
|
||||
if err != nil {
|
||||
t.Fatalf("get manual video: %v", err)
|
||||
}
|
||||
if len(manual.Tags) != 0 {
|
||||
t.Fatalf("manual video tags = %#v, want unchanged", manual.Tags)
|
||||
}
|
||||
other, err := cat.GetVideo(ctx, "spider91-other-1200003")
|
||||
other, err := cat.GetVideo(ctx, "scriptcrawler-other-source003")
|
||||
if err != nil {
|
||||
t.Fatalf("get other prefix video: %v", err)
|
||||
}
|
||||
@@ -486,7 +439,6 @@ func TestMigrateDoesNotRewriteAlreadySyncedVideoTags(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "巨乳后入合集",
|
||||
Category: "Better Call Saul S03",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -585,6 +537,25 @@ CREATE TABLE videos (
|
||||
)`); err != nil {
|
||||
t.Fatalf("create legacy videos table: %v", err)
|
||||
}
|
||||
nowMillis := time.Now().UnixMilli()
|
||||
if _, err := db.Exec(`
|
||||
INSERT INTO videos (
|
||||
id, drive_id, file_id, content_hash, parent_id, title, author, tags,
|
||||
duration_seconds, size_bytes, ext, quality, thumbnail_url, preview_file_id,
|
||||
preview_local, preview_status, views, favorites, comments, likes, dislikes,
|
||||
category, hidden, tags_manual, badges, description, published_at, created_at, updated_at
|
||||
) VALUES (
|
||||
'legacy-video', 'drive', 'file-legacy', 'hash-legacy', 'parent-1', 'Legacy Video', 'Legacy Author', '["旧标签"]',
|
||||
180, 1024, 'mp4', 'HD', '/thumb.jpg', 'preview-file',
|
||||
'/preview.mp4', 'ready', 7, 1, 2, 3, 4,
|
||||
'legacy-category', 0, 0, '["精选"]', 'legacy description', ?, ?, ?
|
||||
)`,
|
||||
nowMillis, nowMillis, nowMillis); err != nil {
|
||||
t.Fatalf("insert legacy video: %v", err)
|
||||
}
|
||||
if _, err := db.Exec(`CREATE INDEX idx_legacy_videos_category ON videos(category)`); err != nil {
|
||||
t.Fatalf("create legacy category index: %v", err)
|
||||
}
|
||||
if err := db.Close(); err != nil {
|
||||
t.Fatalf("close raw db: %v", err)
|
||||
}
|
||||
@@ -603,6 +574,45 @@ CREATE TABLE videos (
|
||||
if err := cat.db.QueryRow(`SELECT COALESCE(file_name, '') FROM videos LIMIT 1`).Scan(&fileNameDefault); err != nil && err != sql.ErrNoRows {
|
||||
t.Fatalf("query migrated file_name column: %v", err)
|
||||
}
|
||||
if fileNameDefault != "" {
|
||||
t.Fatalf("file_name default = %q, want empty", fileNameDefault)
|
||||
}
|
||||
if hasColumn(t, cat, "videos", "category") {
|
||||
t.Fatal("legacy category column was not dropped")
|
||||
}
|
||||
if indexExists(t, cat, "idx_legacy_videos_category") {
|
||||
t.Fatal("legacy category index was not dropped")
|
||||
}
|
||||
for _, index := range []string{"idx_videos_drive", "idx_videos_pub", "idx_videos_views"} {
|
||||
if !indexExists(t, cat, index) {
|
||||
t.Fatalf("base video index %s was not recreated", index)
|
||||
}
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
got, err := cat.GetVideo(ctx, "legacy-video")
|
||||
if err != nil {
|
||||
t.Fatalf("get migrated legacy video: %v", err)
|
||||
}
|
||||
if got.Title != "Legacy Video" || got.Author != "Legacy Author" || got.Views != 7 {
|
||||
t.Fatalf("migrated video lost data: %#v", got)
|
||||
}
|
||||
if !sameStrings(got.Tags, []string{"旧标签"}) {
|
||||
t.Fatalf("migrated video tags = %#v, want legacy tag preserved", got.Tags)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "new-video",
|
||||
DriveID: "drive",
|
||||
FileID: "file-new",
|
||||
Title: "New Video",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert after migration: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetManualVideoTagsRejectsUnknownLabels(t *testing.T) {
|
||||
@@ -706,31 +716,6 @@ func TestCreateTagAndClassifyMapsAVCodeLabelToAV(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLooksLikeCollectionTagRejectsAVCodes(t *testing.T) {
|
||||
cases := []string{
|
||||
"DASS-499-C",
|
||||
"dass-499-c",
|
||||
"ADN-778",
|
||||
"SONE-247-C",
|
||||
"JUQ-502-UC",
|
||||
"ABF-032",
|
||||
"SSIS-233",
|
||||
"MIDA-607",
|
||||
"cc-1750027",
|
||||
"FC2-PPV-74663555",
|
||||
"ADN-778-FHD(1)",
|
||||
"ADN-778-中文字幕",
|
||||
"[44x.me]idbd-786",
|
||||
"NTRH-018_FHD_CH",
|
||||
"390JAC-233",
|
||||
}
|
||||
for _, label := range cases {
|
||||
if LooksLikeCollectionTag(label) {
|
||||
t.Fatalf("LooksLikeCollectionTag(%q) = true, want false", label)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateCollapsesAVCodeTagsIntoAV(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
@@ -759,7 +744,6 @@ func TestMigrateCollapsesAVCodeTagsIntoAV(t *testing.T) {
|
||||
FileID: seed.id,
|
||||
Title: seed.label + " sample",
|
||||
Tags: []string{seed.label},
|
||||
Category: seed.label,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -804,7 +788,7 @@ func TestMigrateCollapsesAVCodeTagsIntoAV(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateClearsRemoteNonSpiderThumbnailURLs(t *testing.T) {
|
||||
func TestMigrateClearsRemoteThumbnailURLs(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -848,14 +832,14 @@ func TestMigrateClearsRemoteNonSpiderThumbnailURLs(t *testing.T) {
|
||||
t.Fatalf("seed pikpak: %v", err)
|
||||
}
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
ID: "spider91-main",
|
||||
Kind: "spider91",
|
||||
Name: "91Spider",
|
||||
RootID: "root",
|
||||
ID: "crawler-main",
|
||||
Kind: "scriptcrawler",
|
||||
Name: "Crawler",
|
||||
RootID: "/",
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed spider91: %v", err)
|
||||
t.Fatalf("seed crawler: %v", err)
|
||||
}
|
||||
|
||||
videos := []*Video{
|
||||
@@ -895,11 +879,18 @@ func TestMigrateClearsRemoteNonSpiderThumbnailURLs(t *testing.T) {
|
||||
ThumbnailURL: "/p/thumb/p123-local-thumb-video",
|
||||
},
|
||||
{
|
||||
ID: "spider91-local-thumb-video",
|
||||
DriveID: "spider91-main",
|
||||
ID: "scriptcrawler-crawler-main-local-thumb",
|
||||
DriveID: "crawler-main",
|
||||
FileID: "file-6",
|
||||
Title: "91Spider local thumb",
|
||||
ThumbnailURL: "/p/thumb/spider91-local-thumb-video",
|
||||
Title: "Crawler local thumb",
|
||||
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-main-local-thumb",
|
||||
},
|
||||
{
|
||||
ID: "scriptcrawler-crawler-main-remote-thumb",
|
||||
DriveID: "crawler-main",
|
||||
FileID: "file-7",
|
||||
Title: "Crawler remote thumb",
|
||||
ThumbnailURL: "https://example.invalid/crawler-thumb.jpg",
|
||||
},
|
||||
}
|
||||
for _, v := range videos {
|
||||
@@ -962,12 +953,20 @@ func TestMigrateClearsRemoteNonSpiderThumbnailURLs(t *testing.T) {
|
||||
t.Fatalf("p123 local thumbnail = %q, want preserved", p123Local.ThumbnailURL)
|
||||
}
|
||||
|
||||
spider91Local, err := cat.GetVideo(ctx, "spider91-local-thumb-video")
|
||||
crawlerLocal, err := cat.GetVideo(ctx, "scriptcrawler-crawler-main-local-thumb")
|
||||
if err != nil {
|
||||
t.Fatalf("get spider91 local thumb video: %v", err)
|
||||
t.Fatalf("get crawler local thumb video: %v", err)
|
||||
}
|
||||
if spider91Local.ThumbnailURL != "/p/thumb/spider91-local-thumb-video" {
|
||||
t.Fatalf("spider91 local thumbnail = %q, want preserved", spider91Local.ThumbnailURL)
|
||||
if crawlerLocal.ThumbnailURL != "/p/thumb/scriptcrawler-crawler-main-local-thumb" {
|
||||
t.Fatalf("crawler local thumbnail = %q, want preserved", crawlerLocal.ThumbnailURL)
|
||||
}
|
||||
|
||||
crawlerRemote, err := cat.GetVideo(ctx, "scriptcrawler-crawler-main-remote-thumb")
|
||||
if err != nil {
|
||||
t.Fatalf("get crawler remote thumb video: %v", err)
|
||||
}
|
||||
if crawlerRemote.ThumbnailURL != "" {
|
||||
t.Fatalf("crawler remote thumbnail = %q, want cleared", crawlerRemote.ThumbnailURL)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1113,33 +1112,33 @@ func TestTagFilterMatchesCanonicalDuplicateVideo(t *testing.T) {
|
||||
UpdatedAt: now,
|
||||
},
|
||||
{
|
||||
ID: "spider91-dup-1",
|
||||
DriveID: "91-spider",
|
||||
ID: "scriptcrawler-crawler-a-dup-1",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "dup-1.mp4",
|
||||
Title: "Spider duplicate 1",
|
||||
Tags: []string{"91porn"},
|
||||
Title: "Crawler duplicate 1",
|
||||
Tags: []string{"crawler-tag"},
|
||||
Size: 1024,
|
||||
PublishedAt: now.Add(time.Second),
|
||||
CreatedAt: now.Add(time.Second),
|
||||
UpdatedAt: now.Add(time.Second),
|
||||
},
|
||||
{
|
||||
ID: "spider91-dup-2",
|
||||
DriveID: "91-spider",
|
||||
ID: "scriptcrawler-crawler-a-dup-2",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "dup-2.mp4",
|
||||
Title: "Spider duplicate 2",
|
||||
Tags: []string{"91porn"},
|
||||
Title: "Crawler duplicate 2",
|
||||
Tags: []string{"crawler-tag"},
|
||||
Size: 1024,
|
||||
PublishedAt: now.Add(2 * time.Second),
|
||||
CreatedAt: now.Add(2 * time.Second),
|
||||
UpdatedAt: now.Add(2 * time.Second),
|
||||
},
|
||||
{
|
||||
ID: "spider91-visible",
|
||||
DriveID: "91-spider",
|
||||
ID: "scriptcrawler-crawler-a-visible",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "visible.mp4",
|
||||
Title: "Spider visible",
|
||||
Tags: []string{"91porn"},
|
||||
Title: "Crawler visible",
|
||||
Tags: []string{"crawler-tag"},
|
||||
Size: 2048,
|
||||
PublishedAt: now.Add(3 * time.Second),
|
||||
CreatedAt: now.Add(3 * time.Second),
|
||||
@@ -1150,16 +1149,16 @@ func TestTagFilterMatchesCanonicalDuplicateVideo(t *testing.T) {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
for _, id := range []string{"pikpak-canonical", "spider91-dup-1", "spider91-dup-2"} {
|
||||
for _, id := range []string{"pikpak-canonical", "scriptcrawler-crawler-a-dup-1", "scriptcrawler-crawler-a-dup-2"} {
|
||||
if err := cat.UpdateVideoFingerprint(ctx, id, "same-sampled-sha256", "ready", ""); err != nil {
|
||||
t.Fatalf("fingerprint %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
if err := cat.UpdateVideoFingerprint(ctx, "spider91-visible", "unique-sampled-sha256", "ready", ""); err != nil {
|
||||
if err := cat.UpdateVideoFingerprint(ctx, "scriptcrawler-crawler-a-visible", "unique-sampled-sha256", "ready", ""); err != nil {
|
||||
t.Fatalf("fingerprint visible: %v", err)
|
||||
}
|
||||
|
||||
items, total, err := cat.ListVideos(ctx, ListParams{Tag: "91porn", Page: 1, PageSize: 10})
|
||||
items, total, err := cat.ListVideos(ctx, ListParams{Tag: "crawler-tag", Page: 1, PageSize: 10})
|
||||
if err != nil {
|
||||
t.Fatalf("list videos by tag: %v", err)
|
||||
}
|
||||
@@ -1170,13 +1169,13 @@ func TestTagFilterMatchesCanonicalDuplicateVideo(t *testing.T) {
|
||||
for _, item := range items {
|
||||
gotIDs[item.ID] = true
|
||||
}
|
||||
for _, want := range []string{"pikpak-canonical", "spider91-visible"} {
|
||||
for _, want := range []string{"pikpak-canonical", "scriptcrawler-crawler-a-visible"} {
|
||||
if !gotIDs[want] {
|
||||
t.Fatalf("tagged video ids = %#v, want %s", gotIDs, want)
|
||||
}
|
||||
}
|
||||
if got := mustTagByLabel(t, ctx, cat, "91porn").Count; got != 2 {
|
||||
t.Fatalf("91porn count = %d, want 2 visible canonical videos", got)
|
||||
if got := mustTagByLabel(t, ctx, cat, "crawler-tag").Count; got != 2 {
|
||||
t.Fatalf("crawler-tag count = %d, want 2 visible canonical videos", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1265,6 +1264,41 @@ func mustTagByLabel(t *testing.T, ctx context.Context, cat *Catalog, label strin
|
||||
return Tag{}
|
||||
}
|
||||
|
||||
func hasColumn(t *testing.T, cat *Catalog, table, column string) bool {
|
||||
t.Helper()
|
||||
rows, err := cat.db.Query(`PRAGMA table_info(` + table + `)`)
|
||||
if err != nil {
|
||||
t.Fatalf("query table info for %s: %v", table, err)
|
||||
}
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var cid int
|
||||
var name, typ string
|
||||
var notNull int
|
||||
var defaultValue any
|
||||
var pk int
|
||||
if err := rows.Scan(&cid, &name, &typ, ¬Null, &defaultValue, &pk); err != nil {
|
||||
t.Fatalf("scan table info for %s: %v", table, err)
|
||||
}
|
||||
if strings.EqualFold(name, column) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
t.Fatalf("iterate table info for %s: %v", table, err)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func indexExists(t *testing.T, cat *Catalog, name string) bool {
|
||||
t.Helper()
|
||||
var count int
|
||||
if err := cat.db.QueryRow(`SELECT COUNT(*) FROM sqlite_schema WHERE type = 'index' AND name = ?`, name).Scan(&count); err != nil {
|
||||
t.Fatalf("query index %s: %v", name, err)
|
||||
}
|
||||
return count > 0
|
||||
}
|
||||
|
||||
func videoUpdatedAtByID(t *testing.T, ctx context.Context, cat *Catalog, ids ...string) map[string]int64 {
|
||||
t.Helper()
|
||||
out := make(map[string]int64, len(ids))
|
||||
@@ -1278,9 +1312,9 @@ func videoUpdatedAtByID(t *testing.T, ctx context.Context, cat *Catalog, ids ...
|
||||
return out
|
||||
}
|
||||
|
||||
// 删除 collection 标签的最后一个引用视频后,标签应当自动从 tags 表里消失。
|
||||
// 删除旧版本 collection 标签的最后一个引用视频后,标签应当自动从 tags 表里消失。
|
||||
// user/system 标签不受影响:用户/系统标签的语义由人维护,孤儿状态保留。
|
||||
func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
func TestDeleteVideoPrunesLegacyOrphanCollectionTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -1299,7 +1333,6 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: id,
|
||||
Category: "Better Call Saul S02",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -1308,20 +1341,28 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
label, ok, err := cat.EnsureCollectionTag(ctx, "Better Call Saul S02")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure collection tag: %v", err)
|
||||
nowMillis := now.UnixMilli()
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`INSERT INTO tags (label, aliases, source, created_at, updated_at) VALUES (?, '[]', 'collection', ?, ?)`,
|
||||
"Better Call Saul S02", nowMillis, nowMillis); err != nil {
|
||||
t.Fatalf("insert legacy collection tag: %v", err)
|
||||
}
|
||||
if !ok || label != "Better Call Saul S02" {
|
||||
t.Fatalf("ensure collection tag = %q ok=%v, want collection tag created", label, ok)
|
||||
var collectionTagID int64
|
||||
if err := cat.db.QueryRowContext(ctx, `SELECT id FROM tags WHERE label = ?`, "Better Call Saul S02").Scan(&collectionTagID); err != nil {
|
||||
t.Fatalf("lookup legacy collection tag: %v", err)
|
||||
}
|
||||
for _, id := range []string{"video-a", "video-b"} {
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`INSERT INTO video_tags (video_id, tag_id, source, created_at) VALUES (?, ?, 'auto', ?)`,
|
||||
id, collectionTagID, nowMillis); err != nil {
|
||||
t.Fatalf("attach legacy collection tag to %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
// 用户标签:手动建出来,让它和 video-a 关联,验证 user 标签不会被孤儿清理流程误删。
|
||||
if _, err := cat.CreateTagAndClassify(ctx, "用户标签", nil, "user"); err != nil {
|
||||
t.Fatalf("create user tag: %v", err)
|
||||
}
|
||||
if err := cat.SetManualVideoTags(ctx, "video-a", []string{"用户标签"}); err != nil {
|
||||
t.Fatalf("attach user tag: %v", err)
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`INSERT INTO tags (label, aliases, source, created_at, updated_at) VALUES (?, '[]', 'user', ?, ?)`,
|
||||
"用户标签", nowMillis, nowMillis); err != nil {
|
||||
t.Fatalf("insert user orphan tag: %v", err)
|
||||
}
|
||||
|
||||
collectionExists := func() bool {
|
||||
@@ -1337,7 +1378,7 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
t.Fatal("collection tag missing right after creation")
|
||||
}
|
||||
|
||||
// 删第一个视频:还有 video-b 在引用 collection 标签,应保留。
|
||||
// 删第一个视频:还有 video-b 在引用旧 collection 标签,应保留。
|
||||
if err := cat.DeleteVideo(ctx, "video-a"); err != nil {
|
||||
t.Fatalf("delete video-a: %v", err)
|
||||
}
|
||||
@@ -1345,7 +1386,7 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
t.Fatal("collection tag was pruned while another video still references it")
|
||||
}
|
||||
|
||||
// 删最后一个引用视频,collection 标签应当被同步清掉。
|
||||
// 删最后一个引用视频,旧 collection 标签应当被同步清掉。
|
||||
if err := cat.DeleteVideo(ctx, "video-b"); err != nil {
|
||||
t.Fatalf("delete video-b: %v", err)
|
||||
}
|
||||
@@ -1353,7 +1394,7 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
t.Fatal("orphan collection tag was not pruned after deleting the last referencing video")
|
||||
}
|
||||
|
||||
// 用户手动建的标签即使变成孤儿(已经因为 video-a 删除而失去引用)也必须保留。
|
||||
// 用户标签即使是孤儿也必须保留。
|
||||
var userCount int
|
||||
if err := cat.db.QueryRowContext(ctx,
|
||||
`SELECT COUNT(*) FROM tags WHERE label = ? AND source = 'user'`,
|
||||
@@ -1539,6 +1580,70 @@ func TestReconcileThumbnailStatusOnce(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRequeueSkippedPreviews(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
cases := []struct {
|
||||
id string
|
||||
status string
|
||||
local string
|
||||
fileID string
|
||||
wantStatus string
|
||||
wantLocal string
|
||||
wantFileID string
|
||||
}{
|
||||
{"preview-skipped", "skipped", "/tmp/old-preview.mp4", "old-preview-file", "pending", "", ""},
|
||||
{"preview-ready", "ready", "/tmp/ready-preview.mp4", "ready-preview-file", "ready", "/tmp/ready-preview.mp4", "ready-preview-file"},
|
||||
{"preview-failed", "failed", "/tmp/failed-preview.mp4", "failed-preview-file", "failed", "/tmp/failed-preview.mp4", "failed-preview-file"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: c.id, DriveID: "d", FileID: "source-" + c.id, Title: c.id,
|
||||
PreviewStatus: c.status, PreviewLocal: c.local, PreviewFileID: c.fileID,
|
||||
PublishedAt: now, CreatedAt: now, UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed %s: %v", c.id, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := cat.requeueSkippedPreviews(ctx); err != nil {
|
||||
t.Fatalf("requeue skipped previews: %v", err)
|
||||
}
|
||||
if err := cat.requeueSkippedPreviews(ctx); err != nil {
|
||||
t.Fatalf("second requeue skipped previews: %v", err)
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
got, err := cat.GetVideo(ctx, c.id)
|
||||
if err != nil {
|
||||
t.Fatalf("get %s: %v", c.id, err)
|
||||
}
|
||||
if got.PreviewStatus != c.wantStatus {
|
||||
t.Errorf("%s: preview status = %q, want %q", c.id, got.PreviewStatus, c.wantStatus)
|
||||
}
|
||||
if got.PreviewLocal != c.wantLocal {
|
||||
t.Errorf("%s: preview local = %q, want %q", c.id, got.PreviewLocal, c.wantLocal)
|
||||
}
|
||||
if got.PreviewFileID != c.wantFileID {
|
||||
t.Errorf("%s: preview file id = %q, want %q", c.id, got.PreviewFileID, c.wantFileID)
|
||||
}
|
||||
}
|
||||
|
||||
pending, err := cat.ListVideosByPreviewStatus(ctx, "d", "pending", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending previews: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != "preview-skipped" {
|
||||
t.Fatalf("pending previews = %#v, want only preview-skipped", pending)
|
||||
}
|
||||
}
|
||||
|
||||
// TestUpsertVideoSyncsThumbnailStatus 验证 scanner 创建/补回视频时
|
||||
// thumbnail_status 跟随 thumbnail_url 自动设。这是历史 bug 的修复回归测试 ——
|
||||
// 之前 UpsertVideo 的 SQL 不带 thumbnail_status 列,所有新视频都依赖
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestListHiddenVideosForMigration 验证:隐藏的视频不进可见列表,
|
||||
// 但能被 ListHiddenVideos 拿到(供一次性迁移为墓碑)。
|
||||
func TestListHiddenVideosForMigration(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, id := range []string{"v1", "v2", "v3"} {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: id, DriveID: "drive", FileID: "f-" + id, Title: id,
|
||||
PublishedAt: now, CreatedAt: now, UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
if err := cat.HideVideo(ctx, "v2"); err != nil {
|
||||
t.Fatalf("hide v2: %v", err)
|
||||
}
|
||||
|
||||
visible, total, err := cat.ListVideos(ctx, ListParams{Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list visible: %v", err)
|
||||
}
|
||||
if total != 2 || len(visible) != 2 {
|
||||
t.Fatalf("visible total/len = %d/%d, want 2/2", total, len(visible))
|
||||
}
|
||||
for _, v := range visible {
|
||||
if v.ID == "v2" {
|
||||
t.Fatalf("hidden v2 leaked into visible list")
|
||||
}
|
||||
}
|
||||
|
||||
hidden, err := cat.ListHiddenVideos(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("list hidden: %v", err)
|
||||
}
|
||||
if len(hidden) != 1 || hidden[0].ID != "v2" {
|
||||
t.Fatalf("ListHiddenVideos = %v, want only v2", hidden)
|
||||
}
|
||||
|
||||
current, blacklisted, err := cat.VideoManagementCounts(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("counts: %v", err)
|
||||
}
|
||||
if current != 2 || blacklisted != 0 {
|
||||
t.Fatalf("counts = current %d blacklisted %d, want 2/0", current, blacklisted)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBlacklistListAndRemove 验证墓碑表的列出、关键字过滤和移除。
|
||||
func TestBlacklistListAndRemove(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
seed := []struct{ id, drive, file string }{
|
||||
{"d1", "drive", "movie-alpha.avi"},
|
||||
{"d2", "drive", "movie-beta.mp4"},
|
||||
{"d3", "archive", "clip-gamma.wmv"},
|
||||
}
|
||||
for _, s := range seed {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: s.id, DriveID: s.drive, FileID: "f-" + s.id, FileName: s.file,
|
||||
Title: s.id, PublishedAt: now, CreatedAt: now, UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed %s: %v", s.id, err)
|
||||
}
|
||||
var err error
|
||||
if s.id == "d2" {
|
||||
err = cat.DeleteVideoWithTombstoneReason(ctx, s.id, DeletedVideoReasonDuplicate)
|
||||
} else {
|
||||
err = cat.DeleteVideoWithTombstone(ctx, s.id)
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("tombstone %s: %v", s.id, err)
|
||||
}
|
||||
}
|
||||
|
||||
items, total, err := cat.ListDeletedVideos(ctx, ListParams{Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted: %v", err)
|
||||
}
|
||||
if total != 3 || len(items) != 3 {
|
||||
t.Fatalf("deleted total/len = %d/%d, want 3/3", total, len(items))
|
||||
}
|
||||
reasons := map[string]string{}
|
||||
for _, item := range items {
|
||||
reasons[item.ID] = item.Reason
|
||||
}
|
||||
if reasons["d1"] != "" || reasons["d3"] != "" {
|
||||
t.Fatalf("manual tombstone reasons = %#v, want empty", reasons)
|
||||
}
|
||||
if reasons["d2"] != DeletedVideoReasonDuplicate {
|
||||
t.Fatalf("duplicate tombstone reason = %q, want %q", reasons["d2"], DeletedVideoReasonDuplicate)
|
||||
}
|
||||
|
||||
// 关键字过滤
|
||||
filtered, ftotal, err := cat.ListDeletedVideos(ctx, ListParams{Keyword: "movie", Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted filtered: %v", err)
|
||||
}
|
||||
if ftotal != 2 || len(filtered) != 2 {
|
||||
t.Fatalf("filtered total/len = %d/%d, want 2/2", ftotal, len(filtered))
|
||||
}
|
||||
|
||||
// 网盘过滤
|
||||
driveFiltered, driveTotal, err := cat.ListDeletedVideos(ctx, ListParams{DriveID: "archive", Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted drive filtered: %v", err)
|
||||
}
|
||||
if driveTotal != 1 || len(driveFiltered) != 1 || driveFiltered[0].ID != "d3" {
|
||||
t.Fatalf("drive filtered = total %d items %#v, want only d3", driveTotal, driveFiltered)
|
||||
}
|
||||
|
||||
combined, combinedTotal, err := cat.ListDeletedVideos(ctx, ListParams{Keyword: "movie", DriveID: "archive", Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted combined filtered: %v", err)
|
||||
}
|
||||
if combinedTotal != 0 || len(combined) != 0 {
|
||||
t.Fatalf("combined filtered total/len = %d/%d, want 0/0", combinedTotal, len(combined))
|
||||
}
|
||||
|
||||
// 移出黑名单
|
||||
if err := cat.RemoveDeletedVideo(ctx, "d1"); err != nil {
|
||||
t.Fatalf("remove d1: %v", err)
|
||||
}
|
||||
if deleted, err := cat.IsVideoDeleted(ctx, "d1"); err != nil || deleted {
|
||||
t.Fatalf("d1 should no longer be blacklisted (deleted=%v err=%v)", deleted, err)
|
||||
}
|
||||
_, total, err = cat.ListDeletedVideos(ctx, ListParams{Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted after remove: %v", err)
|
||||
}
|
||||
if total != 2 {
|
||||
t.Fatalf("deleted total after remove = %d, want 2", total)
|
||||
}
|
||||
|
||||
if err := cat.RemoveDeletedVideo(ctx, "does-not-exist"); err == nil {
|
||||
t.Fatalf("remove missing id should return error")
|
||||
}
|
||||
|
||||
// counts: 删完一个还剩 2 个黑名单;可见视频已全部被墓碑删除
|
||||
current, blacklisted, err := cat.VideoManagementCounts(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("counts: %v", err)
|
||||
}
|
||||
if current != 0 || blacklisted != 2 {
|
||||
t.Fatalf("counts = current %d blacklisted %d, want 0/2", current, blacklisted)
|
||||
}
|
||||
}
|
||||
@@ -16,6 +16,11 @@ const (
|
||||
DefaultAdminPassword = "admin123"
|
||||
)
|
||||
|
||||
var (
|
||||
legacyDefaultVideoExtensions = []string{".mp4", ".mkv", ".mov", ".webm", ".avi"}
|
||||
defaultVideoExtensions = []string{".mp4", ".mkv", ".mov", ".webm", ".avi", ".strm"}
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
Server Server `yaml:"server"`
|
||||
Storage Storage `yaml:"storage"`
|
||||
@@ -202,7 +207,7 @@ type Nightly struct {
|
||||
// 这里保留 yaml 中的静态定义,用于启动时预置盘。生产建议只在 DB 里维护。
|
||||
type Drive struct {
|
||||
ID string `yaml:"id"`
|
||||
Kind string `yaml:"kind"` // quark / p115 / p123 / pikpak / wopan / onedrive / googledrive / localstorage
|
||||
Kind string `yaml:"kind"` // quark / p115 / p123 / pikpak / wopan / guangyapan / onedrive / googledrive / localstorage
|
||||
Name string `yaml:"name"`
|
||||
RootID string `yaml:"root_id"`
|
||||
Params map[string]string `yaml:"params,omitempty"`
|
||||
@@ -247,7 +252,9 @@ func (c *Config) applyDefaults() {
|
||||
c.Scanner.MaxDepth = 5
|
||||
}
|
||||
if len(c.Scanner.VideoExtensions) == 0 {
|
||||
c.Scanner.VideoExtensions = []string{".mp4", ".mkv", ".mov", ".webm", ".avi"}
|
||||
c.Scanner.VideoExtensions = append([]string{}, defaultVideoExtensions...)
|
||||
} else if isLegacyDefaultVideoExtensions(c.Scanner.VideoExtensions) {
|
||||
c.Scanner.VideoExtensions = append(c.Scanner.VideoExtensions, ".strm")
|
||||
}
|
||||
if c.Preview.FFmpegPath == "" {
|
||||
c.Preview.FFmpegPath = "ffmpeg"
|
||||
@@ -276,3 +283,19 @@ func (c *Config) applyDefaults() {
|
||||
c.Nightly.CronHour = 1
|
||||
}
|
||||
}
|
||||
|
||||
func isLegacyDefaultVideoExtensions(exts []string) bool {
|
||||
if len(exts) != len(legacyDefaultVideoExtensions) {
|
||||
return false
|
||||
}
|
||||
seen := make(map[string]struct{}, len(exts))
|
||||
for _, ext := range exts {
|
||||
seen[strings.ToLower(strings.TrimSpace(ext))] = struct{}{}
|
||||
}
|
||||
for _, ext := range legacyDefaultVideoExtensions {
|
||||
if _, ok := seen[ext]; !ok {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package config
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -50,3 +51,64 @@ storage:
|
||||
t.Fatalf("db path = %q, want preserved value", cfg.Storage.DBPath)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadDefaultScannerVideoExtensionsIncludeSTRM(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "config.yaml")
|
||||
if err := os.WriteFile(path, []byte(`{}`), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
|
||||
cfg, err := Load(path)
|
||||
if err != nil {
|
||||
t.Fatalf("load config: %v", err)
|
||||
}
|
||||
if !hasVideoExtension(cfg.Scanner.VideoExtensions, ".strm") {
|
||||
t.Fatalf("video extensions = %#v, want .strm", cfg.Scanner.VideoExtensions)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadLegacyDefaultScannerVideoExtensionsIncludeSTRM(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "config.yaml")
|
||||
if err := os.WriteFile(path, []byte(`
|
||||
scanner:
|
||||
video_extensions: [".mp4", ".mkv", ".mov", ".webm", ".avi"]
|
||||
`), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
|
||||
cfg, err := Load(path)
|
||||
if err != nil {
|
||||
t.Fatalf("load config: %v", err)
|
||||
}
|
||||
if !hasVideoExtension(cfg.Scanner.VideoExtensions, ".strm") {
|
||||
t.Fatalf("video extensions = %#v, want .strm appended for legacy default list", cfg.Scanner.VideoExtensions)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadCustomScannerVideoExtensionsArePreserved(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "config.yaml")
|
||||
if err := os.WriteFile(path, []byte(`
|
||||
scanner:
|
||||
video_extensions: [".mp4"]
|
||||
`), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
|
||||
cfg, err := Load(path)
|
||||
if err != nil {
|
||||
t.Fatalf("load config: %v", err)
|
||||
}
|
||||
if len(cfg.Scanner.VideoExtensions) != 1 || cfg.Scanner.VideoExtensions[0] != ".mp4" {
|
||||
t.Fatalf("video extensions = %#v, want custom list preserved", cfg.Scanner.VideoExtensions)
|
||||
}
|
||||
}
|
||||
|
||||
// hasVideoExtension reports whether exts contains want. Both sides are
// compared after trimming surrounding whitespace and lowercasing.
func hasVideoExtension(exts []string, want string) bool {
	target := strings.ToLower(strings.TrimSpace(want))
	for i := 0; i < len(exts); i++ {
		candidate := strings.TrimSpace(exts[i])
		if strings.ToLower(candidate) == target {
			return true
		}
	}
	return false
}
|
||||
|
||||
+447
-192
@@ -1,21 +1,20 @@
|
||||
// Package spider91migrate 周期性把 spider91 drive 下载到本地的视频
|
||||
// 上传到一个指定的目标 drive 目录(PikPak、115、123 或 OneDrive),上传成功后:
|
||||
// Package crawlerupload uploads videos saved by script crawlers to a configured
|
||||
// target drive. Each crawler drive chooses its own upload target.
|
||||
//
|
||||
// - 改写 catalog 行:drive_id / file_id / content_hash 改成目标盘的;
|
||||
// 视频自身的 id 不变(仍是 spider91-<driveID>-<viewkey>),video_tags、
|
||||
// 收藏、点赞、views 等关联数据全部保留
|
||||
// - 删除本地 mp4(spider91/<id>/videos/<viewkey>.<ext>)和源 thumb
|
||||
// (spider91/<id>/thumbs/<viewkey>.jpg);公共 /p/thumb/<videoID> 副本会保留
|
||||
// 视频自身的 id 不变,video_tags、收藏、点赞、views 等关联数据全部保留
|
||||
// - 删除爬虫本地 mp4 和源 thumb;公共 /p/thumb/<videoID> 副本会保留
|
||||
//
|
||||
// 之后回放时,videoSource() 自动落到 /p/stream/<target>/<file_id>,
|
||||
// proxy 层走对应盘的直链 / 302 直连。
|
||||
//
|
||||
// 下次目标盘扫盘时,scanner 通过 (content_hash) / (file_name+size)
|
||||
// 已有的 findDuplicate 兜底逻辑,不会为同一物理文件再建一行。
|
||||
package spider91migrate
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -29,22 +28,28 @@ import (
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/drives/googledrive"
|
||||
"github.com/video-site/backend/internal/drives/guangyapan"
|
||||
"github.com/video-site/backend/internal/drives/onedrive"
|
||||
"github.com/video-site/backend/internal/drives/p115"
|
||||
"github.com/video-site/backend/internal/drives/p123"
|
||||
"github.com/video-site/backend/internal/drives/pikpak"
|
||||
"github.com/video-site/backend/internal/drives/spider91"
|
||||
"github.com/video-site/backend/internal/drives/scriptcrawler"
|
||||
"github.com/video-site/backend/internal/drives/wopan"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
)
|
||||
|
||||
// uploadTarget 是 migrator 调用目标 drive 的最小接口。任何一种"接收 spider91 上传"的
|
||||
// 网盘都要实现它;当前 PikPak、115、123 和 OneDrive 各自通过适配器满足。
|
||||
// uploadTarget 是 migrator 调用目标 drive 的最小接口。任何一种"接收爬虫上传"的
|
||||
// 网盘都要实现它;当前 PikPak、115、123、OneDrive、Google Drive、联通网盘和光鸭网盘各自通过适配器满足。
|
||||
//
|
||||
// 这一层抽象把"迁移调用方"和"具体盘的 SDK 协议"解耦:
|
||||
// - PikPak 走 GCID + OSS PutObject(pikpak.UploadResult)
|
||||
// - 115 走 SHA1 + 秒传 / OSS / 分片(p115.UploadResult)
|
||||
// - 123 走 MD5 + 秒传 / S3 预签名分片(p123.UploadResult)
|
||||
// - OneDrive 走 SHA1 + 小文件 PUT / 大文件 upload session
|
||||
// - Google Drive 走 MD5 + resumable upload session
|
||||
// - 联通网盘 走 SDK Upload2C,当前上游不返回内容 hash
|
||||
// - 光鸭网盘 走 OSS 分片上传,当前上游不返回内容 hash
|
||||
//
|
||||
// 各家返回值都被归一成本地的 UploadResult,并在 catalog 改写阶段统一处理。
|
||||
type uploadTarget interface {
|
||||
@@ -56,10 +61,21 @@ type uploadTarget interface {
|
||||
Rename(ctx context.Context, fileID, newName string) error
|
||||
}
|
||||
|
||||
// LocalSource is the local source interface used by the migration
|
||||
// worker. scriptcrawler.Driver satisfies it when mounted for a crawler that
|
||||
// keeps videos in local storage before uploading them to a target drive.
|
||||
type LocalSource interface {
|
||||
drives.Drive
|
||||
VideosDir() string
|
||||
ThumbsDir() string
|
||||
VideoPath(fileID string) (string, error)
|
||||
ThumbPath(fileID string) (string, error)
|
||||
}
|
||||
|
||||
// UploadResult 是 uploadTarget.UploadAndReportHash 的归一返回。
|
||||
//
|
||||
// FileID 目标盘上的新文件 ID;
|
||||
// Hash GCID(PikPak)、MD5 HEX(123)或 SHA1 HEX(115 / OneDrive),写入 catalog.content_hash 用于跨盘去重;
|
||||
// Hash GCID(PikPak)、MD5 HEX(123 / Google Drive)或 SHA1 HEX(115 / OneDrive),写入 catalog.content_hash 用于跨盘去重;联通网盘和光鸭网盘暂为空;
|
||||
// Size 实际上传字节数。
|
||||
type UploadResult struct {
|
||||
FileID string
|
||||
@@ -67,15 +83,35 @@ type UploadResult struct {
|
||||
Size int64
|
||||
}
|
||||
|
||||
const spider91UploadDirName = "91 Spider"
|
||||
// UploadProgress is a plain value describing upload progress for one drive.
// NOTE(review): the field notes are inferred from names and types only;
// confirm them against the code that fills in and reads this struct.
type UploadProgress struct {
	// DriveID identifies the drive the progress belongs to (presumably the
	// crawler source drive; TODO confirm).
	DriveID string
	// State is a textual state label; the set of valid values is not
	// visible in this part of the file.
	State string
	// CurrentTitle is presumably the title of the video being uploaded.
	CurrentTitle string
	// QueueLength, DoneCount and TotalCount are integer counters; their
	// exact accounting is defined by the producer.
	QueueLength int
	DoneCount   int
	TotalCount  int
}
|
||||
|
||||
// pikpakAdapter / p115Adapter / p123Adapter / onedriveAdapter 把具体 driver 包装成 uploadTarget。
|
||||
const scriptCrawlerUploadRootDirName = "Script Crawlers"
|
||||
|
||||
type migrationPlan struct {
|
||||
source LocalSource
|
||||
row *catalog.Drive
|
||||
targetDriveID string
|
||||
target uploadTarget
|
||||
uploadDir string
|
||||
keepLatestN int
|
||||
requireAssetsReady bool
|
||||
requirePreviewReady bool
|
||||
}
|
||||
|
||||
// pikpakAdapter / p115Adapter / p123Adapter / onedriveAdapter / googledriveAdapter / wopanAdapter / guangyapanAdapter 把具体 driver 包装成 uploadTarget。
|
||||
//
|
||||
// 之所以不让 driver 直接实现 uploadTarget:
|
||||
//
|
||||
// 1. 各 driver 的 UploadAndReportXxx 返回的是各自包内的 UploadResult 类型,
|
||||
// 直接共用同名同签名方法会引入循环依赖;
|
||||
// 2. driver 包不应该感知 spider91migrate 这一层业务定义。
|
||||
// 2. driver 包不应该感知 crawlerupload 这一层业务定义。
|
||||
type pikpakAdapter struct {
|
||||
d *pikpak.Driver
|
||||
}
|
||||
@@ -160,6 +196,69 @@ func (a *onedriveAdapter) Rename(ctx context.Context, fileID, newName string) er
|
||||
return a.d.Rename(ctx, fileID, newName)
|
||||
}
|
||||
|
||||
type googledriveAdapter struct {
|
||||
d *googledrive.Driver
|
||||
}
|
||||
|
||||
func (a *googledriveAdapter) ID() string { return a.d.ID() }
|
||||
func (a *googledriveAdapter) Kind() string { return a.d.Kind() }
|
||||
func (a *googledriveAdapter) RootID() string { return a.d.RootID() }
|
||||
func (a *googledriveAdapter) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
return a.d.EnsureDir(ctx, pathFromRoot)
|
||||
}
|
||||
func (a *googledriveAdapter) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
res, err := a.d.UploadAndReportHash(ctx, parentID, name, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
return UploadResult{FileID: res.FileID, Hash: res.Hash, Size: res.Size}, nil
|
||||
}
|
||||
func (a *googledriveAdapter) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return a.d.Rename(ctx, fileID, newName)
|
||||
}
|
||||
|
||||
type wopanAdapter struct {
|
||||
d *wopan.Driver
|
||||
}
|
||||
|
||||
func (a *wopanAdapter) ID() string { return a.d.ID() }
|
||||
func (a *wopanAdapter) Kind() string { return a.d.Kind() }
|
||||
func (a *wopanAdapter) RootID() string { return a.d.RootID() }
|
||||
func (a *wopanAdapter) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
return a.d.EnsureDir(ctx, pathFromRoot)
|
||||
}
|
||||
func (a *wopanAdapter) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
fileID, err := a.d.Upload(ctx, parentID, name, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
return UploadResult{FileID: fileID, Size: size}, nil
|
||||
}
|
||||
func (a *wopanAdapter) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return a.d.Rename(ctx, fileID, newName)
|
||||
}
|
||||
|
||||
type guangyapanAdapter struct {
|
||||
d *guangyapan.Driver
|
||||
}
|
||||
|
||||
func (a *guangyapanAdapter) ID() string { return a.d.ID() }
|
||||
func (a *guangyapanAdapter) Kind() string { return a.d.Kind() }
|
||||
func (a *guangyapanAdapter) RootID() string { return a.d.RootID() }
|
||||
func (a *guangyapanAdapter) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
return a.d.EnsureDir(ctx, pathFromRoot)
|
||||
}
|
||||
func (a *guangyapanAdapter) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
fileID, err := a.d.Upload(ctx, parentID, name, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
return UploadResult{FileID: fileID, Size: size}, nil
|
||||
}
|
||||
func (a *guangyapanAdapter) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return a.d.Rename(ctx, fileID, newName)
|
||||
}
|
||||
|
||||
// adaptUploadTarget 把通用 drive 包装成 uploadTarget。
|
||||
// 不支持的盘 kind 返回 error;调用方静默跳过。
|
||||
func adaptUploadTarget(d drives.Drive) (uploadTarget, error) {
|
||||
@@ -172,11 +271,17 @@ func adaptUploadTarget(d drives.Drive) (uploadTarget, error) {
|
||||
return &p123Adapter{d: v}, nil
|
||||
case *onedrive.Driver:
|
||||
return &onedriveAdapter{d: v}, nil
|
||||
case *googledrive.Driver:
|
||||
return &googledriveAdapter{d: v}, nil
|
||||
case *wopan.Driver:
|
||||
return &wopanAdapter{d: v}, nil
|
||||
case *guangyapan.Driver:
|
||||
return &guangyapanAdapter{d: v}, nil
|
||||
case uploadTarget:
|
||||
// 测试或自定义实现可以直接传入;优先使用具体类型分支以拿到适配器。
|
||||
return v, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("drive %q kind=%s does not support spider91 upload", d.ID(), d.Kind())
|
||||
return nil, fmt.Errorf("drive %q kind=%s does not support crawler upload", d.ID(), d.Kind())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -187,23 +292,23 @@ type Registry interface {
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Catalog *catalog.Catalog
|
||||
Registry Registry
|
||||
GetTargetDriveID func() string // 通常对应 App.Spider91UploadDriveID()
|
||||
Catalog *catalog.Catalog
|
||||
Registry Registry
|
||||
// Interval 已废弃 —— 旧版迁移 worker 是周期 ticker,新版只通过 nightly
|
||||
// pipeline 调用 RunOnce,不再有内置定时器。保留字段不删是为了兼容外
|
||||
// 部 yaml / 测试代码里仍传值的场景。
|
||||
Interval time.Duration
|
||||
BatchLimit int // 单轮最多迁多少个,0 时默认 50
|
||||
// KeepLatestN 是每个 spider91 drive 在本地保留的最新视频数。
|
||||
// 超过的部分中"已迁移"的会被清理;未迁移的不动。0 时默认 15;< 0 关闭清理。
|
||||
// KeepLatestN is deprecated. Script crawler uploads use 0 internally so all
|
||||
// local videos that satisfy asset requirements are eligible for upload.
|
||||
KeepLatestN int
|
||||
// CaptchaCooldown 是迁移 worker 在遇到 PikPak captcha 错误(error_code
|
||||
// 4002 / 9)后整体进入冷却的时长。冷却期间 runOnce 直接返回,不再发起任何
|
||||
// PikPak API 请求,避免被进一步风控。0 时默认 5 分钟;< 0 关闭冷却(仅用于测试)。
|
||||
CaptchaCooldown time.Duration
|
||||
CommonThumbDir string
|
||||
OnMigrated func(videoID string)
|
||||
CaptchaCooldown time.Duration
|
||||
CommonThumbDir string
|
||||
OnMigrated func(videoID string)
|
||||
OnUploadProgress func(UploadProgress)
|
||||
}
|
||||
|
||||
type Migrator struct {
|
||||
@@ -287,9 +392,8 @@ func (m *Migrator) markCooldownLogged() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// Trigger 安排一次"立即跑"。多次调用会被合并成一次(channel buffer=1)。
|
||||
// RunOnce 跑一次完整迁移:列出所有 spider91 drive,对每个超过 KeepLatestN 的旧
|
||||
// 视频上传到目标 drive,事务性改写 catalog 行,删本地文件。
|
||||
// RunOnce 跑一次完整迁移:列出所有配置了 upload_drive_id 的 scriptcrawler
|
||||
// drive,把本地视频上传到目标 drive,事务性改写 catalog 行,删本地文件。
|
||||
//
|
||||
// 这是上层 nightly 流水线 Phase 3 的入口;不再有周期 ticker / Trigger 通道。
|
||||
// captcha cooldown 状态在单次 RunOnce 内仍生效(多 drive 时遇到 4002 立即停整轮);
|
||||
@@ -303,7 +407,7 @@ func (m *Migrator) RunOnce(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// runOnce 单轮:扫所有 spider91 drive,对每条还有本地文件的视频做迁移。
|
||||
// runOnce 单轮:扫所有 scriptcrawler drive,对每条还有本地文件的视频做迁移。
|
||||
//
|
||||
// 互斥保证:同一 Migrator 内不会并发跑两轮(避免重复上传)。
|
||||
func (m *Migrator) runOnce(ctx context.Context) {
|
||||
@@ -325,93 +429,79 @@ func (m *Migrator) runOnce(ctx context.Context) {
|
||||
// 结束自然恢复。避免之前每秒一条 4002 的日志雪崩。
|
||||
if active, until, resumed := m.cooldownState(); active {
|
||||
if !m.markCooldownLogged() {
|
||||
log.Printf("[spider91migrate] captcha cooldown active until %s, skipping run", until.Format(time.RFC3339))
|
||||
log.Printf("[crawlerupload] captcha cooldown active until %s, skipping run", until.Format(time.RFC3339))
|
||||
}
|
||||
return
|
||||
} else if resumed {
|
||||
log.Printf("[spider91migrate] captcha cooldown ended at %s, resuming migration", until.Format(time.RFC3339))
|
||||
log.Printf("[crawlerupload] captcha cooldown ended at %s, resuming migration", until.Format(time.RFC3339))
|
||||
}
|
||||
|
||||
target, pp, err := m.resolveTarget()
|
||||
if err != nil {
|
||||
// 没目标就静默 —— 用户选择了本地保存,或还没配 115/PikPak drive。
|
||||
plans := m.migrationPlans(ctx)
|
||||
if len(plans) == 0 {
|
||||
// 没目标就静默 —— 用户选择了本地保存,或目标盘还没挂载。
|
||||
return
|
||||
}
|
||||
|
||||
migrated := 0
|
||||
for _, src := range m.spider91Drives() {
|
||||
for _, plan := range plans {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
n, err := m.migrateDrive(ctx, src, target, pp)
|
||||
n, err := m.migrateDrive(ctx, plan)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] drive=%s migrate batch error: %v", src.ID(), err)
|
||||
log.Printf("[crawlerupload] drive=%s migrate batch error: %v", plan.source.ID(), err)
|
||||
}
|
||||
migrated += n
|
||||
if active, _ := m.inCooldown(); active {
|
||||
if migrated > 0 {
|
||||
log.Printf("[spider91migrate] migrated %d video(s) to drive=%s", migrated, target)
|
||||
log.Printf("[crawlerupload] migrated %d video(s)", migrated)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
if migrated > 0 {
|
||||
log.Printf("[spider91migrate] migrated %d video(s) to drive=%s", migrated, target)
|
||||
log.Printf("[crawlerupload] migrated %d video(s)", migrated)
|
||||
}
|
||||
|
||||
// 收尾:扫每个 spider91 drive 的本地目录,把 catalog 已经迁到别处但本地
|
||||
// 收尾:扫每个本地爬虫 drive 的 videos 目录,把 catalog 已经迁到别处但本地
|
||||
// 仍有残留的孤儿文件清掉。这是纯防御性兜底——正常路径下 migrateDrive
|
||||
// 已经在迁移成功后立刻 CleanupSpider91Local,不会留孤儿。
|
||||
for _, src := range m.spider91Drives() {
|
||||
// 已经在迁移成功后立刻 CleanupLocal,不会留孤儿。
|
||||
for _, plan := range plans {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
deleted, err := m.cleanupOldLocalVideos(ctx, src)
|
||||
deleted, err := m.cleanupOldLocalVideos(ctx, plan)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] cleanup drive=%s: %v", src.ID(), err)
|
||||
log.Printf("[crawlerupload] cleanup drive=%s: %v", plan.source.ID(), err)
|
||||
}
|
||||
if deleted > 0 {
|
||||
log.Printf("[spider91migrate] cleanup drive=%s deleted %d orphan local file(s)", src.ID(), deleted)
|
||||
log.Printf("[crawlerupload] cleanup drive=%s deleted %d orphan local file(s)", plan.source.ID(), deleted)
|
||||
}
|
||||
}
|
||||
|
||||
// 回填:把已迁移到 PikPak 的 spider91-* 视频里文件名仍是旧格式
|
||||
// (比如刚迁完没改、或人工导入)的统一改成方案 B 期望的格式。
|
||||
// 这一步幂等:已经是期望格式的不会再调 Rename。
|
||||
if renamed, err := m.backfillFileNames(ctx, target, pp); err != nil {
|
||||
log.Printf("[spider91migrate] backfill names: %v", err)
|
||||
} else if renamed > 0 {
|
||||
log.Printf("[spider91migrate] backfilled %d %s file name(s) to desired format", renamed, m.targetKindForLog())
|
||||
}
|
||||
}
|
||||
|
||||
// targetKindForLog 把当前目标盘 kind 转成对人友好的简称,用于日志。
|
||||
// 解析失败时回退 "target"。
|
||||
func (m *Migrator) targetKindForLog() string {
|
||||
if m.cfg.GetTargetDriveID == nil || m.cfg.Registry == nil {
|
||||
return "target"
|
||||
func (m *Migrator) reportUploadProgress(progress UploadProgress) {
|
||||
if m == nil || m.cfg.OnUploadProgress == nil {
|
||||
return
|
||||
}
|
||||
id := m.cfg.GetTargetDriveID()
|
||||
if id == "" {
|
||||
return "target"
|
||||
progress.DriveID = strings.TrimSpace(progress.DriveID)
|
||||
if progress.DriveID == "" {
|
||||
return
|
||||
}
|
||||
d, ok := m.cfg.Registry.Get(id)
|
||||
if !ok {
|
||||
return "target"
|
||||
if progress.State == "" {
|
||||
progress.State = "idle"
|
||||
}
|
||||
return d.Kind()
|
||||
m.cfg.OnUploadProgress(progress)
|
||||
}
|
||||
|
||||
// resolveTarget 返回 (target drive ID, target uploadTarget, err)。
|
||||
// 没设置、drive 找不到,或 drive 类型不支持上传时返回 err(调用方静默跳过)。
|
||||
func (m *Migrator) resolveTarget() (string, uploadTarget, error) {
|
||||
if m.cfg.GetTargetDriveID == nil {
|
||||
return "", nil, errors.New("no target getter")
|
||||
}
|
||||
id := m.cfg.GetTargetDriveID()
|
||||
func (m *Migrator) resolveTargetID(id string) (string, uploadTarget, error) {
|
||||
id = strings.TrimSpace(id)
|
||||
if id == "" {
|
||||
return "", nil, errors.New("target drive not configured")
|
||||
}
|
||||
if m.cfg.Registry == nil {
|
||||
return "", nil, errors.New("registry not configured")
|
||||
}
|
||||
d, ok := m.cfg.Registry.Get(id)
|
||||
if !ok {
|
||||
return "", nil, fmt.Errorf("target drive %q not in registry", id)
|
||||
@@ -423,33 +513,71 @@ func (m *Migrator) resolveTarget() (string, uploadTarget, error) {
|
||||
return id, t, nil
|
||||
}
|
||||
|
||||
// spider91Drives 返回当前注册的所有 spider91 driver。
|
||||
func (m *Migrator) spider91Drives() []*spider91.Driver {
|
||||
func (m *Migrator) migrationPlans(ctx context.Context) []migrationPlan {
|
||||
if m == nil || m.cfg.Catalog == nil || m.cfg.Registry == nil {
|
||||
return nil
|
||||
}
|
||||
all := m.cfg.Registry.All()
|
||||
out := make([]*spider91.Driver, 0, len(all))
|
||||
out := make([]migrationPlan, 0, len(all))
|
||||
for _, d := range all {
|
||||
if d.Kind() != spider91.Kind {
|
||||
if d == nil {
|
||||
continue
|
||||
}
|
||||
if sd, ok := d.(*spider91.Driver); ok {
|
||||
out = append(out, sd)
|
||||
src, ok := d.(LocalSource)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
row, err := m.cfg.Catalog.GetDrive(ctx, d.ID())
|
||||
if err != nil || row == nil || row.Kind != scriptcrawler.Kind {
|
||||
continue
|
||||
}
|
||||
targetID := strings.TrimSpace(row.Credentials["upload_drive_id"])
|
||||
if targetID == "" {
|
||||
continue
|
||||
}
|
||||
resolvedID, target, err := m.resolveTargetID(targetID)
|
||||
if err != nil {
|
||||
log.Printf("[crawlerupload] crawler=%s upload target=%q unavailable: %v", row.ID, targetID, err)
|
||||
continue
|
||||
}
|
||||
out = append(out, migrationPlan{
|
||||
source: src,
|
||||
row: row,
|
||||
targetDriveID: resolvedID,
|
||||
target: target,
|
||||
uploadDir: scriptCrawlerUploadDir(row.ID),
|
||||
keepLatestN: 0,
|
||||
requireAssetsReady: true,
|
||||
requirePreviewReady: row.TeaserEnabled,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// migrateDrive 对单个 spider91 drive 跑一批迁移;返回成功迁移的条数。
|
||||
//
|
||||
// 策略(与"本地缓存最新 N 个"语义一致):
|
||||
// - 列出 spider91 drive 本地 videos/ 目录所有 mp4 文件,按 mtime 降序排
|
||||
// - 跳过最新 KeepLatestN 个:这些是用户希望保留在本地的最新爬取
|
||||
// - 对剩下的(更旧)逐个处理:
|
||||
// - 还没迁移(drive_id 仍是 src.ID())→ 上传到目标盘 + 改 catalog + 删本地
|
||||
// - 已经迁移过但本地还有残留 → 仅删本地(兜底)
|
||||
//
|
||||
// KeepLatestN < 0 时不保护任何本地文件,全部尝试迁移(旧行为,主要给测试用)。
|
||||
func (m *Migrator) migrateDrive(ctx context.Context, src *spider91.Driver, targetDriveID string, pp uploadTarget) (int, error) {
|
||||
keepN := m.cfg.KeepLatestN
|
||||
func scriptCrawlerUploadDir(driveID string) string {
|
||||
driveID = sanitizeUploadDirSegment(driveID)
|
||||
if driveID == "" {
|
||||
driveID = "crawler"
|
||||
}
|
||||
return scriptCrawlerUploadRootDirName + "/" + driveID
|
||||
}
|
||||
|
||||
func sanitizeUploadDirSegment(raw string) string {
|
||||
clean := sanitizeTitle(raw)
|
||||
clean = strings.Trim(clean, "/")
|
||||
if clean == "." || clean == ".." {
|
||||
return ""
|
||||
}
|
||||
return clean
|
||||
}
|
||||
|
||||
// migrateDrive 对单个本地爬虫 drive 跑一批迁移;返回成功迁移的条数。
|
||||
func (m *Migrator) migrateDrive(ctx context.Context, plan migrationPlan) (int, error) {
|
||||
src := plan.source
|
||||
if src == nil || plan.target == nil || plan.targetDriveID == "" {
|
||||
return 0, nil
|
||||
}
|
||||
keepN := plan.keepLatestN
|
||||
if keepN < 0 {
|
||||
keepN = 0
|
||||
}
|
||||
@@ -479,28 +607,46 @@ func (m *Migrator) migrateDrive(ctx context.Context, src *spider91.Driver, targe
|
||||
files = append(files, localFile{name: e.Name(), modTime: info.ModTime()})
|
||||
}
|
||||
|
||||
// 本地数量没超过 keepN 时不动任何文件 —— 这条是 KeepLatestN 语义的核心
|
||||
if m.cfg.KeepLatestN >= 0 && len(files) <= keepN {
|
||||
if plan.keepLatestN >= 0 && len(files) <= keepN {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// 按 mtime 降序:最新的排前面,保留前 keepN 个
|
||||
sort.Slice(files, func(i, j int) bool { return files[i].modTime.After(files[j].modTime) })
|
||||
|
||||
// 候选 = 跳过最新 keepN 个之外的(更旧的)。KeepLatestN < 0 时 candidates=files。
|
||||
skip := keepN
|
||||
if m.cfg.KeepLatestN < 0 {
|
||||
if plan.keepLatestN < 0 {
|
||||
skip = 0
|
||||
}
|
||||
candidates := files
|
||||
if skip < len(files) {
|
||||
candidates = files[skip:]
|
||||
} else {
|
||||
m.reportUploadProgress(UploadProgress{DriveID: src.ID(), State: "idle"})
|
||||
return 0, nil
|
||||
}
|
||||
totalCandidates := len(candidates)
|
||||
m.reportUploadProgress(UploadProgress{
|
||||
DriveID: src.ID(),
|
||||
State: "uploading",
|
||||
QueueLength: totalCandidates,
|
||||
TotalCount: totalCandidates,
|
||||
})
|
||||
defer m.reportUploadProgress(UploadProgress{DriveID: src.ID(), State: "idle"})
|
||||
|
||||
localVideos, err := m.cfg.Catalog.ListVideosByDriveID(ctx, src.ID(), 100000)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("list local catalog videos: %w", err)
|
||||
}
|
||||
byFileID := make(map[string]*catalog.Video, len(localVideos))
|
||||
for _, v := range localVideos {
|
||||
if v != nil && strings.TrimSpace(v.FileID) != "" {
|
||||
byFileID[v.FileID] = v
|
||||
}
|
||||
}
|
||||
|
||||
migrated := 0
|
||||
for _, f := range candidates {
|
||||
processed := 0
|
||||
for index, f := range candidates {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return migrated, err
|
||||
}
|
||||
@@ -508,30 +654,96 @@ func (m *Migrator) migrateDrive(ctx context.Context, src *spider91.Driver, targe
|
||||
break
|
||||
}
|
||||
|
||||
viewkey := stripExt(f.name)
|
||||
videoID := "spider91-" + src.ID() + "-" + viewkey
|
||||
v, err := m.cfg.Catalog.GetVideo(ctx, videoID)
|
||||
if err != nil || v == nil {
|
||||
// 找不到 catalog 行:保险起见保留本地,让管理员可见
|
||||
v := m.findVideoForLocalFile(ctx, plan, f.name, byFileID)
|
||||
if v == nil {
|
||||
processed++
|
||||
m.reportUploadProgress(UploadProgress{
|
||||
DriveID: src.ID(),
|
||||
State: "uploading",
|
||||
QueueLength: maxInt(totalCandidates-processed, 0),
|
||||
DoneCount: processed,
|
||||
TotalCount: totalCandidates,
|
||||
})
|
||||
continue
|
||||
}
|
||||
m.reportUploadProgress(UploadProgress{
|
||||
DriveID: src.ID(),
|
||||
State: "uploading",
|
||||
CurrentTitle: v.Title,
|
||||
QueueLength: maxInt(totalCandidates-index-1, 0),
|
||||
DoneCount: processed,
|
||||
TotalCount: totalCandidates,
|
||||
})
|
||||
|
||||
if v.DriveID != src.ID() {
|
||||
// catalog 已迁移到别的 drive,但本地还有残留 → 兜底删本地
|
||||
CleanupSpider91Local(src, v.FileID)
|
||||
CleanupLocal(src, f.name)
|
||||
processed++
|
||||
m.reportUploadProgress(UploadProgress{
|
||||
DriveID: src.ID(),
|
||||
State: "uploading",
|
||||
QueueLength: maxInt(totalCandidates-processed, 0),
|
||||
DoneCount: processed,
|
||||
TotalCount: totalCandidates,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
ok, err := m.migrateOne(ctx, v, src, targetDriveID, pp)
|
||||
if targetDuplicate, err := m.cfg.Catalog.FindEquivalentVideoOnDrive(ctx, v, plan.targetDriveID); err != nil {
|
||||
if !errors.Is(err, sql.ErrNoRows) {
|
||||
log.Printf("[crawlerupload] %s find target duplicate: %v", v.ID, err)
|
||||
}
|
||||
} else if targetDuplicate != nil {
|
||||
ok, err := m.bindToExistingTarget(ctx, v, targetDuplicate, plan)
|
||||
if err != nil {
|
||||
log.Printf("[crawlerupload] %s: %v", v.ID, err)
|
||||
continue
|
||||
}
|
||||
if ok {
|
||||
migrated++
|
||||
if m.cfg.OnMigrated != nil {
|
||||
m.cfg.OnMigrated(v.ID)
|
||||
}
|
||||
}
|
||||
processed++
|
||||
m.reportUploadProgress(UploadProgress{
|
||||
DriveID: src.ID(),
|
||||
State: "uploading",
|
||||
QueueLength: maxInt(totalCandidates-processed, 0),
|
||||
DoneCount: processed,
|
||||
TotalCount: totalCandidates,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
if plan.requireAssetsReady {
|
||||
ready, err := m.crawlerVideoAssetsReady(ctx, v, plan.requirePreviewReady)
|
||||
if err != nil {
|
||||
log.Printf("[crawlerupload] %s check generated assets: %v", v.ID, err)
|
||||
continue
|
||||
}
|
||||
if !ready {
|
||||
processed++
|
||||
m.reportUploadProgress(UploadProgress{
|
||||
DriveID: src.ID(),
|
||||
State: "uploading",
|
||||
QueueLength: maxInt(totalCandidates-processed, 0),
|
||||
DoneCount: processed,
|
||||
TotalCount: totalCandidates,
|
||||
})
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
ok, err := m.migrateOne(ctx, v, plan)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] %s: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s: %v", v.ID, err)
|
||||
// captcha 错误(4002 / 9)说明 PikPak 当前正拒绝我们;继续在
|
||||
// 同一轮里尝试其它文件大概率会拿到同样的 4002,并且每多一次
|
||||
// 失败就多一份"被风控加深"的风险。立即中止当前 batch 并
|
||||
// 打开冷却窗口,等 cfg.CaptchaCooldown 之后再重试。
|
||||
if pikpak.IsCaptchaError(err) {
|
||||
until := m.setCooldown()
|
||||
log.Printf("[spider91migrate] drive=%s captcha-blocked, cooling down until %s", src.ID(), until.Format(time.RFC3339))
|
||||
log.Printf("[crawlerupload] drive=%s captcha-blocked, cooling down until %s", src.ID(), until.Format(time.RFC3339))
|
||||
return migrated, nil
|
||||
}
|
||||
continue
|
||||
@@ -542,14 +754,65 @@ func (m *Migrator) migrateDrive(ctx context.Context, src *spider91.Driver, targe
|
||||
m.cfg.OnMigrated(v.ID)
|
||||
}
|
||||
}
|
||||
processed++
|
||||
m.reportUploadProgress(UploadProgress{
|
||||
DriveID: src.ID(),
|
||||
State: "uploading",
|
||||
QueueLength: maxInt(totalCandidates-processed, 0),
|
||||
DoneCount: processed,
|
||||
TotalCount: totalCandidates,
|
||||
})
|
||||
}
|
||||
return migrated, nil
|
||||
}
|
||||
|
||||
// migrateOne 把单条 spider91 视频上传到目标盘并改写 catalog。
|
||||
// maxInt returns the larger of a and b.
func maxInt(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|
||||
|
||||
func (m *Migrator) findVideoForLocalFile(ctx context.Context, plan migrationPlan, localFile string, byFileID map[string]*catalog.Video) *catalog.Video {
|
||||
if v := byFileID[localFile]; v != nil {
|
||||
return v
|
||||
}
|
||||
sourceID := stripExt(localFile)
|
||||
driveID := ""
|
||||
if plan.source != nil {
|
||||
driveID = plan.source.ID()
|
||||
}
|
||||
id := scriptcrawler.BuildVideoID(driveID, sourceID)
|
||||
v, err := m.cfg.Catalog.GetVideo(ctx, id)
|
||||
if err == nil && v != nil {
|
||||
return v
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Migrator) crawlerVideoAssetsReady(ctx context.Context, v *catalog.Video, requirePreview bool) (bool, error) {
|
||||
if v == nil {
|
||||
return false, nil
|
||||
}
|
||||
fingerprintReady := strings.EqualFold(strings.TrimSpace(v.FingerprintStatus), "ready") || strings.TrimSpace(v.SampledSHA256) != ""
|
||||
if !fingerprintReady {
|
||||
return false, nil
|
||||
}
|
||||
if !requirePreview {
|
||||
return true, nil
|
||||
}
|
||||
if strings.EqualFold(strings.TrimSpace(v.PreviewStatus), "ready") {
|
||||
return true, nil
|
||||
}
|
||||
return m.cfg.Catalog.HasReadyEquivalentPreview(ctx, v)
|
||||
}
|
||||
|
||||
// migrateOne 把单条本地爬虫视频上传到目标盘并改写 catalog。
|
||||
// 返回 (true, nil) 表示真的迁了一条;(false, nil) 表示跳过(本地文件已不在等);
|
||||
// (false, err) 表示真出错。
|
||||
func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src *spider91.Driver, targetDriveID string, pp uploadTarget) (bool, error) {
|
||||
func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, plan migrationPlan) (bool, error) {
|
||||
src := plan.source
|
||||
pp := plan.target
|
||||
path, err := src.VideoPath(v.FileID)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("resolve local path: %w", err)
|
||||
@@ -557,8 +820,8 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src *spider
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
// 本地文件被人手动删了,但 catalog 还显示 spider91 drive;
|
||||
// 这种状态没法迁移。跳过即可(保留行让管理员可见,避免数据丢失)。
|
||||
// 本地文件被人手动删了,但 catalog 还指向该爬虫;
|
||||
// 这种状态没法上传。跳过即可(保留行让管理员可见,避免数据丢失)。
|
||||
return false, nil
|
||||
}
|
||||
return false, fmt.Errorf("stat local: %w", err)
|
||||
@@ -573,20 +836,11 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src *spider
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// 上传到目标盘 rootID 下的固定 "91 Spider" 子目录。若用户把目标盘 rootID
|
||||
// 配成某个自定义目录,这里会在该自定义目录下查找/创建 "91 Spider"。
|
||||
// 上传名走 desiredPikPakName 算出来的方案 B 格式:
|
||||
//
|
||||
// <sanitized title>-<viewkey 后 8 位>.<ext>
|
||||
//
|
||||
// 这样网盘 Web 端列出来的文件名能直接看出是哪个视频,
|
||||
// 又用 viewkey 后 8 位避免同标题撞名。所有目标盘共用同一格式,
|
||||
// 简化前端 / catalog 的认知。
|
||||
parent, err := pp.EnsureDir(ctx, spider91UploadDirName)
|
||||
parent, err := pp.EnsureDir(ctx, plan.uploadDir)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("%s ensure %q dir: %w", pp.Kind(), spider91UploadDirName, err)
|
||||
return false, fmt.Errorf("%s ensure %q dir: %w", pp.Kind(), plan.uploadDir, err)
|
||||
}
|
||||
uploadName := desiredPikPakName(v.Title, extractViewKey(v.ID), v.Ext)
|
||||
uploadName := desiredUploadName(v.Title, sourceIDForUploadName(v, plan), v.Ext)
|
||||
res, err := pp.UploadAndReportHash(ctx, parent, uploadName, f, info.Size())
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("%s upload: %w", pp.Kind(), err)
|
||||
@@ -596,23 +850,67 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src *spider
|
||||
}
|
||||
|
||||
// 事务性改写 catalog 行:drive_id / file_id / content_hash
|
||||
if err := m.cfg.Catalog.MigrateVideoToDrive(ctx, v.ID, targetDriveID, res.FileID, res.Hash); err != nil {
|
||||
if err := m.cfg.Catalog.MigrateVideoToDrive(ctx, v.ID, plan.targetDriveID, res.FileID, res.Hash); err != nil {
|
||||
return false, fmt.Errorf("catalog migrate: %w", err)
|
||||
}
|
||||
m.preserveCrawledThumbnail(ctx, src, v)
|
||||
// 同步 catalog 里的 file_name,让下次目标盘扫盘时 (file_name, size) 也能匹配上
|
||||
if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{FileName: uploadName}); err != nil {
|
||||
log.Printf("[spider91migrate] %s update file_name after migrate: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s update file_name after migrate: %v", v.ID, err)
|
||||
}
|
||||
|
||||
// 删除本地 mp4 和源 thumb(公共 /p/thumb 副本已在 preserveCrawledThumbnail 中保留)。
|
||||
CleanupSpider91Local(src, v.FileID)
|
||||
CleanupLocal(src, v.FileID)
|
||||
|
||||
log.Printf("[spider91migrate] %s migrated to drive=%s(kind=%s) file=%s name=%q", v.ID, targetDriveID, pp.Kind(), res.FileID, uploadName)
|
||||
log.Printf("[crawlerupload] %s migrated to drive=%s(kind=%s) file=%s name=%q", v.ID, plan.targetDriveID, pp.Kind(), res.FileID, uploadName)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (m *Migrator) preserveCrawledThumbnail(ctx context.Context, src *spider91.Driver, v *catalog.Video) {
|
||||
func (m *Migrator) bindToExistingTarget(ctx context.Context, v, target *catalog.Video, plan migrationPlan) (bool, error) {
|
||||
if v == nil || target == nil || plan.source == nil {
|
||||
return false, nil
|
||||
}
|
||||
if plan.targetDriveID == "" || target.FileID == "" {
|
||||
return false, nil
|
||||
}
|
||||
if err := m.cfg.Catalog.MigrateVideoToDrive(ctx, v.ID, plan.targetDriveID, target.FileID, firstNonEmpty(target.ContentHash, v.ContentHash)); err != nil {
|
||||
return false, fmt.Errorf("catalog bind existing target: %w", err)
|
||||
}
|
||||
if target.FileName != "" {
|
||||
if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{FileName: target.FileName}); err != nil {
|
||||
log.Printf("[crawlerupload] %s update file_name after duplicate bind: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
m.preserveCrawledThumbnail(ctx, plan.source, v)
|
||||
CleanupLocal(plan.source, v.FileID)
|
||||
log.Printf("[crawlerupload] %s bound to existing drive=%s(kind=%s) file=%s duplicate=%s", v.ID, plan.targetDriveID, plan.target.Kind(), target.FileID, target.ID)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// firstNonEmpty returns the first argument that is not blank (empty or
// whitespace-only). The value is returned as given, without trimming. It
// returns "" when every argument is blank or none are supplied.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if candidate := values[i]; len(strings.TrimSpace(candidate)) > 0 {
			return candidate
		}
	}
	return ""
}
|
||||
|
||||
func sourceIDForUploadName(v *catalog.Video, plan migrationPlan) string {
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
prefix := scriptcrawler.Kind + "-" + plan.source.ID() + "-"
|
||||
if strings.HasPrefix(v.ID, prefix) {
|
||||
return strings.TrimPrefix(v.ID, prefix)
|
||||
}
|
||||
if v.FileID != "" {
|
||||
return stripExt(v.FileID)
|
||||
}
|
||||
return extractSourceID(v.ID)
|
||||
}
|
||||
|
||||
func (m *Migrator) preserveCrawledThumbnail(ctx context.Context, src LocalSource, v *catalog.Video) {
|
||||
if m == nil || m.cfg.Catalog == nil || src == nil || v == nil || v.ID == "" || v.FileID == "" {
|
||||
return
|
||||
}
|
||||
@@ -620,38 +918,38 @@ func (m *Migrator) preserveCrawledThumbnail(ctx context.Context, src *spider91.D
|
||||
if commonDir == "" {
|
||||
return
|
||||
}
|
||||
thumbPath, ok := findSpider91ThumbPath(src, v.FileID)
|
||||
thumbPath, ok := findCrawlerThumbPath(src, v.FileID)
|
||||
if !ok {
|
||||
if v.ThumbnailURL == "" {
|
||||
log.Printf("[spider91migrate] %s crawled thumbnail missing before migration cleanup", v.ID)
|
||||
log.Printf("[crawlerupload] %s crawled thumbnail missing before migration cleanup", v.ID)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err := os.MkdirAll(commonDir, 0o755); err != nil {
|
||||
log.Printf("[spider91migrate] %s mkdir common thumbs: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s mkdir common thumbs: %v", v.ID, err)
|
||||
return
|
||||
}
|
||||
dst := mediaasset.ThumbnailPathInDir(commonDir, v.ID)
|
||||
if _, err := os.Stat(dst); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
log.Printf("[spider91migrate] %s stat common thumb: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s stat common thumb: %v", v.ID, err)
|
||||
return
|
||||
}
|
||||
if err := copyFileAtomic(thumbPath, dst); err != nil {
|
||||
log.Printf("[spider91migrate] %s preserve crawled thumbnail: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s preserve crawled thumbnail: %v", v.ID, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{
|
||||
ThumbnailURL: "/p/thumb/" + v.ID,
|
||||
}); err != nil {
|
||||
log.Printf("[spider91migrate] %s update crawled thumbnail url: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s update crawled thumbnail url: %v", v.ID, err)
|
||||
return
|
||||
}
|
||||
v.ThumbnailURL = "/p/thumb/" + v.ID
|
||||
}
|
||||
|
||||
func findSpider91ThumbPath(src *spider91.Driver, fileID string) (string, bool) {
|
||||
func findCrawlerThumbPath(src LocalSource, fileID string) (string, bool) {
|
||||
thumbBase := stripExt(fileID)
|
||||
for _, ext := range []string{".jpg", ".jpeg", ".png", ".webp"} {
|
||||
thumbPath, err := src.ThumbPath(thumbBase + ext)
|
||||
@@ -691,20 +989,19 @@ func copyFileAtomic(src, dst string) error {
|
||||
return os.Rename(tmp, dst)
|
||||
}
|
||||
|
||||
// CleanupSpider91Local 删除已迁移视频的本地 mp4 和 thumb。
|
||||
// CleanupLocal 删除已上传视频的本地 mp4 和 thumb。
|
||||
//
|
||||
// thumb 删除是 best-effort —— 找不到就算了(spider91 thumb 文件名带后缀,
|
||||
// 我们不知道具体是 .jpg 还是别的,逐个尝试常见后缀)。
|
||||
// thumb 删除是 best-effort —— 找不到就算了;逐个尝试常见后缀。
|
||||
//
|
||||
// 暴露成包级函数方便 cleanup 模块复用(任务 6)。
|
||||
func CleanupSpider91Local(src *spider91.Driver, fileID string) {
|
||||
// 暴露成包级函数方便 cleanup 模块复用。
|
||||
func CleanupLocal(src LocalSource, fileID string) {
|
||||
videoPath, err := src.VideoPath(fileID)
|
||||
if err == nil {
|
||||
if err := os.Remove(videoPath); err != nil && !os.IsNotExist(err) {
|
||||
log.Printf("[spider91migrate] remove local mp4 %s: %v", videoPath, err)
|
||||
log.Printf("[crawlerupload] remove local mp4 %s: %v", videoPath, err)
|
||||
}
|
||||
}
|
||||
// thumb 文件名是 <viewkey>.<ext>;fileID 是 <viewkey>.<videoExt>,
|
||||
// thumb 文件名是 <sourceID>.<ext>;fileID 是 <sourceID>.<videoExt>,
|
||||
// 不一定相同。尝试用 fileID 去掉视频扩展名后拼 thumb 常见后缀。
|
||||
thumbBase := stripExt(fileID)
|
||||
for _, ext := range []string{".jpg", ".jpeg", ".png", ".webp"} {
|
||||
@@ -721,7 +1018,7 @@ func stripExt(name string) string {
|
||||
return name[:len(name)-len(ext)]
|
||||
}
|
||||
|
||||
// cleanupOldLocalVideos 是防御性兜底:扫 spider91 drive 本地 videos/ 目录,
|
||||
// cleanupOldLocalVideos 是防御性兜底:扫爬虫本地 videos/ 目录,
|
||||
// 删除所有 catalog 中已经迁移到别处(drive_id != src.ID())的本地残留。
|
||||
//
|
||||
// 与 migrateDrive 的区别:
|
||||
@@ -729,12 +1026,16 @@ func stripExt(name string) string {
|
||||
// - 不依赖 KeepLatestN —— 哪怕这个孤儿在"最新 N"窗口内,已迁移就该删
|
||||
// - 只看 catalog 状态,不看 mtime
|
||||
//
|
||||
// 正常路径下 migrateDrive 迁移成功后立刻 CleanupSpider91Local,所以这里
|
||||
// 正常路径下 migrateDrive 迁移成功后立刻 CleanupLocal,所以这里
|
||||
// 应该不会有任何工作。极端情况(手工改 catalog、迁移过程中 crash)才会
|
||||
// 找到孤儿。
|
||||
//
|
||||
// 返回实际删除的文件个数。
|
||||
func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, src *spider91.Driver) (int, error) {
|
||||
func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, plan migrationPlan) (int, error) {
|
||||
src := plan.source
|
||||
if src == nil {
|
||||
return 0, nil
|
||||
}
|
||||
entries, err := os.ReadDir(src.VideosDir())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
@@ -751,24 +1052,19 @@ func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, src *spider91.Driv
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
viewkey := stripExt(e.Name())
|
||||
videoID := "spider91-" + src.ID() + "-" + viewkey
|
||||
v, err := m.cfg.Catalog.GetVideo(ctx, videoID)
|
||||
if err != nil || v == nil {
|
||||
// 找不到 catalog 行:保险起见保留,等管理员处理
|
||||
v := m.findVideoForLocalFile(ctx, plan, e.Name(), nil)
|
||||
if v == nil {
|
||||
continue
|
||||
}
|
||||
if v.DriveID == src.ID() {
|
||||
// 还没迁移,归 migrateDrive 管,不在这里动
|
||||
continue
|
||||
}
|
||||
// 已迁移到别的 drive 但本地还有 → 删
|
||||
path, perr := src.VideoPath(e.Name())
|
||||
if perr != nil {
|
||||
continue
|
||||
}
|
||||
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
|
||||
log.Printf("[spider91migrate] cleanup remove %s: %v", path, err)
|
||||
log.Printf("[crawlerupload] cleanup remove %s: %v", path, err)
|
||||
continue
|
||||
}
|
||||
// thumb 一并删(best-effort)
|
||||
@@ -784,44 +1080,3 @@ func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, src *spider91.Driv
|
||||
}
|
||||
return deleted, nil
|
||||
}
|
||||
|
||||
// backfillFileNames 扫描目标 drive(PikPak、115、123 或 OneDrive)下所有 spider91-* 起始 ID 的视频,
|
||||
// 对文件名不是 desiredPikPakName(...) 期望格式的,调 target.Rename 修正,
|
||||
// 并把 catalog.file_name 同步到新名字。
|
||||
//
|
||||
// 幂等:已经是期望格式的视频不会触发任何调用。
|
||||
//
|
||||
// 返回成功改名的条数。
|
||||
func (m *Migrator) backfillFileNames(ctx context.Context, targetDriveID string, pp uploadTarget) (int, error) {
|
||||
videos, err := m.cfg.Catalog.ListVideosByDriveID(ctx, targetDriveID, 10000)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("list videos: %w", err)
|
||||
}
|
||||
renamed := 0
|
||||
for _, v := range videos {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return renamed, err
|
||||
}
|
||||
if !strings.HasPrefix(v.ID, "spider91-") {
|
||||
continue
|
||||
}
|
||||
want := desiredPikPakName(v.Title, extractViewKey(v.ID), v.Ext)
|
||||
if v.FileName == want {
|
||||
continue
|
||||
}
|
||||
if v.FileID == "" {
|
||||
continue
|
||||
}
|
||||
if err := pp.Rename(ctx, v.FileID, want); err != nil {
|
||||
log.Printf("[spider91migrate] rename %s -> %q: %v", v.ID, want, err)
|
||||
continue
|
||||
}
|
||||
if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{FileName: want}); err != nil {
|
||||
log.Printf("[spider91migrate] %s update file_name after rename: %v", v.ID, err)
|
||||
// 目标盘已经改名成功,但 catalog 更新失败 —— 下轮会重试。继续。
|
||||
}
|
||||
log.Printf("[spider91migrate] renamed %s on %s: %q -> %q", v.ID, pp.Kind(), v.FileName, want)
|
||||
renamed++
|
||||
}
|
||||
return renamed, nil
|
||||
}
|
||||
@@ -0,0 +1,280 @@
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/drives/scriptcrawler"
|
||||
)
|
||||
|
||||
type fakeRegistry struct {
|
||||
byID map[string]drives.Drive
|
||||
}
|
||||
|
||||
func newFakeRegistry() *fakeRegistry {
|
||||
return &fakeRegistry{byID: make(map[string]drives.Drive)}
|
||||
}
|
||||
|
||||
func (r *fakeRegistry) Add(d drives.Drive) {
|
||||
r.byID[d.ID()] = d
|
||||
}
|
||||
|
||||
func (r *fakeRegistry) Get(id string) (drives.Drive, bool) {
|
||||
d, ok := r.byID[id]
|
||||
return d, ok
|
||||
}
|
||||
|
||||
func (r *fakeRegistry) All() []drives.Drive {
|
||||
out := make([]drives.Drive, 0, len(r.byID))
|
||||
for _, d := range r.byID {
|
||||
out = append(out, d)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
type fakeUploadDrive struct {
|
||||
id string
|
||||
kind string
|
||||
rootID string
|
||||
mu sync.Mutex
|
||||
uploadCalls int
|
||||
gotBodies map[string][]byte
|
||||
gotParents map[string]string
|
||||
ensureCalls []string
|
||||
}
|
||||
|
||||
func newFakeUploadDrive(id, kind, rootID string) *fakeUploadDrive {
|
||||
return &fakeUploadDrive{
|
||||
id: id,
|
||||
kind: kind,
|
||||
rootID: rootID,
|
||||
gotBodies: make(map[string][]byte),
|
||||
gotParents: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *fakeUploadDrive) Kind() string { return d.kind }
|
||||
func (d *fakeUploadDrive) ID() string { return d.id }
|
||||
func (d *fakeUploadDrive) RootID() string {
|
||||
return d.rootID
|
||||
}
|
||||
func (d *fakeUploadDrive) Init(context.Context) error { return nil }
|
||||
func (d *fakeUploadDrive) List(context.Context, string) ([]drives.Entry, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (d *fakeUploadDrive) Stat(context.Context, string) (*drives.Entry, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *fakeUploadDrive) StreamURL(context.Context, string) (*drives.StreamLink, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *fakeUploadDrive) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *fakeUploadDrive) EnsureDir(_ context.Context, pathFromRoot string) (string, error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
d.ensureCalls = append(d.ensureCalls, pathFromRoot)
|
||||
return d.rootID + "/" + pathFromRoot, nil
|
||||
}
|
||||
func (d *fakeUploadDrive) Rename(context.Context, string, string) error {
|
||||
return nil
|
||||
}
|
||||
func (d *fakeUploadDrive) UploadAndReportHash(_ context.Context, parentID, name string, r io.Reader, _ int64) (UploadResult, error) {
|
||||
body, _ := io.ReadAll(r)
|
||||
d.mu.Lock()
|
||||
d.uploadCalls++
|
||||
d.gotBodies[name] = body
|
||||
d.gotParents[name] = parentID
|
||||
d.mu.Unlock()
|
||||
return UploadResult{FileID: "remote-" + name, Hash: strings.Repeat("a", 40), Size: int64(len(body))}, nil
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*fakeUploadDrive)(nil)
|
||||
var _ uploadTarget = (*fakeUploadDrive)(nil)
|
||||
|
||||
func TestRunOnceUploadsScriptCrawlerLocalVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat := setupCatalog(t)
|
||||
src := setupScriptCrawler(t, "crawler-one")
|
||||
target := newFakeUploadDrive("target-drive", "pikpak", "target-root")
|
||||
reg := newFakeRegistry()
|
||||
reg.Add(src)
|
||||
reg.Add(target)
|
||||
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: src.ID(),
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Example Crawler",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{"script_path": "/tmp/example.py", "upload_drive_id": target.ID()},
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert crawler drive: %v", err)
|
||||
}
|
||||
|
||||
videoID := writeCrawlerVideo(t, cat, src, "source-001", ".mp4", []byte("video payload"), true)
|
||||
commonThumbDir := filepath.Join(t.TempDir(), "thumbs")
|
||||
m := New(Config{Catalog: cat, Registry: reg, CommonThumbDir: commonThumbDir})
|
||||
|
||||
if err := m.RunOnce(ctx); err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
|
||||
wantName := desiredUploadName("Sample source-001", "source-001", "mp4")
|
||||
if target.uploadCalls != 1 {
|
||||
t.Fatalf("upload calls = %d, want 1", target.uploadCalls)
|
||||
}
|
||||
if got := string(target.gotBodies[wantName]); got != "video payload" {
|
||||
t.Fatalf("uploaded body = %q, want payload", got)
|
||||
}
|
||||
if got := target.gotParents[wantName]; got != "target-root/Script Crawlers/crawler-one" {
|
||||
t.Fatalf("upload parent = %q, want crawler folder", got)
|
||||
}
|
||||
if len(target.ensureCalls) != 1 || target.ensureCalls[0] != "Script Crawlers/crawler-one" {
|
||||
t.Fatalf("ensure calls = %#v, want crawler upload folder", target.ensureCalls)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, videoID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.DriveID != target.ID() || !strings.HasPrefix(got.FileID, "remote-") {
|
||||
t.Fatalf("catalog target = drive %q file %q, want target drive", got.DriveID, got.FileID)
|
||||
}
|
||||
if got.FileName != wantName {
|
||||
t.Fatalf("file_name = %q, want %q", got.FileName, wantName)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(src.VideosDir(), "source-001.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("local video still exists or stat failed: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(src.ThumbsDir(), "source-001.jpg")); !os.IsNotExist(err) {
|
||||
t.Fatalf("local thumb still exists or stat failed: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(commonThumbDir, videoID+".jpg")); err != nil {
|
||||
t.Fatalf("common thumbnail missing: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOnceRequiresPerCrawlerUploadTarget(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat := setupCatalog(t)
|
||||
src := setupScriptCrawler(t, "crawler-local-only")
|
||||
target := newFakeUploadDrive("target-drive", "pikpak", "target-root")
|
||||
reg := newFakeRegistry()
|
||||
reg.Add(src)
|
||||
reg.Add(target)
|
||||
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: src.ID(),
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Local Only",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{"script_path": "/tmp/example.py"},
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert crawler drive: %v", err)
|
||||
}
|
||||
videoID := writeCrawlerVideo(t, cat, src, "source-002", ".mp4", []byte("video payload"), true)
|
||||
|
||||
m := New(Config{Catalog: cat, Registry: reg})
|
||||
if err := m.RunOnce(ctx); err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if target.uploadCalls != 0 {
|
||||
t.Fatalf("upload calls = %d, want 0", target.uploadCalls)
|
||||
}
|
||||
got, err := cat.GetVideo(ctx, videoID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.DriveID != src.ID() {
|
||||
t.Fatalf("drive_id = %q, want local crawler drive", got.DriveID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdaptUploadTargetRejectsUnsupportedTarget(t *testing.T) {
|
||||
src := scriptcrawler.New(scriptcrawler.Config{ID: "crawler", RootDir: t.TempDir()})
|
||||
_, err := adaptUploadTarget(src)
|
||||
if err == nil || !strings.Contains(err.Error(), "does not support crawler upload") {
|
||||
t.Fatalf("err = %v, want unsupported crawler upload target", err)
|
||||
}
|
||||
}
|
||||
|
||||
func setupCatalog(t *testing.T) *catalog.Catalog {
|
||||
t.Helper()
|
||||
cat, err := catalog.Open(filepath.Join(t.TempDir(), "video-site.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
return cat
|
||||
}
|
||||
|
||||
func setupScriptCrawler(t *testing.T, id string) *scriptcrawler.Driver {
|
||||
t.Helper()
|
||||
d := scriptcrawler.New(scriptcrawler.Config{ID: id, RootDir: t.TempDir()})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("scriptcrawler init: %v", err)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func writeCrawlerVideo(t *testing.T, cat *catalog.Catalog, d *scriptcrawler.Driver, sourceID, ext string, content []byte, readyAssets bool) string {
|
||||
t.Helper()
|
||||
ctx := context.Background()
|
||||
fileID := sourceID + ext
|
||||
videoPath, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
t.Fatalf("video path: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(videoPath, content, 0o644); err != nil {
|
||||
t.Fatalf("write video: %v", err)
|
||||
}
|
||||
thumbPath, err := d.ThumbPath(sourceID + ".jpg")
|
||||
if err != nil {
|
||||
t.Fatalf("thumb path: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(thumbPath, []byte("thumb"), 0o644); err != nil {
|
||||
t.Fatalf("write thumb: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
videoID := scriptcrawler.BuildVideoID(d.ID(), sourceID)
|
||||
previewStatus := "pending"
|
||||
fingerprintStatus := "pending"
|
||||
sampled := ""
|
||||
if readyAssets {
|
||||
previewStatus = "ready"
|
||||
fingerprintStatus = "ready"
|
||||
sampled = strings.Repeat("b", 64)
|
||||
}
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: videoID,
|
||||
DriveID: d.ID(),
|
||||
FileID: fileID,
|
||||
FileName: fileID,
|
||||
Title: "Sample " + sourceID,
|
||||
Author: "tester",
|
||||
Ext: strings.TrimPrefix(ext, "."),
|
||||
Quality: "HD",
|
||||
Size: int64(len(content)),
|
||||
PreviewStatus: previewStatus,
|
||||
FingerprintStatus: fingerprintStatus,
|
||||
SampledSHA256: sampled,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert video: %v", err)
|
||||
}
|
||||
return videoID
|
||||
}
|
||||
+21
-21
@@ -1,13 +1,13 @@
|
||||
package spider91migrate
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// 期望的 PikPak 文件名格式(方案 B):
|
||||
// 期望的上传文件名格式:
|
||||
//
|
||||
// <sanitized-title>-<viewkey-后8位>.<ext>
|
||||
// <sanitized-title>-<sourceID-后8位>.<ext>
|
||||
//
|
||||
// 例如:
|
||||
//
|
||||
@@ -15,8 +15,8 @@ import (
|
||||
//
|
||||
// 设计目标:
|
||||
// - 文件名一眼能看出视频内容(用 catalog 里的 title)
|
||||
// - 后缀的 viewkey 8 字符保证同标题不会撞名
|
||||
// - 全部字符在常见文件系统、PikPak、HTTP/Aliyun OSS Key 编码里都安全
|
||||
// - 后缀的 sourceID 8 字符保证同标题不会撞名
|
||||
// - 全部字符在常见文件系统、网盘 API、HTTP/Aliyun OSS Key 编码里都安全
|
||||
//
|
||||
// 字符清洗规则(sanitizeTitle):
|
||||
// - 去除控制字符(< 0x20 或 0x7F)
|
||||
@@ -85,47 +85,47 @@ func truncateRunes(s string, maxRunes int) string {
|
||||
return s
|
||||
}
|
||||
|
||||
// extractViewKey 从 video.ID("spider91-<driveID>-<viewkey>")里
|
||||
// 取出最后一段 viewkey。
|
||||
// extractSourceID 从 video.ID("<kind>-<driveID>-<sourceID>")里
|
||||
// 取出最后一段 sourceID。
|
||||
//
|
||||
// driveID 中如果有 "-" 不影响(用 LastIndex),viewkey 本身(91 网站的
|
||||
// view 标识)目前都是纯 hex 或纯数字,不包含 "-"。
|
||||
func extractViewKey(videoID string) string {
|
||||
// driveID 中如果有 "-" 不影响(用 LastIndex)。爬虫脚本应提供不包含 "-"
|
||||
// 的稳定 source_id;如果包含 "-",这里会取最后一段作为文件名后缀。
|
||||
func extractSourceID(videoID string) string {
|
||||
if i := strings.LastIndex(videoID, "-"); i >= 0 {
|
||||
return videoID[i+1:]
|
||||
}
|
||||
return videoID
|
||||
}
|
||||
|
||||
// viewKeySuffix 取 viewkey 的最后 N 个字符;不足 N 返回原字符串。
|
||||
// sourceIDSuffix 取 sourceID 的最后 N 个字符;不足 N 返回原字符串。
|
||||
//
|
||||
// 默认 N=8(足够稀疏避免标题撞名时的同名冲突)。
|
||||
const viewKeySuffixLen = 8
|
||||
const sourceIDSuffixLen = 8
|
||||
|
||||
func viewKeySuffix(viewkey string) string {
|
||||
r := []rune(viewkey)
|
||||
if len(r) <= viewKeySuffixLen {
|
||||
func sourceIDSuffix(sourceID string) string {
|
||||
r := []rune(sourceID)
|
||||
if len(r) <= sourceIDSuffixLen {
|
||||
return string(r)
|
||||
}
|
||||
return string(r[len(r)-viewKeySuffixLen:])
|
||||
return string(r[len(r)-sourceIDSuffixLen:])
|
||||
}
|
||||
|
||||
// desiredPikPakName 构造 spider91 视频在 PikPak 上的期望文件名。
|
||||
// desiredUploadName 构造爬虫视频上传到目标网盘时的期望文件名。
|
||||
//
|
||||
// desiredPikPakName("超白大奶律师约炮", "476fa8bf4b47e672d2fa", "mp4")
|
||||
// desiredUploadName("超白大奶律师约炮", "476fa8bf4b47e672d2fa", "mp4")
|
||||
// → "超白大奶律师约炮-72d2fa.mp4" // 实际是 e672d2fa(取最后 8)
|
||||
//
|
||||
// ext 不带前导点;空时默认 mp4。
|
||||
func desiredPikPakName(title, viewkey, ext string) string {
|
||||
func desiredUploadName(title, sourceID, ext string) string {
|
||||
clean := sanitizeTitle(title)
|
||||
suffix := viewKeySuffix(strings.TrimSpace(viewkey))
|
||||
suffix := sourceIDSuffix(strings.TrimSpace(sourceID))
|
||||
ext = strings.TrimSpace(ext)
|
||||
ext = strings.TrimPrefix(ext, ".")
|
||||
if ext == "" {
|
||||
ext = "mp4"
|
||||
}
|
||||
if suffix == "" {
|
||||
// viewkey 缺失时退化成 "<title>.<ext>"
|
||||
// sourceID 缺失时退化成 "<title>.<ext>"
|
||||
return clean + "." + ext
|
||||
}
|
||||
return clean + "-" + suffix + "." + ext
|
||||
+18
-18
@@ -1,4 +1,4 @@
|
||||
package spider91migrate
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"strings"
|
||||
@@ -13,11 +13,11 @@ func TestSanitizeTitleHandlesCommonCases(t *testing.T) {
|
||||
{"hello", "hello"},
|
||||
{" hello ", "hello"},
|
||||
{"hello\nworld", "hello world"},
|
||||
{"hello / world", "hello world"}, // 单 forbidden 折叠成空格
|
||||
{"hello / world", "hello world"}, // 单 forbidden 折叠成空格
|
||||
{"a/b\\c:d*e?f\"g<h>i|j", "a b c d e f g h i j"},
|
||||
{"a b", "a b"}, // 多空格折叠
|
||||
{"a b", "a b"}, // 多空格折叠
|
||||
{"a\t\nb", "a b"},
|
||||
{"...trim.dots...", "trim.dots"}, // 首尾点号被 trim 掉
|
||||
{"...trim.dots...", "trim.dots"}, // 首尾点号被 trim 掉
|
||||
{"control\x01char\x1f\x7f", "controlchar"}, // 控制字符直接丢弃
|
||||
{"", "video"}, // 空串回退
|
||||
{" / ", "video"}, // 全是 forbidden+空白 → 回退
|
||||
@@ -51,22 +51,22 @@ func TestSanitizeTitleKeepsCJKAndUnicode(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractViewKey(t *testing.T) {
|
||||
func TestExtractSourceID(t *testing.T) {
|
||||
cases := []struct{ in, want string }{
|
||||
{"spider91-91Spider-476fa8bf4b47e672d2fa", "476fa8bf4b47e672d2fa"},
|
||||
{"spider91-91Spider-1587338723", "1587338723"},
|
||||
{"spider91-some-drive-with-dashes-vk001", "vk001"}, // LastIndex 拿尾段
|
||||
{"scriptcrawler-demo-476fa8bf4b47e672d2fa", "476fa8bf4b47e672d2fa"},
|
||||
{"scriptcrawler-demo-1587338723", "1587338723"},
|
||||
{"scriptcrawler-some-drive-with-dashes-vk001", "vk001"}, // LastIndex 拿尾段
|
||||
{"no-dashes-after-prefix", "prefix"},
|
||||
{"single", "single"}, // 没 dash → 原样返回
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := extractViewKey(c.in); got != c.want {
|
||||
t.Errorf("extractViewKey(%q) = %q, want %q", c.in, got, c.want)
|
||||
if got := extractSourceID(c.in); got != c.want {
|
||||
t.Errorf("extractSourceID(%q) = %q, want %q", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestViewKeySuffix(t *testing.T) {
|
||||
func TestSourceIDSuffix(t *testing.T) {
|
||||
cases := []struct{ in, want string }{
|
||||
{"476fa8bf4b47e672d2fa", "e672d2fa"},
|
||||
{"1587338723", "87338723"},
|
||||
@@ -76,15 +76,15 @@ func TestViewKeySuffix(t *testing.T) {
|
||||
{"123456789", "23456789"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := viewKeySuffix(c.in); got != c.want {
|
||||
t.Errorf("viewKeySuffix(%q) = %q, want %q", c.in, got, c.want)
|
||||
if got := sourceIDSuffix(c.in); got != c.want {
|
||||
t.Errorf("sourceIDSuffix(%q) = %q, want %q", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDesiredPikPakName(t *testing.T) {
|
||||
func TestDesiredUploadName(t *testing.T) {
|
||||
cases := []struct {
|
||||
title, viewkey, ext, want string
|
||||
title, sourceID, ext, want string
|
||||
}{
|
||||
{
|
||||
"超白大奶律师约炮第一季",
|
||||
@@ -112,7 +112,7 @@ func TestDesiredPikPakName(t *testing.T) {
|
||||
},
|
||||
{
|
||||
"title",
|
||||
"", // 空 viewkey → 退化成 "<title>.<ext>"
|
||||
"", // 空 sourceID → 退化成 "<title>.<ext>"
|
||||
"webm",
|
||||
"title.webm",
|
||||
},
|
||||
@@ -130,9 +130,9 @@ func TestDesiredPikPakName(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := desiredPikPakName(c.title, c.viewkey, c.ext)
|
||||
got := desiredUploadName(c.title, c.sourceID, c.ext)
|
||||
if got != c.want {
|
||||
t.Errorf("desiredPikPakName(%q,%q,%q) = %q, want %q", c.title, c.viewkey, c.ext, got, c.want)
|
||||
t.Errorf("desiredUploadName(%q,%q,%q) = %q, want %q", c.title, c.sourceID, c.ext, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,17 @@
|
||||
package googledrive
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"log"
|
||||
"math"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
@@ -21,10 +28,13 @@ import (
|
||||
const (
|
||||
Kind = "googledrive"
|
||||
defaultAPIBaseURL = "https://www.googleapis.com/drive/v3"
|
||||
defaultUploadAPIURL = "https://www.googleapis.com/upload/drive/v3"
|
||||
defaultOAuthURL = "https://www.googleapis.com/oauth2/v4/token"
|
||||
defaultRenewAPIURL = "https://api.oplist.org/googleui/renewapi"
|
||||
defaultListInterval = 1 * time.Second
|
||||
defaultListCooldown = 5 * time.Minute
|
||||
defaultLinkCooldown = 5 * time.Minute
|
||||
uploadChunkSize = int64(8 * 1024 * 1024)
|
||||
|
||||
filesListFields = "files(id,name,mimeType,size,modifiedTime,createdTime,thumbnailLink,shortcutDetails,md5Checksum,sha1Checksum,sha256Checksum),nextPageToken"
|
||||
fileInfoFields = "id,name,mimeType,size,modifiedTime,createdTime,thumbnailLink,shortcutDetails,md5Checksum,sha1Checksum,sha256Checksum"
|
||||
@@ -41,13 +51,19 @@ type Driver struct {
|
||||
renewAPIURL string
|
||||
oauthURL string
|
||||
apiBaseURL string
|
||||
uploadBaseURL string
|
||||
client *resty.Client
|
||||
httpClient *http.Client
|
||||
onTokenUpdate func(access, refresh string)
|
||||
|
||||
listMu sync.Mutex
|
||||
lastListAt time.Time
|
||||
listInterval time.Duration
|
||||
listCooldown time.Duration
|
||||
|
||||
linkCooldownMu sync.Mutex
|
||||
linkCooldownUntil time.Time
|
||||
linkCooldownDuration time.Duration
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
@@ -61,6 +77,7 @@ type Config struct {
|
||||
RenewAPIURL string
|
||||
OAuthURL string
|
||||
APIBaseURL string
|
||||
UploadAPIURL string
|
||||
|
||||
OnTokenUpdate func(access, refresh string)
|
||||
}
|
||||
@@ -82,6 +99,10 @@ func New(c Config) *Driver {
|
||||
if apiBaseURL == "" {
|
||||
apiBaseURL = defaultAPIBaseURL
|
||||
}
|
||||
uploadBaseURL := strings.TrimRight(strings.TrimSpace(c.UploadAPIURL), "/")
|
||||
if uploadBaseURL == "" {
|
||||
uploadBaseURL = deriveUploadBaseURL(apiBaseURL)
|
||||
}
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
rootID: rootID,
|
||||
@@ -93,15 +114,34 @@ func New(c Config) *Driver {
|
||||
renewAPIURL: renewAPIURL,
|
||||
oauthURL: oauthURL,
|
||||
apiBaseURL: apiBaseURL,
|
||||
uploadBaseURL: uploadBaseURL,
|
||||
onTokenUpdate: c.OnTokenUpdate,
|
||||
client: resty.New().
|
||||
SetTimeout(30*time.Second).
|
||||
SetHeader("Accept", "application/json, text/plain, */*"),
|
||||
listInterval: defaultListInterval,
|
||||
listCooldown: defaultListCooldown,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 0,
|
||||
CheckRedirect: func(*http.Request, []*http.Request) error {
|
||||
return http.ErrUseLastResponse
|
||||
},
|
||||
},
|
||||
listInterval: defaultListInterval,
|
||||
listCooldown: defaultListCooldown,
|
||||
linkCooldownDuration: defaultLinkCooldown,
|
||||
}
|
||||
}
|
||||
|
||||
func deriveUploadBaseURL(apiBaseURL string) string {
|
||||
apiBaseURL = strings.TrimRight(strings.TrimSpace(apiBaseURL), "/")
|
||||
if apiBaseURL == "" || apiBaseURL == defaultAPIBaseURL {
|
||||
return defaultUploadAPIURL
|
||||
}
|
||||
if strings.HasSuffix(apiBaseURL, "/drive/v3") {
|
||||
return strings.TrimSuffix(apiBaseURL, "/drive/v3") + "/upload/drive/v3"
|
||||
}
|
||||
return apiBaseURL
|
||||
}
|
||||
|
||||
func (d *Driver) Kind() string { return Kind }
|
||||
func (d *Driver) ID() string { return d.id }
|
||||
func (d *Driver) RootID() string { return d.rootID }
|
||||
@@ -209,8 +249,19 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
if fileID == "" {
|
||||
return nil, errors.New("googledrive stream: empty file id")
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := d.linkCooldownError(time.Now()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := d.Stat(ctx, fileID); err != nil {
|
||||
return nil, fmt.Errorf("googledrive stream: %w", err)
|
||||
err = fmt.Errorf("googledrive stream: %w", err)
|
||||
if wait, ok := drives.RateLimitRetryAfter(err); ok {
|
||||
until := d.pauseLinkCooldown(wait)
|
||||
log.Printf("[googledrive] stream link cooling down drive=%s until=%s err=%v", d.id, until.Format(time.RFC3339), err)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
u := d.fileURL(fileID) + "?alt=media&acknowledgeAbuse=true&supportsAllDrives=true"
|
||||
return &drives.StreamLink{
|
||||
@@ -222,12 +273,396 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
func (d *Driver) linkCooldownError(now time.Time) error {
|
||||
d.linkCooldownMu.Lock()
|
||||
defer d.linkCooldownMu.Unlock()
|
||||
if d.linkCooldownUntil.IsZero() {
|
||||
return nil
|
||||
}
|
||||
if !now.Before(d.linkCooldownUntil) {
|
||||
d.linkCooldownUntil = time.Time{}
|
||||
return nil
|
||||
}
|
||||
wait := d.linkCooldownUntil.Sub(now)
|
||||
if wait <= 0 {
|
||||
return nil
|
||||
}
|
||||
return &drives.RateLimitError{
|
||||
Provider: Kind,
|
||||
RetryAfter: wait,
|
||||
Err: fmt.Errorf("googledrive stream link cooling down until %s", d.linkCooldownUntil.Format(time.RFC3339)),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
func (d *Driver) pauseLinkCooldown(wait time.Duration) time.Time {
|
||||
if wait <= 0 {
|
||||
wait = d.linkCooldownDuration
|
||||
}
|
||||
if wait <= 0 {
|
||||
wait = defaultLinkCooldown
|
||||
}
|
||||
until := time.Now().Add(wait)
|
||||
d.linkCooldownMu.Lock()
|
||||
if until.After(d.linkCooldownUntil) {
|
||||
d.linkCooldownUntil = until
|
||||
} else {
|
||||
until = d.linkCooldownUntil
|
||||
}
|
||||
d.linkCooldownMu.Unlock()
|
||||
return until
|
||||
}
|
||||
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
res, err := d.UploadAndReportHash(ctx, parentID, name, r, size)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return res.FileID, nil
|
||||
}
|
||||
|
||||
func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
parentID, name, err := d.normalizeUploadArgs(parentID, name, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
sessionURL, err := d.createUploadSession(ctx, parentID, name, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
if strings.TrimSpace(sessionURL) == "" {
|
||||
return UploadResult{}, errors.New("googledrive upload session: empty upload url")
|
||||
}
|
||||
|
||||
hasher := md5.New()
|
||||
var item driveFile
|
||||
var copied int64
|
||||
if size == 0 {
|
||||
completed, err := d.putUploadSessionChunkWithRetry(ctx, sessionURL, 0, 0, nil, hasher)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
if completed != nil {
|
||||
item = *completed
|
||||
}
|
||||
} else {
|
||||
chunkSize := uploadChunkSize
|
||||
if chunkSize <= 0 {
|
||||
chunkSize = 8 * 1024 * 1024
|
||||
}
|
||||
if chunkSize > int64(math.MaxInt32) {
|
||||
chunkSize = int64(math.MaxInt32)
|
||||
}
|
||||
buf := make([]byte, int(chunkSize))
|
||||
for copied < size {
|
||||
partSize := minInt64(chunkSize, size-copied)
|
||||
chunk := buf[:int(partSize)]
|
||||
n, err := io.ReadFull(r, chunk)
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return UploadResult{}, fmt.Errorf("googledrive upload: size mismatch: declared %d, copied %d", size, copied+int64(n))
|
||||
}
|
||||
return UploadResult{}, fmt.Errorf("googledrive upload: read body: %w", err)
|
||||
}
|
||||
chunk = chunk[:n]
|
||||
completed, err := d.putUploadSessionChunkWithRetry(ctx, sessionURL, copied, size, chunk, hasher)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
if completed != nil {
|
||||
item = *completed
|
||||
}
|
||||
copied += int64(n)
|
||||
}
|
||||
}
|
||||
|
||||
hashHex := hex.EncodeToString(hasher.Sum(nil))
|
||||
if item.ID == "" {
|
||||
fileID, err := d.findUploadedFileID(ctx, parentID, name, hashHex)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
item.ID = fileID
|
||||
}
|
||||
return UploadResult{FileID: item.ID, Hash: hashHex, Size: copied}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) normalizeUploadArgs(parentID, name string, r io.Reader, size int64) (string, string, error) {
|
||||
if r == nil {
|
||||
return "", "", errors.New("googledrive upload: body is required")
|
||||
}
|
||||
if size < 0 {
|
||||
return "", "", fmt.Errorf("googledrive upload: invalid size %d", size)
|
||||
}
|
||||
parentID = strings.TrimSpace(parentID)
|
||||
if parentID == "" || parentID == "/" {
|
||||
parentID = d.rootID
|
||||
}
|
||||
name = strings.TrimSpace(name)
|
||||
if name == "" {
|
||||
return "", "", errors.New("googledrive upload: empty file name")
|
||||
}
|
||||
return parentID, name, nil
|
||||
}
|
||||
|
||||
func (d *Driver) createUploadSession(ctx context.Context, parentID, name string, size int64) (string, error) {
|
||||
return d.createUploadSessionOnce(ctx, parentID, name, size, true)
|
||||
}
|
||||
|
||||
func (d *Driver) createUploadSessionOnce(ctx context.Context, parentID, name string, size int64, retry bool) (string, error) {
|
||||
var apiErr apiErrorResp
|
||||
res, err := d.client.R().
|
||||
SetContext(ctx).
|
||||
SetHeader("Authorization", "Bearer "+d.accessToken).
|
||||
SetHeader("X-Upload-Content-Type", mimeType(driveFile{Name: name})).
|
||||
SetHeader("X-Upload-Content-Length", strconv.FormatInt(size, 10)).
|
||||
SetQueryParams(map[string]string{
|
||||
"uploadType": "resumable",
|
||||
"supportsAllDrives": "true",
|
||||
"fields": fileInfoFields,
|
||||
}).
|
||||
SetBody(map[string]any{
|
||||
"name": name,
|
||||
"parents": []string{parentID},
|
||||
}).
|
||||
SetError(&apiErr).
|
||||
Post(d.uploadFilesURL())
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("googledrive upload session: %w", err)
|
||||
}
|
||||
if isGoogleRateLimit(res, apiErr.Error) {
|
||||
return "", googleRateLimitError(res, apiErr.Error.Message)
|
||||
}
|
||||
if apiErr.Error.Code != 0 {
|
||||
if apiErr.Error.Code == http.StatusUnauthorized && retry {
|
||||
if err := d.refresh(ctx); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return d.createUploadSessionOnce(ctx, parentID, name, size, false)
|
||||
}
|
||||
return "", googleAPIError(apiErr.Error)
|
||||
}
|
||||
if res.IsError() {
|
||||
return "", fmt.Errorf("googledrive upload session: status=%d body=%s", res.StatusCode(), strings.TrimSpace(res.String()))
|
||||
}
|
||||
return strings.TrimSpace(res.Header().Get("Location")), nil
|
||||
}
|
||||
|
||||
func (d *Driver) putUploadSessionChunkWithRetry(ctx context.Context, uploadURL string, start, total int64, data []byte, hasher hash.Hash) (*driveFile, error) {
|
||||
var last error
|
||||
for attempt := 0; attempt < 3; attempt++ {
|
||||
if attempt > 0 {
|
||||
if err := sleepContext(ctx, time.Duration(attempt)*time.Second); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
item, retryable, err := d.putUploadSessionChunk(ctx, uploadURL, start, total, data)
|
||||
if err == nil {
|
||||
if hasher != nil && len(data) > 0 {
|
||||
_, _ = hasher.Write(data)
|
||||
}
|
||||
return item, nil
|
||||
}
|
||||
last = err
|
||||
if !retryable {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if last == nil {
|
||||
last = errors.New("googledrive upload session: retry attempts exhausted")
|
||||
}
|
||||
return nil, last
|
||||
}
|
||||
|
||||
func (d *Driver) putUploadSessionChunk(ctx context.Context, uploadURL string, start, total int64, data []byte) (*driveFile, bool, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPut, uploadURL, bytes.NewReader(data))
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
req.ContentLength = int64(len(data))
|
||||
req.Header.Set("Authorization", "Bearer "+d.accessToken)
|
||||
req.Header.Set("Content-Length", strconv.Itoa(len(data)))
|
||||
if total == 0 {
|
||||
req.Header.Set("Content-Range", "bytes */0")
|
||||
} else {
|
||||
end := start + int64(len(data)) - 1
|
||||
req.Header.Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, end, total))
|
||||
}
|
||||
hc := d.httpClient
|
||||
if hc == nil {
|
||||
hc = http.DefaultClient
|
||||
}
|
||||
res, err := hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, true, fmt.Errorf("googledrive upload session: put chunk: %w", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
switch res.StatusCode {
|
||||
case http.StatusOK, http.StatusCreated:
|
||||
var item driveFile
|
||||
if err := json.NewDecoder(res.Body).Decode(&item); err != nil {
|
||||
return nil, false, fmt.Errorf("googledrive upload session: decode completed file: %w", err)
|
||||
}
|
||||
return &item, false, nil
|
||||
case http.StatusPermanentRedirect:
|
||||
return nil, false, nil
|
||||
case http.StatusUnauthorized:
|
||||
if err := d.refresh(ctx); err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
return nil, true, fmt.Errorf("googledrive upload session: unauthorized")
|
||||
default:
|
||||
body, _ := io.ReadAll(io.LimitReader(res.Body, 64*1024))
|
||||
var apiErr apiErrorResp
|
||||
_ = json.Unmarshal(body, &apiErr)
|
||||
if isGoogleUploadHTTPRateLimit(res.StatusCode, res.Header, body, apiErr.Error) {
|
||||
return nil, false, googleUploadRateLimitError(res.StatusCode, res.Header, body, apiErr.Error.Message)
|
||||
}
|
||||
retryable := res.StatusCode == http.StatusTooManyRequests || (res.StatusCode >= 500 && res.StatusCode <= 504)
|
||||
return nil, retryable, fmt.Errorf("googledrive upload session: status=%d body=%s", res.StatusCode, strings.TrimSpace(string(body)))
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
currentID := d.rootID
|
||||
for _, name := range splitPath(pathFromRoot) {
|
||||
childID, err := d.findChildDir(ctx, currentID, name)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if childID == "" {
|
||||
childID, err = d.makeDir(ctx, currentID, name)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
currentID = childID
|
||||
}
|
||||
return currentID, nil
|
||||
}
|
||||
|
||||
func (d *Driver) findChildDir(ctx context.Context, parentID, name string) (string, error) {
|
||||
entries, err := d.List(ctx, parentID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
for _, e := range entries {
|
||||
if e.IsDir && e.Name == name {
|
||||
return e.ID, nil
|
||||
}
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func (d *Driver) makeDir(ctx context.Context, parentID, name string) (string, error) {
|
||||
var item driveFile
|
||||
err := d.request(ctx, d.filesURL(), http.MethodPost, func(req *resty.Request) {
|
||||
req.SetQueryParam("fields", fileInfoFields)
|
||||
req.SetBody(map[string]any{
|
||||
"name": name,
|
||||
"parents": []string{parentID},
|
||||
"mimeType": "application/vnd.google-apps.folder",
|
||||
})
|
||||
}, &item)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("googledrive mkdir %s: %w", name, err)
|
||||
}
|
||||
if item.ID == "" {
|
||||
return "", fmt.Errorf("googledrive mkdir %s: empty file id", name)
|
||||
}
|
||||
return item.ID, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("googledrive rename: empty file id")
|
||||
}
|
||||
newName = strings.TrimSpace(newName)
|
||||
if newName == "" {
|
||||
return errors.New("googledrive rename: empty new name")
|
||||
}
|
||||
var item driveFile
|
||||
err := d.request(ctx, d.fileURL(fileID), http.MethodPatch, func(req *resty.Request) {
|
||||
req.SetQueryParam("fields", fileInfoFields)
|
||||
req.SetBody(map[string]string{"name": newName})
|
||||
}, &item)
|
||||
if err != nil {
|
||||
return fmt.Errorf("googledrive rename: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("googledrive remove: empty file id")
|
||||
}
|
||||
if err := d.request(ctx, d.fileURL(fileID), http.MethodDelete, nil, nil); err != nil {
|
||||
return fmt.Errorf("googledrive remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) findUploadedFileID(ctx context.Context, parentID, name, md5Hex string) (string, error) {
|
||||
entries, err := d.List(ctx, parentID)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("googledrive upload verify: %w", err)
|
||||
}
|
||||
var hashHit string
|
||||
for _, e := range entries {
|
||||
if e.IsDir {
|
||||
continue
|
||||
}
|
||||
if !strings.EqualFold(e.Hash, md5Hex) {
|
||||
continue
|
||||
}
|
||||
if e.Name == name {
|
||||
return e.ID, nil
|
||||
}
|
||||
if hashHit == "" {
|
||||
hashHit = e.ID
|
||||
}
|
||||
}
|
||||
if hashHit != "" {
|
||||
return hashHit, nil
|
||||
}
|
||||
for _, e := range entries {
|
||||
if !e.IsDir && e.Name == name {
|
||||
return e.ID, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("googledrive upload: uploaded file %q not found in parent %q", name, parentID)
|
||||
}
|
||||
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
func isGoogleUploadHTTPRateLimit(status int, header http.Header, body []byte, apiErr apiErrorBody) bool {
|
||||
if status == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
if status == http.StatusForbidden && strings.TrimSpace(header.Get("Retry-After")) != "" {
|
||||
return true
|
||||
}
|
||||
if isGoogleRateLimit(nil, apiErr) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func googleUploadRateLimitError(status int, header http.Header, body []byte, message string) error {
|
||||
if strings.TrimSpace(message) == "" {
|
||||
message = "google drive upload rate limited"
|
||||
}
|
||||
bodyText := strings.TrimSpace(string(body))
|
||||
if bodyText != "" {
|
||||
message = fmt.Sprintf("%s: status=%d body=%s", message, status, bodyText)
|
||||
}
|
||||
return &drives.RateLimitError{
|
||||
Provider: Kind,
|
||||
RetryAfter: parseRetryAfterHeader(header.Get("Retry-After")),
|
||||
Err: errors.New(message),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Driver) refresh(ctx context.Context) error {
|
||||
@@ -288,6 +723,26 @@ func (d *Driver) applyToken(out tokenResp) {
|
||||
}
|
||||
|
||||
func tokenResponseError(prefix string, res *resty.Response, out tokenResp, requireRefresh bool) error {
|
||||
if isGoogleTokenRateLimit(res, out) {
|
||||
message := strings.TrimSpace(out.Text)
|
||||
if message == "" {
|
||||
message = strings.TrimSpace(out.ErrorDescription)
|
||||
}
|
||||
if message == "" {
|
||||
message = strings.TrimSpace(out.Error)
|
||||
}
|
||||
if message == "" {
|
||||
message = "google drive token refresh rate limited"
|
||||
}
|
||||
if res != nil && strings.TrimSpace(res.String()) != "" {
|
||||
message = fmt.Sprintf("%s: status=%d body=%s", message, res.StatusCode(), strings.TrimSpace(res.String()))
|
||||
}
|
||||
return &drives.RateLimitError{
|
||||
Provider: Kind,
|
||||
RetryAfter: parseRetryAfter(res),
|
||||
Err: fmt.Errorf("%s: %s", prefix, message),
|
||||
}
|
||||
}
|
||||
if out.Text != "" {
|
||||
return fmt.Errorf("%s: %s", prefix, out.Text)
|
||||
}
|
||||
@@ -380,6 +835,10 @@ func (d *Driver) filesURL() string {
|
||||
return d.apiBaseURL + "/files"
|
||||
}
|
||||
|
||||
func (d *Driver) uploadFilesURL() string {
|
||||
return d.uploadBaseURL + "/files"
|
||||
}
|
||||
|
||||
func (d *Driver) fileURL(fileID string) string {
|
||||
return d.filesURL() + "/" + url.PathEscape(fileID)
|
||||
}
|
||||
@@ -444,18 +903,58 @@ func isGoogleRateLimit(res *resty.Response, body apiErrorBody) bool {
|
||||
if res != nil && res.StatusCode() == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
if res != nil && res.StatusCode() == http.StatusForbidden && strings.TrimSpace(res.Header().Get("Retry-After")) != "" {
|
||||
return true
|
||||
}
|
||||
if body.Code == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
for _, e := range body.Errors {
|
||||
reason := strings.ToLower(strings.TrimSpace(e.Reason))
|
||||
switch reason {
|
||||
case "ratelimitexceeded", "userratelimitexceeded", "downloadquotaexceeded", "sharingratelimitexceeded":
|
||||
if googleLimitReason(e.Reason) {
|
||||
return true
|
||||
}
|
||||
domain := compactGoogleLimitText(e.Domain)
|
||||
if domain == "usagelimits" && (body.Code == http.StatusForbidden || body.Code == http.StatusTooManyRequests) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
msg := strings.ToLower(body.Message)
|
||||
return strings.Contains(msg, "rate limit") || strings.Contains(msg, "too many requests") || strings.Contains(msg, "quota exceeded")
|
||||
return false
|
||||
}
|
||||
|
||||
func isGoogleTokenRateLimit(res *resty.Response, out tokenResp) bool {
|
||||
if res != nil {
|
||||
if res.StatusCode() == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
if res.StatusCode() == http.StatusForbidden && strings.TrimSpace(res.Header().Get("Retry-After")) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return googleLimitReason(out.Error)
|
||||
}
|
||||
|
||||
func googleLimitReason(reason string) bool {
|
||||
switch compactGoogleLimitText(reason) {
|
||||
case "ratelimitexceeded",
|
||||
"userratelimitexceeded",
|
||||
"dailylimitexceeded",
|
||||
"dailylimitexceededunreg",
|
||||
"downloadquotaexceeded",
|
||||
"sharingratelimitexceeded",
|
||||
"quotaexceeded",
|
||||
"uploadlimitexceeded",
|
||||
"storagelimitexceeded",
|
||||
"storagequotaexceeded":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// compactGoogleLimitText normalises a reason or domain string for comparison:
// surrounding whitespace is trimmed, the text is lower-cased, and every
// underscore, hyphen, space, dot and colon is removed.
func compactGoogleLimitText(text string) string {
	lowered := strings.ToLower(strings.TrimSpace(text))

	var compact strings.Builder
	compact.Grow(len(lowered))
	// All separators are single-byte ASCII, so a byte-wise scan cannot split a
	// multi-byte UTF-8 sequence.
	for i := 0; i < len(lowered); i++ {
		switch c := lowered[i]; c {
		case '_', '-', ' ', '.', ':':
			// separator: dropped
		default:
			compact.WriteByte(c)
		}
	}
	return compact.String()
}
|
||||
|
||||
func googleRateLimitError(res *resty.Response, message string) error {
|
||||
@@ -486,7 +985,11 @@ func parseRetryAfter(res *resty.Response) time.Duration {
|
||||
if res == nil {
|
||||
return 0
|
||||
}
|
||||
raw := strings.TrimSpace(res.Header().Get("Retry-After"))
|
||||
return parseRetryAfterHeader(res.Header().Get("Retry-After"))
|
||||
}
|
||||
|
||||
func parseRetryAfterHeader(raw string) time.Duration {
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
return 0
|
||||
}
|
||||
@@ -502,4 +1005,19 @@ func parseRetryAfter(res *resty.Response) time.Duration {
|
||||
return 0
|
||||
}
|
||||
|
||||
// splitPath splits a slash-separated path into its non-empty segments.
// Leading, trailing and repeated slashes are ignored, so "/a//b/" yields
// ["a", "b"]. A path without any segment ("" or only slashes) yields nil.
//
// Empty segments are dropped on purpose: EnsureDir feeds each segment to a
// folder lookup and, if needed, a folder creation, and an empty folder name is
// never what a caller writing "a//b" intends.
func splitPath(p string) []string {
	segments := strings.FieldsFunc(p, func(r rune) bool { return r == '/' })
	if len(segments) == 0 {
		// Keep the original contract of returning nil for an empty path.
		return nil
	}
	return segments
}
|
||||
|
||||
// minInt64 returns the smaller of a and b.
func minInt64(a, b int64) int64 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
|
||||
@@ -2,11 +2,18 @@ package googledrive
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
func TestInitUsesOnlineRenewAPI(t *testing.T) {
|
||||
@@ -131,6 +138,134 @@ func TestStreamURLReturnsAuthenticatedMediaLinkWithoutRedirectRequirement(t *tes
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadAndReportHashUsesResumableSession(t *testing.T) {
|
||||
body := "hello google drive"
|
||||
wantHash := md5.Sum([]byte(body))
|
||||
var sawSession bool
|
||||
var sawUpload bool
|
||||
var srv *httptest.Server
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/upload/drive/v3/files":
|
||||
sawSession = true
|
||||
if got := r.Header.Get("Authorization"); got != "Bearer access" {
|
||||
t.Fatalf("session Authorization = %q", got)
|
||||
}
|
||||
if got := r.URL.Query().Get("uploadType"); got != "resumable" {
|
||||
t.Fatalf("uploadType = %q", got)
|
||||
}
|
||||
if got := r.Header.Get("X-Upload-Content-Length"); got != "18" {
|
||||
t.Fatalf("X-Upload-Content-Length = %q", got)
|
||||
}
|
||||
var meta struct {
|
||||
Name string `json:"name"`
|
||||
Parents []string `json:"parents"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&meta); err != nil {
|
||||
t.Fatalf("decode session metadata: %v", err)
|
||||
}
|
||||
if meta.Name != "clip.mp4" || len(meta.Parents) != 1 || meta.Parents[0] != "parent-1" {
|
||||
t.Fatalf("metadata = %+v", meta)
|
||||
}
|
||||
w.Header().Set("Location", srv.URL+"/upload/session/1")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
case "/upload/session/1":
|
||||
sawUpload = true
|
||||
if got := r.Header.Get("Authorization"); got != "Bearer access" {
|
||||
t.Fatalf("upload Authorization = %q", got)
|
||||
}
|
||||
if got := r.Header.Get("Content-Range"); got != "bytes 0-17/18" {
|
||||
t.Fatalf("Content-Range = %q", got)
|
||||
}
|
||||
gotBody, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("read upload body: %v", err)
|
||||
}
|
||||
if string(gotBody) != body {
|
||||
t.Fatalf("upload body = %q", string(gotBody))
|
||||
}
|
||||
writeTestJSONStatus(w, http.StatusCreated, driveFile{
|
||||
ID: "file-uploaded",
|
||||
Name: "clip.mp4",
|
||||
Size: "18",
|
||||
MD5Checksum: hex.EncodeToString(wantHash[:]),
|
||||
})
|
||||
default:
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{ID: "g", APIBaseURL: srv.URL + "/drive/v3"})
|
||||
d.accessToken = "access"
|
||||
res, err := d.UploadAndReportHash(context.Background(), "parent-1", "clip.mp4", strings.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("UploadAndReportHash() error = %v", err)
|
||||
}
|
||||
if !sawSession || !sawUpload {
|
||||
t.Fatalf("saw session/upload = %v/%v, want both", sawSession, sawUpload)
|
||||
}
|
||||
if res.FileID != "file-uploaded" || res.Size != int64(len(body)) || res.Hash != hex.EncodeToString(wantHash[:]) {
|
||||
t.Fatalf("upload result = %+v", res)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureDirAndRenameUseGoogleDriveFileAPI(t *testing.T) {
|
||||
var madeDir bool
|
||||
var renamed bool
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.Method == http.MethodGet && r.URL.Path == "/drive/v3/files":
|
||||
writeTestJSON(w, filesResp{})
|
||||
case r.Method == http.MethodPost && r.URL.Path == "/drive/v3/files":
|
||||
madeDir = true
|
||||
var meta struct {
|
||||
Name string `json:"name"`
|
||||
Parents []string `json:"parents"`
|
||||
MimeType string `json:"mimeType"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&meta); err != nil {
|
||||
t.Fatalf("decode mkdir body: %v", err)
|
||||
}
|
||||
if meta.Name != "Crawler Uploads" || len(meta.Parents) != 1 || meta.Parents[0] != "root" || meta.MimeType != "application/vnd.google-apps.folder" {
|
||||
t.Fatalf("mkdir body = %+v", meta)
|
||||
}
|
||||
writeTestJSON(w, driveFile{ID: "folder-crawler", Name: "Crawler Uploads", MimeType: "application/vnd.google-apps.folder"})
|
||||
case r.Method == http.MethodPatch && r.URL.Path == "/drive/v3/files/file-1":
|
||||
renamed = true
|
||||
var body map[string]string
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode rename body: %v", err)
|
||||
}
|
||||
if body["name"] != "new-name.mp4" {
|
||||
t.Fatalf("rename body = %+v", body)
|
||||
}
|
||||
writeTestJSON(w, driveFile{ID: "file-1", Name: "new-name.mp4"})
|
||||
default:
|
||||
t.Fatalf("unexpected %s %s", r.Method, r.URL.Path)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{ID: "g", RootID: "root", APIBaseURL: srv.URL + "/drive/v3"})
|
||||
d.accessToken = "access"
|
||||
d.listInterval = -1
|
||||
|
||||
dirID, err := d.EnsureDir(context.Background(), "Crawler Uploads")
|
||||
if err != nil {
|
||||
t.Fatalf("EnsureDir() error = %v", err)
|
||||
}
|
||||
if dirID != "folder-crawler" || !madeDir {
|
||||
t.Fatalf("dirID/madeDir = %q/%v, want folder-crawler/true", dirID, madeDir)
|
||||
}
|
||||
if err := d.Rename(context.Background(), "file-1", "new-name.mp4"); err != nil {
|
||||
t.Fatalf("Rename() error = %v", err)
|
||||
}
|
||||
if !renamed {
|
||||
t.Fatal("rename endpoint was not called")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRequestRefreshesOnUnauthorized(t *testing.T) {
|
||||
var fileCalls int
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -179,6 +314,88 @@ func TestRequestRefreshesOnUnauthorized(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRateLimitReasonsFollowGoogleDriveErrorShape(t *testing.T) {
|
||||
reasons := []string{
|
||||
"rateLimitExceeded",
|
||||
"userRateLimitExceeded",
|
||||
"dailyLimitExceeded",
|
||||
"dailyLimitExceededUnreg",
|
||||
"downloadQuotaExceeded",
|
||||
"sharingRateLimitExceeded",
|
||||
"quotaExceeded",
|
||||
}
|
||||
for _, reason := range reasons {
|
||||
body := apiErrorBody{
|
||||
Code: http.StatusForbidden,
|
||||
Message: "google drive quota or rate limited",
|
||||
Errors: []struct {
|
||||
Domain string `json:"domain"`
|
||||
Reason string `json:"reason"`
|
||||
Message string `json:"message"`
|
||||
LocationType string `json:"location_type"`
|
||||
Location string `json:"location"`
|
||||
}{
|
||||
{Domain: "usageLimits", Reason: reason, Message: reason},
|
||||
},
|
||||
}
|
||||
if !isGoogleRateLimit(nil, body) {
|
||||
t.Fatalf("reason %q not treated as rate limit", reason)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLRateLimitStartsSharedLinkCooldown(t *testing.T) {
|
||||
var calls int
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
calls++
|
||||
w.Header().Set("Retry-After", "120")
|
||||
writeTestJSONStatus(w, http.StatusForbidden, apiErrorResp{Error: apiErrorBody{
|
||||
Code: http.StatusForbidden,
|
||||
Message: "User rate limit exceeded.",
|
||||
Errors: []struct {
|
||||
Domain string `json:"domain"`
|
||||
Reason string `json:"reason"`
|
||||
Message string `json:"message"`
|
||||
LocationType string `json:"location_type"`
|
||||
Location string `json:"location"`
|
||||
}{
|
||||
{Domain: "usageLimits", Reason: "userRateLimitExceeded", Message: "User rate limit exceeded."},
|
||||
},
|
||||
}})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{ID: "g", APIBaseURL: srv.URL})
|
||||
d.accessToken = "access"
|
||||
d.linkCooldownDuration = time.Hour
|
||||
|
||||
_, err := d.StreamURL(context.Background(), "file-1")
|
||||
if err == nil {
|
||||
t.Fatal("first StreamURL succeeded, want rate limit")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("first error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if rateLimit.RetryAfter != 2*time.Minute {
|
||||
t.Fatalf("retry after = %s, want 2m", rateLimit.RetryAfter)
|
||||
}
|
||||
|
||||
_, err = d.StreamURL(context.Background(), "file-1")
|
||||
if err == nil {
|
||||
t.Fatal("second StreamURL succeeded during cooldown")
|
||||
}
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("second error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if calls != 1 {
|
||||
t.Fatalf("remote calls = %d, want 1; second call should use shared cooldown", calls)
|
||||
}
|
||||
if rateLimit.RetryAfter <= 0 || rateLimit.RetryAfter > 2*time.Minute {
|
||||
t.Fatalf("second retry after = %s, want remaining cooldown", rateLimit.RetryAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func writeTestJSON(w http.ResponseWriter, v any) {
|
||||
writeTestJSONStatus(w, http.StatusOK, v)
|
||||
}
|
||||
|
||||
@@ -42,8 +42,16 @@ type apiErrorBody struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
Errors []struct {
|
||||
Domain string `json:"domain"`
|
||||
Reason string `json:"reason"`
|
||||
Message string `json:"message"`
|
||||
Domain string `json:"domain"`
|
||||
Reason string `json:"reason"`
|
||||
Message string `json:"message"`
|
||||
LocationType string `json:"location_type"`
|
||||
Location string `json:"location"`
|
||||
} `json:"errors"`
|
||||
}
|
||||
|
||||
// UploadResult describes a finished upload: the ID of the stored file, its
// hash and its size. The upload test in this package compares Hash against a
// hex-encoded MD5 and Size against the byte length of the content.
type UploadResult struct {
	FileID, Hash string
	Size         int64
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,300 @@
|
||||
package guangyapan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
func TestDriverRefreshListAndStream(t *testing.T) {
|
||||
var refreshed bool
|
||||
var listedRoot bool
|
||||
updates := map[string]string{}
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/v1/auth/token":
|
||||
refreshed = true
|
||||
writeTestJSON(w, map[string]any{
|
||||
"access_token": "new-access",
|
||||
"refresh_token": "new-refresh",
|
||||
})
|
||||
case "/v1/user/me":
|
||||
if got := r.Header.Get("Authorization"); got != "Bearer new-access" {
|
||||
t.Fatalf("auth header = %q, want new access token", got)
|
||||
}
|
||||
writeTestJSON(w, map[string]any{"sub": "user-1"})
|
||||
case "/userres/v1/file/get_file_list":
|
||||
if got := r.Header.Get("Authorization"); got != "Bearer new-access" {
|
||||
t.Fatalf("api auth header = %q, want new access token", got)
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode list body: %v", err)
|
||||
}
|
||||
if body["parentId"] != "" {
|
||||
t.Fatalf("parentId = %#v, want root empty string", body["parentId"])
|
||||
}
|
||||
listedRoot = true
|
||||
writeTestJSON(w, map[string]any{
|
||||
"code": 0,
|
||||
"msg": "success",
|
||||
"data": map[string]any{
|
||||
"total": 2,
|
||||
"list": []map[string]any{
|
||||
{"fileId": "dir-1", "parentId": "", "fileName": "Movies", "resType": 2},
|
||||
{"fileId": "file-1", "parentId": "", "fileName": "clip.mp4", "fileSize": 123, "resType": 1, "utime": 1700000000},
|
||||
},
|
||||
},
|
||||
})
|
||||
case "/nd.bizuserres.s/v1/get_res_download_url":
|
||||
writeTestJSON(w, map[string]any{
|
||||
"code": 0,
|
||||
"msg": "success",
|
||||
"data": map[string]any{"signedURL": "https://cdn.example.test/clip.mp4"},
|
||||
})
|
||||
default:
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
RefreshToken: "old-refresh",
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
OnCredentialsUpdate: func(values map[string]string) {
|
||||
for k, v := range values {
|
||||
updates[k] = v
|
||||
}
|
||||
},
|
||||
})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
if !refreshed {
|
||||
t.Fatal("refresh token endpoint was not called")
|
||||
}
|
||||
if updates["access_token"] != "new-access" || updates["refresh_token"] != "new-refresh" {
|
||||
t.Fatalf("updates = %#v, want refreshed tokens", updates)
|
||||
}
|
||||
|
||||
entries, err := d.List(context.Background(), "")
|
||||
if err != nil {
|
||||
t.Fatalf("list: %v", err)
|
||||
}
|
||||
if !listedRoot || len(entries) != 2 {
|
||||
t.Fatalf("listedRoot=%v entries=%#v", listedRoot, entries)
|
||||
}
|
||||
if !entries[0].IsDir || entries[1].ID != "file-1" || entries[1].Size != 123 {
|
||||
t.Fatalf("entries = %#v", entries)
|
||||
}
|
||||
|
||||
link, err := d.StreamURL(context.Background(), "file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("stream url: %v", err)
|
||||
}
|
||||
if link.URL != "https://cdn.example.test/clip.mp4" {
|
||||
t.Fatalf("stream url = %q", link.URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDriverResolvesRootPath(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/v1/user/me":
|
||||
writeTestJSON(w, map[string]any{"sub": "user-1"})
|
||||
case "/userres/v1/file/get_file_list":
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode list body: %v", err)
|
||||
}
|
||||
parent, _ := body["parentId"].(string)
|
||||
switch parent {
|
||||
case "":
|
||||
writeTestJSON(w, listTestResponse([]map[string]any{
|
||||
{"fileId": "folder-a", "parentId": "", "fileName": "影视", "resType": 2},
|
||||
}))
|
||||
case "folder-a":
|
||||
writeTestJSON(w, listTestResponse([]map[string]any{
|
||||
{"fileId": "folder-b", "parentId": "folder-a", "fileName": "电影", "resType": 2},
|
||||
}))
|
||||
case "folder-b":
|
||||
writeTestJSON(w, listTestResponse([]map[string]any{
|
||||
{"fileId": "file-1", "parentId": "folder-b", "fileName": "movie.mp4", "fileSize": 456, "resType": 1},
|
||||
}))
|
||||
default:
|
||||
t.Fatalf("unexpected parent %q", parent)
|
||||
}
|
||||
default:
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
RootID: "configured-root",
|
||||
RootPath: "影视/电影",
|
||||
AccessToken: "access",
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
if d.RootID() != "folder-b" {
|
||||
t.Fatalf("root id = %q, want folder-b", d.RootID())
|
||||
}
|
||||
entries, err := d.List(context.Background(), "")
|
||||
if err != nil {
|
||||
t.Fatalf("list resolved root: %v", err)
|
||||
}
|
||||
if len(entries) != 1 || entries[0].ID != "file-1" {
|
||||
t.Fatalf("entries = %#v", entries)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDriverSendSMSCodeUpdatesVerificationState(t *testing.T) {
|
||||
updates := map[string]string{}
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/v1/shield/captcha/init":
|
||||
writeTestJSON(w, map[string]any{"captcha_token": "captcha-1"})
|
||||
case "/v1/auth/verification":
|
||||
writeTestJSON(w, map[string]any{"verification_id": "verify-1"})
|
||||
default:
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
PhoneNumber: "13800000000",
|
||||
SendCode: true,
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
OnCredentialsUpdate: func(values map[string]string) {
|
||||
for k, v := range values {
|
||||
updates[k] = v
|
||||
}
|
||||
},
|
||||
})
|
||||
err := d.Init(context.Background())
|
||||
if err == nil || !strings.Contains(err.Error(), "验证码已发送") {
|
||||
t.Fatalf("init err = %v, want verification prompt", err)
|
||||
}
|
||||
if updates["captcha_token"] != "captcha-1" || updates["verification_id"] != "verify-1" || updates["send_code"] != "false" {
|
||||
t.Fatalf("updates = %#v, want sms state saved", updates)
|
||||
}
|
||||
if updates["device_id"] == "" {
|
||||
t.Fatalf("updates = %#v, want generated device id saved", updates)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListHTTP429ReturnsRateLimitError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/userres/v1/file/get_file_list" {
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
w.Header().Set("Retry-After", "120")
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
writeTestJSON(w, map[string]any{"code": 429, "msg": "操作频繁,请稍后重试"})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
AccessToken: "access",
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
})
|
||||
_, err := d.List(context.Background(), "")
|
||||
if err == nil {
|
||||
t.Fatal("list succeeded, want rate limit error")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if rateLimit.RetryAfter != 2*time.Minute {
|
||||
t.Fatalf("retry after = %s, want 2m", rateLimit.RetryAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListCode429ReturnsRateLimitError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/userres/v1/file/get_file_list" {
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
writeTestJSON(w, map[string]any{"code": 429, "msg": "操作频繁,请稍后再试"})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
AccessToken: "access",
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
})
|
||||
_, err := d.List(context.Background(), "")
|
||||
if err == nil {
|
||||
t.Fatal("list succeeded, want rate limit error")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListInvalidToken403DoesNotReturnRateLimitError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/userres/v1/file/get_file_list" {
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
w.WriteHeader(http.StatusForbidden)
|
||||
writeTestJSON(w, map[string]any{"code": 401, "msg": "invalid access token"})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
AccessToken: "access",
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
})
|
||||
_, err := d.List(context.Background(), "")
|
||||
if err == nil {
|
||||
t.Fatal("list succeeded, want auth error")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want non-rate-limit error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// listTestResponse wraps items in the success envelope the fake list endpoint
// returns: code 0, msg "success", and a data object holding the item count
// ("total") and the items themselves ("list").
func listTestResponse(items []map[string]any) map[string]any {
	payload := map[string]any{
		"total": len(items),
		"list":  items,
	}
	envelope := make(map[string]any, 3)
	envelope["code"] = 0
	envelope["msg"] = "success"
	envelope["data"] = payload
	return envelope
}
|
||||
|
||||
func writeTestJSON(w http.ResponseWriter, v any) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(w).Encode(v); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,244 @@
|
||||
package guangyapan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-resty/resty/v2"
|
||||
"github.com/skip2/go-qrcode"
|
||||
)
|
||||
|
||||
// Defaults used by the QR-code login client.
const (
|
||||
// defaultQRScope is sent as "scope" when requesting a device code.
defaultQRScope = "user"
|
||||
// deviceCodeGrantType is a grant-type URN for the device-code flow; it is
// presumably sent when exchanging the device code for tokens (that request is
// not part of this excerpt).
deviceCodeGrantType = "urn:ietf:params:oauth:grant-type:device_code"
|
||||
// defaultQRUserAgent is the User-Agent header NewQRClient sets on its client.
defaultQRUserAgent = "GuangYaPan-Login/1.0"
|
||||
)
|
||||
|
||||
// QRConfig holds the optional settings accepted by NewQRClient. Every field
// may be left at its zero value.
type QRConfig struct {
|
||||
// AccountBaseURL is trimmed of whitespace and trailing slashes; when blank,
// defaultAccountBaseURL is used.
AccountBaseURL string
|
||||
// HTTPClient is the underlying HTTP client; nil selects a client with a
// 20-second timeout.
HTTPClient *http.Client
|
||||
// Now is the clock used by the client; nil selects time.Now.
Now func() time.Time
|
||||
}
|
||||
|
||||
// QRClient talks to the account service for QR-code login. Build it with
// NewQRClient, which fills in all three fields.
type QRClient struct {
|
||||
// accountBaseURL is the normalised base URL, also set as the resty base URL.
accountBaseURL string
|
||||
// client is the resty client carrying the default headers and timeout.
client *resty.Client
|
||||
// now is the clock function (time.Now unless overridden via QRConfig.Now).
now func() time.Time
|
||||
}
|
||||
|
||||
// QRCodeSession is the value returned by QRClient.Generate. It is serialised
// with camelCase JSON keys.
// NOTE(review): field meanings below are inferred from names and from the
// visible start of Generate; confirm against the rest of that method.
type QRCodeSession struct {
|
||||
// DeviceCode presumably carries the device_code issued by the server.
DeviceCode string `json:"deviceCode"`
|
||||
// QRCodeURL presumably carries the verification URI encoded in the QR code.
QRCodeURL string `json:"qrCodeUrl"`
|
||||
// QRImageDataURL presumably carries the rendered QR image as a data URL.
QRImageDataURL string `json:"qrImageDataUrl"`
|
||||
// IntervalSeconds presumably carries the polling interval in seconds.
IntervalSeconds int `json:"intervalSeconds"`
|
||||
// ExpiresAt is omitted from JSON when empty.
ExpiresAt string `json:"expiresAt,omitempty"`
|
||||
}
|
||||
|
||||
// QRCodeStatus is a JSON-serialisable status of a QR login attempt. Only
// State and StatusText are always emitted; all other fields are omitted from
// JSON when zero.
// NOTE(review): the code producing this value is outside this excerpt; the
// token fields presumably get filled once login succeeds. Confirm.
type QRCodeStatus struct {
|
||||
State string `json:"state"`
|
||||
StatusText string `json:"statusText"`
|
||||
IntervalSeconds int `json:"intervalSeconds,omitempty"`
|
||||
AccessToken string `json:"accessToken,omitempty"`
|
||||
RefreshToken string `json:"refreshToken,omitempty"`
|
||||
TokenType string `json:"tokenType,omitempty"`
|
||||
ExpiresIn int64 `json:"expiresIn,omitempty"`
|
||||
}
|
||||
|
||||
// deviceCodeResp is the JSON reply of POST /v1/auth/device/code. Generate
// decodes both successful and error replies into this type, so it carries the
// device-code fields as well as the error fields.
type deviceCodeResp struct {
|
||||
DeviceCode string `json:"device_code"`
|
||||
// VerificationURIComplete is Generate's preferred QR URL.
VerificationURIComplete string `json:"verification_uri_complete"`
|
||||
// ShortURIComplete is the fallback QR URL when the field above is blank.
ShortURIComplete string `json:"short_uri_complete"`
|
||||
// Interval values <= 0 are replaced by a default in Generate.
Interval int `json:"interval"`
|
||||
ExpiresIn int `json:"expires_in"`
|
||||
// A non-empty Error marks the reply as failed even on a 2xx status.
Error string `json:"error"`
|
||||
ErrorCode int `json:"error_code"`
|
||||
ErrorDesc string `json:"error_description"`
|
||||
}
|
||||
|
||||
// deviceTokenResp mirrors a token reply with snake_case JSON keys: token
// fields plus the same error fields as deviceCodeResp.
// NOTE(review): the request that fills this type is outside this excerpt;
// presumably it is the device-code token exchange. Confirm.
type deviceTokenResp struct {
|
||||
AccessToken string `json:"access_token"`
|
||||
RefreshToken string `json:"refresh_token"`
|
||||
TokenType string `json:"token_type"`
|
||||
ExpiresIn int64 `json:"expires_in"`
|
||||
Scope string `json:"scope"`
|
||||
Error string `json:"error"`
|
||||
ErrorCode int `json:"error_code"`
|
||||
ErrorDesc string `json:"error_description"`
|
||||
}
|
||||
|
||||
func NewQRClient(c QRConfig) *QRClient {
|
||||
accountBaseURL := strings.TrimRight(strings.TrimSpace(c.AccountBaseURL), "/")
|
||||
if accountBaseURL == "" {
|
||||
accountBaseURL = defaultAccountBaseURL
|
||||
}
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = &http.Client{Timeout: 20 * time.Second}
|
||||
}
|
||||
now := c.Now
|
||||
if now == nil {
|
||||
now = time.Now
|
||||
}
|
||||
return &QRClient{
|
||||
accountBaseURL: accountBaseURL,
|
||||
client: resty.NewWithClient(httpClient).
|
||||
SetTimeout(20*time.Second).
|
||||
SetBaseURL(accountBaseURL).
|
||||
SetHeader("User-Agent", defaultQRUserAgent).
|
||||
SetHeader("Accept", "application/json").
|
||||
SetHeader("Content-Type", "application/json"),
|
||||
now: now,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *QRClient) Generate(ctx context.Context) (QRCodeSession, error) {
|
||||
var out deviceCodeResp
|
||||
var errOut deviceCodeResp
|
||||
resp, err := c.client.R().
|
||||
SetContext(ctx).
|
||||
SetBody(map[string]any{
|
||||
"client_id": defaultClientID,
|
||||
"scope": defaultQRScope,
|
||||
}).
|
||||
SetResult(&out).
|
||||
SetError(&errOut).
|
||||
Post("/v1/auth/device/code")
|
||||
if err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
if resp.IsError() || out.Error != "" {
|
||||
if out.Error == "" {
|
||||
out = errOut
|
||||
}
|
||||
return QRCodeSession{}, fmt.Errorf("guangyapan qr: %s", deviceAPIError(out.ErrorDesc, out.Error, resp))
|
||||
}
|
||||
|
||||
deviceCode := strings.TrimSpace(out.DeviceCode)
|
||||
if deviceCode == "" {
|
||||
return QRCodeSession{}, errors.New("guangyapan qr: empty device_code")
|
||||
}
|
||||
qrURL := strings.TrimSpace(out.VerificationURIComplete)
|
||||
if qrURL == "" {
|
||||
qrURL = strings.TrimSpace(out.ShortURIComplete)
|
||||
}
|
||||
if qrURL == "" {
|
||||
return QRCodeSession{}, errors.New("guangyapan qr: empty verification uri")
|
||||
}
|
||||
interval := out.Interval
|
||||
if interval <= 0 {
|
||||
interval = 5
|
||||
}
|
||||
expiresIn := out.ExpiresIn
|
||||
if expiresIn <= 0 {
|
||||
expiresIn = 300
|
||||
}
|
||||
png, err := qrcode.Encode(qrURL, qrcode.Medium, 220)
|
||||
if err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
return QRCodeSession{
|
||||
DeviceCode: deviceCode,
|
||||
QRCodeURL: qrURL,
|
||||
QRImageDataURL: "data:image/png;base64," + base64.StdEncoding.EncodeToString(png),
|
||||
IntervalSeconds: interval,
|
||||
ExpiresAt: c.now().Add(time.Duration(expiresIn) * time.Second).Format(time.RFC3339),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *QRClient) Poll(ctx context.Context, deviceCode string) (QRCodeStatus, error) {
|
||||
deviceCode = strings.TrimSpace(deviceCode)
|
||||
if deviceCode == "" {
|
||||
return QRCodeStatus{}, errors.New("deviceCode is required")
|
||||
}
|
||||
|
||||
var out deviceTokenResp
|
||||
var errOut deviceTokenResp
|
||||
resp, err := c.client.R().
|
||||
SetContext(ctx).
|
||||
SetBody(map[string]any{
|
||||
"client_id": defaultClientID,
|
||||
"grant_type": deviceCodeGrantType,
|
||||
"device_code": deviceCode,
|
||||
}).
|
||||
SetResult(&out).
|
||||
SetError(&errOut).
|
||||
Post("/v1/auth/token")
|
||||
if err != nil {
|
||||
return QRCodeStatus{}, err
|
||||
}
|
||||
if resp.IsError() && out.Error == "" {
|
||||
out = errOut
|
||||
}
|
||||
if resp.IsError() && out.Error == "" {
|
||||
_ = json.Unmarshal(resp.Body(), &out)
|
||||
}
|
||||
if out.Error != "" {
|
||||
return qrStatusForDeviceError(out), nil
|
||||
}
|
||||
if resp.IsError() {
|
||||
return QRCodeStatus{}, fmt.Errorf("guangyapan qr: status=%d body=%s", resp.StatusCode(), resp.String())
|
||||
}
|
||||
access := strings.TrimSpace(out.AccessToken)
|
||||
refresh := strings.TrimSpace(out.RefreshToken)
|
||||
if access == "" || refresh == "" {
|
||||
return QRCodeStatus{}, errors.New("guangyapan qr: login succeeded but token response is incomplete")
|
||||
}
|
||||
tokenType := strings.TrimSpace(out.TokenType)
|
||||
if tokenType == "" {
|
||||
tokenType = "Bearer"
|
||||
}
|
||||
return QRCodeStatus{
|
||||
State: "success",
|
||||
StatusText: "登录成功",
|
||||
AccessToken: access,
|
||||
RefreshToken: refresh,
|
||||
TokenType: tokenType,
|
||||
ExpiresIn: out.ExpiresIn,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func qrStatusForDeviceError(out deviceTokenResp) QRCodeStatus {
|
||||
errCode := strings.TrimSpace(out.Error)
|
||||
switch errCode {
|
||||
case "authorization_pending":
|
||||
return QRCodeStatus{State: "pending", StatusText: "等待扫码确认"}
|
||||
case "slow_down":
|
||||
return QRCodeStatus{State: "pending", StatusText: "等待扫码确认,已降低查询频率", IntervalSeconds: 10}
|
||||
case "expired_token":
|
||||
return QRCodeStatus{State: "expired", StatusText: "二维码已过期"}
|
||||
case "access_denied":
|
||||
return QRCodeStatus{State: "denied", StatusText: "用户拒绝了授权"}
|
||||
default:
|
||||
msg := strings.TrimSpace(out.ErrorDesc)
|
||||
if msg == "" {
|
||||
msg = errCode
|
||||
}
|
||||
if msg == "" {
|
||||
msg = "未知错误"
|
||||
}
|
||||
return QRCodeStatus{State: "error", StatusText: msg}
|
||||
}
|
||||
}
|
||||
|
||||
func deviceAPIError(desc, short string, resp *resty.Response) string {
|
||||
msg := strings.TrimSpace(desc)
|
||||
if msg == "" {
|
||||
msg = strings.TrimSpace(short)
|
||||
}
|
||||
if msg == "" && resp != nil {
|
||||
msg = strings.TrimSpace(resp.String())
|
||||
}
|
||||
if msg == "" && resp != nil {
|
||||
msg = fmt.Sprintf("status=%d", resp.StatusCode())
|
||||
}
|
||||
if msg == "" {
|
||||
msg = "unknown error"
|
||||
}
|
||||
return msg
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
package guangyapan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestQRClientGenerate(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/v1/auth/device/code" {
|
||||
t.Fatalf("path = %s, want device code endpoint", r.URL.Path)
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode body: %v", err)
|
||||
}
|
||||
if body["client_id"] != defaultClientID || body["scope"] != defaultQRScope {
|
||||
t.Fatalf("body = %#v", body)
|
||||
}
|
||||
writeTestJSON(w, map[string]any{
|
||||
"device_code": "device-1",
|
||||
"verification_uri_complete": "https://account.guangyapan.com/device?code=abc",
|
||||
"interval": 7,
|
||||
"expires_in": 180,
|
||||
})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
client := NewQRClient(QRConfig{
|
||||
AccountBaseURL: srv.URL,
|
||||
Now: func() time.Time { return time.Unix(1700000000, 0) },
|
||||
})
|
||||
session, err := client.Generate(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("generate: %v", err)
|
||||
}
|
||||
if session.DeviceCode != "device-1" || session.QRCodeURL != "https://account.guangyapan.com/device?code=abc" {
|
||||
t.Fatalf("session = %#v", session)
|
||||
}
|
||||
if session.IntervalSeconds != 7 {
|
||||
t.Fatalf("interval = %d, want 7", session.IntervalSeconds)
|
||||
}
|
||||
if session.ExpiresAt != time.Unix(1700000180, 0).Format(time.RFC3339) {
|
||||
t.Fatalf("expiresAt = %q", session.ExpiresAt)
|
||||
}
|
||||
if !strings.HasPrefix(session.QRImageDataURL, "data:image/png;base64,") {
|
||||
t.Fatalf("qr image = %q", session.QRImageDataURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRClientPollPendingAndSuccess(t *testing.T) {
|
||||
var calls int
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/v1/auth/token" {
|
||||
t.Fatalf("path = %s, want token endpoint", r.URL.Path)
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode body: %v", err)
|
||||
}
|
||||
if body["client_id"] != defaultClientID ||
|
||||
body["grant_type"] != deviceCodeGrantType ||
|
||||
body["device_code"] != "device-1" {
|
||||
t.Fatalf("body = %#v", body)
|
||||
}
|
||||
calls++
|
||||
if calls == 1 {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
writeTestJSON(w, map[string]any{"error": "authorization_pending"})
|
||||
return
|
||||
}
|
||||
writeTestJSON(w, map[string]any{
|
||||
"access_token": "access-1",
|
||||
"refresh_token": "refresh-1",
|
||||
"token_type": "Bearer",
|
||||
"expires_in": 7200,
|
||||
})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
client := NewQRClient(QRConfig{AccountBaseURL: srv.URL})
|
||||
pending, err := client.Poll(context.Background(), "device-1")
|
||||
if err != nil {
|
||||
t.Fatalf("poll pending: %v", err)
|
||||
}
|
||||
if pending.State != "pending" || pending.AccessToken != "" {
|
||||
t.Fatalf("pending = %#v", pending)
|
||||
}
|
||||
|
||||
success, err := client.Poll(context.Background(), "device-1")
|
||||
if err != nil {
|
||||
t.Fatalf("poll success: %v", err)
|
||||
}
|
||||
if success.State != "success" || success.AccessToken != "access-1" || success.RefreshToken != "refresh-1" {
|
||||
t.Fatalf("success = %#v", success)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,129 @@
|
||||
package guangyapan
|
||||
|
||||
import "time"
|
||||
|
||||
// tokenResp is a JSON token response: either an access/refresh token pair
// or the error fields.
type tokenResp struct {
	AccessToken  string `json:"access_token"`
	RefreshToken string `json:"refresh_token"`
	Error        string `json:"error"`
	ErrorCode    int    `json:"error_code"`
	ErrorDesc    string `json:"error_description"`
}
|
||||
|
||||
// verificationResp is a JSON response carrying a verification ID or the
// error fields.
type verificationResp struct {
	VerificationID string `json:"verification_id"`
	Error          string `json:"error"`
	ErrorCode      int    `json:"error_code"`
	ErrorDesc      string `json:"error_description"`
}
|
||||
|
||||
// captchaInitResp is a JSON response carrying a captcha token or the error
// fields.
type captchaInitResp struct {
	CaptchaToken string `json:"captcha_token"`
	Error        string `json:"error"`
	ErrorCode    int    `json:"error_code"`
	ErrorDesc    string `json:"error_description"`
}
|
||||
|
||||
// verifyResp is a JSON response carrying a verification token or the error
// fields.
type verifyResp struct {
	VerificationToken string `json:"verification_token"`
	Error             string `json:"error"`
	ErrorCode         int    `json:"error_code"`
	ErrorDesc         string `json:"error_description"`
}
|
||||
|
||||
// userMeResp is a JSON response of which only the "sub" field is decoded.
type userMeResp struct {
	Sub string `json:"sub"`
}
|
||||
|
||||
type listResp struct {
|
||||
Code int `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data struct {
|
||||
Total int `json:"total"`
|
||||
List []fileItem `json:"list"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
// fileItem is one entry of a file listing.
type fileItem struct {
	FileID   string `json:"fileId"`
	ParentID string `json:"parentId"`
	FileName string `json:"fileName"`
	FileSize int64  `json:"fileSize"`
	ResType  int    `json:"resType"`
	// CTime and UTime are integer timestamps from the "ctime" and "utime"
	// fields — presumably Unix seconds; confirm against the API.
	CTime int64 `json:"ctime"`
	UTime int64 `json:"utime"`
}
|
||||
|
||||
// downloadResp is the code/msg/data envelope of a download-link response.
// The data may carry a signed URL, a download URL, or both.
type downloadResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		SignedURL   string `json:"signedURL"`
		DownloadURL string `json:"downloadUrl"`
	} `json:"data"`
}
|
||||
|
||||
// createDirResp is the code/msg/data envelope of a create-directory
// response; the data describes the created entry.
type createDirResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		FileID   string `json:"fileId"`
		FileName string `json:"fileName"`
		ResType  int    `json:"resType"`
		CTime    int64  `json:"ctime"`
		UTime    int64  `json:"utime"`
	} `json:"data"`
}
|
||||
|
||||
// deleteResp is the code/msg/data envelope of a delete response; the data
// carries a task ID.
type deleteResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		TaskID string `json:"taskId"`
	} `json:"data"`
}
|
||||
|
||||
// taskStatusResp is the code/msg/data envelope of a task-status response.
type taskStatusResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		Status int `json:"status"`
	} `json:"data"`
}
|
||||
|
||||
type uploadTokenResp struct {
|
||||
Code int `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data uploadTokenData `json:"data"`
|
||||
}
|
||||
|
||||
// uploadTokenData is the payload of an upload-token response: the upload
// task, the object-storage target, and temporary credentials. Credentials
// can be decoded both from the top level and from the nested "creds" object.
type uploadTokenData struct {
	TaskID          string `json:"taskId"`
	ObjectPath      string `json:"objectPath"`
	BucketName      string `json:"bucketName"`
	EndPoint        string `json:"endPoint"`
	FullEndPoint    string `json:"fullEndPoint"`
	AccessKeyID     string `json:"accessKeyID"`
	SecretAccessKey string `json:"secretAccessKey"`
	SessionToken    string `json:"sessionToken"`
	Creds           struct {
		AccessKeyID     string `json:"accessKeyID"`
		SecretAccessKey string `json:"secretAccessKey"`
		SessionToken    string `json:"sessionToken"`
	} `json:"creds"`
}
|
||||
|
||||
// taskInfoResp is the code/msg/data envelope of a task-info response; the
// data carries a file ID.
type taskInfoResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		FileID string `json:"fileId"`
	} `json:"data"`
}
|
||||
|
||||
func unixOrZero(v int64) time.Time {
|
||||
if v <= 0 {
|
||||
return time.Time{}
|
||||
}
|
||||
return time.Unix(v, 0)
|
||||
}
|
||||
@@ -5,12 +5,14 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Drive 是多家网盘统一抽象。上层不区分盘,只区分 Kind。
|
||||
type Drive interface {
|
||||
// Kind 返回驱动代号:"quark" / "p115" / "p123" / "pikpak" / "wopan" / "onedrive" / "googledrive" / "localstorage"
|
||||
// Kind 返回驱动代号:"quark" / "p115" / "p123" / "pikpak" / "wopan" / "guangyapan" / "onedrive" / "googledrive" / "localstorage"
|
||||
Kind() string
|
||||
|
||||
// ID 返回该盘在 catalog 中的唯一标识
|
||||
@@ -40,6 +42,27 @@ type Drive interface {
|
||||
RootID() string
|
||||
}
|
||||
|
||||
// Remover is an optional drive capability. It mirrors OpenList's optional
// Remove interface: callers must type-assert a drive to Remover before
// deleting a source file.
type Remover interface {
	// Remove deletes the file identified by fileID.
	Remove(ctx context.Context, fileID string) error
}
|
||||
|
||||
// SourceFile carries the catalog metadata available when an administrator
// requests deletion of the original source file.
type SourceFile struct {
	// FileID is the file's ID.
	FileID string
	// ParentID is the ID of the containing directory.
	ParentID string
	// Name is the file name.
	Name string
	// Size is the file size — presumably in bytes; confirm against the catalog.
	Size int64
}
|
||||
|
||||
// SourceRemover is an optional, richer removal capability for providers whose
|
||||
// playback ID is not the same ID required by their delete API.
|
||||
type SourceRemover interface {
|
||||
RemoveSource(ctx context.Context, source SourceFile) error
|
||||
}
|
||||
|
||||
type Entry struct {
|
||||
ID string
|
||||
Name string
|
||||
@@ -98,3 +121,42 @@ func RateLimitRetryAfter(err error) (time.Duration, bool) {
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// TextMentionsHTTPStatus only looks for explicit numeric HTTP status contexts
// in errors from tools that do not expose structured response metadata.
//
// text is trimmed and lower-cased before matching. A status counts as
// mentioned when the text starts with "<code> " or contains the code right
// after one of the recognized markers ("status=", "status: ", "status ",
// "status code ", "http ", "server returned ", "code=", "code: "). Longer
// spellings such as "http status=" or "error_code: " are covered because
// they end in one of those markers.
//
// The code must not be followed by another digit, so "status=4290" is not
// treated as a mention of 429. Non-positive statuses are ignored. It returns
// false for empty text.
func TextMentionsHTTPStatus(text string, statuses ...int) bool {
	text = strings.ToLower(strings.TrimSpace(text))
	if text == "" {
		return false
	}

	markers := []string{
		"status=",
		"status: ",
		"status ",
		"status code ",
		"http ",
		"server returned ",
		"code=",
		"code: ",
	}

	// mentions reports whether needle occurs in text at a position where it
	// is not immediately followed by an ASCII digit.
	mentions := func(needle string) bool {
		for from := 0; from < len(text); {
			idx := strings.Index(text[from:], needle)
			if idx < 0 {
				return false
			}
			end := from + idx + len(needle)
			if end == len(text) || text[end] < '0' || text[end] > '9' {
				return true
			}
			// This hit was a prefix of a longer number; keep searching.
			from += idx + 1
		}
		return false
	}

	for _, status := range statuses {
		if status <= 0 {
			continue
		}
		code := strconv.Itoa(status)
		if strings.HasPrefix(text, code+" ") {
			return true
		}
		for _, marker := range markers {
			if mentions(marker + code) {
				return true
			}
		}
	}
	return false
}
|
||||
|
||||
func ErrorMentionsHTTPStatus(err error, statuses ...int) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
return TextMentionsHTTPStatus(err.Error(), statuses...)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
package drives
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTextMentionsHTTPStatus(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
want bool
|
||||
}{
|
||||
{name: "status context", text: "request failed with status: 429 Too Many Requests", want: true},
|
||||
{name: "http context", text: "http 503 service unavailable", want: true},
|
||||
{name: "server returned context", text: "Server returned 403 Forbidden", want: true},
|
||||
{name: "message only", text: "操作频繁,请稍后重试", want: false},
|
||||
{name: "unrelated number", text: "generated 429 bytes", want: false},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := TextMentionsHTTPStatus(tc.text, 403, 429, 503); got != tc.want {
|
||||
t.Fatalf("TextMentionsHTTPStatus(%q) = %v, want %v", tc.text, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -17,20 +18,29 @@ import (
|
||||
|
||||
// Kind is the driver code of the local-storage drive.
const Kind = "localstorage"

// maxSTRMBytes is the largest .strm file (64 KiB) that will be read;
// larger files are rejected.
const maxSTRMBytes = 64 * 1024
|
||||
|
||||
// Config is the input to New.
type Config struct {
	// ID is the drive's identifier.
	ID string
	// RootPath is the directory that serves as the storage root.
	RootPath string
	// STRMAllowOutsideRoot allows a .strm file to point at a local path
	// outside the storage root.
	// Off by default: a strm effectively lets /p/stream read any file on the
	// server, so this should only be enabled when the administrator knows
	// exactly what they are doing (for example when the strm library is kept
	// separate from the rclone mount directory).
	STRMAllowOutsideRoot bool
}
|
||||
|
||||
// Driver is the local-storage drive. Create it with New.
type Driver struct {
	// id is the drive's identifier, copied from Config.ID.
	id string
	// rootPath is the configured storage root, copied from Config.RootPath.
	rootPath string
	// strmAllowOutsideRoot mirrors Config.STRMAllowOutsideRoot: when true,
	// .strm targets outside the storage root are accepted.
	strmAllowOutsideRoot bool
}
|
||||
|
||||
func New(c Config) *Driver {
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
rootPath: c.RootPath,
|
||||
id: c.ID,
|
||||
rootPath: c.RootPath,
|
||||
strmAllowOutsideRoot: c.STRMAllowOutsideRoot,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,7 +132,13 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if info.IsDir() || !info.Mode().IsRegular() || info.Size() <= 0 {
|
||||
if info.IsDir() || !info.Mode().IsRegular() {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
if strings.EqualFold(filepath.Ext(p), ".strm") {
|
||||
return d.streamURLFromSTRM(ctx, p)
|
||||
}
|
||||
if info.Size() <= 0 {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
@@ -131,6 +147,115 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) streamURLFromSTRM(ctx context.Context, strmPath string) (*drives.StreamLink, error) {
|
||||
target, err := readSTRMTarget(strmPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if filepath.IsAbs(target) {
|
||||
return d.localSTRMLink(strmPath, target)
|
||||
}
|
||||
u, err := url.Parse(target)
|
||||
if err == nil {
|
||||
switch strings.ToLower(u.Scheme) {
|
||||
case "http", "https":
|
||||
if u.Host == "" {
|
||||
return nil, fmt.Errorf("localstorage: invalid strm url %q", target)
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
URL: target,
|
||||
Expires: time.Now().Add(24 * time.Hour),
|
||||
}, nil
|
||||
case "file":
|
||||
if u.Host != "" && !strings.EqualFold(u.Host, "localhost") {
|
||||
return nil, fmt.Errorf("localstorage: unsupported strm file url host %q", u.Host)
|
||||
}
|
||||
return d.localSTRMLink(strmPath, u.Path)
|
||||
case "":
|
||||
// Local path below.
|
||||
default:
|
||||
return nil, fmt.Errorf("localstorage: unsupported strm target scheme %q", u.Scheme)
|
||||
}
|
||||
} else if strings.Contains(target, "://") {
|
||||
return nil, fmt.Errorf("localstorage: invalid strm url %q: %w", target, err)
|
||||
}
|
||||
return d.localSTRMLink(strmPath, target)
|
||||
}
|
||||
|
||||
func readSTRMTarget(path string) (string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
data, err := io.ReadAll(io.LimitReader(f, maxSTRMBytes+1))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(data) > maxSTRMBytes {
|
||||
return "", errors.New("localstorage: strm file is too large")
|
||||
}
|
||||
lines := strings.Split(string(data), "\n")
|
||||
for i, line := range lines {
|
||||
if i == 0 {
|
||||
line = strings.TrimPrefix(line, "\ufeff")
|
||||
}
|
||||
line = strings.TrimSpace(line)
|
||||
if line != "" {
|
||||
return line, nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("localstorage: empty strm target")
|
||||
}
|
||||
|
||||
func (d *Driver) localSTRMLink(strmPath, target string) (*drives.StreamLink, error) {
|
||||
target = strings.TrimSpace(target)
|
||||
if target == "" {
|
||||
return nil, errors.New("localstorage: empty strm target")
|
||||
}
|
||||
|
||||
var p string
|
||||
if filepath.IsAbs(target) {
|
||||
p = filepath.Clean(target)
|
||||
} else {
|
||||
p = filepath.Join(filepath.Dir(strmPath), filepath.FromSlash(target))
|
||||
}
|
||||
p, err := filepath.Abs(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
root, err := d.root()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
realPath, within, err := realPathWithinRoot(root, p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !within && !d.strmAllowOutsideRoot {
|
||||
return nil, errors.New("localstorage: strm target escapes root (enable strm_allow_outside_root to allow)")
|
||||
}
|
||||
if strings.EqualFold(filepath.Ext(p), ".strm") || strings.EqualFold(filepath.Ext(realPath), ".strm") {
|
||||
return nil, errors.New("localstorage: nested strm target is not supported")
|
||||
}
|
||||
info, err := os.Stat(realPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if info.IsDir() || !info.Mode().IsRegular() || info.Size() <= 0 {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
URL: realPath,
|
||||
Expires: time.Now().Add(24 * time.Hour),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
@@ -139,6 +264,39 @@ func (d *Driver) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
p, rel, err := d.pathForID(fileID)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if rel == "" {
|
||||
return errors.New("localstorage: refusing to remove root")
|
||||
}
|
||||
info, err := os.Stat(p)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return errors.New("localstorage: refusing to remove directory")
|
||||
}
|
||||
if !info.Mode().IsRegular() {
|
||||
return errors.New("localstorage: refusing to remove non-regular file")
|
||||
}
|
||||
if err := os.Remove(p); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) root() (string, error) {
|
||||
raw := strings.TrimSpace(d.rootPath)
|
||||
if raw == "" {
|
||||
@@ -158,6 +316,8 @@ func (d *Driver) root() (string, error) {
|
||||
return filepath.Abs(raw)
|
||||
}
|
||||
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
func (d *Driver) pathForID(id string) (string, string, error) {
|
||||
root, err := d.root()
|
||||
if err != nil {
|
||||
@@ -177,6 +337,11 @@ func (d *Driver) pathForID(id string) (string, string, error) {
|
||||
if !pathWithinRoot(root, p) {
|
||||
return "", "", errors.New("localstorage: path escapes root")
|
||||
}
|
||||
if _, within, err := realPathWithinRoot(root, p); err != nil {
|
||||
return "", "", err
|
||||
} else if !within {
|
||||
return "", "", errors.New("localstorage: path escapes root")
|
||||
}
|
||||
return p, rel, nil
|
||||
}
|
||||
|
||||
@@ -188,6 +353,26 @@ func pathWithinRoot(root, path string) bool {
|
||||
return rel == "." || (rel != ".." && !strings.HasPrefix(rel, ".."+string(os.PathSeparator)))
|
||||
}
|
||||
|
||||
func realPathWithinRoot(root, path string) (string, bool, error) {
|
||||
realRoot, err := filepath.EvalSymlinks(root)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
realRoot, err = filepath.Abs(realRoot)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
realPath, err := filepath.EvalSymlinks(path)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
realPath, err = filepath.Abs(realPath)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
return realPath, pathWithinRoot(realRoot, realPath), nil
|
||||
}
|
||||
|
||||
func localStoragePathHint(configured string) string {
|
||||
cwd, _ := os.Getwd()
|
||||
parts := []string{}
|
||||
|
||||
@@ -58,6 +58,199 @@ func TestListEncodesRelativePathsAndStreamURLResolvesFile(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLResolvesHTTPSTRM(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
strmPath := filepath.Join(root, "movie.strm")
|
||||
target := "https://media.example/clip.mp4?token=abc"
|
||||
if err := os.WriteFile(strmPath, []byte("\ufeff\n "+target+"\n"), 0o644); err != nil {
|
||||
t.Fatalf("write strm: %v", err)
|
||||
}
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
|
||||
link, err := drv.StreamURL(context.Background(), encodeRel("movie.strm"))
|
||||
if err != nil {
|
||||
t.Fatalf("stream url: %v", err)
|
||||
}
|
||||
if link.URL != target {
|
||||
t.Fatalf("url = %q, want %q", link.URL, target)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLResolvesRelativeLocalSTRM(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
if err := os.MkdirAll(filepath.Join(root, "links"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir links: %v", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Join(root, "media"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir media: %v", err)
|
||||
}
|
||||
videoPath := filepath.Join(root, "media", "clip.mp4")
|
||||
if err := os.WriteFile(videoPath, []byte("video"), 0o644); err != nil {
|
||||
t.Fatalf("write video: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(root, "links", "movie.strm"), []byte("../media/clip.mp4\n"), 0o644); err != nil {
|
||||
t.Fatalf("write strm: %v", err)
|
||||
}
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
|
||||
link, err := drv.StreamURL(context.Background(), encodeRel("links/movie.strm"))
|
||||
if err != nil {
|
||||
t.Fatalf("stream url: %v", err)
|
||||
}
|
||||
if link.URL != videoPath {
|
||||
t.Fatalf("url = %q, want %q", link.URL, videoPath)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLRejectsInvalidSTRMTargets(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
setup func(t *testing.T, root string) string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "empty",
|
||||
setup: func(t *testing.T, root string) string {
|
||||
t.Helper()
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "empty.strm"), []byte("\n \r\n"))
|
||||
return "empty.strm"
|
||||
},
|
||||
want: "empty strm target",
|
||||
},
|
||||
{
|
||||
name: "escapes root",
|
||||
setup: func(t *testing.T, root string) string {
|
||||
t.Helper()
|
||||
writeLocalStorageTestFile(t, filepath.Join(filepath.Dir(root), "outside.mp4"), []byte("video"))
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "escape.strm"), []byte("../outside.mp4\n"))
|
||||
return "escape.strm"
|
||||
},
|
||||
want: "escapes root",
|
||||
},
|
||||
{
|
||||
name: "nested",
|
||||
setup: func(t *testing.T, root string) string {
|
||||
t.Helper()
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "nested.strm"), []byte("https://media.example/clip.mp4\n"))
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "outer.strm"), []byte("nested.strm\n"))
|
||||
return "outer.strm"
|
||||
},
|
||||
want: "nested strm target",
|
||||
},
|
||||
{
|
||||
name: "unsupported scheme",
|
||||
setup: func(t *testing.T, root string) string {
|
||||
t.Helper()
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "ftp.strm"), []byte("ftp://media.example/clip.mp4\n"))
|
||||
return "ftp.strm"
|
||||
},
|
||||
want: "unsupported strm target scheme",
|
||||
},
|
||||
{
|
||||
name: "too large",
|
||||
setup: func(t *testing.T, root string) string {
|
||||
t.Helper()
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "large.strm"), []byte(strings.Repeat("x", maxSTRMBytes+1)))
|
||||
return "large.strm"
|
||||
},
|
||||
want: "strm file is too large",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
rel := tt.setup(t, root)
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
|
||||
_, err := drv.StreamURL(context.Background(), encodeRel(rel))
|
||||
|
||||
if err == nil || !strings.Contains(err.Error(), tt.want) {
|
||||
t.Fatalf("error = %v, want contain %q", err, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLRejectsSTRMTargetEscapingRootThroughSymlink(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
writeLocalStorageTestFile(t, filepath.Join(outside, "secret.mp4"), []byte("secret"))
|
||||
if err := os.MkdirAll(filepath.Join(root, "links"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir links: %v", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Join(root, "real"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir real: %v", err)
|
||||
}
|
||||
if err := os.Symlink(outside, filepath.Join(root, "real", "outside")); err != nil {
|
||||
t.Fatalf("symlink: %v", err)
|
||||
}
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "links", "movie.strm"), []byte("../real/outside/secret.mp4\n"))
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
|
||||
_, err := drv.StreamURL(context.Background(), encodeRel("links/movie.strm"))
|
||||
|
||||
if err == nil || !strings.Contains(err.Error(), "strm target escapes root") {
|
||||
t.Fatalf("error = %v, want strm target escapes root", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLAllowsSTRMTargetOutsideRootWhenEnabled(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
target := filepath.Join(outside, "movie.mp4")
|
||||
writeLocalStorageTestFile(t, target, []byte("movie-data"))
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "movie.strm"), []byte(target+"\n"))
|
||||
|
||||
// 默认关闭:根目录外的目标仍被拒绝
|
||||
strict := New(Config{ID: "local", RootPath: root})
|
||||
if _, err := strict.StreamURL(context.Background(), encodeRel("movie.strm")); err == nil || !strings.Contains(err.Error(), "strm target escapes root") {
|
||||
t.Fatalf("default error = %v, want strm target escapes root", err)
|
||||
}
|
||||
|
||||
// 开启 strm_allow_outside_root 后放行
|
||||
relaxed := New(Config{ID: "local", RootPath: root, STRMAllowOutsideRoot: true})
|
||||
link, err := relaxed.StreamURL(context.Background(), encodeRel("movie.strm"))
|
||||
if err != nil {
|
||||
t.Fatalf("StreamURL with allow-outside-root: %v", err)
|
||||
}
|
||||
resolved, err := filepath.EvalSymlinks(target)
|
||||
if err != nil {
|
||||
t.Fatalf("eval target: %v", err)
|
||||
}
|
||||
if link.URL != resolved {
|
||||
t.Fatalf("link url = %q, want %q", link.URL, resolved)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLAllowOutsideRootStillRejectsNestedSTRM(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
writeLocalStorageTestFile(t, filepath.Join(outside, "inner.strm"), []byte("http://example.com/v.mp4\n"))
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "movie.strm"), []byte(filepath.Join(outside, "inner.strm")+"\n"))
|
||||
|
||||
drv := New(Config{ID: "local", RootPath: root, STRMAllowOutsideRoot: true})
|
||||
if _, err := drv.StreamURL(context.Background(), encodeRel("movie.strm")); err == nil || !strings.Contains(err.Error(), "nested strm") {
|
||||
t.Fatalf("error = %v, want nested strm rejection", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLRejectsSymlinkFileIDEscapingRoot(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
writeLocalStorageTestFile(t, filepath.Join(outside, "secret.mp4"), []byte("secret"))
|
||||
if err := os.Symlink(filepath.Join(outside, "secret.mp4"), filepath.Join(root, "link.mp4")); err != nil {
|
||||
t.Fatalf("symlink: %v", err)
|
||||
}
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
|
||||
_, err := drv.StreamURL(context.Background(), encodeRel("link.mp4"))
|
||||
|
||||
if err == nil || !strings.Contains(err.Error(), "path escapes root") {
|
||||
t.Fatalf("error = %v, want path escapes root", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLRejectsEscapingID(t *testing.T) {
|
||||
drv := New(Config{ID: "local", RootPath: t.TempDir()})
|
||||
escaped := base64.RawURLEncoding.EncodeToString([]byte("../secret.mp4"))
|
||||
@@ -100,6 +293,45 @@ func TestPathForIDAllowsRootPathSlash(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestScannerPersistsLocalStorageSTRM(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
root := t.TempDir()
|
||||
if err := os.MkdirAll(filepath.Join(root, "collection"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir collection: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(root, "collection", "clip.strm"), []byte("https://media.example/clip.mp4\n"), 0o644); err != nil {
|
||||
t.Fatalf("write strm: %v", err)
|
||||
}
|
||||
cat, err := catalog.Open(filepath.Join(t.TempDir(), "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
drv := New(Config{ID: "local", RootPath: root})
|
||||
sc := scanner.New(cat, drv, []string{".strm"}, nil, nil)
|
||||
stats, err := sc.Run(ctx, drv.RootID())
|
||||
if err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
if stats.Added != 1 {
|
||||
t.Fatalf("added = %d, want 1", stats.Added)
|
||||
}
|
||||
|
||||
fileID := encodeRel("collection/clip.strm")
|
||||
got, err := cat.GetVideo(ctx, Kind+"-local-"+fileID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.Ext != "strm" || got.FileID != fileID || got.ParentID != encodeRel("collection") {
|
||||
t.Fatalf("video = %#v, want local strm video under collection", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScannerPersistsLocalStorageVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
root := t.TempDir()
|
||||
@@ -134,7 +366,14 @@ func TestScannerPersistsLocalStorageVideo(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.DriveID != "local" || got.FileID != fileID || got.Category != "collection" {
|
||||
t.Fatalf("video = %#v, want local drive video in collection", got)
|
||||
if got.DriveID != "local" || got.FileID != fileID || got.ParentID != encodeRel("collection") {
|
||||
t.Fatalf("video = %#v, want local drive video under collection", got)
|
||||
}
|
||||
}
|
||||
|
||||
// writeLocalStorageTestFile is a test helper that writes data to path with
// mode 0o644 and aborts the calling test if the write fails.
func writeLocalStorageTestFile(t *testing.T, path string, data []byte) {
	t.Helper() // report failures at the caller's line, not inside this helper
	writeErr := os.WriteFile(path, data, 0o644)
	if writeErr == nil {
		return
	}
	t.Fatalf("write %s: %v", path, writeErr)
}
|
||||
|
||||
@@ -78,12 +78,38 @@ func (d *Driver) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
path, err := d.uploadPath(fileID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return errors.New("localupload: refusing to remove directory")
|
||||
}
|
||||
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RootID returns the driver's upload directory path, which this driver uses
// as its root identifier.
func (d *Driver) RootID() string {
	return d.uploadDirPath
}
|
||||
|
||||
func (d *Driver) uploadDir() string {
|
||||
return d.uploadDirPath
|
||||
}
|
||||
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
func (d *Driver) uploadPath(fileID string) (string, error) {
|
||||
if strings.TrimSpace(fileID) == "" || filepath.Base(fileID) != fileID {
|
||||
return "", errors.New("invalid upload file id")
|
||||
|
||||
@@ -501,6 +501,17 @@ func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("onedrive remove: empty file id")
|
||||
}
|
||||
if err := d.request(ctx, d.itemURL(fileID), http.MethodDelete, nil, nil); err != nil {
|
||||
return fmt.Errorf("onedrive remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) request(ctx context.Context, rawURL, method string, configure func(*resty.Request), out any) error {
|
||||
return d.requestOnce(ctx, rawURL, method, configure, out, true)
|
||||
}
|
||||
@@ -583,8 +594,8 @@ func (d *Driver) refresh(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func isRateLimitResponse(res *resty.Response, code, message string) bool {
|
||||
if isRateLimitCode(code) || isRateLimitMessage(message) {
|
||||
func isRateLimitResponse(res *resty.Response, code, _ string) bool {
|
||||
if isRateLimitCode(code) {
|
||||
return true
|
||||
}
|
||||
if res == nil {
|
||||
@@ -621,18 +632,6 @@ func isRateLimitCode(code string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func isRateLimitMessage(message string) bool {
|
||||
text := strings.ToLower(strings.TrimSpace(message))
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "throttl") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "activity limit") ||
|
||||
strings.Contains(text, "temporarily blocked")
|
||||
}
|
||||
|
||||
func onedriveRateLimitError(res *resty.Response, message string) error {
|
||||
if strings.TrimSpace(message) == "" {
|
||||
message = "onedrive rate limited"
|
||||
@@ -741,3 +740,4 @@ func guessMime(name string) string {
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
@@ -214,7 +214,7 @@ func TestGraph429ReturnsRateLimitErrorWithRetryAfter(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGraphThrottleMessageReturnsRateLimitError(t *testing.T) {
|
||||
func TestGraphThrottleMessageDoesNotReturnRateLimitError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusForbidden)
|
||||
@@ -238,11 +238,11 @@ func TestGraphThrottleMessageReturnsRateLimitError(t *testing.T) {
|
||||
|
||||
_, err := d.StreamURL(context.Background(), "file-id")
|
||||
if err == nil {
|
||||
t.Fatal("list succeeded, want rate limit error")
|
||||
t.Fatal("list succeeded, want graph error")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
if errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want non-rate-limit error", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,11 +20,12 @@ import (
|
||||
)
|
||||
|
||||
type Driver struct {
|
||||
id string
|
||||
cookie string
|
||||
rootID string
|
||||
client *sdk.Pan115Client
|
||||
ua string
|
||||
id string
|
||||
cookie string
|
||||
rootID string
|
||||
client *sdk.Pan115Client
|
||||
ua string
|
||||
uploadTempDir string
|
||||
|
||||
listMu sync.Mutex
|
||||
lastListAt time.Time
|
||||
@@ -32,10 +33,11 @@ type Driver struct {
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
ID string
|
||||
Cookie string // 形如 "UID=xxx; CID=xxx; SEID=xxx; KID=xxx"
|
||||
RootID string // 默认 "0"
|
||||
UA string // 默认 UA115Browser
|
||||
ID string
|
||||
Cookie string // 形如 "UID=xxx; CID=xxx; SEID=xxx; KID=xxx"
|
||||
RootID string // 默认 "0"
|
||||
UA string // 默认 UA115Browser
|
||||
UploadTempDir string
|
||||
}
|
||||
|
||||
func New(c Config) *Driver {
|
||||
@@ -48,11 +50,12 @@ func New(c Config) *Driver {
|
||||
ua = sdk.UA115Browser
|
||||
}
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
cookie: c.Cookie,
|
||||
rootID: rootID,
|
||||
ua: ua,
|
||||
listInterval: 2 * time.Second,
|
||||
id: c.ID,
|
||||
cookie: c.Cookie,
|
||||
rootID: rootID,
|
||||
ua: ua,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
listInterval: 2 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,7 +90,7 @@ func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error)
|
||||
// p115ListCooldown 是列目录触发疑似风控错误时的冷却时长。
|
||||
//
|
||||
// 历史上是 [30min × 3],3 次都失败就放弃;新策略改为 10 分钟无限重试 ——
|
||||
// 只要错误仍属 transient(429 / 405 / WAF / blocked / 安全威胁 / unexpected),
|
||||
// 只要错误仍属明确 HTTP transient 状态(429 / 405),
|
||||
// 就持续等 10 分钟再发一次列目录请求,直到成功或 ctx 取消。这样即使 115
|
||||
// 风控持续较长时间,扫描会自然延后到风控结束,不再丢半棵子树。
|
||||
const p115ListCooldown = 10 * time.Minute
|
||||
@@ -156,17 +159,7 @@ func isTransient115UpstreamError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "405") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "security") ||
|
||||
strings.Contains(text, "waf") ||
|
||||
strings.Contains(text, "unexpected error") ||
|
||||
strings.Contains(text, "访问被阻断") ||
|
||||
strings.Contains(text, "安全威胁")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusMethodNotAllowed, http.StatusTooManyRequests)
|
||||
}
|
||||
|
||||
// ListDirsOnly 只列指定目录的直接**子目录**,不返回文件条目。专为 admin 后台
|
||||
@@ -357,7 +350,7 @@ func (d *Driver) UploadAndReportSha1(ctx context.Context, parentID, name string,
|
||||
parentID = d.rootID
|
||||
}
|
||||
|
||||
tmp, sha1Hex, written, err := bufferAndHashSha1(r, size)
|
||||
tmp, sha1Hex, written, err := bufferAndHashSha1(d.uploadTempDir, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
@@ -461,12 +454,35 @@ func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if d.client == nil {
|
||||
return errors.New("p115 remove: driver not initialized")
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("p115 remove: empty fileID")
|
||||
}
|
||||
if err := d.client.Delete(fileID); err != nil {
|
||||
return fmt.Errorf("p115 remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// bufferAndHashSha1 把 r 全量复制到一个临时文件,同时计算 SHA1。
|
||||
// 返回临时文件(位置在末尾,需调用方 Seek 回 0)、SHA1 hex 大写、实际字节数。
|
||||
//
|
||||
// 调用方负责 Close + Remove 临时文件。
|
||||
func bufferAndHashSha1(r io.Reader, declaredSize int64) (*os.File, string, int64, error) {
|
||||
tmp, err := os.CreateTemp("", "p115-upload-*.bin")
|
||||
func bufferAndHashSha1(tempDir string, r io.Reader, declaredSize int64) (*os.File, string, int64, error) {
|
||||
tempDir = strings.TrimSpace(tempDir)
|
||||
if tempDir != "" {
|
||||
if err := os.MkdirAll(tempDir, 0o755); err != nil {
|
||||
return nil, "", 0, fmt.Errorf("p115 upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(tempDir, "p115-upload-*.bin")
|
||||
if err != nil {
|
||||
return nil, "", 0, fmt.Errorf("p115 upload: create tmp: %w", err)
|
||||
}
|
||||
@@ -563,3 +579,4 @@ func guessMime(name string) string {
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -22,8 +23,9 @@ func TestIsTransient115ListError(t *testing.T) {
|
||||
want bool
|
||||
}{
|
||||
{name: "nil", err: nil, want: false},
|
||||
{name: "blocked html", err: errors.New(`<!doctype html><title>405</title>Sorry, your request has been blocked as it may cause potential threats to the server's security.`), want: true},
|
||||
{name: "chinese waf", err: errors.New("很抱歉,由于您访问的URL有可能对网站造成安全威胁,您的访问被阻断。"), want: true},
|
||||
{name: "blocked html without status context", err: errors.New(`<!doctype html><title>405</title>Sorry, your request has been blocked as it may cause potential threats to the server's security.`), want: false},
|
||||
{name: "chinese waf", err: errors.New("很抱歉,由于您访问的URL有可能对网站造成安全威胁,您的访问被阻断。"), want: false},
|
||||
{name: "status 405", err: errors.New("request failed with status: 405"), want: true},
|
||||
{name: "rate limit", err: errors.New("429 too many requests"), want: true},
|
||||
{name: "regular auth error", err: errors.New("invalid credential"), want: false},
|
||||
}
|
||||
@@ -43,10 +45,10 @@ func TestWrap115StreamTransientError(t *testing.T) {
|
||||
err error
|
||||
wantRateLimit bool
|
||||
}{
|
||||
{name: "unexpected", err: errors.New("unexpected error"), wantRateLimit: true},
|
||||
{name: "unexpected", err: errors.New("unexpected error"), wantRateLimit: false},
|
||||
{name: "405 blocked", err: errors.New("405 request has been blocked"), wantRateLimit: true},
|
||||
{name: "429", err: errors.New("429 too many requests"), wantRateLimit: true},
|
||||
{name: "blocked", err: errors.New("blocked by waf"), wantRateLimit: true},
|
||||
{name: "blocked", err: errors.New("blocked by waf"), wantRateLimit: false},
|
||||
{name: "auth", err: errors.New("invalid credential"), wantRateLimit: false},
|
||||
}
|
||||
|
||||
@@ -85,7 +87,7 @@ func TestBufferAndHashSha1(t *testing.T) {
|
||||
wantHex := strings.ToUpper(hex.EncodeToString(want[:]))
|
||||
|
||||
t.Run("declared size matches", func(t *testing.T) {
|
||||
tmp, gotHex, n, err := bufferAndHashSha1(bytes.NewReader(body), int64(len(body)))
|
||||
tmp, gotHex, n, err := bufferAndHashSha1("", bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashSha1 returned error: %v", err)
|
||||
}
|
||||
@@ -110,14 +112,14 @@ func TestBufferAndHashSha1(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("declared size mismatch returns error", func(t *testing.T) {
|
||||
_, _, _, err := bufferAndHashSha1(bytes.NewReader(body), int64(len(body))+1)
|
||||
_, _, _, err := bufferAndHashSha1("", bytes.NewReader(body), int64(len(body))+1)
|
||||
if err == nil {
|
||||
t.Fatal("expected size mismatch error, got nil")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("declared size zero is unchecked", func(t *testing.T) {
|
||||
tmp, gotHex, n, err := bufferAndHashSha1(bytes.NewReader(body), 0)
|
||||
tmp, gotHex, n, err := bufferAndHashSha1("", bytes.NewReader(body), 0)
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashSha1 returned error: %v", err)
|
||||
}
|
||||
@@ -129,6 +131,18 @@ func TestBufferAndHashSha1(t *testing.T) {
|
||||
t.Errorf("written = %d, want %d", n, len(body))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("uses configured temp dir", func(t *testing.T) {
|
||||
tempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
tmp, _, _, err := bufferAndHashSha1(tempDir, bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashSha1 returned error: %v", err)
|
||||
}
|
||||
defer cleanup(tmp)
|
||||
if gotDir := filepath.Dir(tmp.Name()); gotDir != tempDir {
|
||||
t.Fatalf("tmp dir = %q, want %q", gotDir, tempDir)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestUploadAndReportSha1RejectsInvalidArgs 检查空 reader / 空 name / 负 size 在
|
||||
|
||||
@@ -42,6 +42,7 @@ const (
|
||||
endpointDownloadInfo = "/file/download_info"
|
||||
endpointMkdir = "/file/upload_request"
|
||||
endpointRename = "/file/rename"
|
||||
endpointTrash = "/file/trash"
|
||||
endpointUpload = "/file/upload_request"
|
||||
endpointS3Auth = "/file/s3_upload_object/auth"
|
||||
endpointS3Parts = "/file/s3_repare_upload_parts_batch"
|
||||
@@ -69,6 +70,7 @@ type Driver struct {
|
||||
httpClient *http.Client
|
||||
|
||||
onTokenUpdate func(access string)
|
||||
uploadTempDir string
|
||||
|
||||
tokenMu sync.RWMutex
|
||||
|
||||
@@ -89,6 +91,7 @@ type Config struct {
|
||||
|
||||
MainAPIBaseURL string
|
||||
LoginAPIBaseURL string
|
||||
UploadTempDir string
|
||||
|
||||
OnTokenUpdate func(access string)
|
||||
}
|
||||
@@ -122,6 +125,7 @@ func New(c Config) *Driver {
|
||||
referer: defaultReferer,
|
||||
userAgent: defaultUserAgent,
|
||||
onTokenUpdate: c.OnTokenUpdate,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
client: resty.New().
|
||||
SetTimeout(30*time.Second).
|
||||
SetHeader("Accept", "application/json, text/plain, */*"),
|
||||
@@ -259,8 +263,8 @@ func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader,
|
||||
|
||||
// UploadResult 是 UploadAndReportHash 的返回值。
|
||||
//
|
||||
// FileID 是 123 云盘分配的新文件 ID;Hash 是本次上传的 MD5 HEX(小写),
|
||||
// 与 123 云盘列表返回的 Etag 一致;Size 是实际上传字节数。
|
||||
// FileID 是 123网盘分配的新文件 ID;Hash 是本次上传的 MD5 HEX(小写),
|
||||
// 与 123网盘列表返回的 Etag 一致;Size 是实际上传字节数。
|
||||
type UploadResult struct {
|
||||
FileID string
|
||||
Hash string
|
||||
@@ -269,7 +273,7 @@ type UploadResult struct {
|
||||
|
||||
// UploadAndReportHash 把 r 上传到 parentID 目录下的指定文件名,返回新文件元数据。
|
||||
//
|
||||
// 123 云盘 Web 上传协议需要先计算文件 MD5 作为 etag 申请 upload_request。
|
||||
// 123网盘 Web 上传协议需要先计算文件 MD5 作为 etag 申请 upload_request。
|
||||
// 命中 Reuse 时服务端已经秒传;否则用返回的 S3 预签名 URL 分片 PUT,最后
|
||||
// 调 upload_complete/v2 完成。
|
||||
func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
@@ -288,7 +292,7 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
parentID = d.rootID
|
||||
}
|
||||
|
||||
tmp, md5Hex, actualSize, err := bufferAndHashMD5(r, size)
|
||||
tmp, md5Hex, actualSize, err := bufferAndHashMD5(d.uploadTempDir, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
@@ -522,7 +526,7 @@ func (d *Driver) cacheUploadedFile(fileID, parentID, name, md5Hex string, size i
|
||||
}, parentID)
|
||||
}
|
||||
|
||||
// Rename 调用 123 云盘 Web API 把指定 fileID 重命名为 newName。
|
||||
// Rename 调用 123网盘 Web API 把指定 fileID 重命名为 newName。
|
||||
func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
@@ -545,6 +549,32 @@ func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("123pan remove: empty file id")
|
||||
}
|
||||
f, _, err := d.findFile(ctx, fileID)
|
||||
if err != nil {
|
||||
if strings.Contains(strings.ToLower(err.Error()), "not found") {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("123pan remove metadata: %w", err)
|
||||
}
|
||||
body := map[string]any{
|
||||
"driveId": 0,
|
||||
"operation": true,
|
||||
"fileTrashInfoList": []panFile{f},
|
||||
}
|
||||
if _, err := d.request(ctx, endpointTrash, http.MethodPost, func(req *resty.Request) {
|
||||
req.SetBody(body)
|
||||
}, nil); err != nil {
|
||||
return fmt.Errorf("123pan remove: %w", err)
|
||||
}
|
||||
d.removeCachedFile(fileID)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
parts := splitPath(pathFromRoot)
|
||||
currentID := d.rootID
|
||||
@@ -583,7 +613,7 @@ func (d *Driver) makeDir(ctx context.Context, parentID, name string) (string, er
|
||||
if resp.Data.FileID != 0 {
|
||||
return strconv.FormatInt(resp.Data.FileID, 10), nil
|
||||
}
|
||||
// 123 云盘创建目录的返回字段不稳定;创建成功但没回 fileId 时回读父目录确认。
|
||||
// 123网盘创建目录的返回字段不稳定;创建成功但没回 fileId 时回读父目录确认。
|
||||
childID, err := d.findChildDir(ctx, parentID, name)
|
||||
if err != nil {
|
||||
return "", err
|
||||
@@ -727,8 +757,8 @@ func (d *Driver) request(ctx context.Context, endpoint, method string, configure
|
||||
return nil, errors.New("123pan request: unauthorized")
|
||||
}
|
||||
|
||||
func isP123RateLimitResponse(res *resty.Response, code int, message string) bool {
|
||||
if code == http.StatusTooManyRequests || isP123RateLimitMessage(message) {
|
||||
func isP123RateLimitResponse(res *resty.Response, code int, _ string) bool {
|
||||
if code == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
if res == nil {
|
||||
@@ -737,7 +767,7 @@ func isP123RateLimitResponse(res *resty.Response, code int, message string) bool
|
||||
return isP123RateLimitHTTPResponse(res.StatusCode(), res.Header().Get("Retry-After"), res.String())
|
||||
}
|
||||
|
||||
func isP123RateLimitHTTPResponse(status int, retryAfter, body string) bool {
|
||||
func isP123RateLimitHTTPResponse(status int, retryAfter, _ string) bool {
|
||||
if status == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
@@ -747,35 +777,9 @@ func isP123RateLimitHTTPResponse(status int, retryAfter, body string) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if isP123RateLimitMessage(body) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isP123RateLimitMessage(message string) bool {
|
||||
text := strings.ToLower(strings.TrimSpace(message))
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(text, "请求太频繁") ||
|
||||
strings.Contains(text, "请求过于频繁") ||
|
||||
strings.Contains(text, "请求频繁") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "频率限制") ||
|
||||
strings.Contains(text, "请求次数过多") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "rate-limit") ||
|
||||
strings.Contains(text, "ratelimit") ||
|
||||
strings.Contains(text, "throttl") ||
|
||||
strings.Contains(text, "temporarily blocked") ||
|
||||
strings.Contains(text, "request has been blocked") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "访问被阻断")
|
||||
}
|
||||
|
||||
func p123RateLimitError(res *resty.Response, code int, message string) error {
|
||||
if strings.TrimSpace(message) == "" {
|
||||
message = "123pan rate limited"
|
||||
@@ -942,6 +946,12 @@ func (d *Driver) renameCachedFile(fileID, newName string) {
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Driver) removeCachedFile(fileID string) {
|
||||
d.fileMu.Lock()
|
||||
delete(d.files, fileID)
|
||||
d.fileMu.Unlock()
|
||||
}
|
||||
|
||||
func (d *Driver) cachedFile(fileID string) (panFile, string, bool) {
|
||||
d.fileMu.RLock()
|
||||
defer d.fileMu.RUnlock()
|
||||
@@ -1008,7 +1018,7 @@ func loginError(message string) error {
|
||||
message = strings.TrimSpace(message)
|
||||
if strings.Contains(message, "境外登录风险") ||
|
||||
(strings.Contains(message, "短信验证码") && strings.Contains(message, "微信")) {
|
||||
return errors.New("123pan login: 账号密码登录被 123 云盘风控拦截,请在浏览器完成短信/微信验证后复制 access_token,并在后台编辑该 123 云盘时只填写 access_token")
|
||||
return errors.New("123pan login: 账号密码登录被 123网盘风控拦截,请在浏览器完成短信/微信验证后复制 access_token,并在后台编辑该 123网盘时只填写 access_token")
|
||||
}
|
||||
if message == "" {
|
||||
message = "login failed"
|
||||
@@ -1051,8 +1061,14 @@ func splitPath(p string) []string {
|
||||
return strings.Split(p, "/")
|
||||
}
|
||||
|
||||
func bufferAndHashMD5(r io.Reader, declaredSize int64) (*os.File, string, int64, error) {
|
||||
tmp, err := os.CreateTemp("", "p123-upload-*.bin")
|
||||
func bufferAndHashMD5(tempDir string, r io.Reader, declaredSize int64) (*os.File, string, int64, error) {
|
||||
tempDir = strings.TrimSpace(tempDir)
|
||||
if tempDir != "" {
|
||||
if err := os.MkdirAll(tempDir, 0o755); err != nil {
|
||||
return nil, "", 0, fmt.Errorf("123pan upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(tempDir, "p123-upload-*.bin")
|
||||
if err != nil {
|
||||
return nil, "", 0, fmt.Errorf("123pan upload: create tmp: %w", err)
|
||||
}
|
||||
@@ -1111,3 +1127,4 @@ func guessMime(name string) string {
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -458,6 +460,29 @@ func TestUploadPresignedPUT429ReturnsRateLimitError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBufferAndHashMD5UsesConfiguredTempDir(t *testing.T) {
|
||||
body := []byte("hello-123-upload-test")
|
||||
tempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
tmp, gotHex, n, err := bufferAndHashMD5(tempDir, bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashMD5 returned error: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmp.Name())
|
||||
}()
|
||||
if gotDir := filepath.Dir(tmp.Name()); gotDir != tempDir {
|
||||
t.Fatalf("tmp dir = %q, want %q", gotDir, tempDir)
|
||||
}
|
||||
want := md5.Sum(body)
|
||||
if gotHex != fmt.Sprintf("%x", want) {
|
||||
t.Fatalf("md5 = %s, want %x", gotHex, want)
|
||||
}
|
||||
if n != int64(len(body)) {
|
||||
t.Fatalf("written = %d, want %d", n, len(body))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenameSendsExpectedBody(t *testing.T) {
|
||||
var renameRequest map[string]any
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
@@ -278,7 +278,7 @@ func qrScanPlatformText(platform int) string {
|
||||
case 4:
|
||||
return "微信"
|
||||
case 7:
|
||||
return "123 云盘 App"
|
||||
return "123网盘 App"
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -150,7 +150,7 @@ func TestQRCodePollUsesAppToken(t *testing.T) {
|
||||
if wxCodeRequested {
|
||||
t.Fatalf("wx_code should not be called when app token is already returned")
|
||||
}
|
||||
if got.AccessToken != "app-token" || got.PlatformText != "123 云盘 App" {
|
||||
if got.AccessToken != "app-token" || got.PlatformText != "123网盘 App" {
|
||||
t.Fatalf("status = %#v, want app token", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"path"
|
||||
@@ -43,8 +44,10 @@ type Driver struct {
|
||||
algorithms []string
|
||||
userAgent string
|
||||
|
||||
client *resty.Client
|
||||
onTokenUpdate func(access, refresh, captcha, deviceID string)
|
||||
client *resty.Client
|
||||
onTokenUpdate func(access, refresh, captcha, deviceID string)
|
||||
uploadToOSSFunc func(context.Context, *s3Params, io.Reader) error
|
||||
uploadTempDir string
|
||||
|
||||
// captchaMu serializes captcha-token refreshes triggered by 4002 / 9
|
||||
// recovery in requestOnce. Without it, N concurrent callers all hitting
|
||||
@@ -75,6 +78,7 @@ type Config struct {
|
||||
DeviceID string
|
||||
RootID string
|
||||
DisableMediaLink bool
|
||||
UploadTempDir string
|
||||
OnTokenUpdate func(access, refresh, captcha, deviceID string)
|
||||
}
|
||||
|
||||
@@ -107,6 +111,7 @@ func New(c Config) *Driver {
|
||||
deviceID: deviceID,
|
||||
disableMediaLink: c.DisableMediaLink,
|
||||
onTokenUpdate: c.OnTokenUpdate,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
client: resty.New().
|
||||
SetTimeout(30*time.Second).
|
||||
SetHeader("Accept", "application/json, text/plain, */*"),
|
||||
@@ -173,8 +178,8 @@ func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error)
|
||||
|
||||
// pikpakListCooldown 是列目录触发疑似限流错误时的冷却时长。
|
||||
//
|
||||
// 与 p115 driver 的 listCooldown 同语义:只要错误属 transient
|
||||
// (error_code=10 / HTTP 429 / 5xx / 通用 "rate limit" 文本),就持续
|
||||
// 与 p115 driver 的 listCooldown 同语义:只要错误属明确限流/临时状态
|
||||
// (结构化 error_code=10 / HTTP 429 / 5xx),就持续
|
||||
// 等 10 分钟再发一次列目录请求,直到成功或 ctx 取消。这样即使 PikPak
|
||||
// 风控持续较长时间,扫描会自然延后到风控结束,不再丢半棵子树。
|
||||
const pikpakListCooldown = 10 * time.Minute
|
||||
@@ -240,7 +245,6 @@ func pikpakSleepContext(ctx context.Context, d time.Duration) error {
|
||||
//
|
||||
// - PikPak 业务码 error_code=10 ("操作频繁",见 OpenList drivers/pikpak/util.go)
|
||||
// - HTTP 429 / 500 / 502 / 503 / 504 / 509(rclone 也把这些归为 retry)
|
||||
// - 通用文本:rate limit / too many requests / blocked / temporarily unavailable
|
||||
//
|
||||
// 不包含 4122/4121/16(access_token 过期)和 9/4002(captcha 过期)—— 这些
|
||||
// 由 requestOnce 内部已经做过一次自动恢复重试;如果恢复后仍然报这类错误,
|
||||
@@ -257,22 +261,14 @@ func isTransientPikPakListError(err error) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "error_code=10") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "http 509") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "operation frequent") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "temporarily unavailable") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
return drives.ErrorMentionsHTTPStatus(err,
|
||||
http.StatusTooManyRequests,
|
||||
http.StatusInternalServerError,
|
||||
http.StatusBadGateway,
|
||||
http.StatusServiceUnavailable,
|
||||
http.StatusGatewayTimeout,
|
||||
509,
|
||||
)
|
||||
}
|
||||
|
||||
func (d *Driver) Stat(ctx context.Context, fileID string) (*drives.Entry, error) {
|
||||
@@ -354,6 +350,19 @@ func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("pikpak remove: empty file id")
|
||||
}
|
||||
if err := d.request(ctx, filesURL+":batchTrash", http.MethodPost, func(req *resty.Request) {
|
||||
req.SetBody(map[string]any{"ids": []string{fileID}})
|
||||
}, nil); err != nil {
|
||||
return fmt.Errorf("pikpak remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
currentID := d.rootID
|
||||
for _, name := range splitPath(pathFromRoot) {
|
||||
@@ -563,3 +572,4 @@ func ParseBoolDefault(raw string, def bool) bool {
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
@@ -110,7 +110,7 @@ func TestEnsureDirReusesExistingFolder(t *testing.T) {
|
||||
"files": []map[string]any{{
|
||||
"id": "existing-folder-id",
|
||||
"kind": "drive#folder",
|
||||
"name": "91 Spider",
|
||||
"name": "Crawler Uploads",
|
||||
}},
|
||||
})
|
||||
case http.MethodPost:
|
||||
@@ -124,7 +124,7 @@ func TestEnsureDirReusesExistingFolder(t *testing.T) {
|
||||
defer srv.Close()
|
||||
|
||||
d := newTestDriver(t, srv)
|
||||
got, err := d.EnsureDir(context.Background(), "91 Spider")
|
||||
got, err := d.EnsureDir(context.Background(), "Crawler Uploads")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure dir: %v", err)
|
||||
}
|
||||
@@ -150,7 +150,7 @@ func TestEnsureDirCreatesMissingFolder(t *testing.T) {
|
||||
writePikPakJSON(t, w, map[string]any{
|
||||
"id": "new-folder-id",
|
||||
"kind": "drive#folder",
|
||||
"name": "91 Spider",
|
||||
"name": "Crawler Uploads",
|
||||
})
|
||||
default:
|
||||
t.Fatalf("unexpected method %s", r.Method)
|
||||
@@ -160,14 +160,14 @@ func TestEnsureDirCreatesMissingFolder(t *testing.T) {
|
||||
defer srv.Close()
|
||||
|
||||
d := newTestDriver(t, srv)
|
||||
id, err := d.EnsureDir(context.Background(), "91 Spider")
|
||||
id, err := d.EnsureDir(context.Background(), "Crawler Uploads")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure dir: %v", err)
|
||||
}
|
||||
if id != "new-folder-id" {
|
||||
t.Fatalf("dir id = %q, want new-folder-id", id)
|
||||
}
|
||||
if got.Kind != "drive#folder" || got.ParentID != "root-id" || got.Name != "91 Spider" {
|
||||
if got.Kind != "drive#folder" || got.ParentID != "root-id" || got.Name != "Crawler Uploads" {
|
||||
t.Fatalf("create folder body = %#v", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@ func isCaptchaTokenRejectedCode(code int64) bool {
|
||||
}
|
||||
|
||||
// APIError is the public alias for the PikPak API error response. Callers
|
||||
// outside this package (e.g. the spider91→PikPak migrator, tests) can either
|
||||
// outside this package (e.g. crawler upload workers and tests) can either
|
||||
// construct it for fakes or unwrap it via errors.As. Prefer IsCaptchaError
|
||||
// over hard-coding the numeric error codes.
|
||||
type APIError = errResp
|
||||
|
||||
@@ -6,7 +6,10 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -26,7 +29,7 @@ import (
|
||||
// - 未命中:resumable.params 含 S3 兼容凭证(access_key / secret /
|
||||
// bucket / endpoint / key / security_token)
|
||||
//
|
||||
// 3. 用 Aliyun OSS SDK PutObject 把字节传到 endpoint+bucket+key
|
||||
// 3. 用 Aliyun OSS SDK PutObject 把字节传到 PikPak 返回的临时 OSS endpoint
|
||||
//
|
||||
// 4. PikPak 服务端轮询 OSS,发现完成后把 resp.File.ID 标记为可用;
|
||||
// 所以 Upload 完成后直接返回 resp.File.ID 即可(一开始就有,
|
||||
@@ -36,9 +39,11 @@ const (
|
||||
ossSecurityTokenHeaderName = "X-OSS-Security-Token"
|
||||
ossUserAgent = "aliyun-sdk-android/2.9.13(Linux/Android 14/M2004j7ac;UKQ1.231108.001)"
|
||||
// 单次 PutObject 的硬上限(OSS 文档限制 5GiB;保守用 5GiB-1)。
|
||||
// spider91 视频通常 ~100MiB,远低于该值。超过则需走 multipart,
|
||||
// 当前未实现,遇到会显式报错。
|
||||
// 超过该值需走 multipart;当前未实现,遇到会显式报错。
|
||||
maxSinglePutSize = 5*1024*1024*1024 - 1
|
||||
// 首次上传失败后最多再重试 3 次。每次重试都会重新申请 PikPak
|
||||
// upload session,以避开偶发不可解析/不可达的临时上传 endpoint。
|
||||
pikpakUploadMaxAttempts = 4
|
||||
)
|
||||
|
||||
// uploadTaskData 是 POST /drive/v1/files 的响应结构。
|
||||
@@ -73,6 +78,20 @@ type UploadResult struct {
|
||||
Size int64
|
||||
}
|
||||
|
||||
type preparedUploadBody struct {
|
||||
reader io.ReadSeeker
|
||||
start int64
|
||||
cleanup func()
|
||||
}
|
||||
|
||||
func (b preparedUploadBody) rewind() error {
|
||||
if b.reader == nil {
|
||||
return errors.New("pikpak upload: nil upload body")
|
||||
}
|
||||
_, err := b.reader.Seek(b.start, io.SeekStart)
|
||||
return err
|
||||
}
|
||||
|
||||
// Upload 实现 drives.Drive 接口;只返回 fileID。
|
||||
// 完整上传元数据见 UploadAndReportHash。
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
@@ -85,7 +104,7 @@ func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader,
|
||||
|
||||
// UploadAndReportHash 上传并返回 file ID + GCID + 实际字节数。
|
||||
//
|
||||
// 用于 spider91 → PikPak 迁移 worker:上传完后直接把 hash 写回 catalog
|
||||
// 用于 crawler upload worker:上传完后直接把 hash 写回 catalog
|
||||
// 的 content_hash 字段,避免再读一次本地文件做 hash。
|
||||
//
|
||||
// 参数:
|
||||
@@ -98,8 +117,7 @@ func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader,
|
||||
// - 必须先算 GCID 再申请上传会话(PikPak API 要求 hash 字段),
|
||||
// 所以这里先 io.Copy 到临时文件并同步算 GCID。
|
||||
// - 命中秒传时不发任何字节;否则用 OSS PutObject 上传。
|
||||
// - 单次 PutObject 上限保守用 5GiB-1。spider91 视频远小于此值,
|
||||
// 超出该值会报错(暂不实现 multipart)。
|
||||
// - 单次 PutObject 上限保守用 5GiB-1,超出该值会报错(暂不实现 multipart)。
|
||||
func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
if r == nil {
|
||||
return UploadResult{}, errors.New("pikpak upload: nil reader")
|
||||
@@ -119,23 +137,59 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
parentID = d.rootID
|
||||
}
|
||||
|
||||
// 1) 把 r 全量缓冲到临时文件,同时算 GCID。
|
||||
tmp, gcidHex, actualSize, err := bufferAndHashGCID(r, size)
|
||||
// 1) 算 GCID,并准备一个可重试读取的 body。爬虫迁移传入的是
|
||||
// *os.File,可直接复用原文件,避免再占用一份视频大小的临时空间。
|
||||
body, gcidHex, actualSize, err := d.prepareUploadBody(r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
defer func() {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmp.Name())
|
||||
}()
|
||||
if body.cleanup != nil {
|
||||
defer body.cleanup()
|
||||
}
|
||||
|
||||
// 2) 申请上传会话。
|
||||
result := UploadResult{Hash: gcidHex, Size: actualSize}
|
||||
var lastErr error
|
||||
for attempt := 1; attempt <= pikpakUploadMaxAttempts; attempt++ {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
|
||||
resp, err := d.requestUploadSession(ctx, parentID, name, actualSize, gcidHex)
|
||||
if err != nil {
|
||||
lastErr = fmt.Errorf("pikpak upload: request session: %w", err)
|
||||
if !shouldRetryPikPakUploadAttempt(lastErr, attempt) {
|
||||
return UploadResult{}, lastErr
|
||||
}
|
||||
d.logUploadRetry(name, attempt, lastErr)
|
||||
if err := pikpakSleepContext(ctx, pikpakUploadRetryDelay(attempt)); err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
out, err := d.completeUploadAttempt(ctx, body, parentID, name, result, resp)
|
||||
if err == nil {
|
||||
return out, nil
|
||||
}
|
||||
lastErr = err
|
||||
if !shouldRetryPikPakUploadAttempt(lastErr, attempt) {
|
||||
return UploadResult{}, lastErr
|
||||
}
|
||||
d.logUploadRetry(name, attempt, lastErr)
|
||||
if err := pikpakSleepContext(ctx, pikpakUploadRetryDelay(attempt)); err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
}
|
||||
return UploadResult{}, lastErr
|
||||
}
|
||||
|
||||
func (d *Driver) requestUploadSession(ctx context.Context, parentID, name string, size int64, gcidHex string) (uploadTaskData, error) {
|
||||
var resp uploadTaskData
|
||||
if err := d.request(ctx, filesURL, http.MethodPost, func(req *resty.Request) {
|
||||
req.SetBody(map[string]any{
|
||||
"kind": "drive#file",
|
||||
"name": name,
|
||||
"size": actualSize,
|
||||
"size": size,
|
||||
"hash": gcidHex,
|
||||
"upload_type": "UPLOAD_TYPE_RESUMABLE",
|
||||
"objProvider": map[string]any{"provider": "UPLOAD_TYPE_UNKNOWN"},
|
||||
@@ -143,12 +197,13 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
"folder_type": "NORMAL",
|
||||
})
|
||||
}, &resp); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: request session: %w", err)
|
||||
return uploadTaskData{}, err
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
result := UploadResult{Hash: gcidHex, Size: actualSize}
|
||||
|
||||
// 3) 命中秒传:服务端已经知道这个 hash,直接返回新文件 ID。
|
||||
func (d *Driver) completeUploadAttempt(ctx context.Context, body preparedUploadBody, parentID, name string, result UploadResult, resp uploadTaskData) (UploadResult, error) {
|
||||
// 命中秒传:服务端已经知道这个 hash,直接返回新文件 ID。
|
||||
if resp.Resumable == nil {
|
||||
if resp.File.ID != "" {
|
||||
result.FileID = resp.File.ID
|
||||
@@ -163,15 +218,15 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// 4) 未命中秒传:把字节传到 S3 兼容存储。
|
||||
if _, err := tmp.Seek(0, io.SeekStart); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: seek tmp: %w", err)
|
||||
// 未命中秒传:把字节传到 S3 兼容存储。
|
||||
if err := body.rewind(); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: rewind body: %w", err)
|
||||
}
|
||||
if err := d.uploadToOSS(ctx, &resp.Resumable.Params, tmp); err != nil {
|
||||
if err := d.uploadToOSS(ctx, &resp.Resumable.Params, body.reader); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: oss put: %w", err)
|
||||
}
|
||||
|
||||
// 5) 拿到 fileID。优先走响应里的预分配 ID;为空就回查目录。
|
||||
// 拿到 fileID。优先走响应里的预分配 ID;为空就回查目录。
|
||||
if resp.File.ID != "" {
|
||||
result.FileID = resp.File.ID
|
||||
return result, nil
|
||||
@@ -184,12 +239,114 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func shouldRetryPikPakUploadAttempt(err error, attempt int) bool {
|
||||
return attempt < pikpakUploadMaxAttempts && isRetryablePikPakUploadError(err)
|
||||
}
|
||||
|
||||
func pikpakUploadRetryDelay(attempt int) time.Duration {
|
||||
if attempt <= 0 {
|
||||
return 0
|
||||
}
|
||||
return time.Duration(attempt) * time.Second
|
||||
}
|
||||
|
||||
func (d *Driver) logUploadRetry(name string, attempt int, err error) {
|
||||
log.Printf("[pikpak] upload retry drive=%s name=%q next_attempt=%d/%d err=%v",
|
||||
d.id, name, attempt+1, pikpakUploadMaxAttempts, err)
|
||||
}
|
||||
|
||||
func isRetryablePikPakUploadError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
return false
|
||||
}
|
||||
var serviceErr oss.ServiceError
|
||||
if errors.As(err, &serviceErr) {
|
||||
return serviceErr.StatusCode == http.StatusTooManyRequests || serviceErr.StatusCode >= 500
|
||||
}
|
||||
var netErr net.Error
|
||||
if errors.As(err, &netErr) {
|
||||
return true
|
||||
}
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "no such host") ||
|
||||
strings.Contains(text, "temporary failure in name resolution") ||
|
||||
strings.Contains(text, "server misbehaving") ||
|
||||
strings.Contains(text, "connection reset") ||
|
||||
strings.Contains(text, "connection refused") ||
|
||||
strings.Contains(text, "broken pipe") ||
|
||||
strings.Contains(text, "eof") ||
|
||||
strings.Contains(text, "i/o timeout") ||
|
||||
strings.Contains(text, "tls handshake timeout") ||
|
||||
strings.Contains(text, "http 429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "http 509") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "temporarily unavailable") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
}
|
||||
|
||||
func (d *Driver) prepareUploadBody(r io.Reader, size int64) (preparedUploadBody, string, int64, error) {
|
||||
if rs, ok := r.(io.ReadSeeker); ok {
|
||||
gcidHex, actualSize, start, err := hashGCIDFromReadSeeker(rs, size)
|
||||
if err != nil {
|
||||
return preparedUploadBody{}, "", 0, err
|
||||
}
|
||||
return preparedUploadBody{reader: rs, start: start, cleanup: func() {}}, gcidHex, actualSize, nil
|
||||
}
|
||||
|
||||
tmp, gcidHex, actualSize, err := bufferAndHashGCID(d.uploadTempDir, r, size)
|
||||
if err != nil {
|
||||
return preparedUploadBody{}, "", 0, err
|
||||
}
|
||||
return preparedUploadBody{
|
||||
reader: tmp,
|
||||
start: 0,
|
||||
cleanup: func() {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmp.Name())
|
||||
},
|
||||
}, gcidHex, actualSize, nil
|
||||
}
|
||||
|
||||
func hashGCIDFromReadSeeker(r io.ReadSeeker, size int64) (string, int64, int64, error) {
|
||||
start, err := r.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return "", 0, 0, fmt.Errorf("pikpak upload: seek body: %w", err)
|
||||
}
|
||||
|
||||
h := NewGCID(size)
|
||||
written, copyErr := io.Copy(h, r)
|
||||
_, seekErr := r.Seek(start, io.SeekStart)
|
||||
if copyErr != nil {
|
||||
return "", 0, start, fmt.Errorf("pikpak upload: hash body: %w", copyErr)
|
||||
}
|
||||
if seekErr != nil {
|
||||
return "", 0, start, fmt.Errorf("pikpak upload: rewind body: %w", seekErr)
|
||||
}
|
||||
if size > 0 && written != size {
|
||||
return "", 0, start, fmt.Errorf("pikpak upload: size mismatch: declared %d, copied %d", size, written)
|
||||
}
|
||||
return strings.ToUpper(hex.EncodeToString(h.Sum(nil))), written, start, nil
|
||||
}
|
||||
|
||||
// bufferAndHashGCID 把 r 复制到一个临时文件,同时计算 GCID。
|
||||
// 返回临时文件(位置在末尾,需要调用方 Seek 回 0)、GCID hex 大写、实际写入字节数。
|
||||
// 返回临时文件(位置在末尾,需要调用方 Seek 回 start)、GCID hex 大写、实际写入字节数。
|
||||
//
|
||||
// 调用方负责 Close + Remove 临时文件。
|
||||
func bufferAndHashGCID(r io.Reader, size int64) (*os.File, string, int64, error) {
|
||||
tmp, err := os.CreateTemp("", "pikpak-upload-*.bin")
|
||||
func bufferAndHashGCID(tempDir string, r io.Reader, size int64) (*os.File, string, int64, error) {
|
||||
tempDir = strings.TrimSpace(tempDir)
|
||||
if tempDir != "" {
|
||||
if err := os.MkdirAll(tempDir, 0o755); err != nil {
|
||||
return nil, "", 0, fmt.Errorf("pikpak upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(tempDir, "pikpak-upload-*.bin")
|
||||
if err != nil {
|
||||
return nil, "", 0, fmt.Errorf("pikpak upload: create tmp: %w", err)
|
||||
}
|
||||
@@ -215,10 +372,13 @@ func bufferAndHashGCID(r io.Reader, size int64) (*os.File, string, int64, error)
|
||||
//
|
||||
// 参数复用 PikPak 的临时凭证;必须带 Security Token 头部 + UserAgent,与 OpenList 一致。
|
||||
func (d *Driver) uploadToOSS(ctx context.Context, p *s3Params, body io.Reader) error {
|
||||
if d.uploadToOSSFunc != nil {
|
||||
return d.uploadToOSSFunc(ctx, p, body)
|
||||
}
|
||||
if p == nil {
|
||||
return errors.New("pikpak upload: nil s3 params")
|
||||
}
|
||||
client, err := oss.New(p.Endpoint, p.AccessKeyID, p.AccessKeySecret)
|
||||
client, err := newPikPakOSSClient(p)
|
||||
if err != nil {
|
||||
return fmt.Errorf("oss client: %w", err)
|
||||
}
|
||||
@@ -235,6 +395,44 @@ func (d *Driver) uploadToOSS(ctx context.Context, p *s3Params, body io.Reader) e
|
||||
)
|
||||
}
|
||||
|
||||
func newPikPakOSSClient(p *s3Params, options ...oss.ClientOption) (*oss.Client, error) {
|
||||
if p == nil {
|
||||
return nil, errors.New("pikpak upload: nil s3 params")
|
||||
}
|
||||
clientOptions := make([]oss.ClientOption, 0, len(options)+1)
|
||||
if isPikPakCNAMEEndpoint(p.Endpoint) {
|
||||
clientOptions = append(clientOptions, oss.UseCname(true))
|
||||
}
|
||||
clientOptions = append(clientOptions, options...)
|
||||
return oss.New(p.Endpoint, p.AccessKeyID, p.AccessKeySecret, clientOptions...)
|
||||
}
|
||||
|
||||
func isPikPakCNAMEEndpoint(endpoint string) bool {
|
||||
host := endpointHost(endpoint)
|
||||
if host == "" {
|
||||
return false
|
||||
}
|
||||
host = strings.TrimSuffix(strings.ToLower(host), ".")
|
||||
return host != "mypikpak.com" && host != "mypikpak.net" &&
|
||||
(strings.HasSuffix(host, ".mypikpak.com") || strings.HasSuffix(host, ".mypikpak.net"))
|
||||
}
|
||||
|
||||
// endpointHost extracts the bare host name from an endpoint string.
//
// The input may be a full URL, "host:port", or "host/path". Surrounding
// whitespace is ignored, any port is dropped, and IPv6 brackets are removed.
// An empty or all-whitespace input yields "".
func endpointHost(endpoint string) string {
	candidate := strings.TrimSpace(endpoint)
	if candidate == "" {
		return ""
	}

	parsed, parseErr := url.Parse(candidate)
	if parseErr == nil && parsed.Host != "" {
		candidate = parsed.Host
	} else {
		// Scheme-less input: url.Parse yields no Host, so cut any path by hand.
		slash := strings.IndexByte(candidate, '/')
		if slash >= 0 {
			candidate = candidate[:slash]
		}
	}

	hostOnly, _, splitErr := net.SplitHostPort(candidate)
	if splitErr == nil {
		candidate = hostOnly
	}
	return strings.Trim(candidate, "[]")
}
|
||||
|
||||
type readerWithCtx struct {
|
||||
ctx context.Context
|
||||
r io.Reader
|
||||
|
||||
@@ -6,12 +6,17 @@ import (
|
||||
"crypto/sha1"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/aliyun/aliyun-oss-go-sdk/oss"
|
||||
"github.com/go-resty/resty/v2"
|
||||
)
|
||||
|
||||
@@ -139,6 +144,80 @@ func TestUploadInstantSuccessReturnsFileID(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadUsesReadSeekerWithoutTempCopy(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/drive/v1/files", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{
|
||||
"upload_type": "UPLOAD_TYPE_RESUMABLE",
|
||||
"resumable": null,
|
||||
"file": {"id": "instant-file-id", "name": "test.mp4", "kind": "drive#file"}
|
||||
}`))
|
||||
})
|
||||
server := httptest.NewServer(mux)
|
||||
defer server.Close()
|
||||
|
||||
d := newTestDriver(t, server)
|
||||
uploadTempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
d.uploadTempDir = uploadTempDir
|
||||
|
||||
data := bytes.Repeat([]byte{0x31}, 1024)
|
||||
path := filepath.Join(t.TempDir(), "video.bin")
|
||||
if err := os.WriteFile(path, data, 0o644); err != nil {
|
||||
t.Fatalf("write source: %v", err)
|
||||
}
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
t.Fatalf("open source: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
id, err := d.Upload(context.Background(), "parent-id", "test.mp4", f, int64(len(data)))
|
||||
if err != nil {
|
||||
t.Fatalf("upload: %v", err)
|
||||
}
|
||||
if id != "instant-file-id" {
|
||||
t.Fatalf("file id = %q, want instant-file-id", id)
|
||||
}
|
||||
if _, err := os.Stat(uploadTempDir); !os.IsNotExist(err) {
|
||||
t.Fatalf("upload temp dir stat err = %v, want not created for read seeker input", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadBuffersNonSeekReaderInConfiguredTempDir(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/drive/v1/files", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{
|
||||
"upload_type": "UPLOAD_TYPE_RESUMABLE",
|
||||
"resumable": null,
|
||||
"file": {"id": "instant-file-id", "name": "test.mp4", "kind": "drive#file"}
|
||||
}`))
|
||||
})
|
||||
server := httptest.NewServer(mux)
|
||||
defer server.Close()
|
||||
|
||||
d := newTestDriver(t, server)
|
||||
uploadTempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
d.uploadTempDir = uploadTempDir
|
||||
|
||||
data := bytes.Repeat([]byte{0x42}, 1024)
|
||||
id, err := d.Upload(context.Background(), "parent-id", "test.mp4", bytes.NewBuffer(data), int64(len(data)))
|
||||
if err != nil {
|
||||
t.Fatalf("upload: %v", err)
|
||||
}
|
||||
if id != "instant-file-id" {
|
||||
t.Fatalf("file id = %q, want instant-file-id", id)
|
||||
}
|
||||
entries, err := os.ReadDir(uploadTempDir)
|
||||
if err != nil {
|
||||
t.Fatalf("read upload temp dir: %v", err)
|
||||
}
|
||||
if len(entries) != 0 {
|
||||
t.Fatalf("upload temp dir entries = %d, want cleaned", len(entries))
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadInstantSuccessFallsBackToListWhenFileIDMissing(t *testing.T) {
|
||||
listCalled := false
|
||||
mux := http.NewServeMux()
|
||||
@@ -181,6 +260,95 @@ func TestUploadInstantSuccessFallsBackToListWhenFileIDMissing(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadRetriesWithNewSessionWhenOSSEndpointDNSFails(t *testing.T) {
|
||||
sessionRequests := 0
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/drive/v1/files", func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
t.Errorf("method = %q, want POST", r.Method)
|
||||
}
|
||||
sessionRequests++
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(fmt.Sprintf(`{
|
||||
"upload_type": "UPLOAD_TYPE_RESUMABLE",
|
||||
"resumable": {
|
||||
"kind": "drive#resumable",
|
||||
"provider": "UPLOAD_TYPE_UNKNOWN",
|
||||
"params": {
|
||||
"access_key_id": "ak",
|
||||
"access_key_secret": "sk",
|
||||
"bucket": "bucket",
|
||||
"endpoint": "https://vip-lixian-%02d.upload-a10b.mypikpak.com",
|
||||
"key": "object-key-%02d",
|
||||
"security_token": "token"
|
||||
}
|
||||
},
|
||||
"file": {"id": "retry-file-%02d", "name": "retry.mp4", "kind": "drive#file"}
|
||||
}`, sessionRequests, sessionRequests, sessionRequests)))
|
||||
})
|
||||
server := httptest.NewServer(mux)
|
||||
defer server.Close()
|
||||
|
||||
d := newTestDriver(t, server)
|
||||
uploadAttempts := 0
|
||||
var uploaded []byte
|
||||
d.uploadToOSSFunc = func(_ context.Context, _ *s3Params, body io.Reader) error {
|
||||
uploadAttempts++
|
||||
if uploadAttempts == 1 {
|
||||
return &net.DNSError{Err: "no such host", Name: "vip-lixian-01.upload-a10b.mypikpak.com"}
|
||||
}
|
||||
var err error
|
||||
uploaded, err = io.ReadAll(body)
|
||||
return err
|
||||
}
|
||||
|
||||
payload := []byte("retry payload body")
|
||||
id, err := d.Upload(context.Background(), "parent-id", "retry.mp4", bytes.NewReader(payload), int64(len(payload)))
|
||||
if err != nil {
|
||||
t.Fatalf("upload: %v", err)
|
||||
}
|
||||
if id != "retry-file-02" {
|
||||
t.Fatalf("file id = %q, want retry-file-02 from the second session", id)
|
||||
}
|
||||
if sessionRequests != 2 {
|
||||
t.Fatalf("session requests = %d, want 2", sessionRequests)
|
||||
}
|
||||
if uploadAttempts != 2 {
|
||||
t.Fatalf("upload attempts = %d, want 2", uploadAttempts)
|
||||
}
|
||||
if !bytes.Equal(uploaded, payload) {
|
||||
t.Fatalf("uploaded body = %q, want %q", string(uploaded), string(payload))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPikPakOSSClientUsesCNAMEForPikPakUploadEndpoint(t *testing.T) {
|
||||
params := &s3Params{
|
||||
AccessKeyID: "ak",
|
||||
AccessKeySecret: "sk",
|
||||
Bucket: "vip-lixian-07",
|
||||
Endpoint: "http://upload-a10b.mypikpak.com",
|
||||
Key: "upload_tmp/object-key",
|
||||
}
|
||||
client, err := newPikPakOSSClient(params)
|
||||
if err != nil {
|
||||
t.Fatalf("new oss client: %v", err)
|
||||
}
|
||||
bucket, err := client.Bucket(params.Bucket)
|
||||
if err != nil {
|
||||
t.Fatalf("bucket: %v", err)
|
||||
}
|
||||
signed, err := bucket.SignURL(params.Key, oss.HTTPPut, 60)
|
||||
if err != nil {
|
||||
t.Fatalf("sign url: %v", err)
|
||||
}
|
||||
if strings.Contains(signed, "vip-lixian-07.upload-a10b.mypikpak.com") {
|
||||
t.Fatalf("signed url uses invalid bucket-prefixed PikPak host: %s", signed)
|
||||
}
|
||||
if !strings.Contains(signed, "http://upload-a10b.mypikpak.com/upload_tmp%2Fobject-key") {
|
||||
t.Fatalf("signed url = %s, want PikPak endpoint host with object key path", signed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadRejectsInvalidArguments(t *testing.T) {
|
||||
d := New(Config{ID: "x", Username: "u", Password: "p", Platform: "web"})
|
||||
cases := []struct {
|
||||
@@ -212,7 +380,7 @@ func TestUploadRejectsInvalidArguments(t *testing.T) {
|
||||
func TestBufferAndHashGCIDDetectsSizeMismatch(t *testing.T) {
|
||||
src := bytes.NewReader([]byte("hello"))
|
||||
// 声明 size=10 但实际只有 5 字节
|
||||
_, _, _, err := bufferAndHashGCID(src, 10)
|
||||
_, _, _, err := bufferAndHashGCID("", src, 10)
|
||||
if err == nil {
|
||||
t.Fatal("expected size mismatch error")
|
||||
}
|
||||
@@ -223,7 +391,7 @@ func TestBufferAndHashGCIDDetectsSizeMismatch(t *testing.T) {
|
||||
|
||||
func TestBufferAndHashGCIDComputesCorrectHash(t *testing.T) {
|
||||
data := bytes.Repeat([]byte{0x55}, 1024)
|
||||
tmp, hex, written, err := bufferAndHashGCID(bytes.NewReader(data), int64(len(data)))
|
||||
tmp, hex, written, err := bufferAndHashGCID("", bytes.NewReader(data), int64(len(data)))
|
||||
if err != nil {
|
||||
t.Fatalf("buffer: %v", err)
|
||||
}
|
||||
|
||||
@@ -16,23 +16,23 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
defaultUA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) quark-cloud-drive/2.5.20 Chrome/100.0.4896.160 Electron/18.3.5.4-b478491100 Safari/537.36 Channel/pckk_other_ch"
|
||||
defaultUA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) quark-cloud-drive/2.5.20 Chrome/100.0.4896.160 Electron/18.3.5.4-b478491100 Safari/537.36 Channel/pckk_other_ch"
|
||||
defaultReferer = "https://pan.quark.cn"
|
||||
defaultAPI = "https://drive.quark.cn/1/clouddrive"
|
||||
defaultPR = "ucpro"
|
||||
)
|
||||
|
||||
type Driver struct {
|
||||
id string
|
||||
cookie string
|
||||
rootID string
|
||||
ua string
|
||||
referer string
|
||||
apiBase string
|
||||
pr string
|
||||
client *resty.Client
|
||||
onCookieUpdate func(string)
|
||||
useTranscodingAddress bool
|
||||
id string
|
||||
cookie string
|
||||
rootID string
|
||||
ua string
|
||||
referer string
|
||||
apiBase string
|
||||
pr string
|
||||
client *resty.Client
|
||||
onCookieUpdate func(string)
|
||||
useTranscodingAddress bool
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
@@ -60,7 +60,7 @@ func New(c Config) *Driver {
|
||||
onCookieUpdate: c.OnCookieUpdate,
|
||||
}
|
||||
d.client = resty.New().
|
||||
SetTimeout(30 * time.Second).
|
||||
SetTimeout(30*time.Second).
|
||||
SetHeader("Accept", "application/json, text/plain, */*").
|
||||
SetHeader("Referer", d.referer).
|
||||
SetHeader("User-Agent", d.ua)
|
||||
@@ -269,6 +269,22 @@ func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader,
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return errors.New("quark remove: empty file id")
|
||||
}
|
||||
body := map[string]any{
|
||||
"action_type": 1,
|
||||
"exclude_fids": []string{},
|
||||
"filelist": []string{fileID},
|
||||
}
|
||||
if err := d.request(ctx, "/file/delete", http.MethodPost, nil, body, nil); err != nil {
|
||||
return fmt.Errorf("quark remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ---------- helpers ----------
|
||||
|
||||
func fileToEntry(f *file, parentID string) drives.Entry {
|
||||
@@ -343,3 +359,4 @@ func setCookieValue(cookie, key, value string) string {
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,986 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/jpeg"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/fingerprint"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
)
|
||||
|
||||
const (
|
||||
scriptCrawlerDuplicateBytes = "duplicate-video-bytes"
|
||||
scriptCrawlerUniqueBytes = "unique-video-bytes"
|
||||
)
|
||||
|
||||
// writeScriptCrawlerFFprobeStub writes an executable shell script into dir that
// stands in for ffprobe, and returns its path.
//
// With ok=true the stub prints "video" and exits 0. With ok=false it prints
// "moov atom not found" to stderr and exits 1. The two variants use different
// file names. A write failure aborts the test.
func writeScriptCrawlerFFprobeStub(t *testing.T, dir string, ok bool) string {
	t.Helper()

	var fileName, script string
	if ok {
		fileName, script = "ffprobe-ok.sh", "#!/bin/sh\necho video\nexit 0\n"
	} else {
		fileName, script = "ffprobe-fail.sh", "#!/bin/sh\necho 'moov atom not found' >&2\nexit 1\n"
	}

	stubPath := filepath.Join(dir, fileName)
	if err := os.WriteFile(stubPath, []byte(script), 0o755); err != nil {
		t.Fatalf("write ffprobe stub: %v", err)
	}
	return stubPath
}
|
||||
|
||||
// writeScriptCrawlerFFmpegStub writes an executable shell script into dir that
// stands in for ffmpeg, and returns its path.
//
// When GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE is set, the stub records its
// arguments there, one per line. It then treats its last argument as the
// output path and writes the fixed bytes "hls-video-bytes" to it. A write
// failure aborts the test.
func writeScriptCrawlerFFmpegStub(t *testing.T, dir string) string {
	t.Helper()

	// Raw literal: the \n inside printf's format must reach the shell as a
	// backslash followed by 'n'.
	const script = `#!/bin/sh
if [ -n "$GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE" ]; then printf '%s\n' "$@" > "$GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE"; fi
out=""
for arg do out="$arg"; done
printf 'hls-video-bytes' > "$out"
`

	stubPath := filepath.Join(dir, "ffmpeg-hls.sh")
	if err := os.WriteFile(stubPath, []byte(script), 0o755); err != nil {
		t.Fatalf("write ffmpeg stub: %v", err)
	}
	return stubPath
}
|
||||
|
||||
// writeScriptCrawlerJPEG writes a 48x48 JPEG filled with the single colour c
// to path, encoded at quality 95. Failure to create or encode the file aborts
// the test.
func writeScriptCrawlerJPEG(t *testing.T, path string, c color.RGBA) {
	t.Helper()

	const side = 48
	canvas := image.NewRGBA(image.Rect(0, 0, side, side))
	// Walk the pixels row-major with a single counter.
	for px := 0; px < side*side; px++ {
		canvas.SetRGBA(px%side, px/side, c)
	}

	out, err := os.Create(path)
	if err != nil {
		t.Fatalf("create jpeg: %v", err)
	}
	defer out.Close()

	encodeOpts := jpeg.Options{Quality: 95}
	if err := jpeg.Encode(out, canvas, &encodeOpts); err != nil {
		t.Fatalf("encode jpeg: %v", err)
	}
}
|
||||
|
||||
func TestCrawlerRunOnceImportsLocalFileAndSkipsExisting(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
CrawlerName: "Demo Crawler",
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Skipped != 0 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d skipped:%d failed:%d, want 1/0/0", res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123"))
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if v.Title != "Imported From Helper" || v.FileID != "abc-123.mp4" || v.Size == 0 {
|
||||
t.Fatalf("video = title:%q file:%q size:%d", v.Title, v.FileID, v.Size)
|
||||
}
|
||||
if !hasString(v.Tags, "Demo Crawler") {
|
||||
t.Fatalf("video tags = %#v, want crawler name tag", v.Tags)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), "abc-123.mp4")); err != nil {
|
||||
t.Fatalf("video file not copied: %v", err)
|
||||
}
|
||||
|
||||
res, err = c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("second run: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Skipped != 1 {
|
||||
t.Fatalf("second result = new:%d skipped:%d, want 0/1", res.NewVideos, res.Skipped)
|
||||
}
|
||||
if res.SeenSnapshot != 1 {
|
||||
t.Fatalf("seen snapshot = %d, want 1", res.SeenSnapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceMarksPreviewDisabledWhenConfigured(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
DisablePreview: true,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d failed:%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123"))
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if v.PreviewStatus != "disabled" {
|
||||
t.Fatalf("preview status = %q, want disabled", v.PreviewStatus)
|
||||
}
|
||||
if v.FingerprintStatus != "ready" || v.SampledSHA256 == "" {
|
||||
t.Fatalf("fingerprint status=%q sampled=%q, want ready and sampled hash", v.FingerprintStatus, v.SampledSHA256)
|
||||
}
|
||||
pending, err := cat.ListVideosByPreviewStatus(ctx, "demo", "pending", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending previews: %v", err)
|
||||
}
|
||||
if len(pending) != 0 {
|
||||
t.Fatalf("pending previews = %d, want 0", len(pending))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceUsesCurrentDrivePreviewSwitch(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: drv.ID(),
|
||||
Kind: Kind,
|
||||
Name: "Demo",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{"script_path": "/tmp/crawler.py"},
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed drive: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
DisablePreview: true,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d failed:%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123"))
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if v.PreviewStatus != "pending" {
|
||||
t.Fatalf("preview status = %q, want pending from current drive switch", v.PreviewStatus)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceUsesDefaultCrawlerNamespace(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.SeenSnapshot != 0 {
|
||||
t.Fatalf("result = new:%d seen:%d, want 1/0", res.NewVideos, res.SeenSnapshot)
|
||||
}
|
||||
videoID := BuildVideoID("demo", "abc-123")
|
||||
if _, err := cat.GetVideo(ctx, videoID); err != nil {
|
||||
t.Fatalf("get crawler video: %v", err)
|
||||
}
|
||||
|
||||
res, err = c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("second run: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Skipped != 1 || res.SeenSnapshot != 1 {
|
||||
t.Fatalf("second result = new:%d skipped:%d seen:%d, want 0/1/1", res.NewVideos, res.Skipped, res.SeenSnapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOncePassesAbsoluteJobPathsWhenWorkDirDiffers(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
t.Chdir(tmp)
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join("data", "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
scriptDir := filepath.Join(tmp, "scripts")
|
||||
if err := os.MkdirAll(scriptDir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir script dir: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(scriptDir, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_ASSERT_ABS", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
WorkDir: scriptDir,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Skipped != 0 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d skipped:%d failed:%d, want 1/0/0", res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
if !filepath.IsAbs(res.JobFile) || !filepath.IsAbs(res.SeenFile) {
|
||||
t.Fatalf("result paths should be absolute: job=%q seen=%q", res.JobFile, res.SeenFile)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceImportsSimpleMediaURLWithoutSourceID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/video.mp4" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
_, _ = w.Write([]byte("simple-video-bytes"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_SIMPLE", "1")
|
||||
t.Setenv("GO_SCRIPTCRAWLER_MEDIA_URL", srv.URL+"/video.mp4?token=first")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
HTTPClient: srv.Client(),
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Skipped != 0 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d skipped:%d failed:%d, want 1/0/0", res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
videos, err := cat.ListVideosByDrive(ctx, "demo")
|
||||
if err != nil {
|
||||
t.Fatalf("list videos: %v", err)
|
||||
}
|
||||
if len(videos) != 1 {
|
||||
t.Fatalf("videos = %d, want 1", len(videos))
|
||||
}
|
||||
v := videos[0]
|
||||
if !strings.HasPrefix(v.ID, BuildVideoID("demo", "auto-")) {
|
||||
t.Fatalf("video id = %q, want generated auto source id", v.ID)
|
||||
}
|
||||
if v.Title != "Simple Protocol Video" || v.Ext != "mp4" || v.ThumbnailURL != "" || v.Size == 0 {
|
||||
t.Fatalf("video = title:%q ext:%q thumb:%q size:%d", v.Title, v.Ext, v.ThumbnailURL, v.Size)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), v.FileID)); err != nil {
|
||||
t.Fatalf("video file not downloaded: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_SCRIPTCRAWLER_MEDIA_URL", srv.URL+"/video.mp4?token=second")
|
||||
res, err = c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("second run: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Skipped != 1 {
|
||||
t.Fatalf("second result = new:%d skipped:%d, want 0/1", res.NewVideos, res.Skipped)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceSkipsFingerprintDuplicateAndContinues(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
|
||||
seedFile := "seed-canonical.mp4"
|
||||
if err := os.WriteFile(filepath.Join(drv.VideosDir(), seedFile), []byte(scriptCrawlerDuplicateBytes), 0o644); err != nil {
|
||||
t.Fatalf("write seed video: %v", err)
|
||||
}
|
||||
seed := &catalog.Video{
|
||||
ID: "seed-for-hash",
|
||||
DriveID: drv.ID(),
|
||||
FileID: seedFile,
|
||||
Title: "Seed",
|
||||
Size: int64(len(scriptCrawlerDuplicateBytes)),
|
||||
PublishedAt: time.Now(),
|
||||
}
|
||||
sampled, err := fingerprint.Compute(ctx, drv, seed, fingerprint.Config{}, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("compute seed fingerprint: %v", err)
|
||||
}
|
||||
_ = os.Remove(filepath.Join(drv.VideosDir(), seedFile))
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: "existing-canonical",
|
||||
DriveID: "other-drive",
|
||||
FileID: "existing.mp4",
|
||||
FileName: "existing.mp4",
|
||||
Title: "Existing Canonical",
|
||||
Size: int64(len(scriptCrawlerDuplicateBytes)),
|
||||
Ext: "mp4",
|
||||
SampledSHA256: sampled,
|
||||
FingerprintStatus: "ready",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed canonical video: %v", err)
|
||||
}
|
||||
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_DUP_UNIQUE", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Skipped != 1 || res.Failed != 0 || res.TotalEntries != 2 {
|
||||
t.Fatalf("result = total:%d new:%d skipped:%d failed:%d, want 2/1/1/0", res.TotalEntries, res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
if res.CandidateBudget <= res.TargetNew {
|
||||
t.Fatalf("candidate budget = %d, target = %d; want expanded budget", res.CandidateBudget, res.TargetNew)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, BuildVideoID("demo", "dup-source")); err == nil {
|
||||
t.Fatal("duplicate candidate should not be imported")
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), "dup-source.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("duplicate local file stat = %v, want removed", err)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "unique-source"))
|
||||
if err != nil {
|
||||
t.Fatalf("unique video should be imported: %v", err)
|
||||
}
|
||||
if v.SampledSHA256 == "" || v.FingerprintStatus != "ready" {
|
||||
t.Fatalf("unique fingerprint = %q status=%q, want ready sampled fingerprint", v.SampledSHA256, v.FingerprintStatus)
|
||||
}
|
||||
seen, err := cat.ListCrawlerSourceIDs(ctx, Kind, "demo")
|
||||
if err != nil {
|
||||
t.Fatalf("list seen source ids: %v", err)
|
||||
}
|
||||
seenSet := map[string]bool{}
|
||||
for _, id := range seen {
|
||||
seenSet[id] = true
|
||||
}
|
||||
if !seenSet["dup-source"] || !seenSet["unique-source"] {
|
||||
t.Fatalf("seen ids = %#v, want duplicate and imported source ids", seen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerProcessItemSkipsNearDuplicateByTitleDurationAndThumbnail(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
commonThumbDir := filepath.Join(tmp, "common-thumbs")
|
||||
if err := os.MkdirAll(commonThumbDir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir common thumbs: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
canonicalID := "existing-canonical"
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: canonicalID,
|
||||
DriveID: "other-drive",
|
||||
FileID: "existing.mp4",
|
||||
FileName: "existing.mp4",
|
||||
Title: "91 Test Similar Title 1215516",
|
||||
DurationSeconds: 257,
|
||||
Size: 12345,
|
||||
Ext: "mp4",
|
||||
ThumbnailURL: "/p/thumb/" + canonicalID,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed canonical video: %v", err)
|
||||
}
|
||||
writeScriptCrawlerJPEG(t, mediaasset.ThumbnailPathInDir(commonThumbDir, canonicalID), color.RGBA{R: 210, G: 40, B: 40, A: 255})
|
||||
|
||||
outputDir := drv.OutputDir()
|
||||
mediaPath := filepath.Join(outputDir, "near-video.mp4")
|
||||
if err := os.WriteFile(mediaPath, []byte("near-duplicate-but-different-bytes"), 0o644); err != nil {
|
||||
t.Fatalf("write media: %v", err)
|
||||
}
|
||||
thumbPath := filepath.Join(outputDir, "near-thumb.jpg")
|
||||
writeScriptCrawlerJPEG(t, thumbPath, color.RGBA{R: 211, G: 41, B: 41, A: 255})
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
CommonThumbDir: commonThumbDir,
|
||||
})
|
||||
imported, err := c.processItem(ctx, Item{
|
||||
SourceID: "near-source",
|
||||
Title: "91 Test Similar Title 1215516 - source suffix",
|
||||
Author: "helper",
|
||||
DurationSeconds: 257,
|
||||
Media: MediaRef{LocalFile: mediaPath},
|
||||
Thumbnail: MediaRef{LocalFile: thumbPath},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("process item: %v", err)
|
||||
}
|
||||
if imported {
|
||||
t.Fatal("near duplicate imported, want skipped")
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, BuildVideoID("demo", "near-source")); err == nil {
|
||||
t.Fatal("near duplicate should not be inserted into catalog")
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), "near-source.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("near duplicate video stat = %v, want removed", err)
|
||||
}
|
||||
if sourceThumb, err := drv.ThumbPath("near-source.jpg"); err != nil {
|
||||
t.Fatalf("source thumb path: %v", err)
|
||||
} else if _, err := os.Stat(sourceThumb); !os.IsNotExist(err) {
|
||||
t.Fatalf("source thumb stat = %v, want removed", err)
|
||||
}
|
||||
if _, err := os.Stat(mediaasset.ThumbnailPathInDir(commonThumbDir, BuildVideoID("demo", "near-source"))); !os.IsNotExist(err) {
|
||||
t.Fatalf("common thumb stat = %v, want removed", err)
|
||||
}
|
||||
seen, err := cat.ListCrawlerSourceIDs(ctx, Kind, "demo")
|
||||
if err != nil {
|
||||
t.Fatalf("list seen source ids: %v", err)
|
||||
}
|
||||
if !hasString(seen, "near-source") {
|
||||
t.Fatalf("seen ids = %#v, want near-source", seen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerProcessItemKeepsLargerNearDuplicate(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
commonThumbDir := filepath.Join(tmp, "common-thumbs")
|
||||
if err := os.MkdirAll(commonThumbDir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir common thumbs: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
smallerID := "smaller-canonical"
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: smallerID,
|
||||
DriveID: "other-drive",
|
||||
FileID: "smaller.mp4",
|
||||
FileName: "smaller.mp4",
|
||||
Title: "91 Test Larger Candidate 1215516",
|
||||
DurationSeconds: 257,
|
||||
Size: 5,
|
||||
Ext: "mp4",
|
||||
ThumbnailURL: "/p/thumb/" + smallerID,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed smaller video: %v", err)
|
||||
}
|
||||
writeScriptCrawlerJPEG(t, mediaasset.ThumbnailPathInDir(commonThumbDir, smallerID), color.RGBA{R: 80, G: 160, B: 80, A: 255})
|
||||
|
||||
outputDir := drv.OutputDir()
|
||||
mediaPath := filepath.Join(outputDir, "larger-video.mp4")
|
||||
if err := os.WriteFile(mediaPath, []byte("near-duplicate-larger-candidate-bytes"), 0o644); err != nil {
|
||||
t.Fatalf("write media: %v", err)
|
||||
}
|
||||
thumbPath := filepath.Join(outputDir, "larger-thumb.jpg")
|
||||
writeScriptCrawlerJPEG(t, thumbPath, color.RGBA{R: 81, G: 161, B: 81, A: 255})
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
CommonThumbDir: commonThumbDir,
|
||||
})
|
||||
imported, err := c.processItem(ctx, Item{
|
||||
SourceID: "larger-source",
|
||||
Title: "91 Test Larger Candidate 1215516 - source suffix",
|
||||
Author: "helper",
|
||||
DurationSeconds: 257,
|
||||
Media: MediaRef{LocalFile: mediaPath},
|
||||
Thumbnail: MediaRef{LocalFile: thumbPath},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("process item: %v", err)
|
||||
}
|
||||
if !imported {
|
||||
t.Fatal("larger near duplicate was skipped, want imported")
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, smallerID); err == nil {
|
||||
t.Fatal("smaller near duplicate should be deleted from catalog")
|
||||
}
|
||||
if deleted, err := cat.IsVideoDeleted(ctx, smallerID); err != nil || !deleted {
|
||||
t.Fatalf("smaller tombstone = %v, %v; want deleted tombstone", deleted, err)
|
||||
}
|
||||
larger, err := cat.GetVideo(ctx, BuildVideoID("demo", "larger-source"))
|
||||
if err != nil {
|
||||
t.Fatalf("larger video should be imported: %v", err)
|
||||
}
|
||||
if larger.Size <= 5 {
|
||||
t.Fatalf("larger size = %d, want > 5", larger.Size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceRejectsInvalidDownloadedVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
CrawlerName: "Demo Crawler",
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, false),
|
||||
ScriptPath: dummyScript,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Skipped != 0 || res.Failed != 1 || res.TotalEntries != 1 {
|
||||
t.Fatalf("result = total:%d new:%d skipped:%d failed:%d, want 1/0/0/1", res.TotalEntries, res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123")); err == nil {
|
||||
t.Fatal("invalid video should not be imported")
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), "abc-123.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("invalid local video stat = %v, want removed", err)
|
||||
}
|
||||
seen, err := cat.ListCrawlerSourceIDs(ctx, Kind, "demo")
|
||||
if err != nil {
|
||||
t.Fatalf("list seen source ids: %v", err)
|
||||
}
|
||||
if len(seen) != 0 {
|
||||
t.Fatalf("seen ids = %#v, want none for invalid video", seen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceDownloadsHLSMediaURL(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HLS", "1")
|
||||
ffmpegArgsFile := filepath.Join(tmp, "ffmpeg-args.txt")
|
||||
t.Setenv("GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE", ffmpegArgsFile)
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
CrawlerName: "Demo Crawler",
|
||||
PythonPath: wrapper,
|
||||
FFmpegPath: writeScriptCrawlerFFmpegStub(t, tmp),
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Skipped != 0 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d skipped:%d failed:%d, want 1/0/0", res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "hls-source"))
|
||||
if err != nil {
|
||||
t.Fatalf("get hls video: %v", err)
|
||||
}
|
||||
if v.FileID != "hls-source.mp4" || v.Size != int64(len("hls-video-bytes")) {
|
||||
t.Fatalf("video file=%q size=%d, want hls-source.mp4 size %d", v.FileID, v.Size, len("hls-video-bytes"))
|
||||
}
|
||||
data, err := os.ReadFile(filepath.Join(drv.VideosDir(), "hls-source.mp4"))
|
||||
if err != nil {
|
||||
t.Fatalf("read hls output: %v", err)
|
||||
}
|
||||
if string(data) != "hls-video-bytes" {
|
||||
t.Fatalf("hls output = %q", string(data))
|
||||
}
|
||||
argsData, err := os.ReadFile(ffmpegArgsFile)
|
||||
if err != nil {
|
||||
t.Fatalf("read ffmpeg args: %v", err)
|
||||
}
|
||||
argsText := "\n" + string(argsData) + "\n"
|
||||
for _, want := range []string{
|
||||
"\n-protocol_whitelist\nhttp,https,tcp,tls,crypto\n",
|
||||
"\n-allowed_extensions\nALL\n",
|
||||
"\n-allowed_segment_extensions\nALL\n",
|
||||
"\n-extension_picky\n0\n",
|
||||
} {
|
||||
if !strings.Contains(argsText, want) {
|
||||
t.Fatalf("ffmpeg args missing %q in:\n%s", strings.TrimSpace(want), string(argsData))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestScriptCrawlerHelperProcess(t *testing.T) {
|
||||
if os.Getenv("GO_WANT_SCRIPTCRAWLER_HELPER") != "1" {
|
||||
return
|
||||
}
|
||||
args := os.Args
|
||||
jobPath := ""
|
||||
for i := 0; i < len(args)-1; i++ {
|
||||
if args[i] == "--job" {
|
||||
jobPath = args[i+1]
|
||||
break
|
||||
}
|
||||
}
|
||||
if jobPath == "" {
|
||||
fmt.Fprintln(os.Stderr, "missing --job")
|
||||
os.Exit(2)
|
||||
}
|
||||
data, err := os.ReadFile(jobPath)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(2)
|
||||
}
|
||||
var job Job
|
||||
if err := json.Unmarshal(data, &job); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(2)
|
||||
}
|
||||
if os.Getenv("GO_WANT_SCRIPTCRAWLER_ASSERT_ABS") == "1" {
|
||||
if !filepath.IsAbs(jobPath) || !filepath.IsAbs(job.SeenSourceIDsFile) || !filepath.IsAbs(job.OutputDir) {
|
||||
fmt.Fprintf(os.Stderr, "expected absolute paths, got job=%q seen=%q output=%q\n", jobPath, job.SeenSourceIDsFile, job.OutputDir)
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
||||
if os.Getenv("GO_WANT_SCRIPTCRAWLER_SIMPLE") == "1" {
|
||||
event := map[string]any{
|
||||
"title": "Simple Protocol Video",
|
||||
"media_url": os.Getenv("GO_SCRIPTCRAWLER_MEDIA_URL"),
|
||||
}
|
||||
_ = json.NewEncoder(os.Stdout).Encode(event)
|
||||
os.Exit(0)
|
||||
}
|
||||
if os.Getenv("GO_WANT_SCRIPTCRAWLER_HLS") == "1" {
|
||||
event := Event{
|
||||
Type: "item",
|
||||
Item: Item{
|
||||
SourceID: "hls-source",
|
||||
Title: "HLS Protocol Video",
|
||||
Author: "helper",
|
||||
Media: MediaRef{
|
||||
URL: "https://media.example.test/video.m3u8",
|
||||
Headers: map[string]string{
|
||||
"Referer": "https://example.test/",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
_ = json.NewEncoder(os.Stdout).Encode(event)
|
||||
os.Exit(0)
|
||||
}
|
||||
if os.Getenv("GO_WANT_SCRIPTCRAWLER_DUP_UNIQUE") == "1" {
|
||||
duplicateFile := filepath.Join(job.OutputDir, "duplicate.mp4")
|
||||
if err := os.WriteFile(duplicateFile, []byte(scriptCrawlerDuplicateBytes), 0o644); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(2)
|
||||
}
|
||||
uniqueFile := filepath.Join(job.OutputDir, "unique.mp4")
|
||||
if err := os.WriteFile(uniqueFile, []byte(scriptCrawlerUniqueBytes), 0o644); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(2)
|
||||
}
|
||||
for _, event := range []Event{
|
||||
{
|
||||
Type: "item",
|
||||
Item: Item{
|
||||
SourceID: "dup-source",
|
||||
Title: "Duplicate Candidate",
|
||||
Author: "helper",
|
||||
Media: MediaRef{LocalFile: duplicateFile},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: "item",
|
||||
Item: Item{
|
||||
SourceID: "unique-source",
|
||||
Title: "Unique Candidate",
|
||||
Author: "helper",
|
||||
Media: MediaRef{LocalFile: uniqueFile},
|
||||
},
|
||||
},
|
||||
} {
|
||||
_ = json.NewEncoder(os.Stdout).Encode(event)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
localFile := filepath.Join(job.OutputDir, "helper.mp4")
|
||||
if err := os.WriteFile(localFile, []byte("helper-video"), 0o644); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(2)
|
||||
}
|
||||
event := Event{
|
||||
Type: "item",
|
||||
Item: Item{
|
||||
SourceID: "abc-123",
|
||||
Title: "Imported From Helper",
|
||||
Author: "helper",
|
||||
Media: MediaRef{LocalFile: localFile},
|
||||
},
|
||||
}
|
||||
_ = json.NewEncoder(os.Stdout).Encode(event)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// hasString reports whether want is an element of values.
// A nil or empty slice never contains anything.
func hasString(values []string, want string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] != want {
			continue
		}
		return true
	}
	return false
}
|
||||
@@ -0,0 +1,213 @@
|
||||
// Package scriptcrawler provides a generic local drive for script-based
|
||||
// crawlers. A crawler script discovers videos; the Go runner downloads them
|
||||
// into this drive and the existing preview/fingerprint workers consume them
|
||||
// through the normal drives.Drive interface.
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
// Kind is the drive-kind identifier returned by (*Driver).Kind.
const Kind = "scriptcrawler"
|
||||
|
||||
// Config carries the values that New copies into a Driver.
type Config struct {
	// ID is the drive identifier; Init rejects a blank value.
	ID string
	// RootDir is the directory under which Init creates the drive's
	// subdirectories; Init rejects a blank value.
	RootDir string
}
|
||||
|
||||
// Driver is the scriptcrawler drive. It serves files from the "videos"
// subdirectory of rootDir and is asserted at the bottom of this file to
// satisfy drives.Drive and drives.Remover.
type Driver struct {
	// id is the drive identifier returned by ID.
	id string
	// rootDir is the base directory returned by RootDir.
	rootDir string
}
|
||||
|
||||
func New(c Config) *Driver {
|
||||
return &Driver{id: c.ID, rootDir: c.RootDir}
|
||||
}
|
||||
|
||||
// Kind returns the package-level Kind constant.
func (d *Driver) Kind() string {
	return Kind
}
|
||||
|
||||
// ID returns the drive identifier the Driver was constructed with.
func (d *Driver) ID() string {
	return d.id
}
|
||||
|
||||
// RootID returns the fixed root identifier "/".
func (d *Driver) RootID() string {
	const root = "/"
	return root
}
|
||||
|
||||
func (d *Driver) Init(context.Context) error {
|
||||
if strings.TrimSpace(d.id) == "" {
|
||||
return errors.New("scriptcrawler: empty drive id")
|
||||
}
|
||||
if strings.TrimSpace(d.rootDir) == "" {
|
||||
return errors.New("scriptcrawler: empty root dir")
|
||||
}
|
||||
for _, sub := range []string{"videos", "thumbs", "output", ".crawl"} {
|
||||
if err := os.MkdirAll(filepath.Join(d.rootDir, sub), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RootDir returns the root directory the Driver was constructed with.
func (d *Driver) RootDir() string {
	return d.rootDir
}
|
||||
|
||||
// VideosDir returns the "videos" subdirectory of the root directory.
func (d *Driver) VideosDir() string {
	const sub = "videos"
	return filepath.Join(d.rootDir, sub)
}
|
||||
|
||||
// ThumbsDir returns the "thumbs" subdirectory of the root directory.
func (d *Driver) ThumbsDir() string {
	const sub = "thumbs"
	return filepath.Join(d.rootDir, sub)
}
|
||||
|
||||
// OutputDir returns the "output" subdirectory of the root directory.
func (d *Driver) OutputDir() string {
	const sub = "output"
	return filepath.Join(d.rootDir, sub)
}
|
||||
|
||||
// CrawlDir returns the ".crawl" subdirectory of the root directory.
func (d *Driver) CrawlDir() string {
	const sub = ".crawl"
	return filepath.Join(d.rootDir, sub)
}
|
||||
|
||||
func (d *Driver) VideoPath(fileID string) (string, error) {
|
||||
return safeJoin(d.VideosDir(), fileID)
|
||||
}
|
||||
|
||||
func (d *Driver) ThumbPath(fileID string) (string, error) {
|
||||
return safeJoin(d.ThumbsDir(), fileID)
|
||||
}
|
||||
|
||||
func (d *Driver) OutputPath(fileName string) (string, error) {
|
||||
return safeJoin(d.OutputDir(), fileName)
|
||||
}
|
||||
|
||||
func (d *Driver) List(context.Context, string) ([]drives.Entry, error) {
|
||||
entries, err := os.ReadDir(d.VideosDir())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
out := make([]drives.Entry, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
info, err := e.Info()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
out = append(out, drives.Entry{
|
||||
ID: e.Name(),
|
||||
Name: e.Name(),
|
||||
Size: info.Size(),
|
||||
IsDir: false,
|
||||
ModTime: info.ModTime(),
|
||||
})
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Stat(ctx context.Context, fileID string) (*drives.Entry, error) {
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &drives.Entry{
|
||||
ID: fileID,
|
||||
Name: fileID,
|
||||
Size: info.Size(),
|
||||
IsDir: info.IsDir(),
|
||||
ModTime: info.ModTime(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLink, error) {
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if info.IsDir() || info.Size() == 0 {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
URL: path,
|
||||
Expires: time.Now().Add(24 * time.Hour),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
videoPath, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
info, err := os.Stat(videoPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
removeThumbCandidates(d.ThumbPath, strings.TrimSuffix(fileID, filepath.Ext(fileID)))
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return errors.New("scriptcrawler: refusing to remove directory")
|
||||
}
|
||||
if err := os.Remove(videoPath); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
removeThumbCandidates(d.ThumbPath, strings.TrimSuffix(fileID, filepath.Ext(fileID)))
|
||||
return nil
|
||||
}
|
||||
|
||||
func removeThumbCandidates(pathFor func(string) (string, error), stem string) {
|
||||
stem = strings.TrimSpace(stem)
|
||||
if stem == "" {
|
||||
return
|
||||
}
|
||||
for _, ext := range []string{".jpg", ".jpeg", ".png", ".webp"} {
|
||||
path, err := pathFor(stem + ext)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
_ = os.Remove(path)
|
||||
}
|
||||
}
|
||||
|
||||
func safeJoin(root, fileID string) (string, error) {
|
||||
id := strings.TrimSpace(fileID)
|
||||
if id == "" || filepath.Base(id) != id {
|
||||
return "", errors.New("scriptcrawler: invalid file id")
|
||||
}
|
||||
if strings.TrimSpace(root) == "" {
|
||||
return "", errors.New("scriptcrawler: empty root")
|
||||
}
|
||||
rootAbs, err := filepath.Abs(root)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
pathAbs, err := filepath.Abs(filepath.Join(rootAbs, id))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if pathAbs != rootAbs && !strings.HasPrefix(pathAbs, rootAbs+string(os.PathSeparator)) {
|
||||
return "", errors.New("scriptcrawler: file id escapes root")
|
||||
}
|
||||
return pathAbs, nil
|
||||
}
|
||||
|
||||
// Compile-time checks that *Driver implements the drives interfaces.
var _ drives.Drive = (*Driver)(nil)

var _ drives.Remover = (*Driver)(nil)
|
||||
@@ -0,0 +1,405 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DryRun 在不入库的前提下试跑一个爬虫脚本:临时目录里生成 job.json,
|
||||
// 启动脚本进程,拿到第一条(或前 MaxItems 条)item 事件后立即停止,
|
||||
// 再对视频直链做一次小范围探测,验证脚本"能不能爬取到视频"。
|
||||
// 用于后台导入脚本后的"测试脚本"按钮。
|
||||
|
||||
const (
	// defaultDryRunTimeout is the overall limit DryRun applies when
	// DryRunConfig.Timeout is not positive.
	defaultDryRunTimeout = 2 * time.Minute
	// dryRunLogTailLines caps the number of stderr lines kept by dryRunLogTail.
	dryRunLogTailLines = 60
	// dryRunMediaProbeLimit bounds the media URL probe: it is used both as
	// the request context timeout and as the response-header timeout.
	dryRunMediaProbeLimit = 20 * time.Second
	// dryRunStopGrace is how long DryRun waits for the script to exit on its
	// own after reading stops, before killing it.
	dryRunStopGrace = 100 * time.Millisecond
)
|
||||
|
||||
// DryRunConfig is the input to DryRun.
type DryRunConfig struct {
	// PythonPath is the interpreter used to run the script; DryRun falls
	// back to "python3" when it is blank.
	PythonPath string
	// ScriptPath is the crawler script to run; it must exist.
	ScriptPath string
	// ProxyURL, when non-blank, is exported to the script through the
	// HTTP(S)_PROXY environment variables, written into the job file, and
	// passed to the media probe's proxy configuration.
	ProxyURL string
	// ConfigJSON is optional custom configuration; it must be valid JSON
	// when non-blank and defaults to "{}".
	ConfigJSON string
	// MaxItems is how many item events to collect before stopping the
	// script. Defaults to 1.
	MaxItems int
	// Timeout is the hard limit for the whole dry run. Defaults to 2 minutes.
	Timeout time.Duration
	// SkipMediaProbe skips the media URL reachability probe (used for
	// injection in unit tests).
	SkipMediaProbe bool
	// HTTPClient, when non-nil, is used for the media probe instead of a
	// client built from ProxyURL.
	HTTPClient *http.Client
}
|
||||
|
||||
// DryRunItem is the JSON-serialisable summary of one item event collected by
// DryRun. All fields are whitespace-trimmed copies of the item's values.
type DryRunItem struct {
	Title          string `json:"title"`
	SourceID       string `json:"sourceId,omitempty"`
	MediaURL       string `json:"mediaUrl,omitempty"`
	MediaLocalFile string `json:"mediaLocalFile,omitempty"`
	ThumbnailURL   string `json:"thumbnailUrl,omitempty"`
	DetailURL      string `json:"detailUrl,omitempty"`
}
|
||||
|
||||
// DryRunMediaCheck is the outcome of probeMediaURL for the first collected item.
type DryRunMediaCheck struct {
	// OK is true when the probe received HTTP 200 or 206.
	OK bool `json:"ok"`
	// Status is the HTTP status code of the probe response.
	Status int `json:"status,omitempty"`
	// ContentType is the response's Content-Type header.
	ContentType string `json:"contentType,omitempty"`
	// ContentLength is the total size taken from Content-Range when present,
	// otherwise from the response length of a 200 reply.
	ContentLength int64 `json:"contentLengthBytes,omitempty"`
	// Error describes why the probe failed.
	Error string `json:"error,omitempty"`
}
|
||||
|
||||
// DryRunResult is what DryRun returns.
type DryRunResult struct {
	// OK reports overall success: at least one item was collected and the
	// media check (when performed) passed.
	OK bool `json:"ok"`
	// Items holds the collected items; it is never nil.
	Items []DryRunItem `json:"items"`
	// MediaCheck is set only when the media URL was actually probed.
	MediaCheck *DryRunMediaCheck `json:"mediaCheck,omitempty"`
	// Error is a user-facing failure description.
	Error string `json:"error,omitempty"`
	// Log is the tail of the script's stderr.
	Log []string `json:"log,omitempty"`
	// DurationMs is the wall-clock duration of the dry run in milliseconds.
	DurationMs int64 `json:"durationMs"`
}
|
||||
|
||||
// dryRunLogTail is a writer that keeps only the most recent
// dryRunLogTailLines lines written to it. DryRun installs it as the
// script's stderr.
type dryRunLogTail struct {
	// mu guards lines and partial.
	mu sync.Mutex
	// lines holds completed lines, trimmed and non-empty.
	lines []string
	// partial is the text received after the last newline.
	partial string
}
|
||||
|
||||
func newDryRunLogTail() *dryRunLogTail {
|
||||
return &dryRunLogTail{lines: make([]string, 0, dryRunLogTailLines)}
|
||||
}
|
||||
|
||||
func (t *dryRunLogTail) Write(p []byte) (int, error) {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
|
||||
chunk := strings.ReplaceAll(string(p), "\r\n", "\n")
|
||||
parts := strings.Split(t.partial+chunk, "\n")
|
||||
t.partial = parts[len(parts)-1]
|
||||
for _, line := range parts[:len(parts)-1] {
|
||||
t.appendLocked(line)
|
||||
}
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
func (t *dryRunLogTail) snapshot() []string {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
|
||||
lines := append([]string{}, t.lines...)
|
||||
if partial := strings.TrimSpace(t.partial); partial != "" {
|
||||
lines = appendDryRunLogLine(lines, partial)
|
||||
}
|
||||
return lines
|
||||
}
|
||||
|
||||
func (t *dryRunLogTail) appendLocked(line string) {
|
||||
t.lines = appendDryRunLogLine(t.lines, line)
|
||||
}
|
||||
|
||||
func appendDryRunLogLine(lines []string, line string) []string {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
return lines
|
||||
}
|
||||
if len(lines) >= dryRunLogTailLines {
|
||||
lines = lines[1:]
|
||||
}
|
||||
return append(lines, line)
|
||||
}
|
||||
|
||||
// DryRun test-runs a crawler script without importing anything.
//
// It writes a job.json into a fresh temporary directory, starts
// "<python> <script> --job <job.json>" in the script's directory, reads
// JSON-lines events from the script's stdout until cfg.MaxItems item events
// have been collected (default 1), stops the script, and then, unless
// skipped, probes the first item's media URL.
//
// The result is never nil. Failures are reported through result.Error (and
// result.OK == false) rather than a Go error. result.Log carries the tail of
// the script's stderr, and result.DurationMs is filled in by a deferred
// function on every return path.
func DryRun(ctx context.Context, cfg DryRunConfig) *DryRunResult {
	started := time.Now()
	result := &DryRunResult{Items: []DryRunItem{}}
	// result is a pointer, so this update is visible to the caller even
	// though it runs after the return value has been chosen.
	defer func() { result.DurationMs = time.Since(started).Milliseconds() }()

	// Validate inputs and apply defaults.
	scriptPath := strings.TrimSpace(cfg.ScriptPath)
	if scriptPath == "" {
		result.Error = "脚本路径为空,请先导入脚本"
		return result
	}
	if _, err := os.Stat(scriptPath); err != nil {
		result.Error = fmt.Sprintf("脚本不存在: %v", err)
		return result
	}
	pythonPath := strings.TrimSpace(cfg.PythonPath)
	if pythonPath == "" {
		pythonPath = "python3"
	}
	maxItems := cfg.MaxItems
	if maxItems <= 0 {
		maxItems = 1
	}
	timeout := cfg.Timeout
	if timeout <= 0 {
		timeout = defaultDryRunTimeout
	}

	// Everything the script may touch lives in a throwaway directory.
	tmpDir, err := os.MkdirTemp("", "crawler-dryrun-")
	if err != nil {
		result.Error = fmt.Sprintf("创建临时目录失败: %v", err)
		return result
	}
	defer os.RemoveAll(tmpDir)

	outputDir := filepath.Join(tmpDir, "output")
	if err := os.MkdirAll(outputDir, 0o755); err != nil {
		result.Error = fmt.Sprintf("创建输出目录失败: %v", err)
		return result
	}
	// An empty "seen" file is handed to the script via the job.
	seenPath := filepath.Join(tmpDir, "seen.txt")
	if err := os.WriteFile(seenPath, nil, 0o644); err != nil {
		result.Error = fmt.Sprintf("写入 seen 文件失败: %v", err)
		return result
	}

	configJSON := json.RawMessage([]byte("{}"))
	if raw := strings.TrimSpace(cfg.ConfigJSON); raw != "" {
		if !json.Valid([]byte(raw)) {
			result.Error = "自定义配置必须是合法 JSON"
			return result
		}
		configJSON = json.RawMessage(raw)
	}
	job := Job{
		Protocol:          "crawler.v1",
		Mode:              "crawl",
		RunID:             "dryrun-" + started.UTC().Format("20060102T150405Z"),
		CrawlerID:         "dryrun",
		TargetNew:         maxItems,
		SeenSourceIDsFile: seenPath,
		OutputDir:         outputDir,
		Config:            configJSON,
		Network:           JobNetwork{ProxyURL: strings.TrimSpace(cfg.ProxyURL)},
	}
	jobPath := filepath.Join(tmpDir, "job.json")
	jobData, err := json.MarshalIndent(job, "", " ")
	if err != nil {
		result.Error = fmt.Sprintf("生成 job 文件失败: %v", err)
		return result
	}
	if err := os.WriteFile(jobPath, jobData, 0o600); err != nil {
		result.Error = fmt.Sprintf("写入 job 文件失败: %v", err)
		return result
	}

	runCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	cmd := exec.CommandContext(runCtx, pythonPath, scriptPath, "--job", jobPath)
	cmd.Dir = filepath.Dir(scriptPath)
	// Run the script in its own process group so killDryRunProcess can
	// signal the whole group.
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	cmd.Cancel = func() error {
		return killDryRunProcess(cmd)
	}
	// After a timeout or an early kill, child processes spawned by the
	// script may still hold the stdout/stderr pipes; WaitDelay forces the
	// pipes closed after a grace period so the reader never blocks forever.
	cmd.WaitDelay = 3 * time.Second
	if proxyURL := strings.TrimSpace(cfg.ProxyURL); proxyURL != "" {
		cmd.Env = append(os.Environ(),
			"HTTP_PROXY="+proxyURL,
			"HTTPS_PROXY="+proxyURL,
			"http_proxy="+proxyURL,
			"https_proxy="+proxyURL,
			"NO_PROXY=",
			"no_proxy=",
		)
	}
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		result.Error = fmt.Sprintf("启动脚本失败: %v", err)
		return result
	}
	logTail := newDryRunLogTail()
	cmd.Stderr = logTail
	if err := cmd.Start(); err != nil {
		_ = stdout.Close()
		result.Error = fmt.Sprintf("启动脚本失败: %v", err)
		return result
	}

	items := []DryRunItem{}
	// Headers of the first item are kept for the media probe below.
	var firstMediaHeaders map[string]string
	parseFailures := 0
	scanner := bufio.NewScanner(stdout)
	// Allow stdout lines of up to 4 MiB.
	scanner.Buffer(make([]byte, 64*1024), 4*1024*1024)
	for scanner.Scan() {
		if runCtx.Err() != nil {
			break
		}
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}
		var event Event
		if err := json.Unmarshal([]byte(line), &event); err != nil {
			// Counted only to choose a better error message later.
			parseFailures++
			continue
		}
		eventType := strings.ToLower(strings.TrimSpace(event.Type))
		item := event.normalizedItem()
		// An event without a type that still carries an item payload is
		// treated as an item event.
		if eventType == "" && item.hasPayload() {
			eventType = "item"
		}
		if eventType != "item" {
			continue
		}
		normalized, _, err := normalizeItemForImport(item)
		if err != nil {
			// Remember the problem but keep reading: a later item may be
			// valid, in which case this error is cleared below.
			result.Error = fmt.Sprintf("item 字段不完整: %v", err)
			continue
		}
		mediaURL := strings.TrimSpace(normalized.Media.URL)
		if len(items) == 0 {
			firstMediaHeaders = normalized.Media.Headers
		}
		items = append(items, DryRunItem{
			Title:          strings.TrimSpace(normalized.Title),
			SourceID:       strings.TrimSpace(item.SourceID),
			MediaURL:       mediaURL,
			MediaLocalFile: strings.TrimSpace(normalized.Media.LocalFile),
			ThumbnailURL:   strings.TrimSpace(normalized.Thumbnail.URL),
			DetailURL:      strings.TrimSpace(normalized.DetailURL),
		})
		if len(items) >= maxItems {
			break
		}
	}
	// Once enough items are collected, stop the script so it does not keep
	// paging. A script that has already finished on its own gets a very
	// short grace period so its stderr can be drained first, which avoids
	// the dry-run log occasionally coming back empty.
	waitDone := make(chan struct{})
	go func() {
		_ = cmd.Wait()
		close(waitDone)
	}()
	select {
	case <-waitDone:
	case <-time.After(dryRunStopGrace):
		_ = killDryRunProcess(cmd)
		<-waitDone
	}

	result.Log = logTail.snapshot()
	result.Items = items

	if len(items) == 0 {
		// Keep a more specific per-item error when one was recorded.
		if result.Error == "" {
			switch {
			// Our own timeout fired, not the caller's context.
			case runCtx.Err() != nil && ctx.Err() == nil:
				result.Error = fmt.Sprintf("测试超时(%s),脚本没有输出任何视频", timeout)
			case parseFailures > 0:
				result.Error = "脚本 stdout 不是合法的 crawler.v1 JSON Lines(日志应输出到 stderr)"
			default:
				result.Error = "脚本退出但没有输出任何视频"
			}
		}
		return result
	}
	// At least one valid item arrived; discard errors from earlier bad items.
	result.Error = ""

	first := items[0]
	switch {
	case cfg.SkipMediaProbe:
		result.OK = true
	case first.MediaLocalFile != "":
		// The script downloads into output_dir itself. The dry run uses a
		// temporary directory and the file is cleaned up with it, so
		// emitting a valid local_file is treated as a pass.
		result.OK = true
	default:
		check := probeMediaURL(ctx, cfg, first, firstMediaHeaders)
		result.MediaCheck = check
		result.OK = check.OK
	}
	return result
}
|
||||
|
||||
func killDryRunProcess(cmd *exec.Cmd) error {
|
||||
if cmd == nil || cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil {
|
||||
if err == syscall.ESRCH {
|
||||
return nil
|
||||
}
|
||||
return cmd.Process.Kill()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// probeMediaURL 对视频直链发一个 Range: bytes=0-0 的小请求,
|
||||
// 验证直链可达(带上脚本给的防盗链 headers 和代理)。
|
||||
func probeMediaURL(ctx context.Context, cfg DryRunConfig, item DryRunItem, mediaHeaders map[string]string) *DryRunMediaCheck {
|
||||
check := &DryRunMediaCheck{}
|
||||
if item.MediaURL == "" {
|
||||
check.Error = "item 没有视频直链"
|
||||
return check
|
||||
}
|
||||
|
||||
client := cfg.HTTPClient
|
||||
if client == nil {
|
||||
transport := &http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
ResponseHeaderTimeout: dryRunMediaProbeLimit,
|
||||
}
|
||||
if err := configureExplicitProxy(transport, cfg.ProxyURL); err != nil {
|
||||
check.Error = fmt.Sprintf("代理配置无效: %v", err)
|
||||
return check
|
||||
}
|
||||
client = &http.Client{Transport: transport}
|
||||
}
|
||||
|
||||
probeCtx, cancel := context.WithTimeout(ctx, dryRunMediaProbeLimit)
|
||||
defer cancel()
|
||||
req, err := http.NewRequestWithContext(probeCtx, http.MethodGet, item.MediaURL, nil)
|
||||
if err != nil {
|
||||
check.Error = fmt.Sprintf("视频直链无效: %v", err)
|
||||
return check
|
||||
}
|
||||
req.Header.Set("User-Agent", defaultUserAgent)
|
||||
req.Header.Set("Range", "bytes=0-0")
|
||||
if item.DetailURL != "" {
|
||||
req.Header.Set("Referer", item.DetailURL)
|
||||
}
|
||||
for k, v := range mediaHeaders {
|
||||
k = strings.TrimSpace(k)
|
||||
if k == "" {
|
||||
continue
|
||||
}
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
check.Error = fmt.Sprintf("视频直链请求失败: %v", err)
|
||||
return check
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
check.Status = resp.StatusCode
|
||||
check.ContentType = resp.Header.Get("Content-Type")
|
||||
if cr := resp.Header.Get("Content-Range"); cr != "" {
|
||||
// Content-Range: bytes 0-0/12345 → 取总大小
|
||||
if idx := strings.LastIndex(cr, "/"); idx >= 0 {
|
||||
var total int64
|
||||
if _, err := fmt.Sscanf(cr[idx+1:], "%d", &total); err == nil {
|
||||
check.ContentLength = total
|
||||
}
|
||||
}
|
||||
}
|
||||
if check.ContentLength == 0 && resp.StatusCode == http.StatusOK {
|
||||
check.ContentLength = resp.ContentLength
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusPartialContent {
|
||||
check.Error = fmt.Sprintf("视频直链返回 HTTP %d", resp.StatusCode)
|
||||
return check
|
||||
}
|
||||
check.OK = true
|
||||
return check
|
||||
}
|
||||
@@ -0,0 +1,176 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// writeDryRunScript writes an executable /bin/sh script named crawler.sh
// into a per-test temporary directory and returns its path. body is appended
// after the "#!/bin/sh" line. The test fails immediately if the file cannot
// be written.
func writeDryRunScript(t *testing.T, body string) string {
	t.Helper()
	scriptPath := filepath.Join(t.TempDir(), "crawler.sh")
	contents := []byte("#!/bin/sh\n" + body)
	if err := os.WriteFile(scriptPath, contents, 0o755); err != nil {
		t.Fatalf("write script: %v", err)
	}
	return scriptPath
}
|
||||
|
||||
// TestDryRunCollectsFirstItem runs a script that logs to stderr and emits
// one item plus a done event, and checks that DryRun returns that item and
// captures the stderr line. The media probe is skipped.
func TestDryRunCollectsFirstItem(t *testing.T) {
	script := writeDryRunScript(t, `
echo '[log] fetching list page' >&2
echo '{"type":"item","item":{"title":"Test Video","media_url":"https://cdn.example.test/v.mp4","source_id":"123","thumbnail_url":"https://cdn.example.test/t.jpg"}}'
echo '{"type":"done","stats":{"emitted":1}}'
`)
	// /bin/sh stands in for the Python interpreter.
	result := DryRun(context.Background(), DryRunConfig{
		PythonPath:     "/bin/sh",
		ScriptPath:     script,
		SkipMediaProbe: true,
	})
	if !result.OK {
		t.Fatalf("ok = false, error = %q, log = %v", result.Error, result.Log)
	}
	if len(result.Items) != 1 {
		t.Fatalf("items = %d, want 1", len(result.Items))
	}
	item := result.Items[0]
	if item.Title != "Test Video" || item.MediaURL != "https://cdn.example.test/v.mp4" || item.SourceID != "123" {
		t.Fatalf("item = %+v", item)
	}
	if len(result.Log) == 0 || !strings.Contains(result.Log[0], "fetching list page") {
		t.Fatalf("log tail = %v, want stderr captured", result.Log)
	}
}
|
||||
|
||||
// TestDryRunCapturesStderrWhenStoppingAfterFirstItem runs a script that
// emits one item and then sleeps for 30 seconds. DryRun must return within
// 5 seconds (the script is stopped early) and still report the stderr line
// written before the item.
func TestDryRunCapturesStderrWhenStoppingAfterFirstItem(t *testing.T) {
	script := writeDryRunScript(t, `
echo '[log] first item ready' >&2
echo '{"type":"item","item":{"title":"Early Stop Video","media_url":"https://cdn.example.test/v.mp4","source_id":"early-stop"}}'
sleep 30
`)
	start := time.Now()
	result := DryRun(context.Background(), DryRunConfig{
		PythonPath:     "/bin/sh",
		ScriptPath:     script,
		SkipMediaProbe: true,
	})
	if !result.OK {
		t.Fatalf("ok = false, error = %q, log = %v", result.Error, result.Log)
	}
	if elapsed := time.Since(start); elapsed > 5*time.Second {
		t.Fatalf("dry run took %s, script was not stopped after first item", elapsed)
	}
	if len(result.Log) == 0 || !strings.Contains(result.Log[0], "first item ready") {
		t.Fatalf("log tail = %v, want stderr captured before early stop", result.Log)
	}
}
|
||||
|
||||
// TestDryRunProbesMediaURL serves a 206 reply with "Content-Range: bytes
// 0-0/4096" and checks that DryRun probes the media URL with a
// "bytes=0-0" range, uses detail_url as Referer, and reports status 206
// with a total length of 4096.
func TestDryRunProbesMediaURL(t *testing.T) {
	// Request headers observed by the fake media server.
	var gotRange, gotReferer string
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		gotRange = r.Header.Get("Range")
		gotReferer = r.Header.Get("Referer")
		w.Header().Set("Content-Type", "video/mp4")
		w.Header().Set("Content-Range", "bytes 0-0/4096")
		w.WriteHeader(http.StatusPartialContent)
		_, _ = w.Write([]byte("x"))
	}))
	t.Cleanup(srv.Close)

	// The item fields are emitted at the top level of the event here,
	// not nested under "item".
	script := writeDryRunScript(t, fmt.Sprintf(
		`echo '{"type":"item","title":"Probe Video","media_url":"%s/v.mp4","detail_url":"https://example.test/view"}'`,
		srv.URL,
	))
	result := DryRun(context.Background(), DryRunConfig{
		PythonPath: "/bin/sh",
		ScriptPath: script,
	})
	if !result.OK {
		t.Fatalf("ok = false, error = %q, mediaCheck = %+v", result.Error, result.MediaCheck)
	}
	if result.MediaCheck == nil || !result.MediaCheck.OK {
		t.Fatalf("mediaCheck = %+v, want ok", result.MediaCheck)
	}
	if result.MediaCheck.Status != http.StatusPartialContent || result.MediaCheck.ContentLength != 4096 {
		t.Fatalf("mediaCheck = %+v, want 206 with total 4096", result.MediaCheck)
	}
	if gotRange != "bytes=0-0" || gotReferer != "https://example.test/view" {
		t.Fatalf("probe headers range=%q referer=%q", gotRange, gotReferer)
	}
}
|
||||
|
||||
// TestDryRunReportsBrokenMediaURL serves HTTP 403 for the media URL and
// checks that DryRun fails, reports the 403 in MediaCheck, and still
// returns the collected item.
func TestDryRunReportsBrokenMediaURL(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Error(w, "forbidden", http.StatusForbidden)
	}))
	t.Cleanup(srv.Close)

	script := writeDryRunScript(t, fmt.Sprintf(
		`echo '{"type":"item","title":"Dead Link","media_url":"%s/v.mp4"}'`,
		srv.URL,
	))
	result := DryRun(context.Background(), DryRunConfig{
		PythonPath: "/bin/sh",
		ScriptPath: script,
	})
	if result.OK {
		t.Fatal("ok = true, want false for HTTP 403 media url")
	}
	if result.MediaCheck == nil || result.MediaCheck.OK || result.MediaCheck.Status != http.StatusForbidden {
		t.Fatalf("mediaCheck = %+v, want failed 403", result.MediaCheck)
	}
	if len(result.Items) != 1 {
		t.Fatalf("items = %d, want item still returned for debugging", len(result.Items))
	}
}
|
||||
|
||||
// TestDryRunRejectsNonJSONStdout runs a script that prints plain text on
// stdout and checks that DryRun fails with an error mentioning "JSON Lines".
func TestDryRunRejectsNonJSONStdout(t *testing.T) {
	script := writeDryRunScript(t, `echo 'plain text progress output'`)
	result := DryRun(context.Background(), DryRunConfig{
		PythonPath:     "/bin/sh",
		ScriptPath:     script,
		SkipMediaProbe: true,
	})
	if result.OK {
		t.Fatal("ok = true, want false for non-JSON stdout")
	}
	if !strings.Contains(result.Error, "JSON Lines") {
		t.Fatalf("error = %q, want JSON Lines hint", result.Error)
	}
}
|
||||
|
||||
// TestDryRunTimesOut runs a script that sleeps for 30 seconds under a
// 2-second timeout and checks that DryRun fails with a timeout message and
// returns within 10 seconds (the script was killed).
func TestDryRunTimesOut(t *testing.T) {
	script := writeDryRunScript(t, `sleep 30`)
	start := time.Now()
	result := DryRun(context.Background(), DryRunConfig{
		PythonPath:     "/bin/sh",
		ScriptPath:     script,
		Timeout:        2 * time.Second,
		SkipMediaProbe: true,
	})
	if result.OK {
		t.Fatal("ok = true, want false on timeout")
	}
	if !strings.Contains(result.Error, "超时") {
		t.Fatalf("error = %q, want timeout message", result.Error)
	}
	if elapsed := time.Since(start); elapsed > 10*time.Second {
		t.Fatalf("dry run took %s, script was not killed", elapsed)
	}
}
|
||||
|
||||
// TestDryRunMissingScript checks that DryRun fails with a non-empty error
// when the script path does not exist.
func TestDryRunMissingScript(t *testing.T) {
	result := DryRun(context.Background(), DryRunConfig{
		PythonPath: "/bin/sh",
		ScriptPath: filepath.Join(t.TempDir(), "missing.py"),
	})
	if result.OK || result.Error == "" {
		t.Fatalf("result = %+v, want error for missing script", result)
	}
}
|
||||
@@ -0,0 +1,117 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// maxCrawlerNameRunes is the maximum length, in runes, that ExtractMetadata
// accepts for CRAWLER_NAME.
const maxCrawlerNameRunes = 80
|
||||
|
||||
// Metadata is the information ExtractMetadata reads from a crawler script.
type Metadata struct {
	// Name is the trimmed value of the script's CRAWLER_NAME assignment.
	Name string `json:"name"`
}
|
||||
|
||||
func ReadMetadata(scriptPath string) (Metadata, error) {
|
||||
scriptPath = strings.TrimSpace(scriptPath)
|
||||
if scriptPath == "" {
|
||||
return Metadata{}, errors.New("脚本路径为空")
|
||||
}
|
||||
if filepath.Ext(scriptPath) != ".py" {
|
||||
return Metadata{}, errors.New("目前只支持 .py 爬虫脚本")
|
||||
}
|
||||
data, err := os.ReadFile(scriptPath)
|
||||
if err != nil {
|
||||
return Metadata{}, fmt.Errorf("读取脚本失败: %w", err)
|
||||
}
|
||||
return ExtractMetadata(string(data))
|
||||
}
|
||||
|
||||
func ExtractMetadata(source string) (Metadata, error) {
|
||||
for _, line := range strings.Split(source, "\n") {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if trimmed == "" || strings.HasPrefix(trimmed, "#") {
|
||||
continue
|
||||
}
|
||||
if !strings.HasPrefix(trimmed, "CRAWLER_NAME") {
|
||||
continue
|
||||
}
|
||||
left, right, ok := strings.Cut(trimmed, "=")
|
||||
if !ok || strings.TrimSpace(left) != "CRAWLER_NAME" {
|
||||
continue
|
||||
}
|
||||
name, ok := parsePythonStringLiteral(right)
|
||||
if !ok {
|
||||
return Metadata{}, errors.New(`CRAWLER_NAME 必须是字符串字面量,例如 CRAWLER_NAME = "示例爬虫"`)
|
||||
}
|
||||
name = strings.TrimSpace(name)
|
||||
if name == "" {
|
||||
return Metadata{}, errors.New("CRAWLER_NAME 不能为空")
|
||||
}
|
||||
if len([]rune(name)) > maxCrawlerNameRunes {
|
||||
return Metadata{}, fmt.Errorf("CRAWLER_NAME 不能超过 %d 个字符", maxCrawlerNameRunes)
|
||||
}
|
||||
return Metadata{Name: name}, nil
|
||||
}
|
||||
return Metadata{}, errors.New(`脚本必须声明 CRAWLER_NAME,例如 CRAWLER_NAME = "示例爬虫"`)
|
||||
}
|
||||
|
||||
// parsePythonStringLiteral decodes a single-line Python string literal found
// at the start of raw (after optional whitespace) and reports whether one
// was found.
//
// Any run of r/R, u/U and b/B prefix letters is accepted; r/R switches to
// raw mode, where a backslash and the following character are both kept
// verbatim. In normal mode the escapes \n \r \t \a \b \f \v \\ \' \",
// octal \ooo, \xhh, \uXXXX and \UXXXXXXXX are decoded, and any other escape
// keeps its backslash, as Python does. Text after the closing quote (for
// example a trailing comment) is ignored. An unterminated literal, or input
// that does not start with a quote, yields ("", false).
func parsePythonStringLiteral(raw string) (string, bool) {
	s := strings.TrimSpace(raw)
	rawString := false

	// Strip string prefixes; only r/R changes how the body is decoded.
prefixes:
	for len(s) > 0 {
		switch s[0] {
		case 'r', 'R':
			rawString = true
		case 'u', 'U', 'b', 'B':
		default:
			break prefixes
		}
		s = strings.TrimSpace(s[1:])
	}

	if len(s) < 2 || (s[0] != '"' && s[0] != '\'') {
		return "", false
	}
	quote := s[0]
	var b strings.Builder
	for i := 1; i < len(s); i++ {
		ch := s[i]
		if ch == quote {
			return b.String(), true
		}
		if ch != '\\' {
			b.WriteByte(ch)
			continue
		}
		if i+1 == len(s) {
			// Dangling backslash: the literal is unterminated.
			break
		}
		if rawString {
			b.WriteByte('\\')
			b.WriteByte(s[i+1])
			i++
			continue
		}
		i += writePythonEscape(&b, s[i+1:])
	}
	return "", false
}

// writePythonEscape decodes the escape whose text after the backslash starts
// at rest[0], appends the result to b and returns how many bytes of rest it
// consumed (always at least one). rest must not be empty.
func writePythonEscape(b *strings.Builder, rest string) int {
	ch := rest[0]
	switch ch {
	case 'n':
		b.WriteByte('\n')
	case 'r':
		b.WriteByte('\r')
	case 't':
		b.WriteByte('\t')
	case 'a':
		b.WriteByte('\a')
	case 'b':
		b.WriteByte('\b')
	case 'f':
		b.WriteByte('\f')
	case 'v':
		b.WriteByte('\v')
	case '\\', '"', '\'':
		b.WriteByte(ch)
	case 'x':
		return writePythonHexEscape(b, rest, 2)
	case 'u':
		return writePythonHexEscape(b, rest, 4)
	case 'U':
		return writePythonHexEscape(b, rest, 8)
	case '0', '1', '2', '3', '4', '5', '6', '7':
		// Octal escape: one to three octal digits.
		var v rune
		n := 0
		for n < 3 && n < len(rest) && rest[n] >= '0' && rest[n] <= '7' {
			v = v*8 + rune(rest[n]-'0')
			n++
		}
		b.WriteRune(v)
		return n
	default:
		// Unrecognised escapes are left in the string unchanged.
		b.WriteByte('\\')
		b.WriteByte(ch)
	}
	return 1
}

// writePythonHexEscape decodes an \x, \u or \U escape with exactly the given
// number of hex digits following rest[0]. A malformed escape is kept
// literally (backslash plus letter) so the caller continues with the next
// byte. It returns the number of bytes of rest consumed.
func writePythonHexEscape(b *strings.Builder, rest string, digits int) int {
	if len(rest) > digits {
		var v uint32
		valid := true
		for j := 1; j <= digits && valid; j++ {
			d := rest[j]
			switch {
			case d >= '0' && d <= '9':
				v = v<<4 | uint32(d-'0')
			case d >= 'a' && d <= 'f':
				v = v<<4 | uint32(d-'a'+10)
			case d >= 'A' && d <= 'F':
				v = v<<4 | uint32(d-'A'+10)
			default:
				valid = false
			}
		}
		if valid {
			// WriteRune substitutes U+FFFD for values that are not valid
			// code points.
			b.WriteRune(rune(v))
			return 1 + digits
		}
	}
	b.WriteByte('\\')
	b.WriteByte(rest[0])
	return 1
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestExtractMetadataReadsCrawlerName checks that a CRAWLER_NAME assignment
// following a comment line is found and its value returned.
func TestExtractMetadataReadsCrawlerName(t *testing.T) {
	meta, err := ExtractMetadata(`
# comment
CRAWLER_NAME = "示例爬虫"
`)
	if err != nil {
		t.Fatalf("extract metadata: %v", err)
	}
	if meta.Name != "示例爬虫" {
		t.Fatalf("name = %q", meta.Name)
	}
}
|
||||
|
||||
// TestExtractMetadataRejectsMissingCrawlerName checks that a source without
// a CRAWLER_NAME assignment is rejected with an error naming CRAWLER_NAME.
func TestExtractMetadataRejectsMissingCrawlerName(t *testing.T) {
	_, err := ExtractMetadata(`print("hello")`)
	if err == nil {
		t.Fatal("expected error")
	}
	if !strings.Contains(err.Error(), "CRAWLER_NAME") {
		t.Fatalf("error = %v, want CRAWLER_NAME guidance", err)
	}
}
|
||||
|
||||
// TestExtractMetadataRejectsEmptyCrawlerName checks that a whitespace-only
// CRAWLER_NAME is rejected with the "must not be empty" error.
func TestExtractMetadataRejectsEmptyCrawlerName(t *testing.T) {
	_, err := ExtractMetadata(`CRAWLER_NAME = " "`)
	if err == nil {
		t.Fatal("expected error")
	}
	if !strings.Contains(err.Error(), "不能为空") {
		t.Fatalf("error = %v, want empty-name error", err)
	}
}
|
||||
@@ -0,0 +1,70 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
"github.com/video-site/backend/internal/mediasim"
|
||||
)
|
||||
|
||||
const (
	// nearDuplicateTitleThreshold is the minimum title similarity a
	// candidate needs before its thumbnail is compared.
	nearDuplicateTitleThreshold = 0.90
	// nearDuplicateSSIMThreshold is the minimum thumbnail SSIM score for a
	// candidate to be reported as a near duplicate.
	nearDuplicateSSIMThreshold = 0.95
	// nearDuplicateDurationToleranceSeconds is the duration tolerance passed
	// to the catalog's candidate query.
	nearDuplicateDurationToleranceSeconds = 2
	// nearDuplicateCandidateLimit is the candidate limit passed to the
	// catalog's candidate query.
	nearDuplicateCandidateLimit = 200
)
|
||||
|
||||
// nearDuplicateMatch describes the catalog video that findNearDuplicateVideo
// judged to be a near duplicate, together with the scores that led to it.
type nearDuplicateMatch struct {
	// video is the matching catalog video.
	video *catalog.Video
	// titleSimilarity is the score returned by mediasim.TitleSimilarity.
	titleSimilarity float64
	// thumbnailSSIM is the score returned by mediasim.ImageSSIM.
	thumbnailSSIM float64
}
|
||||
|
||||
func (c *Crawler) findNearDuplicateVideo(ctx context.Context, source *catalog.Video, sourceThumbPath string) (*nearDuplicateMatch, error) {
|
||||
if c == nil || c.cfg.Catalog == nil || source == nil {
|
||||
return nil, nil
|
||||
}
|
||||
sourceThumbPath = strings.TrimSpace(sourceThumbPath)
|
||||
commonThumbDir := strings.TrimSpace(c.cfg.CommonThumbDir)
|
||||
if sourceThumbPath == "" || commonThumbDir == "" || strings.TrimSpace(source.Title) == "" || source.DurationSeconds <= 0 {
|
||||
return nil, nil
|
||||
}
|
||||
if _, err := os.Stat(sourceThumbPath); err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
candidates, err := c.cfg.Catalog.ListNearDuplicateVideoCandidates(ctx, source, nearDuplicateDurationToleranceSeconds, nearDuplicateCandidateLimit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, candidate := range candidates {
|
||||
if candidate == nil || candidate.ID == source.ID {
|
||||
continue
|
||||
}
|
||||
titleScore := mediasim.TitleSimilarity(source.Title, candidate.Title)
|
||||
if titleScore < nearDuplicateTitleThreshold {
|
||||
continue
|
||||
}
|
||||
candidateThumbPath := mediaasset.ThumbnailPathInDir(commonThumbDir, candidate.ID)
|
||||
if _, err := os.Stat(candidateThumbPath); err != nil {
|
||||
continue
|
||||
}
|
||||
ssimScore, err := mediasim.ImageSSIM(sourceThumbPath, candidateThumbPath)
|
||||
if err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s candidate=%s thumbnail ssim failed: %v", c.cfg.Driver.ID(), source.ID, candidate.ID, err)
|
||||
continue
|
||||
}
|
||||
if ssimScore >= nearDuplicateSSIMThreshold {
|
||||
return &nearDuplicateMatch{
|
||||
video: candidate,
|
||||
titleSimilarity: titleScore,
|
||||
thumbnailSSIM: ssimScore,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,787 +0,0 @@
|
||||
package spider91
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
)
|
||||
|
||||
// TestCrawlerRunOnceFullFlow 用一个伪 python 脚本 + httptest 服务器
|
||||
// 把 Crawler.RunOnce 的完整流程跑一遍:脚本生成 JSON、下载视频和封面、入库、
|
||||
// 重复运行跳过已存在的 91 源视频 ID。
|
||||
func TestCrawlerRunOnceFullFlow(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
|
||||
tmp := t.TempDir()
|
||||
|
||||
// 1. 假 HTTP 服务器:根据路径返回视频数据或封面数据
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "120001.mp4"):
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("FAKEVIDEO1"))
|
||||
case strings.Contains(r.URL.Path, "120002.mp4"):
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("FAKEVIDEO2BYTES"))
|
||||
case strings.Contains(r.URL.Path, "/thumb/120001.jpg"):
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
_, _ = w.Write([]byte("\xff\xd8\xff\xe0fakejpg1"))
|
||||
case strings.Contains(r.URL.Path, "/thumb/120002.jpg"):
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
_, _ = w.Write([]byte("\xff\xd8\xff\xe0fakejpg2"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
// 2. 假 python 脚本:解析 --output / --stream-output 参数,
|
||||
// 在 stream 模式下逐行 echo 每条视频的 JSON 到 stdout(模拟 Python 端 stream),
|
||||
// 同时仍写 --output 文件作归档。
|
||||
videoEntries := []map[string]string{
|
||||
{
|
||||
"title": "Video One 口交",
|
||||
"thumb_url": srv.URL + "/thumb/not-120001.jpg",
|
||||
"video_url": srv.URL + "/videos/120001.mp4",
|
||||
"viewkey": "vk-001",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-001",
|
||||
},
|
||||
{
|
||||
"title": "Video Two",
|
||||
"thumb_url": srv.URL + "/thumb/not-120002.jpg",
|
||||
"video_url": srv.URL + "/videos/120002.mp4",
|
||||
"viewkey": "vk-002",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-002",
|
||||
},
|
||||
}
|
||||
scriptPath := filepath.Join(tmp, "fake_spider.sh")
|
||||
scriptBody := buildFakeSpiderScript(videoEntries)
|
||||
if err := os.WriteFile(scriptPath, []byte(scriptBody), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
|
||||
// 3. 准备 catalog + driver + crawler
|
||||
dbPath := filepath.Join(tmp, "test.db")
|
||||
cat, err := catalog.Open(dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
driveID := "spider91-test"
|
||||
rootDir := filepath.Join(tmp, "spider91", driveID)
|
||||
commonThumbs := filepath.Join(tmp, "previews", "thumbs")
|
||||
drv := New(Config{ID: driveID, RootDir: rootDir})
|
||||
|
||||
// 把 drive 也写入 catalog(Crawler 不直接读,但 main 真实流程会写)
|
||||
if err := cat.UpsertDrive(context.Background(), &catalog.Drive{
|
||||
ID: driveID,
|
||||
Kind: Kind,
|
||||
Name: "test crawler",
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert drive: %v", err)
|
||||
}
|
||||
if _, err := cat.CreateTagAndClassify(context.Background(), "Video One", nil, "user"); err != nil {
|
||||
t.Fatalf("create user tag: %v", err)
|
||||
}
|
||||
|
||||
var newVideos []*catalog.Video
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: commonThumbs,
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
OnNewVideo: func(v *catalog.Video) {
|
||||
newVideos = append(newVideos, v)
|
||||
},
|
||||
})
|
||||
|
||||
// 4. 第一次 RunOnce:应该新入库 2 条
|
||||
res, err := c.RunOnce(context.Background(), 15)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 2 || res.Skipped != 0 || res.Failed != 0 {
|
||||
t.Fatalf("first run result: new=%d skipped=%d failed=%d, want 2/0/0",
|
||||
res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
if res.TargetNew != 15 {
|
||||
t.Fatalf("first run TargetNew = %d, want 15", res.TargetNew)
|
||||
}
|
||||
if res.SeenSnapshot != 0 {
|
||||
t.Fatalf("first run SeenSnapshot = %d, want 0 (catalog empty before first run)", res.SeenSnapshot)
|
||||
}
|
||||
if len(newVideos) != 2 {
|
||||
t.Fatalf("OnNewVideo called %d times, want 2", len(newVideos))
|
||||
}
|
||||
|
||||
// 5. 检查文件落盘
|
||||
for _, item := range []struct {
|
||||
sourceID string
|
||||
size int64
|
||||
}{
|
||||
{"120001", 10},
|
||||
{"120002", 15},
|
||||
} {
|
||||
videoPath := filepath.Join(rootDir, "videos", item.sourceID+".mp4")
|
||||
info, err := os.Stat(videoPath)
|
||||
if err != nil {
|
||||
t.Fatalf("video %s missing: %v", item.sourceID, err)
|
||||
}
|
||||
if info.Size() != item.size {
|
||||
t.Fatalf("video %s size = %d, want %d", item.sourceID, info.Size(), item.size)
|
||||
}
|
||||
|
||||
thumbPath := filepath.Join(rootDir, "thumbs", item.sourceID+".jpg")
|
||||
if _, err := os.Stat(thumbPath); err != nil {
|
||||
t.Fatalf("thumb %s missing: %v", item.sourceID, err)
|
||||
}
|
||||
|
||||
// 复制到 common thumbs 目录的副本,名字按 videoID 来
|
||||
videoID := BuildVideoID(driveID, item.sourceID)
|
||||
commonThumb := filepath.Join(commonThumbs, videoID+".jpg")
|
||||
if _, err := os.Stat(commonThumb); err != nil {
|
||||
t.Fatalf("common thumb %s missing: %v", commonThumb, err)
|
||||
}
|
||||
}
|
||||
|
||||
// 6. 检查 catalog 入库
|
||||
for _, sourceID := range []string{"120001", "120002"} {
|
||||
videoID := BuildVideoID(driveID, sourceID)
|
||||
v, err := cat.GetVideo(context.Background(), videoID)
|
||||
if err != nil {
|
||||
t.Fatalf("GetVideo %s: %v", videoID, err)
|
||||
}
|
||||
if v.DriveID != driveID {
|
||||
t.Fatalf("video %s drive_id = %q want %q", videoID, v.DriveID, driveID)
|
||||
}
|
||||
if v.FileID != sourceID+".mp4" {
|
||||
t.Fatalf("video %s file_id = %q want %q", videoID, v.FileID, sourceID+".mp4")
|
||||
}
|
||||
if v.ThumbnailURL == "" {
|
||||
t.Fatalf("video %s ThumbnailURL empty (cover should be ready)", videoID)
|
||||
}
|
||||
if v.Author != DefaultAuthor {
|
||||
t.Fatalf("video %s author = %q want %q", videoID, v.Author, DefaultAuthor)
|
||||
}
|
||||
// 每条视频都应该带 "91porn" 标签(UpsertVideo 路径自动同步 tags 表)
|
||||
hasDefaultTag := false
|
||||
for _, tag := range v.Tags {
|
||||
if tag == DefaultTag {
|
||||
hasDefaultTag = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasDefaultTag {
|
||||
t.Fatalf("video %s tags = %v, want contain %q", videoID, v.Tags, DefaultTag)
|
||||
}
|
||||
if sourceID == "120001" {
|
||||
if !containsString(v.Tags, "口交") {
|
||||
t.Fatalf("video %s tags = %v, want contain built-in tag 口交", videoID, v.Tags)
|
||||
}
|
||||
if !containsString(v.Tags, "Video One") {
|
||||
t.Fatalf("video %s tags = %v, want contain user tag Video One", videoID, v.Tags)
|
||||
}
|
||||
}
|
||||
if sourceID == "120002" && (containsString(v.Tags, "口交") || containsString(v.Tags, "Video One")) {
|
||||
t.Fatalf("video %s tags = %v, should not inherit tags from other spider91 videos", videoID, v.Tags)
|
||||
}
|
||||
}
|
||||
|
||||
// 7. 第二次 RunOnce:源视频 ID 已存在 → 全部 skipped,无新文件下载
|
||||
newVideos = nil
|
||||
res2, err := c.RunOnce(context.Background(), 15)
|
||||
if err != nil {
|
||||
t.Fatalf("second RunOnce: %v", err)
|
||||
}
|
||||
if res2.NewVideos != 0 {
|
||||
t.Fatalf("second run NewVideos = %d, want 0", res2.NewVideos)
|
||||
}
|
||||
if res2.Skipped != 2 {
|
||||
t.Fatalf("second run Skipped = %d, want 2", res2.Skipped)
|
||||
}
|
||||
// 第二次运行时 catalog 里已经有 2 条,seen snapshot 应该写出 2 个源视频 ID
|
||||
if res2.SeenSnapshot != 2 {
|
||||
t.Fatalf("second run SeenSnapshot = %d, want 2", res2.SeenSnapshot)
|
||||
}
|
||||
if len(newVideos) != 0 {
|
||||
t.Fatalf("second run OnNewVideo fired %d times, want 0", len(newVideos))
|
||||
}
|
||||
}
|
||||
|
||||
// TestCrawlerRunOnceMissingScript 报错而不是 panic。
|
||||
func TestCrawlerRunOnceMissingScript(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "x.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("catalog: %v", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
drv := New(Config{ID: "x", RootDir: filepath.Join(tmp, "x")})
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "python3",
|
||||
ScriptPath: filepath.Join(tmp, "does-not-exist.py"),
|
||||
})
|
||||
|
||||
if _, err := c.RunOnce(context.Background(), 1); err == nil {
|
||||
t.Fatalf("expected error for missing script")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerPassesProxyToSpiderProcess(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
|
||||
tmp := t.TempDir()
|
||||
scriptPath := filepath.Join(tmp, "print_proxy_env.sh")
|
||||
script := `#!/bin/sh
|
||||
printf 'HTTP_PROXY=%s\n' "$HTTP_PROXY"
|
||||
printf 'HTTPS_PROXY=%s\n' "$HTTPS_PROXY"
|
||||
printf 'http_proxy=%s\n' "$http_proxy"
|
||||
printf 'https_proxy=%s\n' "$https_proxy"
|
||||
printf 'NO_PROXY=%s\n' "$NO_PROXY"
|
||||
printf 'no_proxy=%s\n' "$no_proxy"
|
||||
`
|
||||
if err := os.WriteFile(scriptPath, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
|
||||
proxyURL := "socks5h://proxy.local:1080"
|
||||
drv := New(Config{ID: "proxy-drive", RootDir: filepath.Join(tmp, "proxy-drive")})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
ProxyURL: proxyURL,
|
||||
})
|
||||
cmd, stdout, err := c.startSpiderTargetNew(
|
||||
context.Background(),
|
||||
1,
|
||||
filepath.Join(tmp, "seen.txt"),
|
||||
filepath.Join(tmp, "out.json"),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("startSpiderTargetNew: %v", err)
|
||||
}
|
||||
raw, err := io.ReadAll(stdout)
|
||||
if err != nil {
|
||||
t.Fatalf("read stdout: %v", err)
|
||||
}
|
||||
if err := cmd.Wait(); err != nil {
|
||||
t.Fatalf("wait: %v", err)
|
||||
}
|
||||
|
||||
want := strings.Join([]string{
|
||||
"HTTP_PROXY=" + proxyURL,
|
||||
"HTTPS_PROXY=" + proxyURL,
|
||||
"http_proxy=" + proxyURL,
|
||||
"https_proxy=" + proxyURL,
|
||||
"NO_PROXY=",
|
||||
"no_proxy=",
|
||||
}, "\n") + "\n"
|
||||
if string(raw) != want {
|
||||
t.Fatalf("proxy env = %q, want %q", string(raw), want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfigureExplicitProxySupportsSocksSchemes(t *testing.T) {
|
||||
for _, raw := range []string{
|
||||
"socks5://127.0.0.1:1080",
|
||||
"socks5h://proxy-user:proxy-pass@127.0.0.1:1080",
|
||||
} {
|
||||
t.Run(raw, func(t *testing.T) {
|
||||
transport := &http.Transport{Proxy: http.ProxyFromEnvironment}
|
||||
if err := configureExplicitProxy(transport, raw); err != nil {
|
||||
t.Fatalf("configureExplicitProxy: %v", err)
|
||||
}
|
||||
if transport.Proxy != nil {
|
||||
t.Fatalf("Transport.Proxy should be nil for SOCKS proxy")
|
||||
}
|
||||
if transport.DialContext == nil {
|
||||
t.Fatalf("Transport.DialContext should be set for SOCKS proxy")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
transport := &http.Transport{Proxy: http.ProxyFromEnvironment}
|
||||
if err := configureExplicitProxy(transport, "http://127.0.0.1:7890"); err != nil {
|
||||
t.Fatalf("configureExplicitProxy http: %v", err)
|
||||
}
|
||||
if transport.Proxy == nil {
|
||||
t.Fatalf("Transport.Proxy should be set for HTTP proxy")
|
||||
}
|
||||
if transport.DialContext != nil {
|
||||
t.Fatalf("Transport.DialContext should not be set for HTTP proxy")
|
||||
}
|
||||
|
||||
if err := configureExplicitProxy(&http.Transport{}, "ftp://127.0.0.1:21"); err == nil {
|
||||
t.Fatalf("expected unsupported proxy scheme error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectSocksTargetIPPrefersIPv4(t *testing.T) {
|
||||
got := selectSocksTargetIP([]net.IPAddr{
|
||||
{IP: net.ParseIP("2606:4700:20::681a:229")},
|
||||
{IP: net.ParseIP("104.26.3.41")},
|
||||
})
|
||||
if got == nil || got.String() != "104.26.3.41" {
|
||||
t.Fatalf("selectSocksTargetIP = %v, want IPv4 104.26.3.41", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCrawlerThumbDownloadFailureMarksStatusFailed 验证:网站封面下载失败时
|
||||
// crawler 把 thumbnail_status 显式标 'failed',避免后续封面补队列一直重复
|
||||
// 捞到这条 spider91 视频。
|
||||
//
|
||||
// 历史 bug:之前 thumb 下载失败仅打 log,url=”, status 走 schema DEFAULT 'pending'。
|
||||
// CountVideosNeedingThumbnail 条件是 url=” AND status != 'failed' → count=1。
|
||||
// spider91 drive 的 thumb worker 按设计不处理 spider91 视频 → 没人会改 status,
|
||||
// 后续补队列会一直认为它还缺封面。
|
||||
func TestCrawlerThumbDownloadFailureMarksStatusFailed(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
// 假 HTTP 服务器:thumb 路径返回 500,video 正常返回字节。
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "120101.mp4"):
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("FAKEVIDEO"))
|
||||
case strings.Contains(r.URL.Path, "120101.jpg"):
|
||||
http.Error(w, "broken", http.StatusInternalServerError)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
videoEntries := []map[string]string{
|
||||
{
|
||||
"title": "Thumb Failure Video",
|
||||
"thumb_url": srv.URL + "/thumb/120101.jpg",
|
||||
"video_url": srv.URL + "/videos/120101.mp4",
|
||||
"viewkey": "vk-thumb-fail",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-thumb-fail",
|
||||
},
|
||||
}
|
||||
scriptPath := filepath.Join(tmp, "fake.sh")
|
||||
if err := os.WriteFile(scriptPath, []byte(buildFakeSpiderScript(videoEntries)), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "test.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("catalog: %v", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
driveID := "thumbfail-drive"
|
||||
drv := New(Config{ID: driveID, RootDir: filepath.Join(tmp, "spider91", driveID)})
|
||||
if err := cat.UpsertDrive(context.Background(), &catalog.Drive{
|
||||
ID: driveID, Kind: Kind, Name: "thumbfail",
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert drive: %v", err)
|
||||
}
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 5)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 {
|
||||
t.Fatalf("expected 1 new video, got %d (failed=%d)", res.NewVideos, res.Failed)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(context.Background(), "spider91-"+driveID+"-120101")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Errorf("ThumbnailURL = %q, want empty (download failed)", got.ThumbnailURL)
|
||||
}
|
||||
|
||||
// 关键断言:CountVideosNeedingThumbnail 应该返回 0。
|
||||
// 该函数的 SQL 条件是 `url = '' AND status != 'failed'`;如果 crawler 没把
|
||||
// status 标 'failed'(schema DEFAULT 'pending'),count 就会是 1。
|
||||
count, err := cat.CountVideosNeedingThumbnail(context.Background(), driveID)
|
||||
if err != nil {
|
||||
t.Fatalf("count: %v", err)
|
||||
}
|
||||
if count != 0 {
|
||||
t.Fatalf("CountVideosNeedingThumbnail = %d, want 0 (status should be 'failed' to unblock teaser worker)", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerUsesCrawlerVideoURLForFirstDownload(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
var detailRequests int32
|
||||
var originalRequests int32
|
||||
var wrongRequests int32
|
||||
var srv *httptest.Server
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.URL.Path == "/v.php":
|
||||
atomic.AddInt32(&detailRequests, 1)
|
||||
_, _ = w.Write([]byte(spider91DetailHTML(srv.URL + "/videos/856305.mp4?token=wrong")))
|
||||
case r.URL.Path == "/videos/120201.mp4" && r.URL.Query().Get("token") == "original":
|
||||
atomic.AddInt32(&originalRequests, 1)
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("ORIGINALVIDEO"))
|
||||
case r.URL.Path == "/videos/856305.mp4":
|
||||
atomic.AddInt32(&wrongRequests, 1)
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("WRONGVIDEO"))
|
||||
case r.URL.Path == "/thumb/120201.jpg":
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
_, _ = w.Write([]byte("\xff\xd8\xff\xe0thumb"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
entry := map[string]string{
|
||||
"title": "Use Original URL First",
|
||||
"thumb_url": srv.URL + "/thumb/wrong-thumb.jpg",
|
||||
"video_url": srv.URL + "/videos/120201.mp4?token=original",
|
||||
"viewkey": "vk-use-original",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-use-original",
|
||||
}
|
||||
cat, drv, scriptPath := seedCrawlerTestDeps(t, tmp, "use-original-drive", []map[string]string{entry})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result new=%d failed=%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
if got := atomic.LoadInt32(&detailRequests); got != 0 {
|
||||
t.Fatalf("detail requests = %d, want 0 (first download should use crawler URL)", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&originalRequests); got != 1 {
|
||||
t.Fatalf("original URL requests = %d, want 1", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&wrongRequests); got != 0 {
|
||||
t.Fatalf("wrong source URL requests = %d, want 0", got)
|
||||
}
|
||||
info, err := os.Stat(filepath.Join(drv.RootDir(), "videos", "120201.mp4"))
|
||||
if err != nil {
|
||||
t.Fatalf("original video missing: %v", err)
|
||||
}
|
||||
if info.Size() != int64(len("ORIGINALVIDEO")) {
|
||||
t.Fatalf("original video size = %d, want %d", info.Size(), len("ORIGINALVIDEO"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRefreshesVideoURLAfterExpiredDownload(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
var detailRequests int32
|
||||
var staleRequests int32
|
||||
var freshRequests int32
|
||||
var srv *httptest.Server
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.URL.Path == "/v.php":
|
||||
n := atomic.AddInt32(&detailRequests, 1)
|
||||
videoURL := srv.URL + "/videos/120202.mp4?token=stale"
|
||||
if n > 1 {
|
||||
videoURL = srv.URL + "/videos/120202.mp4?token=fresh"
|
||||
}
|
||||
_, _ = w.Write([]byte(spider91DetailHTML(videoURL)))
|
||||
case r.URL.Path == "/videos/120202.mp4" && r.URL.Query().Get("token") == "stale":
|
||||
atomic.AddInt32(&staleRequests, 1)
|
||||
http.Error(w, "expired", http.StatusForbidden)
|
||||
case r.URL.Path == "/videos/120202.mp4" && r.URL.Query().Get("token") == "fresh":
|
||||
atomic.AddInt32(&freshRequests, 1)
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("REFRESHEDVIDEO"))
|
||||
case r.URL.Path == "/thumb/120202.jpg":
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
_, _ = w.Write([]byte("\xff\xd8\xff\xe0thumb"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
entry := map[string]string{
|
||||
"title": "Refresh After Expired Download",
|
||||
"thumb_url": srv.URL + "/thumb/wrong-thumb.jpg",
|
||||
"video_url": srv.URL + "/videos/120202.mp4?token=old",
|
||||
"viewkey": "vk-refresh-after",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-refresh-after",
|
||||
}
|
||||
cat, drv, scriptPath := seedCrawlerTestDeps(t, tmp, "refresh-after-drive", []map[string]string{entry})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result new=%d failed=%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
if got := atomic.LoadInt32(&detailRequests); got < 2 {
|
||||
t.Fatalf("detail requests = %d, want at least 2 (initial refresh + retry refresh)", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&staleRequests); got != 1 {
|
||||
t.Fatalf("stale URL requests = %d, want 1", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&freshRequests); got != 1 {
|
||||
t.Fatalf("fresh URL requests = %d, want 1", got)
|
||||
}
|
||||
info, err := os.Stat(filepath.Join(drv.RootDir(), "videos", "120202.mp4"))
|
||||
if err != nil {
|
||||
t.Fatalf("refreshed video missing: %v", err)
|
||||
}
|
||||
if info.Size() != int64(len("REFRESHEDVIDEO")) {
|
||||
t.Fatalf("refreshed video size = %d, want %d", info.Size(), len("REFRESHEDVIDEO"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRejectsRefreshedSourceIDMismatch(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
var srv *httptest.Server
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.URL.Path == "/v.php":
|
||||
_, _ = w.Write([]byte(spider91DetailHTML(srv.URL + "/videos/856305.mp4?token=fresh")))
|
||||
case r.URL.Path == "/videos/1203058.mp4":
|
||||
http.Error(w, "expired", http.StatusForbidden)
|
||||
case r.URL.Path == "/videos/856305.mp4":
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("WRONGVIDEO"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
entry := map[string]string{
|
||||
"title": "Source ID Mismatch",
|
||||
"thumb_url": srv.URL + "/thumb/1203058.jpg",
|
||||
"video_url": srv.URL + "/videos/1203058.mp4?token=old",
|
||||
"viewkey": "86fd91cce1f2e1a154cc",
|
||||
"source_id": "1203058",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=86fd91cce1f2e1a154cc",
|
||||
}
|
||||
cat, drv, scriptPath := seedCrawlerTestDeps(t, tmp, "mismatch-drive", []map[string]string{entry})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Failed != 1 {
|
||||
t.Fatalf("result new=%d failed=%d, want 0/1", res.NewVideos, res.Failed)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.RootDir(), "videos", "1203058.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("mismatched source file should not be written, stat err=%v", err)
|
||||
}
|
||||
if v, _ := cat.GetVideo(context.Background(), BuildVideoID(drv.ID(), "1203058")); v != nil {
|
||||
t.Fatalf("mismatched video should not be inserted: %+v", v)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSourceIDForItemRequiresNumericSourceID(t *testing.T) {
|
||||
if got := sourceIDForItem(spiderVideoEntry{
|
||||
Viewkey: "86fd91cce1f2e1a154cc",
|
||||
VideoURL: "https://cdn.example/videos/1203058.mp4?token=x",
|
||||
}); got != "1203058" {
|
||||
t.Fatalf("sourceIDForItem(video url) = %q, want 1203058", got)
|
||||
}
|
||||
if got := sourceIDForItem(spiderVideoEntry{
|
||||
Viewkey: "86fd91cce1f2e1a154cc",
|
||||
ThumbURL: "https://img.example/thumb/1203058.jpg",
|
||||
}); got != "1203058" {
|
||||
t.Fatalf("sourceIDForItem(thumb url) = %q, want 1203058", got)
|
||||
}
|
||||
if got := sourceIDForItem(spiderVideoEntry{
|
||||
Viewkey: "86fd91cce1f2e1a154cc",
|
||||
SourceID: "not-numeric",
|
||||
VideoURL: "https://cdn.example/videos/video.mp4",
|
||||
}); got != "" {
|
||||
t.Fatalf("sourceIDForItem(non numeric) = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeThumbURLForSource(t *testing.T) {
|
||||
got := normalizeThumbURLForSource("https://img.example/thumb/856305.jpg?x=1#frag", "1203058")
|
||||
want := "https://img.example/thumb/1203058.jpg"
|
||||
if got != want {
|
||||
t.Fatalf("normalizeThumbURLForSource = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpider91ListURLForDetail(t *testing.T) {
|
||||
got := spider91ListURLForDetail("https://www.91porn.com/view_video.php?viewkey=abc&page=5&c=furum&viewtype=basic&category=top")
|
||||
want := "https://www.91porn.com/v.php?category=top&page=5&viewtype=basic"
|
||||
if got != want {
|
||||
t.Fatalf("spider91ListURLForDetail = %q, want %q", got, want)
|
||||
}
|
||||
if got := spider91ListURLForDetail("http://127.0.0.1/v.php?viewkey=abc&page=5&viewtype=basic&category=top"); got != "" {
|
||||
t.Fatalf("spider91ListURLForDetail(localhost) = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpider91CookieHeader(t *testing.T) {
|
||||
got := spider91CookieHeader([]*http.Cookie{
|
||||
{Name: "CLIPSHARE", Value: "abc"},
|
||||
{Name: "ga", Value: "def"},
|
||||
{Name: "mode", Value: "m"},
|
||||
})
|
||||
want := "mode=d; CLIPSHARE=abc; ga=def"
|
||||
if got != want {
|
||||
t.Fatalf("spider91CookieHeader = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func spider91DetailHTML(videoURL string) string {
|
||||
fragment := `<video><source src="` + videoURL + `" type="video/mp4"></video>`
|
||||
return `document.write(strencode2("` + url.PathEscape(fragment) + `"));`
|
||||
}
|
||||
|
||||
func seedCrawlerTestDeps(t *testing.T, tmp, driveID string, entries []map[string]string) (*catalog.Catalog, *Driver, string) {
|
||||
t.Helper()
|
||||
scriptPath := filepath.Join(tmp, driveID+"-fake.sh")
|
||||
if err := os.WriteFile(scriptPath, []byte(buildFakeSpiderScript(entries)), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
cat, err := catalog.Open(filepath.Join(tmp, driveID+".db"))
|
||||
if err != nil {
|
||||
t.Fatalf("catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: driveID, RootDir: filepath.Join(tmp, "spider91", driveID)})
|
||||
if err := cat.UpsertDrive(context.Background(), &catalog.Drive{
|
||||
ID: driveID, Kind: Kind, Name: driveID,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert drive: %v", err)
|
||||
}
|
||||
return cat, drv, scriptPath
|
||||
}
|
||||
|
||||
// buildFakeSpiderScript 生成一个伪 python 脚本(其实是 sh)。
|
||||
//
|
||||
// 行为:
|
||||
// - 解析 --output FILE / --stream-output 两个 flag
|
||||
// - --stream-output 时:逐行输出每个 entry 的 JSON 到 stdout 并 flush
|
||||
// - --output 时:把完整 JSON 数据写到 FILE(向后兼容,且作归档)
|
||||
//
|
||||
// 用 sh 来写是为了避免 Python 依赖。每条 entry 的 JSON 用 Go marshal 出来后嵌入。
|
||||
func buildFakeSpiderScript(entries []map[string]string) string {
|
||||
var sb strings.Builder
|
||||
sb.WriteString("#!/bin/sh\n")
|
||||
sb.WriteString("out=\"\"; stream=0\n")
|
||||
sb.WriteString("while [ $# -gt 0 ]; do case \"$1\" in --output) out=\"$2\"; shift 2;; --stream-output) stream=1; shift;; *) shift;; esac; done\n")
|
||||
|
||||
// stream 模式:逐行 echo
|
||||
sb.WriteString("if [ \"$stream\" = \"1\" ]; then\n")
|
||||
for _, e := range entries {
|
||||
raw, _ := json.Marshal(e)
|
||||
// 用单引号 here-string 形式确保 JSON 中的双引号原样出来
|
||||
sb.WriteString(" cat <<'STREAM_EOF'\n")
|
||||
sb.Write(raw)
|
||||
sb.WriteString("\nSTREAM_EOF\n")
|
||||
}
|
||||
sb.WriteString("fi\n")
|
||||
|
||||
// 写 --output 文件(带完整 wrapper)
|
||||
sb.WriteString("if [ -n \"$out\" ]; then\n")
|
||||
sb.WriteString(" mkdir -p \"$(dirname \"$out\")\" 2>/dev/null\n")
|
||||
sb.WriteString(" cat > \"$out\" <<'OUT_EOF'\n")
|
||||
wrapper := map[string]any{
|
||||
"crawl_time": "2026-01-01T00:00:00",
|
||||
"total_videos": len(entries),
|
||||
"videos": entries,
|
||||
}
|
||||
wrapped, _ := json.MarshalIndent(wrapper, "", " ")
|
||||
sb.Write(wrapped)
|
||||
sb.WriteString("\nOUT_EOF\n")
|
||||
sb.WriteString("fi\n")
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// containsString reports whether want is an element of values.
func containsString(values []string, want string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == want {
			return true
		}
	}
	return false
}
|
||||
@@ -1,194 +0,0 @@
|
||||
// Package spider91 把 91porn 爬虫的产物(本地下载好的视频和封面)
|
||||
// 包装成一个 drives.Drive 实现,让它跟其它网盘一样可以挂载到 catalog 上。
|
||||
//
|
||||
// 与其它 drive 不同的是:
|
||||
// - 数据来源不是云盘 API,而是 Python 子进程跑 spider_91porn.py 后下载到本地
|
||||
// - StreamURL 直接返回本地文件路径,由 api.handleSpider91Video 用 http.ServeFile 服务
|
||||
// - List/Stat 用于 GC 兜底(按本地文件名列出 videos/ 目录)
|
||||
package spider91
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
// Kind is the type identifier of this drive, written to catalog.drives.kind.
const Kind = "spider91"
|
||||
|
||||
// Config holds the settings needed to create a Driver.
type Config struct {
	// ID is the drive id in the catalog; the driver uses it to keep the local
	// directory of each spider91 instance separate.
	ID string
	// RootDir is this drive's root directory on disk; the driver creates
	// videos/ and thumbs/ beneath it. The backend normally builds it as
	// <data_dir>/spider91/<driveID>/.
	RootDir string
}
|
||||
|
||||
// Driver implements drives.Drive on top of locally downloaded spider91 files.
type Driver struct {
	// id is the catalog drive id, copied from Config.ID.
	id string
	// rootDir is the storage root on disk, copied from Config.RootDir.
	rootDir string
}
|
||||
|
||||
// New 构造一个 Driver。
|
||||
func New(c Config) *Driver {
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
rootDir: c.RootDir,
|
||||
}
|
||||
}
|
||||
|
||||
// Kind 返回 "spider91"。
|
||||
func (d *Driver) Kind() string { return Kind }
|
||||
|
||||
// ID 返回 catalog 中的 drive id。
|
||||
func (d *Driver) ID() string { return d.id }
|
||||
|
||||
// RootID 返回根目录的逻辑 ID。spider91 没有真正的目录结构,
|
||||
// 这里固定返回 "/" 占位,调用方实际不会用它去 List 子目录。
|
||||
func (d *Driver) RootID() string { return "/" }
|
||||
|
||||
// Init 确保 rootDir/videos 和 rootDir/thumbs 存在。
|
||||
func (d *Driver) Init(ctx context.Context) error {
|
||||
if strings.TrimSpace(d.rootDir) == "" {
|
||||
return errors.New("spider91: empty rootDir")
|
||||
}
|
||||
for _, sub := range []string{"videos", "thumbs"} {
|
||||
if err := os.MkdirAll(filepath.Join(d.rootDir, sub), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// VideosDir 返回视频文件存放目录的绝对路径。
|
||||
func (d *Driver) VideosDir() string { return filepath.Join(d.rootDir, "videos") }
|
||||
|
||||
// ThumbsDir 返回封面文件存放目录的绝对路径。
|
||||
func (d *Driver) ThumbsDir() string { return filepath.Join(d.rootDir, "thumbs") }
|
||||
|
||||
// RootDir 返回 driver 的存储根。
|
||||
func (d *Driver) RootDir() string { return d.rootDir }
|
||||
|
||||
// VideoPath 返回某个视频文件的绝对路径,并校验路径不会逃出 videos/ 目录。
|
||||
func (d *Driver) VideoPath(fileID string) (string, error) {
|
||||
return safeJoin(d.VideosDir(), fileID)
|
||||
}
|
||||
|
||||
// ThumbPath 返回某个封面文件的绝对路径。
|
||||
func (d *Driver) ThumbPath(fileID string) (string, error) {
|
||||
return safeJoin(d.ThumbsDir(), fileID)
|
||||
}
|
||||
|
||||
// List 列出 videos/ 目录下的视频文件,便于上层做 GC 兜底;
|
||||
// dirID 当前会被忽略,spider91 没有目录树。
|
||||
func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error) {
|
||||
entries, err := os.ReadDir(d.VideosDir())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
out := make([]drives.Entry, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
info, err := e.Info()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
out = append(out, drives.Entry{
|
||||
ID: e.Name(),
|
||||
Name: e.Name(),
|
||||
Size: info.Size(),
|
||||
IsDir: false,
|
||||
ModTime: info.ModTime(),
|
||||
})
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// Stat 查询单个视频文件的元数据。
|
||||
func (d *Driver) Stat(ctx context.Context, fileID string) (*drives.Entry, error) {
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &drives.Entry{
|
||||
ID: fileID,
|
||||
Name: fileID,
|
||||
Size: info.Size(),
|
||||
IsDir: info.IsDir(),
|
||||
ModTime: info.ModTime(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// StreamURL 返回本地视频文件路径,给 ffmpeg / 上层服务使用。
|
||||
// 注意:proxy.serve 不能直接处理本地路径,回放要走 api.handleSpider91Video。
|
||||
// 预览视频/封面 worker 通过 localPreviewLink 兜底走本地文件,刚好兼容 path 形式的 URL。
|
||||
func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLink, error) {
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if info.IsDir() || info.Size() == 0 {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
URL: path,
|
||||
Expires: time.Now().Add(24 * time.Hour),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Upload 不支持:上传由 crawler 自己完成,不通过 Drive 接口。
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
// EnsureDir 不支持。
|
||||
func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
// safeJoin 把 fileID 拼到 root 下,保证最终路径不会逃出 root。
|
||||
// fileID 必须是单纯的文件名(不含 / 或 .. 等组件)。
|
||||
func safeJoin(root, fileID string) (string, error) {
|
||||
id := strings.TrimSpace(fileID)
|
||||
if id == "" || filepath.Base(id) != id {
|
||||
return "", errors.New("spider91: invalid file id")
|
||||
}
|
||||
if root == "" {
|
||||
return "", errors.New("spider91: empty root dir")
|
||||
}
|
||||
rootAbs, err := filepath.Abs(root)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
pathAbs, err := filepath.Abs(filepath.Join(rootAbs, id))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if pathAbs != rootAbs && !strings.HasPrefix(pathAbs, rootAbs+string(os.PathSeparator)) {
|
||||
return "", errors.New("spider91: file id escapes root")
|
||||
}
|
||||
return pathAbs, nil
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
@@ -1,149 +0,0 @@
|
||||
package spider91
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDriverInitCreatesSubdirs(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: filepath.Join(dir, "drive1")})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
for _, sub := range []string{"videos", "thumbs"} {
|
||||
info, err := os.Stat(filepath.Join(dir, "drive1", sub))
|
||||
if err != nil {
|
||||
t.Fatalf("stat %s: %v", sub, err)
|
||||
}
|
||||
if !info.IsDir() {
|
||||
t.Fatalf("%s is not a dir", sub)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDriverInitRejectsEmptyRoot(t *testing.T) {
|
||||
d := New(Config{ID: "test", RootDir: ""})
|
||||
if err := d.Init(context.Background()); err == nil {
|
||||
t.Fatalf("expected error for empty root")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoPathRejectsTraversal(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
cases := []string{
|
||||
"",
|
||||
" ",
|
||||
"../etc/passwd",
|
||||
"sub/dir.mp4",
|
||||
"./abc.mp4",
|
||||
}
|
||||
for _, c := range cases {
|
||||
if _, err := d.VideoPath(c); err == nil {
|
||||
t.Fatalf("VideoPath(%q) accepted, want error", c)
|
||||
}
|
||||
if _, err := d.ThumbPath(c); err == nil {
|
||||
t.Fatalf("ThumbPath(%q) accepted, want error", c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoPathHappy(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
got, err := d.VideoPath("abc.mp4")
|
||||
if err != nil {
|
||||
t.Fatalf("VideoPath: %v", err)
|
||||
}
|
||||
want := filepath.Join(dir, "videos", "abc.mp4")
|
||||
wantAbs, _ := filepath.Abs(want)
|
||||
if got != wantAbs {
|
||||
t.Fatalf("VideoPath: got %q want %q", got, wantAbs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListReturnsFiles(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "abc.mp4"), "data")
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "def.mp4"), "x")
|
||||
|
||||
entries, err := d.List(context.Background(), "/")
|
||||
if err != nil {
|
||||
t.Fatalf("List: %v", err)
|
||||
}
|
||||
if len(entries) != 2 {
|
||||
t.Fatalf("List len = %d, want 2", len(entries))
|
||||
}
|
||||
names := map[string]int64{}
|
||||
for _, e := range entries {
|
||||
names[e.Name] = e.Size
|
||||
}
|
||||
if names["abc.mp4"] != 4 || names["def.mp4"] != 1 {
|
||||
t.Fatalf("unexpected entries: %+v", names)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLReturnsLocalPath(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "abc.mp4"), "videodata")
|
||||
|
||||
link, err := d.StreamURL(context.Background(), "abc.mp4")
|
||||
if err != nil {
|
||||
t.Fatalf("StreamURL: %v", err)
|
||||
}
|
||||
if !strings.HasSuffix(link.URL, "videos/abc.mp4") {
|
||||
t.Fatalf("StreamURL.URL = %q, want suffix videos/abc.mp4", link.URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLEmptyFile(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "abc.mp4"), "")
|
||||
if _, err := d.StreamURL(context.Background(), "abc.mp4"); !errors.Is(err, os.ErrNotExist) {
|
||||
t.Fatalf("empty file should return os.ErrNotExist, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildVideoIDStable(t *testing.T) {
|
||||
id1 := BuildVideoID("crawler1", "abc")
|
||||
id2 := BuildVideoID("crawler1", "abc")
|
||||
if id1 != id2 {
|
||||
t.Fatalf("BuildVideoID not deterministic")
|
||||
}
|
||||
if id1 != "spider91-crawler1-abc" {
|
||||
t.Fatalf("BuildVideoID format unexpected: %q", id1)
|
||||
}
|
||||
}
|
||||
|
||||
// mustWrite creates path (and its parent directories) with the given
// content, failing the test immediately on any error.
func mustWrite(t *testing.T, path, content string) {
	t.Helper()
	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
		t.Fatalf("mkdir: %v", mkErr)
	}
	payload := []byte(content)
	if writeErr := os.WriteFile(path, payload, 0o644); writeErr != nil {
		t.Fatalf("write: %v", writeErr)
	}
}
|
||||
@@ -1,55 +0,0 @@
|
||||
package spider91
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestDetectVideoExt(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
want string
|
||||
}{
|
||||
{"mp4 with token", "https://cdn.example.com/mp43/abc.mp4?st=xyz&e=12345", ".mp4"},
|
||||
{"webm", "https://cdn.example.com/path/video.webm?token=1", ".webm"},
|
||||
{"mkv", "https://cdn.example.com/path/foo.mkv", ".mkv"},
|
||||
{"mov", "https://cdn.example.com/path/foo.mov?x=1", ".mov"},
|
||||
{"flv", "https://cdn.example.com/path/foo.flv", ".flv"},
|
||||
{"m4v", "https://cdn.example.com/path/foo.m4v", ".m4v"},
|
||||
{"avi", "https://cdn.example.com/path/foo.avi", ".avi"},
|
||||
{"m3u8 fallback to mp4", "https://cdn.example.com/path/playlist.m3u8", ".mp4"},
|
||||
{"ts fallback to mp4", "https://cdn.example.com/path/seg001.ts", ".mp4"},
|
||||
{"unknown ext fallback", "https://cdn.example.com/path/foo.weird", ".mp4"},
|
||||
{"no ext fallback", "https://cdn.example.com/v.php?id=12345", ".mp4"},
|
||||
{"empty url", "", ".mp4"},
|
||||
{"uppercase", "https://cdn.example.com/path/FOO.MP4?token=1", ".mp4"},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := detectVideoExt(tc.url)
|
||||
if got != tc.want {
|
||||
t.Fatalf("detectVideoExt(%q) = %q, want %q", tc.url, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectThumbExt(t *testing.T) {
|
||||
tests := []struct {
|
||||
url string
|
||||
want string
|
||||
}{
|
||||
{"https://cdn.example.com/thumb/foo.jpg", ".jpg"},
|
||||
{"https://cdn.example.com/thumb/foo.jpeg", ".jpeg"},
|
||||
{"https://cdn.example.com/thumb/foo.png", ".png"},
|
||||
{"https://cdn.example.com/thumb/foo.webp", ".webp"},
|
||||
{"https://cdn.example.com/thumb/foo.gif", ".gif"},
|
||||
{"https://cdn.example.com/thumb/foo.svg", ".jpg"}, // not in whitelist
|
||||
{"https://cdn.example.com/thumb/no-ext", ".jpg"},
|
||||
{"", ".jpg"},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
got := detectThumbExt(tc.url)
|
||||
if got != tc.want {
|
||||
t.Fatalf("detectThumbExt(%q) = %q, want %q", tc.url, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,19 +2,23 @@ package wopan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
sdk "github.com/OpenListTeam/wopan-sdk-go"
|
||||
"github.com/go-resty/resty/v2"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
// Driver 封装联通沃盘
|
||||
// Driver 封装联通网盘
|
||||
type Driver struct {
|
||||
id string
|
||||
rootID string
|
||||
@@ -23,14 +27,24 @@ type Driver struct {
|
||||
refreshToken string
|
||||
client *sdk.WoClient
|
||||
onTokenUpdate func(access, refresh string)
|
||||
uploadTempDir string
|
||||
|
||||
listMu sync.Mutex
|
||||
lastListAt time.Time
|
||||
listInterval time.Duration
|
||||
listCooldown time.Duration
|
||||
|
||||
fileIDMu sync.RWMutex
|
||||
fidToID map[string]string
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
ID string
|
||||
AccessToken string
|
||||
RefreshToken string
|
||||
FamilyID string // 空则走个人空间,有值则走家庭空间
|
||||
RootID string // 根目录 ID,默认 "0"
|
||||
ID string
|
||||
AccessToken string
|
||||
RefreshToken string
|
||||
FamilyID string // 空则走个人空间,有值则走家庭空间
|
||||
RootID string // 根目录 ID,默认 "0"
|
||||
UploadTempDir string
|
||||
// 当 SDK 刷新 token 时回调,便于持久化
|
||||
OnTokenUpdate func(access, refresh string)
|
||||
}
|
||||
@@ -47,6 +61,10 @@ func New(c Config) *Driver {
|
||||
accessToken: c.AccessToken,
|
||||
refreshToken: c.RefreshToken,
|
||||
onTokenUpdate: c.OnTokenUpdate,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
listInterval: 800 * time.Millisecond,
|
||||
listCooldown: 5 * time.Minute,
|
||||
fidToID: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,15 +96,41 @@ func (d *Driver) spaceType() string {
|
||||
}
|
||||
|
||||
func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error) {
|
||||
d.listMu.Lock()
|
||||
defer d.listMu.Unlock()
|
||||
|
||||
var result []drives.Entry
|
||||
pageNum := 0
|
||||
pageSize := 100
|
||||
for {
|
||||
data, err := d.client.QueryAllFiles(d.spaceType(), dirID, pageNum, pageSize, 0, d.familyID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("wopan list: %w", err)
|
||||
var data *sdk.QueryAllFilesData
|
||||
for attempt := 0; ; attempt++ {
|
||||
if err := d.waitForListSlotLocked(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var err error
|
||||
data, err = d.client.QueryAllFiles(d.spaceType(), dirID, pageNum, pageSize, 0, d.familyID, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
})
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
err = wopanRequestError("list", err)
|
||||
wait, ok := drives.RateLimitRetryAfter(err)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
if wait <= 0 {
|
||||
wait = d.listCooldown
|
||||
}
|
||||
log.Printf("[wopan] list cooling down drive=%s dir=%s page=%d cooldown=%s attempt=%d err=%v",
|
||||
d.id, dirID, pageNum, wait, attempt+1, err)
|
||||
if err := sleepContext(ctx, wait); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
for _, f := range data.Files {
|
||||
d.rememberFileID(f)
|
||||
result = append(result, fileToEntry(f, dirID))
|
||||
}
|
||||
if len(data.Files) < pageSize {
|
||||
@@ -103,9 +147,11 @@ func (d *Driver) Stat(ctx context.Context, fileID string) (*drives.Entry, error)
|
||||
}
|
||||
|
||||
func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLink, error) {
|
||||
data, err := d.client.GetDownloadUrlV2([]string{fileID})
|
||||
data, err := d.client.GetDownloadUrlV2([]string{fileID}, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("wopan download url: %w", err)
|
||||
return nil, wopanRequestError("download url", err)
|
||||
}
|
||||
if len(data.List) == 0 {
|
||||
return nil, fmt.Errorf("wopan download url: empty response")
|
||||
@@ -119,7 +165,12 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
// wopan SDK 要求 *os.File,先把流落到临时文件再上传
|
||||
tmp, err := os.CreateTemp("", "wopan-upload-*.tmp")
|
||||
if d.uploadTempDir != "" {
|
||||
if err := os.MkdirAll(d.uploadTempDir, 0o755); err != nil {
|
||||
return "", fmt.Errorf("wopan upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(d.uploadTempDir, "wopan-upload-*.tmp")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -142,9 +193,151 @@ func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader,
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("wopan upload: %w", err)
|
||||
}
|
||||
if fid != "" {
|
||||
if objectID, err := d.findDeleteFileIDInParent(ctx, parentID, drives.SourceFile{
|
||||
FileID: fid,
|
||||
Name: name,
|
||||
Size: size,
|
||||
}); err == nil {
|
||||
d.rememberFIDMapping(fid, objectID)
|
||||
} else {
|
||||
log.Printf("[wopan] upload drive=%s parent=%s fid=%s resolve object id: %v", d.id, parentID, fid, err)
|
||||
}
|
||||
}
|
||||
return fid, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Rename(ctx context.Context, fileID, newName string) error {
|
||||
if d.client == nil {
|
||||
return fmt.Errorf("wopan rename: driver not initialized")
|
||||
}
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return fmt.Errorf("wopan rename: empty file id")
|
||||
}
|
||||
newName = strings.TrimSpace(newName)
|
||||
if newName == "" {
|
||||
return fmt.Errorf("wopan rename: empty new name")
|
||||
}
|
||||
renameID := fileID
|
||||
if cached := d.cachedDeleteFileID(fileID); cached != "" {
|
||||
renameID = cached
|
||||
}
|
||||
if err := d.client.RenameFileOrDirectory(d.spaceType(), 1, renameID, newName, d.familyID, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
}); err != nil {
|
||||
return wopanRequestError("rename", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if d.client == nil {
|
||||
return fmt.Errorf("wopan remove: driver not initialized")
|
||||
}
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return fmt.Errorf("wopan remove: empty file id")
|
||||
}
|
||||
deleteID := fileID
|
||||
if cached := d.cachedDeleteFileID(fileID); cached != "" {
|
||||
deleteID = cached
|
||||
}
|
||||
if err := d.deleteFileByObjectID(ctx, deleteID); err != nil {
|
||||
return fmt.Errorf("wopan remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) RemoveSource(ctx context.Context, source drives.SourceFile) error {
|
||||
if d.client == nil {
|
||||
return fmt.Errorf("wopan remove: driver not initialized")
|
||||
}
|
||||
fileID := strings.TrimSpace(source.FileID)
|
||||
if fileID == "" {
|
||||
return fmt.Errorf("wopan remove: empty file id")
|
||||
}
|
||||
deleteID, err := d.resolveDeleteFileID(ctx, source)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := d.deleteFileByObjectID(ctx, deleteID); err != nil {
|
||||
return fmt.Errorf("wopan remove: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) deleteFileByObjectID(ctx context.Context, fileID string) error {
|
||||
if err := d.client.DeleteFile(d.spaceType(), nil, []string{fileID}, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) resolveDeleteFileID(ctx context.Context, source drives.SourceFile) (string, error) {
|
||||
fileID := strings.TrimSpace(source.FileID)
|
||||
if fileID == "" {
|
||||
return "", fmt.Errorf("wopan remove: empty file id")
|
||||
}
|
||||
if cached := d.cachedDeleteFileID(fileID); cached != "" {
|
||||
return cached, nil
|
||||
}
|
||||
parentID := strings.TrimSpace(source.ParentID)
|
||||
if parentID == "" {
|
||||
return fileID, nil
|
||||
}
|
||||
return d.findDeleteFileIDInParent(ctx, parentID, source)
|
||||
}
|
||||
|
||||
func (d *Driver) findDeleteFileIDInParent(ctx context.Context, parentID string, source drives.SourceFile) (string, error) {
|
||||
d.listMu.Lock()
|
||||
defer d.listMu.Unlock()
|
||||
|
||||
pageNum := 0
|
||||
pageSize := 100
|
||||
for {
|
||||
var data *sdk.QueryAllFilesData
|
||||
for attempt := 0; ; attempt++ {
|
||||
if err := d.waitForListSlotLocked(ctx); err != nil {
|
||||
return "", err
|
||||
}
|
||||
var err error
|
||||
data, err = d.client.QueryAllFiles(d.spaceType(), parentID, pageNum, pageSize, 0, d.familyID, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
})
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
err = wopanRequestError("resolve delete id", err)
|
||||
wait, ok := drives.RateLimitRetryAfter(err)
|
||||
if !ok {
|
||||
return "", err
|
||||
}
|
||||
if wait <= 0 {
|
||||
wait = d.listCooldown
|
||||
}
|
||||
log.Printf("[wopan] resolve delete id cooling down drive=%s parent=%s page=%d cooldown=%s attempt=%d err=%v",
|
||||
d.id, parentID, pageNum, wait, attempt+1, err)
|
||||
if err := sleepContext(ctx, wait); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
for _, f := range data.Files {
|
||||
d.rememberFileID(f)
|
||||
if id, ok := deleteFileIDFromWopanFile(f, source); ok {
|
||||
return id, nil
|
||||
}
|
||||
}
|
||||
if len(data.Files) < pageSize {
|
||||
break
|
||||
}
|
||||
pageNum++
|
||||
}
|
||||
return "", fmt.Errorf("wopan remove: source file %q not found under parent %q", source.FileID, parentID)
|
||||
}
|
||||
|
||||
func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
parts := splitPath(pathFromRoot)
|
||||
currentID := d.rootID
|
||||
@@ -154,9 +347,11 @@ func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, er
|
||||
return "", err
|
||||
}
|
||||
if childID == "" {
|
||||
resp, err := d.client.CreateDirectory(d.spaceType(), currentID, name, d.familyID)
|
||||
resp, err := d.client.CreateDirectory(d.spaceType(), currentID, name, d.familyID, func(req *resty.Request) {
|
||||
req.SetContext(ctx)
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("wopan mkdir %s: %w", name, err)
|
||||
return "", wopanRequestError("mkdir "+name, err)
|
||||
}
|
||||
childID = resp.Id
|
||||
}
|
||||
@@ -190,9 +385,12 @@ func fileToEntry(f *sdk.File, parentID string) drives.Entry {
|
||||
mod, _ := time.Parse("2006-01-02 15:04:05", f.CreateTime)
|
||||
name := f.Name
|
||||
isDir := f.Type == 0
|
||||
id := f.Fid
|
||||
id := f.Id
|
||||
if !isDir && f.Fid != "" {
|
||||
id = f.Fid
|
||||
}
|
||||
if id == "" {
|
||||
id = f.Id
|
||||
id = f.Fid
|
||||
}
|
||||
if isDir && !strings.HasSuffix(name, "/") {
|
||||
// 不改 name,只标志
|
||||
@@ -208,6 +406,128 @@ func fileToEntry(f *sdk.File, parentID string) drives.Entry {
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Driver) rememberFileID(f *sdk.File) {
|
||||
if f == nil || f.Type == 0 {
|
||||
return
|
||||
}
|
||||
objectID := strings.TrimSpace(f.Id)
|
||||
fid := strings.TrimSpace(f.Fid)
|
||||
if objectID == "" {
|
||||
return
|
||||
}
|
||||
d.fileIDMu.Lock()
|
||||
if d.fidToID == nil {
|
||||
d.fidToID = make(map[string]string)
|
||||
}
|
||||
d.fidToID[objectID] = objectID
|
||||
if fid != "" {
|
||||
d.fidToID[fid] = objectID
|
||||
}
|
||||
d.fileIDMu.Unlock()
|
||||
}
|
||||
|
||||
func (d *Driver) rememberFIDMapping(fid, objectID string) {
|
||||
fid = strings.TrimSpace(fid)
|
||||
objectID = strings.TrimSpace(objectID)
|
||||
if fid == "" || objectID == "" {
|
||||
return
|
||||
}
|
||||
d.fileIDMu.Lock()
|
||||
if d.fidToID == nil {
|
||||
d.fidToID = make(map[string]string)
|
||||
}
|
||||
d.fidToID[fid] = objectID
|
||||
d.fidToID[objectID] = objectID
|
||||
d.fileIDMu.Unlock()
|
||||
}
|
||||
|
||||
func (d *Driver) cachedDeleteFileID(fileID string) string {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return ""
|
||||
}
|
||||
d.fileIDMu.RLock()
|
||||
defer d.fileIDMu.RUnlock()
|
||||
return strings.TrimSpace(d.fidToID[fileID])
|
||||
}
|
||||
|
||||
func deleteFileIDFromWopanFile(f *sdk.File, source drives.SourceFile) (string, bool) {
|
||||
if f == nil || f.Type == 0 {
|
||||
return "", false
|
||||
}
|
||||
sourceID := strings.TrimSpace(source.FileID)
|
||||
if sourceID == "" {
|
||||
return "", false
|
||||
}
|
||||
objectID := strings.TrimSpace(f.Id)
|
||||
fid := strings.TrimSpace(f.Fid)
|
||||
if objectID == "" {
|
||||
return "", false
|
||||
}
|
||||
if sourceID != objectID && sourceID != fid {
|
||||
return "", false
|
||||
}
|
||||
return objectID, true
|
||||
}
|
||||
|
||||
func (d *Driver) waitForListSlotLocked(ctx context.Context) error {
|
||||
if d.listInterval <= 0 || d.lastListAt.IsZero() {
|
||||
d.lastListAt = time.Now()
|
||||
return ctx.Err()
|
||||
}
|
||||
next := d.lastListAt.Add(d.listInterval)
|
||||
now := time.Now()
|
||||
if now.Before(next) {
|
||||
if err := sleepContext(ctx, next.Sub(now)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
d.lastListAt = time.Now()
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
func sleepContext(ctx context.Context, d time.Duration) error {
|
||||
if d <= 0 {
|
||||
return ctx.Err()
|
||||
}
|
||||
timer := time.NewTimer(d)
|
||||
defer timer.Stop()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-timer.C:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func wopanRequestError(step string, err error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
wrapped := fmt.Errorf("wopan %s: %w", step, err)
|
||||
if isWopanRateLimitError(err) {
|
||||
return &drives.RateLimitError{
|
||||
Provider: "wopan",
|
||||
Err: wrapped,
|
||||
}
|
||||
}
|
||||
return wrapped
|
||||
}
|
||||
|
||||
func isWopanRateLimitError(err error) bool {
|
||||
if err == nil || errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
return false
|
||||
}
|
||||
return drives.ErrorMentionsHTTPStatus(err,
|
||||
http.StatusTooManyRequests,
|
||||
http.StatusInternalServerError,
|
||||
http.StatusBadGateway,
|
||||
http.StatusServiceUnavailable,
|
||||
http.StatusGatewayTimeout,
|
||||
509,
|
||||
)
|
||||
}
|
||||
|
||||
func guessMime(name string) string {
|
||||
ext := strings.ToLower(path.Ext(name))
|
||||
switch ext {
|
||||
@@ -229,3 +549,5 @@ func guessMime(name string) string {
|
||||
|
||||
// 确保实现接口
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
var _ drives.SourceRemover = (*Driver)(nil)
|
||||
|
||||
@@ -0,0 +1,113 @@
|
||||
package wopan
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
sdk "github.com/OpenListTeam/wopan-sdk-go"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
func TestFileToEntryUsesDirectoryIDAndFileFID(t *testing.T) {
|
||||
dir := fileToEntry(&sdk.File{
|
||||
Id: "dir-object-id",
|
||||
Fid: "0",
|
||||
Type: 0,
|
||||
Name: "collection",
|
||||
}, "root")
|
||||
if !dir.IsDir {
|
||||
t.Fatal("directory entry IsDir = false")
|
||||
}
|
||||
if dir.ID != "dir-object-id" {
|
||||
t.Fatalf("directory id = %q, want object id", dir.ID)
|
||||
}
|
||||
|
||||
file := fileToEntry(&sdk.File{
|
||||
Id: "file-object-id",
|
||||
Fid: "fid/with/slash",
|
||||
Type: 1,
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}, "dir-object-id")
|
||||
if file.IsDir {
|
||||
t.Fatal("file entry IsDir = true")
|
||||
}
|
||||
if file.ID != "fid/with/slash" {
|
||||
t.Fatalf("file id = %q, want fid for download", file.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteFileIDFromWopanFileUsesObjectIDForFID(t *testing.T) {
|
||||
got, ok := deleteFileIDFromWopanFile(&sdk.File{
|
||||
Id: "file-object-id",
|
||||
Fid: "fid/with/slash",
|
||||
Type: 1,
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}, drives.SourceFile{
|
||||
FileID: "fid/with/slash",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
})
|
||||
if !ok {
|
||||
t.Fatal("delete file id not resolved")
|
||||
}
|
||||
if got != "file-object-id" {
|
||||
t.Fatalf("delete file id = %q, want object id", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteFileIDFromWopanFileAcceptsObjectID(t *testing.T) {
|
||||
got, ok := deleteFileIDFromWopanFile(&sdk.File{
|
||||
Id: "file-object-id",
|
||||
Fid: "fid-1",
|
||||
Type: 1,
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}, drives.SourceFile{
|
||||
FileID: "file-object-id",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
})
|
||||
if !ok {
|
||||
t.Fatal("delete file id not resolved")
|
||||
}
|
||||
if got != "file-object-id" {
|
||||
t.Fatalf("delete file id = %q, want object id", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteFileIDFromWopanFileRejectsIDMismatch(t *testing.T) {
|
||||
if _, ok := deleteFileIDFromWopanFile(&sdk.File{
|
||||
Id: "file-object-id",
|
||||
Fid: "fid-1",
|
||||
Type: 1,
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}, drives.SourceFile{
|
||||
FileID: "other-fid",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}); ok {
|
||||
t.Fatal("delete file id resolved despite id mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWopanRequestErrorWrapsRateLimit(t *testing.T) {
|
||||
err := wopanRequestError("list", errors.New("request failed with status: 429 Too Many Requests"))
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if rateLimit.Provider != "wopan" {
|
||||
t.Fatalf("provider = %q, want wopan", rateLimit.Provider)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWopanRequestErrorLeavesNormalErrors(t *testing.T) {
|
||||
err := wopanRequestError("download url", errors.New("invalid access token"))
|
||||
var rateLimit *drives.RateLimitError
|
||||
if errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want non-rate-limit error", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,349 @@
|
||||
package wopan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-resty/resty/v2"
|
||||
)
|
||||
|
||||
// Defaults for the QR-code login client.
const (
|
||||
// defaultQRCodeAPIBase is the endpoint base used when QRConfig.APIBaseURL is empty.
defaultQRCodeAPIBase = "https://panservice.mail.wo.cn/wohome/open/v1/QRCode"
|
||||
// defaultQRCodeClient is sent as both the "client-id" and "x-yp-client-id" request headers.
defaultQRCodeClient = "1001000021"
|
||||
)
|
||||
|
||||
// QRConfig carries the optional settings for NewQRClient; every zero-valued
// field is replaced by a default there.
type QRConfig struct {
|
||||
// APIBaseURL overrides the QR-code endpoint base; surrounding spaces and trailing slashes are stripped.
APIBaseURL string
|
||||
// HTTPClient is the underlying HTTP client; when nil a client with a 20-second timeout is created.
HTTPClient *http.Client
|
||||
// Now supplies the current time (defaults to time.Now); presumably injectable for tests.
Now func() time.Time
|
||||
}
|
||||
|
||||
// QRClient talks to the QR-code login endpoints (see Generate and Poll).
type QRClient struct {
|
||||
// apiBase is the endpoint base without a trailing slash.
apiBase string
|
||||
// client is the resty client every request is built from.
client *resty.Client
|
||||
// now returns the current time; used to stamp QRCodeSession.ExpiresAt.
now func() time.Time
|
||||
}
|
||||
|
||||
// QRCodeSession is the result of Generate: a freshly issued QR code.
type QRCodeSession struct {
|
||||
// UUID identifies the QR code as returned by the API; presumably the value to pass to Poll.
UUID string `json:"uuid"`
|
||||
// QRImageDataURL is the API's image payload passed through qrImageDataURL.
QRImageDataURL string `json:"qrImageDataUrl"`
|
||||
// ExpiresAt is an RFC 3339 timestamp set to 60 seconds after generation.
ExpiresAt string `json:"expiresAt,omitempty"`
|
||||
}
|
||||
|
||||
// QRCodeStatus is the result of Poll. The token and family fields are only
// filled in when State is 3, the state Poll treats as a successful login.
type QRCodeStatus struct {
|
||||
// State is the numeric "state" value from the API response.
State int `json:"state"`
|
||||
// StatusText is the text qrStateText derives from State.
StatusText string `json:"statusText"`
|
||||
AccessToken string `json:"accessToken,omitempty"`
|
||||
RefreshToken string `json:"refreshToken,omitempty"`
|
||||
// FamilyID is looked up under several candidate keys and may be empty.
FamilyID string `json:"familyID,omitempty"`
|
||||
}
|
||||
|
||||
func NewQRClient(c QRConfig) *QRClient {
|
||||
apiBase := strings.TrimRight(strings.TrimSpace(c.APIBaseURL), "/")
|
||||
if apiBase == "" {
|
||||
apiBase = defaultQRCodeAPIBase
|
||||
}
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = &http.Client{Timeout: 20 * time.Second}
|
||||
}
|
||||
now := c.Now
|
||||
if now == nil {
|
||||
now = time.Now
|
||||
}
|
||||
return &QRClient{
|
||||
apiBase: apiBase,
|
||||
client: resty.NewWithClient(httpClient).
|
||||
SetTimeout(20*time.Second).
|
||||
SetHeader("Accept", "application/json"),
|
||||
now: now,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *QRClient) Generate(ctx context.Context) (QRCodeSession, error) {
|
||||
var envelope qrEnvelope
|
||||
res, err := c.request(ctx).
|
||||
SetResult(&envelope).
|
||||
Get(c.apiBase + "/generate")
|
||||
if err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
if res.IsError() {
|
||||
return QRCodeSession{}, qrAPIError(envelope.message(), res.StatusCode())
|
||||
}
|
||||
|
||||
var result qrGenerateResult
|
||||
if err := decodeResult(envelope.Result, &result); err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
result.UUID = strings.TrimSpace(result.UUID)
|
||||
result.Image = strings.TrimSpace(result.Image)
|
||||
if result.UUID == "" {
|
||||
return QRCodeSession{}, errors.New("wopan qr: empty uuid")
|
||||
}
|
||||
if result.Image == "" {
|
||||
return QRCodeSession{}, errors.New("wopan qr: empty image")
|
||||
}
|
||||
return QRCodeSession{
|
||||
UUID: result.UUID,
|
||||
QRImageDataURL: qrImageDataURL(result.Image),
|
||||
ExpiresAt: c.now().Add(60 * time.Second).Format(time.RFC3339),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *QRClient) Poll(ctx context.Context, uuid string) (QRCodeStatus, error) {
|
||||
uuid = strings.TrimSpace(uuid)
|
||||
if uuid == "" {
|
||||
return QRCodeStatus{}, errors.New("uuid is required")
|
||||
}
|
||||
|
||||
var envelope qrEnvelope
|
||||
res, err := c.request(ctx).
|
||||
SetQueryParam("uuid", uuid).
|
||||
SetResult(&envelope).
|
||||
Get(c.apiBase + "/query")
|
||||
if err != nil {
|
||||
return QRCodeStatus{}, err
|
||||
}
|
||||
if res.IsError() {
|
||||
return QRCodeStatus{}, qrAPIError(envelope.message(), res.StatusCode())
|
||||
}
|
||||
|
||||
result, err := decodeResultMap(envelope.Result)
|
||||
if err != nil {
|
||||
return QRCodeStatus{}, err
|
||||
}
|
||||
state := intValue(result["state"])
|
||||
status := QRCodeStatus{
|
||||
State: state,
|
||||
StatusText: qrStateText(state),
|
||||
}
|
||||
if state != 3 {
|
||||
return status, nil
|
||||
}
|
||||
|
||||
status.AccessToken = findStringByKeys(result, "access_token", "accessToken", "token", "tokenValue")
|
||||
status.RefreshToken = findStringByKeys(result, "refresh_token", "refreshToken")
|
||||
status.FamilyID = findStringByKeys(result, "family_id", "familyId", "familyID", "defaultFamilyId", "defaultHomeId", "homeId")
|
||||
if status.AccessToken == "" || status.RefreshToken == "" {
|
||||
missing := make([]string, 0, 2)
|
||||
if status.AccessToken == "" {
|
||||
missing = append(missing, "access_token")
|
||||
}
|
||||
if status.RefreshToken == "" {
|
||||
missing = append(missing, "refresh_token")
|
||||
}
|
||||
return QRCodeStatus{}, fmt.Errorf("wopan qr: login succeeded but missing %s; available keys: %s",
|
||||
strings.Join(missing, ", "), strings.Join(collectJSONKeys(result), ", "))
|
||||
}
|
||||
return status, nil
|
||||
}
|
||||
|
||||
func (c *QRClient) request(ctx context.Context) *resty.Request {
|
||||
return c.client.R().
|
||||
SetContext(ctx).
|
||||
SetHeaders(map[string]string{
|
||||
"client-id": defaultQRCodeClient,
|
||||
"x-yp-client-id": defaultQRCodeClient,
|
||||
"Accept": "application/json",
|
||||
"Accept-Language": "zh-CN,zh;q=0.9",
|
||||
})
|
||||
}
|
||||
|
||||
// qrEnvelope is the outer JSON object of a QR API response.
type qrEnvelope struct {
	// Meta is the nested "meta" object; its message fields are consulted by
	// message() after the top-level ones.
	Meta qrMeta `json:"meta"`
	// Result is kept as raw JSON so each endpoint can decode it into its own shape.
	Result json.RawMessage `json:"result"`
	// Code is decoded but not read anywhere in this part of the file.
	// NOTE(review): presumably an API status code — confirm before relying on it.
	Code any `json:"code,omitempty"`
	// Message and Msg are alternative spellings of a top-level message.
	Message string `json:"message,omitempty"`
	Msg     string `json:"msg,omitempty"`
}
|
||||
|
||||
// qrMeta is the "meta" object nested inside a QR API response envelope.
type qrMeta struct {
	// Code is decoded but not read anywhere in this part of the file.
	// NOTE(review): presumably an API status code — confirm before relying on it.
	Code any `json:"code,omitempty"`
	// Message and Msg are alternative spellings of the meta-level message.
	Message string `json:"message,omitempty"`
	Msg     string `json:"msg,omitempty"`
}
|
||||
|
||||
// qrGenerateResult is the "result" payload decoded by Generate.
type qrGenerateResult struct {
	// UUID identifies the QR session; Poll later sends it as the "uuid" query parameter.
	UUID string `json:"uuid"`
	// Image is the QR picture; Generate passes it through qrImageDataURL.
	Image string `json:"image"`
}
|
||||
|
||||
func (e qrEnvelope) message() string {
|
||||
for _, s := range []string{e.Message, e.Msg, e.Meta.Message, e.Meta.Msg} {
|
||||
if strings.TrimSpace(s) != "" {
|
||||
return strings.TrimSpace(s)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func decodeResult(raw json.RawMessage, dst any) error {
|
||||
if len(raw) == 0 || string(raw) == "null" {
|
||||
return errors.New("wopan qr: empty result")
|
||||
}
|
||||
if err := json.Unmarshal(raw, dst); err != nil {
|
||||
return fmt.Errorf("wopan qr: decode result: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func decodeResultMap(raw json.RawMessage) (map[string]any, error) {
|
||||
var result map[string]any
|
||||
if err := decodeResult(raw, &result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if result == nil {
|
||||
return nil, errors.New("wopan qr: empty result")
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// qrImageDataURL turns the image value returned by the API into a data URL.
// A value that already starts with "data:image/" (case-insensitive) is
// returned trimmed; anything else is prefixed as a base64-encoded PNG.
func qrImageDataURL(image string) string {
	trimmed := strings.TrimSpace(image)
	if lowered := strings.ToLower(trimmed); !strings.HasPrefix(lowered, "data:image/") {
		return "data:image/png;base64," + trimmed
	}
	return trimmed
}
|
||||
|
||||
// qrAPIError builds the error reported for a failed QR API call. The trimmed
// server message is used when present; otherwise the text is "HTTP <status>".
func qrAPIError(message string, httpStatus int) error {
	text := strings.TrimSpace(message)
	if text == "" {
		text = "HTTP " + strconv.Itoa(httpStatus)
	}
	return errors.New(text)
}
|
||||
|
||||
// qrStateText maps a numeric QR login state (1 through 4) to the text shown
// to the user. Any other value yields the "unknown state" text.
func qrStateText(state int) string {
	// Index 0 is intentionally left empty: known states start at 1.
	labels := [...]string{
		1: "等待扫码",
		2: "已扫码,请在联通网盘 App 确认",
		3: "登录成功",
		4: "二维码已过期",
	}
	if state >= 1 && state < len(labels) {
		return labels[state]
	}
	return "未知状态"
}
|
||||
|
||||
func intValue(v any) int {
|
||||
switch x := v.(type) {
|
||||
case int:
|
||||
return x
|
||||
case int64:
|
||||
return int(x)
|
||||
case float64:
|
||||
return int(x)
|
||||
case json.Number:
|
||||
n, _ := x.Int64()
|
||||
return int(n)
|
||||
case string:
|
||||
n, _ := strconv.Atoi(strings.TrimSpace(x))
|
||||
return n
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func findStringByKeys(v any, keys ...string) string {
|
||||
targets := make(map[string]struct{}, len(keys))
|
||||
for _, key := range keys {
|
||||
targets[normalizeJSONKey(key)] = struct{}{}
|
||||
}
|
||||
return findStringByNormalizedKeys(v, targets)
|
||||
}
|
||||
|
||||
func findStringByNormalizedKeys(v any, targets map[string]struct{}) string {
|
||||
switch x := v.(type) {
|
||||
case map[string]any:
|
||||
for key, value := range x {
|
||||
if _, ok := targets[normalizeJSONKey(key)]; ok {
|
||||
if s := stringValue(value); s != "" {
|
||||
return s
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, value := range x {
|
||||
if s := findStringByNormalizedKeys(value, targets); s != "" {
|
||||
return s
|
||||
}
|
||||
}
|
||||
case []any:
|
||||
for _, value := range x {
|
||||
if s := findStringByNormalizedKeys(value, targets); s != "" {
|
||||
return s
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func stringValue(v any) string {
|
||||
switch x := v.(type) {
|
||||
case string:
|
||||
return strings.TrimSpace(x)
|
||||
case int:
|
||||
return strconv.Itoa(x)
|
||||
case int64:
|
||||
return strconv.FormatInt(x, 10)
|
||||
case float64:
|
||||
if x == float64(int64(x)) {
|
||||
return strconv.FormatInt(int64(x), 10)
|
||||
}
|
||||
return strconv.FormatFloat(x, 'f', -1, 64)
|
||||
case json.Number:
|
||||
return strings.TrimSpace(x.String())
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// normalizeJSONKey reduces a JSON key to a comparison form: trimmed,
// lower-cased, and with underscores, hyphens and spaces removed.
func normalizeJSONKey(key string) string {
	lowered := strings.ToLower(strings.TrimSpace(key))
	return strings.NewReplacer("_", "", "-", "", " ", "").Replace(lowered)
}
|
||||
|
||||
func collectJSONKeys(v any) []string {
|
||||
seen := map[string]struct{}{}
|
||||
var walk func(any)
|
||||
walk = func(value any) {
|
||||
switch x := value.(type) {
|
||||
case map[string]any:
|
||||
for key, child := range x {
|
||||
if strings.TrimSpace(key) != "" {
|
||||
seen[key] = struct{}{}
|
||||
}
|
||||
walk(child)
|
||||
}
|
||||
case []any:
|
||||
for _, child := range x {
|
||||
walk(child)
|
||||
}
|
||||
}
|
||||
}
|
||||
walk(v)
|
||||
|
||||
keys := make([]string, 0, len(seen))
|
||||
for key := range seen {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
if len(keys) > 16 {
|
||||
keys = append(keys[:16], "...")
|
||||
}
|
||||
return keys
|
||||
}
|
||||
@@ -0,0 +1,128 @@
|
||||
package wopan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestQRCodeGenerateUsesServiceImage(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/QRCode/generate" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
if r.Header.Get("client-id") != defaultQRCodeClient {
|
||||
t.Fatalf("client-id = %q, want %q", r.Header.Get("client-id"), defaultQRCodeClient)
|
||||
}
|
||||
if r.Header.Get("x-yp-client-id") != defaultQRCodeClient {
|
||||
t.Fatalf("x-yp-client-id = %q, want %q", r.Header.Get("x-yp-client-id"), defaultQRCodeClient)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"meta": map[string]string{"code": "0000", "message": "ok"},
|
||||
"result": map[string]string{
|
||||
"uuid": "uuid-1",
|
||||
"image": "iVBORw0KGgo=",
|
||||
},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
got, err := NewQRClient(QRConfig{APIBaseURL: api.URL + "/QRCode"}).Generate(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("Generate() error = %v", err)
|
||||
}
|
||||
if got.UUID != "uuid-1" {
|
||||
t.Fatalf("uuid = %q, want uuid-1", got.UUID)
|
||||
}
|
||||
if got.QRImageDataURL != "data:image/png;base64,iVBORw0KGgo=" {
|
||||
t.Fatalf("qrImageDataUrl = %q, want PNG data URL", got.QRImageDataURL)
|
||||
}
|
||||
if got.ExpiresAt == "" {
|
||||
t.Fatalf("expiresAt is empty")
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRCodePollPending(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/QRCode/query" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
if r.URL.Query().Get("uuid") != "uuid-1" {
|
||||
t.Fatalf("uuid query = %q, want uuid-1", r.URL.Query().Get("uuid"))
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"meta": map[string]string{"code": "0000", "message": "ok"},
|
||||
"result": map[string]any{
|
||||
"state": 1,
|
||||
"token": nil,
|
||||
"refreshToken": nil,
|
||||
},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
got, err := NewQRClient(QRConfig{APIBaseURL: api.URL + "/QRCode"}).Poll(context.Background(), "uuid-1")
|
||||
if err != nil {
|
||||
t.Fatalf("Poll() error = %v", err)
|
||||
}
|
||||
if got.State != 1 || got.StatusText != "等待扫码" || got.AccessToken != "" || got.RefreshToken != "" {
|
||||
t.Fatalf("status = %#v, want pending without tokens", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRCodePollSuccessMapsTokenFields(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/QRCode/query" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"meta": map[string]string{"code": "0000", "message": "ok"},
|
||||
"result": map[string]any{
|
||||
"state": 3,
|
||||
"token": "access-1",
|
||||
"refreshToken": "refresh-1",
|
||||
},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
got, err := NewQRClient(QRConfig{APIBaseURL: api.URL + "/QRCode"}).Poll(context.Background(), "uuid-1")
|
||||
if err != nil {
|
||||
t.Fatalf("Poll() error = %v", err)
|
||||
}
|
||||
if got.State != 3 || got.AccessToken != "access-1" || got.RefreshToken != "refresh-1" {
|
||||
t.Fatalf("status = %#v, want token and refreshToken mapped", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRCodePollSuccessReportsMissingTokenKeys(t *testing.T) {
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"meta": map[string]string{"code": "0000", "message": "ok"},
|
||||
"result": map[string]any{
|
||||
"state": 3,
|
||||
"user": map[string]string{"name": "demo"},
|
||||
},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(api.Close)
|
||||
|
||||
_, err := NewQRClient(QRConfig{APIBaseURL: api.URL + "/QRCode"}).Poll(context.Background(), "uuid-1")
|
||||
if err == nil {
|
||||
t.Fatal("Poll() error is nil, want missing token error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "missing access_token, refresh_token") ||
|
||||
!strings.Contains(err.Error(), "available keys") {
|
||||
t.Fatalf("error = %q, want missing token keys", err.Error())
|
||||
}
|
||||
}
|
||||
@@ -149,6 +149,28 @@ func (w *Worker) Status() TaskStatus {
|
||||
return status
|
||||
}
|
||||
|
||||
// WaitIdle blocks until the fingerprint queue is empty and no item is being processed.
|
||||
func (w *Worker) WaitIdle(ctx context.Context) error {
|
||||
if w == nil {
|
||||
return nil
|
||||
}
|
||||
if w.queue.lengthExcluding("") == 0 {
|
||||
return nil
|
||||
}
|
||||
ticker := time.NewTicker(200 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-ticker.C:
|
||||
if w.queue.lengthExcluding("") == 0 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Worker) processQueued(ctx context.Context, v *catalog.Video) {
|
||||
defer w.queue.release(v.ID)
|
||||
if w.Catalog == nil || w.Drive == nil || v == nil || v.ID == "" {
|
||||
@@ -327,11 +349,74 @@ func readHTTPRange(ctx context.Context, hc *http.Client, link *drives.StreamLink
|
||||
return data, nil
|
||||
}
|
||||
}
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
|
||||
if remoteRangeResponseLooksRateLimited(link.URL, resp.StatusCode, body) {
|
||||
return nil, &drives.RateLimitError{
|
||||
Provider: "fingerprint",
|
||||
RetryAfter: parseRetryAfter(resp.Header.Get("Retry-After")),
|
||||
Err: fmt.Errorf("remote sample rate limited: status=%d body=%s", resp.StatusCode, strings.TrimSpace(string(body))),
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("fingerprint: range request got status=%d for bytes=%d-%d", resp.StatusCode, r.start, end)
|
||||
}
|
||||
return io.ReadAll(io.LimitReader(resp.Body, r.length))
|
||||
}
|
||||
|
||||
func remoteRangeResponseLooksRateLimited(rawURL string, status int, body []byte) bool {
|
||||
if status == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
if isWopanMediaURL(rawURL) && (status == http.StatusForbidden || status == http.StatusTooManyRequests ||
|
||||
status == http.StatusInternalServerError || status == http.StatusBadGateway ||
|
||||
status == http.StatusServiceUnavailable || status == http.StatusGatewayTimeout ||
|
||||
status == 509) {
|
||||
return true
|
||||
}
|
||||
if isGuangYaPanMediaURL(rawURL) && (status == http.StatusForbidden || status == http.StatusTooManyRequests ||
|
||||
status == http.StatusInternalServerError || status == http.StatusBadGateway ||
|
||||
status == http.StatusServiceUnavailable || status == http.StatusGatewayTimeout ||
|
||||
status == 509) {
|
||||
return true
|
||||
}
|
||||
if status == http.StatusForbidden && isGoogleDriveMediaURL(rawURL) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isWopanMediaURL reports whether rawURL looks like a Wopan download link:
// the path must contain "/openapi/download" and the host must either end in
// "pan.wo.cn" or "smartont.net" or contain "wo.cn". Matching is
// case-insensitive and unparsable URLs never match.
func isWopanMediaURL(rawURL string) bool {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return false
	}
	if !strings.Contains(strings.ToLower(parsed.Path), "/openapi/download") {
		return false
	}
	host := strings.ToLower(parsed.Hostname())
	switch {
	case strings.HasSuffix(host, "pan.wo.cn"),
		strings.HasSuffix(host, "smartont.net"),
		strings.Contains(host, "wo.cn"):
		return true
	}
	return false
}
|
||||
|
||||
// isGuangYaPanMediaURL reports whether rawURL's host ends in "guangyacdn.com"
// or "guangyapan.com" (case-insensitive). Unparsable URLs never match.
func isGuangYaPanMediaURL(rawURL string) bool {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return false
	}
	host := strings.ToLower(parsed.Hostname())
	for _, suffix := range [...]string{"guangyacdn.com", "guangyapan.com"} {
		if strings.HasSuffix(host, suffix) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// isGoogleDriveMediaURL reports whether rawURL points at the Google Drive API:
// the host (port included) must contain "googleapis.com" and the path must
// contain "/drive/". Matching is case-insensitive and unparsable URLs never
// match.
func isGoogleDriveMediaURL(rawURL string) bool {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return false
	}
	if !strings.Contains(strings.ToLower(parsed.Host), "googleapis.com") {
		return false
	}
	return strings.Contains(strings.ToLower(parsed.Path), "/drive/")
}
|
||||
|
||||
func parseRetryAfter(raw string) time.Duration {
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
|
||||
@@ -2,6 +2,7 @@ package fingerprint
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -85,6 +86,75 @@ func TestComputeRemoteUsesRangeSamples(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeRemote429ReturnsRateLimit(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Retry-After", "60")
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
_, _ = w.Write([]byte(`{"error":{"code":429}}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
drv := &fakeDrive{paths: map[string]string{"remote": srv.URL + "/video.mp4"}}
|
||||
_, err := Compute(ctx, drv, &catalog.Video{ID: "remote", FileID: "remote", Size: 1024 * 1024}, Config{
|
||||
SampleSizeBytes: 4,
|
||||
FullHashMaxSize: 8,
|
||||
HTTPClient: srv.Client(),
|
||||
}, srv.Client())
|
||||
if err == nil {
|
||||
t.Fatal("compute succeeded, want rate limit")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if rateLimit.RetryAfter != time.Minute {
|
||||
t.Fatalf("retry after = %s, want 1m", rateLimit.RetryAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWopanRemoteRangeErrorsLookRateLimited(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
rawURL string
|
||||
status int
|
||||
}{
|
||||
{rawURL: "https://gxdownload.pan.wo.cn:8445/openapi/download?fid=encoded", status: http.StatusForbidden},
|
||||
{rawURL: "https://du.smartont.net:8445/openapi/download?fid=encoded", status: http.StatusServiceUnavailable},
|
||||
{rawURL: "https://du.smartont.net:8445/openapi/download?fid=encoded", status: 509},
|
||||
} {
|
||||
if !remoteRangeResponseLooksRateLimited(tc.rawURL, tc.status, nil) {
|
||||
t.Fatalf("remoteRangeResponseLooksRateLimited(%q, %d) = false, want true", tc.rawURL, tc.status)
|
||||
}
|
||||
}
|
||||
if remoteRangeResponseLooksRateLimited("https://example.com/video.mp4", http.StatusForbidden, nil) {
|
||||
t.Fatal("generic 403 should not be treated as wopan rate limit")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGuangYaPanRemoteRangeErrorsLookRateLimited(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
rawURL string
|
||||
status int
|
||||
}{
|
||||
{rawURL: "https://txgz02-httpdown.guangyacdn.com/download/?fid=encoded", status: http.StatusForbidden},
|
||||
{rawURL: "https://txgz02-httpdown.guangyacdn.com/download/?fid=encoded", status: http.StatusServiceUnavailable},
|
||||
{rawURL: "https://txgz02-httpdown.guangyacdn.com/download/?fid=encoded", status: 509},
|
||||
} {
|
||||
if !remoteRangeResponseLooksRateLimited(tc.rawURL, tc.status, nil) {
|
||||
t.Fatalf("remoteRangeResponseLooksRateLimited(%q, %d) = false, want true", tc.rawURL, tc.status)
|
||||
}
|
||||
}
|
||||
if remoteRangeResponseLooksRateLimited("https://example.com/video.mp4", http.StatusForbidden, nil) {
|
||||
t.Fatal("generic 403 should not be treated as guangyapan rate limit")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoogleDriveRemoteRangeForbiddenLooksRateLimitedByURL(t *testing.T) {
|
||||
if !remoteRangeResponseLooksRateLimited("https://www.googleapis.com/drive/v3/files/file-1?alt=media", http.StatusForbidden, nil) {
|
||||
t.Fatal("google drive media 403 should be treated as rate limit by URL and status")
|
||||
}
|
||||
}
|
||||
|
||||
// fakeDrive is the drive stand-in used by the tests in this file.
type fakeDrive struct {
	// paths holds the location served for each entry; the tests populate it
	// with a URL keyed by the same string used as the video's FileID.
	// NOTE(review): presumably keyed by file ID — confirm against the methods
	// defined below.
	paths map[string]string
}
|
||||
|
||||
@@ -0,0 +1,280 @@
|
||||
package mediasim
|
||||
|
||||
import (
|
||||
"image"
|
||||
_ "image/gif"
|
||||
_ "image/jpeg"
|
||||
_ "image/png"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
const (
	// ssimSampleSize is the width and height of the grid both images are
	// sampled onto before SSIM is computed.
	ssimSampleSize = 96
	// minCoreTitleRunes is the minimum length, in runes, a normalized core or
	// tail title must have to be used as a comparison variant.
	minCoreTitleRunes = 12
)
|
||||
|
||||
// titleCoreSeparators lists the separators normalizeTitleCore tries, in this
// order, when cutting a title down to its leading segment. Spaced variants
// come first so they win over their bare counterparts.
//
// NOTE(review): several entries appear twice (" | ", "_", "-"); the repeats
// may have been full-width variants lost in transcription — confirm against
// the original file.
var titleCoreSeparators = []string{
	" - ",
	" -- ",
	" — ",
	" – ",
	" | ",
	" | ",
	"_",
	"_",
	"-",
	"—",
	"–",
	"-",
	"|",
}
|
||||
|
||||
// TitleSimilarity returns the best normalized Levenshtein similarity in [0, 1]
|
||||
// between the full titles and their leading core title segments.
|
||||
func TitleSimilarity(a, b string) float64 {
|
||||
leftVariants := titleVariants(a)
|
||||
rightVariants := titleVariants(b)
|
||||
if len(leftVariants) == 0 && len(rightVariants) == 0 {
|
||||
return 1
|
||||
}
|
||||
if len(leftVariants) == 0 || len(rightVariants) == 0 {
|
||||
return 0
|
||||
}
|
||||
best := 0.0
|
||||
for _, left := range leftVariants {
|
||||
for _, right := range rightVariants {
|
||||
score := normalizedLevenshteinSimilarity(left, right)
|
||||
if score > best {
|
||||
best = score
|
||||
}
|
||||
}
|
||||
}
|
||||
return best
|
||||
}
|
||||
|
||||
// TitleKeys returns the normalized full title and core-title variants used by
|
||||
// TitleSimilarity. It is intended for cheap caller-side prefiltering before
|
||||
// running the heavier Levenshtein comparison.
|
||||
func TitleKeys(value string) []string {
|
||||
return append([]string(nil), titleVariants(value)...)
|
||||
}
|
||||
|
||||
func normalizedLevenshteinSimilarity(left, right string) float64 {
|
||||
leftRunes := []rune(left)
|
||||
rightRunes := []rune(right)
|
||||
if len(leftRunes) == 0 && len(rightRunes) == 0 {
|
||||
return 1
|
||||
}
|
||||
if len(leftRunes) == 0 || len(rightRunes) == 0 {
|
||||
return 0
|
||||
}
|
||||
maxLen := len(leftRunes)
|
||||
if len(rightRunes) > maxLen {
|
||||
maxLen = len(rightRunes)
|
||||
}
|
||||
return 1 - float64(levenshtein(leftRunes, rightRunes))/float64(maxLen)
|
||||
}
|
||||
|
||||
func titleVariants(value string) []string {
|
||||
full := normalizeTitle(value)
|
||||
if full == "" {
|
||||
return nil
|
||||
}
|
||||
out := appendTitleVariant(nil, full)
|
||||
if core := normalizeTitleCore(value); core != "" && core != full {
|
||||
out = appendTitleVariant(out, core)
|
||||
}
|
||||
for _, tail := range titleTailVariants(value) {
|
||||
normalized := normalizeTitle(tail)
|
||||
if len([]rune(normalized)) >= minCoreTitleRunes {
|
||||
out = appendTitleVariant(out, normalized)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// appendTitleVariant appends value to out unless out already contains it.
func appendTitleVariant(out []string, value string) []string {
	for i := range out {
		if out[i] == value {
			return out
		}
	}
	out = append(out, value)
	return out
}
|
||||
|
||||
// titleTailVariants returns the trimmed text following the last "@" in value,
// once per separator in the list. Nothing is returned for a blank title or
// when the separator is absent or is the final character.
//
// NOTE(review): both separators in the list look identical here; the second
// may have been a full-width variant lost in transcription — confirm against
// the original file.
func titleTailVariants(value string) []string {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return nil
	}

	var tails []string
	for _, sep := range []string{"@", "@"} {
		at := strings.LastIndex(trimmed, sep)
		if at < 0 {
			continue
		}
		start := at + len(sep)
		if start >= len(trimmed) {
			continue
		}
		tails = append(tails, strings.TrimSpace(trimmed[start:]))
	}
	return tails
}
|
||||
|
||||
func normalizeTitleCore(value string) string {
|
||||
head := strings.TrimSpace(value)
|
||||
for _, sep := range titleCoreSeparators {
|
||||
if idx := strings.Index(head, sep); idx > 0 {
|
||||
head = strings.TrimSpace(head[:idx])
|
||||
break
|
||||
}
|
||||
}
|
||||
normalized := normalizeTitle(head)
|
||||
if len([]rune(normalized)) < minCoreTitleRunes {
|
||||
return ""
|
||||
}
|
||||
return normalized
|
||||
}
|
||||
|
||||
// normalizeTitle reduces a title to a comparison key: trimmed and
// lower-cased, with one trailing video extension removed and every character
// that is not a letter or digit dropped. When nothing survives that filter,
// the lower-cased title with whitespace removed is returned instead.
func normalizeTitle(value string) string {
	lowered := strings.ToLower(strings.TrimSpace(value))

	extensions := [...]string{".mp4", ".m4v", ".mkv", ".mov", ".avi", ".webm", ".ts", ".m3u8"}
	for i := range extensions {
		if strings.HasSuffix(lowered, extensions[i]) {
			lowered = lowered[:len(lowered)-len(extensions[i])]
			break
		}
	}

	kept := strings.Map(func(r rune) rune {
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			return r
		}
		return -1
	}, lowered)
	if kept != "" {
		return kept
	}
	return strings.Join(strings.Fields(lowered), "")
}
|
||||
|
||||
// levenshtein returns the edit distance between two rune slices, counting
// insertions, deletions and substitutions as one edit each. Only two rows of
// the distance table are kept, sized by the shorter input.
func levenshtein(a, b []rune) int {
	long, short := a, b
	if len(long) < len(short) {
		long, short = short, long
	}

	width := len(short) + 1
	prev := make([]int, width)
	cur := make([]int, width)
	for col := 0; col < width; col++ {
		prev[col] = col
	}

	for row, longRune := range long {
		cur[0] = row + 1
		for col, shortRune := range short {
			best := prev[col+1] + 1 // delete from the longer input
			if insert := cur[col] + 1; insert < best {
				best = insert
			}
			substitute := prev[col]
			if longRune != shortRune {
				substitute++
			}
			if substitute < best {
				best = substitute
			}
			cur[col+1] = best
		}
		prev, cur = cur, prev
	}
	return prev[len(short)]
}
|
||||
|
||||
// minInt returns the smallest of values. It must be called with at least one
// argument; an empty call panics on the index below.
func minInt(values ...int) int {
	smallest := values[0]
	for i := 1; i < len(values); i++ {
		if values[i] < smallest {
			smallest = values[i]
		}
	}
	return smallest
}
|
||||
|
||||
// ImageSSIM compares two local images using luminance SSIM over a fixed grid.
|
||||
func ImageSSIM(leftPath, rightPath string) (float64, error) {
|
||||
left, err := decodeImage(leftPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
right, err := decodeImage(rightPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return SSIM(left, right), nil
|
||||
}
|
||||
|
||||
// decodeImage opens the file at path and decodes it with whichever image
// format is registered for its content. The file is closed before returning.
func decodeImage(path string) (image.Image, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	decoded, _, decodeErr := image.Decode(file)
	if decodeErr != nil {
		return nil, decodeErr
	}
	return decoded, nil
}
|
||||
|
||||
// SSIM compares two images after nearest-neighbor sampling onto the same grid.
|
||||
func SSIM(left, right image.Image) float64 {
|
||||
if left == nil || right == nil {
|
||||
return 0
|
||||
}
|
||||
leftSamples := grayscaleSamples(left, ssimSampleSize, ssimSampleSize)
|
||||
rightSamples := grayscaleSamples(right, ssimSampleSize, ssimSampleSize)
|
||||
if len(leftSamples) == 0 || len(leftSamples) != len(rightSamples) {
|
||||
return 0
|
||||
}
|
||||
|
||||
var leftMean, rightMean float64
|
||||
for i := range leftSamples {
|
||||
leftMean += leftSamples[i]
|
||||
rightMean += rightSamples[i]
|
||||
}
|
||||
n := float64(len(leftSamples))
|
||||
leftMean /= n
|
||||
rightMean /= n
|
||||
|
||||
var leftVariance, rightVariance, covariance float64
|
||||
for i := range leftSamples {
|
||||
leftDelta := leftSamples[i] - leftMean
|
||||
rightDelta := rightSamples[i] - rightMean
|
||||
leftVariance += leftDelta * leftDelta
|
||||
rightVariance += rightDelta * rightDelta
|
||||
covariance += leftDelta * rightDelta
|
||||
}
|
||||
leftVariance /= n
|
||||
rightVariance /= n
|
||||
covariance /= n
|
||||
|
||||
const c1 = 6.5025 // (0.01 * 255)^2
|
||||
const c2 = 58.5225 // (0.03 * 255)^2
|
||||
denominator := (leftMean*leftMean + rightMean*rightMean + c1) * (leftVariance + rightVariance + c2)
|
||||
if denominator == 0 {
|
||||
return 0
|
||||
}
|
||||
score := ((2*leftMean*rightMean + c1) * (2*covariance + c2)) / denominator
|
||||
if math.IsNaN(score) || math.IsInf(score, 0) {
|
||||
return 0
|
||||
}
|
||||
return score
|
||||
}
|
||||
|
||||
func grayscaleSamples(img image.Image, width, height int) []float64 {
|
||||
bounds := img.Bounds()
|
||||
if bounds.Dx() <= 0 || bounds.Dy() <= 0 || width <= 0 || height <= 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]float64, 0, width*height)
|
||||
for y := 0; y < height; y++ {
|
||||
sourceY := bounds.Min.Y + y*bounds.Dy()/height
|
||||
for x := 0; x < width; x++ {
|
||||
sourceX := bounds.Min.X + x*bounds.Dx()/width
|
||||
r, g, b, _ := img.At(sourceX, sourceY).RGBA()
|
||||
out = append(out, 0.299*float64(r>>8)+0.587*float64(g>>8)+0.114*float64(b>>8))
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package mediasim
|
||||
|
||||
import (
|
||||
"image"
|
||||
"image/color"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTitleSimilarityNormalizesPunctuationAndWhitespace(t *testing.T) {
|
||||
score := TitleSimilarity("AB-123 测试视频.mp4", "ab123测试视频")
|
||||
if score < 0.90 {
|
||||
t.Fatalf("similarity = %.3f, want >= 0.90", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTitleSimilarityUsesLeadingCoreTitle(t *testing.T) {
|
||||
score := TitleSimilarity(
|
||||
"反差极品大二女友,叫声可射~,“射进小骚逼里面~” - 性感小皮鞭",
|
||||
"反差极品大二女友,叫声可射~,“射进小骚逼里面~”",
|
||||
)
|
||||
if score < 0.99 {
|
||||
t.Fatalf("similarity = %.3f, want core-title match", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTitleSimilarityDoesNotMatchBySharedSuffixOnly(t *testing.T) {
|
||||
score := TitleSimilarity(
|
||||
"高颜值大学生宿舍自拍视频完整流出 - 同一个来源",
|
||||
"户外旅行风景记录城市夜景合集 - 同一个来源",
|
||||
)
|
||||
if score >= 0.90 {
|
||||
t.Fatalf("similarity = %.3f, want < 0.90", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTitleSimilarityRejectsDifferentTitles(t *testing.T) {
|
||||
score := TitleSimilarity("完全不同的视频标题", "another unrelated movie")
|
||||
if score >= 0.90 {
|
||||
t.Fatalf("similarity = %.3f, want < 0.90", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSSIMScoresIdenticalAndDifferentImages(t *testing.T) {
|
||||
red := solidImage(color.RGBA{R: 220, G: 20, B: 20, A: 255})
|
||||
redAgain := solidImage(color.RGBA{R: 220, G: 20, B: 20, A: 255})
|
||||
blue := solidImage(color.RGBA{R: 20, G: 20, B: 220, A: 255})
|
||||
|
||||
if score := SSIM(red, redAgain); score < 0.999 {
|
||||
t.Fatalf("identical SSIM = %.6f, want close to 1", score)
|
||||
}
|
||||
if score := SSIM(red, blue); score >= 0.95 {
|
||||
t.Fatalf("different SSIM = %.6f, want < 0.95", score)
|
||||
}
|
||||
}
|
||||
|
||||
func solidImage(c color.RGBA) image.Image {
|
||||
img := image.NewRGBA(image.Rect(0, 0, 32, 32))
|
||||
for y := 0; y < 32; y++ {
|
||||
for x := 0; x < 32; x++ {
|
||||
img.SetRGBA(x, y, c)
|
||||
}
|
||||
}
|
||||
return img
|
||||
}
|
||||
@@ -1,19 +1,19 @@
|
||||
// Package nightly orchestrates the single nightly maintenance pipeline that
|
||||
// replaces the legacy scanLoop / crawlerLoop / spider91 migrator periodic loop.
|
||||
// replaces the legacy scanLoop / crawlerLoop / crawler upload periodic loop.
|
||||
//
|
||||
// Pipeline (fired once per day at cron_hour, also via TriggerNow for admin
|
||||
// "扫描所有网盘"):
|
||||
//
|
||||
// Phase 1: for each non-spider91 cloud drive
|
||||
// Phase 1: for each non-crawler cloud drive
|
||||
// scan + delete-detection + enqueue thumb + enqueue preview video
|
||||
// wait until all thumb / preview-video queues are idle
|
||||
// Phase 2: if any spider91 drive configured
|
||||
// Phase 2: if any script crawler configured
|
||||
// crawl + enqueue preview video for new videos
|
||||
// wait until preview-video queues are idle
|
||||
// Phase 3: spider91 → cloud migration (single sweep, captcha cooldown still
|
||||
// Phase 3: crawler local video → cloud upload (single sweep, captcha cooldown still
|
||||
// honored within this call)
|
||||
// Phase 4: cleanup duplicate local preview/thumbnail assets after sampled
|
||||
// fingerprints have identified canonical videos
|
||||
// Phase 4: full-library duplicate video maintenance:
|
||||
// exact size+sampled_sha256 dedupe, then title/duration/thumbnail dedupe
|
||||
//
|
||||
// A 6h soft deadline guards each pipeline run; phases check deadline at their
|
||||
// boundaries and exit cleanly if exceeded (no in-flight ffmpeg / upload is
|
||||
@@ -64,32 +64,32 @@ type Config struct {
|
||||
MaxDuration time.Duration
|
||||
|
||||
// ListScanTargets returns the drive IDs to run Phase 1 on, in deterministic
|
||||
// order. Should exclude spider91 and localupload drives.
|
||||
// order. Should exclude crawler and localupload drives.
|
||||
ListScanTargets func(ctx context.Context) []string
|
||||
|
||||
// RunScan synchronously runs scan + cleanup + enqueueDriveGeneration for
|
||||
// one drive. Errors are expected to be logged inside, not surfaced.
|
||||
RunScan func(ctx context.Context, driveID string)
|
||||
|
||||
// ListSpider91Drives returns spider91 drive IDs to crawl in Phase 2.
|
||||
// Returns empty slice when no spider91 drive is configured.
|
||||
ListSpider91Drives func(ctx context.Context) []string
|
||||
// ListCrawlerDrives returns script crawler drive IDs to crawl in Phase 2.
|
||||
// Returns empty slice when no crawler is configured.
|
||||
ListCrawlerDrives func(ctx context.Context) []string
|
||||
|
||||
// RunSpider91Crawl synchronously runs one crawl cycle (downloads + thumbs +
|
||||
// preview-video enqueue) for a single spider91 drive.
|
||||
RunSpider91Crawl func(ctx context.Context, driveID string)
|
||||
// RunCrawlerCrawl synchronously runs one crawl cycle (downloads + thumbs +
|
||||
// preview-video enqueue) for a single crawler drive.
|
||||
RunCrawlerCrawl func(ctx context.Context, driveID string)
|
||||
|
||||
// WaitPreviewQueuesIdle blocks until both the thumbnail and preview-video queues
|
||||
// across all drives are drained (queue empty + no in-flight task). It must
|
||||
// honor ctx cancellation.
|
||||
WaitPreviewQueuesIdle func(ctx context.Context) error
|
||||
|
||||
// RunMigration runs spider91migrate.Migrator.RunOnce for Phase 3.
|
||||
// RunMigration runs crawlerupload.Migrator.RunOnce for Phase 3.
|
||||
RunMigration func(ctx context.Context) error
|
||||
|
||||
// RunDedupeAssetCleanup removes generated local assets from non-canonical
|
||||
// videos in size+sampled_sha256 duplicate groups. It must not delete cloud
|
||||
// files or catalog rows.
|
||||
// RunDedupeAssetCleanup runs full-library duplicate video maintenance. It
|
||||
// removes duplicate catalog rows and local generated assets, but never
|
||||
// deletes cloud source files.
|
||||
RunDedupeAssetCleanup func(ctx context.Context) error
|
||||
|
||||
// Now is injected for tests; nil → time.Now.
|
||||
@@ -351,23 +351,23 @@ func (r *Runner) runPipeline(ctx context.Context) {
|
||||
if r.checkDeadline(ctx, "phase 2") {
|
||||
return
|
||||
}
|
||||
spiderIDs := []string{}
|
||||
if r.cfg.ListSpider91Drives != nil {
|
||||
spiderIDs = r.cfg.ListSpider91Drives(ctx)
|
||||
crawlerIDs := []string{}
|
||||
if r.cfg.ListCrawlerDrives != nil {
|
||||
crawlerIDs = r.cfg.ListCrawlerDrives(ctx)
|
||||
}
|
||||
if len(spiderIDs) == 0 {
|
||||
log.Printf("[nightly] phase 2/3 skipped: no spider91 drive configured")
|
||||
if len(crawlerIDs) == 0 {
|
||||
log.Printf("[nightly] phase 2/3 skipped: no crawler configured")
|
||||
r.runDedupeAssetCleanupPhase(ctx)
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 2: crawling %d spider91 drive(s)", len(spiderIDs))
|
||||
for _, id := range spiderIDs {
|
||||
log.Printf("[nightly] phase 2: crawling %d crawler drive(s)", len(crawlerIDs))
|
||||
for _, id := range crawlerIDs {
|
||||
if ctx.Err() != nil {
|
||||
log.Printf("[nightly] phase 2 aborted by ctx: %v", ctx.Err())
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 2: crawling drive=%s", id)
|
||||
r.cfg.RunSpider91Crawl(ctx, id)
|
||||
r.cfg.RunCrawlerCrawl(ctx, id)
|
||||
}
|
||||
log.Printf("[nightly] phase 2: waiting for teaser queue to drain")
|
||||
if err := r.waitIdle(ctx, "phase 2"); err != nil {
|
||||
@@ -378,7 +378,7 @@ func (r *Runner) runPipeline(ctx context.Context) {
|
||||
if r.checkDeadline(ctx, "phase 3") {
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 3: spider91 migration")
|
||||
log.Printf("[nightly] phase 3: crawler upload")
|
||||
if r.cfg.RunMigration != nil {
|
||||
if err := r.cfg.RunMigration(ctx); err != nil {
|
||||
log.Printf("[nightly] phase 3 migration: %v", err)
|
||||
@@ -418,9 +418,9 @@ func (r *Runner) runDedupeAssetCleanupPhase(ctx context.Context) {
|
||||
if r.cfg.RunDedupeAssetCleanup == nil {
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 4: duplicate asset cleanup")
|
||||
log.Printf("[nightly] phase 4: duplicate video maintenance")
|
||||
if err := r.cfg.RunDedupeAssetCleanup(ctx); err != nil {
|
||||
log.Printf("[nightly] phase 4 duplicate asset cleanup: %v", err)
|
||||
log.Printf("[nightly] phase 4 duplicate video maintenance: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -99,11 +99,11 @@ func TestRunPipelineHonoursPhaseOrder(t *testing.T) {
|
||||
RunScan: func(_ context.Context, id string) {
|
||||
rec.push("scan:" + id)
|
||||
},
|
||||
ListSpider91Drives: func(context.Context) []string {
|
||||
rec.push("list-spider")
|
||||
ListCrawlerDrives: func(context.Context) []string {
|
||||
rec.push("list-crawler")
|
||||
return []string{"sp-1"}
|
||||
},
|
||||
RunSpider91Crawl: func(_ context.Context, id string) {
|
||||
RunCrawlerCrawl: func(_ context.Context, id string) {
|
||||
rec.push("crawl:" + id)
|
||||
},
|
||||
WaitPreviewQueuesIdle: func(context.Context) error {
|
||||
@@ -128,7 +128,7 @@ func TestRunPipelineHonoursPhaseOrder(t *testing.T) {
|
||||
"scan:drive-a",
|
||||
"scan:drive-b",
|
||||
"wait-idle", // after phase 1
|
||||
"list-spider",
|
||||
"list-crawler",
|
||||
"crawl:sp-1",
|
||||
"wait-idle", // after phase 2
|
||||
"migrate",
|
||||
@@ -144,15 +144,15 @@ func TestRunPipelineHonoursPhaseOrder(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunPipelineSkipsMigrationWhenNoSpider91(t *testing.T) {
|
||||
func TestRunPipelineSkipsMigrationWhenNoCrawler(t *testing.T) {
|
||||
rec := &recorder{}
|
||||
|
||||
r := New(Config{
|
||||
Settings: newStubSettings(),
|
||||
ListScanTargets: func(context.Context) []string { return []string{"drive-a"} },
|
||||
RunScan: func(_ context.Context, id string) { rec.push("scan:" + id) },
|
||||
ListSpider91Drives: func(context.Context) []string { return nil },
|
||||
RunSpider91Crawl: func(_ context.Context, id string) { rec.push("crawl:" + id) },
|
||||
Settings: newStubSettings(),
|
||||
ListScanTargets: func(context.Context) []string { return []string{"drive-a"} },
|
||||
RunScan: func(_ context.Context, id string) { rec.push("scan:" + id) },
|
||||
ListCrawlerDrives: func(context.Context) []string { return nil },
|
||||
RunCrawlerCrawl: func(_ context.Context, id string) { rec.push("crawl:" + id) },
|
||||
WaitPreviewQueuesIdle: func(context.Context) error {
|
||||
rec.push("wait-idle")
|
||||
return nil
|
||||
@@ -171,7 +171,7 @@ func TestRunPipelineSkipsMigrationWhenNoSpider91(t *testing.T) {
|
||||
|
||||
for _, c := range rec.snapshot() {
|
||||
if c == "migrate" || c == "crawl:sp-1" {
|
||||
t.Fatalf("phase 2/3 should be skipped when no spider91 drive, got call %q", c)
|
||||
t.Fatalf("phase 2/3 should be skipped when no crawler, got call %q", c)
|
||||
}
|
||||
}
|
||||
foundCleanup := false
|
||||
@@ -181,7 +181,7 @@ func TestRunPipelineSkipsMigrationWhenNoSpider91(t *testing.T) {
|
||||
}
|
||||
}
|
||||
if !foundCleanup {
|
||||
t.Fatalf("dedupe cleanup should still run when spider91 is absent; calls=%v", rec.snapshot())
|
||||
t.Fatalf("dedupe cleanup should still run when crawler is absent; calls=%v", rec.snapshot())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -200,8 +200,8 @@ func TestRunPipelineExitsWhenContextCancelledMidPhase(t *testing.T) {
|
||||
cancel()
|
||||
}
|
||||
},
|
||||
ListSpider91Drives: func(context.Context) []string { return []string{"x"} },
|
||||
RunSpider91Crawl: func(context.Context, string) { rec.push("crawl") },
|
||||
ListCrawlerDrives: func(context.Context) []string { return []string{"x"} },
|
||||
RunCrawlerCrawl: func(context.Context, string) { rec.push("crawl") },
|
||||
WaitPreviewQueuesIdle: func(context.Context) error { rec.push("wait-idle"); return nil },
|
||||
RunMigration: func(context.Context) error { rec.push("migrate"); return nil },
|
||||
RunDedupeAssetCleanup: func(context.Context) error { rec.push("dedupe-cleanup"); return nil },
|
||||
@@ -289,12 +289,12 @@ func TestCtxCancelPreventsLaterPhases(t *testing.T) {
|
||||
WaitPreviewQueuesIdle: func(ctx context.Context) error {
|
||||
return ctx.Err()
|
||||
},
|
||||
ListSpider91Drives: func(context.Context) []string {
|
||||
rec.push("list-spider")
|
||||
ListCrawlerDrives: func(context.Context) []string {
|
||||
rec.push("list-crawler")
|
||||
return []string{"x"}
|
||||
},
|
||||
RunSpider91Crawl: func(context.Context, string) { rec.push("crawl") },
|
||||
RunMigration: func(context.Context) error { rec.push("migrate"); return nil },
|
||||
RunCrawlerCrawl: func(context.Context, string) { rec.push("crawl") },
|
||||
RunMigration: func(context.Context) error { rec.push("migrate"); return nil },
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
@@ -303,7 +303,7 @@ func TestCtxCancelPreventsLaterPhases(t *testing.T) {
|
||||
r.runPipeline(ctx)
|
||||
|
||||
for _, c := range rec.snapshot() {
|
||||
if c == "crawl" || c == "migrate" || c == "list-spider" {
|
||||
if c == "crawl" || c == "migrate" || c == "list-crawler" {
|
||||
t.Fatalf("later phase should not run after ctx done; got %q", c)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -952,15 +952,7 @@ func redactURLs(text string) string {
|
||||
}
|
||||
|
||||
func ffmpegOutputLooksRateLimited(output []byte) bool {
|
||||
text := strings.ToLower(string(output))
|
||||
if !strings.Contains(text, "429") {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "throttl") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "rate-limit") ||
|
||||
strings.Contains(text, "server returned 429")
|
||||
return drives.TextMentionsHTTPStatus(string(output), http.StatusTooManyRequests)
|
||||
}
|
||||
|
||||
// --- 本地落盘 ---
|
||||
@@ -1064,12 +1056,10 @@ type ThumbWorker struct {
|
||||
}
|
||||
|
||||
const (
|
||||
defaultTransientMediaCooldown = 5 * time.Minute
|
||||
defaultGenerationRateLimitCooldown = 5 * time.Minute
|
||||
defaultThumbTransientMediaMaxFailures = 3
|
||||
defaultWorkerQueueSize = 10000
|
||||
maxPreviewTeaserSizeBytes int64 = 5 * 1024 * 1024 * 1024
|
||||
previewStatusSkipped = "skipped"
|
||||
defaultTransientMediaCooldown = 5 * time.Minute
|
||||
defaultGenerationRateLimitCooldown = 5 * time.Minute
|
||||
defaultThumbTransientMediaMaxFailures = 3
|
||||
defaultWorkerQueueSize = 10000
|
||||
)
|
||||
|
||||
type rateLimitState struct {
|
||||
@@ -1124,6 +1114,19 @@ func (q *videoQueue) release(v *catalog.Video) {
|
||||
q.mu.Unlock()
|
||||
}
|
||||
|
||||
func (q *videoQueue) idsSnapshot() []string {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
if len(q.ids) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]string, 0, len(q.ids))
|
||||
for id := range q.ids {
|
||||
out = append(out, id)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (q *videoQueue) lengthExcluding(currentID string) int {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
@@ -1251,6 +1254,13 @@ func (w *Worker) Status() TaskStatus {
|
||||
return taskStatus(&w.activity, &w.rateLimit, w.queue.lengthExcluding(currentID))
|
||||
}
|
||||
|
||||
func (w *Worker) ActiveVideoIDs() []string {
|
||||
if w == nil {
|
||||
return nil
|
||||
}
|
||||
return w.queue.idsSnapshot()
|
||||
}
|
||||
|
||||
func (w *ThumbWorker) Status() TaskStatus {
|
||||
if w == nil {
|
||||
return TaskStatus{State: "idle"}
|
||||
@@ -1427,11 +1437,17 @@ func (w *Worker) skipIfRateLimited(v *catalog.Video) bool {
|
||||
}
|
||||
|
||||
func (w *Worker) pauseForRateLimit(err error, step, title string) bool {
|
||||
_, ok := drives.RateLimitRetryAfter(err)
|
||||
wait, ok := drives.RateLimitRetryAfter(err)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
until := w.rateLimit.pause(time.Now(), defaultGenerationRateLimitCooldown)
|
||||
if wait <= 0 {
|
||||
wait = w.RateLimitCooldown
|
||||
if wait <= 0 {
|
||||
wait = defaultGenerationRateLimitCooldown
|
||||
}
|
||||
}
|
||||
until := w.rateLimit.pause(time.Now(), wait)
|
||||
log.Printf("[preview] drive=%s rate-limited until=%s step=%s video=%s: %v", w.Drive.ID(), until.Format(time.RFC3339), step, title, err)
|
||||
return true
|
||||
}
|
||||
@@ -1460,11 +1476,17 @@ func (w *ThumbWorker) skipIfRateLimited(v *catalog.Video) bool {
|
||||
}
|
||||
|
||||
func (w *ThumbWorker) pauseForRateLimit(err error, step, title string) bool {
|
||||
_, ok := drives.RateLimitRetryAfter(err)
|
||||
wait, ok := drives.RateLimitRetryAfter(err)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
until := w.rateLimit.pause(time.Now(), defaultGenerationRateLimitCooldown)
|
||||
if wait <= 0 {
|
||||
wait = w.RateLimitCooldown
|
||||
if wait <= 0 {
|
||||
wait = defaultGenerationRateLimitCooldown
|
||||
}
|
||||
}
|
||||
until := w.rateLimit.pause(time.Now(), wait)
|
||||
log.Printf("[thumb] drive=%s rate-limited until=%s step=%s video=%s: %v", w.Drive.ID(), until.Format(time.RFC3339), step, title, err)
|
||||
return true
|
||||
}
|
||||
@@ -1506,60 +1528,17 @@ func driveErrorShouldCooldown(d drives.Drive, err error) bool {
|
||||
}
|
||||
switch d.Kind() {
|
||||
case "p115":
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "server returned 403") ||
|
||||
strings.Contains(text, "403 forbidden") ||
|
||||
strings.Contains(text, "server returned 405") ||
|
||||
strings.Contains(text, "405 method") ||
|
||||
strings.Contains(text, "access denied") ||
|
||||
strings.Contains(text, "moov atom not found") ||
|
||||
strings.Contains(text, "partial file") ||
|
||||
strings.Contains(text, "request has been blocked") ||
|
||||
strings.Contains(text, "访问被阻断")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusMethodNotAllowed, http.StatusTooManyRequests)
|
||||
case "pikpak":
|
||||
// PikPak 在预览视频 / 封面生成阶段(取链或拉直链字节)可能命中:
|
||||
// - error_code=10 操作频繁
|
||||
// - HTTP 429 / 5xx / 509 限流和服务端不可用
|
||||
// - 通用文本:rate limit / too many requests / blocked
|
||||
// 命中时让 worker 冷却 5 分钟,避免连续请求加重风控。
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "error_code=10") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "http 509") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "partial file") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout, 509)
|
||||
case "p123":
|
||||
// 123 云盘直链解析 / ffmpeg 读取阶段可能返回 429、5xx,或 WAF 类
|
||||
// blocked / 访问阻断文本。命中时冷却,避免封面和预览视频生成连续打接口。
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "请求太频繁") ||
|
||||
strings.Contains(text, "请求过于频繁") ||
|
||||
strings.Contains(text, "请求频繁") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "频率限制") ||
|
||||
strings.Contains(text, "请求次数过多") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "server returned 403") ||
|
||||
strings.Contains(text, "403 forbidden") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "访问被阻断") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout)
|
||||
case "wopan":
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout, 509)
|
||||
case "guangyapan":
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout, 509)
|
||||
case "googledrive":
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout)
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -1610,11 +1589,6 @@ func (w *ThumbWorker) process(ctx context.Context, v *catalog.Video) bool {
|
||||
return false
|
||||
}
|
||||
_ = w.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{ThumbnailStatus: "pending"})
|
||||
if isSpider91OriginVideo(v) {
|
||||
log.Printf("[thumb] skip %s: spider91-origin video must use crawled thumbnail", v.Title)
|
||||
_ = w.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{ThumbnailStatus: "failed"})
|
||||
return false
|
||||
}
|
||||
link, err := w.streamLink(ctx, v)
|
||||
if err != nil {
|
||||
if w.pauseForRecoverableError(ctx, v, err, "streamURL") {
|
||||
@@ -1696,10 +1670,6 @@ func (w *ThumbWorker) generateThumbnailFromLink(ctx context.Context, v *catalog.
|
||||
return nil
|
||||
}
|
||||
|
||||
func isSpider91OriginVideo(v *catalog.Video) bool {
|
||||
return v != nil && strings.HasPrefix(v.ID, "spider91-")
|
||||
}
|
||||
|
||||
func localPreviewLink(v *catalog.Video) (*drives.StreamLink, bool) {
|
||||
if v.PreviewLocal == "" {
|
||||
return nil, false
|
||||
@@ -1713,15 +1683,6 @@ func localPreviewLink(v *catalog.Video) (*drives.StreamLink, bool) {
|
||||
}
|
||||
|
||||
func (w *Worker) process(ctx context.Context, v *catalog.Video) {
|
||||
if shouldSkipTeaser(v) {
|
||||
removePreviousLocalTeaser(v.PreviewLocal, "")
|
||||
if err := w.Catalog.UpdatePreview(ctx, v.ID, "", previewStatusSkipped); err != nil {
|
||||
log.Printf("[preview] skip %s: update status: %v", v.Title, err)
|
||||
return
|
||||
}
|
||||
log.Printf("[preview] skip %s: size=%d exceeds 5GiB teaser limit", v.Title, v.Size)
|
||||
return
|
||||
}
|
||||
if w.skipIfRateLimited(v) {
|
||||
return
|
||||
}
|
||||
@@ -1774,10 +1735,6 @@ func (w *Worker) process(ctx context.Context, v *catalog.Video) {
|
||||
log.Printf("[preview] ready %s (duration=%.1fs)", v.Title, duration)
|
||||
}
|
||||
|
||||
func shouldSkipTeaser(v *catalog.Video) bool {
|
||||
return v != nil && v.Size > maxPreviewTeaserSizeBytes
|
||||
}
|
||||
|
||||
func (w *Worker) generateTeaser(ctx context.Context, v *catalog.Video, link *drives.StreamLink, duration float64) (string, error) {
|
||||
gen, ok := w.Gen.(refreshingTeaserGenerator)
|
||||
if !ok || w.Drive == nil || w.Drive.Kind() != "p115" {
|
||||
|
||||
@@ -89,9 +89,9 @@ func TestThumbWorkerBackfillsDurationWhenThumbnailAlreadyExists(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbWorkerDoesNotGenerateThumbnailForSpider91OriginVideo(t *testing.T) {
|
||||
func TestThumbWorkerGeneratesThumbnailForCrawlerLikeVideoID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "spider91-91-spider-1200001")
|
||||
cat, video := seedPreviewTestVideo(t, "scriptcrawler-crawler-main-source001")
|
||||
|
||||
gen := &fakeThumbGenerator{probeDuration: 42}
|
||||
drv := &previewFakeDrive{kind: "pikpak"}
|
||||
@@ -103,18 +103,18 @@ func TestThumbWorkerDoesNotGenerateThumbnailForSpider91OriginVideo(t *testing.T)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Fatalf("thumbnail = %q, want empty when crawled spider91 thumbnail is missing", got.ThumbnailURL)
|
||||
if got.ThumbnailURL != "/p/thumb/"+video.ID {
|
||||
t.Fatalf("thumbnail = %q, want generated thumb URL", got.ThumbnailURL)
|
||||
}
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
ready, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "ready", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
t.Fatalf("list ready thumbnails: %v", err)
|
||||
}
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
if len(ready) != 1 || ready[0].ID != video.ID {
|
||||
t.Fatalf("ready thumbnails = %#v, want only %s", ready, video.ID)
|
||||
}
|
||||
if gen.probeCalls != 0 || gen.generateCalls != 0 {
|
||||
t.Fatalf("generator calls probe=%d generate=%d, want no ffmpeg work for spider91-origin thumbnail", gen.probeCalls, gen.generateCalls)
|
||||
if gen.probeCalls != 1 || gen.generateCalls != 1 {
|
||||
t.Fatalf("generator calls probe=%d generate=%d, want one thumbnail generation", gen.probeCalls, gen.generateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -349,42 +349,10 @@ func TestPreviewWorkerNeverCallsDriveUploadOrEnsureDir(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreviewWorkerSkipsTeaserForVideoLargerThanFiveGiB(t *testing.T) {
|
||||
func TestPreviewWorkerGeneratesTeaserForLargeVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "preview-large-video")
|
||||
video.Size = maxPreviewTeaserSizeBytes + 1
|
||||
if err := cat.UpsertVideo(ctx, video); err != nil {
|
||||
t.Fatalf("update video: %v", err)
|
||||
}
|
||||
|
||||
gen := &fakeTeaserGenerator{}
|
||||
drv := &previewFakeDrive{}
|
||||
worker := NewWorker(gen, cat, drv)
|
||||
|
||||
worker.process(ctx, video)
|
||||
|
||||
got, err := cat.GetVideo(ctx, video.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.PreviewStatus != previewStatusSkipped {
|
||||
t.Fatalf("preview status = %q, want skipped", got.PreviewStatus)
|
||||
}
|
||||
if got.PreviewLocal != "" {
|
||||
t.Fatalf("preview local = %q, want empty", got.PreviewLocal)
|
||||
}
|
||||
if drv.streamCalls != 0 {
|
||||
t.Fatalf("stream calls = %d, want 0", drv.streamCalls)
|
||||
}
|
||||
if gen.generateCalls != 0 {
|
||||
t.Fatalf("generate calls = %d, want 0", gen.generateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreviewWorkerGeneratesTeaserAtFiveGiBBoundary(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "preview-five-gib-video")
|
||||
video.Size = maxPreviewTeaserSizeBytes
|
||||
video.Size = 6 * 1024 * 1024 * 1024
|
||||
if err := cat.UpsertVideo(ctx, video); err != nil {
|
||||
t.Fatalf("update video: %v", err)
|
||||
}
|
||||
@@ -442,7 +410,7 @@ func TestPreviewWorkerRateLimitLeavesCurrentPendingAndSkipsNextVideo(t *testing.
|
||||
if gen.generateCalls != 1 {
|
||||
t.Fatalf("generate calls = %d, want 1", gen.generateCalls)
|
||||
}
|
||||
assertCooldownAround(t, worker.Status().CooldownUntil, before, 5*time.Minute)
|
||||
assertCooldownAround(t, worker.Status().CooldownUntil, before, 2*time.Hour)
|
||||
|
||||
gen.generateErr = nil
|
||||
worker.process(ctx, &second)
|
||||
@@ -458,7 +426,7 @@ func TestPreviewWorkerRateLimitLeavesCurrentPendingAndSkipsNextVideo(t *testing.
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbWorkerRateLimitCoolsDownFiveMinutes(t *testing.T) {
|
||||
func TestThumbWorkerRateLimitHonorsRetryAfter(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-rate-limit")
|
||||
|
||||
@@ -482,12 +450,12 @@ func TestThumbWorkerRateLimitCoolsDownFiveMinutes(t *testing.T) {
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Fatalf("thumbnail = %q, want unchanged after rate limit", got.ThumbnailURL)
|
||||
}
|
||||
assertCooldownAround(t, worker.Status().CooldownUntil, before, 5*time.Minute)
|
||||
assertCooldownAround(t, worker.Status().CooldownUntil, before, 2*time.Hour)
|
||||
}
|
||||
|
||||
func TestThumbWorkerP115TransientErrorFailsAfterRetryLimit(t *testing.T) {
|
||||
func TestThumbWorkerP115MessageOnlyErrorFailsWithoutCooldown(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-transient")
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-message-only")
|
||||
|
||||
gen := &fakeThumbGenerator{
|
||||
generateErr: errors.New("ffmpeg thumb: exit status 183, stderr: partial file Cannot determine format of input 0:0 after EOF"),
|
||||
@@ -495,69 +463,26 @@ func TestThumbWorkerP115TransientErrorFailsAfterRetryLimit(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "p115"}
|
||||
worker := NewThumbWorker(gen, cat, drv)
|
||||
|
||||
for attempt := 1; attempt <= defaultThumbTransientMediaMaxFailures; attempt++ {
|
||||
worker.rateLimit = rateLimitState{}
|
||||
worker.process(ctx, video)
|
||||
|
||||
if attempt < defaultThumbTransientMediaMaxFailures {
|
||||
pending, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "pending", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending thumbnails: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != video.ID {
|
||||
t.Fatalf("attempt %d pending thumbnails = %#v, want only %s", attempt, pending, video.ID)
|
||||
}
|
||||
missing, err := cat.CountVideosNeedingThumbnail(ctx, video.DriveID)
|
||||
if err != nil {
|
||||
t.Fatalf("count missing thumbnails: %v", err)
|
||||
}
|
||||
if missing != 1 {
|
||||
t.Fatalf("attempt %d missing thumbnails = %d, want 1 before retry limit", attempt, missing)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
}
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
}
|
||||
missing, err := cat.CountVideosNeedingThumbnail(ctx, video.DriveID)
|
||||
if err != nil {
|
||||
t.Fatalf("count missing thumbnails: %v", err)
|
||||
}
|
||||
if missing != 0 {
|
||||
t.Fatalf("missing thumbnails = %d, want 0 after retry limit marks failed", missing)
|
||||
}
|
||||
}
|
||||
|
||||
if gen.generateCalls != defaultThumbTransientMediaMaxFailures {
|
||||
t.Fatalf("generate calls = %d, want %d", gen.generateCalls, defaultThumbTransientMediaMaxFailures)
|
||||
}
|
||||
|
||||
if err := cat.UpdateVideoMeta(ctx, video.ID, catalog.VideoMetaPatch{
|
||||
ThumbnailStatus: "pending",
|
||||
ResetThumbnailFailures: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("reset thumbnail status: %v", err)
|
||||
}
|
||||
worker.rateLimit = rateLimitState{}
|
||||
worker.process(ctx, video)
|
||||
|
||||
pending, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "pending", 0)
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending thumbnails after reset: %v", err)
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != video.ID {
|
||||
t.Fatalf("pending thumbnails after reset = %#v, want only %s", pending, video.ID)
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
}
|
||||
if !worker.Status().CooldownUntil.IsZero() {
|
||||
t.Fatalf("cooldown until = %s, want no cooldown for message-only media error", worker.Status().CooldownUntil)
|
||||
}
|
||||
if gen.generateCalls != 1 {
|
||||
t.Fatalf("generate calls = %d, want 1", gen.generateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbWorkerRequeuesP115TransientErrorBeforeRetryLimit(t *testing.T) {
|
||||
func TestThumbWorkerDoesNotRequeueP115MessageOnlyError(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-requeue")
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-no-requeue")
|
||||
|
||||
gen := &fakeThumbGenerator{
|
||||
generateErr: errors.New("ffmpeg thumb: partial file Cannot determine format of input 0:0 after EOF"),
|
||||
@@ -569,11 +494,8 @@ func TestThumbWorkerRequeuesP115TransientErrorBeforeRetryLimit(t *testing.T) {
|
||||
|
||||
select {
|
||||
case queued := <-worker.ch:
|
||||
if queued.ID != video.ID {
|
||||
t.Fatalf("requeued video id = %q, want %q", queued.ID, video.ID)
|
||||
}
|
||||
t.Fatalf("unexpected requeued video id = %q", queued.ID)
|
||||
default:
|
||||
t.Fatal("expected transient thumbnail failure to requeue the same video")
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, video.ID)
|
||||
@@ -581,14 +503,14 @@ func TestThumbWorkerRequeuesP115TransientErrorBeforeRetryLimit(t *testing.T) {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Fatalf("thumbnail = %q, want empty after transient failure", got.ThumbnailURL)
|
||||
t.Fatalf("thumbnail = %q, want empty after message-only failure", got.ThumbnailURL)
|
||||
}
|
||||
pending, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "pending", 0)
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending thumbnails: %v", err)
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != video.ID {
|
||||
t.Fatalf("pending thumbnails = %#v, want only %s", pending, video.ID)
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -649,18 +571,82 @@ func TestP123TransientErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "p123"}
|
||||
for _, err := range []error{
|
||||
errors.New("Server returned 403 Forbidden"),
|
||||
errors.New("请求太频繁"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("请求太频繁")) {
|
||||
t.Fatal("message-only throttling text should not trigger p123 cooldown")
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid credential")) {
|
||||
t.Fatal("invalid credential should not trigger p123 cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWopanTransientErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "wopan"}
|
||||
for _, err := range []error{
|
||||
errors.New("ffmpeg: Server returned 403 Forbidden"),
|
||||
errors.New("wopan download url: request failed with status: 429 Too Many Requests"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("操作频繁,请稍后重试")) {
|
||||
t.Fatal("message-only throttling text should not trigger wopan cooldown")
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid access token")) {
|
||||
t.Fatal("invalid access token should not trigger wopan cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGuangYaPanTransientErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "guangyapan"}
|
||||
for _, err := range []error{
|
||||
errors.New("ffmpeg: Server returned 403 Forbidden"),
|
||||
errors.New("guangyapan api rate limited: status=429 msg=操作频繁,请稍后重试"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("操作频繁,请稍后重试")) {
|
||||
t.Fatal("message-only throttling text should not trigger guangyapan cooldown")
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid access token")) {
|
||||
t.Fatal("invalid access token should not trigger guangyapan cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoogleDriveMediaErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "googledrive"}
|
||||
for _, err := range []error{
|
||||
errors.New("ffmpeg: Server returned 403 Forbidden"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
for _, err := range []error{
|
||||
errors.New("google drive api error: usageLimits userRateLimitExceeded"),
|
||||
errors.New("downloadQuotaExceeded: The download quota for this file has been exceeded"),
|
||||
errors.New("sharingRateLimitExceeded"),
|
||||
} {
|
||||
if driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("message-only google drive error %v should not trigger cooldown", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid credentials")) {
|
||||
t.Fatal("invalid credentials should not trigger googledrive cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
func assertCooldownAround(t *testing.T, until time.Time, before time.Time, want time.Duration) {
|
||||
t.Helper()
|
||||
if until.IsZero() {
|
||||
|
||||
@@ -147,15 +147,19 @@ func (p *Proxy) ServeStream(w http.ResponseWriter, r *http.Request, driveID, fil
|
||||
// CDN 不校验请求头,直连可获得最佳带宽并避免占用 backend 出站
|
||||
// - onedrive:Microsoft Graph 返回的 @microsoft.graph.downloadUrl 是短期
|
||||
// 免鉴权下载 URL,不需要后端继续代传视频字节
|
||||
// - p123:123 云盘 download_info 返回的下载页会再跳 CDN;driver 已在后端
|
||||
// - p123:123网盘 download_info 返回的下载页会再跳 CDN;driver 已在后端
|
||||
// 先解出最终 Location,浏览器可直接 302 到该短期地址
|
||||
// - wopan:联通网盘 GetDownloadUrlV2 返回的是短期直链,OpenList 也是直接
|
||||
// 将该 URL 交给客户端使用;不需要后端持续代传视频字节
|
||||
// - guangyapan:光鸭 get_res_download_url 返回 signedURL / downloadUrl,
|
||||
// 浏览器可直接访问,不需要后端持续代传视频字节
|
||||
//
|
||||
// 其余网盘(如沃盘 / 夸克等)仍走反代,因为它们的下载
|
||||
// 其余网盘(如夸克等)仍走反代,因为它们的下载
|
||||
// 链接通常需要随请求带上后端持有的 Cookie / Authorization / Range
|
||||
// 的特殊处理,浏览器拿不到这些上下文。
|
||||
func shouldRedirect(d drives.Drive) bool {
|
||||
switch d.Kind() {
|
||||
case "p115", "pikpak", "onedrive", "p123":
|
||||
case "p115", "pikpak", "onedrive", "p123", "wopan", "guangyapan":
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
@@ -201,6 +201,56 @@ func TestServeStreamRedirectsP123(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeStreamRedirectsWopan(t *testing.T) {
|
||||
reg := NewRegistry()
|
||||
drv := &proxyFakeSimpleDrive{
|
||||
kind: "wopan",
|
||||
url: "https://du.smartont.net:8445/openapi/download?fid=encoded",
|
||||
}
|
||||
reg.Set("wopan", drv)
|
||||
|
||||
p := New(reg)
|
||||
req := httptest.NewRequest(http.MethodGet, "/p/stream/wopan/file-1", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
p.ServeStream(rr, req, "wopan", "file-1")
|
||||
|
||||
if rr.Code != http.StatusFound {
|
||||
t.Fatalf("status = %d, want %d", rr.Code, http.StatusFound)
|
||||
}
|
||||
if got := rr.Header().Get("Location"); got != "https://du.smartont.net:8445/openapi/download?fid=encoded" {
|
||||
t.Fatalf("Location = %q", got)
|
||||
}
|
||||
if drv.calls != 1 {
|
||||
t.Fatalf("link calls = %d, want 1", drv.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeStreamRedirectsGuangYaPan(t *testing.T) {
|
||||
reg := NewRegistry()
|
||||
drv := &proxyFakeSimpleDrive{
|
||||
kind: "guangyapan",
|
||||
url: "https://cdn.guangyapan.example/video.mp4?sign=encoded",
|
||||
}
|
||||
reg.Set("guangyapan", drv)
|
||||
|
||||
p := New(reg)
|
||||
req := httptest.NewRequest(http.MethodGet, "/p/stream/guangyapan/file-1", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
p.ServeStream(rr, req, "guangyapan", "file-1")
|
||||
|
||||
if rr.Code != http.StatusFound {
|
||||
t.Fatalf("status = %d, want %d", rr.Code, http.StatusFound)
|
||||
}
|
||||
if got := rr.Header().Get("Location"); got != "https://cdn.guangyapan.example/video.mp4?sign=encoded" {
|
||||
t.Fatalf("Location = %q", got)
|
||||
}
|
||||
if drv.calls != 1 {
|
||||
t.Fatalf("link calls = %d, want 1", drv.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeStreamServesLocalFilePath(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "video.mp4")
|
||||
if err := os.WriteFile(path, []byte("0123456789"), 0o644); err != nil {
|
||||
|
||||
@@ -2,6 +2,7 @@ package scanner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"log"
|
||||
"path"
|
||||
@@ -25,6 +26,8 @@ type Scanner struct {
|
||||
SkipDirIDs map[string]struct{}
|
||||
// 回调:新视频被加入后触发预览视频生成
|
||||
OnNewVideo func(v *catalog.Video)
|
||||
// OnProgress 在扫描进度变化时触发。回调只应读取 Stats 里的计数,不应修改 map 字段。
|
||||
OnProgress func(stats Stats)
|
||||
// ProgressInterval 控制扫描内部 heartbeat 的最小输出间隔。
|
||||
// 0 → 默认 30s;< 0 → 关闭 heartbeat(仅留外层 start / done 两行)。
|
||||
// heartbeat 单行格式:
|
||||
@@ -91,6 +94,9 @@ func (s *Scanner) Run(ctx context.Context, startDirID string) (Stats, error) {
|
||||
driveID = s.Drive.ID()
|
||||
}
|
||||
progress := func(currentDir string) {
|
||||
if s.OnProgress != nil {
|
||||
s.OnProgress(stats)
|
||||
}
|
||||
if interval < 0 {
|
||||
return
|
||||
}
|
||||
@@ -127,6 +133,9 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
}
|
||||
|
||||
for _, e := range entries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if e.IsDir {
|
||||
// 跳过 previews 目录,避免扫到自己生成的预览视频
|
||||
if strings.EqualFold(e.Name, "previews") {
|
||||
@@ -137,13 +146,15 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
continue
|
||||
}
|
||||
if err := s.walk(ctx, e.ID, e.Name, stats, progress); err != nil {
|
||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||
return ctxErr
|
||||
}
|
||||
stats.Errors++
|
||||
log.Printf("[scanner] walk %s error: %v", e.Name, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
stats.Scanned++
|
||||
ext := strings.ToLower(path.Ext(e.Name))
|
||||
if !s.Exts[ext] {
|
||||
continue
|
||||
@@ -151,10 +162,15 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
if e.Size <= 0 {
|
||||
continue
|
||||
}
|
||||
stats.Scanned++
|
||||
progress(dirName)
|
||||
stats.SeenFileIDs[e.ID] = struct{}{}
|
||||
|
||||
id := s.Drive.Kind() + "-" + s.Drive.ID() + "-" + e.ID
|
||||
id := s.Drive.Kind() + "-" + s.Drive.ID() + "-" + videoIDFilePart(e.ID)
|
||||
if deleted, err := s.Catalog.IsDeletedVideoCandidate(ctx, id, s.Drive.ID(), e.ID, e.Hash, e.Name, e.Size); err != nil {
|
||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||
return ctxErr
|
||||
}
|
||||
stats.Errors++
|
||||
log.Printf("[scanner] check deleted video %s error: %v", id, err)
|
||||
continue
|
||||
@@ -170,33 +186,45 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
if matched, err := s.Catalog.MatchTags(ctx, e.Name+" "+dirName+" "+parsed.Author); err == nil {
|
||||
tags = mergeTags(tags, matched)
|
||||
}
|
||||
if label, ok, err := s.Catalog.EnsureCollectionTag(ctx, dirName); err == nil && ok {
|
||||
tags = mergeTags(tags, []string{label})
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
existing, _ := s.Catalog.GetVideo(ctx, id)
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if existing != nil {
|
||||
patch := catalog.VideoMetaPatch{}
|
||||
if e.Hash != "" && existing.ContentHash == "" {
|
||||
patch.ContentHash = e.Hash
|
||||
existing.ContentHash = e.Hash
|
||||
}
|
||||
if e.Name != "" && existing.FileName == "" {
|
||||
if e.Name != "" && existing.FileName != e.Name {
|
||||
patch.FileName = e.Name
|
||||
existing.FileName = e.Name
|
||||
patch.Title = parsed.Title
|
||||
patch.TitleSet = true
|
||||
patch.Author = parsed.Author
|
||||
patch.AuthorSet = true
|
||||
}
|
||||
// 已存在但轻量元数据空缺时,顺便补齐。
|
||||
if existing.Category == "" && dirName != "" {
|
||||
patch.Category = dirName
|
||||
}
|
||||
if patch.Category != "" || patch.ContentHash != "" || patch.FileName != "" {
|
||||
if patch.ContentHash != "" || patch.FileName != "" || patch.TitleSet || patch.AuthorSet {
|
||||
_ = s.Catalog.UpdateVideoMeta(ctx, id, patch)
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if dup := s.findDuplicate(ctx, e.Hash, e.Name, e.Size, id); dup != nil {
|
||||
continue
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if !sameTags(existing.Tags, tags) {
|
||||
_ = s.Catalog.SetAutoVideoTags(ctx, id, tags)
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
@@ -204,6 +232,9 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
if dup := s.findDuplicate(ctx, e.Hash, e.Name, e.Size, id); dup != nil {
|
||||
continue
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
v := &catalog.Video{
|
||||
@@ -220,16 +251,22 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
Quality: "HD",
|
||||
Size: e.Size,
|
||||
PreviewStatus: "pending",
|
||||
Category: dirName,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}
|
||||
if err := s.Catalog.UpsertVideo(ctx, v); err != nil {
|
||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||
return ctxErr
|
||||
}
|
||||
log.Printf("[scanner] upsert %s error: %v", v.Title, err)
|
||||
continue
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.Added++
|
||||
progress(dirName)
|
||||
if s.OnNewVideo != nil {
|
||||
s.OnNewVideo(v)
|
||||
}
|
||||
@@ -296,3 +333,10 @@ func mergeTags(lists ...[]string) []string {
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// videoIDFilePart returns the portion of a video ID derived from a drive file
// ID. IDs without '/', '\' or NUL are used verbatim; any other ID is replaced
// by "b64_" followed by its unpadded URL-safe base64 encoding, so the result
// never contains a path separator.
//
// NOTE(review): a raw ID that already starts with "b64_" is passed through
// unchanged, so in principle it could coincide with an encoded ID — confirm
// no drive emits such IDs.
func videoIDFilePart(fileID string) string {
	const unsafeChars = "/\\\x00"
	if strings.IndexAny(fileID, unsafeChars) >= 0 {
		encoded := base64.RawURLEncoding.EncodeToString([]byte(fileID))
		return "b64_" + encoded
	}
	return fileID
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package scanner
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
@@ -90,6 +91,128 @@ func TestRunIgnoresZeroSizeVideoFiles(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunScannedCountsOnlyVideoCandidates(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
drv := &scannerFakeDrive{
|
||||
entries: []drives.Entry{
|
||||
{ID: "file-1", Name: "clip.mp4", Size: 123},
|
||||
{ID: "file-2", Name: "notes.txt", Size: 123},
|
||||
{ID: "file-3", Name: "empty.mp4", Size: 0},
|
||||
},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
stats, err := sc.Run(ctx, "")
|
||||
if err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
if stats.Scanned != 1 {
|
||||
t.Fatalf("scanned = %d, want one non-empty video candidate", stats.Scanned)
|
||||
}
|
||||
if stats.Added != 1 {
|
||||
t.Fatalf("added = %d, want one added video", stats.Added)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunUsesPathSafeVideoIDForUnsafeFileID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
drv := &scannerFakeDrive{
|
||||
entries: []drives.Entry{{
|
||||
ID: "fid/with space",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
}},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
stats, err := sc.Run(ctx, "")
|
||||
if err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
if stats.Added != 1 {
|
||||
t.Fatalf("added = %d, want 1", stats.Added)
|
||||
}
|
||||
if _, ok := stats.SeenFileIDs["fid/with space"]; !ok {
|
||||
t.Fatalf("seen file ids = %#v, want original file id", stats.SeenFileIDs)
|
||||
}
|
||||
|
||||
wantID := "fake-drive-b64_ZmlkL3dpdGggc3BhY2U"
|
||||
got, err := cat.GetVideo(ctx, wantID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video %s: %v", wantID, err)
|
||||
}
|
||||
if strings.Contains(got.ID, "/") {
|
||||
t.Fatalf("video id = %q, must not contain slash", got.ID)
|
||||
}
|
||||
if got.FileID != "fid/with space" {
|
||||
t.Fatalf("file id = %q, want original", got.FileID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunStopsWhenContextCanceledDuringFileLoop(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
drv := &scannerFakeDrive{
|
||||
entries: []drives.Entry{
|
||||
{ID: "file-1", Name: "one.mp4", Size: 123},
|
||||
{ID: "file-2", Name: "two.mp4", Size: 123},
|
||||
{ID: "file-3", Name: "three.mp4", Size: 123},
|
||||
},
|
||||
}
|
||||
callbacks := 0
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, func(*catalog.Video) {
|
||||
callbacks++
|
||||
cancel()
|
||||
})
|
||||
|
||||
stats, err := sc.Run(ctx, "")
|
||||
|
||||
if !errors.Is(err, context.Canceled) {
|
||||
t.Fatalf("scan error = %v, want context.Canceled", err)
|
||||
}
|
||||
if stats.Added != 1 || callbacks != 1 {
|
||||
t.Fatalf("added=%d callbacks=%d, want exactly one video before cancellation", stats.Added, callbacks)
|
||||
}
|
||||
if _, err := cat.GetVideo(context.Background(), "fake-drive-file-1"); err != nil {
|
||||
t.Fatalf("first video should be persisted before cancellation: %v", err)
|
||||
}
|
||||
if _, err := cat.GetVideo(context.Background(), "fake-drive-file-2"); err != sql.ErrNoRows {
|
||||
t.Fatalf("second video lookup error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
if _, err := cat.GetVideo(context.Background(), "fake-drive-file-3"); err != sql.ErrNoRows {
|
||||
t.Fatalf("third video lookup error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunSkipsAdminDeletedVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
@@ -200,6 +323,67 @@ func TestRunDoesNotBackfillRemoteThumbnailForExistingVideo(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunSyncsRenamedExistingVideoMetadata(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: "fake-drive-file-1",
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
FileName: "old-name - Old Author.mp4",
|
||||
Title: "old-name",
|
||||
Author: "Old Author",
|
||||
PreviewStatus: "pending",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
drv := &scannerFakeDrive{
|
||||
entries: []drives.Entry{{
|
||||
ID: "file-1",
|
||||
Name: "[4K] renamed clip.mp4",
|
||||
Size: 123,
|
||||
ModTime: now,
|
||||
}},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
stats, err := sc.Run(ctx, "")
|
||||
if err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
if stats.Added != 0 {
|
||||
t.Fatalf("added = %d, want existing video to be updated in place", stats.Added)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, "fake-drive-file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.FileName != "[4K] renamed clip.mp4" {
|
||||
t.Fatalf("file_name = %q, want remote name", got.FileName)
|
||||
}
|
||||
if got.Title != "renamed clip" {
|
||||
t.Fatalf("title = %q, want parsed title from remote name", got.Title)
|
||||
}
|
||||
if got.Author != "" {
|
||||
t.Fatalf("author = %q, want cleared author from remote name without author suffix", got.Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunReplacesExistingVideoTagsWithFixedFilenameTags(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
@@ -251,7 +435,7 @@ func TestRunReplacesExistingVideoTagsWithFixedFilenameTags(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunAddsShortCollectionDirectoryAsTag(t *testing.T) {
|
||||
func TestRunDoesNotCreateTagFromDirectoryName(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -269,7 +453,6 @@ func TestRunAddsShortCollectionDirectoryAsTag(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "Existing",
|
||||
Category: "sunny",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -300,84 +483,6 @@ func TestRunAddsShortCollectionDirectoryAsTag(t *testing.T) {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, "fake-drive-file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if !sameStrings(got.Tags, []string{"sunny"}) {
|
||||
t.Fatalf("tags = %#v, want sunny", got.Tags)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunDoesNotRecreateDeletedCollectionDirectoryTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
now := time.Now()
|
||||
for _, id := range []string{"existing-1", "existing-2"} {
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: id,
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "Existing",
|
||||
Category: "sunny",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed existing sunny video: %v", err)
|
||||
}
|
||||
}
|
||||
if label, ok, err := cat.EnsureCollectionTag(ctx, "sunny"); err != nil || !ok || label != "sunny" {
|
||||
t.Fatalf("ensure collection = %q, %v, %v; want sunny true nil", label, ok, err)
|
||||
}
|
||||
tags, err := cat.ListTags(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("list tags: %v", err)
|
||||
}
|
||||
var tagID int64
|
||||
for _, tag := range tags {
|
||||
if tag.Label == "sunny" {
|
||||
tagID = tag.ID
|
||||
break
|
||||
}
|
||||
}
|
||||
if tagID == 0 {
|
||||
t.Fatal("sunny tag not found before delete")
|
||||
}
|
||||
if _, err := cat.DeleteTag(ctx, tagID); err != nil {
|
||||
t.Fatalf("delete tag: %v", err)
|
||||
}
|
||||
|
||||
drv := &scannerTreeFakeDrive{
|
||||
entries: map[string][]drives.Entry{
|
||||
"root": {{
|
||||
ID: "dir-1",
|
||||
Name: "sunny",
|
||||
IsDir: true,
|
||||
}},
|
||||
"dir-1": {{
|
||||
ID: "file-1",
|
||||
ParentID: "dir-1",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
ModTime: now,
|
||||
}},
|
||||
},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
if _, err := sc.Run(ctx, ""); err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, "fake-drive-file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
@@ -385,15 +490,6 @@ func TestRunDoesNotRecreateDeletedCollectionDirectoryTag(t *testing.T) {
|
||||
if len(got.Tags) != 0 {
|
||||
t.Fatalf("tags = %#v, want none", got.Tags)
|
||||
}
|
||||
tags, err = cat.ListTags(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("list tags after scan: %v", err)
|
||||
}
|
||||
for _, tag := range tags {
|
||||
if tag.Label == "sunny" {
|
||||
t.Fatal("deleted collection tag was recreated during scan")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunMapsAVCodeDirectoryToAVTag(t *testing.T) {
|
||||
@@ -414,7 +510,6 @@ func TestRunMapsAVCodeDirectoryToAVTag(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "Existing",
|
||||
Category: "cc-1750027",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,178 @@
|
||||
// Package transcode 实现"浏览器兼容性转码":把网盘/本地存储中浏览器
|
||||
// <video> 播不动的视频(AVI/WMV/FLV、MPEG-4 Part 2、RMVB 等)转成
|
||||
// H.264 + AAC 的 MP4,并把产物上传回同一存储,播放源切到产物文件。
|
||||
//
|
||||
// 与封面/预览生成不同,转码不会自动运行——只能由管理员在网盘管理页
|
||||
// 手动开启,也可以随时手动停止。
|
||||
package transcode
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MediaInfo is the minimal ffprobe result needed to decide whether a file is
// playable in a browser.
type MediaInfo struct {
	// FormatName is ffprobe's format_name: a comma-separated list of demuxer
	// aliases, e.g. "mov,mp4,m4a,3gp,3g2,mj2" / "avi" / "matroska,webm".
	FormatName string
	// VideoCodecs and AudioCodecs hold the codec_name of every video and
	// audio stream reported by ffprobe.
	VideoCodecs, AudioCodecs []string
}
|
||||
|
||||
// browserCompatibleVideoCodecs lists the video codecs that mainstream browser
// <video> elements can generally decode. HEVC/H.265 is only supported on some
// platforms, so it is conservatively treated as incompatible.
var browserCompatibleVideoCodecs = map[string]bool{"h264": true, "vp8": true, "vp9": true, "av1": true}
|
||||
|
||||
// browserCompatibleAudioCodecs lists the audio codecs that mainstream
// browsers can generally decode.
var browserCompatibleAudioCodecs = map[string]bool{"aac": true, "mp3": true, "opus": true, "vorbis": true, "flac": true}
|
||||
|
||||
// NeedsTranscode 判断这个文件是否需要转码才能在浏览器里播放。
|
||||
// ext 是 catalog 里记录的扩展名(小写、不带点),用来区分 mkv 和 webm
|
||||
// (两者的 format_name 都是 "matroska,webm")。
|
||||
func NeedsTranscode(info MediaInfo, ext string) bool {
|
||||
if !containerCompatible(info.FormatName, ext) {
|
||||
return true
|
||||
}
|
||||
for _, codec := range info.VideoCodecs {
|
||||
if !browserCompatibleVideoCodecs[strings.ToLower(codec)] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
for _, codec := range info.AudioCodecs {
|
||||
if !browserCompatibleAudioCodecs[strings.ToLower(codec)] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// containerCompatible reports whether the container described by ffprobe's
// format_name (a comma-separated list of demuxer aliases) is one browsers can
// play directly.
//
//   - Any format whose alias list contains "mp4" is accepted.
//   - "matroska,webm" is accepted only for a real .webm file; .mkv is
//     conservatively sent to transcoding.
//
// Aliases are compared as whole, trimmed, case-insensitive tokens, so an
// unrelated demuxer name that merely contains "webm" (such as
// "webm_dash_manifest") is not mistaken for WebM. ext may be given with or
// without a leading dot and in any letter case.
func containerCompatible(formatName, ext string) bool {
	normalizedExt := strings.TrimPrefix(strings.TrimSpace(ext), ".")
	isWebMExt := strings.EqualFold(normalizedExt, "webm")

	for _, alias := range strings.Split(strings.ToLower(formatName), ",") {
		switch strings.TrimSpace(alias) {
		case "mp4":
			return true
		case "webm":
			if isWebMExt {
				return true
			}
		}
	}
	return false
}
|
||||
|
||||
// ProbeFile 用 ffprobe 探测本地文件的容器与音视频编码。
|
||||
func ProbeFile(ctx context.Context, ffprobePath, path string) (MediaInfo, error) {
|
||||
ctx2, cancel := context.WithTimeout(ctx, 60*time.Second)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx2, ffprobePath,
|
||||
"-v", "error",
|
||||
"-show_entries", "format=format_name",
|
||||
"-show_entries", "stream=codec_type,codec_name",
|
||||
"-of", "json",
|
||||
path,
|
||||
)
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return MediaInfo{}, fmt.Errorf("transcode: ffprobe: %w", err)
|
||||
}
|
||||
var parsed struct {
|
||||
Format struct {
|
||||
FormatName string `json:"format_name"`
|
||||
} `json:"format"`
|
||||
Streams []struct {
|
||||
CodecType string `json:"codec_type"`
|
||||
CodecName string `json:"codec_name"`
|
||||
} `json:"streams"`
|
||||
}
|
||||
if err := json.Unmarshal(out, &parsed); err != nil {
|
||||
return MediaInfo{}, fmt.Errorf("transcode: parse ffprobe output: %w", err)
|
||||
}
|
||||
info := MediaInfo{FormatName: parsed.Format.FormatName}
|
||||
for _, s := range parsed.Streams {
|
||||
switch s.CodecType {
|
||||
case "video":
|
||||
info.VideoCodecs = append(info.VideoCodecs, s.CodecName)
|
||||
case "audio":
|
||||
info.AudioCodecs = append(info.AudioCodecs, s.CodecName)
|
||||
}
|
||||
}
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// buildFFmpegArgs 按探测结果生成转码参数:
|
||||
// - 编码本就兼容、只是容器不行(如 AVI 里装 H.264)→ 流拷贝 remux,零质量损失;
|
||||
// - 否则视频转 H.264(裁到偶数尺寸 + yuv420p 保证兼容性)、音频转 AAC。
|
||||
//
|
||||
// 两种情况都加 +faststart 把 moov 提前,便于边下边播。
|
||||
func buildFFmpegArgs(info MediaInfo, inPath, outPath string) []string {
|
||||
args := []string{"-y", "-i", inPath}
|
||||
videoOK := true
|
||||
for _, codec := range info.VideoCodecs {
|
||||
if !browserCompatibleVideoCodecs[strings.ToLower(codec)] {
|
||||
videoOK = false
|
||||
break
|
||||
}
|
||||
}
|
||||
audioOK := true
|
||||
for _, codec := range info.AudioCodecs {
|
||||
if !browserCompatibleAudioCodecs[strings.ToLower(codec)] {
|
||||
audioOK = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if videoOK {
|
||||
args = append(args, "-c:v", "copy")
|
||||
} else {
|
||||
args = append(args,
|
||||
"-c:v", "libx264",
|
||||
"-preset", "veryfast",
|
||||
"-crf", "23",
|
||||
"-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2",
|
||||
"-pix_fmt", "yuv420p",
|
||||
)
|
||||
}
|
||||
if len(info.AudioCodecs) == 0 {
|
||||
args = append(args, "-an")
|
||||
} else if audioOK {
|
||||
args = append(args, "-c:a", "copy")
|
||||
} else {
|
||||
args = append(args, "-c:a", "aac", "-b:a", "128k")
|
||||
}
|
||||
args = append(args, "-movflags", "+faststart", "-f", "mp4", outPath)
|
||||
return args
|
||||
}
|
||||
|
||||
// TranscodeFile 把本地输入文件转成浏览器可播的 MP4 写到 outPath。
|
||||
func TranscodeFile(ctx context.Context, ffmpegPath string, info MediaInfo, inPath, outPath string) error {
|
||||
args := buildFFmpegArgs(info, inPath, outPath)
|
||||
cmd := exec.CommandContext(ctx, ffmpegPath, args...)
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("transcode: ffmpeg: %w: %s", err, tailOf(string(out), 400))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// tailOf returns at most the last n bytes of s after trimming surrounding
// whitespace. A non-positive n yields "". The cut is moved forward to the
// next UTF-8 character boundary so the result never starts in the middle of
// a multi-byte character (tool output may contain non-ASCII file names);
// the result can therefore be slightly shorter than n bytes.
func tailOf(s string, n int) string {
	s = strings.TrimSpace(s)
	if n <= 0 {
		return ""
	}
	if len(s) <= n {
		return s
	}
	start := len(s) - n
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; skip them.
	for start < len(s) && s[start]&0xC0 == 0x80 {
		start++
	}
	return s[start:]
}
|
||||
@@ -0,0 +1,125 @@
|
||||
package transcode
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
)
|
||||
|
||||
func TestNeedsTranscode(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
info MediaInfo
|
||||
ext string
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "h264 aac mp4 is compatible",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "mp4",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "mpeg4 in avi needs transcode",
|
||||
info: MediaInfo{FormatName: "avi", VideoCodecs: []string{"mpeg4"}, AudioCodecs: []string{"mp3"}},
|
||||
ext: "avi",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "h264 in avi needs remux",
|
||||
info: MediaInfo{FormatName: "avi", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "avi",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "hevc in mp4 needs transcode",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"hevc"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "mp4",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "vp9 opus webm is compatible",
|
||||
info: MediaInfo{FormatName: "matroska,webm", VideoCodecs: []string{"vp9"}, AudioCodecs: []string{"opus"}},
|
||||
ext: "webm",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "h264 in mkv is conservative transcode",
|
||||
info: MediaInfo{FormatName: "matroska,webm", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "mkv",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "pcm audio in mov needs transcode",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"pcm_s16le"}},
|
||||
ext: "mov",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "video only h264 mp4 is compatible",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"h264"}},
|
||||
ext: "mp4",
|
||||
want: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := NeedsTranscode(tc.info, tc.ext); got != tc.want {
|
||||
t.Fatalf("NeedsTranscode(%+v, %q) = %v, want %v", tc.info, tc.ext, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildFFmpegArgsRemuxWhenCodecsCompatible(t *testing.T) {
|
||||
// AVI 里装 H.264+AAC:只需要换容器,应该走流拷贝
|
||||
info := MediaInfo{FormatName: "avi", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}}
|
||||
args := strings.Join(buildFFmpegArgs(info, "in.avi", "out.mp4"), " ")
|
||||
if !strings.Contains(args, "-c:v copy") {
|
||||
t.Fatalf("expected video stream copy, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "-c:a copy") {
|
||||
t.Fatalf("expected audio stream copy, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "+faststart") {
|
||||
t.Fatalf("expected faststart flag, got: %s", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildFFmpegArgsTranscodesIncompatibleCodecs(t *testing.T) {
|
||||
info := MediaInfo{FormatName: "avi", VideoCodecs: []string{"mpeg4"}, AudioCodecs: []string{"wmav2"}}
|
||||
args := strings.Join(buildFFmpegArgs(info, "in.avi", "out.mp4"), " ")
|
||||
if !strings.Contains(args, "-c:v libx264") {
|
||||
t.Fatalf("expected libx264 video encode, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "-c:a aac") {
|
||||
t.Fatalf("expected aac audio encode, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "yuv420p") {
|
||||
t.Fatalf("expected yuv420p pixel format, got: %s", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildFFmpegArgsDropsAudioWhenNoAudioStream(t *testing.T) {
|
||||
info := MediaInfo{FormatName: "avi", VideoCodecs: []string{"mpeg4"}}
|
||||
args := strings.Join(buildFFmpegArgs(info, "in.avi", "out.mp4"), " ")
|
||||
if !strings.Contains(args, "-an") {
|
||||
t.Fatalf("expected -an for video without audio, got: %s", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTranscodedName(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
fileName, title, id, want string
|
||||
}{
|
||||
{"www.98T.la@167.avi", "www.98T.la@167", "p115-1", "www.98T.la@167.mp4"},
|
||||
{"", "标题", "p115-2", "标题.mp4"},
|
||||
{"a/b\\c.wmv", "", "p115-3", "a_b_c.mp4"},
|
||||
} {
|
||||
v := &catalog.Video{FileName: tc.fileName, Title: tc.title, ID: tc.id}
|
||||
if got := transcodedName(v); got != tc.want {
|
||||
t.Fatalf("transcodedName(%q,%q,%q) = %q, want %q", tc.fileName, tc.title, tc.id, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,308 @@
|
||||
package transcode
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
// DefaultTargetDirName is the directory (relative to the drive root) where
// transcoded outputs are stored. Per the original note, the worker ensures
// the directory exists before its first upload and adds it to the drive's
// scan skip list, so the scanner does not ingest transcoded outputs as new
// videos.
const DefaultTargetDirName string = "91转码"
|
||||
|
||||
// Config holds the settings of a transcode Worker. NewWorker fills in a
// default for every field left empty.
type Config struct {
	// FFmpegPath and FFprobePath are the executables to run; NewWorker
	// defaults them to "ffmpeg" and "ffprobe".
	FFmpegPath  string
	FFprobePath string
	// WorkDir is the local temporary directory used for downloading source
	// files and writing transcoded outputs.
	WorkDir string
	// TargetDirName falls back to DefaultTargetDirName when empty.
	TargetDirName string
}
|
||||
|
||||
// TaskStatus mirrors the status structure of the preview / fingerprint
// workers so the admin UI can display it.
type TaskStatus struct {
	State        string // worker state, e.g. "idle" or "generating"
	CurrentTitle string // title of the video currently being processed
	QueueLength  int    // videos still waiting, excluding the one in progress
	DoneCount    int    // videos processed so far in the current run
	TotalCount   int    // videos in the current run
}
|
||||
|
||||
// Worker 串行处理一个 drive 的转码任务。生命周期与一次"开始转码"对应:
|
||||
// Run 处理完整个候选列表(或 ctx 被取消)后即结束,不常驻。
|
||||
type Worker struct {
|
||||
cfg Config
|
||||
cat *catalog.Catalog
|
||||
drv drives.Drive
|
||||
hc *http.Client
|
||||
|
||||
mu sync.Mutex
|
||||
state string
|
||||
currentTitle string
|
||||
done int
|
||||
total int
|
||||
|
||||
targetDirOnce sync.Once
|
||||
targetDirID string
|
||||
targetDirErr error
|
||||
}
|
||||
|
||||
func NewWorker(cfg Config, cat *catalog.Catalog, drv drives.Drive) *Worker {
|
||||
if cfg.FFmpegPath == "" {
|
||||
cfg.FFmpegPath = "ffmpeg"
|
||||
}
|
||||
if cfg.FFprobePath == "" {
|
||||
cfg.FFprobePath = "ffprobe"
|
||||
}
|
||||
if cfg.TargetDirName == "" {
|
||||
cfg.TargetDirName = DefaultTargetDirName
|
||||
}
|
||||
if cfg.WorkDir == "" {
|
||||
cfg.WorkDir = os.TempDir()
|
||||
}
|
||||
return &Worker{
|
||||
cfg: cfg,
|
||||
cat: cat,
|
||||
drv: drv,
|
||||
hc: &http.Client{Timeout: 0},
|
||||
state: "idle",
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Worker) Status() TaskStatus {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
queueLen := w.total - w.done
|
||||
if w.state == "generating" && queueLen > 0 {
|
||||
// 正在处理的那条不算"排队中"
|
||||
queueLen--
|
||||
}
|
||||
if queueLen < 0 {
|
||||
queueLen = 0
|
||||
}
|
||||
return TaskStatus{
|
||||
State: w.state,
|
||||
CurrentTitle: w.currentTitle,
|
||||
QueueLength: queueLen,
|
||||
DoneCount: w.done,
|
||||
TotalCount: w.total,
|
||||
}
|
||||
}
|
||||
|
||||
// Run 串行转码整个候选列表。ctx 取消时停在当前条目边界(正在跑的 ffmpeg
|
||||
// 会被 CommandContext 杀掉),未处理的候选保持原状态,下次开始时继续。
|
||||
func (w *Worker) Run(ctx context.Context, videos []*catalog.Video) {
|
||||
w.mu.Lock()
|
||||
w.state = "generating"
|
||||
w.total = len(videos)
|
||||
w.done = 0
|
||||
w.mu.Unlock()
|
||||
|
||||
defer func() {
|
||||
w.mu.Lock()
|
||||
w.state = "idle"
|
||||
w.currentTitle = ""
|
||||
w.mu.Unlock()
|
||||
}()
|
||||
|
||||
for _, v := range videos {
|
||||
if ctx.Err() != nil {
|
||||
log.Printf("[transcode] drive=%s canceled after %d/%d", w.drv.ID(), w.doneCount(), len(videos))
|
||||
return
|
||||
}
|
||||
w.mu.Lock()
|
||||
w.currentTitle = v.Title
|
||||
w.mu.Unlock()
|
||||
|
||||
if err := w.process(ctx, v); err != nil {
|
||||
if ctx.Err() != nil {
|
||||
// 取消导致的失败不要写 failed,保持候选状态便于下次继续
|
||||
log.Printf("[transcode] drive=%s canceled while processing %s", w.drv.ID(), v.ID)
|
||||
return
|
||||
}
|
||||
log.Printf("[transcode] drive=%s video=%s failed: %v", w.drv.ID(), v.ID, err)
|
||||
if uerr := w.cat.UpdateVideoTranscode(context.WithoutCancel(ctx), v.ID, "failed", err.Error(), "", 0); uerr != nil {
|
||||
log.Printf("[transcode] mark failed %s: %v", v.ID, uerr)
|
||||
}
|
||||
}
|
||||
w.mu.Lock()
|
||||
w.done++
|
||||
w.mu.Unlock()
|
||||
}
|
||||
log.Printf("[transcode] drive=%s finished %d videos", w.drv.ID(), len(videos))
|
||||
}
|
||||
|
||||
func (w *Worker) doneCount() int {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
return w.done
|
||||
}
|
||||
|
||||
func (w *Worker) process(ctx context.Context, v *catalog.Video) error {
|
||||
localPath, cleanup, err := w.fetchSource(ctx, v)
|
||||
if err != nil {
|
||||
return fmt.Errorf("fetch source: %w", err)
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
info, err := ProbeFile(ctx, w.cfg.FFprobePath, localPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !NeedsTranscode(info, v.Ext) {
|
||||
log.Printf("[transcode] drive=%s video=%s compatible (%s), skip", w.drv.ID(), v.ID, info.FormatName)
|
||||
return w.cat.UpdateVideoTranscode(ctx, v.ID, "skipped", "", "", 0)
|
||||
}
|
||||
|
||||
outPath := filepath.Join(w.cfg.WorkDir, sanitizeFileName(v.ID)+".transcoding.mp4")
|
||||
defer os.Remove(outPath)
|
||||
if err := TranscodeFile(ctx, w.cfg.FFmpegPath, info, localPath, outPath); err != nil {
|
||||
return err
|
||||
}
|
||||
stat, err := os.Stat(outPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("stat transcoded output: %w", err)
|
||||
}
|
||||
|
||||
dirID, err := w.ensureTargetDir(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("ensure target dir: %w", err)
|
||||
}
|
||||
f, err := os.Open(outPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
fileID, err := w.drv.Upload(ctx, dirID, transcodedName(v), f, stat.Size())
|
||||
if err != nil {
|
||||
return fmt.Errorf("upload transcoded file: %w", err)
|
||||
}
|
||||
log.Printf("[transcode] drive=%s video=%s ready: file=%s size=%d", w.drv.ID(), v.ID, fileID, stat.Size())
|
||||
return w.cat.UpdateVideoTranscode(ctx, v.ID, "ready", "", fileID, stat.Size())
|
||||
}
|
||||
|
||||
// fetchSource 把原始文件准备成本地路径。本地存储直接复用源路径(cleanup
|
||||
// 不删除源文件);云盘则整文件下载到 WorkDir。
|
||||
func (w *Worker) fetchSource(ctx context.Context, v *catalog.Video) (string, func(), error) {
|
||||
link, err := w.drv.StreamURL(ctx, v.FileID)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
u, err := url.Parse(link.URL)
|
||||
if isLocal := err == nil && u.Scheme != "http" && u.Scheme != "https"; isLocal {
|
||||
path := link.URL
|
||||
if err == nil && u.Scheme == "file" {
|
||||
path = u.Path
|
||||
}
|
||||
return path, func() {}, nil
|
||||
}
|
||||
|
||||
tmpPath := filepath.Join(w.cfg.WorkDir, sanitizeFileName(v.ID)+".src.tmp")
|
||||
cleanup := func() { os.Remove(tmpPath) }
|
||||
if err := w.downloadTo(ctx, link, tmpPath); err != nil {
|
||||
cleanup()
|
||||
return "", nil, err
|
||||
}
|
||||
return tmpPath, cleanup, nil
|
||||
}
|
||||
|
||||
func (w *Worker) downloadTo(ctx context.Context, link *drives.StreamLink, dst string) error {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, link.URL, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for k, vals := range link.Headers {
|
||||
for _, val := range vals {
|
||||
req.Header.Add(k, val)
|
||||
}
|
||||
}
|
||||
res, err := w.hc.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode < 200 || res.StatusCode >= 300 {
|
||||
return fmt.Errorf("download source: HTTP %d", res.StatusCode)
|
||||
}
|
||||
f, err := os.Create(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
if _, err := io.Copy(f, res.Body); err != nil {
|
||||
return fmt.Errorf("download source: %w", err)
|
||||
}
|
||||
return f.Sync()
|
||||
}
|
||||
|
||||
// ensureTargetDir 确保网盘上的转码产物目录存在,并把它写进 drive 的扫描
|
||||
// 跳过列表(幂等),避免 scanner 把产物再当新视频收进库。
|
||||
func (w *Worker) ensureTargetDir(ctx context.Context) (string, error) {
|
||||
w.targetDirOnce.Do(func() {
|
||||
dirID, err := w.drv.EnsureDir(ctx, w.cfg.TargetDirName)
|
||||
if err != nil {
|
||||
w.targetDirErr = err
|
||||
return
|
||||
}
|
||||
w.targetDirID = dirID
|
||||
if err := w.addDirToSkipList(ctx, dirID); err != nil {
|
||||
// 跳过列表更新失败不阻塞转码,只记日志(最坏情况是 scanner
|
||||
// 之后把产物扫成新视频,可手动加跳过目录修复)。
|
||||
log.Printf("[transcode] drive=%s add skip dir %s: %v", w.drv.ID(), dirID, err)
|
||||
}
|
||||
})
|
||||
return w.targetDirID, w.targetDirErr
|
||||
}
|
||||
|
||||
func (w *Worker) addDirToSkipList(ctx context.Context, dirID string) error {
|
||||
d, err := w.cat.GetDrive(ctx, w.drv.ID())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, existing := range d.SkipDirIDs {
|
||||
if existing == dirID {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return w.cat.SetDriveSkipDirIDs(ctx, w.drv.ID(), append(d.SkipDirIDs, dirID))
|
||||
}
|
||||
|
||||
// transcodedName 生成产物文件名:原文件名去掉扩展名 + .mp4。
|
||||
func transcodedName(v *catalog.Video) string {
|
||||
base := strings.TrimSpace(v.FileName)
|
||||
if base == "" {
|
||||
base = v.Title
|
||||
}
|
||||
if base == "" {
|
||||
base = v.ID
|
||||
}
|
||||
if ext := filepath.Ext(base); ext != "" {
|
||||
base = strings.TrimSuffix(base, ext)
|
||||
}
|
||||
return sanitizeFileName(base) + ".mp4"
|
||||
}
|
||||
|
||||
// sanitizeFileName replaces path separators and other dangerous characters
// (/ \ : * ? " < > | and NUL) with '_' so the name cannot form an unexpected
// path, then trims surrounding whitespace. If nothing is left, a
// "transcoded-<unix milliseconds>" name is generated instead.
func sanitizeFileName(name string) string {
	const unsafeBytes = "/\\:*?\"<>|\x00"

	// All unsafe characters are single ASCII bytes, which never occur inside
	// a multi-byte UTF-8 sequence, so a byte-wise pass is sufficient.
	buf := []byte(name)
	for i, c := range buf {
		if strings.IndexByte(unsafeBytes, c) >= 0 {
			buf[i] = '_'
		}
	}

	if cleaned := strings.TrimSpace(string(buf)); cleaned != "" {
		return cleaned
	}
	return fmt.Sprintf("transcoded-%d", time.Now().UnixMilli())
}
|
||||
@@ -134,9 +134,9 @@ apt_install() {
|
||||
python3 python3-requests python3-bs4 python3-lxml python3-socks
|
||||
}
|
||||
|
||||
verify_spider91_python_deps() {
|
||||
command -v python3 >/dev/null 2>&1 || die "python3 is required for 91Spider"
|
||||
python3 - <<'PY' || die "missing Python modules for 91Spider: requests, bs4, lxml, socks"
|
||||
verify_crawler_python_deps() {
|
||||
command -v python3 >/dev/null 2>&1 || die "python3 is required for crawler scripts"
|
||||
python3 - <<'PY' || die "missing Python modules for crawler scripts: requests, bs4, lxml, socks"
|
||||
import importlib.util
|
||||
import sys
|
||||
|
||||
@@ -200,7 +200,7 @@ install_dependencies() {
|
||||
install_go
|
||||
command -v ffmpeg >/dev/null 2>&1 || die "ffmpeg is required"
|
||||
command -v ffprobe >/dev/null 2>&1 || die "ffprobe is required"
|
||||
verify_spider91_python_deps
|
||||
verify_crawler_python_deps
|
||||
}
|
||||
|
||||
ensure_ownership() {
|
||||
|
||||
+10
-2
@@ -2,8 +2,16 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
|
||||
<meta name="referrer" content="no-referrer" />
|
||||
<link rel="icon" type="image/png" href="/icon.png" />
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png" />
|
||||
<link rel="manifest" href="/manifest.webmanifest" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, viewport-fit=cover" />
|
||||
<meta name="mobile-web-app-capable" content="yes" />
|
||||
<meta name="apple-mobile-web-app-capable" content="yes" />
|
||||
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
|
||||
<meta name="apple-mobile-web-app-title" content="91" />
|
||||
<meta name="theme-color" content="#000000" />
|
||||
<meta name="description" content="91 视频站" />
|
||||
<title>91</title>
|
||||
<!-- Premium Fonts Preconnect & Links -->
|
||||
@@ -18,7 +26,7 @@
|
||||
(function () {
|
||||
try {
|
||||
var t = localStorage.getItem("video-site:theme");
|
||||
if (t === "pink" || t === "dark") {
|
||||
if (t === "pink" || t === "dark" || t === "sky") {
|
||||
document.documentElement.setAttribute("data-theme", t);
|
||||
} else {
|
||||
document.documentElement.setAttribute("data-theme", "dark");
|
||||
|
||||
+3
-8
@@ -128,7 +128,7 @@ verify_runtime_deps() {
|
||||
command -v "$cmd" >/dev/null 2>&1 || die "missing command: $cmd"
|
||||
done
|
||||
|
||||
python3 - <<'PY' || die "missing Python modules for 91Spider: requests, bs4, lxml, socks"
|
||||
python3 - <<'PY' || die "missing Python modules for crawler scripts: requests, bs4, lxml, socks"
|
||||
import importlib.util
|
||||
import sys
|
||||
|
||||
@@ -194,7 +194,7 @@ backup_install_files() {
|
||||
local backup="$1"
|
||||
mkdir -p "$backup"
|
||||
cp -a "$INSTALL_PATH/server" "$backup/server"
|
||||
for item in dist config.example.yaml 91VideoSpider config.yaml .version; do
|
||||
for item in dist config.example.yaml config.yaml .version; do
|
||||
if [[ -e "$INSTALL_PATH/$item" ]]; then
|
||||
cp -a "$INSTALL_PATH/$item" "$backup/$item"
|
||||
fi
|
||||
@@ -205,7 +205,7 @@ restore_install_files() {
|
||||
local backup="$1"
|
||||
mkdir -p "$INSTALL_PATH"
|
||||
cp -a "$backup/server" "$INSTALL_PATH/server"
|
||||
for item in dist config.example.yaml 91VideoSpider config.yaml .version; do
|
||||
for item in dist config.example.yaml config.yaml .version; do
|
||||
rm -rf "${INSTALL_PATH:?}/$item"
|
||||
if [[ -e "$backup/$item" ]]; then
|
||||
cp -a "$backup/$item" "$INSTALL_PATH/$item"
|
||||
@@ -441,7 +441,6 @@ process_looks_like_app() {
|
||||
[[ "$cmd" == *"VIDEO_FRONTEND_DIR=$INSTALL_PATH/dist"* ]] && return 0
|
||||
[[ "$cmd" == *"VIDEO_CONFIG=$INSTALL_PATH/config.yaml"* ]] && return 0
|
||||
[[ "$cmd" == *"video-site-91"* ]] && return 0
|
||||
[[ "$cmd" == *"91VideoSpider"* ]] && return 0
|
||||
return 1
|
||||
}
|
||||
|
||||
@@ -595,10 +594,6 @@ fetch_and_unpack() {
|
||||
rm -rf "$INSTALL_PATH/dist"
|
||||
cp -R "$root/dist" "$INSTALL_PATH/dist"
|
||||
cp "$root/config.example.yaml" "$INSTALL_PATH/config.example.yaml"
|
||||
if [[ -d "$root/91VideoSpider" ]]; then
|
||||
rm -rf "$INSTALL_PATH/91VideoSpider"
|
||||
cp -R "$root/91VideoSpider" "$INSTALL_PATH/91VideoSpider"
|
||||
fi
|
||||
chmod +x "$INSTALL_PATH/server"
|
||||
rm -rf "$tmp"
|
||||
}
|
||||
|
||||
Generated
+37
-2
@@ -1,14 +1,16 @@
|
||||
{
|
||||
"name": "video-site",
|
||||
"version": "0.1.0",
|
||||
"version": "0.2.2",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "video-site",
|
||||
"version": "0.1.0",
|
||||
"version": "0.2.2",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"artplayer": "^5.4.0",
|
||||
"hls.js": "^1.6.16",
|
||||
"lucide-react": "0.453.0",
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1",
|
||||
@@ -475,6 +477,15 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/artplayer": {
|
||||
"version": "5.4.0",
|
||||
"resolved": "https://registry.npmjs.org/artplayer/-/artplayer-5.4.0.tgz",
|
||||
"integrity": "sha512-2B+plbx8N2yNsjK4nJU3+EOG8TULm1LRZk/QPkWRAMEX2Ee/MSnZG/WJYz8kcoZxZuLKcQ3uXifqLuPxZOH29A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"option-validator": "^2.0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/csstype": {
|
||||
"version": "3.2.3",
|
||||
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz",
|
||||
@@ -525,12 +536,27 @@
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/hls.js": {
|
||||
"version": "1.6.16",
|
||||
"resolved": "https://registry.npmjs.org/hls.js/-/hls.js-1.6.16.tgz",
|
||||
"integrity": "sha512-VSIRpLfRwlAAdGL4wiTucx2ScRipo0ed1FBatWkyt832jC4CReKstga6yIhYVwGu9LOBjuX9wzmRMeQdBJtzEA==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/js-tokens": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
||||
"integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/kind-of": {
|
||||
"version": "6.0.3",
|
||||
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.3.tgz",
|
||||
"integrity": "sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/lightningcss": {
|
||||
"version": "1.32.0",
|
||||
"resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz",
|
||||
@@ -832,6 +858,15 @@
|
||||
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/option-validator": {
|
||||
"version": "2.0.6",
|
||||
"resolved": "https://registry.npmjs.org/option-validator/-/option-validator-2.0.6.tgz",
|
||||
"integrity": "sha512-tmZDan2LRIRQyhUGvkff68/O0R8UmF+Btmiiz0SmSw2ng3CfPZB9wJlIjHpe/MKUZqyIZkVIXCrwr1tIN+0Dzg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"kind-of": "^6.0.3"
|
||||
}
|
||||
},
|
||||
"node_modules/picocolors": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
|
||||
|
||||
+3
-1
@@ -2,7 +2,7 @@
|
||||
"name": "video-site",
|
||||
"private": true,
|
||||
"license": "MIT",
|
||||
"version": "0.1.0",
|
||||
"version": "0.2.2",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
@@ -13,6 +13,8 @@
|
||||
"test": "node --import tsx --test tests/*.test.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"artplayer": "^5.4.0",
|
||||
"hls.js": "^1.6.16",
|
||||
"lucide-react": "0.453.0",
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1",
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 21 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 136 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 114 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 19 KiB |
@@ -1,28 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
|
||||
<defs>
|
||||
<!-- Background Gradient: Warm Orange to Sakura Pink, representing both themes -->
|
||||
<linearGradient id="bg-grad" x1="0%" y1="0%" x2="100%" y2="100%">
|
||||
<stop offset="0%" stop-color="#FF7E40" />
|
||||
<stop offset="100%" stop-color="#FF4B91" />
|
||||
</linearGradient>
|
||||
|
||||
<!-- Subtle drop shadow for the play button to give it depth -->
|
||||
<filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
|
||||
<feDropShadow dx="0" dy="1.5" stdDeviation="1" flood-opacity="0.25" />
|
||||
</filter>
|
||||
</defs>
|
||||
|
||||
<!-- Main Squircle Background -->
|
||||
<rect x="2" y="2" width="28" height="28" rx="8" fill="url(#bg-grad)" />
|
||||
|
||||
<!-- Inner border for a premium, glassmorphic feel -->
|
||||
<rect x="3" y="3" width="26" height="26" rx="7" fill="none" stroke="#ffffff" stroke-width="1" opacity="0.2" />
|
||||
|
||||
<!-- Stylized Play Button Icon, perfectly centered with rounded corners and drop shadow -->
|
||||
<path d="M13 10.5 L21.5 16 L13 21.5 Z"
|
||||
fill="#ffffff"
|
||||
stroke="#ffffff"
|
||||
stroke-width="2.5"
|
||||
stroke-linejoin="round"
|
||||
filter="url(#shadow)" />
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 1.1 KiB |
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user