Compare commits
34 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2adaac3d7d | |||
| ee8af315b0 | |||
| 6884473dbf | |||
| f0458f7043 | |||
| e32da9016b | |||
| 2427f58165 | |||
| 00aaeed736 | |||
| 5efbceb205 | |||
| 0faeaf408f | |||
| 1b5eda92b0 | |||
| 840a858dbd | |||
| 1ee5ee35be | |||
| 12b737b6fe | |||
| bd33d26a1f | |||
| 36fe32cb84 | |||
| 194d98895a | |||
| 2437fbd779 | |||
| 4dd66b8120 | |||
| 30b736cf36 | |||
| 57391e0e98 | |||
| 052e142520 | |||
| f9351324c6 | |||
| bb83277d62 | |||
| aa856db1f6 | |||
| 7e5e67697e | |||
| 9cc8e02bec | |||
| 139e63eef2 | |||
| b8388eba59 | |||
| 76782f3801 | |||
| 1ae1408fb6 | |||
| 738406162a | |||
| 0f111b846d | |||
| 4dd9015bd7 | |||
| 84fbb6f51c |
@@ -30,13 +30,17 @@ tmp/
|
||||
|
||||
# 91 爬虫脚本独立运行时的默认输出文件(backend 跑时会显式 --output 到 backend/data/spider91/,所以不会落在这里)
|
||||
91porn_videos.json
|
||||
91VideoSpider/91porn_videos.json
|
||||
91VideoSpider/data/
|
||||
91VideoSpider/__pycache__/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
|
||||
# Local scratch images
|
||||
/*.png
|
||||
/*.jpg
|
||||
/*.jpeg
|
||||
/*.gif
|
||||
/*.webp
|
||||
/*.bmp
|
||||
/*.ico
|
||||
/image.jpg
|
||||
/image003.jpg
|
||||
/image004.jpg
|
||||
|
||||
@@ -1,988 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
91porn 视频爬虫脚本
|
||||
===================
|
||||
爬取 https://www.91porn.com/v.php?category=top&viewtype=basic 下的所有视频信息:
|
||||
- 视频名称
|
||||
- 封面图直链
|
||||
- 视频直链 (MP4)
|
||||
|
||||
依赖安装:
|
||||
pip install requests beautifulsoup4 lxml PySocks
|
||||
|
||||
使用方法:
|
||||
# 作为 video-site-91 通用爬虫脚本运行(后台会自动这样调用)
|
||||
python spider_91porn.py --job /path/to/job.json
|
||||
|
||||
# 全量爬取(默认行为,从 page=1 一直爬到末尾,写到 OUTPUT_FILE)
|
||||
python spider_91porn.py
|
||||
|
||||
# 只爬指定页(单页模式,手动调试用)
|
||||
python spider_91porn.py --page 1 --output /tmp/spider91_page1.json
|
||||
|
||||
# 凑够 N 个新视频模式(backend 凌晨任务用)
|
||||
python spider_91porn.py --target-new 15 --seen-viewkeys-file /tmp/seen.txt --output /tmp/new.json
|
||||
|
||||
CLI 参数:
|
||||
--job FILE crawler.v1 job JSON 路径;后台爬虫管理会使用此模式
|
||||
--page N 只爬第 N 页,配合 --output 用于手动调试
|
||||
--target-new N 从 page 1 起翻页直到凑够 N 个新视频(不在 seen 列表里的)
|
||||
--seen-viewkeys-file FILE 每行一个已知 viewkey 或 mp4 源 ID,命中即跳过;与 --target-new 配合使用
|
||||
--output FILE 输出 JSON 路径,覆盖默认的 OUTPUT_FILE
|
||||
--no-resume 禁用断点续爬(单页/target-new 模式下自动禁用)
|
||||
--quiet 压缩日志,每条视频只输出一行
|
||||
-h / --help 帮助
|
||||
|
||||
配置说明 (编辑脚本内 "配置区域"):
|
||||
- MIN_PAGE_DELAY / MAX_PAGE_DELAY : 列表页请求间隔 (默认 3-6 秒)
|
||||
- MIN_DETAIL_DELAY / MAX_DETAIL_DELAY : 详情页请求间隔 (默认 2-5 秒)
|
||||
- MAX_PAGES : 限制最大爬取页数 (None=不限, 如 5=只爬前5页)
|
||||
- OUTPUT_FILE : 输出文件名
|
||||
|
||||
输出格式 (JSON):
|
||||
--job 模式下 stdout 输出 crawler.v1 JSON Lines,日志全部写到 stderr。
|
||||
手动运行模式仍会写传统 JSON 文件:
|
||||
{
|
||||
"videos": [
|
||||
{
|
||||
"title": "视频标题",
|
||||
"thumb_url": "https://...thumb/xxxx.jpg",
|
||||
"video_url": "https://...mp43/xxxx.mp4?st=...",
|
||||
"viewkey": "abc123...",
|
||||
"source_id": "xxxx",
|
||||
"detail_url": "https://...view_video.php?viewkey=..."
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
注意:
|
||||
1. 视频直链包含时效性token (e参数为过期时间戳),会过期,需定期重新爬取
|
||||
2. 脚本已内置随机延时,请勿移除,避免对服务器造成压力
|
||||
3. 网站有Cloudflare保护,如遇到403/5xx错误,可能需要使用带cookie的session
|
||||
4. 本脚本仅供学习交流,请遵守当地法律法规
|
||||
|
||||
作者: OpenCode
|
||||
日期: 2026-05-22
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import requests
|
||||
import re
|
||||
import time
|
||||
import random
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import html
|
||||
from urllib.parse import urljoin, unquote, urlparse
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
print("错误: 缺少依赖库 beautifulsoup4", file=sys.stderr)
|
||||
print("请运行: pip install beautifulsoup4 lxml", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def prefer_ipv4_for_plain_socks5_proxy():
    """Make ``socket.getaddrinfo`` return IPv4 entries first for ``socks5://`` proxies.

    PySocks may pick IPv6 first for socks5://; some SOCKS5 servers only accept
    IPv4. The patch is applied at most once per process (guarded by the
    ``socket._spider91_ipv4_first`` marker) and only when one of the standard
    proxy environment variables starts with ``socks5://``.
    """
    if getattr(socket, "_spider91_ipv4_first", False):
        return  # already patched

    for env_name in ("HTTPS_PROXY", "HTTP_PROXY", "https_proxy", "http_proxy"):
        if os.environ.get(env_name, "").strip().lower().startswith("socks5://"):
            break
    else:
        return  # no plain socks5 proxy configured, leave the resolver alone

    resolve = socket.getaddrinfo

    def getaddrinfo_ipv4_first(*args, **kwargs):
        # sorted() is stable, so the resolver's order is kept within each family.
        return sorted(
            resolve(*args, **kwargs),
            key=lambda entry: entry[0] != socket.AF_INET,
        )

    socket.getaddrinfo = getaddrinfo_ipv4_first
    socket._spider91_ipv4_first = True
|
||||
|
||||
# ===================== Configuration =====================
BASE_URL = "https://www.91porn.com/v.php"
LIST_PARAMS = dict(category="top", viewtype="basic")

# Request headers (imitate a real desktop browser).
HEADERS = {
    "User-Agent": " ".join((
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/125.0.0.0 Safari/537.36",
    )),
    "Accept": ";".join((
        "text/html,application/xhtml+xml,application/xml",
        "q=0.9,image/avif,image/webp,image/apng,*/*",
        "q=0.8,application/signed-exchange;v=b3;q=0.7",
    )),
    "Accept-Language": "zh-CN,zh;q=0.9",
    # NOTE: do not advertise "br" (brotli) unless the brotli library is
    # installed, hence no explicit "Accept-Encoding: gzip, deflate, br" here.
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
}

# Delays in seconds; they throttle the crawl to avoid getting blocked.
MIN_PAGE_DELAY = 3.0  # minimum pause between listing pages
MAX_PAGE_DELAY = 6.0  # maximum pause between listing pages
MIN_DETAIL_DELAY = 2.0  # minimum pause between detail pages
MAX_DETAIL_DELAY = 5.0  # maximum pause between detail pages

# Retry settings.
MAX_RETRIES = 3
RETRY_DELAY = 5.0

# Output settings.
OUTPUT_FILE = "91porn_videos.json"
MAX_PAGES = None  # None crawls every page; an integer such as 5 limits the crawl to 5 pages
RESUME = True  # skip viewkeys already present in the output file (resume support)
MAX_EMPTY_PAGES = 2  # stop after this many consecutive empty pages
CRAWLER_NAME = "91Porn"
CRAWLER_PROTOCOL = "crawler.v1"
# ==========================================================
|
||||
|
||||
|
||||
def crawler_source_id(raw: str) -> str:
    """Normalize *raw* into a backend-safe source ID.

    Runs of characters outside ``[A-Za-z0-9_.-]`` collapse to a single ``_``,
    leading/trailing ``.``, ``_`` and ``-`` are trimmed, and the result is
    capped at 160 characters. Purely numeric 91 IDs pass through unchanged;
    falsy or blank input yields ``""``.
    """
    cleaned = str(raw or "").strip()
    if cleaned:
        cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "_", cleaned).strip("._-")
    return cleaned[:160]
|
||||
|
||||
|
||||
def write_jsonl(event: dict):
    """Write *event* to stdout as one JSON line (non-ASCII kept as-is) and flush."""
    payload = json.dumps(event, ensure_ascii=False)
    print(payload, flush=True)
|
||||
|
||||
|
||||
class Porn91Spider:
    """Crawl the 91porn "top" listing and resolve each card to a direct video URL.

    Listing pages are parsed for video cards, the detail page of every card
    that is not already known is fetched to decode the direct media URL, and
    the collected entries are written to a JSON file. Optionally each resolved
    entry is also streamed to stdout as one JSON line.
    """

    def __init__(
        self,
        output_file: str = None,
        start_page: int = 1,
        max_pages: int = None,
        resume: bool = None,
        max_empty_pages: int = None,
        quiet: bool = False,
        target_new: int = None,
        seen_viewkeys: list = None,
        stream_output: bool = False,
        stream_protocol: str = "legacy",
    ):
        """Create a spider; unset arguments fall back to the module-level config.

        Args:
            output_file: JSON path of the final archive (default ``OUTPUT_FILE``).
            start_page: First listing page to request; values below 1 become 1.
            max_pages: Number of listing pages to request starting at
                ``start_page``; ``None`` or a non-positive value means no limit.
            resume: Preload ``output_file`` and skip the viewkeys stored there
                (default ``RESUME``).
            max_empty_pages: Stop after this many consecutive empty or failed
                listing pages (default ``MAX_EMPTY_PAGES``).
            quiet: Suppress the "random delay" log lines.
            target_new: Stop once this many new videos have been resolved;
                ``None`` or a non-positive value disables the limit.
            seen_viewkeys: Already known viewkeys or mp4 source IDs to skip.
            stream_output: Emit every resolved video as one JSON line on stdout
                and send all log output to stderr instead.
            stream_protocol: ``"crawler.v1"`` wraps each streamed entry in an
                ``item`` event; any other value streams the raw video dict.
        """
        self.session = requests.Session()
        self.session.headers.update(HEADERS)
        # Without a fixed "mode" cookie the first detail request may return a
        # video source that differs from the listing card; forcing desktop
        # mode keeps listing and detail parsing consistent.
        self.session.cookies.set("mode", "d")

        # Effective settings: constructor argument first, module config second.
        self.output_file = output_file if output_file is not None else OUTPUT_FILE
        self.start_page = max(1, int(start_page or 1))
        self.max_pages = max_pages if max_pages is None or max_pages > 0 else None
        self.resume = RESUME if resume is None else bool(resume)
        self.max_empty_pages = (
            MAX_EMPTY_PAGES if max_empty_pages is None else int(max_empty_pages)
        )
        self.target_new = target_new if target_new and target_new > 0 else None
        self.quiet = bool(quiet)
        self.stream_output = bool(stream_output)
        self.stream_protocol = stream_protocol or "legacy"

        # Transport-level retries for transient HTTP errors.
        try:
            from requests.adapters import HTTPAdapter
            from urllib3.util.retry import Retry
            retry_strategy = Retry(
                total=MAX_RETRIES,
                backoff_factor=1,
                status_forcelist=[429, 500, 502, 503, 504],
            )
            adapter = HTTPAdapter(max_retries=retry_strategy)
            self.session.mount("https://", adapter)
            self.session.mount("http://", adapter)
        except ImportError:
            pass  # older urllib3 without Retry: crawl without the adapter

        self.results = []
        self.pages_crawled = 0
        self.processed_videos = 0
        self.skipped_videos = 0
        self.failed_videos = 0
        # Holds both viewkeys and mp4 source IDs (the name is kept for
        # backward compatibility with the older viewkey-only logic).
        self.skip_viewkeys = set()

        for known_id in seen_viewkeys or ():
            known_id = known_id.strip() if known_id else ""
            if known_id:
                self.skip_viewkeys.add(known_id)

        if self.resume and os.path.exists(self.output_file):
            self._load_previous_results()

    def _load_previous_results(self):
        """Preload results of an earlier run so their viewkeys are skipped.

        A missing, unreadable or malformed file is reported through the log
        and otherwise ignored, so the crawl starts from scratch.
        """
        try:
            with open(self.output_file, 'r', encoding='utf-8') as f:
                existing_data = json.load(f)
        except (OSError, ValueError) as e:
            self.log(f"警告: 加载已有数据失败, 将从头开始: {e}")
            return
        if not isinstance(existing_data, dict):
            self.log("警告: 已有数据格式不正确, 将从头开始")
            return

        existing_videos = existing_data.get('videos', [])
        if not isinstance(existing_videos, list):
            self.log("警告: 已有数据格式不正确, 将从头开始")
            return

        self.results = existing_videos
        for entry in existing_videos:
            viewkey = entry.get('viewkey', '') if isinstance(entry, dict) else ''
            if viewkey:
                self.skip_viewkeys.add(viewkey)
        self.processed_videos = existing_data.get('successful', 0)
        self.failed_videos = existing_data.get('failed', 0)
        self.log(f"加载已有数据: {len(self.results)} 个视频, 将跳过已处理项")

    def log(self, message: str):
        """Print a timestamped log line; to stderr when streaming, else to stdout."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        line = f"[{timestamp}] {message}"
        if self.stream_output:
            # stdout is reserved for the JSON-lines stream.
            print(line, file=sys.stderr, flush=True)
        else:
            print(line)

    def emit_stream_video(self, video: dict):
        """Write one resolved video to stdout as a single flushed JSON line.

        Does nothing unless ``stream_output`` is enabled. With the
        ``crawler.v1`` protocol the entry is wrapped in an ``item`` event,
        otherwise the raw video dict is emitted.
        """
        if not self.stream_output:
            return
        try:
            if self.stream_protocol == "crawler.v1":
                source_id = crawler_source_id(video.get("source_id") or video.get("viewkey") or "")
                item = {
                    "title": video.get("title") or "",
                    "detail_url": video.get("detail_url") or "",
                    "author": "91porn",
                    "tags": ["91porn"],
                    "media_url": video.get("video_url") or "",
                    "thumbnail_url": video.get("thumb_url") or "",
                    "headers": {
                        "Referer": video.get("detail_url") or BASE_URL,
                    },
                }
                if source_id:
                    item["source_id"] = source_id
                write_jsonl({"type": "item", "item": item})
            else:
                print(json.dumps(video, ensure_ascii=False), flush=True)
        except Exception as e:
            # Best effort: a failing stdout (typically a closed pipe) is only
            # reported on stderr; the crawl itself is not interrupted here.
            print(f"[stream] emit failed: {e}", file=sys.stderr, flush=True)

    def random_sleep(self, min_sec: float, max_sec: float):
        """Sleep for a random duration drawn uniformly from [min_sec, max_sec]."""
        delay = random.uniform(min_sec, max_sec)
        if not self.quiet:
            self.log(f" 随机延时 {delay:.2f} 秒...")
        time.sleep(delay)

    def fetch_page(self, url: str, description: str = "", referer: str = "") -> str:
        """Fetch *url* and return its HTML, or ``""`` when every attempt fails.

        Up to ``MAX_RETRIES`` attempts are made. HTTP 403 responses and short
        Cloudflare "Just a moment" challenge pages count as failed attempts.

        Args:
            url: Page to request.
            description: Human readable label used in the log instead of the URL.
            referer: Optional ``Referer`` header value.
        """
        headers_extra = {"Referer": referer} if referer else {}

        for attempt in range(1, MAX_RETRIES + 1):
            is_last_attempt = attempt >= MAX_RETRIES
            try:
                self.log(f"正在请求: {description or url} (尝试 {attempt}/{MAX_RETRIES})")
                response = self.session.get(url, timeout=30, headers=headers_extra)

                # Must be checked before raise_for_status() to retry quietly.
                if response.status_code == 403:
                    self.log("警告: 收到 403 Forbidden,可能被拦截")
                    if is_last_attempt:
                        return ""
                    self.random_sleep(RETRY_DELAY, RETRY_DELAY + 3)
                    continue

                response.raise_for_status()

                # Decode explicitly as UTF-8 instead of relying on the
                # encoding detection of requests.
                html_content = response.content.decode('utf-8', errors='replace')

                # The site itself loads the challenge-platform script, so a
                # challenge is only assumed for short "Just a moment" pages.
                if "Just a moment" in html_content and len(html_content) < 8000:
                    self.log("警告: 页面被Cloudflare挑战拦截,需要浏览器环境或正确cookie")
                    if is_last_attempt:
                        return ""
                    self.random_sleep(RETRY_DELAY, RETRY_DELAY + 5)
                    continue

                return html_content
            except requests.exceptions.HTTPError as e:
                self.log(f"HTTP错误: {e}")
                if is_last_attempt:
                    return ""
                self.random_sleep(RETRY_DELAY, RETRY_DELAY + 3)
            except requests.exceptions.RequestException as e:
                self.log(f"请求失败: {e}")
                if is_last_attempt:
                    self.log(f"达到最大重试次数,放弃: {url}")
                    return ""
                self.random_sleep(RETRY_DELAY, RETRY_DELAY + 3)
        return ""

    def parse_list_page(self, html: str) -> list:
        """Extract the video cards of a listing page.

        Returns:
            A list of dicts with the keys ``title``, ``detail_url``,
            ``thumb_url``, ``viewkey`` and ``source_id``; duplicates within
            the page (same source ID, or same detail URL when no source ID
            is available) are dropped.
        """
        videos = []
        soup = BeautifulSoup(html, 'lxml')

        # Only regular cards are parsed. The page also contains oversized
        # col-lg-8 cards whose title/thumb/detail URL belong to other videos
        # and must not be used as a data source.
        video_cards = soup.select('div.col-xs-12.col-sm-4.col-md-3.col-lg-3')
        seen_cards = set()

        for card in video_cards:
            link = card.find('a', href=re.compile(r'view_video\.php\?viewkey='))
            href = link.get('href', '') if link else ''
            if not href:
                continue

            match = re.search(r'viewkey=([^&]+)', href)
            if not match:
                continue
            viewkey = match.group(1)
            detail_url = urljoin(BASE_URL, href)
            title = self._extract_title(link)

            # Source ID: from the preview overlay id, else from the thumb URL.
            thumb_url = ""
            source_id = ""
            overlay = link.find(id=re.compile(r'^playvthumb_\d+$'))
            if overlay:
                source_id = overlay.get('id', '').rsplit('_', 1)[-1]
            img = link.find('img', class_=re.compile(r'img-responsive'))
            if img:
                thumb_url = img.get('src', '') or img.get('data-original', '')
                if thumb_url:
                    thumb_url = urljoin(BASE_URL, thumb_url)
            if not source_id and thumb_url:
                source_id = self._extract_thumb_source_id(thumb_url)

            card_key = source_id or detail_url
            if card_key in seen_cards:
                continue
            seen_cards.add(card_key)

            videos.append({
                "title": title,
                "detail_url": detail_url,
                "thumb_url": thumb_url,
                "viewkey": viewkey,
                "source_id": source_id,
            })

        return videos

    def _extract_title(self, link) -> str:
        """Return the cleaned title of a listing card link.

        Tries ``span.video-title`` first, then the link's ``title`` attribute,
        and finally the link text with the leading badge/duration removed
        (capped at 120 characters).
        """
        title_el = link.find('span', class_=re.compile(r'video-title'))
        if title_el:
            title = title_el.get_text(strip=True)
            if title:
                return html.unescape(title)

        title = link.get('title', '').strip()
        if title:
            return html.unescape(title)

        text = link.get_text(separator=' ', strip=True)
        # Drop the optional "HD"/"91" badge and the "HH:MM:SS" duration prefix.
        text = re.sub(r'^(HD\s+|91\s+)?\d{2}:\d{2}:\d{2}\s*', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return html.unescape(text)[:120]

    def parse_detail_page(self, html: str) -> dict:
        """Extract the direct video URL (and title) from a detail page.

        Returns:
            A dict that may contain ``title`` and, when a video URL was found,
            ``video_url`` and ``source_id``. Empty input yields ``{}``.
        """
        result = {}
        if not html:
            return result

        title = self._extract_detail_title(html)
        if title:
            result["title"] = title

        video_url = self._find_video_url(html)
        if video_url:
            result["video_url"] = video_url
            result["source_id"] = self._extract_source_id(video_url)
        return result

    def _find_video_url(self, page_html: str) -> str:
        """Locate the direct video URL inside a detail page, ``""`` if absent."""
        # Primary: the page writes the <video> tag through
        #   document.write(strencode2("%3c%73%6f..."));
        # whose argument is handled here as a percent-encoded HTML fragment.
        encoded = re.search(r'strencode2\(["\']([^"\']+)["\']\)', page_html)
        if encoded:
            try:
                fragment = unquote(encoded.group(1))
                src = re.search(r"src=['\"]([^'\"]+)['\"]", fragment)
                if src:
                    # Collapse "https://host//path" into "https://host/path".
                    return re.sub(r'(https?://[^/]+)//+', r'\1/', src.group(1))
            except Exception as e:
                self.log(f" 解码 strencode2 失败: {e}")

        # Fallback: first plain .mp4 link that does not look like an ad.
        # Every candidate is inspected so that an ad appearing first does not
        # hide the real video link further down the page.
        for candidate in re.finditer(r'https?://[^\s"\'<>]+\.mp4[^\s"\'<>]*', page_html):
            url = candidate.group(0)
            if 'kwai' not in url and 'ad-' not in url.lower():
                return url
        return ""

    def _extract_detail_title(self, html_text: str) -> str:
        """Return the ``<title>`` text without the trailing "- 91porn..." part (max 160 chars)."""
        soup = BeautifulSoup(html_text, 'lxml')
        title_el = soup.find('title')
        if not title_el:
            return ""
        title = title_el.get_text(" ", strip=True)
        title = re.sub(r'\s*-\s*91porn.*$', '', title, flags=re.IGNORECASE).strip()
        return html.unescape(title)[:160]

    def _extract_source_id(self, video_url: str) -> str:
        """Return the all-digit file stem of a video URL, or ``""``.

        Only URLs whose path ends in a known video extension and whose file
        name (without extension) consists solely of the digits 0-9 qualify.
        """
        name = os.path.basename(urlparse(video_url or "").path)
        stem, ext = os.path.splitext(name)
        if ext.lower() not in {".mp4", ".m4v", ".mov", ".webm", ".mkv", ".avi"}:
            return ""
        return stem if re.fullmatch(r'[0-9]+', stem) else ""

    def _extract_thumb_source_id(self, thumb_url: str) -> str:
        """Return the numeric ID from a ``.../thumb/<digits>.<ext>`` URL, or ``""``."""
        path = urlparse(thumb_url or "").path
        match = re.search(r'/thumb/(\d+)\.[A-Za-z0-9]+$', path)
        return match.group(1) if match else ""

    def _thumb_url_for_source(self, thumb_url: str, source_id: str) -> str:
        """Point a ``/thumb/<name>.<ext>`` URL at ``/thumb/<source_id>.jpg``.

        The URL is returned unchanged when either argument is empty, when it
        does not end in a ``/thumb/`` file, or when it already names
        ``source_id``. A rewritten URL loses its query string and fragment.
        """
        if not thumb_url or not source_id:
            return thumb_url
        parsed = urlparse(thumb_url)
        match = re.search(r'/thumb/([^/?#]+)\.[A-Za-z0-9]+$', parsed.path)
        if not match or match.group(1) == source_id:
            return thumb_url
        path = re.sub(
            r'/thumb/[^/?#]+\.[A-Za-z0-9]+$',
            f'/thumb/{source_id}.jpg',
            parsed.path,
        )
        return parsed._replace(path=path, query="", fragment="").geturl()

    def crawl(self):
        """Run the crawl, then save the results and print a summary.

        The loop stops as soon as one of these holds:
          - ``max_pages`` listing pages were requested in this run,
          - ``max_empty_pages`` consecutive pages failed or had no videos,
          - ``target_new`` is set and that many videos were resolved.
        """
        self.log("=" * 60)
        self.log("91porn 视频爬虫启动")
        self.log("=" * 60)
        self.log(f"配置: 列表页延时 {MIN_PAGE_DELAY}-{MAX_PAGE_DELAY}s, 详情页延时 {MIN_DETAIL_DELAY}-{MAX_DETAIL_DELAY}s")
        self.log(f"配置: 最大重试 {MAX_RETRIES} 次, 连续空页上限 {self.max_empty_pages}")
        self.log(f"配置: 起始页 {self.start_page}, 最大爬取页数 {self.max_pages if self.max_pages else '不限'}")
        if self.target_new:
            self.log(f"配置: 目标新增视频数 {self.target_new}")
        self.log(f"配置: 输出文件 {os.path.abspath(self.output_file)}")
        if self.skip_viewkeys:
            self.log(f"配置: 已跳过 {len(self.skip_viewkeys)} 个已知 viewkey")
        self.log("")

        page_num = self.start_page
        consecutive_empty = 0
        crawled_in_session = 0

        while True:
            if self.max_pages is not None and crawled_in_session >= self.max_pages:
                self.log(f"达到配置的页数上限 {self.max_pages},停止")
                break
            if consecutive_empty >= self.max_empty_pages:
                self.log(f"连续 {self.max_empty_pages} 页无结果,已达到末尾")
                break
            if self.target_new is not None and self.processed_videos >= self.target_new:
                self.log(f"已累计 {self.processed_videos} 个新视频,达到目标 {self.target_new},停止")
                break

            if page_num == 1:
                page_url = f"{BASE_URL}?category=top&viewtype=basic"
            else:
                page_url = f"{BASE_URL}?category=top&viewtype=basic&page={page_num}"

            # Pause between listing pages, but not before the first request.
            if crawled_in_session > 0:
                self.log("")
                self.random_sleep(MIN_PAGE_DELAY, MAX_PAGE_DELAY)

            self.log(f"[页 {page_num}] 请求: {page_url}")
            page_html = self.fetch_page(page_url, f"列表页 第{page_num}页")

            if not page_html:
                self.log(f"[页 {page_num}] 获取失败,跳过")
                consecutive_empty += 1
                page_num += 1
                crawled_in_session += 1
                continue

            page_videos = self.parse_list_page(page_html)

            # A page counts as empty only when it has no cards at all, not
            # when every card is already known.
            if not page_videos:
                self.log(f"[页 {page_num}] 页面无视频,可能已到末尾")
                consecutive_empty += 1
                page_num += 1
                crawled_in_session += 1
                continue

            consecutive_empty = 0

            new_videos = [v for v in page_videos if v['viewkey'] not in self.skip_viewkeys]
            skipped_on_page = len(page_videos) - len(new_videos)

            if skipped_on_page > 0:
                self.log(f"[页 {page_num}] 发现 {len(page_videos)} 个链接, 其中 {skipped_on_page} 个已处理, {len(new_videos)} 个新视频")
            else:
                self.log(f"[页 {page_num}] 发现 {len(new_videos)} 个视频")

            if new_videos:
                self._process_video_list(new_videos, referer=page_url)
                self.pages_crawled += 1
            page_num += 1
            crawled_in_session += 1

        self._save_results()
        self._print_summary()

    def _record_failed_video(self, video: dict):
        """Store *video* without a video URL and mark its viewkey as handled."""
        video["video_url"] = ""
        self.results.append(video)
        self.skip_viewkeys.add(video['viewkey'])
        self.failed_videos += 1

    def _process_video_list(self, videos: list, referer: str = ""):
        """Resolve the direct video URL of each listing entry in *videos*.

        Entries whose viewkey or listing-level source ID is already known are
        skipped before any detail request is made. Processing stops early
        once ``target_new`` videos have been resolved.

        Args:
            videos: Card dicts as produced by ``parse_list_page``.
            referer: ``Referer`` header sent with the detail requests.
        """
        for idx, video in enumerate(videos, 1):
            if self.target_new is not None and self.processed_videos >= self.target_new:
                return

            viewkey = video['viewkey']
            if viewkey in self.skip_viewkeys:
                self.log(f" [SKIP] 已处理过: {viewkey}")
                self.skipped_videos += 1
                continue

            # The skip set also holds mp4 source IDs; when the listing card
            # already reveals a known one, skip without fetching the detail
            # page (saves a request and its delay).
            list_source_id = video.get("source_id", "")
            if list_source_id and list_source_id in self.skip_viewkeys:
                self.log(f" [SKIP] 已处理过 source_id: {list_source_id}")
                self.skipped_videos += 1
                continue

            self.log(f" 处理视频 {idx}/{len(videos)}: {video['title'][:40]}...")

            # No delay before the first entry of a batch.
            if idx > 1:
                self.random_sleep(MIN_DETAIL_DELAY, MAX_DETAIL_DELAY)

            detail_html = self.fetch_page(video['detail_url'], f"详情页 viewkey={viewkey}", referer=referer)
            if not detail_html:
                self.log(f" [FAIL] 详情页获取失败: {viewkey}")
                self._record_failed_video(video)
                continue

            detail_info = self.parse_detail_page(detail_html)
            if not detail_info.get("video_url"):
                self.log(f" [FAIL] 未找到视频直链: {viewkey}")
                self._record_failed_video(video)
                continue

            video["video_url"] = detail_info["video_url"]
            if detail_info.get("title"):
                video["title"] = detail_info["title"]

            detail_source_id = detail_info.get("source_id", "")
            if list_source_id and detail_source_id and list_source_id != detail_source_id:
                # The detail page served a different video than the card.
                self.log(
                    f" [FAIL] 详情页视频源不匹配: list_source_id={list_source_id} "
                    f"detail_source_id={detail_source_id} viewkey={viewkey}"
                )
                self.failed_videos += 1
                self.skip_viewkeys.add(viewkey)
                continue
            if not list_source_id and detail_source_id:
                video["source_id"] = detail_source_id

            if video.get("source_id"):
                video["thumb_url"] = self._thumb_url_for_source(
                    video.get("thumb_url", ""),
                    video["source_id"],
                )
                if video["source_id"] in self.skip_viewkeys:
                    self.log(f" [SKIP] 已处理过 source_id: {video['source_id']}")
                    self.skipped_videos += 1
                    continue

            self.results.append(video)
            self.skip_viewkeys.add(viewkey)
            if video.get("source_id"):
                self.skip_viewkeys.add(video["source_id"])
            self.processed_videos += 1
            self.log(f" [OK] 成功提取视频直链")
            # Streaming: hand the entry over right away instead of waiting
            # for the rest of the batch.
            self.emit_stream_video(video)

    def _save_results(self):
        """Write all results and counters to ``output_file`` as JSON.

        The data goes to ``<output_file>.part`` first and is then moved into
        place with ``os.replace``. If writing fails, the JSON is dumped to the
        console instead (stderr when streaming, stdout otherwise).
        """
        output_data = {
            "crawl_time": datetime.now().isoformat(),
            "source_url": BASE_URL,
            "pages_crawled": self.pages_crawled,
            "total_videos": len(self.results),
            "successful": self.processed_videos,
            "skipped": self.skipped_videos,
            "failed": self.failed_videos,
            "videos": self.results,
        }

        try:
            out_path = self.output_file
            parent = os.path.dirname(os.path.abspath(out_path))
            if parent:
                os.makedirs(parent, exist_ok=True)
            # Write to a temp file and rename so readers never see partial JSON.
            tmp_path = out_path + ".part"
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(output_data, f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, out_path)
            self.log(f"结果已保存到: {os.path.abspath(out_path)}")
        except Exception as e:
            self.log(f"保存文件失败: {e}")
            # Last resort: dump the data to the console as a backup.
            backup_out = sys.stderr if self.stream_output else sys.stdout
            print("\n--- 备份输出 ---", file=backup_out, flush=True)
            print(json.dumps(output_data, ensure_ascii=False, indent=2), file=backup_out, flush=True)

    def _print_summary(self):
        """Log the final counters of the crawl."""
        self.log("")
        self.log("=" * 60)
        self.log("爬取完成!")
        self.log("=" * 60)
        self.log(f"爬取页数: {self.pages_crawled}")
        self.log(f"总视频数: {len(self.results)}")
        self.log(f"成功提取直链: {self.processed_videos}")
        self.log(f"跳过(已处理): {self.skipped_videos}")
        self.log(f"失败/缺失直链: {self.failed_videos}")
        self.log(f"输出文件: {os.path.abspath(self.output_file)}")
        self.log("=" * 60)
|
||||
|
||||
|
||||
def print_help():
    """Print the (Chinese) usage banner of the script to stdout."""
    help_lines = (
        "",
        "================================================",
        "91porn 视频爬虫 v1.0",
        "================================================",
        "",
        "本脚本将爬取 91porn \"本月最热\" 分类下的所有视频信息:",
        "- 视频名称",
        "- 封面图直链",
        "- 视频直链 (MP4)",
        "",
        "依赖安装:",
        "pip install requests beautifulsoup4 lxml PySocks",
        "",
        "使用方法:",
        "python spider_91porn.py",
        "",
        "配置说明 (编辑脚本内 \"配置区域\"):",
        "MIN_PAGE_DELAY / MAX_PAGE_DELAY : 列表页请求间隔 (默认 3-6 秒)",
        "MIN_DETAIL_DELAY / MAX_DETAIL_DELAY : 详情页请求间隔 (默认 2-5 秒)",
        "MAX_PAGES : 限制最大爬取页数 (None=不限, 如 5=只爬前5页)",
        "OUTPUT_FILE : 输出文件名 (默认 91porn_videos.json)",
        "",
        "按 Ctrl+C 可随时中断并保存已爬取的数据",
        "",
        "注意:",
        "1. 视频直链包含时效性token,会过期,需定期重新爬取",
        "2. 脚本已内置随机延时,请勿移除,避免对服务器造成压力",
        "3. 如遇到Cloudflare拦截,需要先通过浏览器获取Cookie",
        "4. 本脚本仅供学习交流,请遵守当地法律法规",
        "================================================",
        "",
    )
    print("\n".join(help_lines))
|
||||
|
||||
|
||||
def run_job(job_path: str):
    """Run the spider as a crawler.v1 script plugin.

    The Go host passes a job JSON file and expects stdout JSONL events. Logs go
    to stderr so stdout stays machine-readable. After a completed crawl one
    ``done`` event with the emitted/failed/skipped counters is written.

    Args:
        job_path: Path of the job JSON file. Keys read from it: ``protocol``,
            ``mode``, ``target_new``, ``seen_source_ids_file``, ``output_dir``,
            ``run_id`` and ``network.proxy_url``.

    Raises:
        ValueError: If the job is not a JSON object or declares an unsupported
            protocol or mode.
        KeyboardInterrupt: Re-raised after the partial results were saved.
    """
    # Local import: the module itself only imports ``datetime`` from datetime.
    from datetime import timezone

    with open(job_path, "r", encoding="utf-8") as f:
        job = json.load(f)

    if not isinstance(job, dict):
        raise ValueError(f"crawler job must be a JSON object, got {type(job).__name__}")
    if job.get("protocol") != CRAWLER_PROTOCOL:
        raise ValueError(f"unsupported crawler protocol: {job.get('protocol')!r}")
    if job.get("mode") not in ("", None, "crawl"):
        raise ValueError(f"unsupported crawler mode: {job.get('mode')!r}")

    # Missing, malformed or non-positive targets fall back to 15.
    try:
        target_new = int(job.get("target_new") or 15)
    except (TypeError, ValueError):
        target_new = 15
    if target_new <= 0:
        target_new = 15

    seen_file = job.get("seen_source_ids_file") or ""
    output_dir = job.get("output_dir") or os.getcwd()
    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is the same.
    run_id = job.get("run_id") or datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"spider91-{run_id}.json")

    network = job.get("network") if isinstance(job.get("network"), dict) else {}
    proxy_url = str(network.get("proxy_url") or "").strip()
    if proxy_url:
        # Export the proxy through the environment and clear any NO_PROXY
        # exclusions.
        for env_name in ("HTTP_PROXY", "HTTPS_PROXY", "http_proxy", "https_proxy"):
            os.environ[env_name] = proxy_url
        os.environ["NO_PROXY"] = ""
        os.environ["no_proxy"] = ""

    # One known ID per line; an unreadable file only produces a warning.
    seen_viewkeys = []
    if seen_file:
        try:
            with open(seen_file, "r", encoding="utf-8") as f:
                seen_viewkeys = [line.strip() for line in f if line.strip()]
        except FileNotFoundError:
            print(f"警告: seen_source_ids_file 不存在: {seen_file}", file=sys.stderr, flush=True)
        except (OSError, UnicodeError) as e:
            print(f"警告: 读取 seen_source_ids_file 失败: {e}", file=sys.stderr, flush=True)

    prefer_ipv4_for_plain_socks5_proxy()
    spider = Porn91Spider(
        output_file=output_file,
        start_page=1,
        max_pages=None,
        resume=False,
        quiet=True,
        target_new=target_new,
        seen_viewkeys=seen_viewkeys,
        stream_output=True,
        stream_protocol="crawler.v1",
    )
    try:
        spider.crawl()
        write_jsonl({
            "type": "done",
            "stats": {
                "emitted": spider.processed_videos,
                "failed": spider.failed_videos,
                "skipped": spider.skipped_videos,
            },
        })
    except KeyboardInterrupt:
        spider.log("\n用户中断,正在保存已爬取的数据...")
        spider._save_results()
        raise
|
||||
|
||||
|
||||
def _load_seen_viewkeys(path, warn_out):
    """Read already-processed source IDs from *path*, one per line.

    Surrounding whitespace is stripped and blank lines are dropped. Reading is
    best-effort: when the file is missing or cannot be read, a warning is
    printed to *warn_out* and whatever was collected so far is returned.

    Args:
        path: File path, or a falsy value to skip loading entirely.
        warn_out: Text stream that receives warning messages.

    Returns:
        A list of non-empty, stripped lines (possibly empty).
    """
    seen = []
    if not path:
        return seen
    try:
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line:
                    seen.append(line)
    except FileNotFoundError:
        print(f"警告: --seen-viewkeys-file 不存在: {path}", file=warn_out)
    except Exception as e:
        # Deliberately broad: a bad seen-file must not abort the crawl.
        print(f"警告: 读取 --seen-viewkeys-file 失败: {e}", file=warn_out)
    return seen


def main():
    """Command-line entry point.

    Dispatches to one of four modes based on the parsed arguments:

    * ``--job``: hand the job file to ``run_job`` and return.
    * ``--target-new``: page from 1 until that many new videos were processed.
    * ``--page``: crawl ``--max-pages`` pages (default 1) starting at that page.
    * otherwise: full crawl with the spider's default paging.

    On Ctrl+C the collected results are saved, a summary is printed and the
    process exits with status 0. Any other exception is logged, results are
    saved, and the exception is re-raised.
    """
    argv = sys.argv[1:]
    # The parser is built with add_help=False and unknown options are ignored
    # by parse_known_args(), so a -h/--help that is not the first argument
    # would be silently dropped and a crawl would start. Honour the flag
    # wherever it appears; the bare word "help" only counts in first position.
    if (argv and argv[0] == 'help') or any(arg in ('-h', '--help') for arg in argv):
        print_help()
        return

    parser = argparse.ArgumentParser(
        prog="spider_91porn.py",
        description="91porn 视频元数据爬虫",
        add_help=False,  # -h/--help is served by print_help() instead
    )
    parser.add_argument("--page", type=int, default=None,
                        help="只爬指定页(单页模式,配合 --output 用于定时任务)")
    parser.add_argument("--output", type=str, default=None,
                        help="输出 JSON 路径,覆盖默认 OUTPUT_FILE")
    parser.add_argument("--max-pages", type=int, default=None,
                        help="单页模式下,从 --page 起最多再爬几页(默认 1)")
    parser.add_argument("--no-resume", action="store_true",
                        help="禁用断点续爬(单页模式默认禁用)")
    parser.add_argument("--quiet", action="store_true",
                        help="压缩日志,每条视频只输出关键事件")
    parser.add_argument("--target-new", type=int, default=None,
                        help="目标新增模式:从 page 1 起翻页直到累计处理这么多新源视频后停止(backend 凌晨任务用)")
    parser.add_argument("--seen-viewkeys-file", type=str, default=None,
                        help="文件路径,每行一个已处理过的 viewkey 或 mp4 源 ID;脚本会跳过这些视频")
    parser.add_argument("--stream-output", action="store_true",
                        help="流式模式:每解析一条视频直链就立即把它作为一行 JSON 写到 stdout 并 flush;"
                             "日志改走 stderr。配合 backend 边读边下载使用。")
    parser.add_argument("--job", type=str, default=None,
                        help="crawler.v1 job JSON 路径;作为通用脚本爬虫运行。")

    args, _ = parser.parse_known_args()
    if args.job:
        run_job(args.job)
        return

    # In streaming mode stdout carries JSON lines, so human-readable output
    # has to go to stderr.
    cli_out = sys.stderr if args.stream_output else sys.stdout
    prefer_ipv4_for_plain_socks5_proxy()

    print("""
================================================
91porn 视频爬虫启动中...
================================================
按 Ctrl+C 可随时中断并保存进度
""", file=cli_out)

    # Known IDs supplied by the caller; the spider skips these videos.
    seen_viewkeys = _load_seen_viewkeys(args.seen_viewkeys_file, cli_out)

    # Keyword arguments shared by every run mode.
    common = dict(
        output_file=args.output,
        quiet=args.quiet,
        seen_viewkeys=seen_viewkeys,
        stream_output=args.stream_output,
    )

    if args.target_new is not None:
        # Target-new mode: de-duplication relies on seen_viewkeys, so resume
        # from the output file is switched off.
        spider = Porn91Spider(
            start_page=1,
            max_pages=None,
            resume=False,
            target_new=args.target_new,
            **common,
        )
    elif args.page is not None:
        # Page mode (manual debugging): clamp the start page to >= 1 and
        # default to a single page.
        start_page = max(1, args.page)
        max_pages = args.max_pages if args.max_pages and args.max_pages > 0 else 1
        spider = Porn91Spider(
            start_page=start_page,
            max_pages=max_pages,
            resume=False,
            **common,
        )
    else:
        # Full mode: resume=None leaves the decision to the spider's default
        # unless --no-resume was given.
        spider = Porn91Spider(
            resume=False if args.no_resume else None,
            **common,
        )

    try:
        spider.crawl()
    except KeyboardInterrupt:
        spider.log("\n用户中断,正在保存已爬取的数据...")
        spider._save_results()
        spider._print_summary()
        sys.exit(0)
    except Exception as e:
        spider.log(f"发生未预料的错误: {e}")
        import traceback
        traceback.print_exc()
        spider._save_results()
        raise
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -20,12 +20,11 @@
|
||||
|
||||
## 功能特性
|
||||
|
||||
- **多后端支持** — 兼容 115 云盘、PikPak 云盘、123网盘、联通网盘、OneDrive、Google Drive 和本地存储
|
||||
- **低带宽播放** — 115 云盘、PikPak 云盘、123网盘、联通网盘、OneDrive 支持302模式,在线播放视频时,不占用服务器带宽,播放体验不受服务器带宽影响;Google Drive 不支持302模式,走服务器中转,观看体验会受服务器带宽影响
|
||||
- **多后端支持** — 兼容 115 云盘、PikPak 云盘、123网盘、联通网盘、光鸭网盘、OneDrive、Google Drive 和本地存储
|
||||
- **低带宽播放** — 115 云盘、PikPak 云盘、123网盘、联通网盘、光鸭网盘、OneDrive 支持302模式,在线播放视频时,不占用服务器带宽,播放体验不受服务器带宽影响;Google Drive 不支持302模式,走服务器中转,观看体验会受服务器带宽影响
|
||||
- **封面 & 预览片段** — 自动为每个视频生成封面图和预览片段,首页快速选片
|
||||
- **爬虫脚本** — 支持导入自定义爬虫脚本,脚本需遵循约定的规范,具体可参考 [SpiderFor91](https://github.com/Just-Spider/SpiderFor91);项目不再内置任何爬虫脚本
|
||||
- **短视频模式** — 一键切换抖音风格,沉浸刷片
|
||||
|
||||
---
|
||||
|
||||
## 预览图
|
||||
@@ -82,6 +81,14 @@ sudo bash install.sh
|
||||
|
||||
> `video-site-91` 为等效别名,两者可互换使用。
|
||||
|
||||
**已部署用户升级:**
|
||||
|
||||
```bash
|
||||
91 update
|
||||
```
|
||||
|
||||
升级会保留现有 `config.yaml`、数据库、封面、预览、上传文件和爬虫数据。脚本会自动安装或检查 `ffmpeg` / `ffprobe` 等运行依赖,并在新版本启动失败时回滚到升级前文件。
|
||||
|
||||
**自定义端口:**
|
||||
|
||||
```bash
|
||||
@@ -153,6 +160,7 @@ docker compose up -d # 更新并重启
|
||||
```
|
||||
|
||||
> 所有配置、数据库、封面、预览及上传文件均保存在 `./data/` 目录下。
|
||||
> 从旧版本升级 Docker 部署时,执行 `docker compose pull && docker compose up -d` 即可;`./data/` 不会被镜像更新覆盖。
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
视频聚合站的 Go 后端。提供以下能力:
|
||||
|
||||
1. 多家网盘统一抽象(夸克 / 115 / PikPak / 联通网盘 / OneDrive / Google Drive / 本地存储)
|
||||
1. 多家网盘统一抽象(夸克 / 115 / PikPak / 联通网盘 / 光鸭网盘 / OneDrive / Google Drive / 本地存储)
|
||||
2. 视频元数据目录(SQLite)+ 扫描 + 预览视频预生成
|
||||
3. REST API(前台)+ 管理后台 + 直链代理
|
||||
4. 标签池、视频隐藏、按网盘统计和详情页来源网盘类型展示能力
|
||||
@@ -20,6 +20,7 @@ internal/
|
||||
p115/ 115(壳子 + SheltonZhu/115driver)
|
||||
pikpak/ PikPak(自己实现,参考 OpenList pikpak)
|
||||
wopan/ 联通网盘(壳子 + OpenListTeam/wopan-sdk-go)
|
||||
guangyapan/ 光鸭网盘(参考 AList GuangYaPan)
|
||||
onedrive/ OneDrive(OpenList 在线续期 + Microsoft Graph 文件接口)
|
||||
googledrive/ Google Drive(OpenList 在线续期 + Google Drive API;播放走后端代理)
|
||||
localstorage/ 本地目录扫描(服务器已有视频目录)
|
||||
@@ -108,6 +109,7 @@ go run ./cmd/server 后端 9192
|
||||
| p115 | `cookie`(形如 `UID=...; CID=...; SEID=...; KID=...`) |
|
||||
| pikpak | `username`、`password`(token、验证码和设备 ID 由服务端自动处理并保存) |
|
||||
| wopan | `access_token`、`refresh_token`,可选 `family_id` |
|
||||
| guangyapan | 推荐后台扫码登录自动写入 `access_token`、`refresh_token`;也可手工填写 token;可选 `root_path` |
|
||||
| onedrive | `refresh_token` |
|
||||
| googledrive | 默认只需 `refresh_token`;自建 OAuth 客户端模式还需 `use_online_api=false`、`client_id`、`client_secret` |
|
||||
| localstorage | `path`(服务器上的已有视频目录,如 `/mnt/videos`) |
|
||||
@@ -154,9 +156,9 @@ Google Drive 默认按 OpenList 在线 API 调用 `https://api.oplist.org/google
|
||||
## 管理能力
|
||||
|
||||
- `/admin/drives`:新增、编辑、删除网盘,触发扫描。
|
||||
- `/admin/videos`:按网盘筛选视频,每页 100 条分页,查看各网盘预览视频统计,编辑标题/作者/分类/标签,单条或全量重生预览视频。
|
||||
- `/admin/videos`:按网盘筛选视频,每页 100 条分页,查看各网盘预览视频统计,编辑标题/作者/分类/标签,单条或全量重生预览视频;拉黑视频页可查看被删除或被隐藏的视频,并支持移出黑名单后在下次扫盘重新入库。
|
||||
- `/admin/tags`:新增标签并用内置规则自动匹配已有视频;删除非系统标签时会从所有视频上同步移除该标签。
|
||||
- 播放页视频信息会展示来源网盘类型;同时提供“不再展示”,点击后会把视频标记为全局隐藏。隐藏视频不会再出现在首页、列表、搜索、相关推荐和详情接口中。目前没有管理后台恢复入口,如需恢复可把数据库里对应视频的 `hidden` 字段改回 `0`。
|
||||
- 播放页视频信息会展示来源网盘类型,并提供删除入口。被删除或被隐藏的视频会进入黑名单,不会再出现在首页、列表、搜索和详情接口中;在后台移出黑名单后,会在下次扫盘时重新发现并入库。
|
||||
|
||||
## 预览视频生成
|
||||
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
)
|
||||
|
||||
func TestCrawlerIntCredFallbacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
d *catalog.Drive
|
||||
key string
|
||||
def int
|
||||
want int
|
||||
}{
|
||||
{"nil drive", nil, "page", 1, 1},
|
||||
{"nil creds", &catalog.Drive{}, "page", 7, 7},
|
||||
{"empty value", &catalog.Drive{Credentials: map[string]string{"page": ""}}, "page", 5, 5},
|
||||
{"non-numeric", &catalog.Drive{Credentials: map[string]string{"page": "abc"}}, "page", 9, 9},
|
||||
{"happy", &catalog.Drive{Credentials: map[string]string{"page": "42"}}, "page", 1, 42},
|
||||
{"missing key", &catalog.Drive{Credentials: map[string]string{"a": "1"}}, "b", 99, 99},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := crawlerIntCred(tc.d, tc.key, tc.def)
|
||||
if got != tc.want {
|
||||
t.Fatalf("crawlerIntCred(%s) = %d, want %d", tc.name, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,101 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/proxy"
|
||||
)
|
||||
|
||||
func TestSpider91IntCredFallbacks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
d *catalog.Drive
|
||||
key string
|
||||
def int
|
||||
want int
|
||||
}{
|
||||
{"nil drive", nil, "page", 1, 1},
|
||||
{"nil creds", &catalog.Drive{}, "page", 7, 7},
|
||||
{"empty value", &catalog.Drive{Credentials: map[string]string{"page": ""}}, "page", 5, 5},
|
||||
{"non-numeric", &catalog.Drive{Credentials: map[string]string{"page": "abc"}}, "page", 9, 9},
|
||||
{"happy", &catalog.Drive{Credentials: map[string]string{"page": "42"}}, "page", 1, 42},
|
||||
{"missing key", &catalog.Drive{Credentials: map[string]string{"a": "1"}}, "b", 99, 99},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := spider91IntCred(tc.d, tc.key, tc.def)
|
||||
if got != tc.want {
|
||||
t.Fatalf("spider91IntCred(%s) = %d, want %d", tc.name, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpider91UploadDriveIDDoesNotAutoSelectTarget(t *testing.T) {
|
||||
reg := proxy.NewRegistry()
|
||||
reg.Set("p115-one", &spider91UploadTargetFakeDrive{id: "p115-one", kind: "p115"})
|
||||
reg.Set("p123-one", &spider91UploadTargetFakeDrive{id: "p123-one", kind: "p123"})
|
||||
reg.Set("onedrive-one", &spider91UploadTargetFakeDrive{id: "onedrive-one", kind: "onedrive"})
|
||||
reg.Set("wopan-one", &spider91UploadTargetFakeDrive{id: "wopan-one", kind: "wopan"})
|
||||
|
||||
app := &App{registry: reg}
|
||||
if got := app.Spider91UploadDriveID(); got != "" {
|
||||
t.Fatalf("empty upload target selected %q, want local-only empty target", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "p115-one"
|
||||
if got := app.Spider91UploadDriveID(); got != "p115-one" {
|
||||
t.Fatalf("explicit upload target = %q, want p115-one", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "p123-one"
|
||||
if got := app.Spider91UploadDriveID(); got != "p123-one" {
|
||||
t.Fatalf("explicit p123 upload target = %q, want p123-one", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "onedrive-one"
|
||||
if got := app.Spider91UploadDriveID(); got != "onedrive-one" {
|
||||
t.Fatalf("explicit onedrive upload target = %q, want onedrive-one", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "wopan-one"
|
||||
if got := app.Spider91UploadDriveID(); got != "wopan-one" {
|
||||
t.Fatalf("explicit wopan upload target = %q, want wopan-one", got)
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "missing"
|
||||
if got := app.Spider91UploadDriveID(); got != "" {
|
||||
t.Fatalf("missing upload target = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
type spider91UploadTargetFakeDrive struct {
|
||||
id string
|
||||
kind string
|
||||
}
|
||||
|
||||
func (d *spider91UploadTargetFakeDrive) Kind() string { return d.kind }
|
||||
func (d *spider91UploadTargetFakeDrive) ID() string { return d.id }
|
||||
func (d *spider91UploadTargetFakeDrive) Init(context.Context) error {
|
||||
return nil
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) List(context.Context, string) ([]drives.Entry, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) Stat(context.Context, string) (*drives.Entry, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) StreamURL(context.Context, string) (*drives.StreamLink, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) EnsureDir(context.Context, string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *spider91UploadTargetFakeDrive) RootID() string { return "root" }
|
||||
@@ -3,6 +3,9 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/jpeg"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -15,7 +18,6 @@ import (
|
||||
"github.com/video-site/backend/internal/config"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/drives/scriptcrawler"
|
||||
"github.com/video-site/backend/internal/drives/spider91"
|
||||
"github.com/video-site/backend/internal/fingerprint"
|
||||
"github.com/video-site/backend/internal/preview"
|
||||
"github.com/video-site/backend/internal/proxy"
|
||||
@@ -227,6 +229,53 @@ func TestRegisterPreviewWorkersBackfillsHistoricalFingerprints(t *testing.T) {
|
||||
t.Fatalf("fingerprint status=%q sampled=%q, want ready with hash", got.FingerprintStatus, got.SampledSHA256)
|
||||
}
|
||||
|
||||
// TestUpdateScriptCrawlerRunStatePreservesCurrentTeaserSwitch seeds a script
// crawler drive with teasers disabled, enables teasers via
// SetDriveTeaserEnabled, then calls updateScriptCrawlerRunState with a nil
// error and checks that the stored drive still has teasers enabled, reports
// status "ok" with no last error, has a non-empty last_crawl_at credential,
// and keeps its target_new credential.
func TestUpdateScriptCrawlerRunStatePreservesCurrentTeaserSwitch(t *testing.T) {
	ctx := context.Background()
	cat, err := catalog.Open(t.TempDir() + "/catalog.db")
	if err != nil {
		t.Fatalf("open catalog: %v", err)
	}
	t.Cleanup(func() {
		if err := cat.Close(); err != nil {
			t.Fatalf("close catalog: %v", err)
		}
	})
	// Seed with TeaserEnabled=false so the later toggle is the only source of
	// the "true" value the test expects to survive.
	if err := cat.UpsertDrive(ctx, &catalog.Drive{
		ID:     "crawler-id",
		Kind:   scriptcrawler.Kind,
		Name:   "Crawler",
		RootID: "/",
		Credentials: map[string]string{
			"script_path": "/tmp/crawler.py",
			"target_new":  "10",
		},
		TeaserEnabled: false,
	}); err != nil {
		t.Fatalf("seed crawler drive: %v", err)
	}
	if err := cat.SetDriveTeaserEnabled(ctx, "crawler-id", true); err != nil {
		t.Fatalf("toggle teaser: %v", err)
	}

	// Only the catalog is wired into the App for this test.
	app := &App{cat: cat}
	if err := app.updateScriptCrawlerRunState(ctx, "crawler-id", nil); err != nil {
		t.Fatalf("update run state: %v", err)
	}
	got, err := cat.GetDrive(ctx, "crawler-id")
	if err != nil {
		t.Fatalf("get crawler drive: %v", err)
	}
	if !got.TeaserEnabled {
		t.Fatal("teaserEnabled = false after run state update, want preserved true")
	}
	if got.Status != "ok" || got.LastError != "" {
		t.Fatalf("status=%q lastError=%q, want ok with no error", got.Status, got.LastError)
	}
	// The run-state update must stamp last_crawl_at without dropping the
	// pre-existing target_new credential.
	if got.Credentials["last_crawl_at"] == "" || got.Credentials["target_new"] != "10" {
		t.Fatalf("credentials after run state update = %#v", got.Credentials)
	}
}
|
||||
|
||||
func TestStopDriveTasksCancelsQueuedTasksAndReplacesWorkers(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
@@ -391,31 +440,85 @@ func TestDriveGenerationStatusIncludesScanState(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunSpider91MigrationAfterManualCrawlRequiresConfiguredUploadTarget(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
registry := proxy.NewRegistry()
|
||||
migrator := &serverFakeSpider91MigrationRunner{}
|
||||
func TestDriveGenerationStatusIncludesScanCooldown(t *testing.T) {
|
||||
until := time.Now().Add(time.Hour).Round(time.Second)
|
||||
app := &App{
|
||||
scanQueued: map[string]bool{"drive-id": true},
|
||||
scanProgress: map[string]driveScanProgress{
|
||||
"drive-id": {Scanned: 12, Added: 3, CooldownUntil: until},
|
||||
},
|
||||
}
|
||||
|
||||
status := app.driveGenerationStatuses()["drive-id"].Scan
|
||||
if status.State != "cooling" {
|
||||
t.Fatalf("scan status = %#v, want cooling", status)
|
||||
}
|
||||
if status.CooldownUntil != until.Format(time.RFC3339) {
|
||||
t.Fatalf("cooldown until = %q, want %q", status.CooldownUntil, until.Format(time.RFC3339))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGuangYaPanGenerationCooldowns(t *testing.T) {
|
||||
drv := &serverFakeKindDrive{id: "gy", kind: "guangyapan"}
|
||||
if got := generationCooldownForDrive(drv); got != 10*time.Minute {
|
||||
t.Fatalf("generation cooldown = %s, want 10m", got)
|
||||
}
|
||||
if got := fingerprintConfigForDrive(drv).RateLimitCooldown; got != 10*time.Minute {
|
||||
t.Fatalf("fingerprint cooldown = %s, want 10m", got)
|
||||
}
|
||||
if got := scanCooldownForDrive(drv); got != 10*time.Minute {
|
||||
t.Fatalf("scan cooldown = %s, want 10m", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunCrawlerMigrationAfterManualCrawlRequiresCrawlerUploadTarget(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: "crawler-main",
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Crawler",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{
|
||||
"script_path": "/tmp/crawler.py",
|
||||
},
|
||||
}); err != nil {
|
||||
t.Fatalf("seed crawler: %v", err)
|
||||
}
|
||||
|
||||
registry := proxy.NewRegistry()
|
||||
migrator := &serverFakeCrawlerUploadRunner{}
|
||||
app := &App{
|
||||
cat: cat,
|
||||
registry: registry,
|
||||
spider91Migrator: migrator,
|
||||
crawlerUploader: migrator,
|
||||
workers: map[string]*preview.Worker{},
|
||||
thumbWorkers: map[string]*preview.ThumbWorker{},
|
||||
fingerprintWorkers: map[string]*fingerprint.Worker{},
|
||||
}
|
||||
|
||||
app.runSpider91MigrationAfterManualCrawl(ctx, "91spider")
|
||||
app.runCrawlerMigrationAfterManualCrawl(ctx, "crawler-main")
|
||||
if migrator.called != 0 {
|
||||
t.Fatalf("migration called without upload target")
|
||||
}
|
||||
|
||||
app.spider91UploadDriveID = "pikpak"
|
||||
app.runSpider91MigrationAfterManualCrawl(ctx, "91spider")
|
||||
if migrator.called != 0 {
|
||||
t.Fatalf("migration called when upload target is not attached")
|
||||
d, err := cat.GetDrive(ctx, "crawler-main")
|
||||
if err != nil {
|
||||
t.Fatalf("get crawler: %v", err)
|
||||
}
|
||||
|
||||
registry.Set("pikpak", &serverFakeKindDrive{id: "pikpak", kind: "pikpak"})
|
||||
app.runSpider91MigrationAfterManualCrawl(ctx, "91spider")
|
||||
d.Credentials["upload_drive_id"] = "pikpak"
|
||||
if err := cat.UpsertDrive(ctx, d); err != nil {
|
||||
t.Fatalf("set upload target: %v", err)
|
||||
}
|
||||
app.runCrawlerMigrationAfterManualCrawl(ctx, "crawler-main")
|
||||
if migrator.called != 1 {
|
||||
t.Fatalf("migration calls = %d, want 1", migrator.called)
|
||||
}
|
||||
@@ -446,11 +549,11 @@ func TestScheduleCrawlerUploadMigrationRunsForConfiguredCrawler(t *testing.T) {
|
||||
}
|
||||
registry := proxy.NewRegistry()
|
||||
registry.Set("crawler-truvaze", &serverFakeKindDrive{id: "crawler-truvaze", kind: scriptcrawler.Kind})
|
||||
migrator := &serverFakeSpider91MigrationRunner{}
|
||||
migrator := &serverFakeCrawlerUploadRunner{}
|
||||
app := &App{
|
||||
cat: cat,
|
||||
registry: registry,
|
||||
spider91Migrator: migrator,
|
||||
crawlerUploader: migrator,
|
||||
workers: map[string]*preview.Worker{},
|
||||
thumbWorkers: map[string]*preview.ThumbWorker{},
|
||||
fingerprintWorkers: map[string]*fingerprint.Worker{},
|
||||
@@ -489,8 +592,8 @@ func TestScheduleCrawlerUploadMigrationSkipsWithoutUploadTarget(t *testing.T) {
|
||||
}); err != nil {
|
||||
t.Fatalf("seed crawler: %v", err)
|
||||
}
|
||||
migrator := &serverFakeSpider91MigrationRunner{}
|
||||
app := &App{cat: cat, registry: proxy.NewRegistry(), spider91Migrator: migrator}
|
||||
migrator := &serverFakeCrawlerUploadRunner{}
|
||||
app := &App{cat: cat, registry: proxy.NewRegistry(), crawlerUploader: migrator}
|
||||
|
||||
if app.scheduleCrawlerUploadMigration(ctx, "crawler-local") {
|
||||
t.Fatal("scheduleCrawlerUploadMigration returned true without upload target")
|
||||
@@ -500,6 +603,128 @@ func TestScheduleCrawlerUploadMigrationSkipsWithoutUploadTarget(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestScheduleManualCrawlerUploadMigrationRunsWhenAssetsReady seeds a crawler
// drive that names "pikpak-target" as its upload drive plus one video whose
// fingerprint and preview are both marked ready, registers both drives, and
// expects scheduleManualCrawlerUploadMigration to accept the request and the
// fake uploader to be invoked within one second.
func TestScheduleManualCrawlerUploadMigrationRunsWhenAssetsReady(t *testing.T) {
	ctx := context.Background()
	cat, err := catalog.Open(t.TempDir() + "/catalog.db")
	if err != nil {
		t.Fatalf("open catalog: %v", err)
	}
	t.Cleanup(func() {
		if err := cat.Close(); err != nil {
			t.Fatalf("close catalog: %v", err)
		}
	})
	if err := cat.UpsertDrive(ctx, &catalog.Drive{
		ID:            "crawler-ready",
		Kind:          scriptcrawler.Kind,
		Name:          "Ready Crawler",
		RootID:        "/",
		TeaserEnabled: true,
		Credentials: map[string]string{
			"script_path":     "/tmp/ready.py",
			"upload_drive_id": "pikpak-target",
		},
	}); err != nil {
		t.Fatalf("seed crawler: %v", err)
	}
	// The video carries a sampled hash and "ready" fingerprint/preview
	// statuses, i.e. nothing is left pending for it.
	if err := cat.UpsertVideo(ctx, &catalog.Video{
		ID:                scriptcrawler.BuildVideoID("crawler-ready", "source-1"),
		DriveID:           "crawler-ready",
		FileID:            "source-1.mp4",
		FileName:          "source-1.mp4",
		Title:             "Source 1",
		Size:              123,
		Ext:               "mp4",
		SampledSHA256:     "sampled-source-1",
		FingerprintStatus: "ready",
		PreviewStatus:     "ready",
		PublishedAt:       time.Now(),
		CreatedAt:         time.Now(),
		UpdatedAt:         time.Now(),
	}); err != nil {
		t.Fatalf("seed video: %v", err)
	}
	// Both the crawler itself and its upload target are present in the registry.
	registry := proxy.NewRegistry()
	registry.Set("crawler-ready", &serverFakeKindDrive{id: "crawler-ready", kind: scriptcrawler.Kind})
	registry.Set("pikpak-target", &serverFakeKindDrive{id: "pikpak-target", kind: "pikpak"})
	migrator := &serverFakeCrawlerUploadRunner{}
	app := &App{
		cat:                cat,
		registry:           registry,
		crawlerUploader:    migrator,
		workers:            map[string]*preview.Worker{},
		thumbWorkers:       map[string]*preview.ThumbWorker{},
		fingerprintWorkers: map[string]*fingerprint.Worker{},
	}

	accepted, message := app.scheduleManualCrawlerUploadMigration(ctx, "crawler-ready")
	if !accepted {
		t.Fatalf("accepted = false, message = %q", message)
	}
	// Poll every 10ms for up to one second until the fake records a call; the
	// migration is presumably started asynchronously — confirm against
	// scheduleManualCrawlerUploadMigration.
	// NOTE(review): migrator.called is read here without visible
	// synchronization; verify the fake guards it if it is written from
	// another goroutine.
	deadline := time.After(time.Second)
	for migrator.called == 0 {
		select {
		case <-deadline:
			t.Fatalf("migration calls = %d, want 1", migrator.called)
		case <-time.After(10 * time.Millisecond):
		}
	}
}
|
||||
|
||||
// TestScheduleManualCrawlerUploadMigrationRejectsPendingFingerprint seeds a
// crawler drive with an upload target and one video that has a ready preview
// but no sampled hash or fingerprint status, and expects
// scheduleManualCrawlerUploadMigration to refuse the request with a message
// mentioning "指纹" and without invoking the uploader.
func TestScheduleManualCrawlerUploadMigrationRejectsPendingFingerprint(t *testing.T) {
	ctx := context.Background()
	cat, err := catalog.Open(t.TempDir() + "/catalog.db")
	if err != nil {
		t.Fatalf("open catalog: %v", err)
	}
	t.Cleanup(func() {
		if err := cat.Close(); err != nil {
			t.Fatalf("close catalog: %v", err)
		}
	})
	if err := cat.UpsertDrive(ctx, &catalog.Drive{
		ID:            "crawler-pending",
		Kind:          scriptcrawler.Kind,
		Name:          "Pending Crawler",
		RootID:        "/",
		TeaserEnabled: true,
		Credentials: map[string]string{
			"script_path":     "/tmp/pending.py",
			"upload_drive_id": "pikpak-target",
		},
	}); err != nil {
		t.Fatalf("seed crawler: %v", err)
	}
	// SampledSHA256 and FingerprintStatus are intentionally left unset.
	if err := cat.UpsertVideo(ctx, &catalog.Video{
		ID:            scriptcrawler.BuildVideoID("crawler-pending", "source-1"),
		DriveID:       "crawler-pending",
		FileID:        "source-1.mp4",
		FileName:      "source-1.mp4",
		Title:         "Source 1",
		Size:          123,
		Ext:           "mp4",
		PreviewStatus: "ready",
		PublishedAt:   time.Now(),
		CreatedAt:     time.Now(),
		UpdatedAt:     time.Now(),
	}); err != nil {
		t.Fatalf("seed video: %v", err)
	}
	// The registry is left empty; no drives are attached for this test.
	migrator := &serverFakeCrawlerUploadRunner{}
	app := &App{cat: cat, registry: proxy.NewRegistry(), crawlerUploader: migrator}

	accepted, message := app.scheduleManualCrawlerUploadMigration(ctx, "crawler-pending")
	if accepted {
		t.Fatal("accepted = true, want false")
	}
	// The rejection reason must mention the fingerprint ("指纹").
	if !strings.Contains(message, "指纹") {
		t.Fatalf("message = %q, want fingerprint reason", message)
	}
	if migrator.called != 0 {
		t.Fatalf("migration calls = %d, want 0", migrator.called)
	}
}
|
||||
|
||||
func TestDriveGenerationStatusUsesWorkerQueueNotPendingCatalogRows(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
@@ -687,9 +912,8 @@ func TestNightlyTargetsComeFromCatalogBeforeDriveAttach(t *testing.T) {
|
||||
for _, d := range []*catalog.Drive{
|
||||
{ID: "115", Kind: "p115", Name: "115", RootID: "0", TeaserEnabled: true},
|
||||
{ID: "pikpak", Kind: "pikpak", Name: "PikPak", RootID: "0", TeaserEnabled: true},
|
||||
{ID: "91-legacy", Kind: "spider91", Name: "91 Legacy", RootID: "0", TeaserEnabled: true},
|
||||
{ID: "91-crawler", Kind: scriptcrawler.Kind, Name: "91 Spider", RootID: "/", Credentials: map[string]string{"script_path": "/tmp/crawler.py"}, TeaserEnabled: true},
|
||||
{ID: "91-crawler-deleted", Kind: scriptcrawler.Kind, Name: "Deleted Spider", RootID: "/", Credentials: map[string]string{}, TeaserEnabled: true},
|
||||
{ID: "crawler-main", Kind: scriptcrawler.Kind, Name: "Crawler", RootID: "/", Credentials: map[string]string{"script_path": "/tmp/crawler.py"}, TeaserEnabled: true},
|
||||
{ID: "crawler-deleted", Kind: scriptcrawler.Kind, Name: "Deleted Crawler", RootID: "/", Credentials: map[string]string{}, TeaserEnabled: true},
|
||||
} {
|
||||
if err := cat.UpsertDrive(ctx, d); err != nil {
|
||||
t.Fatalf("seed drive %s: %v", d.ID, err)
|
||||
@@ -701,13 +925,13 @@ func TestNightlyTargetsComeFromCatalogBeforeDriveAttach(t *testing.T) {
|
||||
if len(scanIDs) != 2 || scanIDs[0] != "115" || scanIDs[1] != "pikpak" {
|
||||
t.Fatalf("scan target ids = %#v, want 115 and pikpak from catalog", scanIDs)
|
||||
}
|
||||
spiderIDs := app.listSpider91DriveIDs(ctx)
|
||||
if len(spiderIDs) != 1 || spiderIDs[0] != "91-crawler" {
|
||||
t.Fatalf("spider91 ids = %#v, want crawler-page script drive", spiderIDs)
|
||||
crawlerIDs := app.listCrawlerDriveIDs(ctx)
|
||||
if len(crawlerIDs) != 1 || crawlerIDs[0] != "crawler-main" {
|
||||
t.Fatalf("crawler ids = %#v, want crawler-page script drive", crawlerIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAttachDriveRejectsLegacySpider91Storage(t *testing.T) {
|
||||
func TestAttachDriveRejectsUnknownKind(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -719,9 +943,9 @@ func TestAttachDriveRejectsLegacySpider91Storage(t *testing.T) {
|
||||
}
|
||||
})
|
||||
d := &catalog.Drive{
|
||||
ID: "91-legacy",
|
||||
Kind: spider91.Kind,
|
||||
Name: "91 Legacy",
|
||||
ID: "unknown-main",
|
||||
Kind: "unknown",
|
||||
Name: "Unknown",
|
||||
RootID: "/",
|
||||
TeaserEnabled: true,
|
||||
}
|
||||
@@ -731,18 +955,11 @@ func TestAttachDriveRejectsLegacySpider91Storage(t *testing.T) {
|
||||
|
||||
app := &App{cat: cat, registry: proxy.NewRegistry()}
|
||||
err = app.attachDrive(ctx, d)
|
||||
if err == nil || !strings.Contains(err.Error(), "爬虫管理") {
|
||||
t.Fatalf("attach err = %v, want crawler management guidance", err)
|
||||
if err == nil || !strings.Contains(err.Error(), "unknown drive kind: unknown") {
|
||||
t.Fatalf("attach err = %v, want unknown kind error", err)
|
||||
}
|
||||
if _, ok := app.registry.Get(d.ID); ok {
|
||||
t.Fatal("legacy spider91 drive should not be registered")
|
||||
}
|
||||
got, err := cat.GetDrive(ctx, d.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get drive: %v", err)
|
||||
}
|
||||
if got.Status != "error" || !strings.Contains(got.LastError, "爬虫管理") {
|
||||
t.Fatalf("status/error = %q/%q, want deprecated error", got.Status, got.LastError)
|
||||
t.Fatal("unknown drive should not be registered")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1390,7 +1607,7 @@ func TestDeleteVideoUsesSourceRemoverWithCatalogMetadata(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteVideoRemovesSpider91SourceFile(t *testing.T) {
|
||||
func TestDeleteVideoRemovesScriptCrawlerSourceFile(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
root := t.TempDir()
|
||||
localDir := filepath.Join(root, "previews")
|
||||
@@ -1401,23 +1618,28 @@ func TestDeleteVideoRemovesSpider91SourceFile(t *testing.T) {
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: "spider-main",
|
||||
Kind: spider91.Kind,
|
||||
Name: "Spider",
|
||||
ID: "crawler-main",
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Crawler",
|
||||
RootID: "/",
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed drive: %v", err)
|
||||
}
|
||||
app := &App{
|
||||
cfg: &config.Config{Storage: config.Storage{LocalPreviewDir: localDir}},
|
||||
cat: cat,
|
||||
cfg: &config.Config{Storage: config.Storage{LocalPreviewDir: localDir}},
|
||||
cat: cat,
|
||||
registry: proxy.NewRegistry(),
|
||||
}
|
||||
sourceDir := app.spider91DriveDir("spider-main")
|
||||
sourceDir := app.scriptCrawlerDriveDir("crawler-main")
|
||||
app.registry.Set("crawler-main", scriptcrawler.New(scriptcrawler.Config{
|
||||
ID: "crawler-main",
|
||||
RootDir: sourceDir,
|
||||
}))
|
||||
sourceVideo := filepath.Join(sourceDir, "videos", "source.mp4")
|
||||
sourceThumb := filepath.Join(sourceDir, "thumbs", "source.jpg")
|
||||
previewPath := filepath.Join(localDir, "spider91-spider-main-source.mp4")
|
||||
commonThumb := filepath.Join(localDir, "thumbs", "spider91-spider-main-source.jpg")
|
||||
previewPath := filepath.Join(localDir, "scriptcrawler-crawler-main-source.mp4")
|
||||
commonThumb := filepath.Join(localDir, "thumbs", "scriptcrawler-crawler-main-source.jpg")
|
||||
for _, path := range []string{sourceVideo, sourceThumb, previewPath, commonThumb} {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||
t.Fatalf("mkdir %s: %v", path, err)
|
||||
@@ -1429,15 +1651,15 @@ func TestDeleteVideoRemovesSpider91SourceFile(t *testing.T) {
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: "spider91-spider-main-source",
|
||||
DriveID: "spider-main",
|
||||
ID: "scriptcrawler-crawler-main-source",
|
||||
DriveID: "crawler-main",
|
||||
FileID: "source.mp4",
|
||||
FileName: "source.mp4",
|
||||
Ext: "mp4",
|
||||
Title: "Spider Source",
|
||||
Title: "Crawler Source",
|
||||
PreviewLocal: previewPath,
|
||||
PreviewStatus: "ready",
|
||||
ThumbnailURL: "/p/thumb/spider91-spider-main-source",
|
||||
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-main-source",
|
||||
Size: 456,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
@@ -1446,9 +1668,9 @@ func TestDeleteVideoRemovesSpider91SourceFile(t *testing.T) {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
result, err := app.deleteVideo(ctx, "spider91-spider-main-source", true)
|
||||
result, err := app.deleteVideo(ctx, "scriptcrawler-crawler-main-source", true)
|
||||
if err != nil {
|
||||
t.Fatalf("delete spider video: %v", err)
|
||||
t.Fatalf("delete crawler video: %v", err)
|
||||
}
|
||||
if !result.OK || !result.DeletedSource {
|
||||
t.Fatalf("delete result = %#v, want source deleted", result)
|
||||
@@ -1458,23 +1680,23 @@ func TestDeleteVideoRemovesSpider91SourceFile(t *testing.T) {
|
||||
t.Fatalf("deleted file %s still exists, stat err=%v", path, err)
|
||||
}
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, "spider91-spider-main-source"); err != sql.ErrNoRows {
|
||||
if _, err := cat.GetVideo(ctx, "scriptcrawler-crawler-main-source"); err != sql.ErrNoRows {
|
||||
t.Fatalf("deleted video lookup error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
deleted, err := cat.IsVideoDeleted(ctx, "spider91-spider-main-source")
|
||||
deleted, err := cat.IsVideoDeleted(ctx, "scriptcrawler-crawler-main-source")
|
||||
if err != nil {
|
||||
t.Fatalf("check tombstone: %v", err)
|
||||
}
|
||||
if !deleted {
|
||||
t.Fatal("deleted spider91 video tombstone missing")
|
||||
t.Fatal("deleted crawler video tombstone missing")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanupDriveVideosForDeleteSpider91RemovesCrawledDirAndOriginRecords(t *testing.T) {
|
||||
func TestCleanupDriveVideosForDeleteScriptCrawlerRemovesOnlyLocalRows(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
root := t.TempDir()
|
||||
localDir := filepath.Join(root, "previews")
|
||||
driveID := "spider-main"
|
||||
driveID := "crawler-main"
|
||||
cat, err := catalog.Open(filepath.Join(t.TempDir(), "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
@@ -1487,22 +1709,19 @@ func TestCleanupDriveVideosForDeleteSpider91RemovesCrawledDirAndOriginRecords(t
|
||||
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: driveID,
|
||||
Kind: "spider91",
|
||||
Name: "91 Spider",
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Crawler",
|
||||
RootID: "/",
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed spider91 drive: %v", err)
|
||||
t.Fatalf("seed crawler drive: %v", err)
|
||||
}
|
||||
|
||||
spiderDriveDir := filepath.Join(root, "spider91", driveID)
|
||||
sourceVideo := filepath.Join(spiderDriveDir, "videos", "source.mp4")
|
||||
sourceThumb := filepath.Join(spiderDriveDir, "thumbs", "source.jpg")
|
||||
localPreview := filepath.Join(localDir, "spider91-spider-main-source.mp4")
|
||||
localThumb := filepath.Join(localDir, "thumbs", "spider91-spider-main-source.jpg")
|
||||
migratedPreview := filepath.Join(localDir, "spider91-spider-main-migrated.mp4")
|
||||
migratedThumb := filepath.Join(localDir, "thumbs", "spider91-spider-main-migrated.jpg")
|
||||
for _, path := range []string{sourceVideo, sourceThumb, localPreview, localThumb, migratedPreview, migratedThumb} {
|
||||
localPreview := filepath.Join(localDir, "scriptcrawler-crawler-main-source.mp4")
|
||||
localThumb := filepath.Join(localDir, "thumbs", "scriptcrawler-crawler-main-source.jpg")
|
||||
migratedPreview := filepath.Join(localDir, "scriptcrawler-crawler-main-migrated.mp4")
|
||||
migratedThumb := filepath.Join(localDir, "thumbs", "scriptcrawler-crawler-main-migrated.jpg")
|
||||
for _, path := range []string{localPreview, localThumb, migratedPreview, migratedThumb} {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||
t.Fatalf("mkdir %s: %v", path, err)
|
||||
}
|
||||
@@ -1514,22 +1733,22 @@ func TestCleanupDriveVideosForDeleteSpider91RemovesCrawledDirAndOriginRecords(t
|
||||
now := time.Now()
|
||||
for _, v := range []*catalog.Video{
|
||||
{
|
||||
ID: "spider91-spider-main-source",
|
||||
ID: "scriptcrawler-crawler-main-source",
|
||||
DriveID: driveID,
|
||||
FileID: "source.mp4",
|
||||
Title: "Source",
|
||||
PreviewLocal: localPreview,
|
||||
PreviewStatus: "ready",
|
||||
ThumbnailURL: "/p/thumb/spider91-spider-main-source",
|
||||
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-main-source",
|
||||
},
|
||||
{
|
||||
ID: "spider91-spider-main-migrated",
|
||||
ID: "scriptcrawler-crawler-main-migrated",
|
||||
DriveID: "PikPak",
|
||||
FileID: "pikpak-file-id",
|
||||
Title: "Migrated",
|
||||
PreviewLocal: migratedPreview,
|
||||
PreviewStatus: "ready",
|
||||
ThumbnailURL: "/p/thumb/spider91-spider-main-migrated",
|
||||
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-main-migrated",
|
||||
},
|
||||
{
|
||||
ID: "pikpak-PikPak-other",
|
||||
@@ -1557,24 +1776,30 @@ func TestCleanupDriveVideosForDeleteSpider91RemovesCrawledDirAndOriginRecords(t
|
||||
}
|
||||
removed, err := app.cleanupDriveVideosForDelete(ctx, driveID)
|
||||
if err != nil {
|
||||
t.Fatalf("cleanup spider91 videos: %v", err)
|
||||
t.Fatalf("cleanup crawler videos: %v", err)
|
||||
}
|
||||
if removed != 2 {
|
||||
t.Fatalf("removed = %d, want 2", removed)
|
||||
if removed != 1 {
|
||||
t.Fatalf("removed = %d, want 1", removed)
|
||||
}
|
||||
for _, id := range []string{"spider91-spider-main-source", "spider91-spider-main-migrated"} {
|
||||
if _, err := cat.GetVideo(ctx, id); err != sql.ErrNoRows {
|
||||
t.Fatalf("%s lookup error = %v, want sql.ErrNoRows", id, err)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, "scriptcrawler-crawler-main-source"); err != sql.ErrNoRows {
|
||||
t.Fatalf("local crawler video lookup error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, "scriptcrawler-crawler-main-migrated"); err != nil {
|
||||
t.Fatalf("migrated crawler video missing: %v", err)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, "pikpak-PikPak-other"); err != nil {
|
||||
t.Fatalf("unrelated pikpak video missing: %v", err)
|
||||
}
|
||||
for _, path := range []string{spiderDriveDir, localPreview, localThumb, migratedPreview, migratedThumb} {
|
||||
for _, path := range []string{localPreview, localThumb} {
|
||||
if _, err := os.Stat(path); !os.IsNotExist(err) {
|
||||
t.Fatalf("%s still exists, stat err=%v", path, err)
|
||||
}
|
||||
}
|
||||
for _, path := range []string{migratedPreview, migratedThumb} {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
t.Fatalf("%s missing, stat err=%v", path, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanupOrphanDriveVideosRemovesRowsAndGeneratedAssets(t *testing.T) {
|
||||
@@ -1665,7 +1890,7 @@ func TestCleanupOrphanDriveVideosRemovesRowsAndGeneratedAssets(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanupDuplicateVideoAssetsRemovesOnlyDuplicateLocalAssets(t *testing.T) {
|
||||
func TestCleanupDuplicateVideoAssetsDeletesExactDuplicateRows(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
localDir := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(t.TempDir(), "catalog.db"))
|
||||
@@ -1746,15 +1971,22 @@ func TestCleanupDuplicateVideoAssetsRemovesOnlyDuplicateLocalAssets(t *testing.T
|
||||
t.Fatalf("duplicate asset %s still exists, stat err=%v", path, err)
|
||||
}
|
||||
}
|
||||
dup, err := cat.GetVideo(ctx, "duplicate-video")
|
||||
if _, err := cat.GetVideo(ctx, "duplicate-video"); err != sql.ErrNoRows {
|
||||
t.Fatalf("duplicate lookup error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
deleted, err := cat.IsVideoDeleted(ctx, "duplicate-video")
|
||||
if err != nil {
|
||||
t.Fatalf("get duplicate: %v", err)
|
||||
t.Fatalf("check duplicate tombstone: %v", err)
|
||||
}
|
||||
if dup.PreviewLocal != "" || dup.PreviewStatus != "pending" {
|
||||
t.Fatalf("duplicate preview local=%q status=%q, want empty pending", dup.PreviewLocal, dup.PreviewStatus)
|
||||
if !deleted {
|
||||
t.Fatalf("duplicate tombstone missing")
|
||||
}
|
||||
if dup.ThumbnailURL != "" {
|
||||
t.Fatalf("duplicate thumbnail url = %q, want empty", dup.ThumbnailURL)
|
||||
deletedItems, _, err := cat.ListDeletedVideos(ctx, catalog.ListParams{Page: 1, PageSize: 10})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted videos: %v", err)
|
||||
}
|
||||
if len(deletedItems) != 1 || deletedItems[0].ID != "duplicate-video" || deletedItems[0].Reason != catalog.DeletedVideoReasonDuplicate {
|
||||
t.Fatalf("duplicate tombstone = %#v, want reason %q", deletedItems, catalog.DeletedVideoReasonDuplicate)
|
||||
}
|
||||
canon, err := cat.GetVideo(ctx, "canonical-video")
|
||||
if err != nil {
|
||||
@@ -1765,6 +1997,137 @@ func TestCleanupDuplicateVideoAssetsRemovesOnlyDuplicateLocalAssets(t *testing.T
|
||||
}
|
||||
}
|
||||
|
||||
// TestCleanupDuplicateVideoAssetsDeletesNearDuplicateRowsKeepingLargest seeds
// two catalog videos that have the same duration and identically colored
// thumbnails but different IDs, drives, file names, sizes, and title suffixes,
// then runs cleanupDuplicateVideoAssets. It expects the smaller video's row,
// preview, and thumbnail to be removed and tombstoned with
// catalog.DeletedVideoReasonDuplicate, while the larger video and its local
// assets are left in place.
func TestCleanupDuplicateVideoAssetsDeletesNearDuplicateRowsKeepingLargest(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
localDir := t.TempDir()
|
||||
// The catalog database lives in its own temp dir, separate from localDir.
cat, err := catalog.Open(filepath.Join(t.TempDir(), "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
// Previews sit directly under localDir; thumbnails sit under localDir/thumbs.
smallPreview := filepath.Join(localDir, "small-video.mp4")
|
||||
largePreview := filepath.Join(localDir, "large-video.mp4")
|
||||
smallThumb := filepath.Join(localDir, "thumbs", "small-video.jpg")
|
||||
largeThumb := filepath.Join(localDir, "thumbs", "large-video.jpg")
|
||||
// Both preview files are placeholders holding the literal bytes "preview".
for _, path := range []string{smallPreview, largePreview} {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||
t.Fatalf("mkdir %s: %v", path, err)
|
||||
}
|
||||
if err := os.WriteFile(path, []byte("preview"), 0o644); err != nil {
|
||||
t.Fatalf("write %s: %v", path, err)
|
||||
}
|
||||
}
|
||||
// The two thumbnails are written with the same solid color.
// NOTE(review): presumably this is what makes the covers match for the
// near-duplicate check — confirm against cleanupDuplicateVideoAssets.
writeSolidJPEG(t, smallThumb, color.RGBA{R: 180, G: 80, B: 40, A: 255})
|
||||
writeSolidJPEG(t, largeThumb, color.RGBA{R: 180, G: 80, B: 40, A: 255})
|
||||
|
||||
// A fixed timestamp keeps the seeded rows deterministic; the large video is
// stamped one second after the small one.
now := time.Date(2026, 5, 29, 12, 0, 0, 0, time.UTC)
|
||||
for _, v := range []*catalog.Video{
|
||||
{
|
||||
ID: "small-video",
|
||||
DriveID: "scriptcrawler-a",
|
||||
FileID: "file-small",
|
||||
FileName: "small.mp4",
|
||||
Title: "反差极品大二女友,叫声可射~,“射进小骚逼里面~” - 91porn",
|
||||
DurationSeconds: 313,
|
||||
Size: 1024,
|
||||
ThumbnailURL: "/p/thumb/small-video",
|
||||
PreviewLocal: smallPreview,
|
||||
PreviewStatus: "ready",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
{
|
||||
ID: "large-video",
|
||||
DriveID: "scriptcrawler-b",
|
||||
FileID: "file-large",
|
||||
FileName: "large.mp4",
|
||||
// Same leading title text as small-video; only the trailing site suffix differs.
Title: "反差极品大二女友,叫声可射~,“射进小骚逼里面~”_91pinse",
|
||||
DurationSeconds: 313,
|
||||
Size: 4096,
|
||||
ThumbnailURL: "/p/thumb/large-video",
|
||||
PreviewLocal: largePreview,
|
||||
PreviewStatus: "ready",
|
||||
PublishedAt: now.Add(time.Second),
|
||||
CreatedAt: now.Add(time.Second),
|
||||
UpdatedAt: now.Add(time.Second),
|
||||
},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Only cfg (with LocalPreviewDir) and cat are set on App for this test.
app := &App{
|
||||
cfg: &config.Config{Storage: config.Storage{LocalPreviewDir: localDir}},
|
||||
cat: cat,
|
||||
}
|
||||
if err := app.cleanupDuplicateVideoAssets(ctx); err != nil {
|
||||
t.Fatalf("cleanup duplicate video assets: %v", err)
|
||||
}
|
||||
|
||||
// The smaller row must no longer be retrievable from the catalog.
if _, err := cat.GetVideo(ctx, "small-video"); err != sql.ErrNoRows {
|
||||
t.Fatalf("small duplicate lookup error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
// ...and it must be recorded as deleted.
deleted, err := cat.IsVideoDeleted(ctx, "small-video")
|
||||
if err != nil {
|
||||
t.Fatalf("check small tombstone: %v", err)
|
||||
}
|
||||
if !deleted {
|
||||
t.Fatalf("small duplicate tombstone missing")
|
||||
}
|
||||
// Exactly one tombstone is expected, for small-video, with the duplicate reason.
deletedItems, _, err := cat.ListDeletedVideos(ctx, catalog.ListParams{Page: 1, PageSize: 10})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted videos: %v", err)
|
||||
}
|
||||
if len(deletedItems) != 1 || deletedItems[0].ID != "small-video" || deletedItems[0].Reason != catalog.DeletedVideoReasonDuplicate {
|
||||
t.Fatalf("small duplicate tombstone = %#v, want reason %q", deletedItems, catalog.DeletedVideoReasonDuplicate)
|
||||
}
|
||||
// The larger row survives with its seeded size unchanged.
large, err := cat.GetVideo(ctx, "large-video")
|
||||
if err != nil {
|
||||
t.Fatalf("large canonical missing: %v", err)
|
||||
}
|
||||
if large.Size != 4096 {
|
||||
t.Fatalf("large canonical size = %d, want 4096", large.Size)
|
||||
}
|
||||
// The removed video's local preview and thumbnail must be gone from disk.
for _, path := range []string{smallPreview, smallThumb} {
|
||||
if _, err := os.Stat(path); !os.IsNotExist(err) {
|
||||
t.Fatalf("small duplicate asset %s still exists, stat err=%v", path, err)
|
||||
}
|
||||
}
|
||||
// The surviving video's local preview and thumbnail must still exist.
for _, path := range []string{largePreview, largeThumb} {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
t.Fatalf("large canonical asset %s missing: %v", path, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// writeSolidJPEG is a test helper that writes a 64x64 JPEG filled entirely
// with color c to path, creating the parent directory first. Any mkdir,
// create, or encode failure aborts the calling test through t.Fatalf.
func writeSolidJPEG(t *testing.T, path string, c color.RGBA) {
|
||||
// Mark this function as a helper so failures are attributed to the caller.
t.Helper()
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||
t.Fatalf("mkdir %s: %v", path, err)
|
||||
}
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
t.Fatalf("create %s: %v", path, err)
|
||||
}
|
||||
// NOTE(review): the error returned by Close is not checked here.
defer f.Close()
|
||||
img := image.NewRGBA(image.Rect(0, 0, 64, 64))
|
||||
// Paint every pixel of the 64x64 image with the same color.
for y := 0; y < 64; y++ {
|
||||
for x := 0; x < 64; x++ {
|
||||
img.SetRGBA(x, y, c)
|
||||
}
|
||||
}
|
||||
// Encode with an explicit quality of 95.
if err := jpeg.Encode(f, img, &jpeg.Options{Quality: 95}); err != nil {
|
||||
t.Fatalf("encode %s: %v", path, err)
|
||||
}
|
||||
}
|
||||
|
||||
type serverFakeTeaserGenerator struct {
|
||||
mu sync.Mutex
|
||||
events []string
|
||||
@@ -1900,11 +2263,11 @@ func (d *serverSourceRemovableFakeDrive) Remove(ctx context.Context, fileID stri
|
||||
return nil
|
||||
}
|
||||
|
||||
type serverFakeSpider91MigrationRunner struct {
|
||||
type serverFakeCrawlerUploadRunner struct {
|
||||
called int
|
||||
}
|
||||
|
||||
func (r *serverFakeSpider91MigrationRunner) RunOnce(context.Context) error {
|
||||
func (r *serverFakeCrawlerUploadRunner) RunOnce(context.Context) error {
|
||||
r.called++
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ preview:
|
||||
width: 480
|
||||
|
||||
# 盘列表。上线后请通过管理后台添加,本文件可留空。
|
||||
# kind 支持 quark / p115 / p123 / pikpak / wopan / onedrive / googledrive / localstorage。
|
||||
# kind 支持 quark / p115 / p123 / pikpak / wopan / guangyapan / onedrive / googledrive / localstorage。
|
||||
# OneDrive 示例:
|
||||
# - id: "my-onedrive"
|
||||
# kind: "onedrive"
|
||||
@@ -76,6 +76,17 @@ preview:
|
||||
# # use_online_api: "false"
|
||||
# # client_id: "..."
|
||||
# # client_secret: "..."
|
||||
# 光鸭网盘示例:
|
||||
# - id: "my-guangyapan"
|
||||
# kind: "guangyapan"
|
||||
# name: "我的光鸭网盘"
|
||||
# # 留空表示光鸭网盘根目录;也可以填写光鸭目录 fileId
|
||||
# root_id: ""
|
||||
# params:
|
||||
# # 推荐在后台使用扫码登录自动写入 access_token / refresh_token。
|
||||
# refresh_token: "..."
|
||||
# # 可选:按路径解析扫描根目录,优先于 root_id
|
||||
# # root_path: "影视/电影"
|
||||
# 本地存储示例:
|
||||
# - id: "local-media"
|
||||
# kind: "localstorage"
|
||||
|
||||
@@ -21,9 +21,9 @@ import (
|
||||
|
||||
"github.com/video-site/backend/internal/auth"
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives/guangyapan"
|
||||
"github.com/video-site/backend/internal/drives/p123"
|
||||
"github.com/video-site/backend/internal/drives/scriptcrawler"
|
||||
"github.com/video-site/backend/internal/drives/spider91"
|
||||
"github.com/video-site/backend/internal/drives/wopan"
|
||||
)
|
||||
|
||||
@@ -48,31 +48,36 @@ type AdminServer struct {
|
||||
// LocalPreviewDir is the local directory that stores generated preview videos and thumbs.
|
||||
LocalPreviewDir string
|
||||
// Hooks:外层注入实际执行者
|
||||
OnDriveSaved func(driveID string) error
|
||||
OnDriveDeleteCleanup func(ctx context.Context, driveID string) (int, error)
|
||||
OnDriveRemoved func(driveID string)
|
||||
OnScanRequested func(driveID string) bool
|
||||
OnStopDriveTasks func(driveID string) bool
|
||||
OnStopAllTasks func() int
|
||||
OnRegenPreview func(videoID string)
|
||||
OnRegenAllPreviews func()
|
||||
OnRegenFailedPreviews func(driveID string)
|
||||
OnRegenFailedThumbnails func(driveID string)
|
||||
OnRegenFailedFingerprints func(driveID string)
|
||||
OnDeleteVideo func(ctx context.Context, videoID string, deleteSource bool) (DeleteVideoResult, error)
|
||||
GetDriveGenerationStatuses func() map[string]DriveGenerationStatuses
|
||||
OnDriveSaved func(driveID string) error
|
||||
OnDriveDeleteCleanup func(ctx context.Context, driveID string) (int, error)
|
||||
OnDriveRemoved func(driveID string)
|
||||
OnScanRequested func(driveID string) bool
|
||||
OnCrawlerUploadRequested func(driveID string) (bool, string)
|
||||
OnStopDriveTasks func(driveID string) bool
|
||||
OnStopAllTasks func() int
|
||||
OnRegenPreview func(videoID string)
|
||||
OnRegenAllPreviews func()
|
||||
OnRegenFailedPreviews func(driveID string)
|
||||
OnRegenFailedThumbnails func(driveID string)
|
||||
OnRegenFailedFingerprints func(driveID string)
|
||||
// OnStartDriveTranscode 手动开启某盘的浏览器兼容性转码任务。
|
||||
// 返回 (是否接受, 拒绝原因)。转码从不自动运行,只能在这里手动触发;
|
||||
// 处理完候选列表后任务自然结束。
|
||||
OnStartDriveTranscode func(driveID string) (bool, string)
|
||||
// OnStopDriveTranscode 手动停止某盘正在进行的转码任务。返回是否有任务被停。
|
||||
OnStopDriveTranscode func(driveID string) bool
|
||||
OnDeleteVideo func(ctx context.Context, videoID string, deleteSource bool) (DeleteVideoResult, error)
|
||||
GetDriveGenerationStatuses func() map[string]DriveGenerationStatuses
|
||||
GetPreviewGenerationVideoIDs func() map[string]bool
|
||||
// OnTeaserEnabledChanged 在 per-drive 预览视频开关被切换后调用。
|
||||
// enabled=true 时上层应该重新把 pending 预览视频入队(类似旧的全局开关从关到开);
|
||||
// enabled=false 时通常不用做事 —— worker 入队前会再次查 catalog,自然停止。
|
||||
OnTeaserEnabledChanged func(driveID string, enabled bool)
|
||||
// Theme 读写("dark" | "pink")
|
||||
// Theme 读写("dark" | "pink" | "sky")
|
||||
GetTheme func() string
|
||||
SetTheme func(theme string) error
|
||||
// Spider91 → 115/123/PikPak/OneDrive/Google Drive/联通网盘 上传目标 drive ID 读写
|
||||
GetSpider91UploadDriveID func() string
|
||||
SetSpider91UploadDriveID func(driveID string) error
|
||||
// OnRunNightlyJob 触发一次完整的凌晨流水线(Phase1 扫盘 + Phase2 91 爬虫 +
|
||||
// Phase3 迁移)。立即返回 —— 实际任务在后台跑,admin 在日志或下次状态查询里
|
||||
// OnRunNightlyJob 触发一次完整的凌晨流水线(Phase1 扫盘 + Phase2 爬虫 +
|
||||
// Phase3 上传)。立即返回 —— 实际任务在后台跑,admin 在日志或下次状态查询里
|
||||
// 看进度。若流水线正在跑或已排队,Runner 会拒绝重复触发。
|
||||
OnRunNightlyJob func() bool
|
||||
// GetNightlyJobStatus 返回凌晨流水线当前状态,用于前端禁用重复触发按钮。
|
||||
@@ -88,6 +93,9 @@ type AdminServer struct {
|
||||
// 联通网盘扫码登录接口测试注入;生产留空走官方 panservice.mail.wo.cn。
|
||||
WopanQRAPIBaseURL string
|
||||
WopanQRHTTPClient *http.Client
|
||||
// 光鸭网盘扫码登录接口测试注入;生产留空走官方 account.guangyapan.com。
|
||||
GuangYaPanAccountBaseURL string
|
||||
GuangYaPanHTTPClient *http.Client
|
||||
}
|
||||
|
||||
const (
|
||||
@@ -118,6 +126,7 @@ type DriveGenerationStatuses struct {
|
||||
Preview GenerationStatus `json:"preview"`
|
||||
Fingerprint GenerationStatus `json:"fingerprint"`
|
||||
Upload GenerationStatus `json:"upload"`
|
||||
Transcode GenerationStatus `json:"transcode"`
|
||||
}
|
||||
|
||||
type NightlyJobStatus struct {
|
||||
@@ -160,6 +169,8 @@ func (a *AdminServer) Register(r chi.Router) {
|
||||
r.Get("/drives/p123/qr/{uniID}", a.handleP123QRStatus)
|
||||
r.Post("/drives/wopan/qr", a.handleWopanQRStart)
|
||||
r.Get("/drives/wopan/qr/{uuid}", a.handleWopanQRStatus)
|
||||
r.Post("/drives/guangyapan/qr", a.handleGuangYaPanQRStart)
|
||||
r.Get("/drives/guangyapan/qr/status", a.handleGuangYaPanQRStatus)
|
||||
r.Delete("/drives/{id}", a.handleDeleteDrive)
|
||||
r.Post("/drives/{id}/rescan", a.handleRescan)
|
||||
r.Post("/drives/{id}/tasks/stop", a.handleStopDriveTasks)
|
||||
@@ -169,6 +180,8 @@ func (a *AdminServer) Register(r chi.Router) {
|
||||
r.Post("/drives/{id}/previews/failed/regenerate", a.handleRegenFailedPreviews)
|
||||
r.Post("/drives/{id}/thumbnails/failed/regenerate", a.handleRegenFailedThumbnails)
|
||||
r.Post("/drives/{id}/fingerprints/failed/regenerate", a.handleRegenFailedFingerprints)
|
||||
r.Post("/drives/{id}/transcode/start", a.handleStartDriveTranscode)
|
||||
r.Post("/drives/{id}/transcode/stop", a.handleStopDriveTranscode)
|
||||
|
||||
// 爬虫
|
||||
r.Get("/crawlers", a.handleListCrawlers)
|
||||
@@ -178,14 +191,19 @@ func (a *AdminServer) Register(r chi.Router) {
|
||||
r.Post("/crawlers/test-script", a.handleTestCrawlerScript)
|
||||
r.Delete("/crawlers/{id}", a.handleDeleteCrawler)
|
||||
r.Post("/crawlers/{id}/run", a.handleRunCrawler)
|
||||
r.Post("/crawlers/{id}/upload", a.handleUploadCrawlerVideos)
|
||||
r.Post("/crawlers/{id}/tasks/stop", a.handleStopCrawlerTasks)
|
||||
|
||||
// 视频
|
||||
r.Get("/videos", a.handleAdminListVideos)
|
||||
r.Get("/videos/stats", a.handleVideoStats)
|
||||
r.Put("/videos/{id}", a.handleUpdateVideo)
|
||||
r.Delete("/videos/{id}", a.handleDeleteVideo)
|
||||
r.Post("/videos/regen-preview", a.handleRegenAllPreviews)
|
||||
r.Post("/videos/{id}/regen-preview", a.handleRegenPreview)
|
||||
// 黑名单(被拉黑/手动删除、扫盘不再入库的视频)
|
||||
r.Get("/blacklist", a.handleListBlacklist)
|
||||
r.Delete("/blacklist/{id}", a.handleRemoveBlacklist)
|
||||
|
||||
// 标签
|
||||
r.Get("/tags", a.handleListTags)
|
||||
@@ -431,6 +449,11 @@ func (a *AdminServer) handleListDrives(w http.ResponseWriter, r *http.Request) {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
transcodeCounts, err := a.Catalog.CountTranscodesByDrive(r.Context())
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
generationStatuses := map[string]DriveGenerationStatuses{}
|
||||
if a.GetDriveGenerationStatuses != nil {
|
||||
generationStatuses = a.GetDriveGenerationStatuses()
|
||||
@@ -445,17 +468,18 @@ func (a *AdminServer) handleListDrives(w http.ResponseWriter, r *http.Request) {
|
||||
Status string `json:"status"`
|
||||
LastError string `json:"lastError,omitempty"`
|
||||
HasCredential bool `json:"hasCredential"`
|
||||
// TeaserEnabled 控制是否给本盘生成预览视频/封面。前端用它在网盘列表/编辑表单展示开关状态。
|
||||
// TeaserEnabled 控制是否给本盘生成预览视频;封面生成不受影响。
|
||||
// 前端用它在网盘列表/编辑表单展示开关状态。
|
||||
TeaserEnabled bool `json:"teaserEnabled"`
|
||||
// SkipDirIDs 是用户在 admin 配置的"扫描跳过目录"集合(drive 侧目录 fileID)。
|
||||
// 前端用它在"设置跳过目录"弹窗里回显已选项;JSON 字段名 camelCase 与
|
||||
// catalog.Drive 保持一致。
|
||||
SkipDirIDs []string `json:"skipDirIds"`
|
||||
// LastCrawlAt 是 spider91 上次成功爬取的 unix 秒(来自 credentials.last_crawl_at)。
|
||||
// 其它 kind 留 0;前端用它显示"上次抓取: N 小时前"。
|
||||
Spider91Proxy string `json:"spider91Proxy,omitempty"`
|
||||
LastCrawlAt int64 `json:"lastCrawlAt,omitempty"`
|
||||
GoogleDriveUseOnlineAPI *bool `json:"googleDriveUseOnlineAPI,omitempty"`
|
||||
SkipDirIDs []string `json:"skipDirIds"`
|
||||
LastCrawlAt int64 `json:"lastCrawlAt,omitempty"`
|
||||
GoogleDriveUseOnlineAPI *bool `json:"googleDriveUseOnlineAPI,omitempty"`
|
||||
GoogleDriveOpenListAPIURL string `json:"googleDriveOpenListApiUrl,omitempty"`
|
||||
// STRMAllowOutsideRoot 是 localstorage 的 .strm 越root开关;其它 kind 省略。
|
||||
STRMAllowOutsideRoot *bool `json:"strmAllowOutsideRoot,omitempty"`
|
||||
ScanGenerationStatus GenerationStatus `json:"scanGenerationStatus"`
|
||||
ThumbnailGenerationStatus GenerationStatus `json:"thumbnailGenerationStatus"`
|
||||
PreviewGenerationStatus GenerationStatus `json:"previewGenerationStatus"`
|
||||
@@ -470,6 +494,11 @@ func (a *AdminServer) handleListDrives(w http.ResponseWriter, r *http.Request) {
|
||||
FingerprintReadyCount int `json:"fingerprintReadyCount"`
|
||||
FingerprintPendingCount int `json:"fingerprintPendingCount"`
|
||||
FingerprintFailedCount int `json:"fingerprintFailedCount"`
|
||||
TranscodeGenerationStatus GenerationStatus `json:"transcodeGenerationStatus"`
|
||||
TranscodePendingCount int `json:"transcodePendingCount"`
|
||||
TranscodeReadyCount int `json:"transcodeReadyCount"`
|
||||
TranscodeFailedCount int `json:"transcodeFailedCount"`
|
||||
TranscodeSkippedCount int `json:"transcodeSkippedCount"`
|
||||
}
|
||||
list := make([]out, 0, len(drives))
|
||||
for _, d := range drives {
|
||||
@@ -479,6 +508,7 @@ func (a *AdminServer) handleListDrives(w http.ResponseWriter, r *http.Request) {
|
||||
counts := teaserCounts[d.ID]
|
||||
thumbCounts := thumbnailCounts[d.ID]
|
||||
fingerprintCount := fingerprintCounts[d.ID]
|
||||
transcodeCount := transcodeCounts[d.ID]
|
||||
generation := generationStatuses[d.ID]
|
||||
if generation.Scan.State == "" {
|
||||
generation.Scan.State = "idle"
|
||||
@@ -492,7 +522,9 @@ func (a *AdminServer) handleListDrives(w http.ResponseWriter, r *http.Request) {
|
||||
if generation.Fingerprint.State == "" {
|
||||
generation.Fingerprint.State = "idle"
|
||||
}
|
||||
// spider91 没有用户凭证概念;只要存在 drive 行就视为"已配置"。
|
||||
if generation.Transcode.State == "" {
|
||||
generation.Transcode.State = "idle"
|
||||
}
|
||||
// last_crawl_at 是后端自动写入的运行状态字段,不计入 hasCredential 判定。
|
||||
hasCred := false
|
||||
userCredKeys := 0
|
||||
@@ -502,7 +534,7 @@ func (a *AdminServer) handleListDrives(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
userCredKeys++
|
||||
}
|
||||
hasCred = userCredKeys > 0 || d.Kind == "spider91"
|
||||
hasCred = userCredKeys > 0
|
||||
|
||||
var lastCrawlAt int64
|
||||
if d.Credentials != nil {
|
||||
@@ -520,9 +552,10 @@ func (a *AdminServer) handleListDrives(w http.ResponseWriter, r *http.Request) {
|
||||
HasCredential: hasCred,
|
||||
TeaserEnabled: d.TeaserEnabled,
|
||||
SkipDirIDs: append([]string{}, d.SkipDirIDs...),
|
||||
Spider91Proxy: spider91ProxyForDrive(d),
|
||||
LastCrawlAt: lastCrawlAt,
|
||||
GoogleDriveUseOnlineAPI: googleDriveUseOnlineAPIForDrive(d),
|
||||
GoogleDriveOpenListAPIURL: googleDriveOpenListAPIURLForDrive(d),
|
||||
STRMAllowOutsideRoot: strmAllowOutsideRootForDrive(d),
|
||||
ScanGenerationStatus: generation.Scan,
|
||||
ThumbnailGenerationStatus: generation.Thumbnail,
|
||||
PreviewGenerationStatus: generation.Preview,
|
||||
@@ -537,6 +570,11 @@ func (a *AdminServer) handleListDrives(w http.ResponseWriter, r *http.Request) {
|
||||
FingerprintReadyCount: fingerprintCount.Ready,
|
||||
FingerprintPendingCount: fingerprintCount.Pending,
|
||||
FingerprintFailedCount: fingerprintCount.Failed,
|
||||
TranscodeGenerationStatus: generation.Transcode,
|
||||
TranscodePendingCount: transcodeCount.Pending,
|
||||
TranscodeReadyCount: transcodeCount.Ready,
|
||||
TranscodeFailedCount: transcodeCount.Failed,
|
||||
TranscodeSkippedCount: transcodeCount.Skipped,
|
||||
})
|
||||
}
|
||||
writeJSON(w, http.StatusOK, list)
|
||||
@@ -550,7 +588,7 @@ type upsertDriveReq struct {
|
||||
// Deprecated: 扫描起点已固定为 rootId;保留字段只为兼容旧客户端请求体。
|
||||
ScanRootID string `json:"scanRootId"`
|
||||
Credentials map[string]string `json:"credentials"`
|
||||
// TeaserEnabled 是 per-drive 预览视频/封面生成开关。
|
||||
// TeaserEnabled 是 per-drive 预览视频生成开关;封面生成不受影响。
|
||||
// 用 *bool 区分 "未传" / "传了 false":未传时表示客户端不打算改这个字段,
|
||||
// 沿用 catalog 现有值;新建时未传一律默认开启(true)。
|
||||
TeaserEnabled *bool `json:"teaserEnabled,omitempty"`
|
||||
@@ -575,10 +613,11 @@ func (a *AdminServer) handleUpsertDrive(w http.ResponseWriter, r *http.Request)
|
||||
if existingDrive, err := a.Catalog.GetDrive(r.Context(), body.ID); err == nil {
|
||||
existing = existingDrive
|
||||
}
|
||||
if body.Kind == "spider91" {
|
||||
http.Error(w, "91Spider 已不再支持通过网盘添加,请在爬虫管理页面添加爬虫脚本", http.StatusBadRequest)
|
||||
if !isSupportedDriveKind(body.Kind) {
|
||||
http.Error(w, "unsupported drive kind", http.StatusBadRequest)
|
||||
return
|
||||
} else if body.Kind == scriptcrawler.Kind {
|
||||
}
|
||||
if body.Kind == scriptcrawler.Kind {
|
||||
credentials, err := mergeScriptCrawlerCredentials(existing, body.Credentials)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
@@ -587,6 +626,10 @@ func (a *AdminServer) handleUpsertDrive(w http.ResponseWriter, r *http.Request)
|
||||
body.Credentials = credentials
|
||||
} else if body.Kind == "googledrive" {
|
||||
body.Credentials = mergeGoogleDriveCredentials(existing, body.Credentials)
|
||||
} else if body.Kind == "localstorage" || body.Kind == "guangyapan" {
|
||||
// 按键合并、空值沿用旧值:这些网盘的编辑表单允许只改某几个字段,
|
||||
// 其它 token / 路径 / 开关字段应保留旧值。
|
||||
body.Credentials = mergeNonEmptyCredentials(existing, body.Credentials)
|
||||
} else if len(body.Credentials) == 0 && existing != nil && len(existing.Credentials) > 0 {
|
||||
body.Credentials = existing.Credentials
|
||||
}
|
||||
@@ -647,6 +690,7 @@ type crawlerDTO struct {
|
||||
Proxy string `json:"proxy,omitempty"`
|
||||
TargetNew string `json:"targetNew,omitempty"`
|
||||
UploadDriveID string `json:"uploadDriveId,omitempty"`
|
||||
TeaserEnabled bool `json:"teaserEnabled"`
|
||||
LastCrawlAt int64 `json:"lastCrawlAt,omitempty"`
|
||||
ScanGenerationStatus GenerationStatus `json:"scanGenerationStatus"`
|
||||
ThumbnailGenerationStatus GenerationStatus `json:"thumbnailGenerationStatus"`
|
||||
@@ -674,6 +718,7 @@ type upsertCrawlerReq struct {
|
||||
Proxy string `json:"proxy"`
|
||||
TargetNew string `json:"targetNew"`
|
||||
UploadDriveID string `json:"uploadDriveId"`
|
||||
TeaserEnabled *bool `json:"teaserEnabled,omitempty"`
|
||||
}
|
||||
|
||||
func (a *AdminServer) handleListCrawlers(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -735,6 +780,7 @@ func (a *AdminServer) crawlerDTOForDrive(d *catalog.Drive, assets catalog.Crawle
|
||||
Proxy: strings.TrimSpace(d.Credentials["proxy"]),
|
||||
TargetNew: strings.TrimSpace(d.Credentials["target_new"]),
|
||||
UploadDriveID: strings.TrimSpace(d.Credentials["upload_drive_id"]),
|
||||
TeaserEnabled: d.TeaserEnabled,
|
||||
LastCrawlAt: lastCrawlAt,
|
||||
ScanGenerationStatus: generation.Scan,
|
||||
ThumbnailGenerationStatus: generation.Thumbnail,
|
||||
@@ -762,7 +808,6 @@ func crawlerVideoIDPrefixes(d *catalog.Drive) []string {
|
||||
}
|
||||
return []string{
|
||||
scriptcrawler.Kind + "-" + d.ID + "-",
|
||||
spider91.Kind + "-" + d.ID + "-",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -821,6 +866,13 @@ func (a *AdminServer) handleUpsertCrawler(w http.ResponseWriter, r *http.Request
|
||||
return
|
||||
}
|
||||
name := meta.Name
|
||||
teaserEnabled := true
|
||||
if existing != nil {
|
||||
teaserEnabled = existing.TeaserEnabled
|
||||
}
|
||||
if body.TeaserEnabled != nil {
|
||||
teaserEnabled = *body.TeaserEnabled
|
||||
}
|
||||
if id == "" {
|
||||
generatedID, err := a.generateCrawlerID(r.Context(), name)
|
||||
if err != nil {
|
||||
@@ -836,15 +888,15 @@ func (a *AdminServer) handleUpsertCrawler(w http.ResponseWriter, r *http.Request
|
||||
RootID: "/",
|
||||
Credentials: merged,
|
||||
Status: "disconnected",
|
||||
TeaserEnabled: true,
|
||||
}
|
||||
if existing != nil {
|
||||
d.TeaserEnabled = existing.TeaserEnabled
|
||||
TeaserEnabled: teaserEnabled,
|
||||
}
|
||||
if err := a.Catalog.UpsertDrive(r.Context(), d); err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
if existing != nil && existing.TeaserEnabled != teaserEnabled && a.OnTeaserEnabledChanged != nil {
|
||||
a.OnTeaserEnabledChanged(id, teaserEnabled)
|
||||
}
|
||||
if a.OnDriveSaved != nil {
|
||||
if err := a.OnDriveSaved(id); err != nil {
|
||||
writeJSON(w, http.StatusOK, map[string]any{"ok": true, "id": id, "warning": err.Error()})
|
||||
@@ -894,14 +946,14 @@ func (a *AdminServer) validateCrawlerUploadDrive(ctx context.Context, driveID st
|
||||
return fmt.Errorf("上传目标网盘 %q 不存在", driveID)
|
||||
}
|
||||
if !isCrawlerUploadTargetKind(d.Kind) {
|
||||
return fmt.Errorf("上传目标网盘 %q 类型为 %s,仅支持 115网盘、PikPak、123网盘、Google Drive、OneDrive、联通网盘", driveID, d.Kind)
|
||||
return fmt.Errorf("上传目标网盘 %q 类型为 %s,仅支持 115网盘、PikPak、123网盘、Google Drive、OneDrive、联通网盘、光鸭网盘", driveID, d.Kind)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func isCrawlerUploadTargetKind(kind string) bool {
|
||||
switch strings.TrimSpace(kind) {
|
||||
case "p115", "pikpak", "p123", "googledrive", "onedrive", "wopan":
|
||||
case "p115", "pikpak", "p123", "googledrive", "onedrive", "wopan", "guangyapan":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
@@ -1230,6 +1282,104 @@ func (a *AdminServer) handleRunCrawler(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, http.StatusAccepted, resp)
|
||||
}
|
||||
|
||||
func (a *AdminServer) handleUploadCrawlerVideos(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
d, err := a.Catalog.GetDrive(r.Context(), id)
|
||||
if err != nil || d == nil || !isConfiguredCrawlerDrive(d) {
|
||||
http.Error(w, "crawler not found", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
status := a.nightlyJobStatus()
|
||||
if status.Running || status.Queued {
|
||||
writeJSON(w, http.StatusAccepted, map[string]any{
|
||||
"ok": true,
|
||||
"accepted": false,
|
||||
"message": fullScanBusyMessage,
|
||||
"status": status,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
assets, err := a.Catalog.CountCrawlerAssets(r.Context(), d.ID, crawlerVideoIDPrefixes(d))
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
generation := DriveGenerationStatuses{}
|
||||
if a.GetDriveGenerationStatuses != nil {
|
||||
generation = a.GetDriveGenerationStatuses()[d.ID]
|
||||
}
|
||||
if reason := crawlerUploadBlockedReason(d, assets, generation); reason != "" {
|
||||
writeJSON(w, http.StatusAccepted, map[string]any{
|
||||
"ok": true,
|
||||
"accepted": false,
|
||||
"message": reason,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
accepted := true
|
||||
message := ""
|
||||
if a.OnCrawlerUploadRequested != nil {
|
||||
accepted, message = a.OnCrawlerUploadRequested(id)
|
||||
}
|
||||
resp := map[string]any{"ok": true, "accepted": accepted}
|
||||
if !accepted {
|
||||
if strings.TrimSpace(message) == "" {
|
||||
message = driveTaskBusyMessage
|
||||
}
|
||||
resp["message"] = message
|
||||
}
|
||||
writeJSON(w, http.StatusAccepted, resp)
|
||||
}
|
||||
|
||||
func crawlerUploadBlockedReason(d *catalog.Drive, assets catalog.CrawlerAssetCounts, generation DriveGenerationStatuses) string {
|
||||
if d == nil || !isConfiguredCrawlerDrive(d) {
|
||||
return "爬虫不存在"
|
||||
}
|
||||
if strings.TrimSpace(d.Credentials["upload_drive_id"]) == "" {
|
||||
return "请先配置上传网盘"
|
||||
}
|
||||
if assets.Local <= 0 {
|
||||
return "没有待上传的本地视频"
|
||||
}
|
||||
if crawlerGenerationBusy(generation) {
|
||||
return "当前爬虫有正在进行的任务,请稍后重试"
|
||||
}
|
||||
if assets.Fingerprint.Pending > 0 {
|
||||
return "还有待生成的视频指纹"
|
||||
}
|
||||
if assets.Fingerprint.Failed > 0 {
|
||||
return "存在指纹生成失败的视频,请先重试或处理失败项"
|
||||
}
|
||||
if d.TeaserEnabled {
|
||||
if assets.Teaser.Pending > 0 {
|
||||
return "还有待生成的预览视频"
|
||||
}
|
||||
if assets.Teaser.Failed > 0 {
|
||||
return "存在预览视频生成失败的视频,请先重试或处理失败项"
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func crawlerGenerationBusy(g DriveGenerationStatuses) bool {
|
||||
return generationBusy(g.Scan) ||
|
||||
generationBusy(g.Thumbnail) ||
|
||||
generationBusy(g.Preview) ||
|
||||
generationBusy(g.Fingerprint) ||
|
||||
generationBusy(g.Upload)
|
||||
}
|
||||
|
||||
func generationBusy(g GenerationStatus) bool {
|
||||
switch strings.TrimSpace(g.State) {
|
||||
case "", "idle":
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func (a *AdminServer) handleStopCrawlerTasks(w http.ResponseWriter, r *http.Request) {
|
||||
a.handleStopDriveTasks(w, r)
|
||||
}
|
||||
@@ -1280,6 +1430,15 @@ func isCrawlerDriveKind(kind string) bool {
|
||||
return kind == scriptcrawler.Kind
|
||||
}
|
||||
|
||||
func isSupportedDriveKind(kind string) bool {
|
||||
switch kind {
|
||||
case "quark", "p115", "p123", "pikpak", "wopan", "guangyapan", "onedrive", "googledrive", "localstorage", scriptcrawler.Kind:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func isConfiguredCrawlerDrive(d *catalog.Drive) bool {
|
||||
return d != nil &&
|
||||
isCrawlerDriveKind(d.Kind) &&
|
||||
@@ -1315,11 +1474,19 @@ func (a *AdminServer) removeImportedCrawlerScript(d *catalog.Drive) (bool, error
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func spider91ProxyForDrive(d *catalog.Drive) string {
|
||||
if d == nil || d.Kind != "spider91" || d.Credentials == nil {
|
||||
return ""
|
||||
// strmAllowOutsideRootForDrive 返回 localstorage 的 .strm 越root开关;
|
||||
// 其它 kind 返回 nil(JSON 省略)。未配置时默认 false。
|
||||
func strmAllowOutsideRootForDrive(d *catalog.Drive) *bool {
|
||||
if d == nil || d.Kind != "localstorage" {
|
||||
return nil
|
||||
}
|
||||
return strings.TrimSpace(d.Credentials["proxy"])
|
||||
result := false
|
||||
if d.Credentials != nil {
|
||||
if v, err := strconv.ParseBool(strings.TrimSpace(d.Credentials["strm_allow_outside_root"])); err == nil {
|
||||
result = v
|
||||
}
|
||||
}
|
||||
return &result
|
||||
}
|
||||
|
||||
func googleDriveUseOnlineAPIForDrive(d *catalog.Drive) *bool {
|
||||
@@ -1342,7 +1509,25 @@ func googleDriveUseOnlineAPIForDrive(d *catalog.Drive) *bool {
|
||||
return &result
|
||||
}
|
||||
|
||||
func googleDriveOpenListAPIURLForDrive(d *catalog.Drive) string {
|
||||
if d == nil || d.Kind != "googledrive" || d.Credentials == nil {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(d.Credentials["api_url_address"])
|
||||
}
|
||||
|
||||
func mergeGoogleDriveCredentials(existing *catalog.Drive, incoming map[string]string) map[string]string {
|
||||
merged := mergeNonEmptyCredentials(existing, incoming)
|
||||
if _, ok := incoming["api_url_address"]; ok && strings.TrimSpace(incoming["api_url_address"]) == "" {
|
||||
delete(merged, "api_url_address")
|
||||
}
|
||||
return merged
|
||||
}
|
||||
|
||||
// mergeNonEmptyCredentials 逐键合并凭证:incoming 里非空的键覆盖旧值,
|
||||
// 空值/缺失的键沿用旧值。googledrive、localstorage 和 guangyapan 的编辑表单都依赖
|
||||
// 这个语义(留空 = 不修改)。
|
||||
func mergeNonEmptyCredentials(existing *catalog.Drive, incoming map[string]string) map[string]string {
|
||||
merged := map[string]string{}
|
||||
if existing != nil {
|
||||
for k, v := range existing.Credentials {
|
||||
@@ -1363,34 +1548,6 @@ func mergeGoogleDriveCredentials(existing *catalog.Drive, incoming map[string]st
|
||||
return merged
|
||||
}
|
||||
|
||||
func mergeSpider91Credentials(existing *catalog.Drive, incoming map[string]string) (map[string]string, error) {
|
||||
merged := map[string]string{}
|
||||
if existing != nil {
|
||||
for k, v := range existing.Credentials {
|
||||
merged[k] = v
|
||||
}
|
||||
}
|
||||
for k, v := range incoming {
|
||||
if strings.TrimSpace(k) == "" {
|
||||
continue
|
||||
}
|
||||
if k == "proxy" {
|
||||
proxy, err := normalizeSpider91ProxyURL(v)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if proxy == "" {
|
||||
delete(merged, "proxy")
|
||||
} else {
|
||||
merged["proxy"] = proxy
|
||||
}
|
||||
continue
|
||||
}
|
||||
merged[k] = v
|
||||
}
|
||||
return merged, nil
|
||||
}
|
||||
|
||||
func mergeScriptCrawlerCredentials(existing *catalog.Drive, incoming map[string]string) (map[string]string, error) {
|
||||
merged := map[string]string{}
|
||||
if existing != nil {
|
||||
@@ -1452,10 +1609,6 @@ func mergeScriptCrawlerCredentials(existing *catalog.Drive, incoming map[string]
|
||||
return merged, nil
|
||||
}
|
||||
|
||||
func normalizeSpider91ProxyURL(raw string) (string, error) {
|
||||
return normalizeCrawlerProxyURL(raw, "91Spider")
|
||||
}
|
||||
|
||||
func normalizeCrawlerProxyURL(raw, label string) (string, error) {
|
||||
proxy := strings.TrimSpace(raw)
|
||||
if proxy == "" {
|
||||
@@ -1547,6 +1700,35 @@ func (a *AdminServer) handleStopDriveTasks(w http.ResponseWriter, r *http.Reques
|
||||
})
|
||||
}
|
||||
|
||||
// handleStartDriveTranscode 手动开启某盘的浏览器兼容性转码。
|
||||
// 转码默认不开启、从不自动运行;本接口是唯一入口。
|
||||
func (a *AdminServer) handleStartDriveTranscode(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
if a.OnStartDriveTranscode == nil {
|
||||
writeErr(w, http.StatusNotImplemented, errors.New("transcode not supported"))
|
||||
return
|
||||
}
|
||||
accepted, message := a.OnStartDriveTranscode(id)
|
||||
writeJSON(w, http.StatusAccepted, map[string]any{
|
||||
"ok": true,
|
||||
"accepted": accepted,
|
||||
"message": message,
|
||||
})
|
||||
}
|
||||
|
||||
// handleStopDriveTranscode 手动停止某盘正在进行的转码任务。
|
||||
func (a *AdminServer) handleStopDriveTranscode(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
stopped := false
|
||||
if a.OnStopDriveTranscode != nil {
|
||||
stopped = a.OnStopDriveTranscode(id)
|
||||
}
|
||||
writeJSON(w, http.StatusAccepted, map[string]any{
|
||||
"ok": true,
|
||||
"stopped": stopped,
|
||||
})
|
||||
}
|
||||
|
||||
func (a *AdminServer) p123QRClient() *p123.QRClient {
|
||||
return p123.NewQRClient(p123.QRConfig{
|
||||
UserAPIBaseURL: a.P123UserAPIBaseURL,
|
||||
@@ -1612,6 +1794,38 @@ func (a *AdminServer) handleWopanQRStatus(w http.ResponseWriter, r *http.Request
|
||||
writeJSON(w, http.StatusOK, status)
|
||||
}
|
||||
|
||||
func (a *AdminServer) guangYaPanQRClient() *guangyapan.QRClient {
|
||||
return guangyapan.NewQRClient(guangyapan.QRConfig{
|
||||
AccountBaseURL: a.GuangYaPanAccountBaseURL,
|
||||
HTTPClient: a.GuangYaPanHTTPClient,
|
||||
})
|
||||
}
|
||||
|
||||
func (a *AdminServer) handleGuangYaPanQRStart(w http.ResponseWriter, r *http.Request) {
|
||||
session, err := a.guangYaPanQRClient().Generate(r.Context())
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusBadGateway, err)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
writeJSON(w, http.StatusOK, session)
|
||||
}
|
||||
|
||||
func (a *AdminServer) handleGuangYaPanQRStatus(w http.ResponseWriter, r *http.Request) {
|
||||
deviceCode := r.URL.Query().Get("deviceCode")
|
||||
if strings.TrimSpace(deviceCode) == "" {
|
||||
http.Error(w, "deviceCode is required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
status, err := a.guangYaPanQRClient().Poll(r.Context(), deviceCode)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusBadGateway, err)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
writeJSON(w, http.StatusOK, status)
|
||||
}
|
||||
|
||||
// handleRunNightlyJob 触发一次完整的凌晨流水线(不论当前时间,不论今日是否已跑)。
|
||||
// 立即返回 202;进度通过 backend 日志和下次 GET /admin/api/drives 的状态变化观察。
|
||||
// 流水线已在跑或已排队时,Runner 会拒绝重复触发。
|
||||
@@ -1798,6 +2012,56 @@ func (a *AdminServer) handleAdminListVideos(w http.ResponseWriter, r *http.Reque
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
if a.GetPreviewGenerationVideoIDs != nil {
|
||||
generating := a.GetPreviewGenerationVideoIDs()
|
||||
for _, item := range items {
|
||||
if item != nil && generating[item.ID] {
|
||||
item.PreviewStatus = "generating"
|
||||
}
|
||||
}
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{
|
||||
"items": mapAdminVideos(items),
|
||||
"total": total,
|
||||
"page": page,
|
||||
"size": size,
|
||||
})
|
||||
}
|
||||
|
||||
// handleVideoStats 返回后台视频管理两个标签页的计数(当前/拉黑)。
|
||||
func (a *AdminServer) handleVideoStats(w http.ResponseWriter, r *http.Request) {
|
||||
current, blacklisted, err := a.Catalog.VideoManagementCounts(r.Context())
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{
|
||||
"current": current,
|
||||
"blacklisted": blacklisted,
|
||||
})
|
||||
}
|
||||
|
||||
// handleListBlacklist 分页返回黑名单(墓碑)视频。
|
||||
func (a *AdminServer) handleListBlacklist(w http.ResponseWriter, r *http.Request) {
|
||||
q := r.URL.Query()
|
||||
page, _ := strconv.Atoi(q.Get("page"))
|
||||
size, _ := strconv.Atoi(q.Get("size"))
|
||||
if page <= 0 {
|
||||
page = 1
|
||||
}
|
||||
if size <= 0 || size > 100 {
|
||||
size = 100
|
||||
}
|
||||
items, total, err := a.Catalog.ListDeletedVideos(r.Context(), catalog.ListParams{
|
||||
Keyword: q.Get("keyword"),
|
||||
DriveID: q.Get("driveId"),
|
||||
Page: page,
|
||||
PageSize: size,
|
||||
})
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{
|
||||
"items": items,
|
||||
"total": total,
|
||||
@@ -1806,6 +2070,20 @@ func (a *AdminServer) handleAdminListVideos(w http.ResponseWriter, r *http.Reque
|
||||
})
|
||||
}
|
||||
|
||||
// handleRemoveBlacklist 把视频移出黑名单(删除墓碑),下次扫盘会重新入库。
|
||||
func (a *AdminServer) handleRemoveBlacklist(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
if err := a.Catalog.RemoveDeletedVideo(r.Context(), id); err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
writeErr(w, http.StatusNotFound, err)
|
||||
return
|
||||
}
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{"ok": true})
|
||||
}
|
||||
|
||||
func (a *AdminServer) handleListTags(w http.ResponseWriter, r *http.Request) {
|
||||
tags, err := a.Catalog.ListTags(r.Context())
|
||||
if err != nil {
|
||||
@@ -1862,7 +2140,6 @@ type updateVideoReq struct {
|
||||
Title string `json:"title"`
|
||||
Author string `json:"author"`
|
||||
Tags []string `json:"tags"`
|
||||
Category string `json:"category"`
|
||||
Badges []string `json:"badges"`
|
||||
Description string `json:"description"`
|
||||
Thumbnail string `json:"thumbnail"`
|
||||
@@ -1870,6 +2147,97 @@ type updateVideoReq struct {
|
||||
DurationSec int `json:"durationSeconds"`
|
||||
}
|
||||
|
||||
// adminVideoDTO is the JSON shape of a video returned by the admin API.
// Its fields are filled one-to-one from catalog.Video by mapAdminVideo.
type adminVideoDTO struct {
	// Identity and location of the underlying file.
	ID       string `json:"id"`
	DriveID  string `json:"driveId"`
	FileID   string `json:"fileId"`
	FileName string `json:"fileName"`

	// Content fingerprinting.
	ContentHash       string `json:"contentHash"`
	SampledSHA256     string `json:"sampledSha256"`
	FingerprintStatus string `json:"fingerprintStatus"`
	FingerprintError  string `json:"fingerprintError"`

	// Editable metadata and basic media properties.
	ParentID        string   `json:"parentId"`
	Title           string   `json:"title"`
	Author          string   `json:"author"`
	Tags            []string `json:"tags"`
	DurationSeconds int      `json:"durationSeconds"`
	Size            int64    `json:"size"`
	Ext             string   `json:"ext"`
	Quality         string   `json:"quality"`

	// Thumbnail and preview assets.
	ThumbnailURL  string `json:"thumbnailUrl"`
	PreviewFileID string `json:"previewFileId"`
	PreviewLocal  string `json:"previewLocal"`
	PreviewStatus string `json:"previewStatus"`

	// Transcoding state.
	TranscodeStatus  string `json:"transcodeStatus"`
	TranscodeError   string `json:"transcodeError"`
	TranscodedFileID string `json:"transcodedFileId"`
	TranscodedSize   int64  `json:"transcodedSize"`

	// Engagement counters.
	Views        int       `json:"views"`
	LastViewedAt time.Time `json:"lastViewedAt"`
	Favorites    int       `json:"favorites"`
	Comments     int       `json:"comments"`
	Likes        int       `json:"likes"`
	Dislikes     int       `json:"dislikes"`

	// Presentation flags and free-form text.
	Hidden      bool     `json:"hidden"`
	Badges      []string `json:"badges"`
	Description string   `json:"description"`

	// Timestamps.
	PublishedAt time.Time `json:"publishedAt"`
	CreatedAt   time.Time `json:"createdAt"`
	UpdatedAt   time.Time `json:"updatedAt"`
}
|
||||
|
||||
func mapAdminVideo(v *catalog.Video) adminVideoDTO {
|
||||
if v == nil {
|
||||
return adminVideoDTO{}
|
||||
}
|
||||
return adminVideoDTO{
|
||||
ID: v.ID,
|
||||
DriveID: v.DriveID,
|
||||
FileID: v.FileID,
|
||||
FileName: v.FileName,
|
||||
ContentHash: v.ContentHash,
|
||||
SampledSHA256: v.SampledSHA256,
|
||||
FingerprintStatus: v.FingerprintStatus,
|
||||
FingerprintError: v.FingerprintError,
|
||||
ParentID: v.ParentID,
|
||||
Title: v.Title,
|
||||
Author: v.Author,
|
||||
Tags: v.Tags,
|
||||
DurationSeconds: v.DurationSeconds,
|
||||
Size: v.Size,
|
||||
Ext: v.Ext,
|
||||
Quality: v.Quality,
|
||||
ThumbnailURL: v.ThumbnailURL,
|
||||
PreviewFileID: v.PreviewFileID,
|
||||
PreviewLocal: v.PreviewLocal,
|
||||
PreviewStatus: v.PreviewStatus,
|
||||
TranscodeStatus: v.TranscodeStatus,
|
||||
TranscodeError: v.TranscodeError,
|
||||
TranscodedFileID: v.TranscodedFileID,
|
||||
TranscodedSize: v.TranscodedSize,
|
||||
Views: v.Views,
|
||||
LastViewedAt: v.LastViewedAt,
|
||||
Favorites: v.Favorites,
|
||||
Comments: v.Comments,
|
||||
Likes: v.Likes,
|
||||
Dislikes: v.Dislikes,
|
||||
Hidden: v.Hidden,
|
||||
Badges: v.Badges,
|
||||
Description: v.Description,
|
||||
PublishedAt: v.PublishedAt,
|
||||
CreatedAt: v.CreatedAt,
|
||||
UpdatedAt: v.UpdatedAt,
|
||||
}
|
||||
}
|
||||
|
||||
func mapAdminVideos(vs []*catalog.Video) []adminVideoDTO {
|
||||
out := make([]adminVideoDTO, 0, len(vs))
|
||||
for _, v := range vs {
|
||||
out = append(out, mapAdminVideo(v))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (a *AdminServer) handleUpdateVideo(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
var body updateVideoReq
|
||||
@@ -1888,9 +2256,6 @@ func (a *AdminServer) handleUpdateVideo(w http.ResponseWriter, r *http.Request)
|
||||
if body.Author != "" {
|
||||
v.Author = body.Author
|
||||
}
|
||||
if body.Category != "" {
|
||||
v.Category = body.Category
|
||||
}
|
||||
if body.Badges != nil {
|
||||
v.Badges = body.Badges
|
||||
}
|
||||
@@ -1925,7 +2290,7 @@ func (a *AdminServer) handleUpdateVideo(w http.ResponseWriter, r *http.Request)
|
||||
return
|
||||
}
|
||||
}
|
||||
writeJSON(w, http.StatusOK, v)
|
||||
writeJSON(w, http.StatusOK, mapAdminVideo(v))
|
||||
}
|
||||
|
||||
func (a *AdminServer) handleDeleteVideo(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -2019,10 +2384,9 @@ func (a *AdminServer) handleRegenFailedFingerprints(w http.ResponseWriter, r *ht
|
||||
//
|
||||
// 注意:早期的全局 previewEnabled 字段已经下沉为每盘 teaser_enabled,
|
||||
// 不再出现在这里;前端要切换某个盘的预览视频生成请用 POST /admin/api/drives 上传
|
||||
// teaserEnabled 字段。保留 settings 用作主题、spider91 上传目标这类全局配置。
|
||||
// teaserEnabled 字段。settings 目前只保留全站主题。
|
||||
type settingsDTO struct {
|
||||
Theme string `json:"theme"`
|
||||
Spider91UploadDriveID string `json:"spider91UploadDriveId"`
|
||||
Theme string `json:"theme"`
|
||||
}
|
||||
|
||||
func (a *AdminServer) handleGetSettings(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -2032,19 +2396,12 @@ func (a *AdminServer) handleGetSettings(w http.ResponseWriter, r *http.Request)
|
||||
theme = v
|
||||
}
|
||||
}
|
||||
spider91UploadID := ""
|
||||
if a.GetSpider91UploadDriveID != nil {
|
||||
spider91UploadID = a.GetSpider91UploadDriveID()
|
||||
}
|
||||
writeJSON(w, http.StatusOK, settingsDTO{
|
||||
Theme: theme,
|
||||
Spider91UploadDriveID: spider91UploadID,
|
||||
Theme: theme,
|
||||
})
|
||||
}
|
||||
|
||||
func (a *AdminServer) handlePutSettings(w http.ResponseWriter, r *http.Request) {
|
||||
// 用 map 区分"没传"和"传了空字符串"两种语义;空 spider91 上传 ID 表示
|
||||
// 本地保存不上传。
|
||||
var raw map[string]json.RawMessage
|
||||
if err := json.NewDecoder(r.Body).Decode(&raw); err != nil {
|
||||
writeErr(w, http.StatusBadRequest, err)
|
||||
@@ -2065,25 +2422,10 @@ func (a *AdminServer) handlePutSettings(w http.ResponseWriter, r *http.Request)
|
||||
}
|
||||
}
|
||||
|
||||
if v, ok := raw["spider91UploadDriveId"]; ok && a.SetSpider91UploadDriveID != nil {
|
||||
var driveID string
|
||||
if err := json.Unmarshal(v, &driveID); err != nil {
|
||||
writeErr(w, http.StatusBadRequest, err)
|
||||
return
|
||||
}
|
||||
if err := a.SetSpider91UploadDriveID(driveID); err != nil {
|
||||
writeErr(w, http.StatusBadRequest, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 回显当前值
|
||||
resp := settingsDTO{}
|
||||
if a.GetTheme != nil {
|
||||
resp.Theme = a.GetTheme()
|
||||
}
|
||||
if a.GetSpider91UploadDriveID != nil {
|
||||
resp.Spider91UploadDriveID = a.GetSpider91UploadDriveID()
|
||||
}
|
||||
writeJSON(w, http.StatusOK, resp)
|
||||
}
|
||||
|
||||
@@ -732,9 +732,34 @@ func TestHandleUpsertGoogleDriveMergesOAuthCredentials(t *testing.T) {
|
||||
if got.Credentials["client_id"] != "google-client-id" || got.Credentials["client_secret"] != "google-client-secret" {
|
||||
t.Fatalf("oauth client credentials = %#v, want saved", got.Credentials)
|
||||
}
|
||||
if got.Credentials["api_url_address"] != "https://api.oplist.org/googleui/renewapi" {
|
||||
t.Fatalf("api_url_address = %q, want preserved", got.Credentials["api_url_address"])
|
||||
}
|
||||
|
||||
clearReq := httptest.NewRequest(http.MethodPost, "/admin/api/drives", bytes.NewBufferString(`{
|
||||
"id": "google-main",
|
||||
"kind": "googledrive",
|
||||
"name": "Google Drive",
|
||||
"rootId": "root",
|
||||
"credentials": {
|
||||
"api_url_address": ""
|
||||
}
|
||||
}`))
|
||||
clearRR := httptest.NewRecorder()
|
||||
(&AdminServer{Catalog: cat}).handleUpsertDrive(clearRR, clearReq)
|
||||
if clearRR.Code != http.StatusOK {
|
||||
t.Fatalf("clear status = %d, body = %s", clearRR.Code, clearRR.Body.String())
|
||||
}
|
||||
cleared, err := cat.GetDrive(ctx, "google-main")
|
||||
if err != nil {
|
||||
t.Fatalf("get cleared drive: %v", err)
|
||||
}
|
||||
if _, ok := cleared.Credentials["api_url_address"]; ok {
|
||||
t.Fatalf("api_url_address was not cleared: %#v", cleared.Credentials)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUpsertSpider91DriveIsRejected(t *testing.T) {
|
||||
func TestHandleUpsertUnknownDriveKindIsRejected(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -747,14 +772,12 @@ func TestHandleUpsertSpider91DriveIsRejected(t *testing.T) {
|
||||
})
|
||||
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: "spider91-main",
|
||||
Kind: "spider91",
|
||||
Name: "91 Spider",
|
||||
ID: "unknown-main",
|
||||
Kind: "unknown",
|
||||
Name: "Unknown",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{
|
||||
"last_crawl_at": "1800000000",
|
||||
"proxy": "http://old-proxy.local:7890",
|
||||
"script_path": "/opt/video-site-91/91VideoSpider/spider_91porn.py",
|
||||
"token": "old-token",
|
||||
},
|
||||
Status: "ok",
|
||||
}); err != nil {
|
||||
@@ -762,33 +785,27 @@ func TestHandleUpsertSpider91DriveIsRejected(t *testing.T) {
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/admin/api/drives", strings.NewReader(`{
|
||||
"id": "spider91-main",
|
||||
"kind": "spider91",
|
||||
"name": "91 Spider",
|
||||
"id": "unknown-main",
|
||||
"kind": "unknown",
|
||||
"name": "Unknown",
|
||||
"rootId": "/",
|
||||
"credentials": {"proxy": " socks5h://proxy-user:proxy-pass@127.0.0.1:7891 "}
|
||||
"credentials": {"token": "new-token"}
|
||||
}`))
|
||||
rr := httptest.NewRecorder()
|
||||
(&AdminServer{Catalog: cat}).handleUpsertDrive(rr, req)
|
||||
if rr.Code != http.StatusBadRequest {
|
||||
t.Fatalf("status = %d, want 400; body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
if !strings.Contains(rr.Body.String(), "爬虫管理") {
|
||||
t.Fatalf("body = %q, want crawler management guidance", rr.Body.String())
|
||||
if rr.Body.String() != "unsupported drive kind\n" {
|
||||
t.Fatalf("body = %q, want unsupported kind", rr.Body.String())
|
||||
}
|
||||
|
||||
got, err := cat.GetDrive(ctx, "spider91-main")
|
||||
got, err := cat.GetDrive(ctx, "unknown-main")
|
||||
if err != nil {
|
||||
t.Fatalf("get drive: %v", err)
|
||||
}
|
||||
if got.Credentials["proxy"] != "http://old-proxy.local:7890" {
|
||||
t.Fatalf("proxy = %q, want unchanged old proxy", got.Credentials["proxy"])
|
||||
}
|
||||
if got.Credentials["last_crawl_at"] != "1800000000" {
|
||||
t.Fatalf("last_crawl_at = %q, want preserved", got.Credentials["last_crawl_at"])
|
||||
}
|
||||
if got.Credentials["script_path"] == "" {
|
||||
t.Fatalf("script_path should be preserved")
|
||||
if got.Credentials["token"] != "old-token" {
|
||||
t.Fatalf("token = %q, want unchanged old token", got.Credentials["token"])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -914,37 +931,25 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
scriptPath := filepath.Join(tmp, "spider_91porn.py")
|
||||
if err := os.WriteFile(scriptPath, []byte("CRAWLER_NAME = \"91Porn\"\n"), 0o644); err != nil {
|
||||
scriptPath := filepath.Join(tmp, "demo_crawler.py")
|
||||
if err := os.WriteFile(scriptPath, []byte("CRAWLER_NAME = \"Demo Crawler\"\n"), 0o644); err != nil {
|
||||
t.Fatalf("write crawler script: %v", err)
|
||||
}
|
||||
|
||||
for _, d := range []*catalog.Drive{
|
||||
{
|
||||
ID: "spider91-main",
|
||||
Kind: "spider91",
|
||||
Name: "91 Spider",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{
|
||||
"last_crawl_at": "1800000000",
|
||||
"proxy": " http://127.0.0.1:7890 ",
|
||||
"script_path": scriptPath,
|
||||
},
|
||||
Status: "ok",
|
||||
},
|
||||
{
|
||||
ID: "crawler-spider91",
|
||||
ID: "crawler-main",
|
||||
Kind: "scriptcrawler",
|
||||
Name: "91 Spider",
|
||||
Name: "Crawler",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{
|
||||
"builtin": "spider91",
|
||||
"last_crawl_at": "1800000000",
|
||||
"proxy": " http://127.0.0.1:7890 ",
|
||||
"script_path": scriptPath,
|
||||
"upload_drive_id": "p115-target",
|
||||
},
|
||||
Status: "ok",
|
||||
Status: "ok",
|
||||
TeaserEnabled: false,
|
||||
},
|
||||
{
|
||||
ID: "p115-target",
|
||||
@@ -979,27 +984,27 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
}
|
||||
for _, v := range []*catalog.Video{
|
||||
{
|
||||
ID: "spider91-crawler-spider91-local",
|
||||
DriveID: "crawler-spider91",
|
||||
ID: "scriptcrawler-crawler-main-local",
|
||||
DriveID: "crawler-main",
|
||||
FileID: "local.mp4",
|
||||
FileName: "local.mp4",
|
||||
Title: "Local",
|
||||
Size: 123,
|
||||
Ext: "mp4",
|
||||
ThumbnailURL: "/p/thumb/spider91-crawler-spider91-local",
|
||||
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-main-local",
|
||||
PreviewStatus: "ready",
|
||||
DurationSeconds: 12,
|
||||
PublishedAt: time.Now(),
|
||||
},
|
||||
{
|
||||
ID: "scriptcrawler-crawler-spider91-migrated",
|
||||
ID: "scriptcrawler-crawler-main-migrated",
|
||||
DriveID: "p115-target",
|
||||
FileID: "uploaded-id",
|
||||
FileName: "migrated.mp4",
|
||||
Title: "Migrated",
|
||||
Size: 456,
|
||||
Ext: "mp4",
|
||||
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-spider91-migrated",
|
||||
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-main-migrated",
|
||||
PreviewStatus: "ready",
|
||||
DurationSeconds: 34,
|
||||
PublishedAt: time.Now(),
|
||||
@@ -1027,6 +1032,7 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
Kind string `json:"kind"`
|
||||
Proxy string `json:"proxy"`
|
||||
UploadDriveID string `json:"uploadDriveId"`
|
||||
TeaserEnabled bool `json:"teaserEnabled"`
|
||||
LastCrawlAt int64 `json:"lastCrawlAt"`
|
||||
TotalCrawled int `json:"totalCrawledCount"`
|
||||
LocalVideos int `json:"localVideoCount"`
|
||||
@@ -1038,11 +1044,12 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
byID := map[string]struct {
|
||||
type crawlerListRow struct {
|
||||
Name string
|
||||
Kind string
|
||||
Proxy string
|
||||
UploadDriveID string
|
||||
TeaserEnabled bool
|
||||
LastCrawlAt int64
|
||||
TotalCrawled int
|
||||
LocalVideos int
|
||||
@@ -1050,25 +1057,15 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
ThumbnailReady int
|
||||
TeaserReady int
|
||||
FingerprintReady int
|
||||
}{}
|
||||
}
|
||||
byID := map[string]crawlerListRow{}
|
||||
for _, d := range got {
|
||||
byID[d.ID] = struct {
|
||||
Name string
|
||||
Kind string
|
||||
Proxy string
|
||||
UploadDriveID string
|
||||
LastCrawlAt int64
|
||||
TotalCrawled int
|
||||
LocalVideos int
|
||||
MigratedVideo int
|
||||
ThumbnailReady int
|
||||
TeaserReady int
|
||||
FingerprintReady int
|
||||
}{
|
||||
byID[d.ID] = crawlerListRow{
|
||||
Name: d.Name,
|
||||
Kind: d.Kind,
|
||||
Proxy: d.Proxy,
|
||||
UploadDriveID: d.UploadDriveID,
|
||||
TeaserEnabled: d.TeaserEnabled,
|
||||
LastCrawlAt: d.LastCrawlAt,
|
||||
TotalCrawled: d.TotalCrawled,
|
||||
LocalVideos: d.LocalVideos,
|
||||
@@ -1078,32 +1075,32 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
FingerprintReady: d.FingerprintReady,
|
||||
}
|
||||
}
|
||||
if _, ok := byID["spider91-main"]; ok {
|
||||
t.Fatal("legacy spider91 drive should not be returned by crawler list")
|
||||
}
|
||||
if _, ok := byID["crawler-script-deleted"]; ok {
|
||||
t.Fatal("crawler without script_path should not be returned by crawler list")
|
||||
}
|
||||
if byID["crawler-spider91"].Kind != "scriptcrawler" {
|
||||
t.Fatalf("crawler kind = %q, want scriptcrawler", byID["crawler-spider91"].Kind)
|
||||
if byID["crawler-main"].Kind != "scriptcrawler" {
|
||||
t.Fatalf("crawler kind = %q, want scriptcrawler", byID["crawler-main"].Kind)
|
||||
}
|
||||
if byID["crawler-spider91"].Name != "91Porn" {
|
||||
t.Fatalf("crawler name = %q, want script metadata name", byID["crawler-spider91"].Name)
|
||||
if byID["crawler-main"].Name != "Demo Crawler" {
|
||||
t.Fatalf("crawler name = %q, want script metadata name", byID["crawler-main"].Name)
|
||||
}
|
||||
if byID["crawler-spider91"].Proxy != "http://127.0.0.1:7890" {
|
||||
t.Fatalf("crawler proxy = %q, want trimmed proxy", byID["crawler-spider91"].Proxy)
|
||||
if byID["crawler-main"].Proxy != "http://127.0.0.1:7890" {
|
||||
t.Fatalf("crawler proxy = %q, want trimmed proxy", byID["crawler-main"].Proxy)
|
||||
}
|
||||
if byID["crawler-spider91"].UploadDriveID != "p115-target" {
|
||||
t.Fatalf("uploadDriveId = %q, want p115-target", byID["crawler-spider91"].UploadDriveID)
|
||||
if byID["crawler-main"].UploadDriveID != "p115-target" {
|
||||
t.Fatalf("uploadDriveId = %q, want p115-target", byID["crawler-main"].UploadDriveID)
|
||||
}
|
||||
if byID["crawler-spider91"].LastCrawlAt != 1800000000 {
|
||||
t.Fatalf("lastCrawlAt = %d, want 1800000000", byID["crawler-spider91"].LastCrawlAt)
|
||||
if byID["crawler-main"].TeaserEnabled {
|
||||
t.Fatal("teaserEnabled = true, want false from crawler drive")
|
||||
}
|
||||
if byID["crawler-spider91"].TotalCrawled != 2 || byID["crawler-spider91"].LocalVideos != 1 || byID["crawler-spider91"].MigratedVideo != 1 {
|
||||
t.Fatalf("crawler counts = total %d local %d migrated %d, want 2/1/1", byID["crawler-spider91"].TotalCrawled, byID["crawler-spider91"].LocalVideos, byID["crawler-spider91"].MigratedVideo)
|
||||
if byID["crawler-main"].LastCrawlAt != 1800000000 {
|
||||
t.Fatalf("lastCrawlAt = %d, want 1800000000", byID["crawler-main"].LastCrawlAt)
|
||||
}
|
||||
if byID["crawler-spider91"].ThumbnailReady != 2 || byID["crawler-spider91"].TeaserReady != 2 || byID["crawler-spider91"].FingerprintReady != 2 {
|
||||
t.Fatalf("asset ready counts = thumb %d teaser %d fingerprint %d, want 2/2/2", byID["crawler-spider91"].ThumbnailReady, byID["crawler-spider91"].TeaserReady, byID["crawler-spider91"].FingerprintReady)
|
||||
if byID["crawler-main"].TotalCrawled != 2 || byID["crawler-main"].LocalVideos != 1 || byID["crawler-main"].MigratedVideo != 1 {
|
||||
t.Fatalf("crawler counts = total %d local %d migrated %d, want 2/1/1", byID["crawler-main"].TotalCrawled, byID["crawler-main"].LocalVideos, byID["crawler-main"].MigratedVideo)
|
||||
}
|
||||
if byID["crawler-main"].ThumbnailReady != 2 || byID["crawler-main"].TeaserReady != 2 || byID["crawler-main"].FingerprintReady != 2 {
|
||||
t.Fatalf("asset ready counts = thumb %d teaser %d fingerprint %d, want 2/2/2", byID["crawler-main"].ThumbnailReady, byID["crawler-main"].TeaserReady, byID["crawler-main"].FingerprintReady)
|
||||
}
|
||||
if _, ok := byID["onedrive-main"]; ok {
|
||||
t.Fatal("onedrive should not be returned by crawler list")
|
||||
@@ -1125,10 +1122,7 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
for _, d := range drives {
|
||||
driveIDs[d.ID] = true
|
||||
}
|
||||
if !driveIDs["spider91-main"] {
|
||||
t.Fatal("legacy spider91 drive should remain visible in drive list for deletion")
|
||||
}
|
||||
if driveIDs["crawler-spider91"] {
|
||||
if driveIDs["crawler-main"] {
|
||||
t.Fatal("scriptcrawler should not be returned by drive list")
|
||||
}
|
||||
}
|
||||
@@ -1148,15 +1142,15 @@ func TestHandleUpsertCrawlerRequiresScriptPath(t *testing.T) {
|
||||
|
||||
srv := &AdminServer{Catalog: cat}
|
||||
scriptPath := filepath.Join(tmp, "custom.py")
|
||||
if err := os.WriteFile(scriptPath, []byte("CRAWLER_NAME = \"91 Spider\"\n"), 0o644); err != nil {
|
||||
if err := os.WriteFile(scriptPath, []byte("CRAWLER_NAME = \"Demo Crawler\"\n"), 0o644); err != nil {
|
||||
t.Fatalf("write crawler script: %v", err)
|
||||
}
|
||||
|
||||
// 不再内置任何爬虫:没有脚本路径的保存请求必须被拒绝,
|
||||
// 旧的 builtin 字段也不再有"免脚本"特权。
|
||||
req := httptest.NewRequest(http.MethodPost, "/admin/api/crawlers", strings.NewReader(`{
|
||||
"id": "spider91-main",
|
||||
"builtin": "spider91",
|
||||
"id": "crawler-main",
|
||||
"builtin": "legacy",
|
||||
"scriptPath": "",
|
||||
"targetNew": "15"
|
||||
}`))
|
||||
@@ -1168,10 +1162,11 @@ func TestHandleUpsertCrawlerRequiresScriptPath(t *testing.T) {
|
||||
|
||||
// 带脚本路径时正常保存,且请求中的 builtin 字段被忽略,不会写入凭证。
|
||||
req = httptest.NewRequest(http.MethodPost, "/admin/api/crawlers", strings.NewReader(`{
|
||||
"id": "spider91-main",
|
||||
"builtin": "spider91",
|
||||
"id": "crawler-main",
|
||||
"builtin": "legacy",
|
||||
"scriptPath": "`+scriptPath+`",
|
||||
"targetNew": "15"
|
||||
"targetNew": "15",
|
||||
"teaserEnabled": false
|
||||
}`))
|
||||
rr = httptest.NewRecorder()
|
||||
srv.handleUpsertCrawler(rr, req)
|
||||
@@ -1179,7 +1174,7 @@ func TestHandleUpsertCrawlerRequiresScriptPath(t *testing.T) {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
|
||||
got, err := cat.GetDrive(ctx, "spider91-main")
|
||||
got, err := cat.GetDrive(ctx, "crawler-main")
|
||||
if err != nil {
|
||||
t.Fatalf("get crawler drive: %v", err)
|
||||
}
|
||||
@@ -1189,12 +1184,15 @@ func TestHandleUpsertCrawlerRequiresScriptPath(t *testing.T) {
|
||||
if got.Credentials["python_path"] != "" || got.Credentials["config_json"] != "" {
|
||||
t.Fatalf("legacy hidden credentials should not be saved: %+v", got.Credentials)
|
||||
}
|
||||
if got.Name != "91 Spider" {
|
||||
if got.Name != "Demo Crawler" {
|
||||
t.Fatalf("name = %q, want script metadata name", got.Name)
|
||||
}
|
||||
if got.Credentials["script_path"] != scriptPath {
|
||||
t.Fatalf("script_path = %q, want %q", got.Credentials["script_path"], scriptPath)
|
||||
}
|
||||
if got.TeaserEnabled {
|
||||
t.Fatal("teaserEnabled = true, want false from request")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUpsertCrawlerGeneratesIDFromScriptName(t *testing.T) {
|
||||
@@ -1271,18 +1269,28 @@ func TestHandleUpsertCrawlerPersistsAndValidatesUploadDrive(t *testing.T) {
|
||||
for _, d := range []*catalog.Drive{
|
||||
{ID: "p115-target", Kind: "p115", Name: "115", RootID: "0", Credentials: map[string]string{"cookie": "x"}},
|
||||
{ID: "wopan-target", Kind: "wopan", Name: "沃盘", RootID: "0", Credentials: map[string]string{"access_token": "a", "refresh_token": "r"}},
|
||||
{ID: "guangyapan-target", Kind: "guangyapan", Name: "光鸭", RootID: "", Credentials: map[string]string{"access_token": "a", "refresh_token": "r"}},
|
||||
{ID: "local-target", Kind: "localstorage", Name: "Local", RootID: "/", Credentials: map[string]string{"path": tmp}},
|
||||
} {
|
||||
if err := cat.UpsertDrive(ctx, d); err != nil {
|
||||
t.Fatalf("seed drive %s: %v", d.ID, err)
|
||||
}
|
||||
}
|
||||
srv := &AdminServer{Catalog: cat}
|
||||
var teaserCallbackID string
|
||||
var teaserCallbackEnabled bool
|
||||
srv := &AdminServer{
|
||||
Catalog: cat,
|
||||
OnTeaserEnabledChanged: func(id string, enabled bool) {
|
||||
teaserCallbackID = id
|
||||
teaserCallbackEnabled = enabled
|
||||
},
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/admin/api/crawlers", strings.NewReader(`{
|
||||
"id": "crawler-upload",
|
||||
"scriptPath": "`+scriptPath+`",
|
||||
"uploadDriveId": "p115-target"
|
||||
"uploadDriveId": "p115-target",
|
||||
"teaserEnabled": false
|
||||
}`))
|
||||
rr := httptest.NewRecorder()
|
||||
srv.handleUpsertCrawler(rr, req)
|
||||
@@ -1296,6 +1304,12 @@ func TestHandleUpsertCrawlerPersistsAndValidatesUploadDrive(t *testing.T) {
|
||||
if got.Credentials["upload_drive_id"] != "p115-target" {
|
||||
t.Fatalf("upload_drive_id = %q, want p115-target", got.Credentials["upload_drive_id"])
|
||||
}
|
||||
if got.TeaserEnabled {
|
||||
t.Fatal("teaserEnabled = true, want false")
|
||||
}
|
||||
if teaserCallbackID != "" {
|
||||
t.Fatalf("teaser callback on create = %q, want none", teaserCallbackID)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodPost, "/admin/api/crawlers", strings.NewReader(`{
|
||||
"id": "crawler-upload",
|
||||
@@ -1314,6 +1328,52 @@ func TestHandleUpsertCrawlerPersistsAndValidatesUploadDrive(t *testing.T) {
|
||||
if got.Credentials["upload_drive_id"] != "wopan-target" {
|
||||
t.Fatalf("upload_drive_id = %q, want wopan-target", got.Credentials["upload_drive_id"])
|
||||
}
|
||||
if got.TeaserEnabled {
|
||||
t.Fatal("teaserEnabled after edit without field = true, want preserved false")
|
||||
}
|
||||
if teaserCallbackID != "" {
|
||||
t.Fatalf("teaser callback after preserved edit = %q, want none", teaserCallbackID)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodPost, "/admin/api/crawlers", strings.NewReader(`{
|
||||
"id": "crawler-upload",
|
||||
"scriptPath": "`+scriptPath+`",
|
||||
"uploadDriveId": "guangyapan-target"
|
||||
}`))
|
||||
rr = httptest.NewRecorder()
|
||||
srv.handleUpsertCrawler(rr, req)
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("guangyapan target status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
got, err = cat.GetDrive(ctx, "crawler-upload")
|
||||
if err != nil {
|
||||
t.Fatalf("get crawler after guangyapan target: %v", err)
|
||||
}
|
||||
if got.Credentials["upload_drive_id"] != "guangyapan-target" {
|
||||
t.Fatalf("upload_drive_id = %q, want guangyapan-target", got.Credentials["upload_drive_id"])
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodPost, "/admin/api/crawlers", strings.NewReader(`{
|
||||
"id": "crawler-upload",
|
||||
"scriptPath": "`+scriptPath+`",
|
||||
"uploadDriveId": "wopan-target",
|
||||
"teaserEnabled": true
|
||||
}`))
|
||||
rr = httptest.NewRecorder()
|
||||
srv.handleUpsertCrawler(rr, req)
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("enable teaser status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
got, err = cat.GetDrive(ctx, "crawler-upload")
|
||||
if err != nil {
|
||||
t.Fatalf("get crawler after teaser enable: %v", err)
|
||||
}
|
||||
if !got.TeaserEnabled {
|
||||
t.Fatal("teaserEnabled after explicit enable = false, want true")
|
||||
}
|
||||
if teaserCallbackID != "crawler-upload" || !teaserCallbackEnabled {
|
||||
t.Fatalf("teaser callback = %q/%v, want crawler-upload/true", teaserCallbackID, teaserCallbackEnabled)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodPost, "/admin/api/crawlers", strings.NewReader(`{
|
||||
"id": "crawler-upload",
|
||||
@@ -1704,6 +1764,94 @@ func TestHandleWopanQRStatus(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleGuangYaPanQRStart(t *testing.T) {
|
||||
upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/v1/auth/device/code" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode body: %v", err)
|
||||
}
|
||||
if body["scope"] != "user" {
|
||||
t.Fatalf("scope = %#v, want user", body["scope"])
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"device_code": "device-1",
|
||||
"verification_uri_complete": "https://account.guangyapan.example/device?code=abc",
|
||||
"interval": 5,
|
||||
"expires_in": 300,
|
||||
})
|
||||
}))
|
||||
t.Cleanup(upstream.Close)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/admin/api/drives/guangyapan/qr", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
(&AdminServer{GuangYaPanAccountBaseURL: upstream.URL}).handleGuangYaPanQRStart(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got struct {
|
||||
DeviceCode string `json:"deviceCode"`
|
||||
QRCodeURL string `json:"qrCodeUrl"`
|
||||
QRImageDataURL string `json:"qrImageDataUrl"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if got.DeviceCode != "device-1" || got.QRCodeURL != "https://account.guangyapan.example/device?code=abc" {
|
||||
t.Fatalf("response = %#v", got)
|
||||
}
|
||||
if !strings.HasPrefix(got.QRImageDataURL, "data:image/png;base64,") {
|
||||
t.Fatalf("qr image = %q", got.QRImageDataURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleGuangYaPanQRStatus(t *testing.T) {
|
||||
upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.URL.Path != "/v1/auth/token" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode body: %v", err)
|
||||
}
|
||||
if body["device_code"] != "device-1" {
|
||||
t.Fatalf("device_code = %#v, want device-1", body["device_code"])
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"access_token": "access-1",
|
||||
"refresh_token": "refresh-1",
|
||||
"token_type": "Bearer",
|
||||
})
|
||||
}))
|
||||
t.Cleanup(upstream.Close)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/admin/api/drives/guangyapan/qr/status?deviceCode=device-1", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
(&AdminServer{GuangYaPanAccountBaseURL: upstream.URL}).handleGuangYaPanQRStatus(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got struct {
|
||||
State string `json:"state"`
|
||||
AccessToken string `json:"accessToken"`
|
||||
RefreshToken string `json:"refreshToken"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if got.State != "success" || got.AccessToken != "access-1" || got.RefreshToken != "refresh-1" {
|
||||
t.Fatalf("response = %#v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleTestCrawlerScriptRunsImportedScript(t *testing.T) {
|
||||
if _, err := exec.LookPath("python3"); err != nil {
|
||||
t.Skip("python3 is required for crawler script dry-run")
|
||||
@@ -1798,7 +1946,8 @@ func TestHandleListDrivesIncludesGoogleDriveOnlineAPIMode(t *testing.T) {
|
||||
Name: "Google Legacy",
|
||||
RootID: "root",
|
||||
Credentials: map[string]string{
|
||||
"refresh_token": "legacy-refresh",
|
||||
"refresh_token": "legacy-refresh",
|
||||
"api_url_address": "https://openlist-api.example/googleui/renewapi",
|
||||
},
|
||||
Status: "ok",
|
||||
},
|
||||
@@ -1829,15 +1978,18 @@ func TestHandleListDrivesIncludesGoogleDriveOnlineAPIMode(t *testing.T) {
|
||||
}
|
||||
|
||||
var got []struct {
|
||||
ID string `json:"id"`
|
||||
GoogleDriveUseOnlineAPI bool `json:"googleDriveUseOnlineAPI"`
|
||||
ID string `json:"id"`
|
||||
GoogleDriveUseOnlineAPI bool `json:"googleDriveUseOnlineAPI"`
|
||||
GoogleDriveOpenListAPIURL string `json:"googleDriveOpenListApiUrl"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
byID := map[string]bool{}
|
||||
byAPIURL := map[string]string{}
|
||||
for _, d := range got {
|
||||
byID[d.ID] = d.GoogleDriveUseOnlineAPI
|
||||
byAPIURL[d.ID] = d.GoogleDriveOpenListAPIURL
|
||||
}
|
||||
if !byID["google-legacy"] {
|
||||
t.Fatalf("legacy google drive use_online_api = false, want true")
|
||||
@@ -1845,6 +1997,9 @@ func TestHandleListDrivesIncludesGoogleDriveOnlineAPIMode(t *testing.T) {
|
||||
if byID["google-oauth"] {
|
||||
t.Fatalf("oauth google drive use_online_api = true, want false")
|
||||
}
|
||||
if byAPIURL["google-legacy"] != "https://openlist-api.example/googleui/renewapi" {
|
||||
t.Fatalf("legacy google drive openlist api url = %q, want custom URL", byAPIURL["google-legacy"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleListDrivesIncludesTeaserCounts(t *testing.T) {
|
||||
@@ -2321,6 +2476,52 @@ func TestHandleAdminListVideosFiltersByDriveID(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleAdminListVideosDoesNotExposeCategory(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: "video-1",
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
Title: "Video",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/admin/api/videos", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
(&AdminServer{Catalog: cat}).handleAdminListVideos(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got struct {
|
||||
Items []map[string]any `json:"items"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if len(got.Items) != 1 {
|
||||
t.Fatalf("items len = %d, want 1", len(got.Items))
|
||||
}
|
||||
if _, ok := got.Items[0]["category"]; ok {
|
||||
t.Fatalf("admin video response exposed category: %#v", got.Items[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleAdminListVideosPaginates(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
@@ -2373,6 +2574,80 @@ func TestHandleAdminListVideosPaginates(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleAdminListVideosMarksActivePreviewGeneration(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*catalog.Video{
|
||||
{
|
||||
ID: "active-video",
|
||||
DriveID: "OneDrive",
|
||||
FileID: "active-file",
|
||||
Title: "Active video",
|
||||
PreviewStatus: "ready",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
{
|
||||
ID: "idle-video",
|
||||
DriveID: "OneDrive",
|
||||
FileID: "idle-file",
|
||||
Title: "Idle video",
|
||||
PreviewStatus: "ready",
|
||||
PublishedAt: now.Add(-time.Hour),
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed video %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/admin/api/videos?driveId=OneDrive", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
(&AdminServer{
|
||||
Catalog: cat,
|
||||
GetPreviewGenerationVideoIDs: func() map[string]bool {
|
||||
return map[string]bool{"active-video": true}
|
||||
},
|
||||
}).handleAdminListVideos(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got struct {
|
||||
Items []catalog.Video `json:"items"`
|
||||
Total int `json:"total"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if got.Total != 2 || len(got.Items) != 2 {
|
||||
t.Fatalf("response total/items = %d/%d, want 2/2", got.Total, len(got.Items))
|
||||
}
|
||||
statusByID := map[string]string{}
|
||||
for _, item := range got.Items {
|
||||
statusByID[item.ID] = item.PreviewStatus
|
||||
}
|
||||
if statusByID["active-video"] != "generating" {
|
||||
t.Fatalf("active status = %q, want generating", statusByID["active-video"])
|
||||
}
|
||||
if statusByID["idle-video"] != "ready" {
|
||||
t.Fatalf("idle status = %q, want ready", statusByID["idle-video"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleRegenAllPreviewsInvokesHook(t *testing.T) {
|
||||
called := false
|
||||
server := &AdminServer{
|
||||
|
||||
@@ -25,7 +25,6 @@ import (
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives/localstorage"
|
||||
"github.com/video-site/backend/internal/drives/localupload"
|
||||
"github.com/video-site/backend/internal/drives/spider91"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
"github.com/video-site/backend/internal/proxy"
|
||||
)
|
||||
@@ -55,12 +54,16 @@ type Server struct {
|
||||
LocalDir string
|
||||
UploadDir string
|
||||
OnVideoUploaded func(*catalog.Video)
|
||||
// OnHideVideo 处理前台「不再展示」。隐藏机制已废弃,改走拉黑逻辑:
|
||||
// 删除库中记录 + 本地封面/预览,保留网盘源文件,并写黑名单墓碑
|
||||
// (扫盘不再入库)。未注入时回退为旧的 hidden 标记。
|
||||
OnHideVideo func(ctx context.Context, videoID string) error
|
||||
|
||||
tagCacheMu sync.Mutex
|
||||
tagCacheUntil time.Time
|
||||
tagCache []TagDTO
|
||||
|
||||
// GetTheme 返回当前生效的主题("dark" | "pink")。前台 /api/settings/theme 用,
|
||||
// GetTheme 返回当前生效的主题("dark" | "pink" | "sky")。前台 /api/settings/theme 用,
|
||||
// 不需要登录。无注入时返回 "dark"。
|
||||
GetTheme func() string
|
||||
}
|
||||
@@ -90,7 +93,6 @@ type VideoDTO struct {
|
||||
Dislikes int `json:"dislikes"`
|
||||
PublishedAt string `json:"publishedAt"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
Category string `json:"category,omitempty"`
|
||||
}
|
||||
|
||||
type TagDTO struct {
|
||||
@@ -149,18 +151,17 @@ func (s *Server) RegisterRoutes(r chi.Router, a *auth.Authenticator) {
|
||||
// 代理路由同样需要鉴权,防止绕过
|
||||
r.Get("/p/stream/{driveID}/*", s.handleStream)
|
||||
r.Get("/p/upload/{videoID}", s.handleUploadedVideo)
|
||||
r.Get("/p/spider91/{videoID}", s.handleSpider91Video)
|
||||
r.Get("/p/preview/{videoID}", s.handlePreview)
|
||||
r.Get("/p/thumb/{videoID}", s.handleThumb)
|
||||
})
|
||||
}
|
||||
|
||||
// handleGetTheme 返回当前生效的主题。无需登录。响应永远是
|
||||
// {"theme": "dark"} 或 {"theme": "pink"},便于前端无脑解析。
|
||||
// {"theme": "dark" | "pink" | "sky"},便于前端无脑解析。
|
||||
func (s *Server) handleGetTheme(w http.ResponseWriter, r *http.Request) {
|
||||
theme := "dark"
|
||||
if s.GetTheme != nil {
|
||||
if v := s.GetTheme(); v == "pink" || v == "dark" {
|
||||
if v := s.GetTheme(); v == "pink" || v == "dark" || v == "sky" {
|
||||
theme = v
|
||||
}
|
||||
}
|
||||
@@ -291,7 +292,6 @@ func (s *Server) handleList(w http.ResponseWriter, r *http.Request) {
|
||||
params := catalog.ListParams{
|
||||
Keyword: q.Get("q"),
|
||||
Tag: q.Get("tag"),
|
||||
Category: q.Get("cat"),
|
||||
Sort: sort,
|
||||
Page: page,
|
||||
PageSize: size,
|
||||
@@ -526,11 +526,9 @@ func (s *Server) handleTags(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// shortsNextReq 客户端把当前轮已看过的 video id 列表传上来。
|
||||
// PreferredFromVideoID 来自短视频页最近一次点赞成功的视频,用于优先推荐相似标签。
|
||||
type shortsNextReq struct {
|
||||
SeenIDs []string `json:"seenIds"`
|
||||
Count int `json:"count"`
|
||||
PreferredFromVideoID string `json:"preferredFromVideoId"`
|
||||
SeenIDs []string `json:"seenIds"`
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
// ShortsItemDTO 是短视频流单条的精简结构。比 VideoDTO 多 videoSrc / poster,
|
||||
@@ -548,8 +546,8 @@ type ShortsItemDTO struct {
|
||||
// - 服务器从未在 seenIds 中的可见视频里随机抽至多 count 条返回
|
||||
// - 当返回数量 < count 且小于全库可见总数时,说明本轮即将结束,
|
||||
// 返回 roundComplete=true,前端应在用户看完返回的这些后清空本地已看记录开新一轮
|
||||
// - 当 seenIds 已经覆盖全库时,本接口直接返回新一轮的随机一批
|
||||
// (传 seenIds=[] 即可让客户端在轮次完成后重新开始)
|
||||
// - 当 seenIds 真实覆盖当前全部可见视频时,本接口直接返回新一轮的随机一批
|
||||
// (不能仅看 seenIds 长度,里面可能有隐藏、删除或历史脏 ID)
|
||||
func (s *Server) handleShortsNext(w http.ResponseWriter, r *http.Request) {
|
||||
var body shortsNextReq
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
|
||||
@@ -570,22 +568,18 @@ func (s *Server) handleShortsNext(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// 如果客户端已看记录已经 ≥ 全库,则视为新一轮,直接忽略 seenIds
|
||||
exclude := body.SeenIDs
|
||||
if total > 0 && len(exclude) >= total {
|
||||
exclude = nil
|
||||
}
|
||||
|
||||
var items []*catalog.Video
|
||||
if strings.TrimSpace(body.PreferredFromVideoID) != "" {
|
||||
items, err = s.Catalog.RandomVideosForPreferredVideoExcluding(r.Context(), body.PreferredFromVideoID, exclude, count)
|
||||
} else {
|
||||
items, err = s.Catalog.RandomVideosExcluding(r.Context(), exclude, count)
|
||||
}
|
||||
items, err := s.Catalog.RandomVideosExcluding(r.Context(), body.SeenIDs, count)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
if total > 0 && len(items) == 0 && len(body.SeenIDs) > 0 {
|
||||
items, err = s.Catalog.RandomVideosExcluding(r.Context(), nil, count)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 注入 sourceLabel 以便前端展示来源网盘
|
||||
driveLabels := make(map[string]string)
|
||||
@@ -687,7 +681,14 @@ func (s *Server) handleView(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
func (s *Server) handleHideVideo(w http.ResponseWriter, r *http.Request) {
|
||||
id := routeParam(r, "id")
|
||||
if err := s.Catalog.HideVideo(r.Context(), id); err != nil {
|
||||
var err error
|
||||
if s.OnHideVideo != nil {
|
||||
// 走拉黑逻辑:删记录 + 删本地封面/预览 + 写墓碑,保留网盘源文件。
|
||||
err = s.OnHideVideo(r.Context(), id)
|
||||
} else {
|
||||
err = s.Catalog.HideVideo(r.Context(), id)
|
||||
}
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
writeErr(w, http.StatusNotFound, err)
|
||||
return
|
||||
@@ -828,44 +829,6 @@ func (s *Server) handleUploadedVideo(w http.ResponseWriter, r *http.Request) {
|
||||
http.ServeFile(w, r, path)
|
||||
}
|
||||
|
||||
// handleSpider91Video 服务 spider91 drive 下载到本地的视频文件。
|
||||
// 路径形如 /p/spider91/<videoID>,videoID = "spider91-<driveID>-<sourceID>"。
|
||||
// 通过 catalog 拿到 file_id("<sourceID>.mp4"),再让 driver 解析到绝对路径并 ServeFile。
|
||||
func (s *Server) handleSpider91Video(w http.ResponseWriter, r *http.Request) {
|
||||
videoID := routeParam(r, "videoID")
|
||||
v, err := s.Catalog.GetVideo(r.Context(), videoID)
|
||||
if err != nil || v.Hidden {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
if s.Proxy == nil || s.Proxy.Registry == nil {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
d, ok := s.Proxy.Registry.Get(v.DriveID)
|
||||
if !ok || d.Kind() != spider91.Kind {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
sd, ok := d.(*spider91.Driver)
|
||||
if !ok {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
path, err := sd.VideoPath(v.FileID)
|
||||
if err != nil {
|
||||
http.Error(w, "invalid video id", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil || info.IsDir() || info.Size() == 0 {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Cache-Control", "private, max-age=300")
|
||||
http.ServeFile(w, r, path)
|
||||
}
|
||||
|
||||
func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
|
||||
videoID := routeParam(r, "videoID")
|
||||
v, err := s.Catalog.GetVideo(r.Context(), videoID)
|
||||
@@ -944,7 +907,6 @@ func mapVideo(v *catalog.Video) VideoDTO {
|
||||
Dislikes: v.Dislikes,
|
||||
PublishedAt: v.PublishedAt.Format("2006-01-02"),
|
||||
Tags: tags,
|
||||
Category: v.Category,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -970,17 +932,21 @@ func thumbnailURL(v *catalog.Video) string {
|
||||
return base + "?v=" + strconv.FormatInt(v.UpdatedAt.UnixMilli(), 10)
|
||||
}
|
||||
|
||||
// transcodedSource 在视频有就绪的浏览器兼容性转码产物时返回产物的播放地址。
|
||||
// 产物和原始文件在同一个 drive 上,走同一条 /p/stream 代理/302 链路。
|
||||
func transcodedSource(v *catalog.Video) (string, bool) {
|
||||
if v.TranscodeStatus == "ready" && v.TranscodedFileID != "" && v.DriveID != localUploadDriveID {
|
||||
return fmt.Sprintf("/p/stream/%s/%s", pathSegment(v.DriveID), pathSegment(v.TranscodedFileID)), true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (s *Server) videoSource(v *catalog.Video) string {
|
||||
if v.DriveID == localUploadDriveID {
|
||||
return "/p/upload/" + pathSegment(v.ID)
|
||||
}
|
||||
if s.Proxy != nil && s.Proxy.Registry != nil {
|
||||
if d, ok := s.Proxy.Registry.Get(v.DriveID); ok {
|
||||
switch d.Kind() {
|
||||
case spider91.Kind:
|
||||
return "/p/spider91/" + pathSegment(v.ID)
|
||||
}
|
||||
}
|
||||
if src, ok := transcodedSource(v); ok {
|
||||
return src
|
||||
}
|
||||
return fmt.Sprintf("/p/stream/%s/%s", pathSegment(v.DriveID), pathSegment(v.FileID))
|
||||
}
|
||||
@@ -991,6 +957,9 @@ func videoSource(v *catalog.Video) string {
|
||||
if v.DriveID == localUploadDriveID {
|
||||
return "/p/upload/" + pathSegment(v.ID)
|
||||
}
|
||||
if src, ok := transcodedSource(v); ok {
|
||||
return src
|
||||
}
|
||||
return fmt.Sprintf("/p/stream/%s/%s", pathSegment(v.DriveID), pathSegment(v.FileID))
|
||||
}
|
||||
|
||||
@@ -1048,14 +1017,14 @@ func driveKindLabel(kind string) string {
|
||||
return "PikPak"
|
||||
case "wopan":
|
||||
return "联通网盘"
|
||||
case "guangyapan":
|
||||
return "光鸭网盘"
|
||||
case "onedrive":
|
||||
return "OneDrive"
|
||||
case "googledrive":
|
||||
return "Google Drive"
|
||||
case localstorage.Kind:
|
||||
return "本地存储"
|
||||
case spider91.Kind:
|
||||
return "91 爬虫"
|
||||
default:
|
||||
return kind
|
||||
}
|
||||
|
||||
@@ -498,6 +498,68 @@ func TestHandleListLatestPrefersReadyThumbnails(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleListIgnoresCategoryQueryAndDoesNotExposeCategory(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*catalog.Video{
|
||||
{
|
||||
ID: "video-a",
|
||||
DriveID: "drive",
|
||||
FileID: "file-a",
|
||||
Title: "A",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
{
|
||||
ID: "video-b",
|
||||
DriveID: "drive",
|
||||
FileID: "file-b",
|
||||
Title: "B",
|
||||
PublishedAt: now.Add(-time.Hour),
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed video %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
rr := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/list?page=1&size=24&cat=alpha", nil)
|
||||
(&Server{Catalog: cat}).handleList(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got struct {
|
||||
Items []map[string]any `json:"items"`
|
||||
Total int `json:"total"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if got.Total != 2 || len(got.Items) != 2 {
|
||||
t.Fatalf("response total/items = %d/%d, want 2/2", got.Total, len(got.Items))
|
||||
}
|
||||
for _, item := range got.Items {
|
||||
if _, ok := item["category"]; ok {
|
||||
t.Fatalf("list response exposed category: %#v", item)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUploadVideoSavesFileVideoTagsAndQueuesPreview(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
@@ -763,7 +825,6 @@ func TestHandleTagsReturnsUnifiedTagPool(t *testing.T) {
|
||||
FileID: "file-1",
|
||||
Title: "清纯女大后入",
|
||||
Tags: []string{"后入", "女大"},
|
||||
Category: "random-category",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -810,7 +871,7 @@ func TestHandleTagsReturnsUnifiedTagPool(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
func TestHandleShortsNextReturnsRandomBatchExcludingSeen(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -834,7 +895,7 @@ func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/shorts/next", strings.NewReader(`{"seenIds":["current"],"count":3,"preferredFromVideoId":"current"}`))
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/shorts/next", strings.NewReader(`{"seenIds":["current"],"count":3}`))
|
||||
rr := httptest.NewRecorder()
|
||||
(&Server{Catalog: cat}).handleShortsNext(rr, req)
|
||||
|
||||
@@ -857,10 +918,7 @@ func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
t.Fatalf("total = %d, want 4", got.Total)
|
||||
}
|
||||
if got.RoundComplete {
|
||||
t.Fatalf("roundComplete = true, want false with fallback-filled batch")
|
||||
}
|
||||
if !containsString(ids, "rare-1") {
|
||||
t.Fatalf("ids = %#v, want rare-1 from least populated tag", ids)
|
||||
t.Fatalf("roundComplete = true, want false with a full remaining batch")
|
||||
}
|
||||
if containsString(ids, "current") {
|
||||
t.Fatalf("ids = %#v, should exclude current", ids)
|
||||
@@ -868,6 +926,76 @@ func TestHandleShortsNextUsesPreferredVideoLeastPopulatedTag(t *testing.T) {
|
||||
if len(ids) != 3 {
|
||||
t.Fatalf("ids = %#v, want 3 items", ids)
|
||||
}
|
||||
for _, want := range []string{"common-1", "common-2", "rare-1"} {
|
||||
if !containsString(ids, want) {
|
||||
t.Fatalf("ids = %#v, want remaining id %s", ids, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleShortsNextDoesNotResetForStaleSeenIDs(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*catalog.Video{
|
||||
{ID: "seen-1", DriveID: "drive", FileID: "f-seen-1", Title: "seen 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "fresh-1", DriveID: "drive", FileID: "f-fresh-1", Title: "fresh 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "fresh-2", DriveID: "drive", FileID: "f-fresh-2", Title: "fresh 2", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "hidden-1", DriveID: "drive", FileID: "f-hidden-1", Title: "hidden 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
if err := cat.HideVideo(ctx, "hidden-1"); err != nil {
|
||||
t.Fatalf("hide hidden-1: %v", err)
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/shorts/next", strings.NewReader(`{"seenIds":["seen-1","hidden-1","deleted-stale"],"count":3}`))
|
||||
rr := httptest.NewRecorder()
|
||||
(&Server{Catalog: cat}).handleShortsNext(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got struct {
|
||||
Items []ShortsItemDTO `json:"items"`
|
||||
Total int `json:"total"`
|
||||
RoundComplete bool `json:"roundComplete"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
ids := make([]string, 0, len(got.Items))
|
||||
for _, item := range got.Items {
|
||||
ids = append(ids, item.ID)
|
||||
}
|
||||
if got.Total != 3 {
|
||||
t.Fatalf("total = %d, want 3", got.Total)
|
||||
}
|
||||
if !got.RoundComplete {
|
||||
t.Fatalf("roundComplete = false, want true after returning all unviewed visible videos")
|
||||
}
|
||||
if containsString(ids, "seen-1") || containsString(ids, "hidden-1") {
|
||||
t.Fatalf("ids = %#v, should not reset and return seen or hidden videos", ids)
|
||||
}
|
||||
for _, want := range []string{"fresh-1", "fresh-2"} {
|
||||
if !containsString(ids, want) {
|
||||
t.Fatalf("ids = %#v, want %s", ids, want)
|
||||
}
|
||||
}
|
||||
if len(ids) != 2 {
|
||||
t.Fatalf("ids = %#v, want exactly the two unviewed visible videos", ids)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUpdateVideoTagsRejectsUnknownTags(t *testing.T) {
|
||||
|
||||
@@ -51,38 +51,44 @@ func (c *Catalog) Close() error { return c.db.Close() }
|
||||
// ---------- Video ----------
|
||||
|
||||
type Video struct {
|
||||
ID string `json:"id"`
|
||||
DriveID string `json:"driveId"`
|
||||
FileID string `json:"fileId"`
|
||||
FileName string `json:"fileName"`
|
||||
ContentHash string `json:"contentHash"`
|
||||
SampledSHA256 string `json:"sampledSha256"`
|
||||
FingerprintStatus string `json:"fingerprintStatus"`
|
||||
FingerprintError string `json:"fingerprintError"`
|
||||
ParentID string `json:"parentId"`
|
||||
Title string `json:"title"`
|
||||
Author string `json:"author"`
|
||||
Tags []string `json:"tags"`
|
||||
DurationSeconds int `json:"durationSeconds"`
|
||||
Size int64 `json:"size"`
|
||||
Ext string `json:"ext"`
|
||||
Quality string `json:"quality"`
|
||||
ThumbnailURL string `json:"thumbnailUrl"`
|
||||
PreviewFileID string `json:"previewFileId"`
|
||||
PreviewLocal string `json:"previewLocal"`
|
||||
PreviewStatus string `json:"previewStatus"`
|
||||
Views int `json:"views"`
|
||||
Favorites int `json:"favorites"`
|
||||
Comments int `json:"comments"`
|
||||
Likes int `json:"likes"`
|
||||
Dislikes int `json:"dislikes"`
|
||||
Category string `json:"category"`
|
||||
Hidden bool `json:"hidden"`
|
||||
Badges []string `json:"badges"`
|
||||
Description string `json:"description"`
|
||||
PublishedAt time.Time `json:"publishedAt"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
ID string `json:"id"`
|
||||
DriveID string `json:"driveId"`
|
||||
FileID string `json:"fileId"`
|
||||
FileName string `json:"fileName"`
|
||||
ContentHash string `json:"contentHash"`
|
||||
SampledSHA256 string `json:"sampledSha256"`
|
||||
FingerprintStatus string `json:"fingerprintStatus"`
|
||||
FingerprintError string `json:"fingerprintError"`
|
||||
ParentID string `json:"parentId"`
|
||||
Title string `json:"title"`
|
||||
Author string `json:"author"`
|
||||
Tags []string `json:"tags"`
|
||||
DurationSeconds int `json:"durationSeconds"`
|
||||
Size int64 `json:"size"`
|
||||
Ext string `json:"ext"`
|
||||
Quality string `json:"quality"`
|
||||
ThumbnailURL string `json:"thumbnailUrl"`
|
||||
PreviewFileID string `json:"previewFileId"`
|
||||
PreviewLocal string `json:"previewLocal"`
|
||||
PreviewStatus string `json:"previewStatus"`
|
||||
// TranscodeStatus:浏览器兼容性转码状态。
|
||||
// ''=未检测 / pending=已入队 / ready=已转码 / skipped=无需转码 / failed=失败。
|
||||
TranscodeStatus string `json:"transcodeStatus"`
|
||||
TranscodeError string `json:"transcodeError"`
|
||||
TranscodedFileID string `json:"transcodedFileId"`
|
||||
TranscodedSize int64 `json:"transcodedSize"`
|
||||
Views int `json:"views"`
|
||||
LastViewedAt time.Time `json:"lastViewedAt"`
|
||||
Favorites int `json:"favorites"`
|
||||
Comments int `json:"comments"`
|
||||
Likes int `json:"likes"`
|
||||
Dislikes int `json:"dislikes"`
|
||||
Hidden bool `json:"hidden"`
|
||||
Badges []string `json:"badges"`
|
||||
Description string `json:"description"`
|
||||
PublishedAt time.Time `json:"publishedAt"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
}
|
||||
|
||||
func (c *Catalog) UpsertVideo(ctx context.Context, v *Video) error {
|
||||
@@ -105,16 +111,16 @@ func (c *Catalog) UpsertVideo(ctx context.Context, v *Video) error {
|
||||
INSERT INTO videos (
|
||||
id, drive_id, file_id, file_name, content_hash, sampled_sha256, fingerprint_status, fingerprint_error, parent_id, title, author, tags,
|
||||
duration_seconds, size_bytes, ext, quality, thumbnail_url, thumbnail_status,
|
||||
preview_file_id, preview_local, preview_status,
|
||||
views, favorites, comments, likes, dislikes,
|
||||
category, hidden, badges, description, published_at, created_at, updated_at
|
||||
) VALUES (
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?, CASE WHEN COALESCE(?, '') != '' THEN 'ready' ELSE 'pending' END,
|
||||
?, ?, ?,
|
||||
?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?, ?, ?
|
||||
)
|
||||
preview_file_id, preview_local, preview_status,
|
||||
views, last_viewed_at, favorites, comments, likes, dislikes,
|
||||
hidden, badges, description, published_at, created_at, updated_at
|
||||
) VALUES (
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?, CASE WHEN COALESCE(?, '') != '' THEN 'ready' ELSE 'pending' END,
|
||||
?, ?, ?,
|
||||
?, ?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?, ?
|
||||
)
|
||||
ON CONFLICT(id) DO UPDATE SET
|
||||
file_name = CASE
|
||||
WHEN excluded.file_name != '' THEN excluded.file_name
|
||||
@@ -155,16 +161,15 @@ ON CONFLICT(id) DO UPDATE SET
|
||||
WHEN COALESCE(excluded.thumbnail_url, '') != '' THEN 'ready'
|
||||
ELSE videos.thumbnail_status
|
||||
END,
|
||||
category = excluded.category,
|
||||
badges = excluded.badges,
|
||||
description = excluded.description,
|
||||
badges = excluded.badges,
|
||||
description = excluded.description,
|
||||
updated_at = excluded.updated_at
|
||||
`,
|
||||
v.ID, v.DriveID, v.FileID, v.FileName, v.ContentHash, v.SampledSHA256, fingerprintStatus, v.FingerprintError, v.ParentID, v.Title, v.Author, string(tagsJSON),
|
||||
v.DurationSeconds, v.Size, v.Ext, v.Quality, v.ThumbnailURL, v.ThumbnailURL,
|
||||
v.PreviewFileID, v.PreviewLocal, nullableStatus(v.PreviewStatus),
|
||||
v.Views, v.Favorites, v.Comments, v.Likes, v.Dislikes,
|
||||
v.Category, boolToInt(v.Hidden), string(badgesJSON), v.Description,
|
||||
v.Views, unixMilliOrZero(v.LastViewedAt), v.Favorites, v.Comments, v.Likes, v.Dislikes,
|
||||
boolToInt(v.Hidden), string(badgesJSON), v.Description,
|
||||
v.PublishedAt.UnixMilli(), v.CreatedAt.UnixMilli(), v.UpdatedAt.UnixMilli(),
|
||||
)
|
||||
if err != nil {
|
||||
@@ -190,6 +195,84 @@ func (c *Catalog) UpdatePreview(ctx context.Context, id, previewLocal, status st
|
||||
return err
|
||||
}
|
||||
|
||||
// transcodeCandidateWhereSQL 圈定"可能需要浏览器兼容性转码"的视频:
|
||||
// mp4/webm/m4v 默认浏览器可播不进候选;strm 是远程引用没有本体。
|
||||
// 其余扩展名都先入候选,由转码 worker probe 实际编码后决定转码还是跳过
|
||||
// (skipped)。failed 也保留在候选里,重新点开始转码时会自动重试。
|
||||
const transcodeCandidateWhereSQL = `COALESCE(ext, '') NOT IN ('mp4', 'webm', 'm4v', 'strm')
|
||||
AND COALESCE(transcode_status, '') IN ('', 'pending', 'failed')`
|
||||
|
||||
// ListTranscodeCandidates 列出某盘所有转码候选视频。limit<=0 表示不限制。
|
||||
func (c *Catalog) ListTranscodeCandidates(ctx context.Context, driveID string, limit int) ([]*Video, error) {
|
||||
query := `SELECT ` + allVideoCols + ` FROM videos
|
||||
WHERE drive_id = ? AND ` + transcodeCandidateWhereSQL + `
|
||||
ORDER BY created_at ASC, id ASC`
|
||||
args := []any{driveID}
|
||||
if limit > 0 {
|
||||
query += ` LIMIT ?`
|
||||
args = append(args, limit)
|
||||
}
|
||||
rows, err := c.db.QueryContext(ctx, query, args...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []*Video
|
||||
for rows.Next() {
|
||||
v, err := scanVideo(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, v)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// UpdateVideoTranscode 写回单条视频的转码结果。
|
||||
// status=ready 时 transcodedFileID/transcodedSize 指向转码产物;
|
||||
// 其它 status 调用方应传空值,本函数会按传入值原样覆盖。
|
||||
func (c *Catalog) UpdateVideoTranscode(ctx context.Context, id, status, errMsg, transcodedFileID string, transcodedSize int64) error {
|
||||
_, err := c.db.ExecContext(ctx,
|
||||
`UPDATE videos SET transcode_status = ?, transcode_error = ?, transcoded_file_id = ?, transcoded_size = ?, updated_at = ? WHERE id = ?`,
|
||||
status, errMsg, transcodedFileID, transcodedSize, time.Now().UnixMilli(), id)
|
||||
return err
|
||||
}
|
||||
|
||||
// DriveTranscodeCounts 是单盘的转码进度统计。
|
||||
type DriveTranscodeCounts struct {
|
||||
// Pending 是仍在候选集合里、还没有出结果的数量(含从未检测过的)。
|
||||
Pending int
|
||||
Ready int
|
||||
Failed int
|
||||
Skipped int
|
||||
}
|
||||
|
||||
func (c *Catalog) CountTranscodesByDrive(ctx context.Context) (map[string]DriveTranscodeCounts, error) {
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT drive_id,
|
||||
COUNT(CASE WHEN COALESCE(ext, '') NOT IN ('mp4', 'webm', 'm4v', 'strm')
|
||||
AND COALESCE(transcode_status, '') IN ('', 'pending') THEN 1 END) AS pending_count,
|
||||
COUNT(CASE WHEN COALESCE(transcode_status, '') = 'ready' THEN 1 END) AS ready_count,
|
||||
COUNT(CASE WHEN COALESCE(transcode_status, '') = 'failed' THEN 1 END) AS failed_count,
|
||||
COUNT(CASE WHEN COALESCE(transcode_status, '') = 'skipped' THEN 1 END) AS skipped_count
|
||||
FROM videos
|
||||
GROUP BY drive_id`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
out := make(map[string]DriveTranscodeCounts)
|
||||
for rows.Next() {
|
||||
var driveID string
|
||||
var counts DriveTranscodeCounts
|
||||
if err := rows.Scan(&driveID, &counts.Pending, &counts.Ready, &counts.Failed, &counts.Skipped); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out[driveID] = counts
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func (c *Catalog) HideVideo(ctx context.Context, id string) error {
|
||||
res, err := c.db.ExecContext(ctx,
|
||||
`UPDATE videos SET hidden = 1, updated_at = ? WHERE id = ?`,
|
||||
@@ -203,10 +286,30 @@ func (c *Catalog) HideVideo(ctx context.Context, id string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MigrateVideoToDrive 把 catalog 里 id=videoID 这条视频迁移到另一个 drive。
|
||||
// 用于 spider91 → PikPak 的迁移:上传成功后改写 drive_id / file_id /
|
||||
// content_hash,保留视频自身的 id(spider91-<driveID>-<sourceID>),这样
|
||||
// 关联表 (video_tags / 收藏 / 点赞) 都不需要动。
|
||||
// ListHiddenVideos 返回所有被标记隐藏(hidden=1)的视频。
|
||||
// 仅用于一次性把历史「隐藏」视频迁移为黑名单墓碑——隐藏机制已废弃,
|
||||
// 前台「不再展示」改走拉黑逻辑。
|
||||
func (c *Catalog) ListHiddenVideos(ctx context.Context) ([]*Video, error) {
|
||||
rows, err := c.db.QueryContext(ctx,
|
||||
`SELECT `+allVideoCols+` FROM videos WHERE COALESCE(hidden, 0) = 1`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []*Video
|
||||
for rows.Next() {
|
||||
v, err := scanVideo(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, v)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// MigrateVideoToDrive rewrites a crawler video row after it has been uploaded
|
||||
// to another drive. The video id is preserved so tags, favorites, likes and
|
||||
// view records keep pointing at the same logical video.
|
||||
//
|
||||
// scanner 后续看到 PikPak 目录下相同 hash / file_name 的文件时,会通过
|
||||
// findDuplicate 命中本行,不会再插入重复行。
|
||||
@@ -232,8 +335,8 @@ func (c *Catalog) MigrateVideoToDrive(ctx context.Context, videoID, newDriveID,
|
||||
}
|
||||
|
||||
// ListVideosByDriveID 列出指定 drive 下所有未隐藏的视频,按 published_at 倒序。
|
||||
// 给 spider91 → 115/PikPak 迁移 worker 用:扫描 spider91 drive 下所有视频,
|
||||
// 检查哪些还有本地文件,依次上传到目标盘。
|
||||
// crawler upload worker uses this to find local crawler rows before uploading
|
||||
// them to their configured target drive.
|
||||
func (c *Catalog) ListVideosByDriveID(ctx context.Context, driveID string, limit int) ([]*Video, error) {
|
||||
if driveID == "" {
|
||||
return nil, fmt.Errorf("catalog: list videos by drive: empty drive id")
|
||||
@@ -318,9 +421,10 @@ func (c *Catalog) IncrementView(ctx context.Context, id string) (int, error) {
|
||||
return 0, err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
now := time.Now().UnixMilli()
|
||||
res, err := tx.ExecContext(ctx,
|
||||
`UPDATE videos SET views = views + 1, updated_at = ? WHERE id = ?`,
|
||||
time.Now().UnixMilli(), id)
|
||||
`UPDATE videos SET views = views + 1, last_viewed_at = ?, updated_at = ? WHERE id = ?`,
|
||||
now, now, id)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
@@ -343,9 +447,12 @@ type VideoMetaPatch struct {
|
||||
ThumbnailStatus string
|
||||
ResetThumbnailFailures bool
|
||||
DurationSeconds int
|
||||
Category string
|
||||
ContentHash string
|
||||
FileName string
|
||||
Title string
|
||||
TitleSet bool
|
||||
Author string
|
||||
AuthorSet bool
|
||||
Tags []string
|
||||
TagsSet bool
|
||||
}
|
||||
@@ -383,10 +490,6 @@ func (c *Catalog) UpdateVideoMeta(ctx context.Context, id string, p VideoMetaPat
|
||||
parts = append(parts, "duration_seconds = ?")
|
||||
args = append(args, p.DurationSeconds)
|
||||
}
|
||||
if p.Category != "" {
|
||||
parts = append(parts, "category = ?")
|
||||
args = append(args, p.Category)
|
||||
}
|
||||
if p.ContentHash != "" {
|
||||
parts = append(parts, "content_hash = ?")
|
||||
args = append(args, normalizeContentHash(p.ContentHash))
|
||||
@@ -395,6 +498,14 @@ func (c *Catalog) UpdateVideoMeta(ctx context.Context, id string, p VideoMetaPat
|
||||
parts = append(parts, "file_name = ?")
|
||||
args = append(args, p.FileName)
|
||||
}
|
||||
if p.TitleSet {
|
||||
parts = append(parts, "title = ?")
|
||||
args = append(args, p.Title)
|
||||
}
|
||||
if p.AuthorSet {
|
||||
parts = append(parts, "author = ?")
|
||||
args = append(args, p.Author)
|
||||
}
|
||||
if p.TagsSet {
|
||||
tagsJSON, _ := json.Marshal(p.Tags)
|
||||
parts = append(parts, "tags = ?")
|
||||
@@ -448,35 +559,6 @@ func (c *Catalog) IncrementThumbnailFailures(ctx context.Context, id string) (in
|
||||
return failures, nil
|
||||
}
|
||||
|
||||
// ListCategories 聚合所有 category,按视频数降序
|
||||
type CategoryStat struct {
|
||||
Category string
|
||||
Count int
|
||||
}
|
||||
|
||||
func (c *Catalog) ListCategories(ctx context.Context) ([]CategoryStat, error) {
|
||||
rows, err := c.db.QueryContext(ctx,
|
||||
`SELECT COALESCE(category, '') AS c, COUNT(*) AS cnt
|
||||
FROM videos
|
||||
WHERE category IS NOT NULL AND category != ''
|
||||
AND COALESCE(hidden, 0) = 0
|
||||
GROUP BY c
|
||||
ORDER BY cnt DESC, c ASC`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []CategoryStat
|
||||
for rows.Next() {
|
||||
var s CategoryStat
|
||||
if err := rows.Scan(&s.Category, &s.Count); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, s)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
type TagStat struct {
|
||||
Label string
|
||||
Count int
|
||||
@@ -640,6 +722,29 @@ func (c *Catalog) ListVideosByDrive(ctx context.Context, driveID string) ([]*Vid
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// ListVideoMaintenanceCandidates returns all current catalog videos without the
|
||||
// public listing dedupe filter. Nightly maintenance needs to see duplicate rows
|
||||
// that ListVideos intentionally hides from the frontend.
|
||||
func (c *Catalog) ListVideoMaintenanceCandidates(ctx context.Context) ([]*Video, error) {
|
||||
rows, err := c.db.QueryContext(ctx,
|
||||
`SELECT `+allVideoCols+` FROM videos
|
||||
WHERE COALESCE(hidden, 0) = 0
|
||||
ORDER BY created_at ASC, id ASC`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []*Video
|
||||
for rows.Next() {
|
||||
v, err := scanVideo(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, v)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func (c *Catalog) ListVideosByIDPrefix(ctx context.Context, prefix string) ([]*Video, error) {
|
||||
prefix = strings.TrimSpace(prefix)
|
||||
if prefix == "" {
|
||||
@@ -711,21 +816,6 @@ func (c *Catalog) ListVideoFileIDsByDrive(ctx context.Context, driveID string) (
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// ListSpider91Viewkeys 列出某个 spider91 drive 历史上爬过的所有 ID 后缀。
|
||||
// 函数名保留历史叫法;新 spider91 数据的后缀是 91 mp4 源 ID,不再是 viewkey。
|
||||
//
|
||||
// 不能再用 ListVideoFileIDsByDrive:那个只看 drive_id,但 spider91 视频
|
||||
// 一旦被 spider91migrate 迁移到 PikPak,drive_id 就变成 PikPak 了。
|
||||
//
|
||||
// 这里按 video.id 前缀 "spider91-<driveID>-" 查,即使迁移后视频也仍能被
|
||||
// 找到——id 本身会保留 "spider91-<driveID>-<sourceID>" 这个来源前缀。
|
||||
//
|
||||
// 用途:crawler 把这个集合写到 seen 文件,让 Python/Go 跳过已爬过的视频,
|
||||
// 配合 --target-new 真正凑出 N 个未爬过的视频。
|
||||
func (c *Catalog) ListSpider91Viewkeys(ctx context.Context, driveID string) ([]string, error) {
|
||||
return c.ListCrawlerSourceIDs(ctx, "spider91", driveID)
|
||||
}
|
||||
|
||||
// ListCrawlerSourceIDs lists source IDs that were already imported by a
|
||||
// crawler-like drive. It reads both videos and deleted_videos so explicit admin
|
||||
// deletions remain tombstoned for future crawler runs.
|
||||
@@ -802,10 +892,19 @@ ON CONFLICT(kind, drive_id, source_id) DO UPDATE SET
|
||||
return err
|
||||
}
|
||||
|
||||
// DeleteVideoWithTombstone records that an administrator explicitly deleted a
|
||||
// video, then removes the visible catalog row. The tombstone is used by
|
||||
// scanners/crawlers to avoid importing the same source file again.
|
||||
const DeletedVideoReasonDuplicate = "duplicate"
|
||||
|
||||
// DeleteVideoWithTombstone records that a video was removed, then removes the
|
||||
// visible catalog row. The tombstone is used by scanners/crawlers to avoid
|
||||
// importing the same source file again.
|
||||
func (c *Catalog) DeleteVideoWithTombstone(ctx context.Context, id string) error {
|
||||
return c.DeleteVideoWithTombstoneReason(ctx, id, "")
|
||||
}
|
||||
|
||||
// DeleteVideoWithTombstoneReason is the same tombstone path with an optional
|
||||
// machine reason for admin UI hints. Empty reason means user/admin initiated.
|
||||
func (c *Catalog) DeleteVideoWithTombstoneReason(ctx context.Context, id, reason string) error {
|
||||
reason = normalizeDeletedVideoReason(reason)
|
||||
tx, err := c.db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -829,7 +928,7 @@ SELECT id, drive_id, file_id, COALESCE(content_hash, ''), COALESCE(file_name, ''
|
||||
}
|
||||
v.ContentHash = normalizeContentHash(v.ContentHash)
|
||||
|
||||
// 先记录这次视频关联的 tag_id,便于事务末尾清理孤儿 collection 标签。
|
||||
// 先记录这次视频关联的 tag_id,便于事务末尾清理旧版本遗留的孤儿 collection 标签。
|
||||
tagIDs, err := collectVideoTagIDs(ctx, tx, id)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -837,16 +936,17 @@ SELECT id, drive_id, file_id, COALESCE(content_hash, ''), COALESCE(file_name, ''
|
||||
|
||||
now := time.Now().UnixMilli()
|
||||
if _, err := tx.ExecContext(ctx, `
|
||||
INSERT INTO deleted_videos (id, drive_id, file_id, content_hash, file_name, size_bytes, deleted_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
INSERT INTO deleted_videos (id, drive_id, file_id, content_hash, file_name, size_bytes, reason, deleted_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(id) DO UPDATE SET
|
||||
drive_id = excluded.drive_id,
|
||||
file_id = excluded.file_id,
|
||||
content_hash = excluded.content_hash,
|
||||
file_name = excluded.file_name,
|
||||
size_bytes = excluded.size_bytes,
|
||||
reason = excluded.reason,
|
||||
deleted_at = excluded.deleted_at`,
|
||||
v.ID, v.DriveID, v.FileID, v.ContentHash, v.FileName, v.Size, now); err != nil {
|
||||
v.ID, v.DriveID, v.FileID, v.ContentHash, v.FileName, v.Size, reason, now); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, `DELETE FROM video_tags WHERE video_id = ?`, id); err != nil {
|
||||
@@ -872,7 +972,7 @@ func (c *Catalog) DeleteVideo(ctx context.Context, id string) error {
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
// 先记录这次视频关联的 tag_id,便于事务末尾清理孤儿 collection 标签
|
||||
// 先记录这次视频关联的 tag_id,便于事务末尾清理旧版本遗留的孤儿 collection 标签。
|
||||
tagIDs, err := collectVideoTagIDs(ctx, tx, id)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -889,7 +989,7 @@ func (c *Catalog) DeleteVideo(ctx context.Context, id string) error {
|
||||
return sql.ErrNoRows
|
||||
}
|
||||
|
||||
// collection 标签是 scanner 按目录名机器生成的;视频删完后若不再被引用就一起回收。
|
||||
// collection 标签来自旧版本按目录名生成的标签;视频删完后若不再被引用就一起回收。
|
||||
// system / user / auto / legacy 不在此处删除,避免破坏管理员手动维护的标签语义。
|
||||
if err := pruneOrphanCollectionTagsByID(ctx, tx, tagIDs); err != nil {
|
||||
return err
|
||||
@@ -898,6 +998,97 @@ func (c *Catalog) DeleteVideo(ctx context.Context, id string) error {
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
// DeletedVideo 是黑名单(墓碑)表里的一条记录。原始视频行已删除,
|
||||
// 这里只保留扫盘去重和后台展示需要的最小字段;没有 title/封面/作者。
|
||||
type DeletedVideo struct {
|
||||
ID string `json:"id"`
|
||||
DriveID string `json:"driveId"`
|
||||
FileID string `json:"fileId"`
|
||||
FileName string `json:"fileName"`
|
||||
Size int64 `json:"size"`
|
||||
Reason string `json:"reason"`
|
||||
DeletedAt int64 `json:"deletedAt"` // unix 毫秒
|
||||
}
|
||||
|
||||
// ListDeletedVideos 分页列出黑名单视频,按拉黑时间倒序。
|
||||
// Keyword 非空时按文件名模糊匹配,DriveID 非空时限定来源网盘。
|
||||
func (c *Catalog) ListDeletedVideos(ctx context.Context, p ListParams) ([]*DeletedVideo, int, error) {
|
||||
if p.PageSize <= 0 {
|
||||
p.PageSize = 50
|
||||
}
|
||||
if p.Page <= 0 {
|
||||
p.Page = 1
|
||||
}
|
||||
var where []string
|
||||
var args []any
|
||||
if kw := strings.TrimSpace(p.Keyword); kw != "" {
|
||||
where = append(where, "file_name LIKE ?")
|
||||
args = append(args, "%"+kw+"%")
|
||||
}
|
||||
if driveID := strings.TrimSpace(p.DriveID); driveID != "" {
|
||||
where = append(where, "drive_id = ?")
|
||||
args = append(args, driveID)
|
||||
}
|
||||
whereSQL := ""
|
||||
if len(where) > 0 {
|
||||
whereSQL = " WHERE " + strings.Join(where, " AND ")
|
||||
}
|
||||
|
||||
var total int
|
||||
if err := c.db.QueryRowContext(ctx, `SELECT COUNT(*) FROM deleted_videos`+whereSQL, args...).Scan(&total); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
offset := (p.Page - 1) * p.PageSize
|
||||
rows, err := c.db.QueryContext(ctx,
|
||||
`SELECT id, COALESCE(drive_id, ''), COALESCE(file_id, ''), COALESCE(file_name, ''), COALESCE(size_bytes, 0), COALESCE(reason, ''), deleted_at
|
||||
FROM deleted_videos`+whereSQL+`
|
||||
ORDER BY deleted_at DESC
|
||||
LIMIT ? OFFSET ?`,
|
||||
append(args, p.PageSize, offset)...)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var out []*DeletedVideo
|
||||
for rows.Next() {
|
||||
v := &DeletedVideo{}
|
||||
if err := rows.Scan(&v.ID, &v.DriveID, &v.FileID, &v.FileName, &v.Size, &v.Reason, &v.DeletedAt); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
out = append(out, v)
|
||||
}
|
||||
return out, total, rows.Err()
|
||||
}
|
||||
|
||||
// RemoveDeletedVideo 把视频移出黑名单(删除墓碑)。移除后该视频会在
|
||||
// 下次扫盘/凌晨流水线时被重新发现并入库,本函数不主动触发扫描。
|
||||
func (c *Catalog) RemoveDeletedVideo(ctx context.Context, id string) error {
|
||||
res, err := c.db.ExecContext(ctx, `DELETE FROM deleted_videos WHERE id = ?`, id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if rows, err := res.RowsAffected(); err == nil && rows == 0 {
|
||||
return sql.ErrNoRows
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// VideoManagementCounts 返回后台视频管理两个标签的计数:
|
||||
// current=当前可见(与「当前视频」页一致的去重+在线盘+hidden=0 口径),
|
||||
// blacklisted=黑名单墓碑总数。
|
||||
func (c *Catalog) VideoManagementCounts(ctx context.Context) (current, blacklisted int, err error) {
|
||||
currentSQL := `SELECT COUNT(*) FROM videos WHERE COALESCE(hidden, 0) = 0 AND ` + activeDriveWhereSQL + ` AND ` + uniqueVideoWhereSQL
|
||||
if err = c.db.QueryRowContext(ctx, currentSQL).Scan(&current); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
if err = c.db.QueryRowContext(ctx, `SELECT COUNT(*) FROM deleted_videos`).Scan(&blacklisted); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return current, blacklisted, nil
|
||||
}
|
||||
|
||||
func (c *Catalog) IsVideoDeleted(ctx context.Context, id string) (bool, error) {
|
||||
id = strings.TrimSpace(id)
|
||||
if id == "" {
|
||||
@@ -997,6 +1188,73 @@ func (c *Catalog) FindEquivalentVideo(ctx context.Context, source *Video) (*Vide
|
||||
return scanVideo(row)
|
||||
}
|
||||
|
||||
// FindVideoBySampledFingerprint returns the earliest visible video with the
|
||||
// same file size and sampled fingerprint as source.
|
||||
func (c *Catalog) FindVideoBySampledFingerprint(ctx context.Context, source *Video) (*Video, error) {
|
||||
if source == nil || source.Size <= 0 {
|
||||
return nil, sql.ErrNoRows
|
||||
}
|
||||
sampled := normalizeContentHash(source.SampledSHA256)
|
||||
if sampled == "" {
|
||||
return nil, sql.ErrNoRows
|
||||
}
|
||||
row := c.db.QueryRowContext(ctx,
|
||||
`SELECT `+allVideoCols+` FROM videos
|
||||
WHERE id != ?
|
||||
AND COALESCE(hidden, 0) = 0
|
||||
AND COALESCE(file_id, '') != ''
|
||||
AND size_bytes = ?
|
||||
AND COALESCE(sampled_sha256, '') != ''
|
||||
AND sampled_sha256 = ?
|
||||
ORDER BY created_at ASC, id ASC
|
||||
LIMIT 1`,
|
||||
source.ID, source.Size, sampled)
|
||||
return scanVideo(row)
|
||||
}
|
||||
|
||||
// ListNearDuplicateVideoCandidates returns visible videos that are cheap
|
||||
// candidates for perceptual duplicate checking: same-ish duration and a ready
|
||||
// thumbnail URL. Callers are expected to apply title similarity and image SSIM.
|
||||
func (c *Catalog) ListNearDuplicateVideoCandidates(ctx context.Context, source *Video, durationToleranceSeconds, limit int) ([]*Video, error) {
|
||||
if source == nil || strings.TrimSpace(source.Title) == "" || source.DurationSeconds <= 0 {
|
||||
return nil, nil
|
||||
}
|
||||
if durationToleranceSeconds < 0 {
|
||||
durationToleranceSeconds = 0
|
||||
}
|
||||
if limit <= 0 {
|
||||
limit = 200
|
||||
}
|
||||
minDuration := source.DurationSeconds - durationToleranceSeconds
|
||||
if minDuration < 1 {
|
||||
minDuration = 1
|
||||
}
|
||||
maxDuration := source.DurationSeconds + durationToleranceSeconds
|
||||
rows, err := c.db.QueryContext(ctx,
|
||||
`SELECT `+allVideoCols+` FROM videos
|
||||
WHERE id != ?
|
||||
AND COALESCE(hidden, 0) = 0
|
||||
AND COALESCE(file_id, '') != ''
|
||||
AND COALESCE(thumbnail_url, '') != ''
|
||||
AND COALESCE(duration_seconds, 0) BETWEEN ? AND ?
|
||||
ORDER BY ABS(duration_seconds - ?) ASC, created_at ASC, id ASC
|
||||
LIMIT ?`,
|
||||
source.ID, minDuration, maxDuration, source.DurationSeconds, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []*Video
|
||||
for rows.Next() {
|
||||
v, err := scanVideo(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, v)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// FindEquivalentVideoOnDrive returns a visible video on driveID that represents
|
||||
// the same content as source by strong hash or sampled fingerprint.
|
||||
func (c *Catalog) FindEquivalentVideoOnDrive(ctx context.Context, source *Video, driveID string) (*Video, error) {
|
||||
@@ -1160,8 +1418,7 @@ type ListParams struct {
|
||||
Keyword string
|
||||
DriveID string
|
||||
Tag string
|
||||
Category string
|
||||
Sort string // latest | hot | week | long
|
||||
Sort string // latest | hot | recent
|
||||
ThumbnailReadyOnly bool
|
||||
PreferReadyThumbnails bool
|
||||
SkipTotal bool
|
||||
@@ -1180,9 +1437,9 @@ func (c *Catalog) ListVideos(ctx context.Context, p ListParams) ([]*Video, int,
|
||||
var where []string
|
||||
var args []any
|
||||
if p.Keyword != "" {
|
||||
where = append(where, "(title LIKE ? OR author LIKE ?)")
|
||||
where = append(where, "(title LIKE ? OR author LIKE ? OR file_name LIKE ?)")
|
||||
like := "%" + p.Keyword + "%"
|
||||
args = append(args, like, like)
|
||||
args = append(args, like, like, like)
|
||||
}
|
||||
if p.DriveID != "" {
|
||||
where = append(where, "drive_id = ?")
|
||||
@@ -1192,10 +1449,6 @@ func (c *Catalog) ListVideos(ctx context.Context, p ListParams) ([]*Video, int,
|
||||
where = append(where, videoMatchesTagLabelSQL("videos"))
|
||||
args = append(args, p.Tag)
|
||||
}
|
||||
if p.Category != "" && p.Category != "all" {
|
||||
where = append(where, "category = ?")
|
||||
args = append(args, p.Category)
|
||||
}
|
||||
if p.ThumbnailReadyOnly {
|
||||
where = append(where, "COALESCE(thumbnail_url, '') != ''")
|
||||
}
|
||||
@@ -1216,10 +1469,8 @@ func (c *Catalog) ListVideos(ctx context.Context, p ListParams) ([]*Video, int,
|
||||
case "hot":
|
||||
// 热度 = 点赞数,点赞相同按最新
|
||||
orderBy = " ORDER BY " + readyOrderPrefix + "likes DESC, published_at DESC"
|
||||
case "week":
|
||||
orderBy = " ORDER BY " + readyOrderPrefix + "likes DESC"
|
||||
case "long":
|
||||
orderBy = " ORDER BY " + readyOrderPrefix + "duration_seconds DESC"
|
||||
case "recent":
|
||||
orderBy = " ORDER BY " + readyOrderPrefix + "COALESCE(last_viewed_at, 0) DESC, published_at DESC"
|
||||
}
|
||||
|
||||
var total int
|
||||
@@ -1342,160 +1593,6 @@ func cleanVideoIDs(ids []string) []string {
|
||||
return cleaned
|
||||
}
|
||||
|
||||
func cleanTagLabels(labels []string) []string {
|
||||
seen := make(map[string]struct{}, len(labels))
|
||||
cleaned := make([]string, 0, len(labels))
|
||||
for _, label := range labels {
|
||||
label = strings.TrimSpace(label)
|
||||
if label == "" {
|
||||
continue
|
||||
}
|
||||
key := strings.ToLower(label)
|
||||
if _, ok := seen[key]; ok {
|
||||
continue
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
cleaned = append(cleaned, label)
|
||||
}
|
||||
return cleaned
|
||||
}
|
||||
|
||||
func (c *Catalog) LeastPopulatedVisibleUniqueTag(ctx context.Context, labels []string) (string, error) {
|
||||
cleaned := cleanTagLabels(labels)
|
||||
bestLabel := ""
|
||||
bestCount := 0
|
||||
for _, label := range cleaned {
|
||||
var count int
|
||||
if err := c.db.QueryRowContext(ctx,
|
||||
`SELECT COUNT(*)
|
||||
FROM videos
|
||||
WHERE COALESCE(hidden, 0) = 0
|
||||
AND `+activeDriveWhereSQL+`
|
||||
AND `+uniqueVideoWhereSQL+`
|
||||
AND EXISTS (
|
||||
SELECT 1
|
||||
FROM video_tags vt
|
||||
JOIN tags t ON t.id = vt.tag_id
|
||||
WHERE vt.video_id = videos.id
|
||||
AND t.label = ? COLLATE NOCASE
|
||||
)`,
|
||||
label,
|
||||
).Scan(&count); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if count == 0 {
|
||||
continue
|
||||
}
|
||||
if bestLabel == "" || count < bestCount {
|
||||
bestLabel = label
|
||||
bestCount = count
|
||||
}
|
||||
}
|
||||
return bestLabel, nil
|
||||
}
|
||||
|
||||
func (c *Catalog) RandomVideosByTagExcluding(ctx context.Context, tag string, excludeIDs []string, limit int) ([]*Video, error) {
|
||||
if limit <= 0 {
|
||||
return nil, nil
|
||||
}
|
||||
tag = strings.TrimSpace(tag)
|
||||
if tag == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
cleaned := cleanVideoIDs(excludeIDs)
|
||||
args := make([]any, 0, len(cleaned)+2)
|
||||
args = append(args, tag)
|
||||
whereSQL := `WHERE COALESCE(hidden, 0) = 0
|
||||
AND ` + activeDriveWhereSQL + `
|
||||
AND ` + uniqueVideoWhereSQL + `
|
||||
AND EXISTS (
|
||||
SELECT 1
|
||||
FROM video_tags vt
|
||||
JOIN tags t ON t.id = vt.tag_id
|
||||
WHERE vt.video_id = videos.id
|
||||
AND t.label = ? COLLATE NOCASE
|
||||
)`
|
||||
if len(cleaned) > 0 {
|
||||
placeholders := strings.Repeat("?,", len(cleaned))
|
||||
placeholders = placeholders[:len(placeholders)-1]
|
||||
whereSQL += " AND id NOT IN (" + placeholders + ")"
|
||||
for _, id := range cleaned {
|
||||
args = append(args, id)
|
||||
}
|
||||
}
|
||||
args = append(args, limit)
|
||||
|
||||
rows, err := c.db.QueryContext(ctx,
|
||||
`SELECT `+allVideoCols+` FROM videos `+whereSQL+`
|
||||
ORDER BY RANDOM() LIMIT ?`,
|
||||
args...,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var out []*Video
|
||||
for rows.Next() {
|
||||
v, err := scanVideo(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, v)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *Catalog) RandomVideosForPreferredVideoExcluding(ctx context.Context, preferredVideoID string, excludeIDs []string, limit int) ([]*Video, error) {
|
||||
if limit <= 0 {
|
||||
return nil, nil
|
||||
}
|
||||
preferredVideoID = strings.TrimSpace(preferredVideoID)
|
||||
if preferredVideoID == "" {
|
||||
return c.RandomVideosExcluding(ctx, excludeIDs, limit)
|
||||
}
|
||||
|
||||
preferredExclude := append([]string{}, excludeIDs...)
|
||||
preferredExclude = append(preferredExclude, preferredVideoID)
|
||||
|
||||
preferred, err := c.GetVideo(ctx, preferredVideoID)
|
||||
if err != nil || preferred == nil || preferred.Hidden || len(preferred.Tags) == 0 {
|
||||
return c.RandomVideosExcluding(ctx, preferredExclude, limit)
|
||||
}
|
||||
tag, err := c.LeastPopulatedVisibleUniqueTag(ctx, preferred.Tags)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if tag == "" {
|
||||
return c.RandomVideosExcluding(ctx, preferredExclude, limit)
|
||||
}
|
||||
|
||||
items, err := c.RandomVideosByTagExcluding(ctx, tag, preferredExclude, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(items) >= limit {
|
||||
return items, nil
|
||||
}
|
||||
|
||||
mergedExclude := make([]string, 0, len(preferredExclude)+len(items))
|
||||
mergedExclude = append(mergedExclude, preferredExclude...)
|
||||
for _, item := range items {
|
||||
if item != nil {
|
||||
mergedExclude = append(mergedExclude, item.ID)
|
||||
}
|
||||
}
|
||||
fallback, err := c.RandomVideosExcluding(ctx, mergedExclude, limit-len(items))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return append(items, fallback...), nil
|
||||
}
|
||||
|
||||
type DriveTeaserCounts struct {
|
||||
Ready int
|
||||
Pending int
|
||||
@@ -1900,7 +1997,7 @@ type Drive struct {
|
||||
Credentials map[string]string `json:"credentials,omitempty"`
|
||||
Status string `json:"status"`
|
||||
LastError string `json:"lastError,omitempty"`
|
||||
// TeaserEnabled 控制是否给本盘生成预览视频/封面。
|
||||
// TeaserEnabled 控制是否给本盘生成预览视频;封面生成不受影响。
|
||||
// 替代早期的全局 preview.enabled 开关;新建 drive 时 UpsertDrive 默认置 true。
|
||||
TeaserEnabled bool `json:"teaserEnabled"`
|
||||
// SkipDirIDs 是用户在管理后台为该盘选定的"扫描跳过目录"集合(网盘侧的目录 fileID)。
|
||||
@@ -1955,7 +2052,7 @@ func normalizeDriveRootFields(d *Drive) {
|
||||
func normalizeDriveRootID(kind, rootID string) string {
|
||||
rootID = strings.TrimSpace(rootID)
|
||||
switch kind {
|
||||
case "pikpak":
|
||||
case "pikpak", "guangyapan":
|
||||
if rootID == "0" {
|
||||
return ""
|
||||
}
|
||||
@@ -1965,7 +2062,7 @@ func normalizeDriveRootID(kind, rootID string) string {
|
||||
return "root"
|
||||
}
|
||||
return rootID
|
||||
case "localstorage", "spider91":
|
||||
case "localstorage", "scriptcrawler":
|
||||
return "/"
|
||||
default:
|
||||
if rootID == "" {
|
||||
@@ -2033,7 +2130,7 @@ func (c *Catalog) DeleteDrive(ctx context.Context, id string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// SetDriveTeaserEnabled 切换某盘的预览视频/封面生成开关。
|
||||
// SetDriveTeaserEnabled 切换某盘的预览视频生成开关。
|
||||
//
|
||||
// 与 UpsertDrive 的区别:只动 teaser_enabled + updated_at 一列,不要求调用方
|
||||
// 重传 kind / name / credentials 等容易踩坑的字段。
|
||||
@@ -2165,10 +2262,11 @@ COALESCE(sampled_sha256, ''), COALESCE(fingerprint_status, 'pending'), COALESCE(
|
||||
COALESCE(parent_id, ''), title, COALESCE(author, ''), COALESCE(tags, '[]'),
|
||||
duration_seconds, size_bytes, COALESCE(ext, ''), COALESCE(quality, ''), COALESCE(thumbnail_url, ''),
|
||||
COALESCE(preview_file_id, ''), COALESCE(preview_local, ''), COALESCE(preview_status, 'pending'),
|
||||
views, favorites, comments, likes, dislikes,
|
||||
COALESCE(category, ''), COALESCE(hidden, 0), COALESCE(badges, '[]'), COALESCE(description, ''),
|
||||
published_at, created_at, updated_at
|
||||
`
|
||||
COALESCE(transcode_status, ''), COALESCE(transcode_error, ''), COALESCE(transcoded_file_id, ''), COALESCE(transcoded_size, 0),
|
||||
views, COALESCE(last_viewed_at, 0), favorites, comments, likes, dislikes,
|
||||
COALESCE(hidden, 0), COALESCE(badges, '[]'), COALESCE(description, ''),
|
||||
published_at, created_at, updated_at
|
||||
`
|
||||
|
||||
const activeDriveWhereSQL = `(videos.drive_id = 'local-upload'
|
||||
OR EXISTS (
|
||||
@@ -2228,7 +2326,7 @@ type rowScanner interface {
|
||||
func scanVideo(row rowScanner) (*Video, error) {
|
||||
v := &Video{}
|
||||
var tagsJSON, badgesJSON string
|
||||
var publishedAt, createdAt, updatedAt int64
|
||||
var publishedAt, createdAt, updatedAt, lastViewedAt int64
|
||||
var hidden int
|
||||
err := row.Scan(
|
||||
&v.ID, &v.DriveID, &v.FileID, &v.FileName, &v.ContentHash,
|
||||
@@ -2236,8 +2334,9 @@ func scanVideo(row rowScanner) (*Video, error) {
|
||||
&v.ParentID, &v.Title, &v.Author, &tagsJSON,
|
||||
&v.DurationSeconds, &v.Size, &v.Ext, &v.Quality, &v.ThumbnailURL,
|
||||
&v.PreviewFileID, &v.PreviewLocal, &v.PreviewStatus,
|
||||
&v.Views, &v.Favorites, &v.Comments, &v.Likes, &v.Dislikes,
|
||||
&v.Category, &hidden, &badgesJSON, &v.Description,
|
||||
&v.TranscodeStatus, &v.TranscodeError, &v.TranscodedFileID, &v.TranscodedSize,
|
||||
&v.Views, &lastViewedAt, &v.Favorites, &v.Comments, &v.Likes, &v.Dislikes,
|
||||
&hidden, &badgesJSON, &v.Description,
|
||||
&publishedAt, &createdAt, &updatedAt,
|
||||
)
|
||||
if err != nil {
|
||||
@@ -2249,6 +2348,9 @@ func scanVideo(row rowScanner) (*Video, error) {
|
||||
v.PublishedAt = time.UnixMilli(publishedAt)
|
||||
v.CreatedAt = time.UnixMilli(createdAt)
|
||||
v.UpdatedAt = time.UnixMilli(updatedAt)
|
||||
if lastViewedAt > 0 {
|
||||
v.LastViewedAt = time.UnixMilli(lastViewedAt)
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
||||
@@ -2256,6 +2358,22 @@ func normalizeContentHash(hash string) string {
|
||||
return strings.ToLower(strings.TrimSpace(hash))
|
||||
}
|
||||
|
||||
func normalizeDeletedVideoReason(reason string) string {
|
||||
switch strings.ToLower(strings.TrimSpace(reason)) {
|
||||
case DeletedVideoReasonDuplicate:
|
||||
return DeletedVideoReasonDuplicate
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func unixMilliOrZero(t time.Time) int64 {
|
||||
if t.IsZero() {
|
||||
return 0
|
||||
}
|
||||
return t.UnixMilli()
|
||||
}
|
||||
|
||||
func boolToInt(v bool) int {
|
||||
if v {
|
||||
return 1
|
||||
|
||||
@@ -58,10 +58,11 @@ func TestUpsertDriveDefaultsRootIDByKind(t *testing.T) {
|
||||
}{
|
||||
{id: "p115", kind: "p115", want: "0"},
|
||||
{id: "pikpak", kind: "pikpak", want: ""},
|
||||
{id: "guangyapan", kind: "guangyapan", want: ""},
|
||||
{id: "onedrive", kind: "onedrive", want: "root"},
|
||||
{id: "googledrive", kind: "googledrive", want: "root"},
|
||||
{id: "localstorage", kind: "localstorage", want: "/"},
|
||||
{id: "spider91", kind: "spider91", want: "/"},
|
||||
{id: "scriptcrawler", kind: "scriptcrawler", want: "/"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
@@ -84,7 +85,7 @@ func TestUpsertDriveDefaultsRootIDByKind(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpsertDriveIgnoresRootIDForLocalStorageAndSpider91(t *testing.T) {
|
||||
func TestUpsertDriveIgnoresRootIDForLocalStorageAndScriptCrawler(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -101,7 +102,7 @@ func TestUpsertDriveIgnoresRootIDForLocalStorageAndSpider91(t *testing.T) {
|
||||
kind string
|
||||
}{
|
||||
{id: "localstorage", kind: "localstorage"},
|
||||
{id: "spider91", kind: "spider91"},
|
||||
{id: "scriptcrawler", kind: "scriptcrawler"},
|
||||
} {
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
ID: tc.id,
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestListVideoFileIDsByDrive 校验 spider91 crawler 用到的轻量 file_id 查询:
|
||||
// TestListVideoFileIDsByDrive 校验上传 worker 用到的轻量 file_id 查询:
|
||||
// - 只返回指定 drive 的 file_id;不返回其它 drive 的
|
||||
// - 跳过 file_id 为空的视频
|
||||
// - 返回顺序无要求,但每个 file_id 只出现一次
|
||||
@@ -33,20 +33,20 @@ func TestListVideoFileIDsByDrive(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
insert("spider91-A-vk001", "spider-a", "vk001.mp4")
|
||||
insert("spider91-A-vk002", "spider-a", "vk002.flv")
|
||||
insert("spider91-A-vk003", "spider-a", "vk003.mp4")
|
||||
insert("scriptcrawler-A-source001", "crawler-a", "source001.mp4")
|
||||
insert("scriptcrawler-A-source002", "crawler-a", "source002.flv")
|
||||
insert("scriptcrawler-A-source003", "crawler-a", "source003.mp4")
|
||||
// 不同 drive 的视频不应出现
|
||||
insert("quark-other-fid", "drive-quark", "abcdef")
|
||||
// 空 file_id 应被过滤
|
||||
insert("spider91-A-empty", "spider-a", "")
|
||||
insert("scriptcrawler-A-empty", "crawler-a", "")
|
||||
|
||||
got, err := cat.ListVideoFileIDsByDrive(ctx, "spider-a")
|
||||
got, err := cat.ListVideoFileIDsByDrive(ctx, "crawler-a")
|
||||
if err != nil {
|
||||
t.Fatalf("ListVideoFileIDsByDrive: %v", err)
|
||||
}
|
||||
sort.Strings(got)
|
||||
want := []string{"vk001.mp4", "vk002.flv", "vk003.mp4"}
|
||||
want := []string{"source001.mp4", "source002.flv", "source003.mp4"}
|
||||
sort.Strings(want)
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("got %d ids, want %d: got=%v", len(got), len(want), got)
|
||||
@@ -67,11 +67,11 @@ func TestListVideoFileIDsByDrive(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestListSpider91ViewkeysFindsMigratedVideos 校验:即使 spider91 视频
|
||||
// 被迁移到 PikPak(drive_id 改了),ListSpider91Viewkeys 仍能通过 video.id
|
||||
// 前缀找到这些 viewkey。这是 crawler 写 seen 文件的关键不变量,
|
||||
// 否则下一次爬取会把已爬过的 viewkey 当作"新"的再爬一遍。
|
||||
func TestListSpider91ViewkeysFindsMigratedVideos(t *testing.T) {
|
||||
// TestListCrawlerSourceIDsFindsMigratedVideos 校验:即使爬虫视频被上传迁移
|
||||
// 到目标网盘(drive_id 改了),ListCrawlerSourceIDs 仍能通过 video.id 前缀
|
||||
// 找到这些 source_id。这是 crawler 写 seen 文件的关键不变量,否则下一次
|
||||
// 爬取会把已爬过的 source_id 当作"新"的再爬一遍。
|
||||
func TestListCrawlerSourceIDsFindsMigratedVideos(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -92,25 +92,25 @@ func TestListSpider91ViewkeysFindsMigratedVideos(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// 1) 仍在 spider91 drive 下的视频(未迁移)
|
||||
insert("spider91-91Spider-vk001", "91Spider", "vk001.mp4")
|
||||
// 2) 已迁移到 PikPak 的视频:drive_id 变了,但 id 仍是 spider91-91Spider-...
|
||||
insert("spider91-91Spider-vk002", "PikPak", "PIKPAK-FILE-ID-2")
|
||||
insert("spider91-91Spider-vk003", "PikPak", "PIKPAK-FILE-ID-3")
|
||||
// 3) 别的 spider91 drive 的视频,不应混进来
|
||||
insert("spider91-OtherDrive-vk999", "OtherDrive", "vk999.mp4")
|
||||
// 1) 仍在本地爬虫 drive 下的视频(未上传)
|
||||
insert("scriptcrawler-crawler-a-source001", "crawler-a", "source001.mp4")
|
||||
// 2) 已上传到目标盘的视频:drive_id 变了,但 id 仍保留 crawler 来源前缀。
|
||||
insert("scriptcrawler-crawler-a-source002", "target-drive", "TARGET-FILE-ID-2")
|
||||
insert("scriptcrawler-crawler-a-source003", "target-drive", "TARGET-FILE-ID-3")
|
||||
// 3) 别的爬虫 drive 的视频,不应混进来
|
||||
insert("scriptcrawler-other-source999", "other-crawler", "source999.mp4")
|
||||
// 4) 完全无关的视频
|
||||
insert("quark-some-fid", "drive-quark", "abc")
|
||||
|
||||
got, err := cat.ListSpider91Viewkeys(ctx, "91Spider")
|
||||
got, err := cat.ListCrawlerSourceIDs(ctx, "scriptcrawler", "crawler-a")
|
||||
if err != nil {
|
||||
t.Fatalf("ListSpider91Viewkeys: %v", err)
|
||||
t.Fatalf("ListCrawlerSourceIDs: %v", err)
|
||||
}
|
||||
sort.Strings(got)
|
||||
want := []string{"vk001", "vk002", "vk003"}
|
||||
want := []string{"source001", "source002", "source003"}
|
||||
sort.Strings(want)
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("got %d viewkeys, want %d: got=%v", len(got), len(want), got)
|
||||
t.Fatalf("got %d source ids, want %d: got=%v", len(got), len(want), got)
|
||||
}
|
||||
for i := range got {
|
||||
if got[i] != want[i] {
|
||||
@@ -119,9 +119,9 @@ func TestListSpider91ViewkeysFindsMigratedVideos(t *testing.T) {
|
||||
}
|
||||
|
||||
// 不存在的 drive 返回空列表
|
||||
other, err := cat.ListSpider91Viewkeys(ctx, "no-such-drive")
|
||||
other, err := cat.ListCrawlerSourceIDs(ctx, "scriptcrawler", "no-such-drive")
|
||||
if err != nil {
|
||||
t.Fatalf("ListSpider91Viewkeys empty: %v", err)
|
||||
t.Fatalf("ListCrawlerSourceIDs empty: %v", err)
|
||||
}
|
||||
if len(other) != 0 {
|
||||
t.Fatalf("non-existent drive: got %v, want empty", other)
|
||||
@@ -138,12 +138,12 @@ func TestDeleteVideoWithTombstonePreventsReimport(t *testing.T) {
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "spider91-91Spider-vk004",
|
||||
DriveID: "91Spider",
|
||||
FileID: "vk004.mp4",
|
||||
FileName: "vk004.mp4",
|
||||
ID: "scriptcrawler-crawler-a-source004",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "source004.mp4",
|
||||
FileName: "source004.mp4",
|
||||
ContentHash: "ABCDEF",
|
||||
Title: "Deleted Spider",
|
||||
Title: "Deleted Source",
|
||||
Size: 2048,
|
||||
PreviewStatus: "ready",
|
||||
PublishedAt: now,
|
||||
@@ -153,24 +153,24 @@ func TestDeleteVideoWithTombstonePreventsReimport(t *testing.T) {
|
||||
t.Fatalf("upsert: %v", err)
|
||||
}
|
||||
|
||||
if err := cat.DeleteVideoWithTombstone(ctx, "spider91-91Spider-vk004"); err != nil {
|
||||
if err := cat.DeleteVideoWithTombstone(ctx, "scriptcrawler-crawler-a-source004"); err != nil {
|
||||
t.Fatalf("delete with tombstone: %v", err)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, "spider91-91Spider-vk004"); err != sql.ErrNoRows {
|
||||
if _, err := cat.GetVideo(ctx, "scriptcrawler-crawler-a-source004"); err != sql.ErrNoRows {
|
||||
t.Fatalf("get deleted video error = %v, want sql.ErrNoRows", err)
|
||||
}
|
||||
deleted, err := cat.IsDeletedVideoCandidate(ctx, "spider91-91Spider-vk004", "91Spider", "vk004.mp4", "abcdef", "vk004.mp4", 2048)
|
||||
deleted, err := cat.IsDeletedVideoCandidate(ctx, "scriptcrawler-crawler-a-source004", "crawler-a", "source004.mp4", "abcdef", "source004.mp4", 2048)
|
||||
if err != nil {
|
||||
t.Fatalf("check deleted candidate: %v", err)
|
||||
}
|
||||
if !deleted {
|
||||
t.Fatal("deleted candidate was not recognized")
|
||||
}
|
||||
viewkeys, err := cat.ListSpider91Viewkeys(ctx, "91Spider")
|
||||
sourceIDs, err := cat.ListCrawlerSourceIDs(ctx, "scriptcrawler", "crawler-a")
|
||||
if err != nil {
|
||||
t.Fatalf("ListSpider91Viewkeys: %v", err)
|
||||
t.Fatalf("ListCrawlerSourceIDs: %v", err)
|
||||
}
|
||||
if len(viewkeys) != 1 || viewkeys[0] != "vk004" {
|
||||
t.Fatalf("viewkeys = %#v, want [vk004]", viewkeys)
|
||||
if len(sourceIDs) != 1 || sourceIDs[0] != "source004" {
|
||||
t.Fatalf("source ids = %#v, want [source004]", sourceIDs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestListVideosKeywordMatchesFileName(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "p115-115-sone-089-4k",
|
||||
DriveID: "drive",
|
||||
FileID: "file-sone-089-4k",
|
||||
FileName: "www.98T.la@sone-089-4k.mp4",
|
||||
Title: "www.98T.la@sone-089",
|
||||
Author: "4k",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
items, total, err := cat.ListVideos(ctx, ListParams{
|
||||
Keyword: "www.98T.la@sone-089-4k.mp4",
|
||||
Page: 1,
|
||||
PageSize: 10,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("list videos: %v", err)
|
||||
}
|
||||
if total != 1 {
|
||||
t.Fatalf("total = %d, want 1", total)
|
||||
}
|
||||
if len(items) != 1 || items[0].ID != "p115-115-sone-089-4k" {
|
||||
t.Fatalf("items = %#v, want seeded video", items)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestIncrementViewStoresLastViewedAt(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "video-1",
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
Title: "Video 1",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
if _, err := cat.IncrementView(ctx, "video-1"); err != nil {
|
||||
t.Fatalf("increment view: %v", err)
|
||||
}
|
||||
got, err := cat.GetVideo(ctx, "video-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.Views != 1 {
|
||||
t.Fatalf("views = %d, want 1", got.Views)
|
||||
}
|
||||
if got.LastViewedAt.IsZero() {
|
||||
t.Fatal("last viewed time was not stored")
|
||||
}
|
||||
}
|
||||
|
||||
func TestListVideosRecentSortUsesLastViewedAt(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "old-view", DriveID: "drive", FileID: "old-view", Title: "Old View", PublishedAt: now.Add(3 * time.Hour), CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "recent-view", DriveID: "drive", FileID: "recent-view", Title: "Recent View", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "unviewed", DriveID: "drive", FileID: "unviewed", Title: "Unviewed", PublishedAt: now.Add(4 * time.Hour), CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`UPDATE videos SET last_viewed_at = CASE id
|
||||
WHEN 'old-view' THEN ?
|
||||
WHEN 'recent-view' THEN ?
|
||||
ELSE 0
|
||||
END`,
|
||||
now.Add(-time.Hour).UnixMilli(),
|
||||
now.Add(time.Hour).UnixMilli(),
|
||||
); err != nil {
|
||||
t.Fatalf("seed last_viewed_at: %v", err)
|
||||
}
|
||||
|
||||
items, _, err := cat.ListVideos(ctx, ListParams{Sort: "recent", Page: 1, PageSize: 3})
|
||||
if err != nil {
|
||||
t.Fatalf("list recent videos: %v", err)
|
||||
}
|
||||
if len(items) != 3 {
|
||||
t.Fatalf("items = %d, want 3", len(items))
|
||||
}
|
||||
got := []string{items[0].ID, items[1].ID, items[2].ID}
|
||||
want := []string{"recent-view", "old-view", "unviewed"}
|
||||
for i := range want {
|
||||
if got[i] != want[i] {
|
||||
t.Fatalf("recent order = %#v, want %#v", got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -21,13 +21,17 @@ CREATE TABLE IF NOT EXISTS videos (
|
||||
thumbnail_failures INTEGER DEFAULT 0, -- consecutive transient thumbnail generation failures
|
||||
preview_file_id TEXT, -- deprecated: 旧版回写网盘后的预览视频 file id
|
||||
preview_local TEXT, -- 本地预览视频路径(兜底)
|
||||
preview_status TEXT DEFAULT 'pending', -- pending / ready / failed
|
||||
preview_status TEXT DEFAULT 'pending', -- pending / ready / failed / disabled
|
||||
transcode_status TEXT DEFAULT '', -- '' / pending / ready / skipped / failed(浏览器兼容性转码)
|
||||
transcode_error TEXT DEFAULT '',
|
||||
transcoded_file_id TEXT DEFAULT '', -- 转码产物在同一 drive 上的 fileID,播放源优先用它
|
||||
transcoded_size INTEGER DEFAULT 0,
|
||||
views INTEGER DEFAULT 0,
|
||||
last_viewed_at INTEGER DEFAULT 0,
|
||||
favorites INTEGER DEFAULT 0,
|
||||
comments INTEGER DEFAULT 0,
|
||||
likes INTEGER DEFAULT 0,
|
||||
dislikes INTEGER DEFAULT 0,
|
||||
category TEXT,
|
||||
hidden INTEGER DEFAULT 0, -- 1 = hidden from public display
|
||||
tags_manual INTEGER DEFAULT 0, -- 1 = user explicitly curated tags
|
||||
badges TEXT, -- JSON array
|
||||
@@ -70,7 +74,7 @@ CREATE TABLE IF NOT EXISTS deleted_tags (
|
||||
deleted_at INTEGER NOT NULL
|
||||
);
|
||||
|
||||
-- 管理员显式删除过的视频。用于防止后续扫描 / spider91 爬虫把同一个源文件
|
||||
-- 管理员显式删除过的视频。用于防止后续扫描 / 爬虫把同一个源文件
|
||||
-- 再次入库;不代表原始云盘文件已被删除。
|
||||
CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
id TEXT PRIMARY KEY,
|
||||
@@ -79,6 +83,7 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
content_hash TEXT NOT NULL DEFAULT '',
|
||||
file_name TEXT NOT NULL DEFAULT '',
|
||||
size_bytes INTEGER NOT NULL DEFAULT 0,
|
||||
reason TEXT NOT NULL DEFAULT '',
|
||||
deleted_at INTEGER NOT NULL
|
||||
);
|
||||
|
||||
@@ -110,14 +115,14 @@ CREATE INDEX IF NOT EXISTS idx_crawler_seen_sources_drive
|
||||
-- 网盘账户
|
||||
CREATE TABLE IF NOT EXISTS drives (
|
||||
id TEXT PRIMARY KEY,
|
||||
kind TEXT NOT NULL, -- quark / p115 / p123 / pikpak / wopan / onedrive / googledrive / localstorage / spider91
|
||||
kind TEXT NOT NULL, -- quark / p115 / p123 / pikpak / wopan / guangyapan / onedrive / googledrive / localstorage / scriptcrawler
|
||||
name TEXT NOT NULL,
|
||||
root_id TEXT NOT NULL DEFAULT '0',
|
||||
scan_root_id TEXT, -- deprecated: 扫描起点固定等于 root_id
|
||||
credentials TEXT, -- JSON: cookie / refresh_token 等
|
||||
status TEXT DEFAULT 'disconnected', -- disconnected / ok / error
|
||||
last_error TEXT,
|
||||
-- 是否给该盘生成预览视频/封面:1 开 / 0 关。
|
||||
-- 是否给该盘生成预览视频:1 开 / 0 关。封面生成不受影响。
|
||||
-- 替代了早期的全局 preview.enabled 设置(保留旧 setting 行不再读)。
|
||||
teaser_enabled INTEGER NOT NULL DEFAULT 1,
|
||||
-- 扫描时要跳过的目录 ID 集合(JSON array of string)。命中其中任意一个的目录及其
|
||||
|
||||
@@ -165,171 +165,3 @@ func TestRandomVideosWithReadyThumbnailsExcluding(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandomVideosForPreferredVideoChoosesLeastPopulatedTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "current", DriveID: "drive", FileID: "f-current", Title: "current", Tags: []string{"common", "rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-1", DriveID: "drive", FileID: "f-common-1", Title: "common 1", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-2", DriveID: "drive", FileID: "f-common-2", Title: "common 2", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "rare-1", DriveID: "drive", FileID: "f-rare-1", Title: "rare 1", Tags: []string{"rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
tag, err := cat.LeastPopulatedVisibleUniqueTag(ctx, []string{"common", "rare"})
|
||||
if err != nil {
|
||||
t.Fatalf("least populated tag: %v", err)
|
||||
}
|
||||
if tag != "rare" {
|
||||
t.Fatalf("least populated tag = %q, want rare", tag)
|
||||
}
|
||||
|
||||
got, err := cat.RandomVideosForPreferredVideoExcluding(ctx, "current", []string{"current"}, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("random preferred: %v", err)
|
||||
}
|
||||
if len(got) != 1 || got[0].ID != "rare-1" {
|
||||
t.Fatalf("preferred result = %#v, want rare-1", videoIDs(got))
|
||||
}
|
||||
|
||||
got, err = cat.RandomVideosForPreferredVideoExcluding(ctx, "current", nil, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("random preferred without explicit exclude: %v", err)
|
||||
}
|
||||
if len(got) != 1 || got[0].ID == "current" {
|
||||
t.Fatalf("preferred result without explicit exclude = %#v, should not return current", videoIDs(got))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandomVideosForPreferredVideoFallsBackToFillBatch(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "current", DriveID: "drive", FileID: "f-current", Title: "current", Tags: []string{"common", "rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-1", DriveID: "drive", FileID: "f-common-1", Title: "common 1", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "common-2", DriveID: "drive", FileID: "f-common-2", Title: "common 2", Tags: []string{"common"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "rare-1", DriveID: "drive", FileID: "f-rare-1", Title: "rare 1", Tags: []string{"rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "hidden-rare", DriveID: "drive", FileID: "f-hidden-rare", Title: "hidden rare", Tags: []string{"rare"}, PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
if err := cat.HideVideo(ctx, "hidden-rare"); err != nil {
|
||||
t.Fatalf("hide hidden-rare: %v", err)
|
||||
}
|
||||
|
||||
got, err := cat.RandomVideosForPreferredVideoExcluding(ctx, "current", []string{"current"}, 3)
|
||||
if err != nil {
|
||||
t.Fatalf("random preferred: %v", err)
|
||||
}
|
||||
ids := videoIDs(got)
|
||||
if len(ids) != 3 {
|
||||
t.Fatalf("result ids = %#v, want 3 items", ids)
|
||||
}
|
||||
for _, excluded := range []string{"current", "hidden-rare"} {
|
||||
if hasVideoID(ids, excluded) {
|
||||
t.Fatalf("result ids = %#v, should not include %s", ids, excluded)
|
||||
}
|
||||
}
|
||||
if !hasVideoID(ids, "rare-1") {
|
||||
t.Fatalf("result ids = %#v, want rare-1 from least populated tag", ids)
|
||||
}
|
||||
if len(uniqueVideoIDs(ids)) != len(ids) {
|
||||
t.Fatalf("result ids = %#v, want no duplicates", ids)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandomVideosForPreferredVideoFallbacksWhenPreferenceUnavailable(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, v := range []*Video{
|
||||
{ID: "untagged", DriveID: "drive", FileID: "f-untagged", Title: "untagged", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "visible-1", DriveID: "drive", FileID: "f-visible-1", Title: "visible 1", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
{ID: "visible-2", DriveID: "drive", FileID: "f-visible-2", Title: "visible 2", PublishedAt: now, CreatedAt: now, UpdatedAt: now},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
got, err := cat.RandomVideosForPreferredVideoExcluding(ctx, "missing", []string{"untagged"}, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("random missing preferred: %v", err)
|
||||
}
|
||||
if !sameVideoIDSet(videoIDs(got), []string{"visible-1", "visible-2"}) {
|
||||
t.Fatalf("missing preferred ids = %#v, want visible fallback videos", videoIDs(got))
|
||||
}
|
||||
|
||||
got, err = cat.RandomVideosForPreferredVideoExcluding(ctx, "untagged", []string{"untagged"}, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("random untagged preferred: %v", err)
|
||||
}
|
||||
if !sameVideoIDSet(videoIDs(got), []string{"visible-1", "visible-2"}) {
|
||||
t.Fatalf("untagged preferred ids = %#v, want visible fallback videos", videoIDs(got))
|
||||
}
|
||||
}
|
||||
|
||||
func videoIDs(videos []*Video) []string {
|
||||
ids := make([]string, 0, len(videos))
|
||||
for _, v := range videos {
|
||||
ids = append(ids, v.ID)
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
// hasVideoID reports whether want occurs in ids.
func hasVideoID(ids []string, want string) bool {
	for i := 0; i < len(ids); i++ {
		if ids[i] == want {
			return true
		}
	}
	return false
}
|
||||
|
||||
// uniqueVideoIDs collapses ids into a set; the length of the result is the
// number of distinct ids.
func uniqueVideoIDs(ids []string) map[string]struct{} {
	distinct := make(map[string]struct{}, len(ids))
	for i := range ids {
		distinct[ids[i]] = struct{}{}
	}
	return distinct
}
|
||||
|
||||
// sameVideoIDSet reports whether a and b hold the same ids with the same
// multiplicities, ignoring order.
func sameVideoIDSet(a, b []string) bool {
	if len(b) != len(a) {
		return false
	}
	// Count occurrences in a, then consume them with b. Lengths are equal, so
	// running out of a count is the only way the two can differ.
	remaining := make(map[string]int, len(a))
	for _, id := range a {
		remaining[id]++
	}
	for _, id := range b {
		left := remaining[id]
		if left == 0 {
			return false
		}
		remaining[id] = left - 1
	}
	return true
}
|
||||
|
||||
@@ -66,6 +66,30 @@ func (c *Catalog) migrate(ctx context.Context) error {
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "thumbnail_failures", "INTEGER DEFAULT 0"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "last_viewed_at", "INTEGER DEFAULT 0"); err != nil {
|
||||
return err
|
||||
}
|
||||
// videos.transcode_*:浏览器兼容性转码状态。
|
||||
// status:''=未检测 / pending=已入队 / ready=已转码 / skipped=检测后无需转码 / failed=失败。
|
||||
// transcoded_file_id 指向转码产物在同一 drive 上的 fileID,播放源优先使用它。
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcode_status", "TEXT DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcode_error", "TEXT DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcoded_file_id", "TEXT DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "videos", "transcoded_size", "INTEGER DEFAULT 0"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.dropColumnIfExists(ctx, "videos", "category"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.ensureBaseVideoIndexes(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
// drives.teaser_enabled:每盘预览视频开关,替代旧的全局 preview.enabled。
|
||||
// 升级路径:直接让 ALTER TABLE 的 DEFAULT 1 兜底 —— 每个现存 drive 都默认开启,
|
||||
// 不读旧的 settings.preview.enabled 字段。这样老用户即便之前关过全局开关,
|
||||
@@ -87,10 +111,14 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
content_hash TEXT NOT NULL DEFAULT '',
|
||||
file_name TEXT NOT NULL DEFAULT '',
|
||||
size_bytes INTEGER NOT NULL DEFAULT 0,
|
||||
reason TEXT NOT NULL DEFAULT '',
|
||||
deleted_at INTEGER NOT NULL
|
||||
)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addColumnIfMissing(ctx, "deleted_videos", "reason", "TEXT NOT NULL DEFAULT ''"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.syncDriveScanRootIDToRootID(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -109,6 +137,9 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
if err := c.reconcileThumbnailStatusOnce(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.requeueSkippedPreviews(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_content_hash ON videos(content_hash)`); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -127,6 +158,9 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_visible_pub ON videos(COALESCE(hidden, 0), published_at DESC)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_last_viewed ON videos(last_viewed_at DESC)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := c.db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_videos_file_name_size ON videos(file_name, size_bytes)`); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -151,9 +185,6 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
if err := c.collapseAVCodeTags(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.createCollectionTagsFromCategories(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.classifySystemTags(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -163,7 +194,7 @@ CREATE TABLE IF NOT EXISTS deleted_videos (
|
||||
if err := c.clearRemoteP123ThumbnailsOnce(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.clearRemoteNonSpider91Thumbnails(ctx); err != nil {
|
||||
if err := c.clearRemoteThumbnails(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.hideZeroSizeVideosFromKnownDrives(ctx); err != nil {
|
||||
@@ -180,6 +211,172 @@ func (c *Catalog) addColumnIfMissing(ctx context.Context, table, column, definit
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) dropColumnIfExists(ctx context.Context, table, column string) error {
|
||||
rows, err := c.db.QueryContext(ctx, `PRAGMA table_info(`+table+`)`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rows.Close()
|
||||
found := false
|
||||
for rows.Next() {
|
||||
var cid int
|
||||
var name, typ string
|
||||
var notNull int
|
||||
var defaultValue any
|
||||
var pk int
|
||||
if err := rows.Scan(&cid, &name, &typ, ¬Null, &defaultValue, &pk); err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.EqualFold(name, column) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
_ = rows.Close()
|
||||
return err
|
||||
}
|
||||
if err := rows.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
if !found {
|
||||
return nil
|
||||
}
|
||||
if _, err = c.db.ExecContext(ctx, `ALTER TABLE `+table+` DROP COLUMN `+column); err == nil {
|
||||
return nil
|
||||
}
|
||||
if table == "videos" && strings.EqualFold(column, "category") {
|
||||
log.Printf("[catalog] native drop column videos.category failed, rebuilding videos table without category: %v", err)
|
||||
return c.rebuildVideosTableWithoutCategory(ctx)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) ensureBaseVideoIndexes(ctx context.Context) error {
|
||||
for _, stmt := range []string{
|
||||
`CREATE INDEX IF NOT EXISTS idx_videos_drive ON videos(drive_id, file_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_videos_pub ON videos(published_at DESC)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_videos_views ON videos(views DESC)`,
|
||||
} {
|
||||
if _, err := c.db.ExecContext(ctx, stmt); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// currentVideoColumnNames lists the columns of the videos table in the same
// order as the CREATE TABLE statement in createVideosWithoutCategorySQL. It is
// joined into the column list used when copying rows during the table rebuild.
var currentVideoColumnNames = []string{
	// identity and file metadata
	"id", "drive_id", "file_id", "file_name",
	// fingerprinting
	"content_hash", "sampled_sha256", "fingerprint_status", "fingerprint_error",
	// descriptive fields
	"parent_id", "title", "author", "tags",
	"duration_seconds", "size_bytes", "ext", "quality",
	// thumbnail state
	"thumbnail_url", "thumbnail_status", "thumbnail_failures",
	// preview state
	"preview_file_id", "preview_local", "preview_status",
	// transcode state
	"transcode_status", "transcode_error", "transcoded_file_id", "transcoded_size",
	// counters
	"views", "last_viewed_at", "favorites", "comments", "likes", "dislikes",
	// flags and free-form fields
	"hidden", "tags_manual", "badges", "description",
	// timestamps
	"published_at", "created_at", "updated_at",
}
|
||||
|
||||
// createVideosWithoutCategorySQL is the CREATE TABLE statement for the
// temporary table videos_category_drop_new: the videos schema with every
// column named in currentVideoColumnNames and no category column. The rebuild
// path creates this table, copies the rows over, and renames it to videos.
const createVideosWithoutCategorySQL = `
CREATE TABLE videos_category_drop_new (
id TEXT PRIMARY KEY,
drive_id TEXT NOT NULL,
file_id TEXT NOT NULL,
file_name TEXT DEFAULT '',
content_hash TEXT DEFAULT '',
sampled_sha256 TEXT DEFAULT '',
fingerprint_status TEXT DEFAULT 'pending',
fingerprint_error TEXT DEFAULT '',
parent_id TEXT,
title TEXT NOT NULL,
author TEXT,
tags TEXT,
duration_seconds INTEGER DEFAULT 0,
size_bytes INTEGER DEFAULT 0,
ext TEXT,
quality TEXT,
thumbnail_url TEXT,
thumbnail_status TEXT DEFAULT 'pending',
thumbnail_failures INTEGER DEFAULT 0,
preview_file_id TEXT,
preview_local TEXT,
preview_status TEXT DEFAULT 'pending',
transcode_status TEXT DEFAULT '',
transcode_error TEXT DEFAULT '',
transcoded_file_id TEXT DEFAULT '',
transcoded_size INTEGER DEFAULT 0,
views INTEGER DEFAULT 0,
last_viewed_at INTEGER DEFAULT 0,
favorites INTEGER DEFAULT 0,
comments INTEGER DEFAULT 0,
likes INTEGER DEFAULT 0,
dislikes INTEGER DEFAULT 0,
hidden INTEGER DEFAULT 0,
tags_manual INTEGER DEFAULT 0,
badges TEXT,
description TEXT,
published_at INTEGER NOT NULL,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
)`
|
||||
|
||||
func (c *Catalog) rebuildVideosTableWithoutCategory(ctx context.Context) error {
|
||||
tx, err := c.db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
if _, err := tx.ExecContext(ctx, `DROP TABLE IF EXISTS videos_category_drop_new`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, createVideosWithoutCategorySQL); err != nil {
|
||||
return err
|
||||
}
|
||||
cols := strings.Join(currentVideoColumnNames, ", ")
|
||||
if _, err := tx.ExecContext(ctx,
|
||||
`INSERT INTO videos_category_drop_new (`+cols+`) SELECT `+cols+` FROM videos`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, `DROP TABLE videos`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, `ALTER TABLE videos_category_drop_new RENAME TO videos`); err != nil {
|
||||
return err
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
// addColumnIfMissingReportNew 与 addColumnIfMissing 同步,但额外返回 added=true 表示
|
||||
// 本次确实创建了新列(即旧 schema 缺这列),方便调用方仅在迁移路径里补做一次性
|
||||
// 数据初始化(如把全局 setting 同步到新 per-drive 字段)。
|
||||
@@ -281,6 +478,24 @@ UPDATE videos
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) requeueSkippedPreviews(ctx context.Context) error {
|
||||
res, err := c.db.ExecContext(ctx, `
|
||||
UPDATE videos
|
||||
SET preview_file_id = '',
|
||||
preview_local = '',
|
||||
preview_status = 'pending',
|
||||
updated_at = ?
|
||||
WHERE COALESCE(preview_status, 'pending') = 'skipped'
|
||||
`, time.Now().UnixMilli())
|
||||
if err != nil {
|
||||
return fmt.Errorf("requeue skipped previews: %w", err)
|
||||
}
|
||||
if affected, err := res.RowsAffected(); err == nil && affected > 0 {
|
||||
log.Printf("[catalog] requeued %d skipped preview(s) for generation", affected)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) clearVolatileOneDriveThumbnails(ctx context.Context) error {
|
||||
// 把 OneDrive 过期的 mediap.svc.ms thumb URL 清空,让 worker 重新抽帧生成本地封面。
|
||||
// 同步把 thumbnail_status 重置为 'pending':清空后 url 是空的,本应进 worker 重做,
|
||||
@@ -345,10 +560,9 @@ func (c *Catalog) clearRemoteP123ThumbnailsOnce(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) clearRemoteNonSpider91Thumbnails(ctx context.Context) error {
|
||||
// 非 91Spider 视频不再使用网盘侧返回的远程缩略图。清空历史 http/https
|
||||
// thumbnail_url 后,封面 worker 会重新从视频中间帧生成本地 /p/thumb/<id>。
|
||||
// 91Spider 的封面是爬虫下载后保存到本地 /p/thumb/<id>,不受这条规则影响。
|
||||
func (c *Catalog) clearRemoteThumbnails(ctx context.Context) error {
|
||||
// 不再使用网盘侧返回的远程缩略图。清空历史 http/https thumbnail_url 后,
|
||||
// 封面 worker 会重新从视频中间帧生成本地 /p/thumb/<id>。
|
||||
res, err := c.db.ExecContext(ctx, `
|
||||
UPDATE videos
|
||||
SET thumbnail_url = '',
|
||||
@@ -359,18 +573,12 @@ UPDATE videos
|
||||
lower(COALESCE(thumbnail_url, '')) LIKE 'http://%'
|
||||
OR lower(COALESCE(thumbnail_url, '')) LIKE 'https://%'
|
||||
)
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM drives
|
||||
WHERE drives.id = videos.drive_id
|
||||
AND drives.kind = 'spider91'
|
||||
)
|
||||
`, time.Now().UnixMilli())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if affected, err := res.RowsAffected(); err == nil && affected > 0 {
|
||||
log.Printf("[catalog] cleared %d remote non-91Spider thumbnail(s) for local regeneration", affected)
|
||||
log.Printf("[catalog] cleared %d remote thumbnail(s) for local regeneration", affected)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -458,61 +666,6 @@ WHERE COALESCE(tags, '') NOT IN ('', '[]', 'null')
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) createCollectionTagsFromCategories(ctx context.Context) error {
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT category, COUNT(*) FROM videos
|
||||
WHERE COALESCE(category, '') != ''
|
||||
GROUP BY category`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
type categoryStat struct {
|
||||
category string
|
||||
count int
|
||||
}
|
||||
var categories []categoryStat
|
||||
for rows.Next() {
|
||||
var stat categoryStat
|
||||
if err := rows.Scan(&stat.category, &stat.count); err != nil {
|
||||
return err
|
||||
}
|
||||
categories = append(categories, stat)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rows.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, stat := range categories {
|
||||
if isAVCodePollutedLabel(stat.category) {
|
||||
if _, err := c.ensureTag(ctx, avTagLabel, fixedtags.AliasesFor(avTagLabel), "system"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addTagToVideosByCategory(ctx, stat.category, avTagLabel, "auto"); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
if stat.count < 3 {
|
||||
continue
|
||||
}
|
||||
if !LooksLikeCollectionTag(stat.category) {
|
||||
continue
|
||||
}
|
||||
if c.tagDeleted(ctx, stat.category) {
|
||||
continue
|
||||
}
|
||||
if _, err := c.ensureTag(ctx, stat.category, nil, "collection"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addCollectionTagToVideos(ctx, stat.category); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) CreateTagAndClassify(ctx context.Context, label string, aliases []string, source string) (int, error) {
|
||||
tag, err := c.ensureTag(ctx, label, aliases, source)
|
||||
if err != nil {
|
||||
@@ -806,41 +959,6 @@ func (c *Catalog) MatchTags(ctx context.Context, text string) ([]string, error)
|
||||
return sortLabelsByTagOrder(tags, uniqueStrings(out)), nil
|
||||
}
|
||||
|
||||
func (c *Catalog) EnsureCollectionTag(ctx context.Context, label string) (string, bool, error) {
|
||||
label = cleanTagLabel(label)
|
||||
if isAVCodePollutedLabel(label) {
|
||||
if _, err := c.ensureTag(ctx, avTagLabel, fixedtags.AliasesFor(avTagLabel), "system"); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
if err := c.addTagToVideosByCategory(ctx, label, avTagLabel, "auto"); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
return avTagLabel, true, nil
|
||||
}
|
||||
if !LooksLikeCollectionTag(label) {
|
||||
return "", false, nil
|
||||
}
|
||||
if c.tagDeleted(ctx, label) {
|
||||
return "", false, nil
|
||||
}
|
||||
if !c.tagExists(ctx, label) {
|
||||
count, err := c.categoryVideoCount(ctx, label)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
if count < 2 {
|
||||
return "", false, nil
|
||||
}
|
||||
}
|
||||
if _, err := c.ensureTag(ctx, label, nil, "collection"); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
if err := c.addCollectionTagToVideos(ctx, label); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
return label, true, nil
|
||||
}
|
||||
|
||||
func (c *Catalog) ensureTag(ctx context.Context, label string, aliases []string, source string) (Tag, error) {
|
||||
label = cleanTagLabel(label)
|
||||
if label == "" {
|
||||
@@ -893,7 +1011,7 @@ func (c *Catalog) classifyTag(ctx context.Context, tag Tag) (int, error) {
|
||||
return 0, err
|
||||
}
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT id, title, COALESCE(author, ''), COALESCE(category, ''), COALESCE(tags_manual, 0)
|
||||
SELECT id, title, COALESCE(author, ''), COALESCE(tags_manual, 0)
|
||||
FROM videos`)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
@@ -902,15 +1020,15 @@ FROM videos`)
|
||||
|
||||
classified := 0
|
||||
for rows.Next() {
|
||||
var videoID, title, author, category string
|
||||
var videoID, title, author string
|
||||
var manual int
|
||||
if err := rows.Scan(&videoID, &title, &author, &category, &manual); err != nil {
|
||||
if err := rows.Scan(&videoID, &title, &author, &manual); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if manual == 1 {
|
||||
continue
|
||||
}
|
||||
matcher := normalizeTagText(title + " " + author + " " + category)
|
||||
matcher := normalizeTagText(title + " " + author)
|
||||
if !matcher.contains(tag.Label) {
|
||||
matchedAlias := false
|
||||
for _, alias := range tag.Aliases {
|
||||
@@ -1042,54 +1160,6 @@ func (c *Catalog) insertVideoTag(ctx context.Context, videoID string, tagID int6
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) addCollectionTagToVideos(ctx context.Context, category string) error {
|
||||
return c.addTagToVideosByCategory(ctx, category, category, "auto")
|
||||
}
|
||||
|
||||
func (c *Catalog) addTagToVideosByCategory(ctx context.Context, category, label, source string) error {
|
||||
tag, err := c.getTagByLabel(ctx, label)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT v.id
|
||||
FROM videos v
|
||||
WHERE v.category = ?
|
||||
AND COALESCE(v.tags_manual, 0) = 0
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM video_tags vt
|
||||
WHERE vt.video_id = v.id
|
||||
AND vt.tag_id = ?
|
||||
)`, category, tag.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var videoIDs []string
|
||||
for rows.Next() {
|
||||
var videoID string
|
||||
if err := rows.Scan(&videoID); err != nil {
|
||||
return err
|
||||
}
|
||||
videoIDs = append(videoIDs, videoID)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rows.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, videoID := range videoIDs {
|
||||
if err := c.insertVideoTag(ctx, videoID, tag.ID, source); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.syncVideoTagsJSON(ctx, videoID, false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Catalog) collapseAVCodeTags(ctx context.Context) error {
|
||||
if _, err := c.ensureTag(ctx, avTagLabel, fixedtags.AliasesFor(avTagLabel), "system"); err != nil {
|
||||
return err
|
||||
@@ -1279,12 +1349,6 @@ func (c *Catalog) restoreDeletedTag(ctx context.Context, label string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Catalog) categoryVideoCount(ctx context.Context, category string) (int, error) {
|
||||
var count int
|
||||
err := c.db.QueryRowContext(ctx, `SELECT COUNT(*) FROM videos WHERE category = ?`, category).Scan(&count)
|
||||
return count, err
|
||||
}
|
||||
|
||||
func (c *Catalog) getTagByLabelTx(ctx context.Context, tx *sql.Tx, label string) (Tag, error) {
|
||||
row := tx.QueryRowContext(ctx,
|
||||
`SELECT id, label, aliases, source, 0 FROM tags WHERE label = ? COLLATE NOCASE`,
|
||||
@@ -1434,46 +1498,6 @@ func isShortASCIIWord(s string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func LooksLikeCollectionTag(label string) bool {
|
||||
label = cleanTagLabel(label)
|
||||
if label == "" {
|
||||
return false
|
||||
}
|
||||
if isAVCodePollutedLabel(label) {
|
||||
return false
|
||||
}
|
||||
runes := []rune(label)
|
||||
if len(runes) < 2 || len(runes) > 24 {
|
||||
return false
|
||||
}
|
||||
lower := strings.ToLower(label)
|
||||
blocked := map[string]bool{
|
||||
"v": true, "pv": true, "my pack": true, "my upload": true,
|
||||
"视频": true, "视频1": true, "第一直播": true, "男人必备": true,
|
||||
"瑟女聚集地": true, "成人色游": true, "ai女友": true,
|
||||
}
|
||||
if blocked[lower] {
|
||||
return false
|
||||
}
|
||||
hasLetter := false
|
||||
for _, r := range label {
|
||||
if unicode.IsLetter(r) {
|
||||
hasLetter = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasLetter {
|
||||
return false
|
||||
}
|
||||
for _, r := range label {
|
||||
switch r {
|
||||
case ',', '。', '!', '?', ';', '、', ':', '~', '~':
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func IsAVCode(label string) bool {
|
||||
label = cleanTagLabel(label)
|
||||
if label == "" {
|
||||
@@ -1555,9 +1579,7 @@ func sortLabelsByTagOrder(tags []Tag, labels []string) []string {
|
||||
return labels
|
||||
}
|
||||
|
||||
// pruneOrphanCollectionTags 删除所有 source='collection' 且不再被任何 video_tags 引用的标签。
|
||||
// 在 migrate 末尾调用,相当于启动时自愈:之前 DeleteVideo 没顺带清理留下的孤儿,会在重启时被收回。
|
||||
// 只动 collection:system 是固定标签需保留;user 是管理员手动建的;auto/legacy 默认有视频在引用。
|
||||
// pruneOrphanCollectionTags 删除旧版本生成的 source='collection' 孤儿标签。
|
||||
func (c *Catalog) pruneOrphanCollectionTags(ctx context.Context) error {
|
||||
_, err := c.db.ExecContext(ctx, `
|
||||
DELETE FROM tags
|
||||
@@ -1566,8 +1588,7 @@ DELETE FROM tags
|
||||
return err
|
||||
}
|
||||
|
||||
// pruneOrphanCollectionTagsByID 在事务里检查一组候选 tag_id,删除其中
|
||||
// source='collection' 且已经没有视频引用的标签。供 DeleteVideo 调用。
|
||||
// pruneOrphanCollectionTagsByID 在事务里检查并删除旧版本生成的孤儿 collection 标签。
|
||||
func pruneOrphanCollectionTagsByID(ctx context.Context, tx *sql.Tx, tagIDs []int64) error {
|
||||
for _, tagID := range tagIDs {
|
||||
var src string
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -136,7 +137,6 @@ func TestCreateTagAndClassifyAddsTagToMatchingExistingVideos(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
Title: "清纯短发合集",
|
||||
Category: "普通目录",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -148,7 +148,6 @@ func TestCreateTagAndClassifyAddsTagToMatchingExistingVideos(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: "file-2",
|
||||
Title: "普通标题",
|
||||
Category: "普通目录",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -232,52 +231,6 @@ func TestDeleteTagRemovesTagFromVideos(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteTagSuppressesAutomaticCollectionRecreation(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
for _, id := range []string{"video-1", "video-2"} {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: id,
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "合集视频",
|
||||
Category: "sunny",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
if label, ok, err := cat.EnsureCollectionTag(ctx, "sunny"); err != nil || !ok || label != "sunny" {
|
||||
t.Fatalf("ensure collection = %q, %v, %v; want sunny true nil", label, ok, err)
|
||||
}
|
||||
tag := mustTagByLabel(t, ctx, cat, "sunny")
|
||||
if _, err := cat.DeleteTag(ctx, tag.ID); err != nil {
|
||||
t.Fatalf("delete tag: %v", err)
|
||||
}
|
||||
|
||||
if label, ok, err := cat.EnsureCollectionTag(ctx, "sunny"); err != nil || ok || label != "" {
|
||||
t.Fatalf("ensure deleted collection = %q, %v, %v; want empty false nil", label, ok, err)
|
||||
}
|
||||
for _, tag := range mustListTags(t, ctx, cat) {
|
||||
if tag.Label == "sunny" {
|
||||
t.Fatal("deleted collection tag was recreated automatically")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateTagAndClassifyRestoresDeletedTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
@@ -343,13 +296,13 @@ func TestEnsureTagForVideoIDPrefixBackfillsSourceTag(t *testing.T) {
|
||||
id string
|
||||
manual bool
|
||||
}{
|
||||
{id: "spider91-91-spider-1200001"},
|
||||
{id: "spider91-91-spider-1200002", manual: true},
|
||||
{id: "spider91-other-1200003"},
|
||||
{id: "scriptcrawler-crawler-a-source001"},
|
||||
{id: "scriptcrawler-crawler-a-source002", manual: true},
|
||||
{id: "scriptcrawler-other-source003"},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: seed.id,
|
||||
DriveID: "91-spider",
|
||||
DriveID: "crawler-a",
|
||||
FileID: seed.id + ".mp4",
|
||||
Title: "legacy title without source text",
|
||||
PublishedAt: now,
|
||||
@@ -365,28 +318,28 @@ func TestEnsureTagForVideoIDPrefixBackfillsSourceTag(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
added, err := cat.EnsureTagForVideoIDPrefix(ctx, "spider91-91-spider-", "91porn", nil, "system")
|
||||
added, err := cat.EnsureTagForVideoIDPrefix(ctx, "scriptcrawler-crawler-a-", "crawler-tag", nil, "system")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure prefix tag: %v", err)
|
||||
}
|
||||
if added != 1 {
|
||||
t.Fatalf("added = %d, want 1", added)
|
||||
}
|
||||
got, err := cat.GetVideo(ctx, "spider91-91-spider-1200001")
|
||||
got, err := cat.GetVideo(ctx, "scriptcrawler-crawler-a-source001")
|
||||
if err != nil {
|
||||
t.Fatalf("get tagged video: %v", err)
|
||||
}
|
||||
if !sameStrings(got.Tags, []string{"91porn"}) {
|
||||
t.Fatalf("tagged video tags = %#v, want 91porn", got.Tags)
|
||||
if !sameStrings(got.Tags, []string{"crawler-tag"}) {
|
||||
t.Fatalf("tagged video tags = %#v, want crawler-tag", got.Tags)
|
||||
}
|
||||
manual, err := cat.GetVideo(ctx, "spider91-91-spider-1200002")
|
||||
manual, err := cat.GetVideo(ctx, "scriptcrawler-crawler-a-source002")
|
||||
if err != nil {
|
||||
t.Fatalf("get manual video: %v", err)
|
||||
}
|
||||
if len(manual.Tags) != 0 {
|
||||
t.Fatalf("manual video tags = %#v, want unchanged", manual.Tags)
|
||||
}
|
||||
other, err := cat.GetVideo(ctx, "spider91-other-1200003")
|
||||
other, err := cat.GetVideo(ctx, "scriptcrawler-other-source003")
|
||||
if err != nil {
|
||||
t.Fatalf("get other prefix video: %v", err)
|
||||
}
|
||||
@@ -486,7 +439,6 @@ func TestMigrateDoesNotRewriteAlreadySyncedVideoTags(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "巨乳后入合集",
|
||||
Category: "Better Call Saul S03",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -585,6 +537,25 @@ CREATE TABLE videos (
|
||||
)`); err != nil {
|
||||
t.Fatalf("create legacy videos table: %v", err)
|
||||
}
|
||||
nowMillis := time.Now().UnixMilli()
|
||||
if _, err := db.Exec(`
|
||||
INSERT INTO videos (
|
||||
id, drive_id, file_id, content_hash, parent_id, title, author, tags,
|
||||
duration_seconds, size_bytes, ext, quality, thumbnail_url, preview_file_id,
|
||||
preview_local, preview_status, views, favorites, comments, likes, dislikes,
|
||||
category, hidden, tags_manual, badges, description, published_at, created_at, updated_at
|
||||
) VALUES (
|
||||
'legacy-video', 'drive', 'file-legacy', 'hash-legacy', 'parent-1', 'Legacy Video', 'Legacy Author', '["旧标签"]',
|
||||
180, 1024, 'mp4', 'HD', '/thumb.jpg', 'preview-file',
|
||||
'/preview.mp4', 'ready', 7, 1, 2, 3, 4,
|
||||
'legacy-category', 0, 0, '["精选"]', 'legacy description', ?, ?, ?
|
||||
)`,
|
||||
nowMillis, nowMillis, nowMillis); err != nil {
|
||||
t.Fatalf("insert legacy video: %v", err)
|
||||
}
|
||||
if _, err := db.Exec(`CREATE INDEX idx_legacy_videos_category ON videos(category)`); err != nil {
|
||||
t.Fatalf("create legacy category index: %v", err)
|
||||
}
|
||||
if err := db.Close(); err != nil {
|
||||
t.Fatalf("close raw db: %v", err)
|
||||
}
|
||||
@@ -603,6 +574,45 @@ CREATE TABLE videos (
|
||||
if err := cat.db.QueryRow(`SELECT COALESCE(file_name, '') FROM videos LIMIT 1`).Scan(&fileNameDefault); err != nil && err != sql.ErrNoRows {
|
||||
t.Fatalf("query migrated file_name column: %v", err)
|
||||
}
|
||||
if fileNameDefault != "" {
|
||||
t.Fatalf("file_name default = %q, want empty", fileNameDefault)
|
||||
}
|
||||
if hasColumn(t, cat, "videos", "category") {
|
||||
t.Fatal("legacy category column was not dropped")
|
||||
}
|
||||
if indexExists(t, cat, "idx_legacy_videos_category") {
|
||||
t.Fatal("legacy category index was not dropped")
|
||||
}
|
||||
for _, index := range []string{"idx_videos_drive", "idx_videos_pub", "idx_videos_views"} {
|
||||
if !indexExists(t, cat, index) {
|
||||
t.Fatalf("base video index %s was not recreated", index)
|
||||
}
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
got, err := cat.GetVideo(ctx, "legacy-video")
|
||||
if err != nil {
|
||||
t.Fatalf("get migrated legacy video: %v", err)
|
||||
}
|
||||
if got.Title != "Legacy Video" || got.Author != "Legacy Author" || got.Views != 7 {
|
||||
t.Fatalf("migrated video lost data: %#v", got)
|
||||
}
|
||||
if !sameStrings(got.Tags, []string{"旧标签"}) {
|
||||
t.Fatalf("migrated video tags = %#v, want legacy tag preserved", got.Tags)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: "new-video",
|
||||
DriveID: "drive",
|
||||
FileID: "file-new",
|
||||
Title: "New Video",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert after migration: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetManualVideoTagsRejectsUnknownLabels(t *testing.T) {
|
||||
@@ -706,31 +716,6 @@ func TestCreateTagAndClassifyMapsAVCodeLabelToAV(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLooksLikeCollectionTagRejectsAVCodes(t *testing.T) {
|
||||
cases := []string{
|
||||
"DASS-499-C",
|
||||
"dass-499-c",
|
||||
"ADN-778",
|
||||
"SONE-247-C",
|
||||
"JUQ-502-UC",
|
||||
"ABF-032",
|
||||
"SSIS-233",
|
||||
"MIDA-607",
|
||||
"cc-1750027",
|
||||
"FC2-PPV-74663555",
|
||||
"ADN-778-FHD(1)",
|
||||
"ADN-778-中文字幕",
|
||||
"[44x.me]idbd-786",
|
||||
"NTRH-018_FHD_CH",
|
||||
"390JAC-233",
|
||||
}
|
||||
for _, label := range cases {
|
||||
if LooksLikeCollectionTag(label) {
|
||||
t.Fatalf("LooksLikeCollectionTag(%q) = true, want false", label)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateCollapsesAVCodeTagsIntoAV(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
@@ -759,7 +744,6 @@ func TestMigrateCollapsesAVCodeTagsIntoAV(t *testing.T) {
|
||||
FileID: seed.id,
|
||||
Title: seed.label + " sample",
|
||||
Tags: []string{seed.label},
|
||||
Category: seed.label,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -804,7 +788,7 @@ func TestMigrateCollapsesAVCodeTagsIntoAV(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateClearsRemoteNonSpiderThumbnailURLs(t *testing.T) {
|
||||
func TestMigrateClearsRemoteThumbnailURLs(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -848,14 +832,14 @@ func TestMigrateClearsRemoteNonSpiderThumbnailURLs(t *testing.T) {
|
||||
t.Fatalf("seed pikpak: %v", err)
|
||||
}
|
||||
if err := cat.UpsertDrive(ctx, &Drive{
|
||||
ID: "spider91-main",
|
||||
Kind: "spider91",
|
||||
Name: "91Spider",
|
||||
RootID: "root",
|
||||
ID: "crawler-main",
|
||||
Kind: "scriptcrawler",
|
||||
Name: "Crawler",
|
||||
RootID: "/",
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed spider91: %v", err)
|
||||
t.Fatalf("seed crawler: %v", err)
|
||||
}
|
||||
|
||||
videos := []*Video{
|
||||
@@ -895,11 +879,18 @@ func TestMigrateClearsRemoteNonSpiderThumbnailURLs(t *testing.T) {
|
||||
ThumbnailURL: "/p/thumb/p123-local-thumb-video",
|
||||
},
|
||||
{
|
||||
ID: "spider91-local-thumb-video",
|
||||
DriveID: "spider91-main",
|
||||
ID: "scriptcrawler-crawler-main-local-thumb",
|
||||
DriveID: "crawler-main",
|
||||
FileID: "file-6",
|
||||
Title: "91Spider local thumb",
|
||||
ThumbnailURL: "/p/thumb/spider91-local-thumb-video",
|
||||
Title: "Crawler local thumb",
|
||||
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-main-local-thumb",
|
||||
},
|
||||
{
|
||||
ID: "scriptcrawler-crawler-main-remote-thumb",
|
||||
DriveID: "crawler-main",
|
||||
FileID: "file-7",
|
||||
Title: "Crawler remote thumb",
|
||||
ThumbnailURL: "https://example.invalid/crawler-thumb.jpg",
|
||||
},
|
||||
}
|
||||
for _, v := range videos {
|
||||
@@ -962,12 +953,20 @@ func TestMigrateClearsRemoteNonSpiderThumbnailURLs(t *testing.T) {
|
||||
t.Fatalf("p123 local thumbnail = %q, want preserved", p123Local.ThumbnailURL)
|
||||
}
|
||||
|
||||
spider91Local, err := cat.GetVideo(ctx, "spider91-local-thumb-video")
|
||||
crawlerLocal, err := cat.GetVideo(ctx, "scriptcrawler-crawler-main-local-thumb")
|
||||
if err != nil {
|
||||
t.Fatalf("get spider91 local thumb video: %v", err)
|
||||
t.Fatalf("get crawler local thumb video: %v", err)
|
||||
}
|
||||
if spider91Local.ThumbnailURL != "/p/thumb/spider91-local-thumb-video" {
|
||||
t.Fatalf("spider91 local thumbnail = %q, want preserved", spider91Local.ThumbnailURL)
|
||||
if crawlerLocal.ThumbnailURL != "/p/thumb/scriptcrawler-crawler-main-local-thumb" {
|
||||
t.Fatalf("crawler local thumbnail = %q, want preserved", crawlerLocal.ThumbnailURL)
|
||||
}
|
||||
|
||||
crawlerRemote, err := cat.GetVideo(ctx, "scriptcrawler-crawler-main-remote-thumb")
|
||||
if err != nil {
|
||||
t.Fatalf("get crawler remote thumb video: %v", err)
|
||||
}
|
||||
if crawlerRemote.ThumbnailURL != "" {
|
||||
t.Fatalf("crawler remote thumbnail = %q, want cleared", crawlerRemote.ThumbnailURL)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1113,33 +1112,33 @@ func TestTagFilterMatchesCanonicalDuplicateVideo(t *testing.T) {
|
||||
UpdatedAt: now,
|
||||
},
|
||||
{
|
||||
ID: "spider91-dup-1",
|
||||
DriveID: "91-spider",
|
||||
ID: "scriptcrawler-crawler-a-dup-1",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "dup-1.mp4",
|
||||
Title: "Spider duplicate 1",
|
||||
Tags: []string{"91porn"},
|
||||
Title: "Crawler duplicate 1",
|
||||
Tags: []string{"crawler-tag"},
|
||||
Size: 1024,
|
||||
PublishedAt: now.Add(time.Second),
|
||||
CreatedAt: now.Add(time.Second),
|
||||
UpdatedAt: now.Add(time.Second),
|
||||
},
|
||||
{
|
||||
ID: "spider91-dup-2",
|
||||
DriveID: "91-spider",
|
||||
ID: "scriptcrawler-crawler-a-dup-2",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "dup-2.mp4",
|
||||
Title: "Spider duplicate 2",
|
||||
Tags: []string{"91porn"},
|
||||
Title: "Crawler duplicate 2",
|
||||
Tags: []string{"crawler-tag"},
|
||||
Size: 1024,
|
||||
PublishedAt: now.Add(2 * time.Second),
|
||||
CreatedAt: now.Add(2 * time.Second),
|
||||
UpdatedAt: now.Add(2 * time.Second),
|
||||
},
|
||||
{
|
||||
ID: "spider91-visible",
|
||||
DriveID: "91-spider",
|
||||
ID: "scriptcrawler-crawler-a-visible",
|
||||
DriveID: "crawler-a",
|
||||
FileID: "visible.mp4",
|
||||
Title: "Spider visible",
|
||||
Tags: []string{"91porn"},
|
||||
Title: "Crawler visible",
|
||||
Tags: []string{"crawler-tag"},
|
||||
Size: 2048,
|
||||
PublishedAt: now.Add(3 * time.Second),
|
||||
CreatedAt: now.Add(3 * time.Second),
|
||||
@@ -1150,16 +1149,16 @@ func TestTagFilterMatchesCanonicalDuplicateVideo(t *testing.T) {
|
||||
t.Fatalf("seed %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
for _, id := range []string{"pikpak-canonical", "spider91-dup-1", "spider91-dup-2"} {
|
||||
for _, id := range []string{"pikpak-canonical", "scriptcrawler-crawler-a-dup-1", "scriptcrawler-crawler-a-dup-2"} {
|
||||
if err := cat.UpdateVideoFingerprint(ctx, id, "same-sampled-sha256", "ready", ""); err != nil {
|
||||
t.Fatalf("fingerprint %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
if err := cat.UpdateVideoFingerprint(ctx, "spider91-visible", "unique-sampled-sha256", "ready", ""); err != nil {
|
||||
if err := cat.UpdateVideoFingerprint(ctx, "scriptcrawler-crawler-a-visible", "unique-sampled-sha256", "ready", ""); err != nil {
|
||||
t.Fatalf("fingerprint visible: %v", err)
|
||||
}
|
||||
|
||||
items, total, err := cat.ListVideos(ctx, ListParams{Tag: "91porn", Page: 1, PageSize: 10})
|
||||
items, total, err := cat.ListVideos(ctx, ListParams{Tag: "crawler-tag", Page: 1, PageSize: 10})
|
||||
if err != nil {
|
||||
t.Fatalf("list videos by tag: %v", err)
|
||||
}
|
||||
@@ -1170,13 +1169,13 @@ func TestTagFilterMatchesCanonicalDuplicateVideo(t *testing.T) {
|
||||
for _, item := range items {
|
||||
gotIDs[item.ID] = true
|
||||
}
|
||||
for _, want := range []string{"pikpak-canonical", "spider91-visible"} {
|
||||
for _, want := range []string{"pikpak-canonical", "scriptcrawler-crawler-a-visible"} {
|
||||
if !gotIDs[want] {
|
||||
t.Fatalf("tagged video ids = %#v, want %s", gotIDs, want)
|
||||
}
|
||||
}
|
||||
if got := mustTagByLabel(t, ctx, cat, "91porn").Count; got != 2 {
|
||||
t.Fatalf("91porn count = %d, want 2 visible canonical videos", got)
|
||||
if got := mustTagByLabel(t, ctx, cat, "crawler-tag").Count; got != 2 {
|
||||
t.Fatalf("crawler-tag count = %d, want 2 visible canonical videos", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1265,6 +1264,41 @@ func mustTagByLabel(t *testing.T, ctx context.Context, cat *Catalog, label strin
|
||||
return Tag{}
|
||||
}
|
||||
|
||||
func hasColumn(t *testing.T, cat *Catalog, table, column string) bool {
|
||||
t.Helper()
|
||||
rows, err := cat.db.Query(`PRAGMA table_info(` + table + `)`)
|
||||
if err != nil {
|
||||
t.Fatalf("query table info for %s: %v", table, err)
|
||||
}
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var cid int
|
||||
var name, typ string
|
||||
var notNull int
|
||||
var defaultValue any
|
||||
var pk int
|
||||
if err := rows.Scan(&cid, &name, &typ, &notNull, &defaultValue, &pk); err != nil {
|
||||
t.Fatalf("scan table info for %s: %v", table, err)
|
||||
}
|
||||
if strings.EqualFold(name, column) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
t.Fatalf("iterate table info for %s: %v", table, err)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func indexExists(t *testing.T, cat *Catalog, name string) bool {
|
||||
t.Helper()
|
||||
var count int
|
||||
if err := cat.db.QueryRow(`SELECT COUNT(*) FROM sqlite_schema WHERE type = 'index' AND name = ?`, name).Scan(&count); err != nil {
|
||||
t.Fatalf("query index %s: %v", name, err)
|
||||
}
|
||||
return count > 0
|
||||
}
|
||||
|
||||
func videoUpdatedAtByID(t *testing.T, ctx context.Context, cat *Catalog, ids ...string) map[string]int64 {
|
||||
t.Helper()
|
||||
out := make(map[string]int64, len(ids))
|
||||
@@ -1278,9 +1312,9 @@ func videoUpdatedAtByID(t *testing.T, ctx context.Context, cat *Catalog, ids ...
|
||||
return out
|
||||
}
|
||||
|
||||
// 删除 collection 标签的最后一个引用视频后,标签应当自动从 tags 表里消失。
|
||||
// 删除旧版本 collection 标签的最后一个引用视频后,标签应当自动从 tags 表里消失。
|
||||
// user/system 标签不受影响:用户/系统标签的语义由人维护,孤儿状态保留。
|
||||
func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
func TestDeleteVideoPrunesLegacyOrphanCollectionTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -1299,7 +1333,6 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: id,
|
||||
Category: "Better Call Saul S02",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -1308,20 +1341,28 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
label, ok, err := cat.EnsureCollectionTag(ctx, "Better Call Saul S02")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure collection tag: %v", err)
|
||||
nowMillis := now.UnixMilli()
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`INSERT INTO tags (label, aliases, source, created_at, updated_at) VALUES (?, '[]', 'collection', ?, ?)`,
|
||||
"Better Call Saul S02", nowMillis, nowMillis); err != nil {
|
||||
t.Fatalf("insert legacy collection tag: %v", err)
|
||||
}
|
||||
if !ok || label != "Better Call Saul S02" {
|
||||
t.Fatalf("ensure collection tag = %q ok=%v, want collection tag created", label, ok)
|
||||
var collectionTagID int64
|
||||
if err := cat.db.QueryRowContext(ctx, `SELECT id FROM tags WHERE label = ?`, "Better Call Saul S02").Scan(&collectionTagID); err != nil {
|
||||
t.Fatalf("lookup legacy collection tag: %v", err)
|
||||
}
|
||||
for _, id := range []string{"video-a", "video-b"} {
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`INSERT INTO video_tags (video_id, tag_id, source, created_at) VALUES (?, ?, 'auto', ?)`,
|
||||
id, collectionTagID, nowMillis); err != nil {
|
||||
t.Fatalf("attach legacy collection tag to %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
// 用户标签:手动建出来,让它和 video-a 关联,验证 user 标签不会被孤儿清理流程误删。
|
||||
if _, err := cat.CreateTagAndClassify(ctx, "用户标签", nil, "user"); err != nil {
|
||||
t.Fatalf("create user tag: %v", err)
|
||||
}
|
||||
if err := cat.SetManualVideoTags(ctx, "video-a", []string{"用户标签"}); err != nil {
|
||||
t.Fatalf("attach user tag: %v", err)
|
||||
if _, err := cat.db.ExecContext(ctx,
|
||||
`INSERT INTO tags (label, aliases, source, created_at, updated_at) VALUES (?, '[]', 'user', ?, ?)`,
|
||||
"用户标签", nowMillis, nowMillis); err != nil {
|
||||
t.Fatalf("insert user orphan tag: %v", err)
|
||||
}
|
||||
|
||||
collectionExists := func() bool {
|
||||
@@ -1337,7 +1378,7 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
t.Fatal("collection tag missing right after creation")
|
||||
}
|
||||
|
||||
// 删第一个视频:还有 video-b 在引用 collection 标签,应保留。
|
||||
// 删第一个视频:还有 video-b 在引用旧 collection 标签,应保留。
|
||||
if err := cat.DeleteVideo(ctx, "video-a"); err != nil {
|
||||
t.Fatalf("delete video-a: %v", err)
|
||||
}
|
||||
@@ -1345,7 +1386,7 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
t.Fatal("collection tag was pruned while another video still references it")
|
||||
}
|
||||
|
||||
// 删最后一个引用视频,collection 标签应当被同步清掉。
|
||||
// 删最后一个引用视频,旧 collection 标签应当被同步清掉。
|
||||
if err := cat.DeleteVideo(ctx, "video-b"); err != nil {
|
||||
t.Fatalf("delete video-b: %v", err)
|
||||
}
|
||||
@@ -1353,7 +1394,7 @@ func TestDeleteVideoPrunesOrphanCollectionTag(t *testing.T) {
|
||||
t.Fatal("orphan collection tag was not pruned after deleting the last referencing video")
|
||||
}
|
||||
|
||||
// 用户手动建的标签即使变成孤儿(已经因为 video-a 删除而失去引用)也必须保留。
|
||||
// 用户标签即使是孤儿也必须保留。
|
||||
var userCount int
|
||||
if err := cat.db.QueryRowContext(ctx,
|
||||
`SELECT COUNT(*) FROM tags WHERE label = ? AND source = 'user'`,
|
||||
@@ -1539,6 +1580,70 @@ func TestReconcileThumbnailStatusOnce(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRequeueSkippedPreviews(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
cases := []struct {
|
||||
id string
|
||||
status string
|
||||
local string
|
||||
fileID string
|
||||
wantStatus string
|
||||
wantLocal string
|
||||
wantFileID string
|
||||
}{
|
||||
{"preview-skipped", "skipped", "/tmp/old-preview.mp4", "old-preview-file", "pending", "", ""},
|
||||
{"preview-ready", "ready", "/tmp/ready-preview.mp4", "ready-preview-file", "ready", "/tmp/ready-preview.mp4", "ready-preview-file"},
|
||||
{"preview-failed", "failed", "/tmp/failed-preview.mp4", "failed-preview-file", "failed", "/tmp/failed-preview.mp4", "failed-preview-file"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: c.id, DriveID: "d", FileID: "source-" + c.id, Title: c.id,
|
||||
PreviewStatus: c.status, PreviewLocal: c.local, PreviewFileID: c.fileID,
|
||||
PublishedAt: now, CreatedAt: now, UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed %s: %v", c.id, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := cat.requeueSkippedPreviews(ctx); err != nil {
|
||||
t.Fatalf("requeue skipped previews: %v", err)
|
||||
}
|
||||
if err := cat.requeueSkippedPreviews(ctx); err != nil {
|
||||
t.Fatalf("second requeue skipped previews: %v", err)
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
got, err := cat.GetVideo(ctx, c.id)
|
||||
if err != nil {
|
||||
t.Fatalf("get %s: %v", c.id, err)
|
||||
}
|
||||
if got.PreviewStatus != c.wantStatus {
|
||||
t.Errorf("%s: preview status = %q, want %q", c.id, got.PreviewStatus, c.wantStatus)
|
||||
}
|
||||
if got.PreviewLocal != c.wantLocal {
|
||||
t.Errorf("%s: preview local = %q, want %q", c.id, got.PreviewLocal, c.wantLocal)
|
||||
}
|
||||
if got.PreviewFileID != c.wantFileID {
|
||||
t.Errorf("%s: preview file id = %q, want %q", c.id, got.PreviewFileID, c.wantFileID)
|
||||
}
|
||||
}
|
||||
|
||||
pending, err := cat.ListVideosByPreviewStatus(ctx, "d", "pending", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending previews: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != "preview-skipped" {
|
||||
t.Fatalf("pending previews = %#v, want only preview-skipped", pending)
|
||||
}
|
||||
}
|
||||
|
||||
// TestUpsertVideoSyncsThumbnailStatus 验证 scanner 创建/补回视频时
|
||||
// thumbnail_status 跟随 thumbnail_url 自动设。这是历史 bug 的修复回归测试 ——
|
||||
// 之前 UpsertVideo 的 SQL 不带 thumbnail_status 列,所有新视频都依赖
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestListHiddenVideosForMigration 验证:隐藏的视频不进可见列表,
|
||||
// 但能被 ListHiddenVideos 拿到(供一次性迁移为墓碑)。
|
||||
func TestListHiddenVideosForMigration(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
for _, id := range []string{"v1", "v2", "v3"} {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: id, DriveID: "drive", FileID: "f-" + id, Title: id,
|
||||
PublishedAt: now, CreatedAt: now, UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
if err := cat.HideVideo(ctx, "v2"); err != nil {
|
||||
t.Fatalf("hide v2: %v", err)
|
||||
}
|
||||
|
||||
visible, total, err := cat.ListVideos(ctx, ListParams{Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list visible: %v", err)
|
||||
}
|
||||
if total != 2 || len(visible) != 2 {
|
||||
t.Fatalf("visible total/len = %d/%d, want 2/2", total, len(visible))
|
||||
}
|
||||
for _, v := range visible {
|
||||
if v.ID == "v2" {
|
||||
t.Fatalf("hidden v2 leaked into visible list")
|
||||
}
|
||||
}
|
||||
|
||||
hidden, err := cat.ListHiddenVideos(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("list hidden: %v", err)
|
||||
}
|
||||
if len(hidden) != 1 || hidden[0].ID != "v2" {
|
||||
t.Fatalf("ListHiddenVideos = %v, want only v2", hidden)
|
||||
}
|
||||
|
||||
current, blacklisted, err := cat.VideoManagementCounts(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("counts: %v", err)
|
||||
}
|
||||
if current != 2 || blacklisted != 0 {
|
||||
t.Fatalf("counts = current %d blacklisted %d, want 2/0", current, blacklisted)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBlacklistListAndRemove 验证墓碑表的列出、关键字过滤和移除。
|
||||
func TestBlacklistListAndRemove(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
|
||||
now := time.Now()
|
||||
seed := []struct{ id, drive, file string }{
|
||||
{"d1", "drive", "movie-alpha.avi"},
|
||||
{"d2", "drive", "movie-beta.mp4"},
|
||||
{"d3", "archive", "clip-gamma.wmv"},
|
||||
}
|
||||
for _, s := range seed {
|
||||
if err := cat.UpsertVideo(ctx, &Video{
|
||||
ID: s.id, DriveID: s.drive, FileID: "f-" + s.id, FileName: s.file,
|
||||
Title: s.id, PublishedAt: now, CreatedAt: now, UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed %s: %v", s.id, err)
|
||||
}
|
||||
var err error
|
||||
if s.id == "d2" {
|
||||
err = cat.DeleteVideoWithTombstoneReason(ctx, s.id, DeletedVideoReasonDuplicate)
|
||||
} else {
|
||||
err = cat.DeleteVideoWithTombstone(ctx, s.id)
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("tombstone %s: %v", s.id, err)
|
||||
}
|
||||
}
|
||||
|
||||
items, total, err := cat.ListDeletedVideos(ctx, ListParams{Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted: %v", err)
|
||||
}
|
||||
if total != 3 || len(items) != 3 {
|
||||
t.Fatalf("deleted total/len = %d/%d, want 3/3", total, len(items))
|
||||
}
|
||||
reasons := map[string]string{}
|
||||
for _, item := range items {
|
||||
reasons[item.ID] = item.Reason
|
||||
}
|
||||
if reasons["d1"] != "" || reasons["d3"] != "" {
|
||||
t.Fatalf("manual tombstone reasons = %#v, want empty", reasons)
|
||||
}
|
||||
if reasons["d2"] != DeletedVideoReasonDuplicate {
|
||||
t.Fatalf("duplicate tombstone reason = %q, want %q", reasons["d2"], DeletedVideoReasonDuplicate)
|
||||
}
|
||||
|
||||
// 关键字过滤
|
||||
filtered, ftotal, err := cat.ListDeletedVideos(ctx, ListParams{Keyword: "movie", Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted filtered: %v", err)
|
||||
}
|
||||
if ftotal != 2 || len(filtered) != 2 {
|
||||
t.Fatalf("filtered total/len = %d/%d, want 2/2", ftotal, len(filtered))
|
||||
}
|
||||
|
||||
// 网盘过滤
|
||||
driveFiltered, driveTotal, err := cat.ListDeletedVideos(ctx, ListParams{DriveID: "archive", Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted drive filtered: %v", err)
|
||||
}
|
||||
if driveTotal != 1 || len(driveFiltered) != 1 || driveFiltered[0].ID != "d3" {
|
||||
t.Fatalf("drive filtered = total %d items %#v, want only d3", driveTotal, driveFiltered)
|
||||
}
|
||||
|
||||
combined, combinedTotal, err := cat.ListDeletedVideos(ctx, ListParams{Keyword: "movie", DriveID: "archive", Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted combined filtered: %v", err)
|
||||
}
|
||||
if combinedTotal != 0 || len(combined) != 0 {
|
||||
t.Fatalf("combined filtered total/len = %d/%d, want 0/0", combinedTotal, len(combined))
|
||||
}
|
||||
|
||||
// 移出黑名单
|
||||
if err := cat.RemoveDeletedVideo(ctx, "d1"); err != nil {
|
||||
t.Fatalf("remove d1: %v", err)
|
||||
}
|
||||
if deleted, err := cat.IsVideoDeleted(ctx, "d1"); err != nil || deleted {
|
||||
t.Fatalf("d1 should no longer be blacklisted (deleted=%v err=%v)", deleted, err)
|
||||
}
|
||||
_, total, err = cat.ListDeletedVideos(ctx, ListParams{Page: 1, PageSize: 50})
|
||||
if err != nil {
|
||||
t.Fatalf("list deleted after remove: %v", err)
|
||||
}
|
||||
if total != 2 {
|
||||
t.Fatalf("deleted total after remove = %d, want 2", total)
|
||||
}
|
||||
|
||||
if err := cat.RemoveDeletedVideo(ctx, "does-not-exist"); err == nil {
|
||||
t.Fatalf("remove missing id should return error")
|
||||
}
|
||||
|
||||
// counts: 删完一个还剩 2 个黑名单;可见视频已全部被墓碑删除
|
||||
current, blacklisted, err := cat.VideoManagementCounts(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("counts: %v", err)
|
||||
}
|
||||
if current != 0 || blacklisted != 2 {
|
||||
t.Fatalf("counts = current %d blacklisted %d, want 0/2", current, blacklisted)
|
||||
}
|
||||
}
|
||||
@@ -207,7 +207,7 @@ type Nightly struct {
|
||||
// 这里保留 yaml 中的静态定义,用于启动时预置盘。生产建议只在 DB 里维护。
|
||||
type Drive struct {
|
||||
ID string `yaml:"id"`
|
||||
Kind string `yaml:"kind"` // quark / p115 / p123 / pikpak / wopan / onedrive / googledrive / localstorage
|
||||
Kind string `yaml:"kind"` // quark / p115 / p123 / pikpak / wopan / guangyapan / onedrive / googledrive / localstorage
|
||||
Name string `yaml:"name"`
|
||||
RootID string `yaml:"root_id"`
|
||||
Params map[string]string `yaml:"params,omitempty"`
|
||||
|
||||
@@ -1,18 +1,16 @@
|
||||
// Package spider91migrate 周期性把 spider91 drive 下载到本地的视频
|
||||
// 上传到一个指定的目标 drive 目录(PikPak、115、123、OneDrive、Google Drive 或联通网盘),上传成功后:
|
||||
// Package crawlerupload uploads videos saved by script crawlers to a configured
|
||||
// target drive. Each crawler drive chooses its own upload target.
|
||||
//
|
||||
// - 改写 catalog 行:drive_id / file_id / content_hash 改成目标盘的;
|
||||
// 视频自身的 id 不变(仍是 spider91-<driveID>-<viewkey>),video_tags、
|
||||
// 收藏、点赞、views 等关联数据全部保留
|
||||
// - 删除本地 mp4(spider91/<id>/videos/<viewkey>.<ext>)和源 thumb
|
||||
// (spider91/<id>/thumbs/<viewkey>.jpg);公共 /p/thumb/<videoID> 副本会保留
|
||||
// 视频自身的 id 不变,video_tags、收藏、点赞、views 等关联数据全部保留
|
||||
// - 删除爬虫本地 mp4 和源 thumb;公共 /p/thumb/<videoID> 副本会保留
|
||||
//
|
||||
// 之后回放时,videoSource() 自动落到 /p/stream/<target>/<file_id>,
|
||||
// proxy 层走对应盘的直链 / 302 直连。
|
||||
//
|
||||
// 下次目标盘扫盘时,scanner 通过 (content_hash) / (file_name+size)
|
||||
// 已有的 findDuplicate 兜底逻辑,不会为同一物理文件再建一行。
|
||||
package spider91migrate
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -31,18 +29,18 @@ import (
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/drives/googledrive"
|
||||
"github.com/video-site/backend/internal/drives/guangyapan"
|
||||
"github.com/video-site/backend/internal/drives/onedrive"
|
||||
"github.com/video-site/backend/internal/drives/p115"
|
||||
"github.com/video-site/backend/internal/drives/p123"
|
||||
"github.com/video-site/backend/internal/drives/pikpak"
|
||||
"github.com/video-site/backend/internal/drives/scriptcrawler"
|
||||
"github.com/video-site/backend/internal/drives/spider91"
|
||||
"github.com/video-site/backend/internal/drives/wopan"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
)
|
||||
|
||||
// uploadTarget 是 migrator 调用目标 drive 的最小接口。任何一种"接收 spider91 上传"的
|
||||
// 网盘都要实现它;当前 PikPak、115、123、OneDrive、Google Drive 和联通网盘各自通过适配器满足。
|
||||
// uploadTarget 是 migrator 调用目标 drive 的最小接口。任何一种"接收爬虫上传"的
|
||||
// 网盘都要实现它;当前 PikPak、115、123、OneDrive、Google Drive、联通网盘和光鸭网盘各自通过适配器满足。
|
||||
//
|
||||
// 这一层抽象把"迁移调用方"和"具体盘的 SDK 协议"解耦:
|
||||
// - PikPak 走 GCID + OSS PutObject(pikpak.UploadResult)
|
||||
@@ -51,6 +49,7 @@ import (
|
||||
// - OneDrive 走 SHA1 + 小文件 PUT / 大文件 upload session
|
||||
// - Google Drive 走 MD5 + resumable upload session
|
||||
// - 联通网盘 走 SDK Upload2C,当前上游不返回内容 hash
|
||||
// - 光鸭网盘 走 OSS 分片上传,当前上游不返回内容 hash
|
||||
//
|
||||
// 各家返回值都被归一成本地的 UploadResult,并在 catalog 改写阶段统一处理。
|
||||
type uploadTarget interface {
|
||||
@@ -62,10 +61,10 @@ type uploadTarget interface {
|
||||
Rename(ctx context.Context, fileID, newName string) error
|
||||
}
|
||||
|
||||
// Spider91LocalSource is the local source interface used by the migration
|
||||
// worker. Legacy spider91.Driver and the new scriptcrawler.Driver both satisfy
|
||||
// it when they are mounted for the Spider91 built-in crawler.
|
||||
type Spider91LocalSource interface {
|
||||
// LocalSource is the local source interface used by the migration
|
||||
// worker. scriptcrawler.Driver satisfies it when mounted for a crawler that
|
||||
// keeps videos in local storage before uploading them to a target drive.
|
||||
type LocalSource interface {
|
||||
drives.Drive
|
||||
VideosDir() string
|
||||
ThumbsDir() string
|
||||
@@ -76,7 +75,7 @@ type Spider91LocalSource interface {
|
||||
// UploadResult 是 uploadTarget.UploadAndReportHash 的归一返回。
|
||||
//
|
||||
// FileID 目标盘上的新文件 ID;
|
||||
// Hash GCID(PikPak)、MD5 HEX(123 / Google Drive)或 SHA1 HEX(115 / OneDrive),写入 catalog.content_hash 用于跨盘去重;联通网盘暂为空;
|
||||
// Hash GCID(PikPak)、MD5 HEX(123 / Google Drive)或 SHA1 HEX(115 / OneDrive),写入 catalog.content_hash 用于跨盘去重;联通网盘和光鸭网盘暂为空;
|
||||
// Size 实际上传字节数。
|
||||
type UploadResult struct {
|
||||
FileID string
|
||||
@@ -93,30 +92,26 @@ type UploadProgress struct {
|
||||
TotalCount int
|
||||
}
|
||||
|
||||
const (
|
||||
spider91UploadDirName = "91 Spider"
|
||||
scriptCrawlerUploadRootDirName = "Script Crawlers"
|
||||
)
|
||||
const scriptCrawlerUploadRootDirName = "Script Crawlers"
|
||||
|
||||
type migrationPlan struct {
|
||||
source Spider91LocalSource
|
||||
row *catalog.Drive
|
||||
sourceKinds []string
|
||||
targetDriveID string
|
||||
target uploadTarget
|
||||
uploadDir string
|
||||
keepLatestN int
|
||||
requireAssetsReady bool
|
||||
legacyBackfill bool
|
||||
source LocalSource
|
||||
row *catalog.Drive
|
||||
targetDriveID string
|
||||
target uploadTarget
|
||||
uploadDir string
|
||||
keepLatestN int
|
||||
requireAssetsReady bool
|
||||
requirePreviewReady bool
|
||||
}
|
||||
|
||||
// pikpakAdapter / p115Adapter / p123Adapter / onedriveAdapter / googledriveAdapter / wopanAdapter 把具体 driver 包装成 uploadTarget。
|
||||
// pikpakAdapter / p115Adapter / p123Adapter / onedriveAdapter / googledriveAdapter / wopanAdapter / guangyapanAdapter 把具体 driver 包装成 uploadTarget。
|
||||
//
|
||||
// 之所以不让 driver 直接实现 uploadTarget:
|
||||
//
|
||||
// 1. 各 driver 的 UploadAndReportXxx 返回的是各自包内的 UploadResult 类型,
|
||||
// 直接共用同名同签名方法会引入循环依赖;
|
||||
// 2. driver 包不应该感知 spider91migrate 这一层业务定义。
|
||||
// 2. driver 包不应该感知 crawlerupload 这一层业务定义。
|
||||
type pikpakAdapter struct {
|
||||
d *pikpak.Driver
|
||||
}
|
||||
@@ -243,6 +238,27 @@ func (a *wopanAdapter) Rename(ctx context.Context, fileID, newName string) error
|
||||
return a.d.Rename(ctx, fileID, newName)
|
||||
}
|
||||
|
||||
type guangyapanAdapter struct {
|
||||
d *guangyapan.Driver
|
||||
}
|
||||
|
||||
func (a *guangyapanAdapter) ID() string { return a.d.ID() }
|
||||
func (a *guangyapanAdapter) Kind() string { return a.d.Kind() }
|
||||
func (a *guangyapanAdapter) RootID() string { return a.d.RootID() }
|
||||
func (a *guangyapanAdapter) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
return a.d.EnsureDir(ctx, pathFromRoot)
|
||||
}
|
||||
func (a *guangyapanAdapter) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
fileID, err := a.d.Upload(ctx, parentID, name, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
return UploadResult{FileID: fileID, Size: size}, nil
|
||||
}
|
||||
func (a *guangyapanAdapter) Rename(ctx context.Context, fileID, newName string) error {
|
||||
return a.d.Rename(ctx, fileID, newName)
|
||||
}
|
||||
|
||||
// adaptUploadTarget 把通用 drive 包装成 uploadTarget。
|
||||
// 不支持的盘 kind 返回 error;调用方静默跳过。
|
||||
func adaptUploadTarget(d drives.Drive) (uploadTarget, error) {
|
||||
@@ -259,11 +275,13 @@ func adaptUploadTarget(d drives.Drive) (uploadTarget, error) {
|
||||
return &googledriveAdapter{d: v}, nil
|
||||
case *wopan.Driver:
|
||||
return &wopanAdapter{d: v}, nil
|
||||
case *guangyapan.Driver:
|
||||
return &guangyapanAdapter{d: v}, nil
|
||||
case uploadTarget:
|
||||
// 测试或自定义实现可以直接传入;优先使用具体类型分支以拿到适配器。
|
||||
return v, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("drive %q kind=%s does not support spider91 upload", d.ID(), d.Kind())
|
||||
return nil, fmt.Errorf("drive %q kind=%s does not support crawler upload", d.ID(), d.Kind())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -274,16 +292,15 @@ type Registry interface {
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Catalog *catalog.Catalog
|
||||
Registry Registry
|
||||
GetTargetDriveID func() string // 通常对应 App.Spider91UploadDriveID()
|
||||
Catalog *catalog.Catalog
|
||||
Registry Registry
|
||||
// Interval 已废弃 —— 旧版迁移 worker 是周期 ticker,新版只通过 nightly
|
||||
// pipeline 调用 RunOnce,不再有内置定时器。保留字段不删是为了兼容外
|
||||
// 部 yaml / 测试代码里仍传值的场景。
|
||||
Interval time.Duration
|
||||
BatchLimit int // 单轮最多迁多少个,0 时默认 50
|
||||
// KeepLatestN 是每个 spider91 drive 在本地保留的最新视频数。
|
||||
// 超过的部分中"已迁移"的会被清理;未迁移的不动。0 时默认 15;< 0 关闭清理。
|
||||
// KeepLatestN is deprecated. Script crawler uploads use 0 internally so all
|
||||
// local videos that satisfy asset requirements are eligible for upload.
|
||||
KeepLatestN int
|
||||
// CaptchaCooldown 是迁移 worker 在遇到 PikPak captcha 错误(error_code
|
||||
// 4002 / 9)后整体进入冷却的时长。冷却期间 runOnce 直接返回,不再发起任何
|
||||
@@ -375,9 +392,8 @@ func (m *Migrator) markCooldownLogged() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// Trigger 安排一次"立即跑"。多次调用会被合并成一次(channel buffer=1)。
|
||||
// RunOnce 跑一次完整迁移:列出所有 spider91 drive,对每个超过 KeepLatestN 的旧
|
||||
// 视频上传到目标 drive,事务性改写 catalog 行,删本地文件。
|
||||
// RunOnce 跑一次完整迁移:列出所有配置了 upload_drive_id 的 scriptcrawler
|
||||
// drive,把本地视频上传到目标 drive,事务性改写 catalog 行,删本地文件。
|
||||
//
|
||||
// 这是上层 nightly 流水线 Phase 3 的入口;不再有周期 ticker / Trigger 通道。
|
||||
// captcha cooldown 状态在单次 RunOnce 内仍生效(多 drive 时遇到 4002 立即停整轮);
|
||||
@@ -391,7 +407,7 @@ func (m *Migrator) RunOnce(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// runOnce 单轮:扫所有 spider91 drive,对每条还有本地文件的视频做迁移。
|
||||
// runOnce 单轮:扫所有 scriptcrawler drive,对每条还有本地文件的视频做迁移。
|
||||
//
|
||||
// 互斥保证:同一 Migrator 内不会并发跑两轮(避免重复上传)。
|
||||
func (m *Migrator) runOnce(ctx context.Context) {
|
||||
@@ -413,11 +429,11 @@ func (m *Migrator) runOnce(ctx context.Context) {
|
||||
// 结束自然恢复。避免之前每秒一条 4002 的日志雪崩。
|
||||
if active, until, resumed := m.cooldownState(); active {
|
||||
if !m.markCooldownLogged() {
|
||||
log.Printf("[spider91migrate] captcha cooldown active until %s, skipping run", until.Format(time.RFC3339))
|
||||
log.Printf("[crawlerupload] captcha cooldown active until %s, skipping run", until.Format(time.RFC3339))
|
||||
}
|
||||
return
|
||||
} else if resumed {
|
||||
log.Printf("[spider91migrate] captcha cooldown ended at %s, resuming migration", until.Format(time.RFC3339))
|
||||
log.Printf("[crawlerupload] captcha cooldown ended at %s, resuming migration", until.Format(time.RFC3339))
|
||||
}
|
||||
|
||||
plans := m.migrationPlans(ctx)
|
||||
@@ -427,54 +443,39 @@ func (m *Migrator) runOnce(ctx context.Context) {
|
||||
}
|
||||
|
||||
migrated := 0
|
||||
backfillTargets := map[string]uploadTarget{}
|
||||
for _, plan := range plans {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
n, err := m.migrateDrive(ctx, plan)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] drive=%s migrate batch error: %v", plan.source.ID(), err)
|
||||
log.Printf("[crawlerupload] drive=%s migrate batch error: %v", plan.source.ID(), err)
|
||||
}
|
||||
migrated += n
|
||||
if active, _ := m.inCooldown(); active {
|
||||
if migrated > 0 {
|
||||
log.Printf("[spider91migrate] migrated %d video(s)", migrated)
|
||||
log.Printf("[crawlerupload] migrated %d video(s)", migrated)
|
||||
}
|
||||
return
|
||||
}
|
||||
if plan.legacyBackfill {
|
||||
backfillTargets[plan.targetDriveID] = plan.target
|
||||
}
|
||||
}
|
||||
if migrated > 0 {
|
||||
log.Printf("[spider91migrate] migrated %d video(s)", migrated)
|
||||
log.Printf("[crawlerupload] migrated %d video(s)", migrated)
|
||||
}
|
||||
|
||||
// 收尾:扫每个本地爬虫 drive 的 videos 目录,把 catalog 已经迁到别处但本地
|
||||
// 仍有残留的孤儿文件清掉。这是纯防御性兜底——正常路径下 migrateDrive
|
||||
// 已经在迁移成功后立刻 CleanupSpider91Local,不会留孤儿。
|
||||
// 已经在迁移成功后立刻 CleanupLocal,不会留孤儿。
|
||||
for _, plan := range plans {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
deleted, err := m.cleanupOldLocalVideos(ctx, plan)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] cleanup drive=%s: %v", plan.source.ID(), err)
|
||||
log.Printf("[crawlerupload] cleanup drive=%s: %v", plan.source.ID(), err)
|
||||
}
|
||||
if deleted > 0 {
|
||||
log.Printf("[spider91migrate] cleanup drive=%s deleted %d orphan local file(s)", plan.source.ID(), deleted)
|
||||
}
|
||||
}
|
||||
|
||||
// 回填:把已迁移到 PikPak 的 spider91-* 视频里文件名仍是旧格式
|
||||
// (比如刚迁完没改、或人工导入)的统一改成方案 B 期望的格式。
|
||||
// 这一步幂等:已经是期望格式的不会再调 Rename。
|
||||
for targetDriveID, pp := range backfillTargets {
|
||||
if renamed, err := m.backfillFileNames(ctx, targetDriveID, pp); err != nil {
|
||||
log.Printf("[spider91migrate] backfill names: %v", err)
|
||||
} else if renamed > 0 {
|
||||
log.Printf("[spider91migrate] backfilled %d %s file name(s) to desired format", renamed, pp.Kind())
|
||||
log.Printf("[crawlerupload] cleanup drive=%s deleted %d orphan local file(s)", plan.source.ID(), deleted)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -493,33 +494,6 @@ func (m *Migrator) reportUploadProgress(progress UploadProgress) {
|
||||
m.cfg.OnUploadProgress(progress)
|
||||
}
|
||||
|
||||
// targetKindForLog 把当前目标盘 kind 转成对人友好的简称,用于日志。
|
||||
// 解析失败时回退 "target"。
|
||||
func (m *Migrator) targetKindForLog() string {
|
||||
if m.cfg.GetTargetDriveID == nil || m.cfg.Registry == nil {
|
||||
return "target"
|
||||
}
|
||||
id := m.cfg.GetTargetDriveID()
|
||||
if id == "" {
|
||||
return "target"
|
||||
}
|
||||
d, ok := m.cfg.Registry.Get(id)
|
||||
if !ok {
|
||||
return "target"
|
||||
}
|
||||
return d.Kind()
|
||||
}
|
||||
|
||||
// resolveTarget 返回 (target drive ID, target uploadTarget, err)。
|
||||
// 没设置、drive 找不到,或 drive 类型不支持上传时返回 err(调用方静默跳过)。
|
||||
func (m *Migrator) resolveTarget() (string, uploadTarget, error) {
|
||||
if m.cfg.GetTargetDriveID == nil {
|
||||
return "", nil, errors.New("no target getter")
|
||||
}
|
||||
id := m.cfg.GetTargetDriveID()
|
||||
return m.resolveTargetID(id)
|
||||
}
|
||||
|
||||
func (m *Migrator) resolveTargetID(id string) (string, uploadTarget, error) {
|
||||
id = strings.TrimSpace(id)
|
||||
if id == "" {
|
||||
@@ -549,73 +523,37 @@ func (m *Migrator) migrationPlans(ctx context.Context) []migrationPlan {
|
||||
if d == nil {
|
||||
continue
|
||||
}
|
||||
src, ok := d.(Spider91LocalSource)
|
||||
src, ok := d.(LocalSource)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
row, err := m.cfg.Catalog.GetDrive(ctx, d.ID())
|
||||
if (err != nil || row == nil) && d.Kind() == spider91.Kind {
|
||||
row = &catalog.Drive{ID: d.ID(), Kind: spider91.Kind, RootID: "/"}
|
||||
}
|
||||
if row == nil {
|
||||
if err != nil || row == nil || row.Kind != scriptcrawler.Kind {
|
||||
continue
|
||||
}
|
||||
switch row.Kind {
|
||||
case scriptcrawler.Kind:
|
||||
targetID := strings.TrimSpace(row.Credentials["upload_drive_id"])
|
||||
if targetID == "" {
|
||||
continue
|
||||
}
|
||||
resolvedID, target, err := m.resolveTargetID(targetID)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] crawler=%s upload target=%q unavailable: %v", row.ID, targetID, err)
|
||||
continue
|
||||
}
|
||||
out = append(out, migrationPlan{
|
||||
source: src,
|
||||
row: row,
|
||||
sourceKinds: crawlerSourceKindsForRow(row),
|
||||
targetDriveID: resolvedID,
|
||||
target: target,
|
||||
uploadDir: scriptCrawlerUploadDir(row.ID),
|
||||
keepLatestN: 0,
|
||||
requireAssetsReady: true,
|
||||
})
|
||||
case spider91.Kind:
|
||||
if m.cfg.GetTargetDriveID == nil {
|
||||
continue
|
||||
}
|
||||
targetID := strings.TrimSpace(m.cfg.GetTargetDriveID())
|
||||
if targetID == "" {
|
||||
continue
|
||||
}
|
||||
resolvedID, target, err := m.resolveTargetID(targetID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
out = append(out, migrationPlan{
|
||||
source: src,
|
||||
row: row,
|
||||
sourceKinds: []string{spider91.Kind},
|
||||
targetDriveID: resolvedID,
|
||||
target: target,
|
||||
uploadDir: spider91UploadDirName,
|
||||
keepLatestN: m.cfg.KeepLatestN,
|
||||
legacyBackfill: true,
|
||||
})
|
||||
targetID := strings.TrimSpace(row.Credentials["upload_drive_id"])
|
||||
if targetID == "" {
|
||||
continue
|
||||
}
|
||||
resolvedID, target, err := m.resolveTargetID(targetID)
|
||||
if err != nil {
|
||||
log.Printf("[crawlerupload] crawler=%s upload target=%q unavailable: %v", row.ID, targetID, err)
|
||||
continue
|
||||
}
|
||||
out = append(out, migrationPlan{
|
||||
source: src,
|
||||
row: row,
|
||||
targetDriveID: resolvedID,
|
||||
target: target,
|
||||
uploadDir: scriptCrawlerUploadDir(row.ID),
|
||||
keepLatestN: 0,
|
||||
requireAssetsReady: true,
|
||||
requirePreviewReady: row.TeaserEnabled,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func crawlerSourceKindsForRow(d *catalog.Drive) []string {
|
||||
kinds := []string{scriptcrawler.Kind}
|
||||
if d != nil && strings.EqualFold(strings.TrimSpace(d.Credentials["builtin"]), spider91.Kind) {
|
||||
kinds = append(kinds, spider91.Kind)
|
||||
}
|
||||
return kinds
|
||||
}
|
||||
|
||||
func scriptCrawlerUploadDir(driveID string) string {
|
||||
driveID = sanitizeUploadDirSegment(driveID)
|
||||
if driveID == "" {
|
||||
@@ -633,41 +571,6 @@ func sanitizeUploadDirSegment(raw string) string {
|
||||
return clean
|
||||
}
|
||||
|
||||
// spider91Drives 返回当前注册的所有 Spider91 来源本地爬虫 driver。
|
||||
func (m *Migrator) spider91Drives(ctx context.Context) []Spider91LocalSource {
|
||||
all := m.cfg.Registry.All()
|
||||
out := make([]Spider91LocalSource, 0, len(all))
|
||||
for _, d := range all {
|
||||
if !m.isSpider91SourceDrive(ctx, d) {
|
||||
continue
|
||||
}
|
||||
if sd, ok := d.(Spider91LocalSource); ok {
|
||||
out = append(out, sd)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (m *Migrator) isSpider91SourceDrive(ctx context.Context, d drives.Drive) bool {
|
||||
if d == nil {
|
||||
return false
|
||||
}
|
||||
if d.Kind() == spider91.Kind {
|
||||
return true
|
||||
}
|
||||
if d.Kind() != scriptcrawler.Kind || m.cfg.Catalog == nil {
|
||||
return false
|
||||
}
|
||||
row, err := m.cfg.Catalog.GetDrive(ctx, d.ID())
|
||||
if err != nil || row == nil {
|
||||
return false
|
||||
}
|
||||
if row.Kind == spider91.Kind {
|
||||
return true
|
||||
}
|
||||
return row.Kind == scriptcrawler.Kind && strings.EqualFold(strings.TrimSpace(row.Credentials["builtin"]), spider91.Kind)
|
||||
}
|
||||
|
||||
// migrateDrive 对单个本地爬虫 drive 跑一批迁移;返回成功迁移的条数。
|
||||
func (m *Migrator) migrateDrive(ctx context.Context, plan migrationPlan) (int, error) {
|
||||
src := plan.source
|
||||
@@ -773,7 +676,7 @@ func (m *Migrator) migrateDrive(ctx context.Context, plan migrationPlan) (int, e
|
||||
})
|
||||
|
||||
if v.DriveID != src.ID() {
|
||||
CleanupSpider91Local(src, f.name)
|
||||
CleanupLocal(src, f.name)
|
||||
processed++
|
||||
m.reportUploadProgress(UploadProgress{
|
||||
DriveID: src.ID(),
|
||||
@@ -787,12 +690,12 @@ func (m *Migrator) migrateDrive(ctx context.Context, plan migrationPlan) (int, e
|
||||
|
||||
if targetDuplicate, err := m.cfg.Catalog.FindEquivalentVideoOnDrive(ctx, v, plan.targetDriveID); err != nil {
|
||||
if !errors.Is(err, sql.ErrNoRows) {
|
||||
log.Printf("[spider91migrate] %s find target duplicate: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s find target duplicate: %v", v.ID, err)
|
||||
}
|
||||
} else if targetDuplicate != nil {
|
||||
ok, err := m.bindToExistingTarget(ctx, v, targetDuplicate, plan)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] %s: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s: %v", v.ID, err)
|
||||
continue
|
||||
}
|
||||
if ok {
|
||||
@@ -813,9 +716,9 @@ func (m *Migrator) migrateDrive(ctx context.Context, plan migrationPlan) (int, e
|
||||
}
|
||||
|
||||
if plan.requireAssetsReady {
|
||||
ready, err := m.crawlerVideoAssetsReady(ctx, v)
|
||||
ready, err := m.crawlerVideoAssetsReady(ctx, v, plan.requirePreviewReady)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] %s check generated assets: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s check generated assets: %v", v.ID, err)
|
||||
continue
|
||||
}
|
||||
if !ready {
|
||||
@@ -833,14 +736,14 @@ func (m *Migrator) migrateDrive(ctx context.Context, plan migrationPlan) (int, e
|
||||
|
||||
ok, err := m.migrateOne(ctx, v, plan)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] %s: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s: %v", v.ID, err)
|
||||
// captcha 错误(4002 / 9)说明 PikPak 当前正拒绝我们;继续在
|
||||
// 同一轮里尝试其它文件大概率会拿到同样的 4002,并且每多一次
|
||||
// 失败就多一份"被风控加深"的风险。立即中止当前 batch 并
|
||||
// 打开冷却窗口,等 cfg.CaptchaCooldown 之后再重试。
|
||||
if pikpak.IsCaptchaError(err) {
|
||||
until := m.setCooldown()
|
||||
log.Printf("[spider91migrate] drive=%s captcha-blocked, cooling down until %s", src.ID(), until.Format(time.RFC3339))
|
||||
log.Printf("[crawlerupload] drive=%s captcha-blocked, cooling down until %s", src.ID(), until.Format(time.RFC3339))
|
||||
return migrated, nil
|
||||
}
|
||||
continue
|
||||
@@ -879,17 +782,15 @@ func (m *Migrator) findVideoForLocalFile(ctx context.Context, plan migrationPlan
|
||||
if plan.source != nil {
|
||||
driveID = plan.source.ID()
|
||||
}
|
||||
for _, kind := range plan.sourceKinds {
|
||||
id := scriptcrawler.BuildVideoIDForKind(kind, driveID, sourceID)
|
||||
v, err := m.cfg.Catalog.GetVideo(ctx, id)
|
||||
if err == nil && v != nil {
|
||||
return v
|
||||
}
|
||||
id := scriptcrawler.BuildVideoID(driveID, sourceID)
|
||||
v, err := m.cfg.Catalog.GetVideo(ctx, id)
|
||||
if err == nil && v != nil {
|
||||
return v
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Migrator) crawlerVideoAssetsReady(ctx context.Context, v *catalog.Video) (bool, error) {
|
||||
func (m *Migrator) crawlerVideoAssetsReady(ctx context.Context, v *catalog.Video, requirePreview bool) (bool, error) {
|
||||
if v == nil {
|
||||
return false, nil
|
||||
}
|
||||
@@ -897,6 +798,9 @@ func (m *Migrator) crawlerVideoAssetsReady(ctx context.Context, v *catalog.Video
|
||||
if !fingerprintReady {
|
||||
return false, nil
|
||||
}
|
||||
if !requirePreview {
|
||||
return true, nil
|
||||
}
|
||||
if strings.EqualFold(strings.TrimSpace(v.PreviewStatus), "ready") {
|
||||
return true, nil
|
||||
}
|
||||
@@ -916,8 +820,8 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, plan migrat
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
// 本地文件被人手动删了,但 catalog 还显示 spider91 drive;
|
||||
// 这种状态没法迁移。跳过即可(保留行让管理员可见,避免数据丢失)。
|
||||
// 本地文件被人手动删了,但 catalog 还指向该爬虫;
|
||||
// 这种状态没法上传。跳过即可(保留行让管理员可见,避免数据丢失)。
|
||||
return false, nil
|
||||
}
|
||||
return false, fmt.Errorf("stat local: %w", err)
|
||||
@@ -936,7 +840,7 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, plan migrat
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("%s ensure %q dir: %w", pp.Kind(), plan.uploadDir, err)
|
||||
}
|
||||
uploadName := desiredPikPakName(v.Title, sourceIDForUploadName(v, plan), v.Ext)
|
||||
uploadName := desiredUploadName(v.Title, sourceIDForUploadName(v, plan), v.Ext)
|
||||
res, err := pp.UploadAndReportHash(ctx, parent, uploadName, f, info.Size())
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("%s upload: %w", pp.Kind(), err)
|
||||
@@ -952,13 +856,13 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, plan migrat
|
||||
m.preserveCrawledThumbnail(ctx, src, v)
|
||||
// 同步 catalog 里的 file_name,让下次目标盘扫盘时 (file_name, size) 也能匹配上
|
||||
if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{FileName: uploadName}); err != nil {
|
||||
log.Printf("[spider91migrate] %s update file_name after migrate: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s update file_name after migrate: %v", v.ID, err)
|
||||
}
|
||||
|
||||
// 删除本地 mp4 和源 thumb(公共 /p/thumb 副本已在 preserveCrawledThumbnail 中保留)。
|
||||
CleanupSpider91Local(src, v.FileID)
|
||||
CleanupLocal(src, v.FileID)
|
||||
|
||||
log.Printf("[spider91migrate] %s migrated to drive=%s(kind=%s) file=%s name=%q", v.ID, plan.targetDriveID, pp.Kind(), res.FileID, uploadName)
|
||||
log.Printf("[crawlerupload] %s migrated to drive=%s(kind=%s) file=%s name=%q", v.ID, plan.targetDriveID, pp.Kind(), res.FileID, uploadName)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -974,12 +878,12 @@ func (m *Migrator) bindToExistingTarget(ctx context.Context, v, target *catalog.
|
||||
}
|
||||
if target.FileName != "" {
|
||||
if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{FileName: target.FileName}); err != nil {
|
||||
log.Printf("[spider91migrate] %s update file_name after duplicate bind: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s update file_name after duplicate bind: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
m.preserveCrawledThumbnail(ctx, plan.source, v)
|
||||
CleanupSpider91Local(plan.source, v.FileID)
|
||||
log.Printf("[spider91migrate] %s bound to existing drive=%s(kind=%s) file=%s duplicate=%s", v.ID, plan.targetDriveID, plan.target.Kind(), target.FileID, target.ID)
|
||||
CleanupLocal(plan.source, v.FileID)
|
||||
log.Printf("[crawlerupload] %s bound to existing drive=%s(kind=%s) file=%s duplicate=%s", v.ID, plan.targetDriveID, plan.target.Kind(), target.FileID, target.ID)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -996,22 +900,17 @@ func sourceIDForUploadName(v *catalog.Video, plan migrationPlan) string {
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
if plan.legacyBackfill {
|
||||
return extractViewKey(v.ID)
|
||||
}
|
||||
for _, kind := range plan.sourceKinds {
|
||||
prefix := kind + "-" + plan.source.ID() + "-"
|
||||
if strings.HasPrefix(v.ID, prefix) {
|
||||
return strings.TrimPrefix(v.ID, prefix)
|
||||
}
|
||||
prefix := scriptcrawler.Kind + "-" + plan.source.ID() + "-"
|
||||
if strings.HasPrefix(v.ID, prefix) {
|
||||
return strings.TrimPrefix(v.ID, prefix)
|
||||
}
|
||||
if v.FileID != "" {
|
||||
return stripExt(v.FileID)
|
||||
}
|
||||
return extractViewKey(v.ID)
|
||||
return extractSourceID(v.ID)
|
||||
}
|
||||
|
||||
func (m *Migrator) preserveCrawledThumbnail(ctx context.Context, src Spider91LocalSource, v *catalog.Video) {
|
||||
func (m *Migrator) preserveCrawledThumbnail(ctx context.Context, src LocalSource, v *catalog.Video) {
|
||||
if m == nil || m.cfg.Catalog == nil || src == nil || v == nil || v.ID == "" || v.FileID == "" {
|
||||
return
|
||||
}
|
||||
@@ -1019,38 +918,38 @@ func (m *Migrator) preserveCrawledThumbnail(ctx context.Context, src Spider91Loc
|
||||
if commonDir == "" {
|
||||
return
|
||||
}
|
||||
thumbPath, ok := findSpider91ThumbPath(src, v.FileID)
|
||||
thumbPath, ok := findCrawlerThumbPath(src, v.FileID)
|
||||
if !ok {
|
||||
if v.ThumbnailURL == "" {
|
||||
log.Printf("[spider91migrate] %s crawled thumbnail missing before migration cleanup", v.ID)
|
||||
log.Printf("[crawlerupload] %s crawled thumbnail missing before migration cleanup", v.ID)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err := os.MkdirAll(commonDir, 0o755); err != nil {
|
||||
log.Printf("[spider91migrate] %s mkdir common thumbs: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s mkdir common thumbs: %v", v.ID, err)
|
||||
return
|
||||
}
|
||||
dst := mediaasset.ThumbnailPathInDir(commonDir, v.ID)
|
||||
if _, err := os.Stat(dst); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
log.Printf("[spider91migrate] %s stat common thumb: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s stat common thumb: %v", v.ID, err)
|
||||
return
|
||||
}
|
||||
if err := copyFileAtomic(thumbPath, dst); err != nil {
|
||||
log.Printf("[spider91migrate] %s preserve crawled thumbnail: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s preserve crawled thumbnail: %v", v.ID, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{
|
||||
ThumbnailURL: "/p/thumb/" + v.ID,
|
||||
}); err != nil {
|
||||
log.Printf("[spider91migrate] %s update crawled thumbnail url: %v", v.ID, err)
|
||||
log.Printf("[crawlerupload] %s update crawled thumbnail url: %v", v.ID, err)
|
||||
return
|
||||
}
|
||||
v.ThumbnailURL = "/p/thumb/" + v.ID
|
||||
}
|
||||
|
||||
func findSpider91ThumbPath(src Spider91LocalSource, fileID string) (string, bool) {
|
||||
func findCrawlerThumbPath(src LocalSource, fileID string) (string, bool) {
|
||||
thumbBase := stripExt(fileID)
|
||||
for _, ext := range []string{".jpg", ".jpeg", ".png", ".webp"} {
|
||||
thumbPath, err := src.ThumbPath(thumbBase + ext)
|
||||
@@ -1090,20 +989,19 @@ func copyFileAtomic(src, dst string) error {
|
||||
return os.Rename(tmp, dst)
|
||||
}
|
||||
|
||||
// CleanupSpider91Local 删除已迁移视频的本地 mp4 和 thumb。
|
||||
// CleanupLocal 删除已上传视频的本地 mp4 和 thumb。
|
||||
//
|
||||
// thumb 删除是 best-effort —— 找不到就算了(spider91 thumb 文件名带后缀,
|
||||
// 我们不知道具体是 .jpg 还是别的,逐个尝试常见后缀)。
|
||||
// thumb 删除是 best-effort —— 找不到就算了;逐个尝试常见后缀。
|
||||
//
|
||||
// 暴露成包级函数方便 cleanup 模块复用(任务 6)。
|
||||
func CleanupSpider91Local(src Spider91LocalSource, fileID string) {
|
||||
// 暴露成包级函数方便 cleanup 模块复用。
|
||||
func CleanupLocal(src LocalSource, fileID string) {
|
||||
videoPath, err := src.VideoPath(fileID)
|
||||
if err == nil {
|
||||
if err := os.Remove(videoPath); err != nil && !os.IsNotExist(err) {
|
||||
log.Printf("[spider91migrate] remove local mp4 %s: %v", videoPath, err)
|
||||
log.Printf("[crawlerupload] remove local mp4 %s: %v", videoPath, err)
|
||||
}
|
||||
}
|
||||
// thumb 文件名是 <viewkey>.<ext>;fileID 是 <viewkey>.<videoExt>,
|
||||
// thumb 文件名是 <sourceID>.<ext>;fileID 是 <sourceID>.<videoExt>,
|
||||
// 不一定相同。尝试用 fileID 去掉视频扩展名后拼 thumb 常见后缀。
|
||||
thumbBase := stripExt(fileID)
|
||||
for _, ext := range []string{".jpg", ".jpeg", ".png", ".webp"} {
|
||||
@@ -1120,7 +1018,7 @@ func stripExt(name string) string {
|
||||
return name[:len(name)-len(ext)]
|
||||
}
|
||||
|
||||
// cleanupOldLocalVideos 是防御性兜底:扫 spider91 drive 本地 videos/ 目录,
|
||||
// cleanupOldLocalVideos 是防御性兜底:扫爬虫本地 videos/ 目录,
|
||||
// 删除所有 catalog 中已经迁移到别处(drive_id != src.ID())的本地残留。
|
||||
//
|
||||
// 与 migrateDrive 的区别:
|
||||
@@ -1128,7 +1026,7 @@ func stripExt(name string) string {
|
||||
// - 不依赖 KeepLatestN —— 哪怕这个孤儿在"最新 N"窗口内,已迁移就该删
|
||||
// - 只看 catalog 状态,不看 mtime
|
||||
//
|
||||
// 正常路径下 migrateDrive 迁移成功后立刻 CleanupSpider91Local,所以这里
|
||||
// 正常路径下 migrateDrive 迁移成功后立刻 CleanupLocal,所以这里
|
||||
// 应该不会有任何工作。极端情况(手工改 catalog、迁移过程中 crash)才会
|
||||
// 找到孤儿。
|
||||
//
|
||||
@@ -1166,7 +1064,7 @@ func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, plan migrationPlan
|
||||
continue
|
||||
}
|
||||
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
|
||||
log.Printf("[spider91migrate] cleanup remove %s: %v", path, err)
|
||||
log.Printf("[crawlerupload] cleanup remove %s: %v", path, err)
|
||||
continue
|
||||
}
|
||||
// thumb 一并删(best-effort)
|
||||
@@ -1182,44 +1080,3 @@ func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, plan migrationPlan
|
||||
}
|
||||
return deleted, nil
|
||||
}
|
||||
|
||||
// backfillFileNames 扫描目标 drive(PikPak、115、123、OneDrive、Google Drive 或联通网盘)下所有 spider91-* 起始 ID 的视频,
|
||||
// 对文件名不是 desiredPikPakName(...) 期望格式的,调 target.Rename 修正,
|
||||
// 并把 catalog.file_name 同步到新名字。
|
||||
//
|
||||
// 幂等:已经是期望格式的视频不会触发任何调用。
|
||||
//
|
||||
// 返回成功改名的条数。
|
||||
func (m *Migrator) backfillFileNames(ctx context.Context, targetDriveID string, pp uploadTarget) (int, error) {
|
||||
videos, err := m.cfg.Catalog.ListVideosByDriveID(ctx, targetDriveID, 10000)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("list videos: %w", err)
|
||||
}
|
||||
renamed := 0
|
||||
for _, v := range videos {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return renamed, err
|
||||
}
|
||||
if !strings.HasPrefix(v.ID, "spider91-") {
|
||||
continue
|
||||
}
|
||||
want := desiredPikPakName(v.Title, extractViewKey(v.ID), v.Ext)
|
||||
if v.FileName == want {
|
||||
continue
|
||||
}
|
||||
if v.FileID == "" {
|
||||
continue
|
||||
}
|
||||
if err := pp.Rename(ctx, v.FileID, want); err != nil {
|
||||
log.Printf("[spider91migrate] rename %s -> %q: %v", v.ID, want, err)
|
||||
continue
|
||||
}
|
||||
if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{FileName: want}); err != nil {
|
||||
log.Printf("[spider91migrate] %s update file_name after rename: %v", v.ID, err)
|
||||
// 目标盘已经改名成功,但 catalog 更新失败 —— 下轮会重试。继续。
|
||||
}
|
||||
log.Printf("[spider91migrate] renamed %s on %s: %q -> %q", v.ID, pp.Kind(), v.FileName, want)
|
||||
renamed++
|
||||
}
|
||||
return renamed, nil
|
||||
}
|
||||
@@ -0,0 +1,280 @@
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
"github.com/video-site/backend/internal/drives/scriptcrawler"
|
||||
)
|
||||
|
||||
type fakeRegistry struct {
|
||||
byID map[string]drives.Drive
|
||||
}
|
||||
|
||||
func newFakeRegistry() *fakeRegistry {
|
||||
return &fakeRegistry{byID: make(map[string]drives.Drive)}
|
||||
}
|
||||
|
||||
func (r *fakeRegistry) Add(d drives.Drive) {
|
||||
r.byID[d.ID()] = d
|
||||
}
|
||||
|
||||
func (r *fakeRegistry) Get(id string) (drives.Drive, bool) {
|
||||
d, ok := r.byID[id]
|
||||
return d, ok
|
||||
}
|
||||
|
||||
func (r *fakeRegistry) All() []drives.Drive {
|
||||
out := make([]drives.Drive, 0, len(r.byID))
|
||||
for _, d := range r.byID {
|
||||
out = append(out, d)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
type fakeUploadDrive struct {
|
||||
id string
|
||||
kind string
|
||||
rootID string
|
||||
mu sync.Mutex
|
||||
uploadCalls int
|
||||
gotBodies map[string][]byte
|
||||
gotParents map[string]string
|
||||
ensureCalls []string
|
||||
}
|
||||
|
||||
func newFakeUploadDrive(id, kind, rootID string) *fakeUploadDrive {
|
||||
return &fakeUploadDrive{
|
||||
id: id,
|
||||
kind: kind,
|
||||
rootID: rootID,
|
||||
gotBodies: make(map[string][]byte),
|
||||
gotParents: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *fakeUploadDrive) Kind() string { return d.kind }
|
||||
func (d *fakeUploadDrive) ID() string { return d.id }
|
||||
func (d *fakeUploadDrive) RootID() string {
|
||||
return d.rootID
|
||||
}
|
||||
func (d *fakeUploadDrive) Init(context.Context) error { return nil }
|
||||
func (d *fakeUploadDrive) List(context.Context, string) ([]drives.Entry, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (d *fakeUploadDrive) Stat(context.Context, string) (*drives.Entry, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *fakeUploadDrive) StreamURL(context.Context, string) (*drives.StreamLink, error) {
|
||||
return nil, drives.ErrNotSupported
|
||||
}
|
||||
func (d *fakeUploadDrive) Upload(context.Context, string, string, io.Reader, int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
func (d *fakeUploadDrive) EnsureDir(_ context.Context, pathFromRoot string) (string, error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
d.ensureCalls = append(d.ensureCalls, pathFromRoot)
|
||||
return d.rootID + "/" + pathFromRoot, nil
|
||||
}
|
||||
func (d *fakeUploadDrive) Rename(context.Context, string, string) error {
|
||||
return nil
|
||||
}
|
||||
func (d *fakeUploadDrive) UploadAndReportHash(_ context.Context, parentID, name string, r io.Reader, _ int64) (UploadResult, error) {
|
||||
body, _ := io.ReadAll(r)
|
||||
d.mu.Lock()
|
||||
d.uploadCalls++
|
||||
d.gotBodies[name] = body
|
||||
d.gotParents[name] = parentID
|
||||
d.mu.Unlock()
|
||||
return UploadResult{FileID: "remote-" + name, Hash: strings.Repeat("a", 40), Size: int64(len(body))}, nil
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*fakeUploadDrive)(nil)
|
||||
var _ uploadTarget = (*fakeUploadDrive)(nil)
|
||||
|
||||
func TestRunOnceUploadsScriptCrawlerLocalVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat := setupCatalog(t)
|
||||
src := setupScriptCrawler(t, "crawler-one")
|
||||
target := newFakeUploadDrive("target-drive", "pikpak", "target-root")
|
||||
reg := newFakeRegistry()
|
||||
reg.Add(src)
|
||||
reg.Add(target)
|
||||
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: src.ID(),
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Example Crawler",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{"script_path": "/tmp/example.py", "upload_drive_id": target.ID()},
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert crawler drive: %v", err)
|
||||
}
|
||||
|
||||
videoID := writeCrawlerVideo(t, cat, src, "source-001", ".mp4", []byte("video payload"), true)
|
||||
commonThumbDir := filepath.Join(t.TempDir(), "thumbs")
|
||||
m := New(Config{Catalog: cat, Registry: reg, CommonThumbDir: commonThumbDir})
|
||||
|
||||
if err := m.RunOnce(ctx); err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
|
||||
wantName := desiredUploadName("Sample source-001", "source-001", "mp4")
|
||||
if target.uploadCalls != 1 {
|
||||
t.Fatalf("upload calls = %d, want 1", target.uploadCalls)
|
||||
}
|
||||
if got := string(target.gotBodies[wantName]); got != "video payload" {
|
||||
t.Fatalf("uploaded body = %q, want payload", got)
|
||||
}
|
||||
if got := target.gotParents[wantName]; got != "target-root/Script Crawlers/crawler-one" {
|
||||
t.Fatalf("upload parent = %q, want crawler folder", got)
|
||||
}
|
||||
if len(target.ensureCalls) != 1 || target.ensureCalls[0] != "Script Crawlers/crawler-one" {
|
||||
t.Fatalf("ensure calls = %#v, want crawler upload folder", target.ensureCalls)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, videoID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.DriveID != target.ID() || !strings.HasPrefix(got.FileID, "remote-") {
|
||||
t.Fatalf("catalog target = drive %q file %q, want target drive", got.DriveID, got.FileID)
|
||||
}
|
||||
if got.FileName != wantName {
|
||||
t.Fatalf("file_name = %q, want %q", got.FileName, wantName)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(src.VideosDir(), "source-001.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("local video still exists or stat failed: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(src.ThumbsDir(), "source-001.jpg")); !os.IsNotExist(err) {
|
||||
t.Fatalf("local thumb still exists or stat failed: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(commonThumbDir, videoID+".jpg")); err != nil {
|
||||
t.Fatalf("common thumbnail missing: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOnceRequiresPerCrawlerUploadTarget(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat := setupCatalog(t)
|
||||
src := setupScriptCrawler(t, "crawler-local-only")
|
||||
target := newFakeUploadDrive("target-drive", "pikpak", "target-root")
|
||||
reg := newFakeRegistry()
|
||||
reg.Add(src)
|
||||
reg.Add(target)
|
||||
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: src.ID(),
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Local Only",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{"script_path": "/tmp/example.py"},
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert crawler drive: %v", err)
|
||||
}
|
||||
videoID := writeCrawlerVideo(t, cat, src, "source-002", ".mp4", []byte("video payload"), true)
|
||||
|
||||
m := New(Config{Catalog: cat, Registry: reg})
|
||||
if err := m.RunOnce(ctx); err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if target.uploadCalls != 0 {
|
||||
t.Fatalf("upload calls = %d, want 0", target.uploadCalls)
|
||||
}
|
||||
got, err := cat.GetVideo(ctx, videoID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.DriveID != src.ID() {
|
||||
t.Fatalf("drive_id = %q, want local crawler drive", got.DriveID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdaptUploadTargetRejectsUnsupportedTarget(t *testing.T) {
|
||||
src := scriptcrawler.New(scriptcrawler.Config{ID: "crawler", RootDir: t.TempDir()})
|
||||
_, err := adaptUploadTarget(src)
|
||||
if err == nil || !strings.Contains(err.Error(), "does not support crawler upload") {
|
||||
t.Fatalf("err = %v, want unsupported crawler upload target", err)
|
||||
}
|
||||
}
|
||||
|
||||
func setupCatalog(t *testing.T) *catalog.Catalog {
|
||||
t.Helper()
|
||||
cat, err := catalog.Open(filepath.Join(t.TempDir(), "video-site.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cat.Close() })
|
||||
return cat
|
||||
}
|
||||
|
||||
func setupScriptCrawler(t *testing.T, id string) *scriptcrawler.Driver {
|
||||
t.Helper()
|
||||
d := scriptcrawler.New(scriptcrawler.Config{ID: id, RootDir: t.TempDir()})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("scriptcrawler init: %v", err)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func writeCrawlerVideo(t *testing.T, cat *catalog.Catalog, d *scriptcrawler.Driver, sourceID, ext string, content []byte, readyAssets bool) string {
|
||||
t.Helper()
|
||||
ctx := context.Background()
|
||||
fileID := sourceID + ext
|
||||
videoPath, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
t.Fatalf("video path: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(videoPath, content, 0o644); err != nil {
|
||||
t.Fatalf("write video: %v", err)
|
||||
}
|
||||
thumbPath, err := d.ThumbPath(sourceID + ".jpg")
|
||||
if err != nil {
|
||||
t.Fatalf("thumb path: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(thumbPath, []byte("thumb"), 0o644); err != nil {
|
||||
t.Fatalf("write thumb: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
videoID := scriptcrawler.BuildVideoID(d.ID(), sourceID)
|
||||
previewStatus := "pending"
|
||||
fingerprintStatus := "pending"
|
||||
sampled := ""
|
||||
if readyAssets {
|
||||
previewStatus = "ready"
|
||||
fingerprintStatus = "ready"
|
||||
sampled = strings.Repeat("b", 64)
|
||||
}
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: videoID,
|
||||
DriveID: d.ID(),
|
||||
FileID: fileID,
|
||||
FileName: fileID,
|
||||
Title: "Sample " + sourceID,
|
||||
Author: "tester",
|
||||
Ext: strings.TrimPrefix(ext, "."),
|
||||
Quality: "HD",
|
||||
Size: int64(len(content)),
|
||||
PreviewStatus: previewStatus,
|
||||
FingerprintStatus: fingerprintStatus,
|
||||
SampledSHA256: sampled,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert video: %v", err)
|
||||
}
|
||||
return videoID
|
||||
}
|
||||
@@ -1,13 +1,13 @@
|
||||
package spider91migrate
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// 期望的 PikPak 文件名格式(方案 B):
|
||||
// 期望的上传文件名格式:
|
||||
//
|
||||
// <sanitized-title>-<viewkey-后8位>.<ext>
|
||||
// <sanitized-title>-<sourceID-后8位>.<ext>
|
||||
//
|
||||
// 例如:
|
||||
//
|
||||
@@ -15,8 +15,8 @@ import (
|
||||
//
|
||||
// 设计目标:
|
||||
// - 文件名一眼能看出视频内容(用 catalog 里的 title)
|
||||
// - 后缀的 viewkey 8 字符保证同标题不会撞名
|
||||
// - 全部字符在常见文件系统、PikPak、HTTP/Aliyun OSS Key 编码里都安全
|
||||
// - 后缀的 sourceID 8 字符保证同标题不会撞名
|
||||
// - 全部字符在常见文件系统、网盘 API、HTTP/Aliyun OSS Key 编码里都安全
|
||||
//
|
||||
// 字符清洗规则(sanitizeTitle):
|
||||
// - 去除控制字符(< 0x20 或 0x7F)
|
||||
@@ -85,47 +85,47 @@ func truncateRunes(s string, maxRunes int) string {
|
||||
return s
|
||||
}
|
||||
|
||||
// extractViewKey 从 video.ID("spider91-<driveID>-<viewkey>")里
|
||||
// 取出最后一段 viewkey。
|
||||
// extractSourceID 从 video.ID("<kind>-<driveID>-<sourceID>")里
|
||||
// 取出最后一段 sourceID。
|
||||
//
|
||||
// driveID 中如果有 "-" 不影响(用 LastIndex),viewkey 本身(91 网站的
|
||||
// view 标识)目前都是纯 hex 或纯数字,不包含 "-"。
|
||||
func extractViewKey(videoID string) string {
|
||||
// driveID 中如果有 "-" 不影响(用 LastIndex)。爬虫脚本应提供不包含 "-"
|
||||
// 的稳定 source_id;如果包含 "-",这里会取最后一段作为文件名后缀。
|
||||
func extractSourceID(videoID string) string {
|
||||
if i := strings.LastIndex(videoID, "-"); i >= 0 {
|
||||
return videoID[i+1:]
|
||||
}
|
||||
return videoID
|
||||
}
|
||||
|
||||
// viewKeySuffix 取 viewkey 的最后 N 个字符;不足 N 返回原字符串。
|
||||
// sourceIDSuffix 取 sourceID 的最后 N 个字符;不足 N 返回原字符串。
|
||||
//
|
||||
// 默认 N=8(足够稀疏避免标题撞名时的同名冲突)。
|
||||
const viewKeySuffixLen = 8
|
||||
const sourceIDSuffixLen = 8
|
||||
|
||||
func viewKeySuffix(viewkey string) string {
|
||||
r := []rune(viewkey)
|
||||
if len(r) <= viewKeySuffixLen {
|
||||
func sourceIDSuffix(sourceID string) string {
|
||||
r := []rune(sourceID)
|
||||
if len(r) <= sourceIDSuffixLen {
|
||||
return string(r)
|
||||
}
|
||||
return string(r[len(r)-viewKeySuffixLen:])
|
||||
return string(r[len(r)-sourceIDSuffixLen:])
|
||||
}
|
||||
|
||||
// desiredPikPakName 构造 spider91 视频在 PikPak 上的期望文件名。
|
||||
// desiredUploadName 构造爬虫视频上传到目标网盘时的期望文件名。
|
||||
//
|
||||
// desiredPikPakName("超白大奶律师约炮", "476fa8bf4b47e672d2fa", "mp4")
|
||||
// desiredUploadName("超白大奶律师约炮", "476fa8bf4b47e672d2fa", "mp4")
|
||||
// → "超白大奶律师约炮-72d2fa.mp4" // 实际是 e672d2fa(取最后 8)
|
||||
//
|
||||
// ext 不带前导点;空时默认 mp4。
|
||||
func desiredPikPakName(title, viewkey, ext string) string {
|
||||
func desiredUploadName(title, sourceID, ext string) string {
|
||||
clean := sanitizeTitle(title)
|
||||
suffix := viewKeySuffix(strings.TrimSpace(viewkey))
|
||||
suffix := sourceIDSuffix(strings.TrimSpace(sourceID))
|
||||
ext = strings.TrimSpace(ext)
|
||||
ext = strings.TrimPrefix(ext, ".")
|
||||
if ext == "" {
|
||||
ext = "mp4"
|
||||
}
|
||||
if suffix == "" {
|
||||
// viewkey 缺失时退化成 "<title>.<ext>"
|
||||
// sourceID 缺失时退化成 "<title>.<ext>"
|
||||
return clean + "." + ext
|
||||
}
|
||||
return clean + "-" + suffix + "." + ext
|
||||
@@ -1,4 +1,4 @@
|
||||
package spider91migrate
|
||||
package crawlerupload
|
||||
|
||||
import (
|
||||
"strings"
|
||||
@@ -13,11 +13,11 @@ func TestSanitizeTitleHandlesCommonCases(t *testing.T) {
|
||||
{"hello", "hello"},
|
||||
{" hello ", "hello"},
|
||||
{"hello\nworld", "hello world"},
|
||||
{"hello / world", "hello world"}, // 单 forbidden 折叠成空格
|
||||
{"hello / world", "hello world"}, // 单 forbidden 折叠成空格
|
||||
{"a/b\\c:d*e?f\"g<h>i|j", "a b c d e f g h i j"},
|
||||
{"a b", "a b"}, // 多空格折叠
|
||||
{"a b", "a b"}, // 多空格折叠
|
||||
{"a\t\nb", "a b"},
|
||||
{"...trim.dots...", "trim.dots"}, // 首尾点号被 trim 掉
|
||||
{"...trim.dots...", "trim.dots"}, // 首尾点号被 trim 掉
|
||||
{"control\x01char\x1f\x7f", "controlchar"}, // 控制字符直接丢弃
|
||||
{"", "video"}, // 空串回退
|
||||
{" / ", "video"}, // 全是 forbidden+空白 → 回退
|
||||
@@ -51,22 +51,22 @@ func TestSanitizeTitleKeepsCJKAndUnicode(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractViewKey(t *testing.T) {
|
||||
func TestExtractSourceID(t *testing.T) {
|
||||
cases := []struct{ in, want string }{
|
||||
{"spider91-91Spider-476fa8bf4b47e672d2fa", "476fa8bf4b47e672d2fa"},
|
||||
{"spider91-91Spider-1587338723", "1587338723"},
|
||||
{"spider91-some-drive-with-dashes-vk001", "vk001"}, // LastIndex 拿尾段
|
||||
{"scriptcrawler-demo-476fa8bf4b47e672d2fa", "476fa8bf4b47e672d2fa"},
|
||||
{"scriptcrawler-demo-1587338723", "1587338723"},
|
||||
{"scriptcrawler-some-drive-with-dashes-vk001", "vk001"}, // LastIndex 拿尾段
|
||||
{"no-dashes-after-prefix", "prefix"},
|
||||
{"single", "single"}, // 没 dash → 原样返回
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := extractViewKey(c.in); got != c.want {
|
||||
t.Errorf("extractViewKey(%q) = %q, want %q", c.in, got, c.want)
|
||||
if got := extractSourceID(c.in); got != c.want {
|
||||
t.Errorf("extractSourceID(%q) = %q, want %q", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestViewKeySuffix(t *testing.T) {
|
||||
func TestSourceIDSuffix(t *testing.T) {
|
||||
cases := []struct{ in, want string }{
|
||||
{"476fa8bf4b47e672d2fa", "e672d2fa"},
|
||||
{"1587338723", "87338723"},
|
||||
@@ -76,15 +76,15 @@ func TestViewKeySuffix(t *testing.T) {
|
||||
{"123456789", "23456789"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := viewKeySuffix(c.in); got != c.want {
|
||||
t.Errorf("viewKeySuffix(%q) = %q, want %q", c.in, got, c.want)
|
||||
if got := sourceIDSuffix(c.in); got != c.want {
|
||||
t.Errorf("sourceIDSuffix(%q) = %q, want %q", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDesiredPikPakName(t *testing.T) {
|
||||
func TestDesiredUploadName(t *testing.T) {
|
||||
cases := []struct {
|
||||
title, viewkey, ext, want string
|
||||
title, sourceID, ext, want string
|
||||
}{
|
||||
{
|
||||
"超白大奶律师约炮第一季",
|
||||
@@ -112,7 +112,7 @@ func TestDesiredPikPakName(t *testing.T) {
|
||||
},
|
||||
{
|
||||
"title",
|
||||
"", // 空 viewkey → 退化成 "<title>.<ext>"
|
||||
"", // 空 sourceID → 退化成 "<title>.<ext>"
|
||||
"webm",
|
||||
"title.webm",
|
||||
},
|
||||
@@ -130,9 +130,9 @@ func TestDesiredPikPakName(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := desiredPikPakName(c.title, c.viewkey, c.ext)
|
||||
got := desiredUploadName(c.title, c.sourceID, c.ext)
|
||||
if got != c.want {
|
||||
t.Errorf("desiredPikPakName(%q,%q,%q) = %q, want %q", c.title, c.viewkey, c.ext, got, c.want)
|
||||
t.Errorf("desiredUploadName(%q,%q,%q) = %q, want %q", c.title, c.sourceID, c.ext, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -647,7 +647,7 @@ func isGoogleUploadHTTPRateLimit(status int, header http.Header, body []byte, ap
|
||||
if isGoogleRateLimit(nil, apiErr) {
|
||||
return true
|
||||
}
|
||||
return googleLimitText(string(body))
|
||||
return false
|
||||
}
|
||||
|
||||
func googleUploadRateLimitError(status int, header http.Header, body []byte, message string) error {
|
||||
@@ -910,7 +910,7 @@ func isGoogleRateLimit(res *resty.Response, body apiErrorBody) bool {
|
||||
return true
|
||||
}
|
||||
for _, e := range body.Errors {
|
||||
if googleLimitReason(e.Reason) || googleLimitText(e.Message) {
|
||||
if googleLimitReason(e.Reason) {
|
||||
return true
|
||||
}
|
||||
domain := compactGoogleLimitText(e.Domain)
|
||||
@@ -918,7 +918,7 @@ func isGoogleRateLimit(res *resty.Response, body apiErrorBody) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return googleLimitText(body.Message)
|
||||
return false
|
||||
}
|
||||
|
||||
func isGoogleTokenRateLimit(res *resty.Response, out tokenResp) bool {
|
||||
@@ -930,9 +930,7 @@ func isGoogleTokenRateLimit(res *resty.Response, out tokenResp) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return googleLimitText(out.Text) ||
|
||||
googleLimitText(out.Error) ||
|
||||
googleLimitText(out.ErrorDescription)
|
||||
return googleLimitReason(out.Error)
|
||||
}
|
||||
|
||||
func googleLimitReason(reason string) bool {
|
||||
@@ -953,31 +951,6 @@ func googleLimitReason(reason string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func googleLimitText(text string) bool {
|
||||
text = strings.ToLower(strings.TrimSpace(text))
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
compact := compactGoogleLimitText(text)
|
||||
if strings.Contains(compact, "ratelimitexceeded") ||
|
||||
strings.Contains(compact, "userratelimitexceeded") ||
|
||||
strings.Contains(compact, "dailylimitexceeded") ||
|
||||
strings.Contains(compact, "downloadquotaexceeded") ||
|
||||
strings.Contains(compact, "sharingratelimitexceeded") ||
|
||||
strings.Contains(compact, "quotaexceeded") ||
|
||||
strings.Contains(compact, "toomanyrequests") {
|
||||
return true
|
||||
}
|
||||
return strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "quota exceeded") ||
|
||||
strings.Contains(text, "download quota") ||
|
||||
strings.Contains(text, "sharing rate") ||
|
||||
strings.Contains(text, "daily limit") ||
|
||||
strings.Contains(text, "user rate") ||
|
||||
strings.Contains(text, "usage limit")
|
||||
}
|
||||
|
||||
func compactGoogleLimitText(text string) string {
|
||||
text = strings.ToLower(strings.TrimSpace(text))
|
||||
replacer := strings.NewReplacer("_", "", "-", "", " ", "", ".", "", ":", "")
|
||||
|
||||
@@ -227,10 +227,10 @@ func TestEnsureDirAndRenameUseGoogleDriveFileAPI(t *testing.T) {
|
||||
if err := json.NewDecoder(r.Body).Decode(&meta); err != nil {
|
||||
t.Fatalf("decode mkdir body: %v", err)
|
||||
}
|
||||
if meta.Name != "91 Spider" || len(meta.Parents) != 1 || meta.Parents[0] != "root" || meta.MimeType != "application/vnd.google-apps.folder" {
|
||||
if meta.Name != "Crawler Uploads" || len(meta.Parents) != 1 || meta.Parents[0] != "root" || meta.MimeType != "application/vnd.google-apps.folder" {
|
||||
t.Fatalf("mkdir body = %+v", meta)
|
||||
}
|
||||
writeTestJSON(w, driveFile{ID: "folder-91", Name: "91 Spider", MimeType: "application/vnd.google-apps.folder"})
|
||||
writeTestJSON(w, driveFile{ID: "folder-crawler", Name: "Crawler Uploads", MimeType: "application/vnd.google-apps.folder"})
|
||||
case r.Method == http.MethodPatch && r.URL.Path == "/drive/v3/files/file-1":
|
||||
renamed = true
|
||||
var body map[string]string
|
||||
@@ -251,12 +251,12 @@ func TestEnsureDirAndRenameUseGoogleDriveFileAPI(t *testing.T) {
|
||||
d.accessToken = "access"
|
||||
d.listInterval = -1
|
||||
|
||||
dirID, err := d.EnsureDir(context.Background(), "91 Spider")
|
||||
dirID, err := d.EnsureDir(context.Background(), "Crawler Uploads")
|
||||
if err != nil {
|
||||
t.Fatalf("EnsureDir() error = %v", err)
|
||||
}
|
||||
if dirID != "folder-91" || !madeDir {
|
||||
t.Fatalf("dirID/madeDir = %q/%v, want folder-91/true", dirID, madeDir)
|
||||
if dirID != "folder-crawler" || !madeDir {
|
||||
t.Fatalf("dirID/madeDir = %q/%v, want folder-crawler/true", dirID, madeDir)
|
||||
}
|
||||
if err := d.Rename(context.Background(), "file-1", "new-name.mp4"); err != nil {
|
||||
t.Fatalf("Rename() error = %v", err)
|
||||
|
||||
@@ -0,0 +1,300 @@
|
||||
package guangyapan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
func TestDriverRefreshListAndStream(t *testing.T) {
|
||||
var refreshed bool
|
||||
var listedRoot bool
|
||||
updates := map[string]string{}
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/v1/auth/token":
|
||||
refreshed = true
|
||||
writeTestJSON(w, map[string]any{
|
||||
"access_token": "new-access",
|
||||
"refresh_token": "new-refresh",
|
||||
})
|
||||
case "/v1/user/me":
|
||||
if got := r.Header.Get("Authorization"); got != "Bearer new-access" {
|
||||
t.Fatalf("auth header = %q, want new access token", got)
|
||||
}
|
||||
writeTestJSON(w, map[string]any{"sub": "user-1"})
|
||||
case "/userres/v1/file/get_file_list":
|
||||
if got := r.Header.Get("Authorization"); got != "Bearer new-access" {
|
||||
t.Fatalf("api auth header = %q, want new access token", got)
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode list body: %v", err)
|
||||
}
|
||||
if body["parentId"] != "" {
|
||||
t.Fatalf("parentId = %#v, want root empty string", body["parentId"])
|
||||
}
|
||||
listedRoot = true
|
||||
writeTestJSON(w, map[string]any{
|
||||
"code": 0,
|
||||
"msg": "success",
|
||||
"data": map[string]any{
|
||||
"total": 2,
|
||||
"list": []map[string]any{
|
||||
{"fileId": "dir-1", "parentId": "", "fileName": "Movies", "resType": 2},
|
||||
{"fileId": "file-1", "parentId": "", "fileName": "clip.mp4", "fileSize": 123, "resType": 1, "utime": 1700000000},
|
||||
},
|
||||
},
|
||||
})
|
||||
case "/nd.bizuserres.s/v1/get_res_download_url":
|
||||
writeTestJSON(w, map[string]any{
|
||||
"code": 0,
|
||||
"msg": "success",
|
||||
"data": map[string]any{"signedURL": "https://cdn.example.test/clip.mp4"},
|
||||
})
|
||||
default:
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
RefreshToken: "old-refresh",
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
OnCredentialsUpdate: func(values map[string]string) {
|
||||
for k, v := range values {
|
||||
updates[k] = v
|
||||
}
|
||||
},
|
||||
})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
if !refreshed {
|
||||
t.Fatal("refresh token endpoint was not called")
|
||||
}
|
||||
if updates["access_token"] != "new-access" || updates["refresh_token"] != "new-refresh" {
|
||||
t.Fatalf("updates = %#v, want refreshed tokens", updates)
|
||||
}
|
||||
|
||||
entries, err := d.List(context.Background(), "")
|
||||
if err != nil {
|
||||
t.Fatalf("list: %v", err)
|
||||
}
|
||||
if !listedRoot || len(entries) != 2 {
|
||||
t.Fatalf("listedRoot=%v entries=%#v", listedRoot, entries)
|
||||
}
|
||||
if !entries[0].IsDir || entries[1].ID != "file-1" || entries[1].Size != 123 {
|
||||
t.Fatalf("entries = %#v", entries)
|
||||
}
|
||||
|
||||
link, err := d.StreamURL(context.Background(), "file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("stream url: %v", err)
|
||||
}
|
||||
if link.URL != "https://cdn.example.test/clip.mp4" {
|
||||
t.Fatalf("stream url = %q", link.URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDriverResolvesRootPath(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/v1/user/me":
|
||||
writeTestJSON(w, map[string]any{"sub": "user-1"})
|
||||
case "/userres/v1/file/get_file_list":
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode list body: %v", err)
|
||||
}
|
||||
parent, _ := body["parentId"].(string)
|
||||
switch parent {
|
||||
case "":
|
||||
writeTestJSON(w, listTestResponse([]map[string]any{
|
||||
{"fileId": "folder-a", "parentId": "", "fileName": "影视", "resType": 2},
|
||||
}))
|
||||
case "folder-a":
|
||||
writeTestJSON(w, listTestResponse([]map[string]any{
|
||||
{"fileId": "folder-b", "parentId": "folder-a", "fileName": "电影", "resType": 2},
|
||||
}))
|
||||
case "folder-b":
|
||||
writeTestJSON(w, listTestResponse([]map[string]any{
|
||||
{"fileId": "file-1", "parentId": "folder-b", "fileName": "movie.mp4", "fileSize": 456, "resType": 1},
|
||||
}))
|
||||
default:
|
||||
t.Fatalf("unexpected parent %q", parent)
|
||||
}
|
||||
default:
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
RootID: "configured-root",
|
||||
RootPath: "影视/电影",
|
||||
AccessToken: "access",
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
if d.RootID() != "folder-b" {
|
||||
t.Fatalf("root id = %q, want folder-b", d.RootID())
|
||||
}
|
||||
entries, err := d.List(context.Background(), "")
|
||||
if err != nil {
|
||||
t.Fatalf("list resolved root: %v", err)
|
||||
}
|
||||
if len(entries) != 1 || entries[0].ID != "file-1" {
|
||||
t.Fatalf("entries = %#v", entries)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDriverSendSMSCodeUpdatesVerificationState(t *testing.T) {
|
||||
updates := map[string]string{}
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/v1/shield/captcha/init":
|
||||
writeTestJSON(w, map[string]any{"captcha_token": "captcha-1"})
|
||||
case "/v1/auth/verification":
|
||||
writeTestJSON(w, map[string]any{"verification_id": "verify-1"})
|
||||
default:
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
PhoneNumber: "13800000000",
|
||||
SendCode: true,
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
OnCredentialsUpdate: func(values map[string]string) {
|
||||
for k, v := range values {
|
||||
updates[k] = v
|
||||
}
|
||||
},
|
||||
})
|
||||
err := d.Init(context.Background())
|
||||
if err == nil || !strings.Contains(err.Error(), "验证码已发送") {
|
||||
t.Fatalf("init err = %v, want verification prompt", err)
|
||||
}
|
||||
if updates["captcha_token"] != "captcha-1" || updates["verification_id"] != "verify-1" || updates["send_code"] != "false" {
|
||||
t.Fatalf("updates = %#v, want sms state saved", updates)
|
||||
}
|
||||
if updates["device_id"] == "" {
|
||||
t.Fatalf("updates = %#v, want generated device id saved", updates)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListHTTP429ReturnsRateLimitError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/userres/v1/file/get_file_list" {
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
w.Header().Set("Retry-After", "120")
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
writeTestJSON(w, map[string]any{"code": 429, "msg": "操作频繁,请稍后重试"})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
AccessToken: "access",
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
})
|
||||
_, err := d.List(context.Background(), "")
|
||||
if err == nil {
|
||||
t.Fatal("list succeeded, want rate limit error")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
if rateLimit.RetryAfter != 2*time.Minute {
|
||||
t.Fatalf("retry after = %s, want 2m", rateLimit.RetryAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListCode429ReturnsRateLimitError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/userres/v1/file/get_file_list" {
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
writeTestJSON(w, map[string]any{"code": 429, "msg": "操作频繁,请稍后再试"})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
AccessToken: "access",
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
})
|
||||
_, err := d.List(context.Background(), "")
|
||||
if err == nil {
|
||||
t.Fatal("list succeeded, want rate limit error")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListInvalidToken403DoesNotReturnRateLimitError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/userres/v1/file/get_file_list" {
|
||||
t.Fatalf("unexpected path %s", r.URL.Path)
|
||||
}
|
||||
w.WriteHeader(http.StatusForbidden)
|
||||
writeTestJSON(w, map[string]any{"code": 401, "msg": "invalid access token"})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
d := New(Config{
|
||||
ID: "gy",
|
||||
AccessToken: "access",
|
||||
AccountBaseURL: srv.URL,
|
||||
APIBaseURL: srv.URL,
|
||||
})
|
||||
_, err := d.List(context.Background(), "")
|
||||
if err == nil {
|
||||
t.Fatal("list succeeded, want auth error")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want non-rate-limit error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// listTestResponse wraps items in the success envelope used by the fake
// file-list endpoint: code 0, msg "success", and a data object holding the
// item count and the items themselves.
func listTestResponse(items []map[string]any) map[string]any {
	payload := map[string]any{
		"total": len(items),
		"list":  items,
	}
	envelope := make(map[string]any, 3)
	envelope["code"] = 0
	envelope["msg"] = "success"
	envelope["data"] = payload
	return envelope
}
|
||||
|
||||
// writeTestJSON sets a JSON Content-Type on w and encodes v into the response
// body. An encoding failure panics, since it indicates a broken test fixture.
func writeTestJSON(w http.ResponseWriter, v any) {
	w.Header().Set("Content-Type", "application/json")
	encodeErr := json.NewEncoder(w).Encode(v)
	if encodeErr == nil {
		return
	}
	panic(encodeErr)
}
|
||||
@@ -0,0 +1,244 @@
|
||||
package guangyapan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-resty/resty/v2"
|
||||
"github.com/skip2/go-qrcode"
|
||||
)
|
||||
|
||||
// defaultQRScope is the scope sent when requesting a device code.
const defaultQRScope = "user"

// deviceCodeGrantType is the grant_type sent when polling the token endpoint
// (the OAuth 2.0 device authorization grant URN).
const deviceCodeGrantType = "urn:ietf:params:oauth:grant-type:device_code"

// defaultQRUserAgent is the User-Agent header set on every QR login request.
const defaultQRUserAgent = "GuangYaPan-Login/1.0"
|
||||
|
||||
// QRConfig holds the optional settings accepted by NewQRClient. Every field
// may be left at its zero value.
type QRConfig struct {
	// AccountBaseURL is the base URL of the account service.
	AccountBaseURL string
	// HTTPClient is the underlying HTTP client to wrap.
	HTTPClient *http.Client
	// Now supplies the current time; tests can pin it.
	Now func() time.Time
}
|
||||
|
||||
type QRClient struct {
|
||||
accountBaseURL string
|
||||
client *resty.Client
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// QRCodeSession is the result of Generate: everything a caller needs to show
// the QR code and start polling.
type QRCodeSession struct {
	// DeviceCode identifies the login attempt and is passed back to Poll.
	DeviceCode string `json:"deviceCode"`
	// QRCodeURL is the verification URL encoded in the QR image.
	QRCodeURL string `json:"qrCodeUrl"`
	// QRImageDataURL is the QR image as a base64 PNG data URL.
	QRImageDataURL string `json:"qrImageDataUrl"`
	// IntervalSeconds is the polling interval to use.
	IntervalSeconds int `json:"intervalSeconds"`
	// ExpiresAt is the expiry time formatted as RFC 3339.
	ExpiresAt string `json:"expiresAt,omitempty"`
}
|
||||
|
||||
// QRCodeStatus is the result of one Poll call. Token fields are populated
// only when State is "success".
type QRCodeStatus struct {
	// State is one of "pending", "expired", "denied", "error" or "success".
	State string `json:"state"`
	// StatusText is a human-readable description of State.
	StatusText string `json:"statusText"`
	// IntervalSeconds, when non-zero, is a new polling interval to adopt.
	IntervalSeconds int `json:"intervalSeconds,omitempty"`
	// AccessToken is the access token issued on success.
	AccessToken string `json:"accessToken,omitempty"`
	// RefreshToken is the refresh token issued on success.
	RefreshToken string `json:"refreshToken,omitempty"`
	// TokenType is the token type reported on success.
	TokenType string `json:"tokenType,omitempty"`
	// ExpiresIn is the expires_in value returned by the token endpoint.
	ExpiresIn int64 `json:"expiresIn,omitempty"`
}
|
||||
|
||||
// deviceCodeResp is the JSON body of the device-code endpoint. The same shape
// is decoded for both success and error responses.
type deviceCodeResp struct {
	// Success fields.
	DeviceCode              string `json:"device_code"`
	VerificationURIComplete string `json:"verification_uri_complete"`
	ShortURIComplete        string `json:"short_uri_complete"`
	Interval                int    `json:"interval"`
	ExpiresIn               int    `json:"expires_in"`

	// Error fields.
	Error     string `json:"error"`
	ErrorCode int    `json:"error_code"`
	ErrorDesc string `json:"error_description"`
}
|
||||
|
||||
// deviceTokenResp is the JSON body of the token endpoint when polled with a
// device code. The same shape is decoded for success and error responses.
type deviceTokenResp struct {
	// Success fields.
	AccessToken  string `json:"access_token"`
	RefreshToken string `json:"refresh_token"`
	TokenType    string `json:"token_type"`
	ExpiresIn    int64  `json:"expires_in"`
	Scope        string `json:"scope"`

	// Error fields.
	Error     string `json:"error"`
	ErrorCode int    `json:"error_code"`
	ErrorDesc string `json:"error_description"`
}
|
||||
|
||||
func NewQRClient(c QRConfig) *QRClient {
|
||||
accountBaseURL := strings.TrimRight(strings.TrimSpace(c.AccountBaseURL), "/")
|
||||
if accountBaseURL == "" {
|
||||
accountBaseURL = defaultAccountBaseURL
|
||||
}
|
||||
httpClient := c.HTTPClient
|
||||
if httpClient == nil {
|
||||
httpClient = &http.Client{Timeout: 20 * time.Second}
|
||||
}
|
||||
now := c.Now
|
||||
if now == nil {
|
||||
now = time.Now
|
||||
}
|
||||
return &QRClient{
|
||||
accountBaseURL: accountBaseURL,
|
||||
client: resty.NewWithClient(httpClient).
|
||||
SetTimeout(20*time.Second).
|
||||
SetBaseURL(accountBaseURL).
|
||||
SetHeader("User-Agent", defaultQRUserAgent).
|
||||
SetHeader("Accept", "application/json").
|
||||
SetHeader("Content-Type", "application/json"),
|
||||
now: now,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *QRClient) Generate(ctx context.Context) (QRCodeSession, error) {
|
||||
var out deviceCodeResp
|
||||
var errOut deviceCodeResp
|
||||
resp, err := c.client.R().
|
||||
SetContext(ctx).
|
||||
SetBody(map[string]any{
|
||||
"client_id": defaultClientID,
|
||||
"scope": defaultQRScope,
|
||||
}).
|
||||
SetResult(&out).
|
||||
SetError(&errOut).
|
||||
Post("/v1/auth/device/code")
|
||||
if err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
if resp.IsError() || out.Error != "" {
|
||||
if out.Error == "" {
|
||||
out = errOut
|
||||
}
|
||||
return QRCodeSession{}, fmt.Errorf("guangyapan qr: %s", deviceAPIError(out.ErrorDesc, out.Error, resp))
|
||||
}
|
||||
|
||||
deviceCode := strings.TrimSpace(out.DeviceCode)
|
||||
if deviceCode == "" {
|
||||
return QRCodeSession{}, errors.New("guangyapan qr: empty device_code")
|
||||
}
|
||||
qrURL := strings.TrimSpace(out.VerificationURIComplete)
|
||||
if qrURL == "" {
|
||||
qrURL = strings.TrimSpace(out.ShortURIComplete)
|
||||
}
|
||||
if qrURL == "" {
|
||||
return QRCodeSession{}, errors.New("guangyapan qr: empty verification uri")
|
||||
}
|
||||
interval := out.Interval
|
||||
if interval <= 0 {
|
||||
interval = 5
|
||||
}
|
||||
expiresIn := out.ExpiresIn
|
||||
if expiresIn <= 0 {
|
||||
expiresIn = 300
|
||||
}
|
||||
png, err := qrcode.Encode(qrURL, qrcode.Medium, 220)
|
||||
if err != nil {
|
||||
return QRCodeSession{}, err
|
||||
}
|
||||
return QRCodeSession{
|
||||
DeviceCode: deviceCode,
|
||||
QRCodeURL: qrURL,
|
||||
QRImageDataURL: "data:image/png;base64," + base64.StdEncoding.EncodeToString(png),
|
||||
IntervalSeconds: interval,
|
||||
ExpiresAt: c.now().Add(time.Duration(expiresIn) * time.Second).Format(time.RFC3339),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *QRClient) Poll(ctx context.Context, deviceCode string) (QRCodeStatus, error) {
|
||||
deviceCode = strings.TrimSpace(deviceCode)
|
||||
if deviceCode == "" {
|
||||
return QRCodeStatus{}, errors.New("deviceCode is required")
|
||||
}
|
||||
|
||||
var out deviceTokenResp
|
||||
var errOut deviceTokenResp
|
||||
resp, err := c.client.R().
|
||||
SetContext(ctx).
|
||||
SetBody(map[string]any{
|
||||
"client_id": defaultClientID,
|
||||
"grant_type": deviceCodeGrantType,
|
||||
"device_code": deviceCode,
|
||||
}).
|
||||
SetResult(&out).
|
||||
SetError(&errOut).
|
||||
Post("/v1/auth/token")
|
||||
if err != nil {
|
||||
return QRCodeStatus{}, err
|
||||
}
|
||||
if resp.IsError() && out.Error == "" {
|
||||
out = errOut
|
||||
}
|
||||
if resp.IsError() && out.Error == "" {
|
||||
_ = json.Unmarshal(resp.Body(), &out)
|
||||
}
|
||||
if out.Error != "" {
|
||||
return qrStatusForDeviceError(out), nil
|
||||
}
|
||||
if resp.IsError() {
|
||||
return QRCodeStatus{}, fmt.Errorf("guangyapan qr: status=%d body=%s", resp.StatusCode(), resp.String())
|
||||
}
|
||||
access := strings.TrimSpace(out.AccessToken)
|
||||
refresh := strings.TrimSpace(out.RefreshToken)
|
||||
if access == "" || refresh == "" {
|
||||
return QRCodeStatus{}, errors.New("guangyapan qr: login succeeded but token response is incomplete")
|
||||
}
|
||||
tokenType := strings.TrimSpace(out.TokenType)
|
||||
if tokenType == "" {
|
||||
tokenType = "Bearer"
|
||||
}
|
||||
return QRCodeStatus{
|
||||
State: "success",
|
||||
StatusText: "登录成功",
|
||||
AccessToken: access,
|
||||
RefreshToken: refresh,
|
||||
TokenType: tokenType,
|
||||
ExpiresIn: out.ExpiresIn,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func qrStatusForDeviceError(out deviceTokenResp) QRCodeStatus {
|
||||
errCode := strings.TrimSpace(out.Error)
|
||||
switch errCode {
|
||||
case "authorization_pending":
|
||||
return QRCodeStatus{State: "pending", StatusText: "等待扫码确认"}
|
||||
case "slow_down":
|
||||
return QRCodeStatus{State: "pending", StatusText: "等待扫码确认,已降低查询频率", IntervalSeconds: 10}
|
||||
case "expired_token":
|
||||
return QRCodeStatus{State: "expired", StatusText: "二维码已过期"}
|
||||
case "access_denied":
|
||||
return QRCodeStatus{State: "denied", StatusText: "用户拒绝了授权"}
|
||||
default:
|
||||
msg := strings.TrimSpace(out.ErrorDesc)
|
||||
if msg == "" {
|
||||
msg = errCode
|
||||
}
|
||||
if msg == "" {
|
||||
msg = "未知错误"
|
||||
}
|
||||
return QRCodeStatus{State: "error", StatusText: msg}
|
||||
}
|
||||
}
|
||||
|
||||
func deviceAPIError(desc, short string, resp *resty.Response) string {
|
||||
msg := strings.TrimSpace(desc)
|
||||
if msg == "" {
|
||||
msg = strings.TrimSpace(short)
|
||||
}
|
||||
if msg == "" && resp != nil {
|
||||
msg = strings.TrimSpace(resp.String())
|
||||
}
|
||||
if msg == "" && resp != nil {
|
||||
msg = fmt.Sprintf("status=%d", resp.StatusCode())
|
||||
}
|
||||
if msg == "" {
|
||||
msg = "unknown error"
|
||||
}
|
||||
return msg
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
package guangyapan
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestQRClientGenerate(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/v1/auth/device/code" {
|
||||
t.Fatalf("path = %s, want device code endpoint", r.URL.Path)
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode body: %v", err)
|
||||
}
|
||||
if body["client_id"] != defaultClientID || body["scope"] != defaultQRScope {
|
||||
t.Fatalf("body = %#v", body)
|
||||
}
|
||||
writeTestJSON(w, map[string]any{
|
||||
"device_code": "device-1",
|
||||
"verification_uri_complete": "https://account.guangyapan.com/device?code=abc",
|
||||
"interval": 7,
|
||||
"expires_in": 180,
|
||||
})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
client := NewQRClient(QRConfig{
|
||||
AccountBaseURL: srv.URL,
|
||||
Now: func() time.Time { return time.Unix(1700000000, 0) },
|
||||
})
|
||||
session, err := client.Generate(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("generate: %v", err)
|
||||
}
|
||||
if session.DeviceCode != "device-1" || session.QRCodeURL != "https://account.guangyapan.com/device?code=abc" {
|
||||
t.Fatalf("session = %#v", session)
|
||||
}
|
||||
if session.IntervalSeconds != 7 {
|
||||
t.Fatalf("interval = %d, want 7", session.IntervalSeconds)
|
||||
}
|
||||
if session.ExpiresAt != time.Unix(1700000180, 0).Format(time.RFC3339) {
|
||||
t.Fatalf("expiresAt = %q", session.ExpiresAt)
|
||||
}
|
||||
if !strings.HasPrefix(session.QRImageDataURL, "data:image/png;base64,") {
|
||||
t.Fatalf("qr image = %q", session.QRImageDataURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQRClientPollPendingAndSuccess(t *testing.T) {
|
||||
var calls int
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/v1/auth/token" {
|
||||
t.Fatalf("path = %s, want token endpoint", r.URL.Path)
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode body: %v", err)
|
||||
}
|
||||
if body["client_id"] != defaultClientID ||
|
||||
body["grant_type"] != deviceCodeGrantType ||
|
||||
body["device_code"] != "device-1" {
|
||||
t.Fatalf("body = %#v", body)
|
||||
}
|
||||
calls++
|
||||
if calls == 1 {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
writeTestJSON(w, map[string]any{"error": "authorization_pending"})
|
||||
return
|
||||
}
|
||||
writeTestJSON(w, map[string]any{
|
||||
"access_token": "access-1",
|
||||
"refresh_token": "refresh-1",
|
||||
"token_type": "Bearer",
|
||||
"expires_in": 7200,
|
||||
})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
client := NewQRClient(QRConfig{AccountBaseURL: srv.URL})
|
||||
pending, err := client.Poll(context.Background(), "device-1")
|
||||
if err != nil {
|
||||
t.Fatalf("poll pending: %v", err)
|
||||
}
|
||||
if pending.State != "pending" || pending.AccessToken != "" {
|
||||
t.Fatalf("pending = %#v", pending)
|
||||
}
|
||||
|
||||
success, err := client.Poll(context.Background(), "device-1")
|
||||
if err != nil {
|
||||
t.Fatalf("poll success: %v", err)
|
||||
}
|
||||
if success.State != "success" || success.AccessToken != "access-1" || success.RefreshToken != "refresh-1" {
|
||||
t.Fatalf("success = %#v", success)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,129 @@
|
||||
package guangyapan
|
||||
|
||||
import "time"
|
||||
|
||||
// tokenResp is the JSON shape decoded for a token response: the issued
// tokens, or the error fields.
type tokenResp struct {
	AccessToken  string `json:"access_token"`
	RefreshToken string `json:"refresh_token"`

	// Error fields.
	Error     string `json:"error"`
	ErrorCode int    `json:"error_code"`
	ErrorDesc string `json:"error_description"`
}
|
||||
|
||||
// verificationResp is the JSON shape decoded for a verification response:
// the verification ID, or the error fields.
type verificationResp struct {
	VerificationID string `json:"verification_id"`

	// Error fields.
	Error     string `json:"error"`
	ErrorCode int    `json:"error_code"`
	ErrorDesc string `json:"error_description"`
}
|
||||
|
||||
// captchaInitResp is the JSON shape decoded for a captcha-init response: the
// captcha token, or the error fields.
type captchaInitResp struct {
	CaptchaToken string `json:"captcha_token"`

	// Error fields.
	Error     string `json:"error"`
	ErrorCode int    `json:"error_code"`
	ErrorDesc string `json:"error_description"`
}
|
||||
|
||||
// verifyResp is the JSON shape decoded for a verify response: the
// verification token, or the error fields.
type verifyResp struct {
	VerificationToken string `json:"verification_token"`

	// Error fields.
	Error     string `json:"error"`
	ErrorCode int    `json:"error_code"`
	ErrorDesc string `json:"error_description"`
}
|
||||
|
||||
// userMeResp is the JSON shape decoded for a user-info response; only the
// "sub" field is decoded.
type userMeResp struct {
	Sub string `json:"sub"`
}
|
||||
|
||||
type listResp struct {
|
||||
Code int `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data struct {
|
||||
Total int `json:"total"`
|
||||
List []fileItem `json:"list"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
// fileItem is one entry of a file listing as decoded from JSON.
type fileItem struct {
	FileID   string `json:"fileId"`
	ParentID string `json:"parentId"`
	FileName string `json:"fileName"`
	FileSize int64  `json:"fileSize"`
	// ResType is the numeric resource type reported by the service.
	ResType int `json:"resType"`
	// CTime and UTime are numeric timestamps as sent by the service.
	CTime int64 `json:"ctime"`
	UTime int64 `json:"utime"`
}
|
||||
|
||||
// downloadResp is the JSON envelope decoded for a download-URL response. The
// data object has two URL fields: signedURL and downloadUrl.
type downloadResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		SignedURL   string `json:"signedURL"`
		DownloadURL string `json:"downloadUrl"`
	} `json:"data"`
}
|
||||
|
||||
// createDirResp is the JSON envelope decoded for a create-directory response;
// the data object describes the resulting entry.
type createDirResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		FileID   string `json:"fileId"`
		FileName string `json:"fileName"`
		ResType  int    `json:"resType"`
		CTime    int64  `json:"ctime"`
		UTime    int64  `json:"utime"`
	} `json:"data"`
}
|
||||
|
||||
// deleteResp is the JSON envelope decoded for a delete response; the data
// object carries a task ID.
type deleteResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		TaskID string `json:"taskId"`
	} `json:"data"`
}
|
||||
|
||||
// taskStatusResp is the JSON envelope decoded for a task-status response; the
// data object carries a numeric status.
type taskStatusResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		Status int `json:"status"`
	} `json:"data"`
}
|
||||
|
||||
type uploadTokenResp struct {
|
||||
Code int `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data uploadTokenData `json:"data"`
|
||||
}
|
||||
|
||||
// uploadTokenData is the payload of an upload-token response: the task ID,
// the target object location and the credentials. Credentials are decoded
// both from top-level fields and from a nested "creds" object.
type uploadTokenData struct {
	TaskID       string `json:"taskId"`
	ObjectPath   string `json:"objectPath"`
	BucketName   string `json:"bucketName"`
	EndPoint     string `json:"endPoint"`
	FullEndPoint string `json:"fullEndPoint"`

	// Credentials at the top level of the payload.
	AccessKeyID     string `json:"accessKeyID"`
	SecretAccessKey string `json:"secretAccessKey"`
	SessionToken    string `json:"sessionToken"`

	// Credentials nested under "creds".
	Creds struct {
		AccessKeyID     string `json:"accessKeyID"`
		SecretAccessKey string `json:"secretAccessKey"`
		SessionToken    string `json:"sessionToken"`
	} `json:"creds"`
}
|
||||
|
||||
// taskInfoResp is the JSON envelope decoded for a task-info response; the
// data object carries a file ID.
type taskInfoResp struct {
	Code int    `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		FileID string `json:"fileId"`
	} `json:"data"`
}
|
||||
|
||||
// unixOrZero converts a Unix timestamp in seconds to a time.Time. Zero and
// negative values yield the zero time.Time.
func unixOrZero(v int64) time.Time {
	if v > 0 {
		return time.Unix(v, 0)
	}
	var zero time.Time
	return zero
}
|
||||
@@ -5,12 +5,14 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Drive 是多家网盘统一抽象。上层不区分盘,只区分 Kind。
|
||||
type Drive interface {
|
||||
// Kind 返回驱动代号:"quark" / "p115" / "p123" / "pikpak" / "wopan" / "onedrive" / "googledrive" / "localstorage"
|
||||
// Kind 返回驱动代号:"quark" / "p115" / "p123" / "pikpak" / "wopan" / "guangyapan" / "onedrive" / "googledrive" / "localstorage"
|
||||
Kind() string
|
||||
|
||||
// ID 返回该盘在 catalog 中的唯一标识
|
||||
@@ -119,3 +121,42 @@ func RateLimitRetryAfter(err error) (time.Duration, bool) {
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// TextMentionsHTTPStatus only looks for explicit numeric HTTP status contexts
// in errors from tools that do not expose structured response metadata.
//
// The text is trimmed and lower-cased, then each positive status is searched
// for either at the very start followed by a space ("429 too many requests")
// or directly after a context marker such as "status=", "http " or "code: ".
// A bare number elsewhere in the text ("generated 429 bytes") does not match.
//
// A match after a context marker must not be followed by another digit, so
// "status=4290" is not reported as a mention of 429.
func TextMentionsHTTPStatus(text string, statuses ...int) bool {
	text = strings.ToLower(strings.TrimSpace(text))
	if text == "" {
		return false
	}
	// Longer spellings ("http status=", "http status: ", "http status ",
	// "error_code=", "error_code: ") end in one of these markers, so they are
	// covered without being listed separately.
	markers := []string{
		"status=",
		"status: ",
		"status ",
		"status code ",
		"http ",
		"server returned ",
		"code=",
		"code: ",
	}
	for _, status := range statuses {
		if status <= 0 {
			continue
		}
		code := strconv.Itoa(status)
		if strings.HasPrefix(text, code+" ") {
			return true
		}
		for _, marker := range markers {
			if hasStatusToken(text, marker+code) {
				return true
			}
		}
	}
	return false
}

// hasStatusToken reports whether needle occurs in text at a position where
// it is not immediately followed by an ASCII digit.
func hasStatusToken(text, needle string) bool {
	for from := 0; from < len(text); {
		idx := strings.Index(text[from:], needle)
		if idx < 0 {
			return false
		}
		end := from + idx + len(needle)
		if end == len(text) || text[end] < '0' || text[end] > '9' {
			return true
		}
		// This occurrence runs into a longer number; keep scanning after it.
		from += idx + 1
	}
	return false
}
|
||||
|
||||
func ErrorMentionsHTTPStatus(err error, statuses ...int) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
return TextMentionsHTTPStatus(err.Error(), statuses...)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
package drives
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTextMentionsHTTPStatus(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
want bool
|
||||
}{
|
||||
{name: "status context", text: "request failed with status: 429 Too Many Requests", want: true},
|
||||
{name: "http context", text: "http 503 service unavailable", want: true},
|
||||
{name: "server returned context", text: "Server returned 403 Forbidden", want: true},
|
||||
{name: "message only", text: "操作频繁,请稍后重试", want: false},
|
||||
{name: "unrelated number", text: "generated 429 bytes", want: false},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := TextMentionsHTTPStatus(tc.text, 403, 429, 503); got != tc.want {
|
||||
t.Fatalf("TextMentionsHTTPStatus(%q) = %v, want %v", tc.text, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -23,17 +23,24 @@ const maxSTRMBytes = 64 * 1024
|
||||
// Config holds the settings used by New to build a local-storage Driver.
type Config struct {
|
||||
ID string
|
||||
RootPath string
|
||||
// STRMAllowOutsideRoot allows a .strm file to point at a local path outside the storage root.
|
||||
// Off by default: a strm effectively lets /p/stream read any file on the server, so it
|
||||
// should only be enabled when the administrator knows exactly what they are doing
|
||||
// (for example, the strm library is kept separate from the rclone mount directory).
|
||||
STRMAllowOutsideRoot bool
|
||||
}
|
||||
|
||||
type Driver struct {
|
||||
id string
|
||||
rootPath string
|
||||
id string
|
||||
rootPath string
|
||||
strmAllowOutsideRoot bool
|
||||
}
|
||||
|
||||
func New(c Config) *Driver {
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
rootPath: c.RootPath,
|
||||
id: c.ID,
|
||||
rootPath: c.RootPath,
|
||||
strmAllowOutsideRoot: c.STRMAllowOutsideRoot,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -230,8 +237,8 @@ func (d *Driver) localSTRMLink(strmPath, target string) (*drives.StreamLink, err
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !within {
|
||||
return nil, errors.New("localstorage: strm target escapes root")
|
||||
if !within && !d.strmAllowOutsideRoot {
|
||||
return nil, errors.New("localstorage: strm target escapes root (enable strm_allow_outside_root to allow)")
|
||||
}
|
||||
if strings.EqualFold(filepath.Ext(p), ".strm") || strings.EqualFold(filepath.Ext(realPath), ".strm") {
|
||||
return nil, errors.New("localstorage: nested strm target is not supported")
|
||||
|
||||
@@ -195,6 +195,46 @@ func TestStreamURLRejectsSTRMTargetEscapingRootThroughSymlink(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLAllowsSTRMTargetOutsideRootWhenEnabled(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
target := filepath.Join(outside, "movie.mp4")
|
||||
writeLocalStorageTestFile(t, target, []byte("movie-data"))
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "movie.strm"), []byte(target+"\n"))
|
||||
|
||||
// 默认关闭:根目录外的目标仍被拒绝
|
||||
strict := New(Config{ID: "local", RootPath: root})
|
||||
if _, err := strict.StreamURL(context.Background(), encodeRel("movie.strm")); err == nil || !strings.Contains(err.Error(), "strm target escapes root") {
|
||||
t.Fatalf("default error = %v, want strm target escapes root", err)
|
||||
}
|
||||
|
||||
// 开启 strm_allow_outside_root 后放行
|
||||
relaxed := New(Config{ID: "local", RootPath: root, STRMAllowOutsideRoot: true})
|
||||
link, err := relaxed.StreamURL(context.Background(), encodeRel("movie.strm"))
|
||||
if err != nil {
|
||||
t.Fatalf("StreamURL with allow-outside-root: %v", err)
|
||||
}
|
||||
resolved, err := filepath.EvalSymlinks(target)
|
||||
if err != nil {
|
||||
t.Fatalf("eval target: %v", err)
|
||||
}
|
||||
if link.URL != resolved {
|
||||
t.Fatalf("link url = %q, want %q", link.URL, resolved)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLAllowOutsideRootStillRejectsNestedSTRM(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
writeLocalStorageTestFile(t, filepath.Join(outside, "inner.strm"), []byte("http://example.com/v.mp4\n"))
|
||||
writeLocalStorageTestFile(t, filepath.Join(root, "movie.strm"), []byte(filepath.Join(outside, "inner.strm")+"\n"))
|
||||
|
||||
drv := New(Config{ID: "local", RootPath: root, STRMAllowOutsideRoot: true})
|
||||
if _, err := drv.StreamURL(context.Background(), encodeRel("movie.strm")); err == nil || !strings.Contains(err.Error(), "nested strm") {
|
||||
t.Fatalf("error = %v, want nested strm rejection", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLRejectsSymlinkFileIDEscapingRoot(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
outside := t.TempDir()
|
||||
@@ -287,8 +327,8 @@ func TestScannerPersistsLocalStorageSTRM(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.Ext != "strm" || got.FileID != fileID || got.Category != "collection" {
|
||||
t.Fatalf("video = %#v, want local strm video in collection", got)
|
||||
if got.Ext != "strm" || got.FileID != fileID || got.ParentID != encodeRel("collection") {
|
||||
t.Fatalf("video = %#v, want local strm video under collection", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -326,8 +366,8 @@ func TestScannerPersistsLocalStorageVideo(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.DriveID != "local" || got.FileID != fileID || got.Category != "collection" {
|
||||
t.Fatalf("video = %#v, want local drive video in collection", got)
|
||||
if got.DriveID != "local" || got.FileID != fileID || got.ParentID != encodeRel("collection") {
|
||||
t.Fatalf("video = %#v, want local drive video under collection", got)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -594,8 +594,8 @@ func (d *Driver) refresh(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func isRateLimitResponse(res *resty.Response, code, message string) bool {
|
||||
if isRateLimitCode(code) || isRateLimitMessage(message) {
|
||||
func isRateLimitResponse(res *resty.Response, code, _ string) bool {
|
||||
if isRateLimitCode(code) {
|
||||
return true
|
||||
}
|
||||
if res == nil {
|
||||
@@ -632,18 +632,6 @@ func isRateLimitCode(code string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func isRateLimitMessage(message string) bool {
|
||||
text := strings.ToLower(strings.TrimSpace(message))
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "throttl") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "activity limit") ||
|
||||
strings.Contains(text, "temporarily blocked")
|
||||
}
|
||||
|
||||
func onedriveRateLimitError(res *resty.Response, message string) error {
|
||||
if strings.TrimSpace(message) == "" {
|
||||
message = "onedrive rate limited"
|
||||
|
||||
@@ -214,7 +214,7 @@ func TestGraph429ReturnsRateLimitErrorWithRetryAfter(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGraphThrottleMessageReturnsRateLimitError(t *testing.T) {
|
||||
func TestGraphThrottleMessageDoesNotReturnRateLimitError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusForbidden)
|
||||
@@ -238,11 +238,11 @@ func TestGraphThrottleMessageReturnsRateLimitError(t *testing.T) {
|
||||
|
||||
_, err := d.StreamURL(context.Background(), "file-id")
|
||||
if err == nil {
|
||||
t.Fatal("list succeeded, want rate limit error")
|
||||
t.Fatal("list succeeded, want graph error")
|
||||
}
|
||||
var rateLimit *drives.RateLimitError
|
||||
if !errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want RateLimitError", err)
|
||||
if errors.As(err, &rateLimit) {
|
||||
t.Fatalf("error = %T %[1]v, want non-rate-limit error", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,11 +20,12 @@ import (
|
||||
)
|
||||
|
||||
type Driver struct {
|
||||
id string
|
||||
cookie string
|
||||
rootID string
|
||||
client *sdk.Pan115Client
|
||||
ua string
|
||||
id string
|
||||
cookie string
|
||||
rootID string
|
||||
client *sdk.Pan115Client
|
||||
ua string
|
||||
uploadTempDir string
|
||||
|
||||
listMu sync.Mutex
|
||||
lastListAt time.Time
|
||||
@@ -32,10 +33,11 @@ type Driver struct {
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
ID string
|
||||
Cookie string // 形如 "UID=xxx; CID=xxx; SEID=xxx; KID=xxx"
|
||||
RootID string // 默认 "0"
|
||||
UA string // 默认 UA115Browser
|
||||
ID string
|
||||
Cookie string // 形如 "UID=xxx; CID=xxx; SEID=xxx; KID=xxx"
|
||||
RootID string // 默认 "0"
|
||||
UA string // 默认 UA115Browser
|
||||
UploadTempDir string
|
||||
}
|
||||
|
||||
func New(c Config) *Driver {
|
||||
@@ -48,11 +50,12 @@ func New(c Config) *Driver {
|
||||
ua = sdk.UA115Browser
|
||||
}
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
cookie: c.Cookie,
|
||||
rootID: rootID,
|
||||
ua: ua,
|
||||
listInterval: 2 * time.Second,
|
||||
id: c.ID,
|
||||
cookie: c.Cookie,
|
||||
rootID: rootID,
|
||||
ua: ua,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
listInterval: 2 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,7 +90,7 @@ func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error)
|
||||
// p115ListCooldown 是列目录触发疑似风控错误时的冷却时长。
|
||||
//
|
||||
// 历史上是 [30min × 3],3 次都失败就放弃;新策略改为 10 分钟无限重试 ——
|
||||
// 只要错误仍属 transient(429 / 405 / WAF / blocked / 安全威胁 / unexpected),
|
||||
// 只要错误仍属明确 HTTP transient 状态(429 / 405),
|
||||
// 就持续等 10 分钟再发一次列目录请求,直到成功或 ctx 取消。这样即使 115
|
||||
// 风控持续较长时间,扫描会自然延后到风控结束,不再丢半棵子树。
|
||||
const p115ListCooldown = 10 * time.Minute
|
||||
@@ -156,17 +159,7 @@ func isTransient115UpstreamError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "405") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "security") ||
|
||||
strings.Contains(text, "waf") ||
|
||||
strings.Contains(text, "unexpected error") ||
|
||||
strings.Contains(text, "访问被阻断") ||
|
||||
strings.Contains(text, "安全威胁")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusMethodNotAllowed, http.StatusTooManyRequests)
|
||||
}
|
||||
|
||||
// ListDirsOnly 只列指定目录的直接**子目录**,不返回文件条目。专为 admin 后台
|
||||
@@ -357,7 +350,7 @@ func (d *Driver) UploadAndReportSha1(ctx context.Context, parentID, name string,
|
||||
parentID = d.rootID
|
||||
}
|
||||
|
||||
tmp, sha1Hex, written, err := bufferAndHashSha1(r, size)
|
||||
tmp, sha1Hex, written, err := bufferAndHashSha1(d.uploadTempDir, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
@@ -482,8 +475,14 @@ func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
// 返回临时文件(位置在末尾,需调用方 Seek 回 0)、SHA1 hex 大写、实际字节数。
|
||||
//
|
||||
// 调用方负责 Close + Remove 临时文件。
|
||||
func bufferAndHashSha1(r io.Reader, declaredSize int64) (*os.File, string, int64, error) {
|
||||
tmp, err := os.CreateTemp("", "p115-upload-*.bin")
|
||||
func bufferAndHashSha1(tempDir string, r io.Reader, declaredSize int64) (*os.File, string, int64, error) {
|
||||
tempDir = strings.TrimSpace(tempDir)
|
||||
if tempDir != "" {
|
||||
if err := os.MkdirAll(tempDir, 0o755); err != nil {
|
||||
return nil, "", 0, fmt.Errorf("p115 upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(tempDir, "p115-upload-*.bin")
|
||||
if err != nil {
|
||||
return nil, "", 0, fmt.Errorf("p115 upload: create tmp: %w", err)
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -22,8 +23,9 @@ func TestIsTransient115ListError(t *testing.T) {
|
||||
want bool
|
||||
}{
|
||||
{name: "nil", err: nil, want: false},
|
||||
{name: "blocked html", err: errors.New(`<!doctype html><title>405</title>Sorry, your request has been blocked as it may cause potential threats to the server's security.`), want: true},
|
||||
{name: "chinese waf", err: errors.New("很抱歉,由于您访问的URL有可能对网站造成安全威胁,您的访问被阻断。"), want: true},
|
||||
{name: "blocked html without status context", err: errors.New(`<!doctype html><title>405</title>Sorry, your request has been blocked as it may cause potential threats to the server's security.`), want: false},
|
||||
{name: "chinese waf", err: errors.New("很抱歉,由于您访问的URL有可能对网站造成安全威胁,您的访问被阻断。"), want: false},
|
||||
{name: "status 405", err: errors.New("request failed with status: 405"), want: true},
|
||||
{name: "rate limit", err: errors.New("429 too many requests"), want: true},
|
||||
{name: "regular auth error", err: errors.New("invalid credential"), want: false},
|
||||
}
|
||||
@@ -43,10 +45,10 @@ func TestWrap115StreamTransientError(t *testing.T) {
|
||||
err error
|
||||
wantRateLimit bool
|
||||
}{
|
||||
{name: "unexpected", err: errors.New("unexpected error"), wantRateLimit: true},
|
||||
{name: "unexpected", err: errors.New("unexpected error"), wantRateLimit: false},
|
||||
{name: "405 blocked", err: errors.New("405 request has been blocked"), wantRateLimit: true},
|
||||
{name: "429", err: errors.New("429 too many requests"), wantRateLimit: true},
|
||||
{name: "blocked", err: errors.New("blocked by waf"), wantRateLimit: true},
|
||||
{name: "blocked", err: errors.New("blocked by waf"), wantRateLimit: false},
|
||||
{name: "auth", err: errors.New("invalid credential"), wantRateLimit: false},
|
||||
}
|
||||
|
||||
@@ -85,7 +87,7 @@ func TestBufferAndHashSha1(t *testing.T) {
|
||||
wantHex := strings.ToUpper(hex.EncodeToString(want[:]))
|
||||
|
||||
t.Run("declared size matches", func(t *testing.T) {
|
||||
tmp, gotHex, n, err := bufferAndHashSha1(bytes.NewReader(body), int64(len(body)))
|
||||
tmp, gotHex, n, err := bufferAndHashSha1("", bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashSha1 returned error: %v", err)
|
||||
}
|
||||
@@ -110,14 +112,14 @@ func TestBufferAndHashSha1(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("declared size mismatch returns error", func(t *testing.T) {
|
||||
_, _, _, err := bufferAndHashSha1(bytes.NewReader(body), int64(len(body))+1)
|
||||
_, _, _, err := bufferAndHashSha1("", bytes.NewReader(body), int64(len(body))+1)
|
||||
if err == nil {
|
||||
t.Fatal("expected size mismatch error, got nil")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("declared size zero is unchecked", func(t *testing.T) {
|
||||
tmp, gotHex, n, err := bufferAndHashSha1(bytes.NewReader(body), 0)
|
||||
tmp, gotHex, n, err := bufferAndHashSha1("", bytes.NewReader(body), 0)
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashSha1 returned error: %v", err)
|
||||
}
|
||||
@@ -129,6 +131,18 @@ func TestBufferAndHashSha1(t *testing.T) {
|
||||
t.Errorf("written = %d, want %d", n, len(body))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("uses configured temp dir", func(t *testing.T) {
|
||||
tempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
tmp, _, _, err := bufferAndHashSha1(tempDir, bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashSha1 returned error: %v", err)
|
||||
}
|
||||
defer cleanup(tmp)
|
||||
if gotDir := filepath.Dir(tmp.Name()); gotDir != tempDir {
|
||||
t.Fatalf("tmp dir = %q, want %q", gotDir, tempDir)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestUploadAndReportSha1RejectsInvalidArgs 检查空 reader / 空 name / 负 size 在
|
||||
|
||||
@@ -70,6 +70,7 @@ type Driver struct {
|
||||
httpClient *http.Client
|
||||
|
||||
onTokenUpdate func(access string)
|
||||
uploadTempDir string
|
||||
|
||||
tokenMu sync.RWMutex
|
||||
|
||||
@@ -90,6 +91,7 @@ type Config struct {
|
||||
|
||||
MainAPIBaseURL string
|
||||
LoginAPIBaseURL string
|
||||
UploadTempDir string
|
||||
|
||||
OnTokenUpdate func(access string)
|
||||
}
|
||||
@@ -123,6 +125,7 @@ func New(c Config) *Driver {
|
||||
referer: defaultReferer,
|
||||
userAgent: defaultUserAgent,
|
||||
onTokenUpdate: c.OnTokenUpdate,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
client: resty.New().
|
||||
SetTimeout(30*time.Second).
|
||||
SetHeader("Accept", "application/json, text/plain, */*"),
|
||||
@@ -289,7 +292,7 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
parentID = d.rootID
|
||||
}
|
||||
|
||||
tmp, md5Hex, actualSize, err := bufferAndHashMD5(r, size)
|
||||
tmp, md5Hex, actualSize, err := bufferAndHashMD5(d.uploadTempDir, r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
@@ -754,8 +757,8 @@ func (d *Driver) request(ctx context.Context, endpoint, method string, configure
|
||||
return nil, errors.New("123pan request: unauthorized")
|
||||
}
|
||||
|
||||
func isP123RateLimitResponse(res *resty.Response, code int, message string) bool {
|
||||
if code == http.StatusTooManyRequests || isP123RateLimitMessage(message) {
|
||||
func isP123RateLimitResponse(res *resty.Response, code int, _ string) bool {
|
||||
if code == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
if res == nil {
|
||||
@@ -764,7 +767,7 @@ func isP123RateLimitResponse(res *resty.Response, code int, message string) bool
|
||||
return isP123RateLimitHTTPResponse(res.StatusCode(), res.Header().Get("Retry-After"), res.String())
|
||||
}
|
||||
|
||||
func isP123RateLimitHTTPResponse(status int, retryAfter, body string) bool {
|
||||
func isP123RateLimitHTTPResponse(status int, retryAfter, _ string) bool {
|
||||
if status == http.StatusTooManyRequests {
|
||||
return true
|
||||
}
|
||||
@@ -774,35 +777,9 @@ func isP123RateLimitHTTPResponse(status int, retryAfter, body string) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if isP123RateLimitMessage(body) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isP123RateLimitMessage(message string) bool {
|
||||
text := strings.ToLower(strings.TrimSpace(message))
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(text, "请求太频繁") ||
|
||||
strings.Contains(text, "请求过于频繁") ||
|
||||
strings.Contains(text, "请求频繁") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "频率限制") ||
|
||||
strings.Contains(text, "请求次数过多") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "rate-limit") ||
|
||||
strings.Contains(text, "ratelimit") ||
|
||||
strings.Contains(text, "throttl") ||
|
||||
strings.Contains(text, "temporarily blocked") ||
|
||||
strings.Contains(text, "request has been blocked") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "访问被阻断")
|
||||
}
|
||||
|
||||
func p123RateLimitError(res *resty.Response, code int, message string) error {
|
||||
if strings.TrimSpace(message) == "" {
|
||||
message = "123pan rate limited"
|
||||
@@ -1084,8 +1061,14 @@ func splitPath(p string) []string {
|
||||
return strings.Split(p, "/")
|
||||
}
|
||||
|
||||
func bufferAndHashMD5(r io.Reader, declaredSize int64) (*os.File, string, int64, error) {
|
||||
tmp, err := os.CreateTemp("", "p123-upload-*.bin")
|
||||
func bufferAndHashMD5(tempDir string, r io.Reader, declaredSize int64) (*os.File, string, int64, error) {
|
||||
tempDir = strings.TrimSpace(tempDir)
|
||||
if tempDir != "" {
|
||||
if err := os.MkdirAll(tempDir, 0o755); err != nil {
|
||||
return nil, "", 0, fmt.Errorf("123pan upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(tempDir, "p123-upload-*.bin")
|
||||
if err != nil {
|
||||
return nil, "", 0, fmt.Errorf("123pan upload: create tmp: %w", err)
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -458,6 +460,29 @@ func TestUploadPresignedPUT429ReturnsRateLimitError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBufferAndHashMD5UsesConfiguredTempDir(t *testing.T) {
|
||||
body := []byte("hello-123-upload-test")
|
||||
tempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
tmp, gotHex, n, err := bufferAndHashMD5(tempDir, bytes.NewReader(body), int64(len(body)))
|
||||
if err != nil {
|
||||
t.Fatalf("bufferAndHashMD5 returned error: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmp.Name())
|
||||
}()
|
||||
if gotDir := filepath.Dir(tmp.Name()); gotDir != tempDir {
|
||||
t.Fatalf("tmp dir = %q, want %q", gotDir, tempDir)
|
||||
}
|
||||
want := md5.Sum(body)
|
||||
if gotHex != fmt.Sprintf("%x", want) {
|
||||
t.Fatalf("md5 = %s, want %x", gotHex, want)
|
||||
}
|
||||
if n != int64(len(body)) {
|
||||
t.Fatalf("written = %d, want %d", n, len(body))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenameSendsExpectedBody(t *testing.T) {
|
||||
var renameRequest map[string]any
|
||||
api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
@@ -47,6 +47,7 @@ type Driver struct {
|
||||
client *resty.Client
|
||||
onTokenUpdate func(access, refresh, captcha, deviceID string)
|
||||
uploadToOSSFunc func(context.Context, *s3Params, io.Reader) error
|
||||
uploadTempDir string
|
||||
|
||||
// captchaMu serializes captcha-token refreshes triggered by 4002 / 9
|
||||
// recovery in requestOnce. Without it, N concurrent callers all hitting
|
||||
@@ -77,6 +78,7 @@ type Config struct {
|
||||
DeviceID string
|
||||
RootID string
|
||||
DisableMediaLink bool
|
||||
UploadTempDir string
|
||||
OnTokenUpdate func(access, refresh, captcha, deviceID string)
|
||||
}
|
||||
|
||||
@@ -109,6 +111,7 @@ func New(c Config) *Driver {
|
||||
deviceID: deviceID,
|
||||
disableMediaLink: c.DisableMediaLink,
|
||||
onTokenUpdate: c.OnTokenUpdate,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
client: resty.New().
|
||||
SetTimeout(30*time.Second).
|
||||
SetHeader("Accept", "application/json, text/plain, */*"),
|
||||
@@ -175,8 +178,8 @@ func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error)
|
||||
|
||||
// pikpakListCooldown 是列目录触发疑似限流错误时的冷却时长。
|
||||
//
|
||||
// 与 p115 driver 的 listCooldown 同语义:只要错误属 transient
|
||||
// (error_code=10 / HTTP 429 / 5xx / 通用 "rate limit" 文本),就持续
|
||||
// 与 p115 driver 的 listCooldown 同语义:只要错误属明确限流/临时状态
|
||||
// (结构化 error_code=10 / HTTP 429 / 5xx),就持续
|
||||
// 等 10 分钟再发一次列目录请求,直到成功或 ctx 取消。这样即使 PikPak
|
||||
// 风控持续较长时间,扫描会自然延后到风控结束,不再丢半棵子树。
|
||||
const pikpakListCooldown = 10 * time.Minute
|
||||
@@ -242,7 +245,6 @@ func pikpakSleepContext(ctx context.Context, d time.Duration) error {
|
||||
//
|
||||
// - PikPak 业务码 error_code=10 ("操作频繁",见 OpenList drivers/pikpak/util.go)
|
||||
// - HTTP 429 / 500 / 502 / 503 / 504 / 509(rclone 也把这些归为 retry)
|
||||
// - 通用文本:rate limit / too many requests / blocked / temporarily unavailable
|
||||
//
|
||||
// 不包含 4122/4121/16(access_token 过期)和 9/4002(captcha 过期)—— 这些
|
||||
// 由 requestOnce 内部已经做过一次自动恢复重试;如果恢复后仍然报这类错误,
|
||||
@@ -259,22 +261,14 @@ func isTransientPikPakListError(err error) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "error_code=10") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "http 509") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "operation frequent") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "temporarily unavailable") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
return drives.ErrorMentionsHTTPStatus(err,
|
||||
http.StatusTooManyRequests,
|
||||
http.StatusInternalServerError,
|
||||
http.StatusBadGateway,
|
||||
http.StatusServiceUnavailable,
|
||||
http.StatusGatewayTimeout,
|
||||
509,
|
||||
)
|
||||
}
|
||||
|
||||
func (d *Driver) Stat(ctx context.Context, fileID string) (*drives.Entry, error) {
|
||||
|
||||
@@ -110,7 +110,7 @@ func TestEnsureDirReusesExistingFolder(t *testing.T) {
|
||||
"files": []map[string]any{{
|
||||
"id": "existing-folder-id",
|
||||
"kind": "drive#folder",
|
||||
"name": "91 Spider",
|
||||
"name": "Crawler Uploads",
|
||||
}},
|
||||
})
|
||||
case http.MethodPost:
|
||||
@@ -124,7 +124,7 @@ func TestEnsureDirReusesExistingFolder(t *testing.T) {
|
||||
defer srv.Close()
|
||||
|
||||
d := newTestDriver(t, srv)
|
||||
got, err := d.EnsureDir(context.Background(), "91 Spider")
|
||||
got, err := d.EnsureDir(context.Background(), "Crawler Uploads")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure dir: %v", err)
|
||||
}
|
||||
@@ -150,7 +150,7 @@ func TestEnsureDirCreatesMissingFolder(t *testing.T) {
|
||||
writePikPakJSON(t, w, map[string]any{
|
||||
"id": "new-folder-id",
|
||||
"kind": "drive#folder",
|
||||
"name": "91 Spider",
|
||||
"name": "Crawler Uploads",
|
||||
})
|
||||
default:
|
||||
t.Fatalf("unexpected method %s", r.Method)
|
||||
@@ -160,14 +160,14 @@ func TestEnsureDirCreatesMissingFolder(t *testing.T) {
|
||||
defer srv.Close()
|
||||
|
||||
d := newTestDriver(t, srv)
|
||||
id, err := d.EnsureDir(context.Background(), "91 Spider")
|
||||
id, err := d.EnsureDir(context.Background(), "Crawler Uploads")
|
||||
if err != nil {
|
||||
t.Fatalf("ensure dir: %v", err)
|
||||
}
|
||||
if id != "new-folder-id" {
|
||||
t.Fatalf("dir id = %q, want new-folder-id", id)
|
||||
}
|
||||
if got.Kind != "drive#folder" || got.ParentID != "root-id" || got.Name != "91 Spider" {
|
||||
if got.Kind != "drive#folder" || got.ParentID != "root-id" || got.Name != "Crawler Uploads" {
|
||||
t.Fatalf("create folder body = %#v", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@ func isCaptchaTokenRejectedCode(code int64) bool {
|
||||
}
|
||||
|
||||
// APIError is the public alias for the PikPak API error response. Callers
|
||||
// outside this package (e.g. the spider91→PikPak migrator, tests) can either
|
||||
// outside this package (e.g. crawler upload workers and tests) can either
|
||||
// construct it for fakes or unwrap it via errors.As. Prefer IsCaptchaError
|
||||
// over hard-coding the numeric error codes.
|
||||
type APIError = errResp
|
||||
|
||||
@@ -39,8 +39,7 @@ const (
|
||||
ossSecurityTokenHeaderName = "X-OSS-Security-Token"
|
||||
ossUserAgent = "aliyun-sdk-android/2.9.13(Linux/Android 14/M2004j7ac;UKQ1.231108.001)"
|
||||
// 单次 PutObject 的硬上限(OSS 文档限制 5GiB;保守用 5GiB-1)。
|
||||
// spider91 视频通常 ~100MiB,远低于该值。超过则需走 multipart,
|
||||
// 当前未实现,遇到会显式报错。
|
||||
// 超过该值需走 multipart;当前未实现,遇到会显式报错。
|
||||
maxSinglePutSize = 5*1024*1024*1024 - 1
|
||||
// 首次上传失败后最多再重试 3 次。每次重试都会重新申请 PikPak
|
||||
// upload session,以避开偶发不可解析/不可达的临时上传 endpoint。
|
||||
@@ -79,6 +78,20 @@ type UploadResult struct {
|
||||
Size int64
|
||||
}
|
||||
|
||||
// preparedUploadBody is a re-readable upload payload produced by
// prepareUploadBody, so each upload attempt can rewind and resend it.
type preparedUploadBody struct {
|
||||
// reader is the seekable source of the bytes: either the caller's own
// io.ReadSeeker or a temp file the input was buffered into.
reader io.ReadSeeker
|
||||
// start is the offset rewind seeks back to before the body is read again.
start int64
|
||||
// cleanup releases whatever was allocated for the body (closes and removes
// the temp file); it is a no-op when the caller's reader is reused.
cleanup func()
|
||||
}
|
||||
|
||||
func (b preparedUploadBody) rewind() error {
|
||||
if b.reader == nil {
|
||||
return errors.New("pikpak upload: nil upload body")
|
||||
}
|
||||
_, err := b.reader.Seek(b.start, io.SeekStart)
|
||||
return err
|
||||
}
|
||||
|
||||
// Upload 实现 drives.Drive 接口;只返回 fileID。
|
||||
// 完整上传元数据见 UploadAndReportHash。
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
@@ -91,7 +104,7 @@ func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader,
|
||||
|
||||
// UploadAndReportHash 上传并返回 file ID + GCID + 实际字节数。
|
||||
//
|
||||
// 用于 spider91 → PikPak 迁移 worker:上传完后直接把 hash 写回 catalog
|
||||
// 用于 crawler upload worker:上传完后直接把 hash 写回 catalog
|
||||
// 的 content_hash 字段,避免再读一次本地文件做 hash。
|
||||
//
|
||||
// 参数:
|
||||
@@ -104,8 +117,7 @@ func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader,
|
||||
// - 必须先算 GCID 再申请上传会话(PikPak API 要求 hash 字段),
|
||||
// 所以这里先 io.Copy 到临时文件并同步算 GCID。
|
||||
// - 命中秒传时不发任何字节;否则用 OSS PutObject 上传。
|
||||
// - 单次 PutObject 上限保守用 5GiB-1。spider91 视频远小于此值,
|
||||
// 超出该值会报错(暂不实现 multipart)。
|
||||
// - 单次 PutObject 上限保守用 5GiB-1,超出该值会报错(暂不实现 multipart)。
|
||||
func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string, r io.Reader, size int64) (UploadResult, error) {
|
||||
if r == nil {
|
||||
return UploadResult{}, errors.New("pikpak upload: nil reader")
|
||||
@@ -125,15 +137,15 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
parentID = d.rootID
|
||||
}
|
||||
|
||||
// 1) 把 r 全量缓冲到临时文件,同时算 GCID。
|
||||
tmp, gcidHex, actualSize, err := bufferAndHashGCID(r, size)
|
||||
// 1) 算 GCID,并准备一个可重试读取的 body。爬虫迁移传入的是
|
||||
// *os.File,可直接复用原文件,避免再占用一份视频大小的临时空间。
|
||||
body, gcidHex, actualSize, err := d.prepareUploadBody(r, size)
|
||||
if err != nil {
|
||||
return UploadResult{}, err
|
||||
}
|
||||
defer func() {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmp.Name())
|
||||
}()
|
||||
if body.cleanup != nil {
|
||||
defer body.cleanup()
|
||||
}
|
||||
|
||||
result := UploadResult{Hash: gcidHex, Size: actualSize}
|
||||
var lastErr error
|
||||
@@ -155,7 +167,7 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
|
||||
continue
|
||||
}
|
||||
|
||||
out, err := d.completeUploadAttempt(ctx, tmp, parentID, name, result, resp)
|
||||
out, err := d.completeUploadAttempt(ctx, body, parentID, name, result, resp)
|
||||
if err == nil {
|
||||
return out, nil
|
||||
}
|
||||
@@ -190,7 +202,7 @@ func (d *Driver) requestUploadSession(ctx context.Context, parentID, name string
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (d *Driver) completeUploadAttempt(ctx context.Context, tmp *os.File, parentID, name string, result UploadResult, resp uploadTaskData) (UploadResult, error) {
|
||||
func (d *Driver) completeUploadAttempt(ctx context.Context, body preparedUploadBody, parentID, name string, result UploadResult, resp uploadTaskData) (UploadResult, error) {
|
||||
// 命中秒传:服务端已经知道这个 hash,直接返回新文件 ID。
|
||||
if resp.Resumable == nil {
|
||||
if resp.File.ID != "" {
|
||||
@@ -207,10 +219,10 @@ func (d *Driver) completeUploadAttempt(ctx context.Context, tmp *os.File, parent
|
||||
}
|
||||
|
||||
// 未命中秒传:把字节传到 S3 兼容存储。
|
||||
if _, err := tmp.Seek(0, io.SeekStart); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: seek tmp: %w", err)
|
||||
if err := body.rewind(); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: rewind body: %w", err)
|
||||
}
|
||||
if err := d.uploadToOSS(ctx, &resp.Resumable.Params, tmp); err != nil {
|
||||
if err := d.uploadToOSS(ctx, &resp.Resumable.Params, body.reader); err != nil {
|
||||
return UploadResult{}, fmt.Errorf("pikpak upload: oss put: %w", err)
|
||||
}
|
||||
|
||||
@@ -279,12 +291,62 @@ func isRetryablePikPakUploadError(err error) bool {
|
||||
strings.Contains(text, "service unavailable")
|
||||
}
|
||||
|
||||
func (d *Driver) prepareUploadBody(r io.Reader, size int64) (preparedUploadBody, string, int64, error) {
|
||||
if rs, ok := r.(io.ReadSeeker); ok {
|
||||
gcidHex, actualSize, start, err := hashGCIDFromReadSeeker(rs, size)
|
||||
if err != nil {
|
||||
return preparedUploadBody{}, "", 0, err
|
||||
}
|
||||
return preparedUploadBody{reader: rs, start: start, cleanup: func() {}}, gcidHex, actualSize, nil
|
||||
}
|
||||
|
||||
tmp, gcidHex, actualSize, err := bufferAndHashGCID(d.uploadTempDir, r, size)
|
||||
if err != nil {
|
||||
return preparedUploadBody{}, "", 0, err
|
||||
}
|
||||
return preparedUploadBody{
|
||||
reader: tmp,
|
||||
start: 0,
|
||||
cleanup: func() {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmp.Name())
|
||||
},
|
||||
}, gcidHex, actualSize, nil
|
||||
}
|
||||
|
||||
func hashGCIDFromReadSeeker(r io.ReadSeeker, size int64) (string, int64, int64, error) {
|
||||
start, err := r.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return "", 0, 0, fmt.Errorf("pikpak upload: seek body: %w", err)
|
||||
}
|
||||
|
||||
h := NewGCID(size)
|
||||
written, copyErr := io.Copy(h, r)
|
||||
_, seekErr := r.Seek(start, io.SeekStart)
|
||||
if copyErr != nil {
|
||||
return "", 0, start, fmt.Errorf("pikpak upload: hash body: %w", copyErr)
|
||||
}
|
||||
if seekErr != nil {
|
||||
return "", 0, start, fmt.Errorf("pikpak upload: rewind body: %w", seekErr)
|
||||
}
|
||||
if size > 0 && written != size {
|
||||
return "", 0, start, fmt.Errorf("pikpak upload: size mismatch: declared %d, copied %d", size, written)
|
||||
}
|
||||
return strings.ToUpper(hex.EncodeToString(h.Sum(nil))), written, start, nil
|
||||
}
|
||||
|
||||
// bufferAndHashGCID 把 r 复制到一个临时文件,同时计算 GCID。
|
||||
// 返回临时文件(位置在末尾,需要调用方 Seek 回 0)、GCID hex 大写、实际写入字节数。
|
||||
// 返回临时文件(位置在末尾,需要调用方 Seek 回 start)、GCID hex 大写、实际写入字节数。
|
||||
//
|
||||
// 调用方负责 Close + Remove 临时文件。
|
||||
func bufferAndHashGCID(r io.Reader, size int64) (*os.File, string, int64, error) {
|
||||
tmp, err := os.CreateTemp("", "pikpak-upload-*.bin")
|
||||
func bufferAndHashGCID(tempDir string, r io.Reader, size int64) (*os.File, string, int64, error) {
|
||||
tempDir = strings.TrimSpace(tempDir)
|
||||
if tempDir != "" {
|
||||
if err := os.MkdirAll(tempDir, 0o755); err != nil {
|
||||
return nil, "", 0, fmt.Errorf("pikpak upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(tempDir, "pikpak-upload-*.bin")
|
||||
if err != nil {
|
||||
return nil, "", 0, fmt.Errorf("pikpak upload: create tmp: %w", err)
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
@@ -142,6 +144,80 @@ func TestUploadInstantSuccessReturnsFileID(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadUsesReadSeekerWithoutTempCopy(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/drive/v1/files", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{
|
||||
"upload_type": "UPLOAD_TYPE_RESUMABLE",
|
||||
"resumable": null,
|
||||
"file": {"id": "instant-file-id", "name": "test.mp4", "kind": "drive#file"}
|
||||
}`))
|
||||
})
|
||||
server := httptest.NewServer(mux)
|
||||
defer server.Close()
|
||||
|
||||
d := newTestDriver(t, server)
|
||||
uploadTempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
d.uploadTempDir = uploadTempDir
|
||||
|
||||
data := bytes.Repeat([]byte{0x31}, 1024)
|
||||
path := filepath.Join(t.TempDir(), "video.bin")
|
||||
if err := os.WriteFile(path, data, 0o644); err != nil {
|
||||
t.Fatalf("write source: %v", err)
|
||||
}
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
t.Fatalf("open source: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
id, err := d.Upload(context.Background(), "parent-id", "test.mp4", f, int64(len(data)))
|
||||
if err != nil {
|
||||
t.Fatalf("upload: %v", err)
|
||||
}
|
||||
if id != "instant-file-id" {
|
||||
t.Fatalf("file id = %q, want instant-file-id", id)
|
||||
}
|
||||
if _, err := os.Stat(uploadTempDir); !os.IsNotExist(err) {
|
||||
t.Fatalf("upload temp dir stat err = %v, want not created for read seeker input", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadBuffersNonSeekReaderInConfiguredTempDir(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/drive/v1/files", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{
|
||||
"upload_type": "UPLOAD_TYPE_RESUMABLE",
|
||||
"resumable": null,
|
||||
"file": {"id": "instant-file-id", "name": "test.mp4", "kind": "drive#file"}
|
||||
}`))
|
||||
})
|
||||
server := httptest.NewServer(mux)
|
||||
defer server.Close()
|
||||
|
||||
d := newTestDriver(t, server)
|
||||
uploadTempDir := filepath.Join(t.TempDir(), "upload-tmp")
|
||||
d.uploadTempDir = uploadTempDir
|
||||
|
||||
data := bytes.Repeat([]byte{0x42}, 1024)
|
||||
id, err := d.Upload(context.Background(), "parent-id", "test.mp4", bytes.NewBuffer(data), int64(len(data)))
|
||||
if err != nil {
|
||||
t.Fatalf("upload: %v", err)
|
||||
}
|
||||
if id != "instant-file-id" {
|
||||
t.Fatalf("file id = %q, want instant-file-id", id)
|
||||
}
|
||||
entries, err := os.ReadDir(uploadTempDir)
|
||||
if err != nil {
|
||||
t.Fatalf("read upload temp dir: %v", err)
|
||||
}
|
||||
if len(entries) != 0 {
|
||||
t.Fatalf("upload temp dir entries = %d, want cleaned", len(entries))
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadInstantSuccessFallsBackToListWhenFileIDMissing(t *testing.T) {
|
||||
listCalled := false
|
||||
mux := http.NewServeMux()
|
||||
@@ -304,7 +380,7 @@ func TestUploadRejectsInvalidArguments(t *testing.T) {
|
||||
func TestBufferAndHashGCIDDetectsSizeMismatch(t *testing.T) {
|
||||
src := bytes.NewReader([]byte("hello"))
|
||||
// 声明 size=10 但实际只有 5 字节
|
||||
_, _, _, err := bufferAndHashGCID(src, 10)
|
||||
_, _, _, err := bufferAndHashGCID("", src, 10)
|
||||
if err == nil {
|
||||
t.Fatal("expected size mismatch error")
|
||||
}
|
||||
@@ -315,7 +391,7 @@ func TestBufferAndHashGCIDDetectsSizeMismatch(t *testing.T) {
|
||||
|
||||
func TestBufferAndHashGCIDComputesCorrectHash(t *testing.T) {
|
||||
data := bytes.Repeat([]byte{0x55}, 1024)
|
||||
tmp, hex, written, err := bufferAndHashGCID(bytes.NewReader(data), int64(len(data)))
|
||||
tmp, hex, written, err := bufferAndHashGCID("", bytes.NewReader(data), int64(len(data)))
|
||||
if err != nil {
|
||||
t.Fatalf("buffer: %v", err)
|
||||
}
|
||||
|
||||
@@ -41,7 +41,6 @@ type CrawlerConfig struct {
|
||||
Driver *Driver
|
||||
Catalog *catalog.Catalog
|
||||
CrawlerName string
|
||||
SourceKind string
|
||||
PythonPath string
|
||||
FFmpegPath string
|
||||
FFprobePath string
|
||||
@@ -50,6 +49,7 @@ type CrawlerConfig struct {
|
||||
CommonThumbDir string
|
||||
ProxyURL string
|
||||
ConfigJSON string
|
||||
DisablePreview bool
|
||||
HTTPClient *http.Client
|
||||
DownloadTimeout time.Duration
|
||||
OnProgress func(CrawlProgress)
|
||||
@@ -144,7 +144,6 @@ type Event struct {
|
||||
DetailURL string `json:"detail_url,omitempty"`
|
||||
Author string `json:"author,omitempty"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
Category string `json:"category,omitempty"`
|
||||
Quality string `json:"quality,omitempty"`
|
||||
DurationSeconds int `json:"duration_seconds,omitempty"`
|
||||
Description string `json:"description,omitempty"`
|
||||
@@ -168,7 +167,6 @@ type Item struct {
|
||||
DetailURL string `json:"detail_url,omitempty"`
|
||||
Author string `json:"author,omitempty"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
Category string `json:"category,omitempty"`
|
||||
Quality string `json:"quality,omitempty"`
|
||||
DurationSeconds int `json:"duration_seconds,omitempty"`
|
||||
Description string `json:"description,omitempty"`
|
||||
@@ -215,9 +213,6 @@ func (e Event) normalizedItem() Item {
|
||||
if len(item.Tags) == 0 && len(e.Tags) > 0 {
|
||||
item.Tags = e.Tags
|
||||
}
|
||||
if strings.TrimSpace(item.Category) == "" {
|
||||
item.Category = e.Category
|
||||
}
|
||||
if strings.TrimSpace(item.Quality) == "" {
|
||||
item.Quality = e.Quality
|
||||
}
|
||||
@@ -392,7 +387,7 @@ func (c *Crawler) RunOnce(ctx context.Context, targetNew int) (*CrawlResult, err
|
||||
}
|
||||
|
||||
func (c *Crawler) writeSeenSourceIDs(ctx context.Context, path string) (int, error) {
|
||||
seenIDs, err := c.cfg.Catalog.ListCrawlerSourceIDs(ctx, c.sourceKind(), c.cfg.Driver.ID())
|
||||
seenIDs, err := c.cfg.Catalog.ListCrawlerSourceIDs(ctx, Kind, c.cfg.Driver.ID())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
@@ -513,8 +508,7 @@ func (c *Crawler) processItem(ctx context.Context, item Item) (bool, error) {
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
sourceKind := c.sourceKind()
|
||||
videoID := BuildVideoIDForKind(sourceKind, c.cfg.Driver.ID(), sourceID)
|
||||
videoID := BuildVideoID(c.cfg.Driver.ID(), sourceID)
|
||||
if deleted, err := c.cfg.Catalog.IsVideoDeleted(ctx, videoID); err != nil {
|
||||
return false, err
|
||||
} else if deleted {
|
||||
@@ -562,6 +556,10 @@ func (c *Crawler) processItem(ctx context.Context, item Item) (bool, error) {
|
||||
if quality == "" {
|
||||
quality = "HD"
|
||||
}
|
||||
previewStatus := "pending"
|
||||
if c.previewDisabled(ctx) {
|
||||
previewStatus = "disabled"
|
||||
}
|
||||
v := &catalog.Video{
|
||||
ID: videoID,
|
||||
DriveID: c.cfg.Driver.ID(),
|
||||
@@ -574,9 +572,8 @@ func (c *Crawler) processItem(ctx context.Context, item Item) (bool, error) {
|
||||
Size: size,
|
||||
Ext: strings.TrimPrefix(videoExt, "."),
|
||||
Quality: quality,
|
||||
Category: strings.TrimSpace(item.Category),
|
||||
Description: strings.TrimSpace(item.Description),
|
||||
PreviewStatus: "pending",
|
||||
PreviewStatus: previewStatus,
|
||||
PublishedAt: publishedAt,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -588,9 +585,9 @@ func (c *Crawler) processItem(ctx context.Context, item Item) (bool, error) {
|
||||
}
|
||||
v.SampledSHA256 = sampled
|
||||
v.FingerprintStatus = "ready"
|
||||
if duplicate, err := c.cfg.Catalog.FindEquivalentVideo(ctx, v); err == nil && duplicate != nil {
|
||||
if duplicate, err := c.cfg.Catalog.FindVideoBySampledFingerprint(ctx, v); err == nil && duplicate != nil {
|
||||
_ = os.Remove(videoPath)
|
||||
if markErr := c.cfg.Catalog.MarkCrawlerSourceSeen(ctx, sourceKind, c.cfg.Driver.ID(), sourceID, "duplicate", duplicate.ID, sampled, size); markErr != nil {
|
||||
if markErr := c.cfg.Catalog.MarkCrawlerSourceSeen(ctx, Kind, c.cfg.Driver.ID(), sourceID, "duplicate", duplicate.ID, sampled, size); markErr != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s mark duplicate seen: %v", c.cfg.Driver.ID(), sourceID, markErr)
|
||||
}
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s duplicate_of=%s title=%q size=%d", c.cfg.Driver.ID(), sourceID, duplicate.ID, title, size)
|
||||
@@ -601,19 +598,25 @@ func (c *Crawler) processItem(ctx context.Context, item Item) (bool, error) {
|
||||
}
|
||||
|
||||
thumbReady := false
|
||||
thumbPath := ""
|
||||
commonThumbPath := ""
|
||||
if item.Thumbnail.URL != "" || item.Thumbnail.LocalFile != "" {
|
||||
thumbFile := sourceID + detectThumbExt(item.Thumbnail.URL, item.Thumbnail.LocalFile)
|
||||
thumbPath, err := c.cfg.Driver.ThumbPath(thumbFile)
|
||||
thumbPath, err = c.cfg.Driver.ThumbPath(thumbFile)
|
||||
if err == nil {
|
||||
if _, err := c.materializeMedia(ctx, item.Thumbnail, thumbPath, item.DetailURL, false); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s thumbnail failed: %v", c.cfg.Driver.ID(), sourceID, err)
|
||||
} else if c.cfg.CommonThumbDir != "" {
|
||||
if err := os.MkdirAll(c.cfg.CommonThumbDir, 0o755); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s common thumbs mkdir: %v", c.cfg.Driver.ID(), err)
|
||||
} else if err := copyFileAtomic(thumbPath, mediaasset.ThumbnailPathInDir(c.cfg.CommonThumbDir, videoID)); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s copy thumbnail: %v", c.cfg.Driver.ID(), sourceID, err)
|
||||
} else {
|
||||
thumbReady = true
|
||||
dst := mediaasset.ThumbnailPathInDir(c.cfg.CommonThumbDir, videoID)
|
||||
if err := copyFileAtomic(thumbPath, dst); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s copy thumbnail: %v", c.cfg.Driver.ID(), sourceID, err)
|
||||
} else {
|
||||
commonThumbPath = dst
|
||||
thumbReady = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -621,17 +624,66 @@ func (c *Crawler) processItem(ctx context.Context, item Item) (bool, error) {
|
||||
if thumbReady {
|
||||
v.ThumbnailURL = "/p/thumb/" + v.ID
|
||||
}
|
||||
if duplicate, err := c.findNearDuplicateVideo(ctx, v, commonThumbPath); err != nil {
|
||||
_ = os.Remove(videoPath)
|
||||
if thumbPath != "" {
|
||||
_ = os.Remove(thumbPath)
|
||||
}
|
||||
if commonThumbPath != "" {
|
||||
_ = os.Remove(commonThumbPath)
|
||||
}
|
||||
return false, fmt.Errorf("near duplicate lookup: %w", err)
|
||||
} else if duplicate != nil && duplicate.video != nil {
|
||||
if v.Size > duplicate.video.Size {
|
||||
if err := c.cfg.Catalog.DeleteVideoWithTombstoneReason(ctx, duplicate.video.ID, catalog.DeletedVideoReasonDuplicate); err != nil {
|
||||
_ = os.Remove(videoPath)
|
||||
if thumbPath != "" {
|
||||
_ = os.Remove(thumbPath)
|
||||
}
|
||||
if commonThumbPath != "" {
|
||||
_ = os.Remove(commonThumbPath)
|
||||
}
|
||||
return false, fmt.Errorf("delete smaller near duplicate %s: %w", duplicate.video.ID, err)
|
||||
}
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s replacing_smaller_near_duplicate=%s old_size=%d new_size=%d title_similarity=%.3f thumbnail_ssim=%.3f title=%q duration=%d", c.cfg.Driver.ID(), sourceID, duplicate.video.ID, duplicate.video.Size, v.Size, duplicate.titleSimilarity, duplicate.thumbnailSSIM, title, v.DurationSeconds)
|
||||
} else {
|
||||
_ = os.Remove(videoPath)
|
||||
if thumbPath != "" {
|
||||
_ = os.Remove(thumbPath)
|
||||
}
|
||||
if commonThumbPath != "" {
|
||||
_ = os.Remove(commonThumbPath)
|
||||
}
|
||||
if markErr := c.cfg.Catalog.MarkCrawlerSourceSeen(ctx, Kind, c.cfg.Driver.ID(), sourceID, "duplicate", duplicate.video.ID, sampled, size); markErr != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s mark near duplicate seen: %v", c.cfg.Driver.ID(), sourceID, markErr)
|
||||
}
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s near_duplicate_of=%s old_size=%d new_size=%d title_similarity=%.3f thumbnail_ssim=%.3f title=%q duration=%d", c.cfg.Driver.ID(), sourceID, duplicate.video.ID, duplicate.video.Size, v.Size, duplicate.titleSimilarity, duplicate.thumbnailSSIM, title, v.DurationSeconds)
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
if err := c.cfg.Catalog.UpsertVideo(ctx, v); err != nil {
|
||||
_ = os.Remove(videoPath)
|
||||
return false, err
|
||||
}
|
||||
if err := c.cfg.Catalog.MarkCrawlerSourceSeen(ctx, sourceKind, c.cfg.Driver.ID(), sourceID, "imported", v.ID, sampled, size); err != nil {
|
||||
if err := c.cfg.Catalog.MarkCrawlerSourceSeen(ctx, Kind, c.cfg.Driver.ID(), sourceID, "imported", v.ID, sampled, size); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s mark imported seen: %v", c.cfg.Driver.ID(), sourceID, err)
|
||||
}
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s ok title=%q size=%d", c.cfg.Driver.ID(), sourceID, title, size)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (c *Crawler) previewDisabled(ctx context.Context) bool {
|
||||
if c == nil {
|
||||
return false
|
||||
}
|
||||
if c.cfg.Catalog != nil && c.cfg.Driver != nil {
|
||||
if d, err := c.cfg.Catalog.GetDrive(ctx, c.cfg.Driver.ID()); err == nil && d != nil {
|
||||
return !d.TeaserEnabled
|
||||
}
|
||||
}
|
||||
return c.cfg.DisablePreview
|
||||
}
|
||||
|
||||
func (c *Crawler) materializeMedia(ctx context.Context, ref MediaRef, dst, referer string, required bool) (int64, error) {
|
||||
if local := strings.TrimSpace(ref.LocalFile); local != "" {
|
||||
return c.copyLocalOutput(local, dst)
|
||||
@@ -783,6 +835,10 @@ func (c *Crawler) downloadHLSAtomic(ctx context.Context, ref MediaRef, dst, refe
|
||||
args = append(args, "-headers", h)
|
||||
}
|
||||
args = append(args,
|
||||
"-protocol_whitelist", "http,https,tcp,tls,crypto",
|
||||
"-allowed_extensions", "ALL",
|
||||
"-allowed_segment_extensions", "ALL",
|
||||
"-extension_picky", "0",
|
||||
"-i", src,
|
||||
"-c", "copy",
|
||||
"-bsf:a", "aac_adtstoasc",
|
||||
@@ -979,7 +1035,6 @@ func normalizeItemForImport(item Item) (Item, string, error) {
|
||||
}
|
||||
item.DetailURL = strings.TrimSpace(item.DetailURL)
|
||||
item.Author = strings.TrimSpace(item.Author)
|
||||
item.Category = strings.TrimSpace(item.Category)
|
||||
item.Quality = strings.TrimSpace(item.Quality)
|
||||
item.Description = strings.TrimSpace(item.Description)
|
||||
item.PublishedAt = strings.TrimSpace(item.PublishedAt)
|
||||
@@ -1084,16 +1139,6 @@ func stableURLKey(raw string) string {
|
||||
return u.String()
|
||||
}
|
||||
|
||||
func (c *Crawler) sourceKind() string {
|
||||
if c == nil {
|
||||
return Kind
|
||||
}
|
||||
if v := strings.TrimSpace(c.cfg.SourceKind); v != "" {
|
||||
return v
|
||||
}
|
||||
return Kind
|
||||
}
|
||||
|
||||
func (c *Crawler) crawlerTagName() string {
|
||||
if c == nil {
|
||||
return ""
|
||||
@@ -1125,14 +1170,7 @@ func candidateBudgetForTarget(targetNew int) int {
|
||||
}
|
||||
|
||||
func BuildVideoID(driveID, sourceID string) string {
|
||||
return BuildVideoIDForKind(Kind, driveID, sourceID)
|
||||
}
|
||||
|
||||
func BuildVideoIDForKind(kind, driveID, sourceID string) string {
|
||||
if kind = strings.TrimSpace(kind); kind == "" {
|
||||
kind = Kind
|
||||
}
|
||||
return kind + "-" + driveID + "-" + sourceID
|
||||
return Kind + "-" + driveID + "-" + sourceID
|
||||
}
|
||||
|
||||
func detectVideoExt(rawURL, localFile string) string {
|
||||
|
||||
@@ -4,6 +4,9 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/jpeg"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
@@ -14,6 +17,7 @@ import (
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/fingerprint"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -39,13 +43,31 @@ func writeScriptCrawlerFFprobeStub(t *testing.T, dir string, ok bool) string {
|
||||
func writeScriptCrawlerFFmpegStub(t *testing.T, dir string) string {
|
||||
t.Helper()
|
||||
path := filepath.Join(dir, "ffmpeg-hls.sh")
|
||||
body := "#!/bin/sh\nout=\"\"\nfor arg do out=\"$arg\"; done\nprintf 'hls-video-bytes' > \"$out\"\n"
|
||||
body := "#!/bin/sh\nif [ -n \"$GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE\" ]; then printf '%s\\n' \"$@\" > \"$GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE\"; fi\nout=\"\"\nfor arg do out=\"$arg\"; done\nprintf 'hls-video-bytes' > \"$out\"\n"
|
||||
if err := os.WriteFile(path, []byte(body), 0o755); err != nil {
|
||||
t.Fatalf("write ffmpeg stub: %v", err)
|
||||
}
|
||||
return path
|
||||
}
|
||||
|
||||
func writeScriptCrawlerJPEG(t *testing.T, path string, c color.RGBA) {
|
||||
t.Helper()
|
||||
img := image.NewRGBA(image.Rect(0, 0, 48, 48))
|
||||
for y := 0; y < 48; y++ {
|
||||
for x := 0; x < 48; x++ {
|
||||
img.SetRGBA(x, y, c)
|
||||
}
|
||||
}
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
t.Fatalf("create jpeg: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
if err := jpeg.Encode(f, img, &jpeg.Options{Quality: 95}); err != nil {
|
||||
t.Fatalf("encode jpeg: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceImportsLocalFileAndSkipsExisting(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
@@ -114,7 +136,129 @@ func TestCrawlerRunOnceImportsLocalFileAndSkipsExisting(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceUsesSourceKindNamespace(t *testing.T) {
|
||||
func TestCrawlerRunOnceMarksPreviewDisabledWhenConfigured(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
DisablePreview: true,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d failed:%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123"))
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if v.PreviewStatus != "disabled" {
|
||||
t.Fatalf("preview status = %q, want disabled", v.PreviewStatus)
|
||||
}
|
||||
if v.FingerprintStatus != "ready" || v.SampledSHA256 == "" {
|
||||
t.Fatalf("fingerprint status=%q sampled=%q, want ready and sampled hash", v.FingerprintStatus, v.SampledSHA256)
|
||||
}
|
||||
pending, err := cat.ListVideosByPreviewStatus(ctx, "demo", "pending", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending previews: %v", err)
|
||||
}
|
||||
if len(pending) != 0 {
|
||||
t.Fatalf("pending previews = %d, want 0", len(pending))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceUsesCurrentDrivePreviewSwitch(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: drv.ID(),
|
||||
Kind: Kind,
|
||||
Name: "Demo",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{"script_path": "/tmp/crawler.py"},
|
||||
TeaserEnabled: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed drive: %v", err)
|
||||
}
|
||||
dummyScript := filepath.Join(tmp, "helper-script")
|
||||
if err := os.WriteFile(dummyScript, []byte("helper"), 0o755); err != nil {
|
||||
t.Fatalf("write dummy script: %v", err)
|
||||
}
|
||||
wrapper := filepath.Join(tmp, "helper-wrapper.sh")
|
||||
wrapperScript := fmt.Sprintf("#!/bin/sh\nexec %q -test.run=TestScriptCrawlerHelperProcess \"$@\"\n", os.Args[0])
|
||||
if err := os.WriteFile(wrapper, []byte(wrapperScript), 0o755); err != nil {
|
||||
t.Fatalf("write helper wrapper: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
DisablePreview: true,
|
||||
})
|
||||
res, err := c.RunOnce(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("run once: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result = new:%d failed:%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
v, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123"))
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if v.PreviewStatus != "pending" {
|
||||
t.Fatalf("preview status = %q, want pending from current drive switch", v.PreviewStatus)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceUsesDefaultCrawlerNamespace(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
@@ -144,7 +288,6 @@ func TestCrawlerRunOnceUsesSourceKindNamespace(t *testing.T) {
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
SourceKind: "spider91",
|
||||
PythonPath: wrapper,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
ScriptPath: dummyScript,
|
||||
@@ -156,12 +299,9 @@ func TestCrawlerRunOnceUsesSourceKindNamespace(t *testing.T) {
|
||||
if res.NewVideos != 1 || res.SeenSnapshot != 0 {
|
||||
t.Fatalf("result = new:%d seen:%d, want 1/0", res.NewVideos, res.SeenSnapshot)
|
||||
}
|
||||
videoID := BuildVideoIDForKind("spider91", "demo", "abc-123")
|
||||
videoID := BuildVideoID("demo", "abc-123")
|
||||
if _, err := cat.GetVideo(ctx, videoID); err != nil {
|
||||
t.Fatalf("get source-kind video: %v", err)
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, BuildVideoID("demo", "abc-123")); err == nil {
|
||||
t.Fatalf("default namespace video unexpectedly exists")
|
||||
t.Fatalf("get crawler video: %v", err)
|
||||
}
|
||||
|
||||
res, err = c.RunOnce(ctx, 1)
|
||||
@@ -415,6 +555,182 @@ func TestCrawlerRunOnceSkipsFingerprintDuplicateAndContinues(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerProcessItemSkipsNearDuplicateByTitleDurationAndThumbnail(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
commonThumbDir := filepath.Join(tmp, "common-thumbs")
|
||||
if err := os.MkdirAll(commonThumbDir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir common thumbs: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
canonicalID := "existing-canonical"
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: canonicalID,
|
||||
DriveID: "other-drive",
|
||||
FileID: "existing.mp4",
|
||||
FileName: "existing.mp4",
|
||||
Title: "91 Test Similar Title 1215516",
|
||||
DurationSeconds: 257,
|
||||
Size: 12345,
|
||||
Ext: "mp4",
|
||||
ThumbnailURL: "/p/thumb/" + canonicalID,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed canonical video: %v", err)
|
||||
}
|
||||
writeScriptCrawlerJPEG(t, mediaasset.ThumbnailPathInDir(commonThumbDir, canonicalID), color.RGBA{R: 210, G: 40, B: 40, A: 255})
|
||||
|
||||
outputDir := drv.OutputDir()
|
||||
mediaPath := filepath.Join(outputDir, "near-video.mp4")
|
||||
if err := os.WriteFile(mediaPath, []byte("near-duplicate-but-different-bytes"), 0o644); err != nil {
|
||||
t.Fatalf("write media: %v", err)
|
||||
}
|
||||
thumbPath := filepath.Join(outputDir, "near-thumb.jpg")
|
||||
writeScriptCrawlerJPEG(t, thumbPath, color.RGBA{R: 211, G: 41, B: 41, A: 255})
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
CommonThumbDir: commonThumbDir,
|
||||
})
|
||||
imported, err := c.processItem(ctx, Item{
|
||||
SourceID: "near-source",
|
||||
Title: "91 Test Similar Title 1215516 - source suffix",
|
||||
Author: "helper",
|
||||
DurationSeconds: 257,
|
||||
Media: MediaRef{LocalFile: mediaPath},
|
||||
Thumbnail: MediaRef{LocalFile: thumbPath},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("process item: %v", err)
|
||||
}
|
||||
if imported {
|
||||
t.Fatal("near duplicate imported, want skipped")
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, BuildVideoID("demo", "near-source")); err == nil {
|
||||
t.Fatal("near duplicate should not be inserted into catalog")
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.VideosDir(), "near-source.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("near duplicate video stat = %v, want removed", err)
|
||||
}
|
||||
if sourceThumb, err := drv.ThumbPath("near-source.jpg"); err != nil {
|
||||
t.Fatalf("source thumb path: %v", err)
|
||||
} else if _, err := os.Stat(sourceThumb); !os.IsNotExist(err) {
|
||||
t.Fatalf("source thumb stat = %v, want removed", err)
|
||||
}
|
||||
if _, err := os.Stat(mediaasset.ThumbnailPathInDir(commonThumbDir, BuildVideoID("demo", "near-source"))); !os.IsNotExist(err) {
|
||||
t.Fatalf("common thumb stat = %v, want removed", err)
|
||||
}
|
||||
seen, err := cat.ListCrawlerSourceIDs(ctx, Kind, "demo")
|
||||
if err != nil {
|
||||
t.Fatalf("list seen source ids: %v", err)
|
||||
}
|
||||
if !hasString(seen, "near-source") {
|
||||
t.Fatalf("seen ids = %#v, want near-source", seen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerProcessItemKeepsLargerNearDuplicate(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: "demo", RootDir: filepath.Join(tmp, "crawler")})
|
||||
if err := drv.Init(ctx); err != nil {
|
||||
t.Fatalf("driver init: %v", err)
|
||||
}
|
||||
commonThumbDir := filepath.Join(tmp, "common-thumbs")
|
||||
if err := os.MkdirAll(commonThumbDir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir common thumbs: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
smallerID := "smaller-canonical"
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: smallerID,
|
||||
DriveID: "other-drive",
|
||||
FileID: "smaller.mp4",
|
||||
FileName: "smaller.mp4",
|
||||
Title: "91 Test Larger Candidate 1215516",
|
||||
DurationSeconds: 257,
|
||||
Size: 5,
|
||||
Ext: "mp4",
|
||||
ThumbnailURL: "/p/thumb/" + smallerID,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed smaller video: %v", err)
|
||||
}
|
||||
writeScriptCrawlerJPEG(t, mediaasset.ThumbnailPathInDir(commonThumbDir, smallerID), color.RGBA{R: 80, G: 160, B: 80, A: 255})
|
||||
|
||||
outputDir := drv.OutputDir()
|
||||
mediaPath := filepath.Join(outputDir, "larger-video.mp4")
|
||||
if err := os.WriteFile(mediaPath, []byte("near-duplicate-larger-candidate-bytes"), 0o644); err != nil {
|
||||
t.Fatalf("write media: %v", err)
|
||||
}
|
||||
thumbPath := filepath.Join(outputDir, "larger-thumb.jpg")
|
||||
writeScriptCrawlerJPEG(t, thumbPath, color.RGBA{R: 81, G: 161, B: 81, A: 255})
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
FFprobePath: writeScriptCrawlerFFprobeStub(t, tmp, true),
|
||||
CommonThumbDir: commonThumbDir,
|
||||
})
|
||||
imported, err := c.processItem(ctx, Item{
|
||||
SourceID: "larger-source",
|
||||
Title: "91 Test Larger Candidate 1215516 - source suffix",
|
||||
Author: "helper",
|
||||
DurationSeconds: 257,
|
||||
Media: MediaRef{LocalFile: mediaPath},
|
||||
Thumbnail: MediaRef{LocalFile: thumbPath},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("process item: %v", err)
|
||||
}
|
||||
if !imported {
|
||||
t.Fatal("larger near duplicate was skipped, want imported")
|
||||
}
|
||||
if _, err := cat.GetVideo(ctx, smallerID); err == nil {
|
||||
t.Fatal("smaller near duplicate should be deleted from catalog")
|
||||
}
|
||||
if deleted, err := cat.IsVideoDeleted(ctx, smallerID); err != nil || !deleted {
|
||||
t.Fatalf("smaller tombstone = %v, %v; want deleted tombstone", deleted, err)
|
||||
}
|
||||
larger, err := cat.GetVideo(ctx, BuildVideoID("demo", "larger-source"))
|
||||
if err != nil {
|
||||
t.Fatalf("larger video should be imported: %v", err)
|
||||
}
|
||||
if larger.Size <= 5 {
|
||||
t.Fatalf("larger size = %d, want > 5", larger.Size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRunOnceRejectsInvalidDownloadedVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
@@ -500,6 +816,8 @@ func TestCrawlerRunOnceDownloadsHLSMediaURL(t *testing.T) {
|
||||
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HELPER", "1")
|
||||
t.Setenv("GO_WANT_SCRIPTCRAWLER_HLS", "1")
|
||||
ffmpegArgsFile := filepath.Join(tmp, "ffmpeg-args.txt")
|
||||
t.Setenv("GO_SCRIPTCRAWLER_FFMPEG_ARGS_FILE", ffmpegArgsFile)
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
@@ -530,6 +848,21 @@ func TestCrawlerRunOnceDownloadsHLSMediaURL(t *testing.T) {
|
||||
if string(data) != "hls-video-bytes" {
|
||||
t.Fatalf("hls output = %q", string(data))
|
||||
}
|
||||
argsData, err := os.ReadFile(ffmpegArgsFile)
|
||||
if err != nil {
|
||||
t.Fatalf("read ffmpeg args: %v", err)
|
||||
}
|
||||
argsText := "\n" + string(argsData) + "\n"
|
||||
for _, want := range []string{
|
||||
"\n-protocol_whitelist\nhttp,https,tcp,tls,crypto\n",
|
||||
"\n-allowed_extensions\nALL\n",
|
||||
"\n-allowed_segment_extensions\nALL\n",
|
||||
"\n-extension_picky\n0\n",
|
||||
} {
|
||||
if !strings.Contains(argsText, want) {
|
||||
t.Fatalf("ffmpeg args missing %q in:\n%s", strings.TrimSpace(want), string(argsData))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestScriptCrawlerHelperProcess(t *testing.T) {
|
||||
|
||||
@@ -67,6 +67,55 @@ type DryRunResult struct {
|
||||
DurationMs int64 `json:"durationMs"`
|
||||
}
|
||||
|
||||
type dryRunLogTail struct {
|
||||
mu sync.Mutex
|
||||
lines []string
|
||||
partial string
|
||||
}
|
||||
|
||||
func newDryRunLogTail() *dryRunLogTail {
|
||||
return &dryRunLogTail{lines: make([]string, 0, dryRunLogTailLines)}
|
||||
}
|
||||
|
||||
func (t *dryRunLogTail) Write(p []byte) (int, error) {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
|
||||
chunk := strings.ReplaceAll(string(p), "\r\n", "\n")
|
||||
parts := strings.Split(t.partial+chunk, "\n")
|
||||
t.partial = parts[len(parts)-1]
|
||||
for _, line := range parts[:len(parts)-1] {
|
||||
t.appendLocked(line)
|
||||
}
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
func (t *dryRunLogTail) snapshot() []string {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
|
||||
lines := append([]string{}, t.lines...)
|
||||
if partial := strings.TrimSpace(t.partial); partial != "" {
|
||||
lines = appendDryRunLogLine(lines, partial)
|
||||
}
|
||||
return lines
|
||||
}
|
||||
|
||||
func (t *dryRunLogTail) appendLocked(line string) {
|
||||
t.lines = appendDryRunLogLine(t.lines, line)
|
||||
}
|
||||
|
||||
func appendDryRunLogLine(lines []string, line string) []string {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
return lines
|
||||
}
|
||||
if len(lines) >= dryRunLogTailLines {
|
||||
lines = lines[1:]
|
||||
}
|
||||
return append(lines, line)
|
||||
}
|
||||
|
||||
func DryRun(ctx context.Context, cfg DryRunConfig) *DryRunResult {
|
||||
started := time.Now()
|
||||
result := &DryRunResult{Items: []DryRunItem{}}
|
||||
@@ -169,41 +218,14 @@ func DryRun(ctx context.Context, cfg DryRunConfig) *DryRunResult {
|
||||
result.Error = fmt.Sprintf("启动脚本失败: %v", err)
|
||||
return result
|
||||
}
|
||||
stderr, err := cmd.StderrPipe()
|
||||
if err != nil {
|
||||
_ = stdout.Close()
|
||||
result.Error = fmt.Sprintf("启动脚本失败: %v", err)
|
||||
return result
|
||||
}
|
||||
logTail := newDryRunLogTail()
|
||||
cmd.Stderr = logTail
|
||||
if err := cmd.Start(); err != nil {
|
||||
_ = stdout.Close()
|
||||
_ = stderr.Close()
|
||||
result.Error = fmt.Sprintf("启动脚本失败: %v", err)
|
||||
return result
|
||||
}
|
||||
|
||||
// stderr 是脚本日志,保留尾部若干行用于排错回显。
|
||||
var logMu sync.Mutex
|
||||
logTail := make([]string, 0, dryRunLogTailLines)
|
||||
stderrDone := make(chan struct{})
|
||||
go func() {
|
||||
defer close(stderrDone)
|
||||
scanner := bufio.NewScanner(stderr)
|
||||
scanner.Buffer(make([]byte, 64*1024), 1024*1024)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
logMu.Lock()
|
||||
if len(logTail) >= dryRunLogTailLines {
|
||||
logTail = logTail[1:]
|
||||
}
|
||||
logTail = append(logTail, line)
|
||||
logMu.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
items := []DryRunItem{}
|
||||
var firstMediaHeaders map[string]string
|
||||
parseFailures := 0
|
||||
@@ -264,11 +286,8 @@ func DryRun(ctx context.Context, cfg DryRunConfig) *DryRunResult {
|
||||
_ = killDryRunProcess(cmd)
|
||||
<-waitDone
|
||||
}
|
||||
<-stderrDone
|
||||
|
||||
logMu.Lock()
|
||||
result.Log = append([]string{}, logTail...)
|
||||
logMu.Unlock()
|
||||
result.Log = logTail.snapshot()
|
||||
result.Items = items
|
||||
|
||||
if len(items) == 0 {
|
||||
|
||||
@@ -48,6 +48,29 @@ echo '{"type":"done","stats":{"emitted":1}}'
|
||||
}
|
||||
}
|
||||
|
||||
func TestDryRunCapturesStderrWhenStoppingAfterFirstItem(t *testing.T) {
|
||||
script := writeDryRunScript(t, `
|
||||
echo '[log] first item ready' >&2
|
||||
echo '{"type":"item","item":{"title":"Early Stop Video","media_url":"https://cdn.example.test/v.mp4","source_id":"early-stop"}}'
|
||||
sleep 30
|
||||
`)
|
||||
start := time.Now()
|
||||
result := DryRun(context.Background(), DryRunConfig{
|
||||
PythonPath: "/bin/sh",
|
||||
ScriptPath: script,
|
||||
SkipMediaProbe: true,
|
||||
})
|
||||
if !result.OK {
|
||||
t.Fatalf("ok = false, error = %q, log = %v", result.Error, result.Log)
|
||||
}
|
||||
if elapsed := time.Since(start); elapsed > 5*time.Second {
|
||||
t.Fatalf("dry run took %s, script was not stopped after first item", elapsed)
|
||||
}
|
||||
if len(result.Log) == 0 || !strings.Contains(result.Log[0], "first item ready") {
|
||||
t.Fatalf("log tail = %v, want stderr captured before early stop", result.Log)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDryRunProbesMediaURL(t *testing.T) {
|
||||
var gotRange, gotReferer string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
package scriptcrawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/mediaasset"
|
||||
"github.com/video-site/backend/internal/mediasim"
|
||||
)
|
||||
|
||||
const (
|
||||
nearDuplicateTitleThreshold = 0.90
|
||||
nearDuplicateSSIMThreshold = 0.95
|
||||
nearDuplicateDurationToleranceSeconds = 2
|
||||
nearDuplicateCandidateLimit = 200
|
||||
)
|
||||
|
||||
type nearDuplicateMatch struct {
|
||||
video *catalog.Video
|
||||
titleSimilarity float64
|
||||
thumbnailSSIM float64
|
||||
}
|
||||
|
||||
func (c *Crawler) findNearDuplicateVideo(ctx context.Context, source *catalog.Video, sourceThumbPath string) (*nearDuplicateMatch, error) {
|
||||
if c == nil || c.cfg.Catalog == nil || source == nil {
|
||||
return nil, nil
|
||||
}
|
||||
sourceThumbPath = strings.TrimSpace(sourceThumbPath)
|
||||
commonThumbDir := strings.TrimSpace(c.cfg.CommonThumbDir)
|
||||
if sourceThumbPath == "" || commonThumbDir == "" || strings.TrimSpace(source.Title) == "" || source.DurationSeconds <= 0 {
|
||||
return nil, nil
|
||||
}
|
||||
if _, err := os.Stat(sourceThumbPath); err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
candidates, err := c.cfg.Catalog.ListNearDuplicateVideoCandidates(ctx, source, nearDuplicateDurationToleranceSeconds, nearDuplicateCandidateLimit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, candidate := range candidates {
|
||||
if candidate == nil || candidate.ID == source.ID {
|
||||
continue
|
||||
}
|
||||
titleScore := mediasim.TitleSimilarity(source.Title, candidate.Title)
|
||||
if titleScore < nearDuplicateTitleThreshold {
|
||||
continue
|
||||
}
|
||||
candidateThumbPath := mediaasset.ThumbnailPathInDir(commonThumbDir, candidate.ID)
|
||||
if _, err := os.Stat(candidateThumbPath); err != nil {
|
||||
continue
|
||||
}
|
||||
ssimScore, err := mediasim.ImageSSIM(sourceThumbPath, candidateThumbPath)
|
||||
if err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s source_id=%s candidate=%s thumbnail ssim failed: %v", c.cfg.Driver.ID(), source.ID, candidate.ID, err)
|
||||
continue
|
||||
}
|
||||
if ssimScore >= nearDuplicateSSIMThreshold {
|
||||
return &nearDuplicateMatch{
|
||||
video: candidate,
|
||||
titleSimilarity: titleScore,
|
||||
thumbnailSSIM: ssimScore,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
@@ -1,799 +0,0 @@
|
||||
package spider91
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
)
|
||||
|
||||
// TestCrawlerRunOnceFullFlow 用一个伪 python 脚本 + httptest 服务器
|
||||
// 把 Crawler.RunOnce 的完整流程跑一遍:脚本生成 JSON、下载视频和封面、入库、
|
||||
// 重复运行跳过已存在的 91 源视频 ID。
|
||||
func TestCrawlerRunOnceFullFlow(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
|
||||
tmp := t.TempDir()
|
||||
|
||||
// 1. 假 HTTP 服务器:根据路径返回视频数据或封面数据
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "120001.mp4"):
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("FAKEVIDEO1"))
|
||||
case strings.Contains(r.URL.Path, "120002.mp4"):
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("FAKEVIDEO2BYTES"))
|
||||
case strings.Contains(r.URL.Path, "/thumb/120001.jpg"):
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
_, _ = w.Write([]byte("\xff\xd8\xff\xe0fakejpg1"))
|
||||
case strings.Contains(r.URL.Path, "/thumb/120002.jpg"):
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
_, _ = w.Write([]byte("\xff\xd8\xff\xe0fakejpg2"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
// 2. 假 python 脚本:解析 --output / --stream-output 参数,
|
||||
// 在 stream 模式下逐行 echo 每条视频的 JSON 到 stdout(模拟 Python 端 stream),
|
||||
// 同时仍写 --output 文件作归档。
|
||||
videoEntries := []map[string]string{
|
||||
{
|
||||
"title": "Video One 口交",
|
||||
"thumb_url": srv.URL + "/thumb/not-120001.jpg",
|
||||
"video_url": srv.URL + "/videos/120001.mp4",
|
||||
"viewkey": "vk-001",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-001",
|
||||
},
|
||||
{
|
||||
"title": "Video Two",
|
||||
"thumb_url": srv.URL + "/thumb/not-120002.jpg",
|
||||
"video_url": srv.URL + "/videos/120002.mp4",
|
||||
"viewkey": "vk-002",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-002",
|
||||
},
|
||||
}
|
||||
scriptPath := filepath.Join(tmp, "fake_spider.sh")
|
||||
scriptBody := buildFakeSpiderScript(videoEntries)
|
||||
if err := os.WriteFile(scriptPath, []byte(scriptBody), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
|
||||
// 3. 准备 catalog + driver + crawler
|
||||
dbPath := filepath.Join(tmp, "test.db")
|
||||
cat, err := catalog.Open(dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
driveID := "spider91-test"
|
||||
rootDir := filepath.Join(tmp, "spider91", driveID)
|
||||
commonThumbs := filepath.Join(tmp, "previews", "thumbs")
|
||||
drv := New(Config{ID: driveID, RootDir: rootDir})
|
||||
|
||||
// 把 drive 也写入 catalog(Crawler 不直接读,但 main 真实流程会写)
|
||||
if err := cat.UpsertDrive(context.Background(), &catalog.Drive{
|
||||
ID: driveID,
|
||||
Kind: Kind,
|
||||
Name: "test crawler",
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert drive: %v", err)
|
||||
}
|
||||
if _, err := cat.CreateTagAndClassify(context.Background(), "Video One", nil, "user"); err != nil {
|
||||
t.Fatalf("create user tag: %v", err)
|
||||
}
|
||||
|
||||
var newVideos []*catalog.Video
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: commonThumbs,
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
OnNewVideo: func(v *catalog.Video) {
|
||||
newVideos = append(newVideos, v)
|
||||
},
|
||||
})
|
||||
|
||||
// 4. 第一次 RunOnce:应该新入库 2 条
|
||||
res, err := c.RunOnce(context.Background(), 15)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 2 || res.Skipped != 0 || res.Failed != 0 {
|
||||
t.Fatalf("first run result: new=%d skipped=%d failed=%d, want 2/0/0",
|
||||
res.NewVideos, res.Skipped, res.Failed)
|
||||
}
|
||||
if res.TargetNew != 15 {
|
||||
t.Fatalf("first run TargetNew = %d, want 15", res.TargetNew)
|
||||
}
|
||||
if res.SeenSnapshot != 0 {
|
||||
t.Fatalf("first run SeenSnapshot = %d, want 0 (catalog empty before first run)", res.SeenSnapshot)
|
||||
}
|
||||
if len(newVideos) != 2 {
|
||||
t.Fatalf("OnNewVideo called %d times, want 2", len(newVideos))
|
||||
}
|
||||
|
||||
// 5. 检查文件落盘
|
||||
for _, item := range []struct {
|
||||
sourceID string
|
||||
size int64
|
||||
}{
|
||||
{"120001", 10},
|
||||
{"120002", 15},
|
||||
} {
|
||||
videoPath := filepath.Join(rootDir, "videos", item.sourceID+".mp4")
|
||||
info, err := os.Stat(videoPath)
|
||||
if err != nil {
|
||||
t.Fatalf("video %s missing: %v", item.sourceID, err)
|
||||
}
|
||||
if info.Size() != item.size {
|
||||
t.Fatalf("video %s size = %d, want %d", item.sourceID, info.Size(), item.size)
|
||||
}
|
||||
|
||||
thumbPath := filepath.Join(rootDir, "thumbs", item.sourceID+".jpg")
|
||||
if _, err := os.Stat(thumbPath); err != nil {
|
||||
t.Fatalf("thumb %s missing: %v", item.sourceID, err)
|
||||
}
|
||||
|
||||
// 复制到 common thumbs 目录的副本,名字按 videoID 来
|
||||
videoID := BuildVideoID(driveID, item.sourceID)
|
||||
commonThumb := filepath.Join(commonThumbs, videoID+".jpg")
|
||||
if _, err := os.Stat(commonThumb); err != nil {
|
||||
t.Fatalf("common thumb %s missing: %v", commonThumb, err)
|
||||
}
|
||||
}
|
||||
|
||||
// 6. 检查 catalog 入库
|
||||
for _, sourceID := range []string{"120001", "120002"} {
|
||||
videoID := BuildVideoID(driveID, sourceID)
|
||||
v, err := cat.GetVideo(context.Background(), videoID)
|
||||
if err != nil {
|
||||
t.Fatalf("GetVideo %s: %v", videoID, err)
|
||||
}
|
||||
if v.DriveID != driveID {
|
||||
t.Fatalf("video %s drive_id = %q want %q", videoID, v.DriveID, driveID)
|
||||
}
|
||||
if v.FileID != sourceID+".mp4" {
|
||||
t.Fatalf("video %s file_id = %q want %q", videoID, v.FileID, sourceID+".mp4")
|
||||
}
|
||||
if v.ThumbnailURL == "" {
|
||||
t.Fatalf("video %s ThumbnailURL empty (cover should be ready)", videoID)
|
||||
}
|
||||
if v.Author != DefaultAuthor {
|
||||
t.Fatalf("video %s author = %q want %q", videoID, v.Author, DefaultAuthor)
|
||||
}
|
||||
// 每条视频都应该带 "91porn" 标签(UpsertVideo 路径自动同步 tags 表)
|
||||
hasDefaultTag := false
|
||||
for _, tag := range v.Tags {
|
||||
if tag == DefaultTag {
|
||||
hasDefaultTag = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasDefaultTag {
|
||||
t.Fatalf("video %s tags = %v, want contain %q", videoID, v.Tags, DefaultTag)
|
||||
}
|
||||
if sourceID == "120001" {
|
||||
if !containsString(v.Tags, "口交") {
|
||||
t.Fatalf("video %s tags = %v, want contain built-in tag 口交", videoID, v.Tags)
|
||||
}
|
||||
if !containsString(v.Tags, "Video One") {
|
||||
t.Fatalf("video %s tags = %v, want contain user tag Video One", videoID, v.Tags)
|
||||
}
|
||||
}
|
||||
if sourceID == "120002" && (containsString(v.Tags, "口交") || containsString(v.Tags, "Video One")) {
|
||||
t.Fatalf("video %s tags = %v, should not inherit tags from other spider91 videos", videoID, v.Tags)
|
||||
}
|
||||
}
|
||||
|
||||
// 7. 第二次 RunOnce:源视频 ID 已存在 → 全部 skipped,无新文件下载
|
||||
newVideos = nil
|
||||
res2, err := c.RunOnce(context.Background(), 15)
|
||||
if err != nil {
|
||||
t.Fatalf("second RunOnce: %v", err)
|
||||
}
|
||||
if res2.NewVideos != 0 {
|
||||
t.Fatalf("second run NewVideos = %d, want 0", res2.NewVideos)
|
||||
}
|
||||
if res2.Skipped != 2 {
|
||||
t.Fatalf("second run Skipped = %d, want 2", res2.Skipped)
|
||||
}
|
||||
// 第二次运行时 catalog 里已经有 2 条,seen snapshot 应该写出 2 个源视频 ID
|
||||
if res2.SeenSnapshot != 2 {
|
||||
t.Fatalf("second run SeenSnapshot = %d, want 2", res2.SeenSnapshot)
|
||||
}
|
||||
if len(newVideos) != 0 {
|
||||
t.Fatalf("second run OnNewVideo fired %d times, want 0", len(newVideos))
|
||||
}
|
||||
}
|
||||
|
||||
// TestCrawlerRunOnceMissingScript 报错而不是 panic。
|
||||
func TestCrawlerRunOnceMissingScript(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "x.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("catalog: %v", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
drv := New(Config{ID: "x", RootDir: filepath.Join(tmp, "x")})
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "python3",
|
||||
ScriptPath: filepath.Join(tmp, "does-not-exist.py"),
|
||||
})
|
||||
|
||||
if _, err := c.RunOnce(context.Background(), 1); err == nil {
|
||||
t.Fatalf("expected error for missing script")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerPassesProxyToSpiderProcess(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
|
||||
tmp := t.TempDir()
|
||||
scriptPath := filepath.Join(tmp, "print_proxy_env.sh")
|
||||
script := `#!/bin/sh
|
||||
printf 'HTTP_PROXY=%s\n' "$HTTP_PROXY"
|
||||
printf 'HTTPS_PROXY=%s\n' "$HTTPS_PROXY"
|
||||
printf 'http_proxy=%s\n' "$http_proxy"
|
||||
printf 'https_proxy=%s\n' "$https_proxy"
|
||||
printf 'NO_PROXY=%s\n' "$NO_PROXY"
|
||||
printf 'no_proxy=%s\n' "$no_proxy"
|
||||
`
|
||||
if err := os.WriteFile(scriptPath, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
|
||||
proxyURL := "socks5h://proxy.local:1080"
|
||||
drv := New(Config{ID: "proxy-drive", RootDir: filepath.Join(tmp, "proxy-drive")})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
ProxyURL: proxyURL,
|
||||
})
|
||||
cmd, stdout, err := c.startSpiderTargetNew(
|
||||
context.Background(),
|
||||
1,
|
||||
filepath.Join(tmp, "seen.txt"),
|
||||
filepath.Join(tmp, "out.json"),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("startSpiderTargetNew: %v", err)
|
||||
}
|
||||
raw, err := io.ReadAll(stdout)
|
||||
if err != nil {
|
||||
t.Fatalf("read stdout: %v", err)
|
||||
}
|
||||
if err := cmd.Wait(); err != nil {
|
||||
t.Fatalf("wait: %v", err)
|
||||
}
|
||||
|
||||
want := strings.Join([]string{
|
||||
"HTTP_PROXY=" + proxyURL,
|
||||
"HTTPS_PROXY=" + proxyURL,
|
||||
"http_proxy=" + proxyURL,
|
||||
"https_proxy=" + proxyURL,
|
||||
"NO_PROXY=",
|
||||
"no_proxy=",
|
||||
}, "\n") + "\n"
|
||||
if string(raw) != want {
|
||||
t.Fatalf("proxy env = %q, want %q", string(raw), want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfigureExplicitProxySupportsSocksSchemes(t *testing.T) {
|
||||
for _, raw := range []string{
|
||||
"socks5://127.0.0.1:1080",
|
||||
"socks5h://proxy-user:proxy-pass@127.0.0.1:1080",
|
||||
} {
|
||||
t.Run(raw, func(t *testing.T) {
|
||||
transport := &http.Transport{Proxy: http.ProxyFromEnvironment}
|
||||
if err := configureExplicitProxy(transport, raw); err != nil {
|
||||
t.Fatalf("configureExplicitProxy: %v", err)
|
||||
}
|
||||
if transport.Proxy != nil {
|
||||
t.Fatalf("Transport.Proxy should be nil for SOCKS proxy")
|
||||
}
|
||||
if transport.DialContext == nil {
|
||||
t.Fatalf("Transport.DialContext should be set for SOCKS proxy")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
transport := &http.Transport{Proxy: http.ProxyFromEnvironment}
|
||||
if err := configureExplicitProxy(transport, "http://127.0.0.1:7890"); err != nil {
|
||||
t.Fatalf("configureExplicitProxy http: %v", err)
|
||||
}
|
||||
if transport.Proxy == nil {
|
||||
t.Fatalf("Transport.Proxy should be set for HTTP proxy")
|
||||
}
|
||||
if transport.DialContext != nil {
|
||||
t.Fatalf("Transport.DialContext should not be set for HTTP proxy")
|
||||
}
|
||||
|
||||
if err := configureExplicitProxy(&http.Transport{}, "ftp://127.0.0.1:21"); err == nil {
|
||||
t.Fatalf("expected unsupported proxy scheme error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectSocksTargetIPPrefersIPv4(t *testing.T) {
|
||||
got := selectSocksTargetIP([]net.IPAddr{
|
||||
{IP: net.ParseIP("2606:4700:20::681a:229")},
|
||||
{IP: net.ParseIP("104.26.3.41")},
|
||||
})
|
||||
if got == nil || got.String() != "104.26.3.41" {
|
||||
t.Fatalf("selectSocksTargetIP = %v, want IPv4 104.26.3.41", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCrawlerThumbDownloadFailureMarksStatusFailed 验证:网站封面下载失败时
|
||||
// crawler 把 thumbnail_status 显式标 'failed',避免后续封面补队列一直重复
|
||||
// 捞到这条 spider91 视频。
|
||||
//
|
||||
// 历史 bug:之前 thumb 下载失败仅打 log,url=”, status 走 schema DEFAULT 'pending'。
|
||||
// CountVideosNeedingThumbnail 条件是 url=” AND status != 'failed' → count=1。
|
||||
// spider91 drive 的 thumb worker 按设计不处理 spider91 视频 → 没人会改 status,
|
||||
// 后续补队列会一直认为它还缺封面。
|
||||
func TestCrawlerThumbDownloadFailureMarksStatusFailed(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
// 假 HTTP 服务器:thumb 路径返回 500,video 正常返回字节。
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.Contains(r.URL.Path, "120101.mp4"):
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("FAKEVIDEO"))
|
||||
case strings.Contains(r.URL.Path, "120101.jpg"):
|
||||
http.Error(w, "broken", http.StatusInternalServerError)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
videoEntries := []map[string]string{
|
||||
{
|
||||
"title": "Thumb Failure Video",
|
||||
"thumb_url": srv.URL + "/thumb/120101.jpg",
|
||||
"video_url": srv.URL + "/videos/120101.mp4",
|
||||
"viewkey": "vk-thumb-fail",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-thumb-fail",
|
||||
},
|
||||
}
|
||||
scriptPath := filepath.Join(tmp, "fake.sh")
|
||||
if err := os.WriteFile(scriptPath, []byte(buildFakeSpiderScript(videoEntries)), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "test.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("catalog: %v", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
driveID := "thumbfail-drive"
|
||||
drv := New(Config{ID: driveID, RootDir: filepath.Join(tmp, "spider91", driveID)})
|
||||
if err := cat.UpsertDrive(context.Background(), &catalog.Drive{
|
||||
ID: driveID, Kind: Kind, Name: "thumbfail",
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert drive: %v", err)
|
||||
}
|
||||
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 5)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 {
|
||||
t.Fatalf("expected 1 new video, got %d (failed=%d)", res.NewVideos, res.Failed)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(context.Background(), "spider91-"+driveID+"-120101")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Errorf("ThumbnailURL = %q, want empty (download failed)", got.ThumbnailURL)
|
||||
}
|
||||
|
||||
// 关键断言:CountVideosNeedingThumbnail 应该返回 0。
|
||||
// 该函数的 SQL 条件是 `url = '' AND status != 'failed'`;如果 crawler 没把
|
||||
// status 标 'failed'(schema DEFAULT 'pending'),count 就会是 1。
|
||||
count, err := cat.CountVideosNeedingThumbnail(context.Background(), driveID)
|
||||
if err != nil {
|
||||
t.Fatalf("count: %v", err)
|
||||
}
|
||||
if count != 0 {
|
||||
t.Fatalf("CountVideosNeedingThumbnail = %d, want 0 (status should be 'failed' to unblock teaser worker)", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerUsesCrawlerVideoURLForFirstDownload(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
var detailRequests int32
|
||||
var originalRequests int32
|
||||
var wrongRequests int32
|
||||
var srv *httptest.Server
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.URL.Path == "/v.php":
|
||||
atomic.AddInt32(&detailRequests, 1)
|
||||
_, _ = w.Write([]byte(spider91DetailHTML(srv.URL + "/videos/856305.mp4?token=wrong")))
|
||||
case r.URL.Path == "/videos/120201.mp4" && r.URL.Query().Get("token") == "original":
|
||||
atomic.AddInt32(&originalRequests, 1)
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("ORIGINALVIDEO"))
|
||||
case r.URL.Path == "/videos/856305.mp4":
|
||||
atomic.AddInt32(&wrongRequests, 1)
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("WRONGVIDEO"))
|
||||
case r.URL.Path == "/thumb/120201.jpg":
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
_, _ = w.Write([]byte("\xff\xd8\xff\xe0thumb"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
entry := map[string]string{
|
||||
"title": "Use Original URL First",
|
||||
"thumb_url": srv.URL + "/thumb/wrong-thumb.jpg",
|
||||
"video_url": srv.URL + "/videos/120201.mp4?token=original",
|
||||
"viewkey": "vk-use-original",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-use-original",
|
||||
}
|
||||
cat, drv, scriptPath := seedCrawlerTestDeps(t, tmp, "use-original-drive", []map[string]string{entry})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result new=%d failed=%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
if got := atomic.LoadInt32(&detailRequests); got != 0 {
|
||||
t.Fatalf("detail requests = %d, want 0 (first download should use crawler URL)", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&originalRequests); got != 1 {
|
||||
t.Fatalf("original URL requests = %d, want 1", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&wrongRequests); got != 0 {
|
||||
t.Fatalf("wrong source URL requests = %d, want 0", got)
|
||||
}
|
||||
info, err := os.Stat(filepath.Join(drv.RootDir(), "videos", "120201.mp4"))
|
||||
if err != nil {
|
||||
t.Fatalf("original video missing: %v", err)
|
||||
}
|
||||
if info.Size() != int64(len("ORIGINALVIDEO")) {
|
||||
t.Fatalf("original video size = %d, want %d", info.Size(), len("ORIGINALVIDEO"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRefreshesVideoURLAfterExpiredDownload(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
var detailRequests int32
|
||||
var staleRequests int32
|
||||
var freshRequests int32
|
||||
var srv *httptest.Server
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.URL.Path == "/v.php":
|
||||
n := atomic.AddInt32(&detailRequests, 1)
|
||||
videoURL := srv.URL + "/videos/120202.mp4?token=stale"
|
||||
if n > 1 {
|
||||
videoURL = srv.URL + "/videos/120202.mp4?token=fresh"
|
||||
}
|
||||
_, _ = w.Write([]byte(spider91DetailHTML(videoURL)))
|
||||
case r.URL.Path == "/videos/120202.mp4" && r.URL.Query().Get("token") == "stale":
|
||||
atomic.AddInt32(&staleRequests, 1)
|
||||
http.Error(w, "expired", http.StatusForbidden)
|
||||
case r.URL.Path == "/videos/120202.mp4" && r.URL.Query().Get("token") == "fresh":
|
||||
atomic.AddInt32(&freshRequests, 1)
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("REFRESHEDVIDEO"))
|
||||
case r.URL.Path == "/thumb/120202.jpg":
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
_, _ = w.Write([]byte("\xff\xd8\xff\xe0thumb"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
entry := map[string]string{
|
||||
"title": "Refresh After Expired Download",
|
||||
"thumb_url": srv.URL + "/thumb/wrong-thumb.jpg",
|
||||
"video_url": srv.URL + "/videos/120202.mp4?token=old",
|
||||
"viewkey": "vk-refresh-after",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=vk-refresh-after",
|
||||
}
|
||||
cat, drv, scriptPath := seedCrawlerTestDeps(t, tmp, "refresh-after-drive", []map[string]string{entry})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 1 || res.Failed != 0 {
|
||||
t.Fatalf("result new=%d failed=%d, want 1/0", res.NewVideos, res.Failed)
|
||||
}
|
||||
if got := atomic.LoadInt32(&detailRequests); got < 2 {
|
||||
t.Fatalf("detail requests = %d, want at least 2 (initial refresh + retry refresh)", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&staleRequests); got != 1 {
|
||||
t.Fatalf("stale URL requests = %d, want 1", got)
|
||||
}
|
||||
if got := atomic.LoadInt32(&freshRequests); got != 1 {
|
||||
t.Fatalf("fresh URL requests = %d, want 1", got)
|
||||
}
|
||||
info, err := os.Stat(filepath.Join(drv.RootDir(), "videos", "120202.mp4"))
|
||||
if err != nil {
|
||||
t.Fatalf("refreshed video missing: %v", err)
|
||||
}
|
||||
if info.Size() != int64(len("REFRESHEDVIDEO")) {
|
||||
t.Fatalf("refreshed video size = %d, want %d", info.Size(), len("REFRESHEDVIDEO"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlerRejectsRefreshedSourceIDMismatch(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-based fake script only on unix")
|
||||
}
|
||||
tmp := t.TempDir()
|
||||
|
||||
var srv *httptest.Server
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.URL.Path == "/v.php":
|
||||
_, _ = w.Write([]byte(spider91DetailHTML(srv.URL + "/videos/856305.mp4?token=fresh")))
|
||||
case r.URL.Path == "/videos/1203058.mp4":
|
||||
http.Error(w, "expired", http.StatusForbidden)
|
||||
case r.URL.Path == "/videos/856305.mp4":
|
||||
w.Header().Set("Content-Type", "video/mp4")
|
||||
_, _ = w.Write([]byte("WRONGVIDEO"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
entry := map[string]string{
|
||||
"title": "Source ID Mismatch",
|
||||
"thumb_url": srv.URL + "/thumb/1203058.jpg",
|
||||
"video_url": srv.URL + "/videos/1203058.mp4?token=old",
|
||||
"viewkey": "86fd91cce1f2e1a154cc",
|
||||
"source_id": "1203058",
|
||||
"detail_url": srv.URL + "/v.php?viewkey=86fd91cce1f2e1a154cc",
|
||||
}
|
||||
cat, drv, scriptPath := seedCrawlerTestDeps(t, tmp, "mismatch-drive", []map[string]string{entry})
|
||||
c := NewCrawler(CrawlerConfig{
|
||||
Driver: drv,
|
||||
Catalog: cat,
|
||||
PythonPath: "sh",
|
||||
ScriptPath: scriptPath,
|
||||
CommonThumbDir: filepath.Join(tmp, "previews", "thumbs"),
|
||||
SpiderTimeout: 10 * time.Second,
|
||||
DownloadTimeout: 10 * time.Second,
|
||||
})
|
||||
|
||||
res, err := c.RunOnce(context.Background(), 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunOnce: %v", err)
|
||||
}
|
||||
if res.NewVideos != 0 || res.Failed != 1 {
|
||||
t.Fatalf("result new=%d failed=%d, want 0/1", res.NewVideos, res.Failed)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(drv.RootDir(), "videos", "1203058.mp4")); !os.IsNotExist(err) {
|
||||
t.Fatalf("mismatched source file should not be written, stat err=%v", err)
|
||||
}
|
||||
if v, _ := cat.GetVideo(context.Background(), BuildVideoID(drv.ID(), "1203058")); v != nil {
|
||||
t.Fatalf("mismatched video should not be inserted: %+v", v)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSourceIDForItemRequiresNumericSourceID(t *testing.T) {
|
||||
if got := sourceIDForItem(spiderVideoEntry{
|
||||
Viewkey: "86fd91cce1f2e1a154cc",
|
||||
VideoURL: "https://cdn.example/videos/1203058.mp4?token=x",
|
||||
}); got != "1203058" {
|
||||
t.Fatalf("sourceIDForItem(video url) = %q, want 1203058", got)
|
||||
}
|
||||
if got := sourceIDForItem(spiderVideoEntry{
|
||||
Viewkey: "86fd91cce1f2e1a154cc",
|
||||
ThumbURL: "https://img.example/thumb/1203058.jpg",
|
||||
}); got != "1203058" {
|
||||
t.Fatalf("sourceIDForItem(thumb url) = %q, want 1203058", got)
|
||||
}
|
||||
if got := sourceIDForItem(spiderVideoEntry{
|
||||
Viewkey: "86fd91cce1f2e1a154cc",
|
||||
SourceID: "not-numeric",
|
||||
VideoURL: "https://cdn.example/videos/video.mp4",
|
||||
}); got != "" {
|
||||
t.Fatalf("sourceIDForItem(non numeric) = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeThumbURLForSource(t *testing.T) {
|
||||
got := normalizeThumbURLForSource("https://img.example/thumb/856305.jpg?x=1#frag", "1203058")
|
||||
want := "https://img.example/thumb/1203058.jpg"
|
||||
if got != want {
|
||||
t.Fatalf("normalizeThumbURLForSource = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpider91ListURLForDetail(t *testing.T) {
|
||||
got := spider91ListURLForDetail("https://www.91porn.com/view_video.php?viewkey=abc&page=5&c=furum&viewtype=basic&category=top")
|
||||
want := "https://www.91porn.com/v.php?category=top&page=5&viewtype=basic"
|
||||
if got != want {
|
||||
t.Fatalf("spider91ListURLForDetail = %q, want %q", got, want)
|
||||
}
|
||||
if got := spider91ListURLForDetail("http://127.0.0.1/v.php?viewkey=abc&page=5&viewtype=basic&category=top"); got != "" {
|
||||
t.Fatalf("spider91ListURLForDetail(localhost) = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpider91CookieHeader(t *testing.T) {
|
||||
got := spider91CookieHeader([]*http.Cookie{
|
||||
{Name: "CLIPSHARE", Value: "abc"},
|
||||
{Name: "ga", Value: "def"},
|
||||
{Name: "mode", Value: "m"},
|
||||
})
|
||||
want := "mode=d; CLIPSHARE=abc; ga=def"
|
||||
if got != want {
|
||||
t.Fatalf("spider91CookieHeader = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpider91ProgressLogLineClassifiers(t *testing.T) {
|
||||
if !isSpider91CheckedVideoLogLine("[2026-06-08 16:49:17] 处理视频 3/24: 标题") {
|
||||
t.Fatal("checked video log line was not recognized")
|
||||
}
|
||||
if isSpider91CheckedVideoLogLine("[2026-06-08 16:49:17] [页 2] 发现 24 个视频") {
|
||||
t.Fatal("page summary log line should not count as checked video")
|
||||
}
|
||||
if !isSpider91ExtractedVideoLogLine("[2026-06-08 16:49:39] [OK] 成功提取视频直链") {
|
||||
t.Fatal("extracted video log line was not recognized")
|
||||
}
|
||||
}
|
||||
|
||||
func spider91DetailHTML(videoURL string) string {
|
||||
fragment := `<video><source src="` + videoURL + `" type="video/mp4"></video>`
|
||||
return `document.write(strencode2("` + url.PathEscape(fragment) + `"));`
|
||||
}
|
||||
|
||||
func seedCrawlerTestDeps(t *testing.T, tmp, driveID string, entries []map[string]string) (*catalog.Catalog, *Driver, string) {
|
||||
t.Helper()
|
||||
scriptPath := filepath.Join(tmp, driveID+"-fake.sh")
|
||||
if err := os.WriteFile(scriptPath, []byte(buildFakeSpiderScript(entries)), 0o755); err != nil {
|
||||
t.Fatalf("write script: %v", err)
|
||||
}
|
||||
cat, err := catalog.Open(filepath.Join(tmp, driveID+".db"))
|
||||
if err != nil {
|
||||
t.Fatalf("catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
drv := New(Config{ID: driveID, RootDir: filepath.Join(tmp, "spider91", driveID)})
|
||||
if err := cat.UpsertDrive(context.Background(), &catalog.Drive{
|
||||
ID: driveID, Kind: Kind, Name: driveID,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert drive: %v", err)
|
||||
}
|
||||
return cat, drv, scriptPath
|
||||
}
|
||||
|
||||
// buildFakeSpiderScript 生成一个伪 python 脚本(其实是 sh)。
|
||||
//
|
||||
// 行为:
|
||||
// - 解析 --output FILE / --stream-output 两个 flag
|
||||
// - --stream-output 时:逐行输出每个 entry 的 JSON 到 stdout 并 flush
|
||||
// - --output 时:把完整 JSON 数据写到 FILE(向后兼容,且作归档)
|
||||
//
|
||||
// 用 sh 来写是为了避免 Python 依赖。每条 entry 的 JSON 用 Go marshal 出来后嵌入。
|
||||
func buildFakeSpiderScript(entries []map[string]string) string {
|
||||
var sb strings.Builder
|
||||
sb.WriteString("#!/bin/sh\n")
|
||||
sb.WriteString("out=\"\"; stream=0\n")
|
||||
sb.WriteString("while [ $# -gt 0 ]; do case \"$1\" in --output) out=\"$2\"; shift 2;; --stream-output) stream=1; shift;; *) shift;; esac; done\n")
|
||||
|
||||
// stream 模式:逐行 echo
|
||||
sb.WriteString("if [ \"$stream\" = \"1\" ]; then\n")
|
||||
for _, e := range entries {
|
||||
raw, _ := json.Marshal(e)
|
||||
// 用单引号 here-string 形式确保 JSON 中的双引号原样出来
|
||||
sb.WriteString(" cat <<'STREAM_EOF'\n")
|
||||
sb.Write(raw)
|
||||
sb.WriteString("\nSTREAM_EOF\n")
|
||||
}
|
||||
sb.WriteString("fi\n")
|
||||
|
||||
// 写 --output 文件(带完整 wrapper)
|
||||
sb.WriteString("if [ -n \"$out\" ]; then\n")
|
||||
sb.WriteString(" mkdir -p \"$(dirname \"$out\")\" 2>/dev/null\n")
|
||||
sb.WriteString(" cat > \"$out\" <<'OUT_EOF'\n")
|
||||
wrapper := map[string]any{
|
||||
"crawl_time": "2026-01-01T00:00:00",
|
||||
"total_videos": len(entries),
|
||||
"videos": entries,
|
||||
}
|
||||
wrapped, _ := json.MarshalIndent(wrapper, "", " ")
|
||||
sb.Write(wrapped)
|
||||
sb.WriteString("\nOUT_EOF\n")
|
||||
sb.WriteString("fi\n")
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// containsString reports whether want is an element of values.
func containsString(values []string, want string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == want {
			return true
		}
	}
	return false
}
|
||||
@@ -1,235 +0,0 @@
|
||||
// Package spider91 把 91porn 爬虫的产物(本地下载好的视频和封面)
|
||||
// 包装成一个 drives.Drive 实现,让它跟其它网盘一样可以挂载到 catalog 上。
|
||||
//
|
||||
// 与其它 drive 不同的是:
|
||||
// - 数据来源不是云盘 API,而是 Python 子进程跑 spider_91porn.py 后下载到本地
|
||||
// - StreamURL 直接返回本地文件路径,由 api.handleSpider91Video 用 http.ServeFile 服务
|
||||
// - List/Stat 用于 GC 兜底(按本地文件名列出 videos/ 目录)
|
||||
package spider91
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
// Kind 是该 drive 的类型代号,写到 catalog.drives.kind。
|
||||
const Kind = "spider91"
|
||||
|
||||
// Config holds the settings needed to construct a Driver.
type Config struct {
	// ID is the drive id in the catalog; it is used to keep the local
	// directory of each spider91 instance separate.
	ID string

	// RootDir is the drive's root directory on disk. The driver creates
	// videos/ and thumbs/ beneath it. The backend normally builds it as
	// <data_dir>/spider91/<driveID>/.
	RootDir string
}
|
||||
|
||||
// Driver is the spider91 implementation of drives.Drive, backed by files on
// the local disk.
type Driver struct {
	id      string // drive id in the catalog
	rootDir string // storage root holding videos/ and thumbs/
}
|
||||
|
||||
// New 构造一个 Driver。
|
||||
func New(c Config) *Driver {
|
||||
return &Driver{
|
||||
id: c.ID,
|
||||
rootDir: c.RootDir,
|
||||
}
|
||||
}
|
||||
|
||||
// Kind 返回 "spider91"。
|
||||
func (d *Driver) Kind() string { return Kind }
|
||||
|
||||
// ID 返回 catalog 中的 drive id。
|
||||
func (d *Driver) ID() string { return d.id }
|
||||
|
||||
// RootID 返回根目录的逻辑 ID。spider91 没有真正的目录结构,
|
||||
// 这里固定返回 "/" 占位,调用方实际不会用它去 List 子目录。
|
||||
func (d *Driver) RootID() string { return "/" }
|
||||
|
||||
// Init 确保 rootDir/videos 和 rootDir/thumbs 存在。
|
||||
func (d *Driver) Init(ctx context.Context) error {
|
||||
if strings.TrimSpace(d.rootDir) == "" {
|
||||
return errors.New("spider91: empty rootDir")
|
||||
}
|
||||
for _, sub := range []string{"videos", "thumbs"} {
|
||||
if err := os.MkdirAll(filepath.Join(d.rootDir, sub), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// VideosDir 返回视频文件存放目录的绝对路径。
|
||||
func (d *Driver) VideosDir() string { return filepath.Join(d.rootDir, "videos") }
|
||||
|
||||
// ThumbsDir 返回封面文件存放目录的绝对路径。
|
||||
func (d *Driver) ThumbsDir() string { return filepath.Join(d.rootDir, "thumbs") }
|
||||
|
||||
// RootDir 返回 driver 的存储根。
|
||||
func (d *Driver) RootDir() string { return d.rootDir }
|
||||
|
||||
// VideoPath 返回某个视频文件的绝对路径,并校验路径不会逃出 videos/ 目录。
|
||||
func (d *Driver) VideoPath(fileID string) (string, error) {
|
||||
return safeJoin(d.VideosDir(), fileID)
|
||||
}
|
||||
|
||||
// ThumbPath 返回某个封面文件的绝对路径。
|
||||
func (d *Driver) ThumbPath(fileID string) (string, error) {
|
||||
return safeJoin(d.ThumbsDir(), fileID)
|
||||
}
|
||||
|
||||
// List 列出 videos/ 目录下的视频文件,便于上层做 GC 兜底;
|
||||
// dirID 当前会被忽略,spider91 没有目录树。
|
||||
func (d *Driver) List(ctx context.Context, dirID string) ([]drives.Entry, error) {
|
||||
entries, err := os.ReadDir(d.VideosDir())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
out := make([]drives.Entry, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
info, err := e.Info()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
out = append(out, drives.Entry{
|
||||
ID: e.Name(),
|
||||
Name: e.Name(),
|
||||
Size: info.Size(),
|
||||
IsDir: false,
|
||||
ModTime: info.ModTime(),
|
||||
})
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// Stat 查询单个视频文件的元数据。
|
||||
func (d *Driver) Stat(ctx context.Context, fileID string) (*drives.Entry, error) {
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &drives.Entry{
|
||||
ID: fileID,
|
||||
Name: fileID,
|
||||
Size: info.Size(),
|
||||
IsDir: info.IsDir(),
|
||||
ModTime: info.ModTime(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// StreamURL 返回本地视频文件路径,给 ffmpeg / 上层服务使用。
|
||||
// 注意:proxy.serve 不能直接处理本地路径,回放要走 api.handleSpider91Video。
|
||||
// 预览视频/封面 worker 通过 localPreviewLink 兜底走本地文件,刚好兼容 path 形式的 URL。
|
||||
func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLink, error) {
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if info.IsDir() || info.Size() == 0 {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
return &drives.StreamLink{
|
||||
URL: path,
|
||||
Expires: time.Now().Add(24 * time.Hour),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Upload 不支持:上传由 crawler 自己完成,不通过 Drive 接口。
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
// EnsureDir 不支持。
|
||||
func (d *Driver) EnsureDir(ctx context.Context, pathFromRoot string) (string, error) {
|
||||
return "", drives.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(ctx context.Context, fileID string) error {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
videoPath, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
info, err := os.Stat(videoPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
removeThumbCandidates(d.ThumbPath, strings.TrimSuffix(fileID, filepath.Ext(fileID)))
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return errors.New("spider91: refusing to remove directory")
|
||||
}
|
||||
if err := os.Remove(videoPath); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
removeThumbCandidates(d.ThumbPath, strings.TrimSuffix(fileID, filepath.Ext(fileID)))
|
||||
return nil
|
||||
}
|
||||
|
||||
func removeThumbCandidates(pathFor func(string) (string, error), stem string) {
|
||||
stem = strings.TrimSpace(stem)
|
||||
if stem == "" {
|
||||
return
|
||||
}
|
||||
for _, ext := range []string{".jpg", ".jpeg", ".png", ".webp"} {
|
||||
path, err := pathFor(stem + ext)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
_ = os.Remove(path)
|
||||
}
|
||||
}
|
||||
|
||||
// safeJoin 把 fileID 拼到 root 下,保证最终路径不会逃出 root。
|
||||
// fileID 必须是单纯的文件名(不含 / 或 .. 等组件)。
|
||||
func safeJoin(root, fileID string) (string, error) {
|
||||
id := strings.TrimSpace(fileID)
|
||||
if id == "" || filepath.Base(id) != id {
|
||||
return "", errors.New("spider91: invalid file id")
|
||||
}
|
||||
if root == "" {
|
||||
return "", errors.New("spider91: empty root dir")
|
||||
}
|
||||
rootAbs, err := filepath.Abs(root)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
pathAbs, err := filepath.Abs(filepath.Join(rootAbs, id))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if pathAbs != rootAbs && !strings.HasPrefix(pathAbs, rootAbs+string(os.PathSeparator)) {
|
||||
return "", errors.New("spider91: file id escapes root")
|
||||
}
|
||||
return pathAbs, nil
|
||||
}
|
||||
|
||||
var _ drives.Drive = (*Driver)(nil)
|
||||
var _ drives.Remover = (*Driver)(nil)
|
||||
@@ -1,149 +0,0 @@
|
||||
package spider91
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDriverInitCreatesSubdirs(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: filepath.Join(dir, "drive1")})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
for _, sub := range []string{"videos", "thumbs"} {
|
||||
info, err := os.Stat(filepath.Join(dir, "drive1", sub))
|
||||
if err != nil {
|
||||
t.Fatalf("stat %s: %v", sub, err)
|
||||
}
|
||||
if !info.IsDir() {
|
||||
t.Fatalf("%s is not a dir", sub)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDriverInitRejectsEmptyRoot(t *testing.T) {
|
||||
d := New(Config{ID: "test", RootDir: ""})
|
||||
if err := d.Init(context.Background()); err == nil {
|
||||
t.Fatalf("expected error for empty root")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoPathRejectsTraversal(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
cases := []string{
|
||||
"",
|
||||
" ",
|
||||
"../etc/passwd",
|
||||
"sub/dir.mp4",
|
||||
"./abc.mp4",
|
||||
}
|
||||
for _, c := range cases {
|
||||
if _, err := d.VideoPath(c); err == nil {
|
||||
t.Fatalf("VideoPath(%q) accepted, want error", c)
|
||||
}
|
||||
if _, err := d.ThumbPath(c); err == nil {
|
||||
t.Fatalf("ThumbPath(%q) accepted, want error", c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoPathHappy(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
got, err := d.VideoPath("abc.mp4")
|
||||
if err != nil {
|
||||
t.Fatalf("VideoPath: %v", err)
|
||||
}
|
||||
want := filepath.Join(dir, "videos", "abc.mp4")
|
||||
wantAbs, _ := filepath.Abs(want)
|
||||
if got != wantAbs {
|
||||
t.Fatalf("VideoPath: got %q want %q", got, wantAbs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListReturnsFiles(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "abc.mp4"), "data")
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "def.mp4"), "x")
|
||||
|
||||
entries, err := d.List(context.Background(), "/")
|
||||
if err != nil {
|
||||
t.Fatalf("List: %v", err)
|
||||
}
|
||||
if len(entries) != 2 {
|
||||
t.Fatalf("List len = %d, want 2", len(entries))
|
||||
}
|
||||
names := map[string]int64{}
|
||||
for _, e := range entries {
|
||||
names[e.Name] = e.Size
|
||||
}
|
||||
if names["abc.mp4"] != 4 || names["def.mp4"] != 1 {
|
||||
t.Fatalf("unexpected entries: %+v", names)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLReturnsLocalPath(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "abc.mp4"), "videodata")
|
||||
|
||||
link, err := d.StreamURL(context.Background(), "abc.mp4")
|
||||
if err != nil {
|
||||
t.Fatalf("StreamURL: %v", err)
|
||||
}
|
||||
if !strings.HasSuffix(link.URL, "videos/abc.mp4") {
|
||||
t.Fatalf("StreamURL.URL = %q, want suffix videos/abc.mp4", link.URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamURLEmptyFile(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
d := New(Config{ID: "test", RootDir: dir})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
mustWrite(t, filepath.Join(d.VideosDir(), "abc.mp4"), "")
|
||||
if _, err := d.StreamURL(context.Background(), "abc.mp4"); !errors.Is(err, os.ErrNotExist) {
|
||||
t.Fatalf("empty file should return os.ErrNotExist, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildVideoIDStable(t *testing.T) {
|
||||
id1 := BuildVideoID("crawler1", "abc")
|
||||
id2 := BuildVideoID("crawler1", "abc")
|
||||
if id1 != id2 {
|
||||
t.Fatalf("BuildVideoID not deterministic")
|
||||
}
|
||||
if id1 != "spider91-crawler1-abc" {
|
||||
t.Fatalf("BuildVideoID format unexpected: %q", id1)
|
||||
}
|
||||
}
|
||||
|
||||
// mustWrite writes content to path (mode 0o644), creating the parent
// directories (mode 0o755) first, and fails the test on any error.
func mustWrite(t *testing.T, path, content string) {
	t.Helper()
	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
		t.Fatalf("mkdir: %v", mkErr)
	}
	writeErr := os.WriteFile(path, []byte(content), 0o644)
	if writeErr != nil {
		t.Fatalf("write: %v", writeErr)
	}
}
|
||||
@@ -1,55 +0,0 @@
|
||||
package spider91
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestDetectVideoExt(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
want string
|
||||
}{
|
||||
{"mp4 with token", "https://cdn.example.com/mp43/abc.mp4?st=xyz&e=12345", ".mp4"},
|
||||
{"webm", "https://cdn.example.com/path/video.webm?token=1", ".webm"},
|
||||
{"mkv", "https://cdn.example.com/path/foo.mkv", ".mkv"},
|
||||
{"mov", "https://cdn.example.com/path/foo.mov?x=1", ".mov"},
|
||||
{"flv", "https://cdn.example.com/path/foo.flv", ".flv"},
|
||||
{"m4v", "https://cdn.example.com/path/foo.m4v", ".m4v"},
|
||||
{"avi", "https://cdn.example.com/path/foo.avi", ".avi"},
|
||||
{"m3u8 fallback to mp4", "https://cdn.example.com/path/playlist.m3u8", ".mp4"},
|
||||
{"ts fallback to mp4", "https://cdn.example.com/path/seg001.ts", ".mp4"},
|
||||
{"unknown ext fallback", "https://cdn.example.com/path/foo.weird", ".mp4"},
|
||||
{"no ext fallback", "https://cdn.example.com/v.php?id=12345", ".mp4"},
|
||||
{"empty url", "", ".mp4"},
|
||||
{"uppercase", "https://cdn.example.com/path/FOO.MP4?token=1", ".mp4"},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := detectVideoExt(tc.url)
|
||||
if got != tc.want {
|
||||
t.Fatalf("detectVideoExt(%q) = %q, want %q", tc.url, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectThumbExt(t *testing.T) {
|
||||
tests := []struct {
|
||||
url string
|
||||
want string
|
||||
}{
|
||||
{"https://cdn.example.com/thumb/foo.jpg", ".jpg"},
|
||||
{"https://cdn.example.com/thumb/foo.jpeg", ".jpeg"},
|
||||
{"https://cdn.example.com/thumb/foo.png", ".png"},
|
||||
{"https://cdn.example.com/thumb/foo.webp", ".webp"},
|
||||
{"https://cdn.example.com/thumb/foo.gif", ".gif"},
|
||||
{"https://cdn.example.com/thumb/foo.svg", ".jpg"}, // not in whitelist
|
||||
{"https://cdn.example.com/thumb/no-ext", ".jpg"},
|
||||
{"", ".jpg"},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
got := detectThumbExt(tc.url)
|
||||
if got != tc.want {
|
||||
t.Fatalf("detectThumbExt(%q) = %q, want %q", tc.url, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -27,6 +27,7 @@ type Driver struct {
|
||||
refreshToken string
|
||||
client *sdk.WoClient
|
||||
onTokenUpdate func(access, refresh string)
|
||||
uploadTempDir string
|
||||
|
||||
listMu sync.Mutex
|
||||
lastListAt time.Time
|
||||
@@ -38,11 +39,12 @@ type Driver struct {
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
ID string
|
||||
AccessToken string
|
||||
RefreshToken string
|
||||
FamilyID string // 空则走个人空间,有值则走家庭空间
|
||||
RootID string // 根目录 ID,默认 "0"
|
||||
ID string
|
||||
AccessToken string
|
||||
RefreshToken string
|
||||
FamilyID string // 空则走个人空间,有值则走家庭空间
|
||||
RootID string // 根目录 ID,默认 "0"
|
||||
UploadTempDir string
|
||||
// 当 SDK 刷新 token 时回调,便于持久化
|
||||
OnTokenUpdate func(access, refresh string)
|
||||
}
|
||||
@@ -59,6 +61,7 @@ func New(c Config) *Driver {
|
||||
accessToken: c.AccessToken,
|
||||
refreshToken: c.RefreshToken,
|
||||
onTokenUpdate: c.OnTokenUpdate,
|
||||
uploadTempDir: strings.TrimSpace(c.UploadTempDir),
|
||||
listInterval: 800 * time.Millisecond,
|
||||
listCooldown: 5 * time.Minute,
|
||||
fidToID: make(map[string]string),
|
||||
@@ -162,7 +165,12 @@ func (d *Driver) StreamURL(ctx context.Context, fileID string) (*drives.StreamLi
|
||||
|
||||
func (d *Driver) Upload(ctx context.Context, parentID, name string, r io.Reader, size int64) (string, error) {
|
||||
// wopan SDK 要求 *os.File,先把流落到临时文件再上传
|
||||
tmp, err := os.CreateTemp("", "wopan-upload-*.tmp")
|
||||
if d.uploadTempDir != "" {
|
||||
if err := os.MkdirAll(d.uploadTempDir, 0o755); err != nil {
|
||||
return "", fmt.Errorf("wopan upload: create tmp dir: %w", err)
|
||||
}
|
||||
}
|
||||
tmp, err := os.CreateTemp(d.uploadTempDir, "wopan-upload-*.tmp")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -510,42 +518,14 @@ func isWopanRateLimitError(err error) bool {
|
||||
if err == nil || errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
return false
|
||||
}
|
||||
text := strings.ToLower(strings.TrimSpace(err.Error()))
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(text, "status: 429") ||
|
||||
strings.Contains(text, "status 429") ||
|
||||
strings.Contains(text, "http status: 429") ||
|
||||
strings.Contains(text, "status: 500") ||
|
||||
strings.Contains(text, "status 500") ||
|
||||
strings.Contains(text, "status: 502") ||
|
||||
strings.Contains(text, "status 502") ||
|
||||
strings.Contains(text, "status: 503") ||
|
||||
strings.Contains(text, "status 503") ||
|
||||
strings.Contains(text, "status: 504") ||
|
||||
strings.Contains(text, "status 504") ||
|
||||
strings.Contains(text, "status: 509") ||
|
||||
strings.Contains(text, "status 509") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "rate-limit") ||
|
||||
strings.Contains(text, "throttl") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "request has been blocked") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "请求频繁") ||
|
||||
strings.Contains(text, "请求太频繁") ||
|
||||
strings.Contains(text, "请求过于频繁") ||
|
||||
strings.Contains(text, "频率限制") ||
|
||||
strings.Contains(text, "请求次数过多") ||
|
||||
strings.Contains(text, "系统繁忙") ||
|
||||
strings.Contains(text, "服务繁忙") ||
|
||||
strings.Contains(text, "稍后再试") ||
|
||||
strings.Contains(text, "稍后重试") ||
|
||||
strings.Contains(text, "访问被阻断") ||
|
||||
strings.Contains(text, "风控")
|
||||
return drives.ErrorMentionsHTTPStatus(err,
|
||||
http.StatusTooManyRequests,
|
||||
http.StatusInternalServerError,
|
||||
http.StatusBadGateway,
|
||||
http.StatusServiceUnavailable,
|
||||
http.StatusGatewayTimeout,
|
||||
509,
|
||||
)
|
||||
}
|
||||
|
||||
func guessMime(name string) string {
|
||||
|
||||
@@ -372,37 +372,10 @@ func remoteRangeResponseLooksRateLimited(rawURL string, status int, body []byte)
|
||||
status == 509) {
|
||||
return true
|
||||
}
|
||||
text := strings.ToLower(strings.TrimSpace(string(body)))
|
||||
compact := compactRemoteRangeErrorText(text)
|
||||
if strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "quota exceeded") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "请求频繁") ||
|
||||
strings.Contains(text, "请求太频繁") ||
|
||||
strings.Contains(text, "请求过于频繁") ||
|
||||
strings.Contains(text, "频率限制") ||
|
||||
strings.Contains(text, "请求次数过多") ||
|
||||
strings.Contains(text, "系统繁忙") ||
|
||||
strings.Contains(text, "服务繁忙") ||
|
||||
strings.Contains(text, "稍后再试") ||
|
||||
strings.Contains(text, "稍后重试") ||
|
||||
strings.Contains(text, "访问被阻断") ||
|
||||
strings.Contains(text, "风控") ||
|
||||
strings.Contains(text, "download quota") ||
|
||||
strings.Contains(text, "sharing rate") ||
|
||||
strings.Contains(text, "daily limit") ||
|
||||
strings.Contains(text, "user rate") ||
|
||||
strings.Contains(text, "usage limit") ||
|
||||
strings.Contains(compact, "ratelimitexceeded") ||
|
||||
strings.Contains(compact, "userratelimitexceeded") ||
|
||||
strings.Contains(compact, "dailylimitexceeded") ||
|
||||
strings.Contains(compact, "downloadquotaexceeded") ||
|
||||
strings.Contains(compact, "sharingratelimitexceeded") ||
|
||||
strings.Contains(compact, "quotaexceeded") ||
|
||||
strings.Contains(compact, "toomanyrequests") ||
|
||||
strings.Contains(compact, "usagelimits") {
|
||||
if isGuangYaPanMediaURL(rawURL) && (status == http.StatusForbidden || status == http.StatusTooManyRequests ||
|
||||
status == http.StatusInternalServerError || status == http.StatusBadGateway ||
|
||||
status == http.StatusServiceUnavailable || status == http.StatusGatewayTimeout ||
|
||||
status == 509) {
|
||||
return true
|
||||
}
|
||||
if status == http.StatusForbidden && isGoogleDriveMediaURL(rawURL) {
|
||||
@@ -424,6 +397,16 @@ func isWopanMediaURL(rawURL string) bool {
|
||||
strings.Contains(path, "/openapi/download")
|
||||
}
|
||||
|
||||
// isGuangYaPanMediaURL reports whether rawURL points at a GuangYaPan host,
// i.e. guangyacdn.com, guangyapan.com or any subdomain of either. It returns
// false when rawURL cannot be parsed.
//
// The host is matched on a label boundary, so an unrelated domain that merely
// ends in the same characters (e.g. "notguangyacdn.com") does not match.
func isGuangYaPanMediaURL(rawURL string) bool {
	u, err := url.Parse(rawURL)
	if err != nil {
		return false
	}
	// Hostname drops the port; also ignore the trailing dot of a fully
	// qualified name.
	host := strings.TrimSuffix(strings.ToLower(u.Hostname()), ".")
	for _, domain := range []string{"guangyacdn.com", "guangyapan.com"} {
		if host == domain || strings.HasSuffix(host, "."+domain) {
			return true
		}
	}
	return false
}
|
||||
|
||||
func isGoogleDriveMediaURL(rawURL string) bool {
|
||||
u, err := url.Parse(rawURL)
|
||||
if err != nil {
|
||||
@@ -434,11 +417,6 @@ func isGoogleDriveMediaURL(rawURL string) bool {
|
||||
return strings.Contains(host, "googleapis.com") && strings.Contains(path, "/drive/")
|
||||
}
|
||||
|
||||
// compactRemoteRangeErrorText normalizes an error body for substring matching:
// it trims surrounding whitespace, lower-cases the text and then removes every
// "_", "-", " ", "." and ":" character.
func compactRemoteRangeErrorText(text string) string {
	compact := strings.ToLower(strings.TrimSpace(text))
	for _, drop := range []string{"_", "-", " ", ".", ":"} {
		compact = strings.ReplaceAll(compact, drop, "")
	}
	return compact
}
|
||||
|
||||
func parseRetryAfter(raw string) time.Duration {
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
|
||||
@@ -86,16 +86,16 @@ func TestComputeRemoteUsesRangeSamples(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeRemoteGoogleQuotaExceededReturnsRateLimit(t *testing.T) {
|
||||
func TestComputeRemote429ReturnsRateLimit(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Retry-After", "60")
|
||||
w.WriteHeader(http.StatusForbidden)
|
||||
_, _ = w.Write([]byte(`{"error":{"code":403,"message":"The download quota for this file has been exceeded.","errors":[{"domain":"usageLimits","reason":"downloadQuotaExceeded","message":"The download quota for this file has been exceeded."}]}}`))
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
_, _ = w.Write([]byte(`{"error":{"code":429}}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
drv := &fakeDrive{paths: map[string]string{"remote": srv.URL + "/drive/v3/files/file-1?alt=media"}}
|
||||
drv := &fakeDrive{paths: map[string]string{"remote": srv.URL + "/video.mp4"}}
|
||||
_, err := Compute(ctx, drv, &catalog.Video{ID: "remote", FileID: "remote", Size: 1024 * 1024}, Config{
|
||||
SampleSizeBytes: 4,
|
||||
FullHashMaxSize: 8,
|
||||
@@ -131,6 +131,30 @@ func TestWopanRemoteRangeErrorsLookRateLimited(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGuangYaPanRemoteRangeErrorsLookRateLimited(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
rawURL string
|
||||
status int
|
||||
}{
|
||||
{rawURL: "https://txgz02-httpdown.guangyacdn.com/download/?fid=encoded", status: http.StatusForbidden},
|
||||
{rawURL: "https://txgz02-httpdown.guangyacdn.com/download/?fid=encoded", status: http.StatusServiceUnavailable},
|
||||
{rawURL: "https://txgz02-httpdown.guangyacdn.com/download/?fid=encoded", status: 509},
|
||||
} {
|
||||
if !remoteRangeResponseLooksRateLimited(tc.rawURL, tc.status, nil) {
|
||||
t.Fatalf("remoteRangeResponseLooksRateLimited(%q, %d) = false, want true", tc.rawURL, tc.status)
|
||||
}
|
||||
}
|
||||
if remoteRangeResponseLooksRateLimited("https://example.com/video.mp4", http.StatusForbidden, nil) {
|
||||
t.Fatal("generic 403 should not be treated as guangyapan rate limit")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoogleDriveRemoteRangeForbiddenLooksRateLimitedByURL(t *testing.T) {
|
||||
if !remoteRangeResponseLooksRateLimited("https://www.googleapis.com/drive/v3/files/file-1?alt=media", http.StatusForbidden, nil) {
|
||||
t.Fatal("google drive media 403 should be treated as rate limit by URL and status")
|
||||
}
|
||||
}
|
||||
|
||||
type fakeDrive struct {
|
||||
paths map[string]string
|
||||
}
|
||||
|
||||
@@ -0,0 +1,280 @@
|
||||
package mediasim
|
||||
|
||||
import (
|
||||
"image"
|
||||
_ "image/gif"
|
||||
_ "image/jpeg"
|
||||
_ "image/png"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
const (
|
||||
ssimSampleSize = 96
|
||||
minCoreTitleRunes = 12
|
||||
)
|
||||
|
||||
var titleCoreSeparators = []string{
|
||||
" - ",
|
||||
" -- ",
|
||||
" — ",
|
||||
" – ",
|
||||
" | ",
|
||||
" | ",
|
||||
"_",
|
||||
"_",
|
||||
"-",
|
||||
"—",
|
||||
"–",
|
||||
"-",
|
||||
"|",
|
||||
}
|
||||
|
||||
// TitleSimilarity returns the best normalized Levenshtein similarity in [0, 1]
|
||||
// between the full titles and their leading core title segments.
|
||||
func TitleSimilarity(a, b string) float64 {
|
||||
leftVariants := titleVariants(a)
|
||||
rightVariants := titleVariants(b)
|
||||
if len(leftVariants) == 0 && len(rightVariants) == 0 {
|
||||
return 1
|
||||
}
|
||||
if len(leftVariants) == 0 || len(rightVariants) == 0 {
|
||||
return 0
|
||||
}
|
||||
best := 0.0
|
||||
for _, left := range leftVariants {
|
||||
for _, right := range rightVariants {
|
||||
score := normalizedLevenshteinSimilarity(left, right)
|
||||
if score > best {
|
||||
best = score
|
||||
}
|
||||
}
|
||||
}
|
||||
return best
|
||||
}
|
||||
|
||||
// TitleKeys returns the normalized full title and core-title variants used by
|
||||
// TitleSimilarity. It is intended for cheap caller-side prefiltering before
|
||||
// running the heavier Levenshtein comparison.
|
||||
func TitleKeys(value string) []string {
|
||||
return append([]string(nil), titleVariants(value)...)
|
||||
}
|
||||
|
||||
func normalizedLevenshteinSimilarity(left, right string) float64 {
|
||||
leftRunes := []rune(left)
|
||||
rightRunes := []rune(right)
|
||||
if len(leftRunes) == 0 && len(rightRunes) == 0 {
|
||||
return 1
|
||||
}
|
||||
if len(leftRunes) == 0 || len(rightRunes) == 0 {
|
||||
return 0
|
||||
}
|
||||
maxLen := len(leftRunes)
|
||||
if len(rightRunes) > maxLen {
|
||||
maxLen = len(rightRunes)
|
||||
}
|
||||
return 1 - float64(levenshtein(leftRunes, rightRunes))/float64(maxLen)
|
||||
}
|
||||
|
||||
func titleVariants(value string) []string {
|
||||
full := normalizeTitle(value)
|
||||
if full == "" {
|
||||
return nil
|
||||
}
|
||||
out := appendTitleVariant(nil, full)
|
||||
if core := normalizeTitleCore(value); core != "" && core != full {
|
||||
out = appendTitleVariant(out, core)
|
||||
}
|
||||
for _, tail := range titleTailVariants(value) {
|
||||
normalized := normalizeTitle(tail)
|
||||
if len([]rune(normalized)) >= minCoreTitleRunes {
|
||||
out = appendTitleVariant(out, normalized)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// appendTitleVariant appends value to out unless an equal string is already
// present, in which case out is returned unchanged.
func appendTitleVariant(out []string, value string) []string {
	for i := 0; i < len(out); i++ {
		if out[i] == value {
			return out
		}
	}
	out = append(out, value)
	return out
}
|
||||
|
||||
// titleTailVariants returns the trimmed text that follows the last "@" marker
// in value, for both the ASCII "@" and the full-width "\uFF20" form. A marker
// that is missing, or that is the final character of the trimmed title,
// contributes nothing. The result is nil when value is blank or has no usable
// marker.
//
// The full-width marker is written as an escape so it stays distinguishable
// from ASCII "@"; listing the same literal twice would only emit the same
// tail twice.
func titleTailVariants(value string) []string {
	value = strings.TrimSpace(value)
	if value == "" {
		return nil
	}
	var out []string
	for _, sep := range []string{"@", "\uFF20"} {
		idx := strings.LastIndex(value, sep)
		// Skip when the marker is absent or nothing follows it.
		if idx < 0 || idx+len(sep) >= len(value) {
			continue
		}
		out = append(out, strings.TrimSpace(value[idx+len(sep):]))
	}
	return out
}
|
||||
|
||||
func normalizeTitleCore(value string) string {
|
||||
head := strings.TrimSpace(value)
|
||||
for _, sep := range titleCoreSeparators {
|
||||
if idx := strings.Index(head, sep); idx > 0 {
|
||||
head = strings.TrimSpace(head[:idx])
|
||||
break
|
||||
}
|
||||
}
|
||||
normalized := normalizeTitle(head)
|
||||
if len([]rune(normalized)) < minCoreTitleRunes {
|
||||
return ""
|
||||
}
|
||||
return normalized
|
||||
}
|
||||
|
||||
// normalizeTitle reduces a title to a comparison key: it trims and lower-cases
// the input, strips at most one known video file extension from the end, and
// keeps only letters and digits. If that leaves nothing (for example a title
// made purely of punctuation), the lower-cased text with all whitespace
// removed is returned instead.
func normalizeTitle(value string) string {
	lowered := strings.ToLower(strings.TrimSpace(value))

	extensions := [...]string{".mp4", ".m4v", ".mkv", ".mov", ".avi", ".webm", ".ts", ".m3u8"}
	for i := range extensions {
		// Only the first matching extension is removed.
		if stripped := strings.TrimSuffix(lowered, extensions[i]); len(stripped) != len(lowered) {
			lowered = stripped
			break
		}
	}

	kept := make([]rune, 0, len(lowered))
	for _, r := range lowered {
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			kept = append(kept, r)
		}
	}
	if len(kept) > 0 {
		return string(kept)
	}
	return strings.Join(strings.Fields(lowered), "")
}
|
||||
|
||||
// levenshtein returns the edit distance (insertions, deletions and
// substitutions, each costing 1) between two rune slices. It keeps only two
// rows of the dynamic-programming table, sized by the shorter input.
func levenshtein(a, b []rune) int {
	long, short := a, b
	if len(long) < len(short) {
		long, short = short, long
	}

	width := len(short) + 1
	prevRow := make([]int, width)
	currRow := make([]int, width)
	for col := 0; col < width; col++ {
		prevRow[col] = col
	}

	for row, longRune := range long {
		currRow[0] = row + 1
		for col, shortRune := range short {
			// Start from the substitution cost, then see whether a
			// deletion or an insertion is cheaper.
			best := prevRow[col]
			if longRune != shortRune {
				best++
			}
			if deletion := prevRow[col+1] + 1; deletion < best {
				best = deletion
			}
			if insertion := currRow[col] + 1; insertion < best {
				best = insertion
			}
			currRow[col+1] = best
		}
		prevRow, currRow = currRow, prevRow
	}
	return prevRow[len(short)]
}
|
||||
|
||||
// minInt returns the smallest of its arguments. At least one argument is
// required: the first element is read unconditionally, so calling it with
// none panics.
func minInt(values ...int) int {
	smallest := values[0]
	for i := 1; i < len(values); i++ {
		if values[i] < smallest {
			smallest = values[i]
		}
	}
	return smallest
}
|
||||
|
||||
// ImageSSIM compares two local images using luminance SSIM over a fixed grid.
|
||||
func ImageSSIM(leftPath, rightPath string) (float64, error) {
|
||||
left, err := decodeImage(leftPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
right, err := decodeImage(rightPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return SSIM(left, right), nil
|
||||
}
|
||||
|
||||
// decodeImage opens the file at path and decodes it with image.Decode, so
// only formats registered with the image package can be read. Open and decode
// errors are returned as-is with a nil image; the file is closed before
// returning.
func decodeImage(path string) (image.Image, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	if decoded, _, decodeErr := image.Decode(file); decodeErr == nil {
		return decoded, nil
	} else {
		return nil, decodeErr
	}
}
|
||||
|
||||
// SSIM compares two images after nearest-neighbor sampling onto the same grid.
|
||||
func SSIM(left, right image.Image) float64 {
|
||||
if left == nil || right == nil {
|
||||
return 0
|
||||
}
|
||||
leftSamples := grayscaleSamples(left, ssimSampleSize, ssimSampleSize)
|
||||
rightSamples := grayscaleSamples(right, ssimSampleSize, ssimSampleSize)
|
||||
if len(leftSamples) == 0 || len(leftSamples) != len(rightSamples) {
|
||||
return 0
|
||||
}
|
||||
|
||||
var leftMean, rightMean float64
|
||||
for i := range leftSamples {
|
||||
leftMean += leftSamples[i]
|
||||
rightMean += rightSamples[i]
|
||||
}
|
||||
n := float64(len(leftSamples))
|
||||
leftMean /= n
|
||||
rightMean /= n
|
||||
|
||||
var leftVariance, rightVariance, covariance float64
|
||||
for i := range leftSamples {
|
||||
leftDelta := leftSamples[i] - leftMean
|
||||
rightDelta := rightSamples[i] - rightMean
|
||||
leftVariance += leftDelta * leftDelta
|
||||
rightVariance += rightDelta * rightDelta
|
||||
covariance += leftDelta * rightDelta
|
||||
}
|
||||
leftVariance /= n
|
||||
rightVariance /= n
|
||||
covariance /= n
|
||||
|
||||
const c1 = 6.5025 // (0.01 * 255)^2
|
||||
const c2 = 58.5225 // (0.03 * 255)^2
|
||||
denominator := (leftMean*leftMean + rightMean*rightMean + c1) * (leftVariance + rightVariance + c2)
|
||||
if denominator == 0 {
|
||||
return 0
|
||||
}
|
||||
score := ((2*leftMean*rightMean + c1) * (2*covariance + c2)) / denominator
|
||||
if math.IsNaN(score) || math.IsInf(score, 0) {
|
||||
return 0
|
||||
}
|
||||
return score
|
||||
}
|
||||
|
||||
func grayscaleSamples(img image.Image, width, height int) []float64 {
|
||||
bounds := img.Bounds()
|
||||
if bounds.Dx() <= 0 || bounds.Dy() <= 0 || width <= 0 || height <= 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]float64, 0, width*height)
|
||||
for y := 0; y < height; y++ {
|
||||
sourceY := bounds.Min.Y + y*bounds.Dy()/height
|
||||
for x := 0; x < width; x++ {
|
||||
sourceX := bounds.Min.X + x*bounds.Dx()/width
|
||||
r, g, b, _ := img.At(sourceX, sourceY).RGBA()
|
||||
out = append(out, 0.299*float64(r>>8)+0.587*float64(g>>8)+0.114*float64(b>>8))
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package mediasim
|
||||
|
||||
import (
|
||||
"image"
|
||||
"image/color"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTitleSimilarityNormalizesPunctuationAndWhitespace(t *testing.T) {
|
||||
score := TitleSimilarity("AB-123 测试视频.mp4", "ab123测试视频")
|
||||
if score < 0.90 {
|
||||
t.Fatalf("similarity = %.3f, want >= 0.90", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTitleSimilarityUsesLeadingCoreTitle(t *testing.T) {
|
||||
score := TitleSimilarity(
|
||||
"反差极品大二女友,叫声可射~,“射进小骚逼里面~” - 性感小皮鞭",
|
||||
"反差极品大二女友,叫声可射~,“射进小骚逼里面~”",
|
||||
)
|
||||
if score < 0.99 {
|
||||
t.Fatalf("similarity = %.3f, want core-title match", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTitleSimilarityDoesNotMatchBySharedSuffixOnly(t *testing.T) {
|
||||
score := TitleSimilarity(
|
||||
"高颜值大学生宿舍自拍视频完整流出 - 同一个来源",
|
||||
"户外旅行风景记录城市夜景合集 - 同一个来源",
|
||||
)
|
||||
if score >= 0.90 {
|
||||
t.Fatalf("similarity = %.3f, want < 0.90", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTitleSimilarityRejectsDifferentTitles(t *testing.T) {
|
||||
score := TitleSimilarity("完全不同的视频标题", "another unrelated movie")
|
||||
if score >= 0.90 {
|
||||
t.Fatalf("similarity = %.3f, want < 0.90", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSSIMScoresIdenticalAndDifferentImages(t *testing.T) {
|
||||
red := solidImage(color.RGBA{R: 220, G: 20, B: 20, A: 255})
|
||||
redAgain := solidImage(color.RGBA{R: 220, G: 20, B: 20, A: 255})
|
||||
blue := solidImage(color.RGBA{R: 20, G: 20, B: 220, A: 255})
|
||||
|
||||
if score := SSIM(red, redAgain); score < 0.999 {
|
||||
t.Fatalf("identical SSIM = %.6f, want close to 1", score)
|
||||
}
|
||||
if score := SSIM(red, blue); score >= 0.95 {
|
||||
t.Fatalf("different SSIM = %.6f, want < 0.95", score)
|
||||
}
|
||||
}
|
||||
|
||||
func solidImage(c color.RGBA) image.Image {
|
||||
img := image.NewRGBA(image.Rect(0, 0, 32, 32))
|
||||
for y := 0; y < 32; y++ {
|
||||
for x := 0; x < 32; x++ {
|
||||
img.SetRGBA(x, y, c)
|
||||
}
|
||||
}
|
||||
return img
|
||||
}
|
||||
@@ -1,19 +1,19 @@
|
||||
// Package nightly orchestrates the single nightly maintenance pipeline that
|
||||
// replaces the legacy scanLoop / crawlerLoop / spider91 migrator periodic loop.
|
||||
// replaces the legacy scanLoop / crawlerLoop / crawler upload periodic loop.
|
||||
//
|
||||
// Pipeline (fired once per day at cron_hour, also via TriggerNow for admin
|
||||
// "扫描所有网盘"):
|
||||
//
|
||||
// Phase 1: for each non-spider91 cloud drive
|
||||
// Phase 1: for each non-crawler cloud drive
|
||||
// scan + delete-detection + enqueue thumb + enqueue preview video
|
||||
// wait until all thumb / preview-video queues are idle
|
||||
// Phase 2: if any spider91 drive configured
|
||||
// Phase 2: if any script crawler configured
|
||||
// crawl + enqueue preview video for new videos
|
||||
// wait until preview-video queues are idle
|
||||
// Phase 3: spider91 → cloud migration (single sweep, captcha cooldown still
|
||||
// Phase 3: crawler local video → cloud upload (single sweep, captcha cooldown still
|
||||
// honored within this call)
|
||||
// Phase 4: cleanup duplicate local preview/thumbnail assets after sampled
|
||||
// fingerprints have identified canonical videos
|
||||
// Phase 4: full-library duplicate video maintenance:
|
||||
// exact size+sampled_sha256 dedupe, then title/duration/thumbnail dedupe
|
||||
//
|
||||
// A 6h soft deadline guards each pipeline run; phases check deadline at their
|
||||
// boundaries and exit cleanly if exceeded (no in-flight ffmpeg / upload is
|
||||
@@ -64,32 +64,32 @@ type Config struct {
|
||||
MaxDuration time.Duration
|
||||
|
||||
// ListScanTargets returns the drive IDs to run Phase 1 on, in deterministic
|
||||
// order. Should exclude spider91 and localupload drives.
|
||||
// order. Should exclude crawler and localupload drives.
|
||||
ListScanTargets func(ctx context.Context) []string
|
||||
|
||||
// RunScan synchronously runs scan + cleanup + enqueueDriveGeneration for
|
||||
// one drive. Errors are expected to be logged inside, not surfaced.
|
||||
RunScan func(ctx context.Context, driveID string)
|
||||
|
||||
// ListSpider91Drives returns spider91 drive IDs to crawl in Phase 2.
|
||||
// Returns empty slice when no spider91 drive is configured.
|
||||
ListSpider91Drives func(ctx context.Context) []string
|
||||
// ListCrawlerDrives returns script crawler drive IDs to crawl in Phase 2.
|
||||
// Returns empty slice when no crawler is configured.
|
||||
ListCrawlerDrives func(ctx context.Context) []string
|
||||
|
||||
// RunSpider91Crawl synchronously runs one crawl cycle (downloads + thumbs +
|
||||
// preview-video enqueue) for a single spider91 drive.
|
||||
RunSpider91Crawl func(ctx context.Context, driveID string)
|
||||
// RunCrawlerCrawl synchronously runs one crawl cycle (downloads + thumbs +
|
||||
// preview-video enqueue) for a single crawler drive.
|
||||
RunCrawlerCrawl func(ctx context.Context, driveID string)
|
||||
|
||||
// WaitPreviewQueuesIdle blocks until both the thumbnail and preview-video queues
|
||||
// across all drives are drained (queue empty + no in-flight task). It must
|
||||
// honor ctx cancellation.
|
||||
WaitPreviewQueuesIdle func(ctx context.Context) error
|
||||
|
||||
// RunMigration runs spider91migrate.Migrator.RunOnce for Phase 3.
|
||||
// RunMigration runs crawlerupload.Migrator.RunOnce for Phase 3.
|
||||
RunMigration func(ctx context.Context) error
|
||||
|
||||
// RunDedupeAssetCleanup removes generated local assets from non-canonical
|
||||
// videos in size+sampled_sha256 duplicate groups. It must not delete cloud
|
||||
// files or catalog rows.
|
||||
// RunDedupeAssetCleanup runs full-library duplicate video maintenance. It
|
||||
// removes duplicate catalog rows and local generated assets, but never
|
||||
// deletes cloud source files.
|
||||
RunDedupeAssetCleanup func(ctx context.Context) error
|
||||
|
||||
// Now is injected for tests; nil → time.Now.
|
||||
@@ -351,23 +351,23 @@ func (r *Runner) runPipeline(ctx context.Context) {
|
||||
if r.checkDeadline(ctx, "phase 2") {
|
||||
return
|
||||
}
|
||||
spiderIDs := []string{}
|
||||
if r.cfg.ListSpider91Drives != nil {
|
||||
spiderIDs = r.cfg.ListSpider91Drives(ctx)
|
||||
crawlerIDs := []string{}
|
||||
if r.cfg.ListCrawlerDrives != nil {
|
||||
crawlerIDs = r.cfg.ListCrawlerDrives(ctx)
|
||||
}
|
||||
if len(spiderIDs) == 0 {
|
||||
log.Printf("[nightly] phase 2/3 skipped: no spider91 drive configured")
|
||||
if len(crawlerIDs) == 0 {
|
||||
log.Printf("[nightly] phase 2/3 skipped: no crawler configured")
|
||||
r.runDedupeAssetCleanupPhase(ctx)
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 2: crawling %d spider91 drive(s)", len(spiderIDs))
|
||||
for _, id := range spiderIDs {
|
||||
log.Printf("[nightly] phase 2: crawling %d crawler drive(s)", len(crawlerIDs))
|
||||
for _, id := range crawlerIDs {
|
||||
if ctx.Err() != nil {
|
||||
log.Printf("[nightly] phase 2 aborted by ctx: %v", ctx.Err())
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 2: crawling drive=%s", id)
|
||||
r.cfg.RunSpider91Crawl(ctx, id)
|
||||
r.cfg.RunCrawlerCrawl(ctx, id)
|
||||
}
|
||||
log.Printf("[nightly] phase 2: waiting for teaser queue to drain")
|
||||
if err := r.waitIdle(ctx, "phase 2"); err != nil {
|
||||
@@ -378,7 +378,7 @@ func (r *Runner) runPipeline(ctx context.Context) {
|
||||
if r.checkDeadline(ctx, "phase 3") {
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 3: spider91 migration")
|
||||
log.Printf("[nightly] phase 3: crawler upload")
|
||||
if r.cfg.RunMigration != nil {
|
||||
if err := r.cfg.RunMigration(ctx); err != nil {
|
||||
log.Printf("[nightly] phase 3 migration: %v", err)
|
||||
@@ -418,9 +418,9 @@ func (r *Runner) runDedupeAssetCleanupPhase(ctx context.Context) {
|
||||
if r.cfg.RunDedupeAssetCleanup == nil {
|
||||
return
|
||||
}
|
||||
log.Printf("[nightly] phase 4: duplicate asset cleanup")
|
||||
log.Printf("[nightly] phase 4: duplicate video maintenance")
|
||||
if err := r.cfg.RunDedupeAssetCleanup(ctx); err != nil {
|
||||
log.Printf("[nightly] phase 4 duplicate asset cleanup: %v", err)
|
||||
log.Printf("[nightly] phase 4 duplicate video maintenance: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -99,11 +99,11 @@ func TestRunPipelineHonoursPhaseOrder(t *testing.T) {
|
||||
RunScan: func(_ context.Context, id string) {
|
||||
rec.push("scan:" + id)
|
||||
},
|
||||
ListSpider91Drives: func(context.Context) []string {
|
||||
rec.push("list-spider")
|
||||
ListCrawlerDrives: func(context.Context) []string {
|
||||
rec.push("list-crawler")
|
||||
return []string{"sp-1"}
|
||||
},
|
||||
RunSpider91Crawl: func(_ context.Context, id string) {
|
||||
RunCrawlerCrawl: func(_ context.Context, id string) {
|
||||
rec.push("crawl:" + id)
|
||||
},
|
||||
WaitPreviewQueuesIdle: func(context.Context) error {
|
||||
@@ -128,7 +128,7 @@ func TestRunPipelineHonoursPhaseOrder(t *testing.T) {
|
||||
"scan:drive-a",
|
||||
"scan:drive-b",
|
||||
"wait-idle", // after phase 1
|
||||
"list-spider",
|
||||
"list-crawler",
|
||||
"crawl:sp-1",
|
||||
"wait-idle", // after phase 2
|
||||
"migrate",
|
||||
@@ -144,15 +144,15 @@ func TestRunPipelineHonoursPhaseOrder(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunPipelineSkipsMigrationWhenNoSpider91(t *testing.T) {
|
||||
func TestRunPipelineSkipsMigrationWhenNoCrawler(t *testing.T) {
|
||||
rec := &recorder{}
|
||||
|
||||
r := New(Config{
|
||||
Settings: newStubSettings(),
|
||||
ListScanTargets: func(context.Context) []string { return []string{"drive-a"} },
|
||||
RunScan: func(_ context.Context, id string) { rec.push("scan:" + id) },
|
||||
ListSpider91Drives: func(context.Context) []string { return nil },
|
||||
RunSpider91Crawl: func(_ context.Context, id string) { rec.push("crawl:" + id) },
|
||||
Settings: newStubSettings(),
|
||||
ListScanTargets: func(context.Context) []string { return []string{"drive-a"} },
|
||||
RunScan: func(_ context.Context, id string) { rec.push("scan:" + id) },
|
||||
ListCrawlerDrives: func(context.Context) []string { return nil },
|
||||
RunCrawlerCrawl: func(_ context.Context, id string) { rec.push("crawl:" + id) },
|
||||
WaitPreviewQueuesIdle: func(context.Context) error {
|
||||
rec.push("wait-idle")
|
||||
return nil
|
||||
@@ -171,7 +171,7 @@ func TestRunPipelineSkipsMigrationWhenNoSpider91(t *testing.T) {
|
||||
|
||||
for _, c := range rec.snapshot() {
|
||||
if c == "migrate" || c == "crawl:sp-1" {
|
||||
t.Fatalf("phase 2/3 should be skipped when no spider91 drive, got call %q", c)
|
||||
t.Fatalf("phase 2/3 should be skipped when no crawler, got call %q", c)
|
||||
}
|
||||
}
|
||||
foundCleanup := false
|
||||
@@ -181,7 +181,7 @@ func TestRunPipelineSkipsMigrationWhenNoSpider91(t *testing.T) {
|
||||
}
|
||||
}
|
||||
if !foundCleanup {
|
||||
t.Fatalf("dedupe cleanup should still run when spider91 is absent; calls=%v", rec.snapshot())
|
||||
t.Fatalf("dedupe cleanup should still run when crawler is absent; calls=%v", rec.snapshot())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -200,8 +200,8 @@ func TestRunPipelineExitsWhenContextCancelledMidPhase(t *testing.T) {
|
||||
cancel()
|
||||
}
|
||||
},
|
||||
ListSpider91Drives: func(context.Context) []string { return []string{"x"} },
|
||||
RunSpider91Crawl: func(context.Context, string) { rec.push("crawl") },
|
||||
ListCrawlerDrives: func(context.Context) []string { return []string{"x"} },
|
||||
RunCrawlerCrawl: func(context.Context, string) { rec.push("crawl") },
|
||||
WaitPreviewQueuesIdle: func(context.Context) error { rec.push("wait-idle"); return nil },
|
||||
RunMigration: func(context.Context) error { rec.push("migrate"); return nil },
|
||||
RunDedupeAssetCleanup: func(context.Context) error { rec.push("dedupe-cleanup"); return nil },
|
||||
@@ -289,12 +289,12 @@ func TestCtxCancelPreventsLaterPhases(t *testing.T) {
|
||||
WaitPreviewQueuesIdle: func(ctx context.Context) error {
|
||||
return ctx.Err()
|
||||
},
|
||||
ListSpider91Drives: func(context.Context) []string {
|
||||
rec.push("list-spider")
|
||||
ListCrawlerDrives: func(context.Context) []string {
|
||||
rec.push("list-crawler")
|
||||
return []string{"x"}
|
||||
},
|
||||
RunSpider91Crawl: func(context.Context, string) { rec.push("crawl") },
|
||||
RunMigration: func(context.Context) error { rec.push("migrate"); return nil },
|
||||
RunCrawlerCrawl: func(context.Context, string) { rec.push("crawl") },
|
||||
RunMigration: func(context.Context) error { rec.push("migrate"); return nil },
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
@@ -303,7 +303,7 @@ func TestCtxCancelPreventsLaterPhases(t *testing.T) {
|
||||
r.runPipeline(ctx)
|
||||
|
||||
for _, c := range rec.snapshot() {
|
||||
if c == "crawl" || c == "migrate" || c == "list-spider" {
|
||||
if c == "crawl" || c == "migrate" || c == "list-crawler" {
|
||||
t.Fatalf("later phase should not run after ctx done; got %q", c)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -952,15 +952,7 @@ func redactURLs(text string) string {
|
||||
}
|
||||
|
||||
func ffmpegOutputLooksRateLimited(output []byte) bool {
|
||||
text := strings.ToLower(string(output))
|
||||
if !strings.Contains(text, "429") {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "throttl") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "rate-limit") ||
|
||||
strings.Contains(text, "server returned 429")
|
||||
return drives.TextMentionsHTTPStatus(string(output), http.StatusTooManyRequests)
|
||||
}
|
||||
|
||||
// --- 本地落盘 ---
|
||||
@@ -1064,12 +1056,10 @@ type ThumbWorker struct {
|
||||
}
|
||||
|
||||
const (
|
||||
defaultTransientMediaCooldown = 5 * time.Minute
|
||||
defaultGenerationRateLimitCooldown = 5 * time.Minute
|
||||
defaultThumbTransientMediaMaxFailures = 3
|
||||
defaultWorkerQueueSize = 10000
|
||||
maxPreviewTeaserSizeBytes int64 = 5 * 1024 * 1024 * 1024
|
||||
previewStatusSkipped = "skipped"
|
||||
defaultTransientMediaCooldown = 5 * time.Minute
|
||||
defaultGenerationRateLimitCooldown = 5 * time.Minute
|
||||
defaultThumbTransientMediaMaxFailures = 3
|
||||
defaultWorkerQueueSize = 10000
|
||||
)
|
||||
|
||||
type rateLimitState struct {
|
||||
@@ -1124,6 +1114,19 @@ func (q *videoQueue) release(v *catalog.Video) {
|
||||
q.mu.Unlock()
|
||||
}
|
||||
|
||||
func (q *videoQueue) idsSnapshot() []string {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
if len(q.ids) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]string, 0, len(q.ids))
|
||||
for id := range q.ids {
|
||||
out = append(out, id)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (q *videoQueue) lengthExcluding(currentID string) int {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
@@ -1251,6 +1254,13 @@ func (w *Worker) Status() TaskStatus {
|
||||
return taskStatus(&w.activity, &w.rateLimit, w.queue.lengthExcluding(currentID))
|
||||
}
|
||||
|
||||
func (w *Worker) ActiveVideoIDs() []string {
|
||||
if w == nil {
|
||||
return nil
|
||||
}
|
||||
return w.queue.idsSnapshot()
|
||||
}
|
||||
|
||||
func (w *ThumbWorker) Status() TaskStatus {
|
||||
if w == nil {
|
||||
return TaskStatus{State: "idle"}
|
||||
@@ -1518,145 +1528,21 @@ func driveErrorShouldCooldown(d drives.Drive, err error) bool {
|
||||
}
|
||||
switch d.Kind() {
|
||||
case "p115":
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "server returned 403") ||
|
||||
strings.Contains(text, "403 forbidden") ||
|
||||
strings.Contains(text, "server returned 405") ||
|
||||
strings.Contains(text, "405 method") ||
|
||||
strings.Contains(text, "access denied") ||
|
||||
strings.Contains(text, "moov atom not found") ||
|
||||
strings.Contains(text, "partial file") ||
|
||||
strings.Contains(text, "request has been blocked") ||
|
||||
strings.Contains(text, "访问被阻断")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusMethodNotAllowed, http.StatusTooManyRequests)
|
||||
case "pikpak":
|
||||
// PikPak 在预览视频 / 封面生成阶段(取链或拉直链字节)可能命中:
|
||||
// - error_code=10 操作频繁
|
||||
// - HTTP 429 / 5xx / 509 限流和服务端不可用
|
||||
// - 通用文本:rate limit / too many requests / blocked
|
||||
// 命中时让 worker 冷却 5 分钟,避免连续请求加重风控。
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "error_code=10") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "http 509") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "partial file") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout, 509)
|
||||
case "p123":
|
||||
// 123网盘直链解析 / ffmpeg 读取阶段可能返回 429、5xx,或 WAF 类
|
||||
// blocked / 访问阻断文本。命中时冷却,避免封面和预览视频生成连续打接口。
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "请求太频繁") ||
|
||||
strings.Contains(text, "请求过于频繁") ||
|
||||
strings.Contains(text, "请求频繁") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "频率限制") ||
|
||||
strings.Contains(text, "请求次数过多") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "server returned 403") ||
|
||||
strings.Contains(text, "403 forbidden") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "访问被阻断") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout)
|
||||
case "wopan":
|
||||
// 联通网盘的取链接口和下载直链都可能返回"操作频繁"、429、5xx
|
||||
// 或 WAF 阻断文本。封面/预览失败时先冷却,避免持续触发风控。
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "请求太频繁") ||
|
||||
strings.Contains(text, "请求过于频繁") ||
|
||||
strings.Contains(text, "请求频繁") ||
|
||||
strings.Contains(text, "操作频繁") ||
|
||||
strings.Contains(text, "频率限制") ||
|
||||
strings.Contains(text, "请求次数过多") ||
|
||||
strings.Contains(text, "系统繁忙") ||
|
||||
strings.Contains(text, "服务繁忙") ||
|
||||
strings.Contains(text, "稍后再试") ||
|
||||
strings.Contains(text, "稍后重试") ||
|
||||
strings.Contains(text, "429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "http 509") ||
|
||||
strings.Contains(text, "server returned 403") ||
|
||||
strings.Contains(text, "403 forbidden") ||
|
||||
strings.Contains(text, "server returned 429") ||
|
||||
strings.Contains(text, "server returned 500") ||
|
||||
strings.Contains(text, "server returned 502") ||
|
||||
strings.Contains(text, "server returned 503") ||
|
||||
strings.Contains(text, "server returned 504") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "rate-limit") ||
|
||||
strings.Contains(text, "throttl") ||
|
||||
strings.Contains(text, "blocked") ||
|
||||
strings.Contains(text, "request has been blocked") ||
|
||||
strings.Contains(text, "访问被阻断") ||
|
||||
strings.Contains(text, "风控") ||
|
||||
strings.Contains(text, "service unavailable")
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout, 509)
|
||||
case "guangyapan":
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout, 509)
|
||||
case "googledrive":
|
||||
// Google Drive 下载/取样阶段常把频控和配额问题包装成 403,
|
||||
// 具体标识在 error.errors[].reason/message 里(OpenList 也按该结构解析)。
|
||||
// ffmpeg/ffprobe 只能看到 stderr 文本时,按这些 reason/文本兜底冷却。
|
||||
text := strings.ToLower(err.Error())
|
||||
return googleDriveMediaErrorShouldCooldown(text)
|
||||
return drives.ErrorMentionsHTTPStatus(err, http.StatusForbidden, http.StatusTooManyRequests, http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func googleDriveMediaErrorShouldCooldown(text string) bool {
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
compact := compactGoogleDriveErrorText(text)
|
||||
return strings.Contains(text, "server returned 403") ||
|
||||
strings.Contains(text, "403 forbidden") ||
|
||||
strings.Contains(text, "server returned 429") ||
|
||||
strings.Contains(text, "http 429") ||
|
||||
strings.Contains(text, "http 500") ||
|
||||
strings.Contains(text, "http 502") ||
|
||||
strings.Contains(text, "http 503") ||
|
||||
strings.Contains(text, "http 504") ||
|
||||
strings.Contains(text, "too many request") ||
|
||||
strings.Contains(text, "too many requests") ||
|
||||
strings.Contains(text, "rate limit") ||
|
||||
strings.Contains(text, "quota exceeded") ||
|
||||
strings.Contains(text, "download quota") ||
|
||||
strings.Contains(text, "sharing rate") ||
|
||||
strings.Contains(text, "daily limit") ||
|
||||
strings.Contains(text, "user rate") ||
|
||||
strings.Contains(text, "usage limit") ||
|
||||
strings.Contains(text, "service unavailable") ||
|
||||
strings.Contains(compact, "ratelimitexceeded") ||
|
||||
strings.Contains(compact, "userratelimitexceeded") ||
|
||||
strings.Contains(compact, "dailylimitexceeded") ||
|
||||
strings.Contains(compact, "downloadquotaexceeded") ||
|
||||
strings.Contains(compact, "sharingratelimitexceeded") ||
|
||||
strings.Contains(compact, "quotaexceeded") ||
|
||||
strings.Contains(compact, "toomanyrequests") ||
|
||||
strings.Contains(compact, "usagelimits")
|
||||
}
|
||||
|
||||
// compactGoogleDriveErrorText trims and lower-cases text, then removes every
// underscore, hyphen, space, period and colon so that differently punctuated
// spellings of the same reason code compare equal.
func compactGoogleDriveErrorText(text string) string {
	lowered := strings.ToLower(strings.TrimSpace(text))

	var compact strings.Builder
	compact.Grow(len(lowered))
	// The dropped characters are all single-byte ASCII, so filtering byte by
	// byte cannot split a multi-byte rune.
	for i := 0; i < len(lowered); i++ {
		switch c := lowered[i]; c {
		case '_', '-', ' ', '.', ':':
			// dropped
		default:
			compact.WriteByte(c)
		}
	}
	return compact.String()
}
|
||||
|
||||
func (w *ThumbWorker) process(ctx context.Context, v *catalog.Video) bool {
|
||||
if w.skipIfRateLimited(v) {
|
||||
return false
|
||||
@@ -1703,11 +1589,6 @@ func (w *ThumbWorker) process(ctx context.Context, v *catalog.Video) bool {
|
||||
return false
|
||||
}
|
||||
_ = w.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{ThumbnailStatus: "pending"})
|
||||
if isSpider91OriginVideo(v) {
|
||||
log.Printf("[thumb] skip %s: spider91-origin video must use crawled thumbnail", v.Title)
|
||||
_ = w.Catalog.UpdateVideoMeta(ctx, v.ID, catalog.VideoMetaPatch{ThumbnailStatus: "failed"})
|
||||
return false
|
||||
}
|
||||
link, err := w.streamLink(ctx, v)
|
||||
if err != nil {
|
||||
if w.pauseForRecoverableError(ctx, v, err, "streamURL") {
|
||||
@@ -1789,10 +1670,6 @@ func (w *ThumbWorker) generateThumbnailFromLink(ctx context.Context, v *catalog.
|
||||
return nil
|
||||
}
|
||||
|
||||
func isSpider91OriginVideo(v *catalog.Video) bool {
|
||||
return v != nil && strings.HasPrefix(v.ID, "spider91-")
|
||||
}
|
||||
|
||||
func localPreviewLink(v *catalog.Video) (*drives.StreamLink, bool) {
|
||||
if v.PreviewLocal == "" {
|
||||
return nil, false
|
||||
@@ -1806,15 +1683,6 @@ func localPreviewLink(v *catalog.Video) (*drives.StreamLink, bool) {
|
||||
}
|
||||
|
||||
func (w *Worker) process(ctx context.Context, v *catalog.Video) {
|
||||
if shouldSkipTeaser(v) {
|
||||
removePreviousLocalTeaser(v.PreviewLocal, "")
|
||||
if err := w.Catalog.UpdatePreview(ctx, v.ID, "", previewStatusSkipped); err != nil {
|
||||
log.Printf("[preview] skip %s: update status: %v", v.Title, err)
|
||||
return
|
||||
}
|
||||
log.Printf("[preview] skip %s: size=%d exceeds 5GiB teaser limit", v.Title, v.Size)
|
||||
return
|
||||
}
|
||||
if w.skipIfRateLimited(v) {
|
||||
return
|
||||
}
|
||||
@@ -1867,10 +1735,6 @@ func (w *Worker) process(ctx context.Context, v *catalog.Video) {
|
||||
log.Printf("[preview] ready %s (duration=%.1fs)", v.Title, duration)
|
||||
}
|
||||
|
||||
func shouldSkipTeaser(v *catalog.Video) bool {
|
||||
return v != nil && v.Size > maxPreviewTeaserSizeBytes
|
||||
}
|
||||
|
||||
func (w *Worker) generateTeaser(ctx context.Context, v *catalog.Video, link *drives.StreamLink, duration float64) (string, error) {
|
||||
gen, ok := w.Gen.(refreshingTeaserGenerator)
|
||||
if !ok || w.Drive == nil || w.Drive.Kind() != "p115" {
|
||||
|
||||
@@ -89,9 +89,9 @@ func TestThumbWorkerBackfillsDurationWhenThumbnailAlreadyExists(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbWorkerDoesNotGenerateThumbnailForSpider91OriginVideo(t *testing.T) {
|
||||
func TestThumbWorkerGeneratesThumbnailForCrawlerLikeVideoID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "spider91-91-spider-1200001")
|
||||
cat, video := seedPreviewTestVideo(t, "scriptcrawler-crawler-main-source001")
|
||||
|
||||
gen := &fakeThumbGenerator{probeDuration: 42}
|
||||
drv := &previewFakeDrive{kind: "pikpak"}
|
||||
@@ -103,18 +103,18 @@ func TestThumbWorkerDoesNotGenerateThumbnailForSpider91OriginVideo(t *testing.T)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Fatalf("thumbnail = %q, want empty when crawled spider91 thumbnail is missing", got.ThumbnailURL)
|
||||
if got.ThumbnailURL != "/p/thumb/"+video.ID {
|
||||
t.Fatalf("thumbnail = %q, want generated thumb URL", got.ThumbnailURL)
|
||||
}
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
ready, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "ready", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
t.Fatalf("list ready thumbnails: %v", err)
|
||||
}
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
if len(ready) != 1 || ready[0].ID != video.ID {
|
||||
t.Fatalf("ready thumbnails = %#v, want only %s", ready, video.ID)
|
||||
}
|
||||
if gen.probeCalls != 0 || gen.generateCalls != 0 {
|
||||
t.Fatalf("generator calls probe=%d generate=%d, want no ffmpeg work for spider91-origin thumbnail", gen.probeCalls, gen.generateCalls)
|
||||
if gen.probeCalls != 1 || gen.generateCalls != 1 {
|
||||
t.Fatalf("generator calls probe=%d generate=%d, want one thumbnail generation", gen.probeCalls, gen.generateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -349,42 +349,10 @@ func TestPreviewWorkerNeverCallsDriveUploadOrEnsureDir(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreviewWorkerSkipsTeaserForVideoLargerThanFiveGiB(t *testing.T) {
|
||||
func TestPreviewWorkerGeneratesTeaserForLargeVideo(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "preview-large-video")
|
||||
video.Size = maxPreviewTeaserSizeBytes + 1
|
||||
if err := cat.UpsertVideo(ctx, video); err != nil {
|
||||
t.Fatalf("update video: %v", err)
|
||||
}
|
||||
|
||||
gen := &fakeTeaserGenerator{}
|
||||
drv := &previewFakeDrive{}
|
||||
worker := NewWorker(gen, cat, drv)
|
||||
|
||||
worker.process(ctx, video)
|
||||
|
||||
got, err := cat.GetVideo(ctx, video.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.PreviewStatus != previewStatusSkipped {
|
||||
t.Fatalf("preview status = %q, want skipped", got.PreviewStatus)
|
||||
}
|
||||
if got.PreviewLocal != "" {
|
||||
t.Fatalf("preview local = %q, want empty", got.PreviewLocal)
|
||||
}
|
||||
if drv.streamCalls != 0 {
|
||||
t.Fatalf("stream calls = %d, want 0", drv.streamCalls)
|
||||
}
|
||||
if gen.generateCalls != 0 {
|
||||
t.Fatalf("generate calls = %d, want 0", gen.generateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreviewWorkerGeneratesTeaserAtFiveGiBBoundary(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "preview-five-gib-video")
|
||||
video.Size = maxPreviewTeaserSizeBytes
|
||||
video.Size = 6 * 1024 * 1024 * 1024
|
||||
if err := cat.UpsertVideo(ctx, video); err != nil {
|
||||
t.Fatalf("update video: %v", err)
|
||||
}
|
||||
@@ -485,9 +453,9 @@ func TestThumbWorkerRateLimitHonorsRetryAfter(t *testing.T) {
|
||||
assertCooldownAround(t, worker.Status().CooldownUntil, before, 2*time.Hour)
|
||||
}
|
||||
|
||||
func TestThumbWorkerP115TransientErrorFailsAfterRetryLimit(t *testing.T) {
|
||||
func TestThumbWorkerP115MessageOnlyErrorFailsWithoutCooldown(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-transient")
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-message-only")
|
||||
|
||||
gen := &fakeThumbGenerator{
|
||||
generateErr: errors.New("ffmpeg thumb: exit status 183, stderr: partial file Cannot determine format of input 0:0 after EOF"),
|
||||
@@ -495,69 +463,26 @@ func TestThumbWorkerP115TransientErrorFailsAfterRetryLimit(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "p115"}
|
||||
worker := NewThumbWorker(gen, cat, drv)
|
||||
|
||||
for attempt := 1; attempt <= defaultThumbTransientMediaMaxFailures; attempt++ {
|
||||
worker.rateLimit = rateLimitState{}
|
||||
worker.process(ctx, video)
|
||||
|
||||
if attempt < defaultThumbTransientMediaMaxFailures {
|
||||
pending, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "pending", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending thumbnails: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != video.ID {
|
||||
t.Fatalf("attempt %d pending thumbnails = %#v, want only %s", attempt, pending, video.ID)
|
||||
}
|
||||
missing, err := cat.CountVideosNeedingThumbnail(ctx, video.DriveID)
|
||||
if err != nil {
|
||||
t.Fatalf("count missing thumbnails: %v", err)
|
||||
}
|
||||
if missing != 1 {
|
||||
t.Fatalf("attempt %d missing thumbnails = %d, want 1 before retry limit", attempt, missing)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
}
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
}
|
||||
missing, err := cat.CountVideosNeedingThumbnail(ctx, video.DriveID)
|
||||
if err != nil {
|
||||
t.Fatalf("count missing thumbnails: %v", err)
|
||||
}
|
||||
if missing != 0 {
|
||||
t.Fatalf("missing thumbnails = %d, want 0 after retry limit marks failed", missing)
|
||||
}
|
||||
}
|
||||
|
||||
if gen.generateCalls != defaultThumbTransientMediaMaxFailures {
|
||||
t.Fatalf("generate calls = %d, want %d", gen.generateCalls, defaultThumbTransientMediaMaxFailures)
|
||||
}
|
||||
|
||||
if err := cat.UpdateVideoMeta(ctx, video.ID, catalog.VideoMetaPatch{
|
||||
ThumbnailStatus: "pending",
|
||||
ResetThumbnailFailures: true,
|
||||
}); err != nil {
|
||||
t.Fatalf("reset thumbnail status: %v", err)
|
||||
}
|
||||
worker.rateLimit = rateLimitState{}
|
||||
worker.process(ctx, video)
|
||||
|
||||
pending, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "pending", 0)
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending thumbnails after reset: %v", err)
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != video.ID {
|
||||
t.Fatalf("pending thumbnails after reset = %#v, want only %s", pending, video.ID)
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
}
|
||||
if !worker.Status().CooldownUntil.IsZero() {
|
||||
t.Fatalf("cooldown until = %s, want no cooldown for message-only media error", worker.Status().CooldownUntil)
|
||||
}
|
||||
if gen.generateCalls != 1 {
|
||||
t.Fatalf("generate calls = %d, want 1", gen.generateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThumbWorkerRequeuesP115TransientErrorBeforeRetryLimit(t *testing.T) {
|
||||
func TestThumbWorkerDoesNotRequeueP115MessageOnlyError(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-requeue")
|
||||
cat, video := seedPreviewTestVideo(t, "thumb-p115-no-requeue")
|
||||
|
||||
gen := &fakeThumbGenerator{
|
||||
generateErr: errors.New("ffmpeg thumb: partial file Cannot determine format of input 0:0 after EOF"),
|
||||
@@ -569,11 +494,8 @@ func TestThumbWorkerRequeuesP115TransientErrorBeforeRetryLimit(t *testing.T) {
|
||||
|
||||
select {
|
||||
case queued := <-worker.ch:
|
||||
if queued.ID != video.ID {
|
||||
t.Fatalf("requeued video id = %q, want %q", queued.ID, video.ID)
|
||||
}
|
||||
t.Fatalf("unexpected requeued video id = %q", queued.ID)
|
||||
default:
|
||||
t.Fatal("expected transient thumbnail failure to requeue the same video")
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, video.ID)
|
||||
@@ -581,14 +503,14 @@ func TestThumbWorkerRequeuesP115TransientErrorBeforeRetryLimit(t *testing.T) {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.ThumbnailURL != "" {
|
||||
t.Fatalf("thumbnail = %q, want empty after transient failure", got.ThumbnailURL)
|
||||
t.Fatalf("thumbnail = %q, want empty after message-only failure", got.ThumbnailURL)
|
||||
}
|
||||
pending, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "pending", 0)
|
||||
failed, err := cat.ListVideosByThumbnailStatus(ctx, video.DriveID, "failed", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("list pending thumbnails: %v", err)
|
||||
t.Fatalf("list failed thumbnails: %v", err)
|
||||
}
|
||||
if len(pending) != 1 || pending[0].ID != video.ID {
|
||||
t.Fatalf("pending thumbnails = %#v, want only %s", pending, video.ID)
|
||||
if len(failed) != 1 || failed[0].ID != video.ID {
|
||||
t.Fatalf("failed thumbnails = %#v, want only %s", failed, video.ID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -649,13 +571,15 @@ func TestP123TransientErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "p123"}
|
||||
for _, err := range []error{
|
||||
errors.New("Server returned 403 Forbidden"),
|
||||
errors.New("请求太频繁"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("请求太频繁")) {
|
||||
t.Fatal("message-only throttling text should not trigger p123 cooldown")
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid credential")) {
|
||||
t.Fatal("invalid credential should not trigger p123 cooldown")
|
||||
}
|
||||
@@ -666,31 +590,58 @@ func TestWopanTransientErrorsShouldCooldown(t *testing.T) {
|
||||
for _, err := range []error{
|
||||
errors.New("ffmpeg: Server returned 403 Forbidden"),
|
||||
errors.New("wopan download url: request failed with status: 429 Too Many Requests"),
|
||||
errors.New("操作频繁,请稍后重试"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("操作频繁,请稍后重试")) {
|
||||
t.Fatal("message-only throttling text should not trigger wopan cooldown")
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid access token")) {
|
||||
t.Fatal("invalid access token should not trigger wopan cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGuangYaPanTransientErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "guangyapan"}
|
||||
for _, err := range []error{
|
||||
errors.New("ffmpeg: Server returned 403 Forbidden"),
|
||||
errors.New("guangyapan api rate limited: status=429 msg=操作频繁,请稍后重试"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("操作频繁,请稍后重试")) {
|
||||
t.Fatal("message-only throttling text should not trigger guangyapan cooldown")
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid access token")) {
|
||||
t.Fatal("invalid access token should not trigger guangyapan cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoogleDriveMediaErrorsShouldCooldown(t *testing.T) {
|
||||
drv := &previewFakeDrive{kind: "googledrive"}
|
||||
for _, err := range []error{
|
||||
errors.New("google drive api error: usageLimits userRateLimitExceeded"),
|
||||
errors.New("ffmpeg: Server returned 403 Forbidden"),
|
||||
errors.New("downloadQuotaExceeded: The download quota for this file has been exceeded"),
|
||||
errors.New("sharingRateLimitExceeded"),
|
||||
errors.New("http 503 service unavailable"),
|
||||
} {
|
||||
if !driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("driveErrorShouldCooldown(%v) = false, want true", err)
|
||||
}
|
||||
}
|
||||
for _, err := range []error{
|
||||
errors.New("google drive api error: usageLimits userRateLimitExceeded"),
|
||||
errors.New("downloadQuotaExceeded: The download quota for this file has been exceeded"),
|
||||
errors.New("sharingRateLimitExceeded"),
|
||||
} {
|
||||
if driveErrorShouldCooldown(drv, err) {
|
||||
t.Fatalf("message-only google drive error %v should not trigger cooldown", err)
|
||||
}
|
||||
}
|
||||
if driveErrorShouldCooldown(drv, errors.New("invalid credentials")) {
|
||||
t.Fatal("invalid credentials should not trigger googledrive cooldown")
|
||||
}
|
||||
|
||||
@@ -151,13 +151,15 @@ func (p *Proxy) ServeStream(w http.ResponseWriter, r *http.Request, driveID, fil
|
||||
// 先解出最终 Location,浏览器可直接 302 到该短期地址
|
||||
// - wopan:联通网盘 GetDownloadUrlV2 返回的是短期直链,OpenList 也是直接
|
||||
// 将该 URL 交给客户端使用;不需要后端持续代传视频字节
|
||||
// - guangyapan:光鸭 get_res_download_url 返回 signedURL / downloadUrl,
|
||||
// 浏览器可直接访问,不需要后端持续代传视频字节
|
||||
//
|
||||
// 其余网盘(如夸克等)仍走反代,因为它们的下载
|
||||
// 链接通常需要随请求带上后端持有的 Cookie / Authorization / Range
|
||||
// 的特殊处理,浏览器拿不到这些上下文。
|
||||
func shouldRedirect(d drives.Drive) bool {
|
||||
switch d.Kind() {
|
||||
case "p115", "pikpak", "onedrive", "p123", "wopan":
|
||||
case "p115", "pikpak", "onedrive", "p123", "wopan", "guangyapan":
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
@@ -226,6 +226,31 @@ func TestServeStreamRedirectsWopan(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeStreamRedirectsGuangYaPan(t *testing.T) {
|
||||
reg := NewRegistry()
|
||||
drv := &proxyFakeSimpleDrive{
|
||||
kind: "guangyapan",
|
||||
url: "https://cdn.guangyapan.example/video.mp4?sign=encoded",
|
||||
}
|
||||
reg.Set("guangyapan", drv)
|
||||
|
||||
p := New(reg)
|
||||
req := httptest.NewRequest(http.MethodGet, "/p/stream/guangyapan/file-1", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
|
||||
p.ServeStream(rr, req, "guangyapan", "file-1")
|
||||
|
||||
if rr.Code != http.StatusFound {
|
||||
t.Fatalf("status = %d, want %d", rr.Code, http.StatusFound)
|
||||
}
|
||||
if got := rr.Header().Get("Location"); got != "https://cdn.guangyapan.example/video.mp4?sign=encoded" {
|
||||
t.Fatalf("Location = %q", got)
|
||||
}
|
||||
if drv.calls != 1 {
|
||||
t.Fatalf("link calls = %d, want 1", drv.calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServeStreamServesLocalFilePath(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "video.mp4")
|
||||
if err := os.WriteFile(path, []byte("0123456789"), 0o644); err != nil {
|
||||
|
||||
@@ -189,12 +189,6 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if label, ok, err := s.Catalog.EnsureCollectionTag(ctx, dirName); err == nil && ok {
|
||||
tags = mergeTags(tags, []string{label})
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
existing, _ := s.Catalog.GetVideo(ctx, id)
|
||||
if err := ctx.Err(); err != nil {
|
||||
@@ -206,15 +200,15 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
patch.ContentHash = e.Hash
|
||||
existing.ContentHash = e.Hash
|
||||
}
|
||||
if e.Name != "" && existing.FileName == "" {
|
||||
if e.Name != "" && existing.FileName != e.Name {
|
||||
patch.FileName = e.Name
|
||||
existing.FileName = e.Name
|
||||
patch.Title = parsed.Title
|
||||
patch.TitleSet = true
|
||||
patch.Author = parsed.Author
|
||||
patch.AuthorSet = true
|
||||
}
|
||||
// 已存在但轻量元数据空缺时,顺便补齐。
|
||||
if existing.Category == "" && dirName != "" {
|
||||
patch.Category = dirName
|
||||
}
|
||||
if patch.Category != "" || patch.ContentHash != "" || patch.FileName != "" {
|
||||
if patch.ContentHash != "" || patch.FileName != "" || patch.TitleSet || patch.AuthorSet {
|
||||
_ = s.Catalog.UpdateVideoMeta(ctx, id, patch)
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
@@ -257,7 +251,6 @@ func (s *Scanner) walk(ctx context.Context, dirID, dirName string, stats *Stats,
|
||||
Quality: "HD",
|
||||
Size: e.Size,
|
||||
PreviewStatus: "pending",
|
||||
Category: dirName,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
|
||||
@@ -323,6 +323,67 @@ func TestRunDoesNotBackfillRemoteThumbnailForExistingVideo(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunSyncsRenamedExistingVideoMetadata(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
now := time.Now()
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: "fake-drive-file-1",
|
||||
DriveID: "drive",
|
||||
FileID: "file-1",
|
||||
FileName: "old-name - Old Author.mp4",
|
||||
Title: "old-name",
|
||||
Author: "Old Author",
|
||||
PreviewStatus: "pending",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed video: %v", err)
|
||||
}
|
||||
|
||||
drv := &scannerFakeDrive{
|
||||
entries: []drives.Entry{{
|
||||
ID: "file-1",
|
||||
Name: "[4K] renamed clip.mp4",
|
||||
Size: 123,
|
||||
ModTime: now,
|
||||
}},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
stats, err := sc.Run(ctx, "")
|
||||
if err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
if stats.Added != 0 {
|
||||
t.Fatalf("added = %d, want existing video to be updated in place", stats.Added)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, "fake-drive-file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.FileName != "[4K] renamed clip.mp4" {
|
||||
t.Fatalf("file_name = %q, want remote name", got.FileName)
|
||||
}
|
||||
if got.Title != "renamed clip" {
|
||||
t.Fatalf("title = %q, want parsed title from remote name", got.Title)
|
||||
}
|
||||
if got.Author != "" {
|
||||
t.Fatalf("author = %q, want cleared author from remote name without author suffix", got.Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunReplacesExistingVideoTagsWithFixedFilenameTags(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
@@ -374,7 +435,7 @@ func TestRunReplacesExistingVideoTagsWithFixedFilenameTags(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunAddsShortCollectionDirectoryAsTag(t *testing.T) {
|
||||
func TestRunDoesNotCreateTagFromDirectoryName(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
@@ -392,7 +453,6 @@ func TestRunAddsShortCollectionDirectoryAsTag(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "Existing",
|
||||
Category: "sunny",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
@@ -423,84 +483,6 @@ func TestRunAddsShortCollectionDirectoryAsTag(t *testing.T) {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, "fake-drive-file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if !sameStrings(got.Tags, []string{"sunny"}) {
|
||||
t.Fatalf("tags = %#v, want sunny", got.Tags)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunDoesNotRecreateDeletedCollectionDirectoryTag(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
now := time.Now()
|
||||
for _, id := range []string{"existing-1", "existing-2"} {
|
||||
if err := cat.UpsertVideo(ctx, &catalog.Video{
|
||||
ID: id,
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "Existing",
|
||||
Category: "sunny",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed existing sunny video: %v", err)
|
||||
}
|
||||
}
|
||||
if label, ok, err := cat.EnsureCollectionTag(ctx, "sunny"); err != nil || !ok || label != "sunny" {
|
||||
t.Fatalf("ensure collection = %q, %v, %v; want sunny true nil", label, ok, err)
|
||||
}
|
||||
tags, err := cat.ListTags(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("list tags: %v", err)
|
||||
}
|
||||
var tagID int64
|
||||
for _, tag := range tags {
|
||||
if tag.Label == "sunny" {
|
||||
tagID = tag.ID
|
||||
break
|
||||
}
|
||||
}
|
||||
if tagID == 0 {
|
||||
t.Fatal("sunny tag not found before delete")
|
||||
}
|
||||
if _, err := cat.DeleteTag(ctx, tagID); err != nil {
|
||||
t.Fatalf("delete tag: %v", err)
|
||||
}
|
||||
|
||||
drv := &scannerTreeFakeDrive{
|
||||
entries: map[string][]drives.Entry{
|
||||
"root": {{
|
||||
ID: "dir-1",
|
||||
Name: "sunny",
|
||||
IsDir: true,
|
||||
}},
|
||||
"dir-1": {{
|
||||
ID: "file-1",
|
||||
ParentID: "dir-1",
|
||||
Name: "clip.mp4",
|
||||
Size: 123,
|
||||
ModTime: now,
|
||||
}},
|
||||
},
|
||||
}
|
||||
sc := New(cat, drv, []string{".mp4"}, nil, nil)
|
||||
|
||||
if _, err := sc.Run(ctx, ""); err != nil {
|
||||
t.Fatalf("scan: %v", err)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(ctx, "fake-drive-file-1")
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
@@ -508,15 +490,6 @@ func TestRunDoesNotRecreateDeletedCollectionDirectoryTag(t *testing.T) {
|
||||
if len(got.Tags) != 0 {
|
||||
t.Fatalf("tags = %#v, want none", got.Tags)
|
||||
}
|
||||
tags, err = cat.ListTags(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("list tags after scan: %v", err)
|
||||
}
|
||||
for _, tag := range tags {
|
||||
if tag.Label == "sunny" {
|
||||
t.Fatal("deleted collection tag was recreated during scan")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunMapsAVCodeDirectoryToAVTag(t *testing.T) {
|
||||
@@ -537,7 +510,6 @@ func TestRunMapsAVCodeDirectoryToAVTag(t *testing.T) {
|
||||
DriveID: "drive",
|
||||
FileID: id,
|
||||
Title: "Existing",
|
||||
Category: "cc-1750027",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
|
||||
@@ -0,0 +1,178 @@
|
||||
// Package transcode implements "browser-compatibility transcoding": it converts
// videos in cloud-drive/local storage that a browser <video> element cannot
// play (AVI/WMV/FLV, MPEG-4 Part 2, RMVB, etc.) into H.264 + AAC MP4, uploads
// the result back to the same storage, and switches the playback source to the
// produced file.
//
// Unlike thumbnail/preview generation, transcoding never runs automatically: it
// can only be started manually by an administrator on the drive management
// page, and it can be stopped manually at any time.
package transcode
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MediaInfo is the minimal information probed by ffprobe that is needed to
// decide browser compatibility.
type MediaInfo struct {
	// FormatName is ffprobe's format_name: a comma-separated list of demuxer
	// aliases, e.g. "mov,mp4,m4a,3gp,3g2,mj2" / "avi" / "matroska,webm".
	FormatName string
	// VideoCodecs holds the codec_name of each stream whose codec_type is
	// "video", in the order ProbeFile encountered them.
	VideoCodecs []string
	// AudioCodecs holds the codec_name of each stream whose codec_type is
	// "audio"; it is empty when the file has no audio stream.
	AudioCodecs []string
}
|
||||
|
||||
// browserCompatibleVideoCodecs lists the video codecs that mainstream browsers'
// <video> element can generally decode. HEVC/H.265 is only supported on some
// platforms, so it is conservatively treated as incompatible.
// Keys are lower-case; callers lower-case the codec name before the lookup.
var browserCompatibleVideoCodecs = map[string]bool{
	"h264": true,
	"vp8":  true,
	"vp9":  true,
	"av1":  true,
}
|
||||
|
||||
// browserCompatibleAudioCodecs lists the audio codecs that mainstream browsers
// can generally decode.
// Keys are lower-case; callers lower-case the codec name before the lookup.
var browserCompatibleAudioCodecs = map[string]bool{
	"aac":    true,
	"mp3":    true,
	"opus":   true,
	"vorbis": true,
	"flac":   true,
}
|
||||
|
||||
// NeedsTranscode 判断这个文件是否需要转码才能在浏览器里播放。
|
||||
// ext 是 catalog 里记录的扩展名(小写、不带点),用来区分 mkv 和 webm
|
||||
// (两者的 format_name 都是 "matroska,webm")。
|
||||
func NeedsTranscode(info MediaInfo, ext string) bool {
|
||||
if !containerCompatible(info.FormatName, ext) {
|
||||
return true
|
||||
}
|
||||
for _, codec := range info.VideoCodecs {
|
||||
if !browserCompatibleVideoCodecs[strings.ToLower(codec)] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
for _, codec := range info.AudioCodecs {
|
||||
if !browserCompatibleAudioCodecs[strings.ToLower(codec)] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// containerCompatible reports whether the container is one the browser is
// trusted to play directly.
//
// formatName is ffprobe's comma-separated format_name and ext is the file
// extension without the dot; both are compared case-insensitively. Any format
// whose alias list contains "mp4" is accepted. A format mentioning "webm" is
// accepted only for a real .webm file; .mkv is conservatively rejected.
func containerCompatible(formatName, ext string) bool {
	lowered := strings.ToLower(formatName)
	aliases := strings.Split(lowered, ",")
	for i := 0; i < len(aliases); i++ {
		if aliases[i] == "mp4" {
			return true
		}
	}
	// matroska,webm: only trust the container when the extension is webm.
	return strings.Contains(lowered, "webm") && strings.EqualFold(ext, "webm")
}
|
||||
|
||||
// ProbeFile 用 ffprobe 探测本地文件的容器与音视频编码。
|
||||
func ProbeFile(ctx context.Context, ffprobePath, path string) (MediaInfo, error) {
|
||||
ctx2, cancel := context.WithTimeout(ctx, 60*time.Second)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx2, ffprobePath,
|
||||
"-v", "error",
|
||||
"-show_entries", "format=format_name",
|
||||
"-show_entries", "stream=codec_type,codec_name",
|
||||
"-of", "json",
|
||||
path,
|
||||
)
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return MediaInfo{}, fmt.Errorf("transcode: ffprobe: %w", err)
|
||||
}
|
||||
var parsed struct {
|
||||
Format struct {
|
||||
FormatName string `json:"format_name"`
|
||||
} `json:"format"`
|
||||
Streams []struct {
|
||||
CodecType string `json:"codec_type"`
|
||||
CodecName string `json:"codec_name"`
|
||||
} `json:"streams"`
|
||||
}
|
||||
if err := json.Unmarshal(out, &parsed); err != nil {
|
||||
return MediaInfo{}, fmt.Errorf("transcode: parse ffprobe output: %w", err)
|
||||
}
|
||||
info := MediaInfo{FormatName: parsed.Format.FormatName}
|
||||
for _, s := range parsed.Streams {
|
||||
switch s.CodecType {
|
||||
case "video":
|
||||
info.VideoCodecs = append(info.VideoCodecs, s.CodecName)
|
||||
case "audio":
|
||||
info.AudioCodecs = append(info.AudioCodecs, s.CodecName)
|
||||
}
|
||||
}
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// buildFFmpegArgs 按探测结果生成转码参数:
|
||||
// - 编码本就兼容、只是容器不行(如 AVI 里装 H.264)→ 流拷贝 remux,零质量损失;
|
||||
// - 否则视频转 H.264(裁到偶数尺寸 + yuv420p 保证兼容性)、音频转 AAC。
|
||||
//
|
||||
// 两种情况都加 +faststart 把 moov 提前,便于边下边播。
|
||||
func buildFFmpegArgs(info MediaInfo, inPath, outPath string) []string {
|
||||
args := []string{"-y", "-i", inPath}
|
||||
videoOK := true
|
||||
for _, codec := range info.VideoCodecs {
|
||||
if !browserCompatibleVideoCodecs[strings.ToLower(codec)] {
|
||||
videoOK = false
|
||||
break
|
||||
}
|
||||
}
|
||||
audioOK := true
|
||||
for _, codec := range info.AudioCodecs {
|
||||
if !browserCompatibleAudioCodecs[strings.ToLower(codec)] {
|
||||
audioOK = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if videoOK {
|
||||
args = append(args, "-c:v", "copy")
|
||||
} else {
|
||||
args = append(args,
|
||||
"-c:v", "libx264",
|
||||
"-preset", "veryfast",
|
||||
"-crf", "23",
|
||||
"-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2",
|
||||
"-pix_fmt", "yuv420p",
|
||||
)
|
||||
}
|
||||
if len(info.AudioCodecs) == 0 {
|
||||
args = append(args, "-an")
|
||||
} else if audioOK {
|
||||
args = append(args, "-c:a", "copy")
|
||||
} else {
|
||||
args = append(args, "-c:a", "aac", "-b:a", "128k")
|
||||
}
|
||||
args = append(args, "-movflags", "+faststart", "-f", "mp4", outPath)
|
||||
return args
|
||||
}
|
||||
|
||||
// TranscodeFile 把本地输入文件转成浏览器可播的 MP4 写到 outPath。
|
||||
func TranscodeFile(ctx context.Context, ffmpegPath string, info MediaInfo, inPath, outPath string) error {
|
||||
args := buildFFmpegArgs(info, inPath, outPath)
|
||||
cmd := exec.CommandContext(ctx, ffmpegPath, args...)
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("transcode: ffmpeg: %w: %s", err, tailOf(string(out), 400))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// tailOf returns at most the last n bytes of s after trimming surrounding
// whitespace.
//
// The cut is moved forward to the next UTF-8 character boundary, so the result
// never starts in the middle of a multi-byte character (it may therefore be
// slightly shorter than n bytes). A non-positive n yields the empty string
// instead of an out-of-range slice.
func tailOf(s string, n int) string {
	s = strings.TrimSpace(s)
	if n <= 0 {
		return ""
	}
	if len(s) <= n {
		return s
	}
	start := len(s) - n
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; skip them so the
	// tail begins on the first byte of a character.
	for start < len(s) && s[start]&0xC0 == 0x80 {
		start++
	}
	return s[start:]
}
|
||||
@@ -0,0 +1,125 @@
|
||||
package transcode
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
)
|
||||
|
||||
func TestNeedsTranscode(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
info MediaInfo
|
||||
ext string
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "h264 aac mp4 is compatible",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "mp4",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "mpeg4 in avi needs transcode",
|
||||
info: MediaInfo{FormatName: "avi", VideoCodecs: []string{"mpeg4"}, AudioCodecs: []string{"mp3"}},
|
||||
ext: "avi",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "h264 in avi needs remux",
|
||||
info: MediaInfo{FormatName: "avi", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "avi",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "hevc in mp4 needs transcode",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"hevc"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "mp4",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "vp9 opus webm is compatible",
|
||||
info: MediaInfo{FormatName: "matroska,webm", VideoCodecs: []string{"vp9"}, AudioCodecs: []string{"opus"}},
|
||||
ext: "webm",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "h264 in mkv is conservative transcode",
|
||||
info: MediaInfo{FormatName: "matroska,webm", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}},
|
||||
ext: "mkv",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "pcm audio in mov needs transcode",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"pcm_s16le"}},
|
||||
ext: "mov",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "video only h264 mp4 is compatible",
|
||||
info: MediaInfo{FormatName: "mov,mp4,m4a,3gp,3g2,mj2", VideoCodecs: []string{"h264"}},
|
||||
ext: "mp4",
|
||||
want: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := NeedsTranscode(tc.info, tc.ext); got != tc.want {
|
||||
t.Fatalf("NeedsTranscode(%+v, %q) = %v, want %v", tc.info, tc.ext, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildFFmpegArgsRemuxWhenCodecsCompatible(t *testing.T) {
|
||||
// AVI 里装 H.264+AAC:只需要换容器,应该走流拷贝
|
||||
info := MediaInfo{FormatName: "avi", VideoCodecs: []string{"h264"}, AudioCodecs: []string{"aac"}}
|
||||
args := strings.Join(buildFFmpegArgs(info, "in.avi", "out.mp4"), " ")
|
||||
if !strings.Contains(args, "-c:v copy") {
|
||||
t.Fatalf("expected video stream copy, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "-c:a copy") {
|
||||
t.Fatalf("expected audio stream copy, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "+faststart") {
|
||||
t.Fatalf("expected faststart flag, got: %s", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildFFmpegArgsTranscodesIncompatibleCodecs(t *testing.T) {
|
||||
info := MediaInfo{FormatName: "avi", VideoCodecs: []string{"mpeg4"}, AudioCodecs: []string{"wmav2"}}
|
||||
args := strings.Join(buildFFmpegArgs(info, "in.avi", "out.mp4"), " ")
|
||||
if !strings.Contains(args, "-c:v libx264") {
|
||||
t.Fatalf("expected libx264 video encode, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "-c:a aac") {
|
||||
t.Fatalf("expected aac audio encode, got: %s", args)
|
||||
}
|
||||
if !strings.Contains(args, "yuv420p") {
|
||||
t.Fatalf("expected yuv420p pixel format, got: %s", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildFFmpegArgsDropsAudioWhenNoAudioStream(t *testing.T) {
|
||||
info := MediaInfo{FormatName: "avi", VideoCodecs: []string{"mpeg4"}}
|
||||
args := strings.Join(buildFFmpegArgs(info, "in.avi", "out.mp4"), " ")
|
||||
if !strings.Contains(args, "-an") {
|
||||
t.Fatalf("expected -an for video without audio, got: %s", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTranscodedName(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
fileName, title, id, want string
|
||||
}{
|
||||
{"www.98T.la@167.avi", "www.98T.la@167", "p115-1", "www.98T.la@167.mp4"},
|
||||
{"", "标题", "p115-2", "标题.mp4"},
|
||||
{"a/b\\c.wmv", "", "p115-3", "a_b_c.mp4"},
|
||||
} {
|
||||
v := &catalog.Video{FileName: tc.fileName, Title: tc.title, ID: tc.id}
|
||||
if got := transcodedName(v); got != tc.want {
|
||||
t.Fatalf("transcodedName(%q,%q,%q) = %q, want %q", tc.fileName, tc.title, tc.id, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,308 @@
|
||||
package transcode
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives"
|
||||
)
|
||||
|
||||
// DefaultTargetDirName is the directory (relative to the drive root) in which
// transcoded output is stored on the cloud drive.
// Before its first upload the worker calls EnsureDir for it and adds the
// directory to the drive's scan skip list, so the scanner does not ingest the
// transcoded output again as new videos.
const DefaultTargetDirName = "91转码"
|
||||
|
||||
// Config holds the settings of a transcode Worker. NewWorker fills in defaults
// for every field that is left empty.
type Config struct {
	// FFmpegPath and FFprobePath are the executables to invoke.
	FFmpegPath, FFprobePath string
	// WorkDir is the local temporary directory used for downloaded source
	// files and for transcoded output.
	WorkDir string
	// TargetDirName falls back to DefaultTargetDirName when empty.
	TargetDirName string
}
|
||||
|
||||
// TaskStatus mirrors the status structure of the preview / fingerprint workers
// and is what the admin UI displays.
type TaskStatus struct {
	State, CurrentTitle               string
	QueueLength, DoneCount, TotalCount int
}
|
||||
|
||||
// Worker 串行处理一个 drive 的转码任务。生命周期与一次"开始转码"对应:
|
||||
// Run 处理完整个候选列表(或 ctx 被取消)后即结束,不常驻。
|
||||
type Worker struct {
|
||||
cfg Config
|
||||
cat *catalog.Catalog
|
||||
drv drives.Drive
|
||||
hc *http.Client
|
||||
|
||||
mu sync.Mutex
|
||||
state string
|
||||
currentTitle string
|
||||
done int
|
||||
total int
|
||||
|
||||
targetDirOnce sync.Once
|
||||
targetDirID string
|
||||
targetDirErr error
|
||||
}
|
||||
|
||||
func NewWorker(cfg Config, cat *catalog.Catalog, drv drives.Drive) *Worker {
|
||||
if cfg.FFmpegPath == "" {
|
||||
cfg.FFmpegPath = "ffmpeg"
|
||||
}
|
||||
if cfg.FFprobePath == "" {
|
||||
cfg.FFprobePath = "ffprobe"
|
||||
}
|
||||
if cfg.TargetDirName == "" {
|
||||
cfg.TargetDirName = DefaultTargetDirName
|
||||
}
|
||||
if cfg.WorkDir == "" {
|
||||
cfg.WorkDir = os.TempDir()
|
||||
}
|
||||
return &Worker{
|
||||
cfg: cfg,
|
||||
cat: cat,
|
||||
drv: drv,
|
||||
hc: &http.Client{Timeout: 0},
|
||||
state: "idle",
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Worker) Status() TaskStatus {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
queueLen := w.total - w.done
|
||||
if w.state == "generating" && queueLen > 0 {
|
||||
// 正在处理的那条不算"排队中"
|
||||
queueLen--
|
||||
}
|
||||
if queueLen < 0 {
|
||||
queueLen = 0
|
||||
}
|
||||
return TaskStatus{
|
||||
State: w.state,
|
||||
CurrentTitle: w.currentTitle,
|
||||
QueueLength: queueLen,
|
||||
DoneCount: w.done,
|
||||
TotalCount: w.total,
|
||||
}
|
||||
}
|
||||
|
||||
// Run 串行转码整个候选列表。ctx 取消时停在当前条目边界(正在跑的 ffmpeg
|
||||
// 会被 CommandContext 杀掉),未处理的候选保持原状态,下次开始时继续。
|
||||
func (w *Worker) Run(ctx context.Context, videos []*catalog.Video) {
|
||||
w.mu.Lock()
|
||||
w.state = "generating"
|
||||
w.total = len(videos)
|
||||
w.done = 0
|
||||
w.mu.Unlock()
|
||||
|
||||
defer func() {
|
||||
w.mu.Lock()
|
||||
w.state = "idle"
|
||||
w.currentTitle = ""
|
||||
w.mu.Unlock()
|
||||
}()
|
||||
|
||||
for _, v := range videos {
|
||||
if ctx.Err() != nil {
|
||||
log.Printf("[transcode] drive=%s canceled after %d/%d", w.drv.ID(), w.doneCount(), len(videos))
|
||||
return
|
||||
}
|
||||
w.mu.Lock()
|
||||
w.currentTitle = v.Title
|
||||
w.mu.Unlock()
|
||||
|
||||
if err := w.process(ctx, v); err != nil {
|
||||
if ctx.Err() != nil {
|
||||
// 取消导致的失败不要写 failed,保持候选状态便于下次继续
|
||||
log.Printf("[transcode] drive=%s canceled while processing %s", w.drv.ID(), v.ID)
|
||||
return
|
||||
}
|
||||
log.Printf("[transcode] drive=%s video=%s failed: %v", w.drv.ID(), v.ID, err)
|
||||
if uerr := w.cat.UpdateVideoTranscode(context.WithoutCancel(ctx), v.ID, "failed", err.Error(), "", 0); uerr != nil {
|
||||
log.Printf("[transcode] mark failed %s: %v", v.ID, uerr)
|
||||
}
|
||||
}
|
||||
w.mu.Lock()
|
||||
w.done++
|
||||
w.mu.Unlock()
|
||||
}
|
||||
log.Printf("[transcode] drive=%s finished %d videos", w.drv.ID(), len(videos))
|
||||
}
|
||||
|
||||
func (w *Worker) doneCount() int {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
return w.done
|
||||
}
|
||||
|
||||
func (w *Worker) process(ctx context.Context, v *catalog.Video) error {
|
||||
localPath, cleanup, err := w.fetchSource(ctx, v)
|
||||
if err != nil {
|
||||
return fmt.Errorf("fetch source: %w", err)
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
info, err := ProbeFile(ctx, w.cfg.FFprobePath, localPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !NeedsTranscode(info, v.Ext) {
|
||||
log.Printf("[transcode] drive=%s video=%s compatible (%s), skip", w.drv.ID(), v.ID, info.FormatName)
|
||||
return w.cat.UpdateVideoTranscode(ctx, v.ID, "skipped", "", "", 0)
|
||||
}
|
||||
|
||||
outPath := filepath.Join(w.cfg.WorkDir, sanitizeFileName(v.ID)+".transcoding.mp4")
|
||||
defer os.Remove(outPath)
|
||||
if err := TranscodeFile(ctx, w.cfg.FFmpegPath, info, localPath, outPath); err != nil {
|
||||
return err
|
||||
}
|
||||
stat, err := os.Stat(outPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("stat transcoded output: %w", err)
|
||||
}
|
||||
|
||||
dirID, err := w.ensureTargetDir(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("ensure target dir: %w", err)
|
||||
}
|
||||
f, err := os.Open(outPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
fileID, err := w.drv.Upload(ctx, dirID, transcodedName(v), f, stat.Size())
|
||||
if err != nil {
|
||||
return fmt.Errorf("upload transcoded file: %w", err)
|
||||
}
|
||||
log.Printf("[transcode] drive=%s video=%s ready: file=%s size=%d", w.drv.ID(), v.ID, fileID, stat.Size())
|
||||
return w.cat.UpdateVideoTranscode(ctx, v.ID, "ready", "", fileID, stat.Size())
|
||||
}
|
||||
|
||||
// fetchSource 把原始文件准备成本地路径。本地存储直接复用源路径(cleanup
|
||||
// 不删除源文件);云盘则整文件下载到 WorkDir。
|
||||
func (w *Worker) fetchSource(ctx context.Context, v *catalog.Video) (string, func(), error) {
|
||||
link, err := w.drv.StreamURL(ctx, v.FileID)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
u, err := url.Parse(link.URL)
|
||||
if isLocal := err == nil && u.Scheme != "http" && u.Scheme != "https"; isLocal {
|
||||
path := link.URL
|
||||
if err == nil && u.Scheme == "file" {
|
||||
path = u.Path
|
||||
}
|
||||
return path, func() {}, nil
|
||||
}
|
||||
|
||||
tmpPath := filepath.Join(w.cfg.WorkDir, sanitizeFileName(v.ID)+".src.tmp")
|
||||
cleanup := func() { os.Remove(tmpPath) }
|
||||
if err := w.downloadTo(ctx, link, tmpPath); err != nil {
|
||||
cleanup()
|
||||
return "", nil, err
|
||||
}
|
||||
return tmpPath, cleanup, nil
|
||||
}
|
||||
|
||||
func (w *Worker) downloadTo(ctx context.Context, link *drives.StreamLink, dst string) error {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, link.URL, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for k, vals := range link.Headers {
|
||||
for _, val := range vals {
|
||||
req.Header.Add(k, val)
|
||||
}
|
||||
}
|
||||
res, err := w.hc.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode < 200 || res.StatusCode >= 300 {
|
||||
return fmt.Errorf("download source: HTTP %d", res.StatusCode)
|
||||
}
|
||||
f, err := os.Create(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
if _, err := io.Copy(f, res.Body); err != nil {
|
||||
return fmt.Errorf("download source: %w", err)
|
||||
}
|
||||
return f.Sync()
|
||||
}
|
||||
|
||||
// ensureTargetDir 确保网盘上的转码产物目录存在,并把它写进 drive 的扫描
|
||||
// 跳过列表(幂等),避免 scanner 把产物再当新视频收进库。
|
||||
func (w *Worker) ensureTargetDir(ctx context.Context) (string, error) {
|
||||
w.targetDirOnce.Do(func() {
|
||||
dirID, err := w.drv.EnsureDir(ctx, w.cfg.TargetDirName)
|
||||
if err != nil {
|
||||
w.targetDirErr = err
|
||||
return
|
||||
}
|
||||
w.targetDirID = dirID
|
||||
if err := w.addDirToSkipList(ctx, dirID); err != nil {
|
||||
// 跳过列表更新失败不阻塞转码,只记日志(最坏情况是 scanner
|
||||
// 之后把产物扫成新视频,可手动加跳过目录修复)。
|
||||
log.Printf("[transcode] drive=%s add skip dir %s: %v", w.drv.ID(), dirID, err)
|
||||
}
|
||||
})
|
||||
return w.targetDirID, w.targetDirErr
|
||||
}
|
||||
|
||||
func (w *Worker) addDirToSkipList(ctx context.Context, dirID string) error {
|
||||
d, err := w.cat.GetDrive(ctx, w.drv.ID())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, existing := range d.SkipDirIDs {
|
||||
if existing == dirID {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return w.cat.SetDriveSkipDirIDs(ctx, w.drv.ID(), append(d.SkipDirIDs, dirID))
|
||||
}
|
||||
|
||||
// transcodedName 生成产物文件名:原文件名去掉扩展名 + .mp4。
|
||||
func transcodedName(v *catalog.Video) string {
|
||||
base := strings.TrimSpace(v.FileName)
|
||||
if base == "" {
|
||||
base = v.Title
|
||||
}
|
||||
if base == "" {
|
||||
base = v.ID
|
||||
}
|
||||
if ext := filepath.Ext(base); ext != "" {
|
||||
base = strings.TrimSuffix(base, ext)
|
||||
}
|
||||
return sanitizeFileName(base) + ".mp4"
|
||||
}
|
||||
|
||||
// sanitizeFileName replaces path separators and other risky characters
// (/ \ : * ? " < > | and NUL) with underscores so the name cannot form an
// unexpected path, then trims surrounding whitespace. If nothing is left, a
// timestamp-based placeholder name is returned.
func sanitizeFileName(name string) string {
	// Every target is a single ASCII byte, so a byte-wise pass cannot split a
	// multi-byte UTF-8 sequence.
	cleaned := []byte(name)
	for i, b := range cleaned {
		switch b {
		case '/', '\\', ':', '*', '?', '"', '<', '>', '|', 0:
			cleaned[i] = '_'
		}
	}
	if trimmed := strings.TrimSpace(string(cleaned)); trimmed != "" {
		return trimmed
	}
	return fmt.Sprintf("transcoded-%d", time.Now().UnixMilli())
}
|
||||
@@ -3,8 +3,15 @@
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="referrer" content="no-referrer" />
|
||||
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
|
||||
<link rel="icon" type="image/png" href="/icon.png" />
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png" />
|
||||
<link rel="manifest" href="/manifest.webmanifest" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, viewport-fit=cover" />
|
||||
<meta name="mobile-web-app-capable" content="yes" />
|
||||
<meta name="apple-mobile-web-app-capable" content="yes" />
|
||||
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
|
||||
<meta name="apple-mobile-web-app-title" content="91" />
|
||||
<meta name="theme-color" content="#000000" />
|
||||
<meta name="description" content="91 视频站" />
|
||||
<title>91</title>
|
||||
<!-- Premium Fonts Preconnect & Links -->
|
||||
@@ -19,7 +26,7 @@
|
||||
(function () {
|
||||
try {
|
||||
var t = localStorage.getItem("video-site:theme");
|
||||
if (t === "pink" || t === "dark") {
|
||||
if (t === "pink" || t === "dark" || t === "sky") {
|
||||
document.documentElement.setAttribute("data-theme", t);
|
||||
} else {
|
||||
document.documentElement.setAttribute("data-theme", "dark");
|
||||
|
||||
@@ -194,7 +194,7 @@ backup_install_files() {
|
||||
local backup="$1"
|
||||
mkdir -p "$backup"
|
||||
cp -a "$INSTALL_PATH/server" "$backup/server"
|
||||
for item in dist config.example.yaml 91VideoSpider config.yaml .version; do
|
||||
for item in dist config.example.yaml config.yaml .version; do
|
||||
if [[ -e "$INSTALL_PATH/$item" ]]; then
|
||||
cp -a "$INSTALL_PATH/$item" "$backup/$item"
|
||||
fi
|
||||
@@ -205,7 +205,7 @@ restore_install_files() {
|
||||
local backup="$1"
|
||||
mkdir -p "$INSTALL_PATH"
|
||||
cp -a "$backup/server" "$INSTALL_PATH/server"
|
||||
for item in dist config.example.yaml 91VideoSpider config.yaml .version; do
|
||||
for item in dist config.example.yaml config.yaml .version; do
|
||||
rm -rf "${INSTALL_PATH:?}/$item"
|
||||
if [[ -e "$backup/$item" ]]; then
|
||||
cp -a "$backup/$item" "$INSTALL_PATH/$item"
|
||||
@@ -441,7 +441,6 @@ process_looks_like_app() {
|
||||
[[ "$cmd" == *"VIDEO_FRONTEND_DIR=$INSTALL_PATH/dist"* ]] && return 0
|
||||
[[ "$cmd" == *"VIDEO_CONFIG=$INSTALL_PATH/config.yaml"* ]] && return 0
|
||||
[[ "$cmd" == *"video-site-91"* ]] && return 0
|
||||
[[ "$cmd" == *"91VideoSpider"* ]] && return 0
|
||||
return 1
|
||||
}
|
||||
|
||||
@@ -595,10 +594,6 @@ fetch_and_unpack() {
|
||||
rm -rf "$INSTALL_PATH/dist"
|
||||
cp -R "$root/dist" "$INSTALL_PATH/dist"
|
||||
cp "$root/config.example.yaml" "$INSTALL_PATH/config.example.yaml"
|
||||
if [[ -d "$root/91VideoSpider" ]]; then
|
||||
rm -rf "$INSTALL_PATH/91VideoSpider"
|
||||
cp -R "$root/91VideoSpider" "$INSTALL_PATH/91VideoSpider"
|
||||
fi
|
||||
chmod +x "$INSTALL_PATH/server"
|
||||
rm -rf "$tmp"
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "video-site",
|
||||
"version": "0.1.6",
|
||||
"version": "0.2.2",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "video-site",
|
||||
"version": "0.1.6",
|
||||
"version": "0.2.2",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"artplayer": "^5.4.0",
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"name": "video-site",
|
||||
"private": true,
|
||||
"license": "MIT",
|
||||
"version": "0.1.6",
|
||||
"version": "0.2.2",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
|
||||
|
After Width: | Height: | Size: 21 KiB |
|
After Width: | Height: | Size: 136 KiB |
|
After Width: | Height: | Size: 114 KiB |
|
After Width: | Height: | Size: 19 KiB |
@@ -1,28 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
|
||||
<defs>
|
||||
<!-- Background Gradient: Warm Orange to Sakura Pink, representing both themes -->
|
||||
<linearGradient id="bg-grad" x1="0%" y1="0%" x2="100%" y2="100%">
|
||||
<stop offset="0%" stop-color="#FF7E40" />
|
||||
<stop offset="100%" stop-color="#FF4B91" />
|
||||
</linearGradient>
|
||||
|
||||
<!-- Subtle drop shadow for the play button to give it depth -->
|
||||
<filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
|
||||
<feDropShadow dx="0" dy="1.5" stdDeviation="1" flood-opacity="0.25" />
|
||||
</filter>
|
||||
</defs>
|
||||
|
||||
<!-- Main Squircle Background -->
|
||||
<rect x="2" y="2" width="28" height="28" rx="8" fill="url(#bg-grad)" />
|
||||
|
||||
<!-- Inner border for a premium, glassmorphic feel -->
|
||||
<rect x="3" y="3" width="26" height="26" rx="7" fill="none" stroke="#ffffff" stroke-width="1" opacity="0.2" />
|
||||
|
||||
<!-- Stylized Play Button Icon, perfectly centered with rounded corners and drop shadow -->
|
||||
<path d="M13 10.5 L21.5 16 L13 21.5 Z"
|
||||
fill="#ffffff"
|
||||
stroke="#ffffff"
|
||||
stroke-width="2.5"
|
||||
stroke-linejoin="round"
|
||||
filter="url(#shadow)" />
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 1.1 KiB |
|
After Width: | Height: | Size: 212 KiB |
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"name": "91",
|
||||
"short_name": "91",
|
||||
"start_url": "/",
|
||||
"scope": "/",
|
||||
"display": "standalone",
|
||||
"display_override": ["fullscreen", "standalone"],
|
||||
"background_color": "#000000",
|
||||
"theme_color": "#000000",
|
||||
"icons": [
|
||||
{
|
||||
"src": "/app-icon-192.png",
|
||||
"sizes": "192x192",
|
||||
"type": "image/png",
|
||||
"purpose": "any"
|
||||
},
|
||||
{
|
||||
"src": "/app-icon-512.png",
|
||||
"sizes": "512x512",
|
||||
"type": "image/png",
|
||||
"purpose": "any"
|
||||
},
|
||||
{
|
||||
"src": "/app-icon-maskable-512.png",
|
||||
"sizes": "512x512",
|
||||
"type": "image/png",
|
||||
"purpose": "maskable"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
After Width: | Height: | Size: 864 KiB |
|
After Width: | Height: | Size: 855 KiB |
|
After Width: | Height: | Size: 1.3 MiB |
|
After Width: | Height: | Size: 2.5 MiB |
@@ -1,83 +1,173 @@
|
||||
import { Navigate, Route, Routes } from "react-router-dom";
|
||||
import HomePage from "@/pages/HomePage";
|
||||
import ListingPage from "@/pages/ListingPage";
|
||||
import ShortsPage from "@/pages/ShortsPage";
|
||||
import UploadPage from "@/pages/UploadPage";
|
||||
import VideoDetailPage from "@/pages/VideoDetailPage";
|
||||
import { Suspense, lazy, useEffect, type ReactNode } from "react";
|
||||
import { Navigate, Route, Routes, useLocation } from "react-router-dom";
|
||||
import { SkyStarfield } from "@/components/SkyStarfield";
|
||||
import { AdminLayout } from "@/admin/AdminLayout";
|
||||
import { LoginPage } from "@/admin/LoginPage";
|
||||
import { RequireAuth } from "@/admin/RequireAuth";
|
||||
import { DrivesPage } from "@/admin/DrivesPage";
|
||||
import { CrawlersPage } from "@/admin/CrawlersPage";
|
||||
import { VideosPage } from "@/admin/VideosPage";
|
||||
import { TagsPage } from "@/admin/TagsPage";
|
||||
import { ThemePage } from "@/admin/ThemePage";
|
||||
import { rememberVideoReturnPath, routeToPath } from "@/lib/videoReturnPath";
|
||||
|
||||
const HomePage = lazy(() => import("@/pages/HomePage"));
|
||||
const ListingPage = lazy(() => import("@/pages/ListingPage"));
|
||||
const ShortsPage = lazy(() => import("@/pages/ShortsPage"));
|
||||
const UploadPage = lazy(() => import("@/pages/UploadPage"));
|
||||
const VideoDetailPage = lazy(() => import("@/pages/VideoDetailPage"));
|
||||
|
||||
const LoginPage = lazy(() =>
|
||||
import("@/admin/LoginPage").then((module) => ({ default: module.LoginPage }))
|
||||
);
|
||||
const DrivesPage = lazy(() =>
|
||||
import("@/admin/DrivesPage").then((module) => ({ default: module.DrivesPage }))
|
||||
);
|
||||
const CrawlersPage = lazy(() =>
|
||||
import("@/admin/CrawlersPage").then((module) => ({
|
||||
default: module.CrawlersPage,
|
||||
}))
|
||||
);
|
||||
const VideosPage = lazy(() =>
|
||||
import("@/admin/VideosPage").then((module) => ({ default: module.VideosPage }))
|
||||
);
|
||||
const TagsPage = lazy(() =>
|
||||
import("@/admin/TagsPage").then((module) => ({ default: module.TagsPage }))
|
||||
);
|
||||
const ThemePage = lazy(() =>
|
||||
import("@/admin/ThemePage").then((module) => ({ default: module.ThemePage }))
|
||||
);
|
||||
|
||||
// Wraps its children in a Suspense boundary with a null fallback, so nothing
// is rendered while a lazily loaded page is still pending.
function PageSuspense({ children }: { children: ReactNode }) {
  return <Suspense fallback={null}>{children}</Suspense>;
}
|
||||
|
||||
// Renderless component: whenever the pathname, search or hash of the current
// location changes, it passes routeToPath(location) to rememberVideoReturnPath.
function VideoReturnPathRecorder() {
  const location = useLocation();

  useEffect(() => {
    rememberVideoReturnPath(routeToPath(location));
    // The effect is keyed on the three URL parts rather than on the location object itself.
  }, [location.pathname, location.search, location.hash]);

  return null;
}
|
||||
|
||||
export default function App() {
|
||||
return (
|
||||
<Routes>
|
||||
<Route path="/login" element={<LoginPage />} />
|
||||
<>
|
||||
{/* 星空蓝主题的固定位置星星层,仅在 data-theme="sky" 下可见 */}
|
||||
<SkyStarfield />
|
||||
<VideoReturnPathRecorder />
|
||||
<Routes>
|
||||
<Route
|
||||
path="/login"
|
||||
element={
|
||||
<PageSuspense>
|
||||
<LoginPage />
|
||||
</PageSuspense>
|
||||
}
|
||||
/>
|
||||
|
||||
{/* 主站需要登录 */}
|
||||
<Route
|
||||
path="/"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<HomePage />
|
||||
</RequireAuth>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="/list"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<ListingPage />
|
||||
</RequireAuth>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="/shorts"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<ShortsPage />
|
||||
</RequireAuth>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="/upload"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<UploadPage />
|
||||
</RequireAuth>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="/video/:id"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<VideoDetailPage />
|
||||
</RequireAuth>
|
||||
}
|
||||
/>
|
||||
{/* 主站需要登录 */}
|
||||
<Route
|
||||
path="/"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<PageSuspense>
|
||||
<HomePage />
|
||||
</PageSuspense>
|
||||
</RequireAuth>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="/list"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<PageSuspense>
|
||||
<ListingPage />
|
||||
</PageSuspense>
|
||||
</RequireAuth>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="/shorts"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<PageSuspense>
|
||||
<ShortsPage />
|
||||
</PageSuspense>
|
||||
</RequireAuth>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="/upload"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<PageSuspense>
|
||||
<UploadPage />
|
||||
</PageSuspense>
|
||||
</RequireAuth>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="/video/:id"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<PageSuspense>
|
||||
<VideoDetailPage />
|
||||
</PageSuspense>
|
||||
</RequireAuth>
|
||||
}
|
||||
/>
|
||||
|
||||
{/* 管理后台也需要登录 */}
|
||||
<Route
|
||||
path="/admin"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<AdminLayout />
|
||||
</RequireAuth>
|
||||
}
|
||||
>
|
||||
<Route index element={<Navigate to="/admin/drives" replace />} />
|
||||
<Route path="drives" element={<DrivesPage />} />
|
||||
<Route path="crawlers" element={<CrawlersPage />} />
|
||||
<Route path="videos" element={<VideosPage />} />
|
||||
<Route path="tags" element={<TagsPage />} />
|
||||
<Route path="theme" element={<ThemePage />} />
|
||||
</Route>
|
||||
{/* 管理后台也需要登录 */}
|
||||
<Route
|
||||
path="/admin"
|
||||
element={
|
||||
<RequireAuth>
|
||||
<AdminLayout />
|
||||
</RequireAuth>
|
||||
}
|
||||
>
|
||||
<Route index element={<Navigate to="/admin/drives" replace />} />
|
||||
<Route
|
||||
path="drives"
|
||||
element={
|
||||
<PageSuspense>
|
||||
<DrivesPage />
|
||||
</PageSuspense>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="crawlers"
|
||||
element={
|
||||
<PageSuspense>
|
||||
<CrawlersPage />
|
||||
</PageSuspense>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="videos"
|
||||
element={
|
||||
<PageSuspense>
|
||||
<VideosPage />
|
||||
</PageSuspense>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="tags"
|
||||
element={
|
||||
<PageSuspense>
|
||||
<TagsPage />
|
||||
</PageSuspense>
|
||||
}
|
||||
/>
|
||||
<Route
|
||||
path="theme"
|
||||
element={
|
||||
<PageSuspense>
|
||||
<ThemePage />
|
||||
</PageSuspense>
|
||||
}
|
||||
/>
|
||||
</Route>
|
||||
|
||||
<Route path="*" element={<Navigate to="/" replace />} />
|
||||
</Routes>
|
||||
<Route path="*" element={<Navigate to="/" replace />} />
|
||||
</Routes>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ import {
|
||||
HardDrive,
|
||||
Film,
|
||||
LogOut,
|
||||
Play,
|
||||
Home,
|
||||
Tags,
|
||||
Palette,
|
||||
@@ -71,12 +70,6 @@ export function AdminLayout() {
|
||||
return (
|
||||
<div className="admin-shell">
|
||||
<aside className="admin-sidebar">
|
||||
<div className="admin-sidebar__brand">
|
||||
<span className="admin-sidebar__brand-mark">
|
||||
<Play size={14} fill="#000" />
|
||||
</span>
|
||||
<span className="admin-sidebar__brand-text">91后台</span>
|
||||
</div>
|
||||
<nav className="admin-nav">
|
||||
<div className="admin-nav__group admin-nav__group--home">
|
||||
<span className="admin-nav__group-label">主站</span>
|
||||
|
||||
@@ -18,6 +18,8 @@ import {
|
||||
Link as LinkIcon,
|
||||
Pencil,
|
||||
Plus,
|
||||
Power,
|
||||
PowerOff,
|
||||
RefreshCw,
|
||||
TestTube,
|
||||
Trash2,
|
||||
@@ -28,12 +30,12 @@ import { Modal } from "./Modal";
|
||||
import { ConfirmModal } from "./ConfirmModal";
|
||||
import { useToast } from "./ToastContext";
|
||||
import { generationStateClass, generationStateLabel } from "./drive/constants";
|
||||
import { Spider91UploadTargetField } from "./drive/Spider91UploadTargetField";
|
||||
import { CrawlerUploadTargetField } from "./drive/CrawlerUploadTargetField";
|
||||
import { SpiderIcon } from "./icons/SpiderIcon";
|
||||
|
||||
const BUSY_STATES = new Set(["scanning", "generating", "uploading", "queued"]);
|
||||
const POLL_INTERVAL_MS = 5000;
|
||||
const UPLOAD_TARGET_KINDS = new Set(["p115", "pikpak", "p123", "googledrive", "onedrive", "wopan"]);
|
||||
const UPLOAD_TARGET_KINDS = new Set(["p115", "pikpak", "p123", "googledrive", "onedrive", "wopan", "guangyapan"]);
|
||||
|
||||
function statusBusy(status?: api.DriveGenerationStatus) {
|
||||
return BUSY_STATES.has(status?.state ?? "");
|
||||
@@ -55,7 +57,9 @@ export function CrawlersPage() {
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [expandedId, setExpandedId] = useState("");
|
||||
const [runningId, setRunningId] = useState("");
|
||||
const [uploadingId, setUploadingId] = useState("");
|
||||
const [stoppingId, setStoppingId] = useState("");
|
||||
const [togglingTeaserId, setTogglingTeaserId] = useState("");
|
||||
// undefined = 编辑器关闭;null = 新建;其余 = 编辑已有爬虫
|
||||
const [editorTarget, setEditorTarget] = useState<api.AdminCrawler | null | undefined>(undefined);
|
||||
const [deleteTarget, setDeleteTarget] = useState<api.AdminCrawler | null>(null);
|
||||
@@ -123,6 +127,23 @@ export function CrawlersPage() {
|
||||
}
|
||||
}
|
||||
|
||||
// Asks the backend to upload this crawler's videos and reports the outcome as
// a toast. The list is refreshed only when the request was accepted; the
// per-row "uploading" marker is always cleared at the end.
async function uploadVideos(crawler: api.AdminCrawler) {
  setUploadingId(crawler.id);
  try {
    const { accepted, message } = await api.uploadCrawlerVideos(crawler.id);
    if (accepted) {
      show("已触发上传任务", "success");
      await refresh(true);
    } else {
      show(message || "当前爬虫暂不满足上传条件", "info");
    }
  } catch (err) {
    const text = err instanceof Error ? err.message : "触发上传失败";
    show(text, "error");
  } finally {
    setUploadingId("");
  }
}
|
||||
|
||||
async function stop(crawler: api.AdminCrawler) {
|
||||
setStoppingId(crawler.id);
|
||||
try {
|
||||
@@ -136,6 +157,23 @@ export function CrawlersPage() {
|
||||
}
|
||||
}
|
||||
|
||||
// Flips the crawler's teaserEnabled flag with an optimistic UI update.
async function toggleTeaser(crawler: api.AdminCrawler) {
  const next = !crawler.teaserEnabled;
  setTogglingTeaserId(crawler.id);
  // Optimistic update: show the new value before the server has answered.
  setList((prev) => prev.map((item) => (item.id === crawler.id ? { ...item, teaserEnabled: next } : item)));
  try {
    const resp = await api.setDriveTeaserEnabled(crawler.id, next);
    // Reconcile with the value the server actually stored.
    setList((prev) => prev.map((item) => (item.id === crawler.id ? { ...item, teaserEnabled: resp.teaserEnabled } : item)));
    show(resp.teaserEnabled ? `已开启「${crawler.name}」预览视频生成` : `已关闭「${crawler.name}」预览视频生成`, "success");
    await refresh(true);
  } catch (e) {
    // Roll the optimistic change back to the value the crawler had before the click.
    setList((prev) => prev.map((item) => (item.id === crawler.id ? { ...item, teaserEnabled: crawler.teaserEnabled } : item)));
    show(e instanceof Error ? e.message : "切换预览视频失败", "error");
  } finally {
    setTogglingTeaserId("");
  }
}
|
||||
|
||||
async function confirmDelete() {
|
||||
if (!deleteTarget) return;
|
||||
setDeleting(true);
|
||||
@@ -213,10 +251,14 @@ export function CrawlersPage() {
|
||||
crawler={crawler}
|
||||
expanded={expandedId === crawler.id}
|
||||
running={runningId === crawler.id}
|
||||
uploading={uploadingId === crawler.id}
|
||||
stopping={stoppingId === crawler.id}
|
||||
togglingTeaser={togglingTeaserId === crawler.id}
|
||||
onToggle={() => setExpandedId(expandedId === crawler.id ? "" : crawler.id)}
|
||||
onRun={() => run(crawler)}
|
||||
onUpload={() => uploadVideos(crawler)}
|
||||
onStop={() => stop(crawler)}
|
||||
onToggleTeaser={() => toggleTeaser(crawler)}
|
||||
onEdit={() => setEditorTarget(crawler)}
|
||||
onDelete={() => setDeleteTarget(crawler)}
|
||||
/>
|
||||
@@ -262,51 +304,37 @@ function CrawlerMetric({ label, value, icon, tone }: { label: string; value: num
|
||||
);
|
||||
}
|
||||
|
||||
type StageInfo = {
|
||||
key: string;
|
||||
label: string;
|
||||
status?: api.DriveGenerationStatus;
|
||||
};
|
||||
|
||||
function crawlerStages(crawler: api.AdminCrawler): StageInfo[] {
|
||||
return [
|
||||
{ key: "scan", label: "抓取", status: crawler.scanGenerationStatus },
|
||||
{ key: "thumbnail", label: "封面", status: crawler.thumbnailGenerationStatus },
|
||||
{ key: "preview", label: "预览", status: crawler.previewGenerationStatus },
|
||||
{ key: "fingerprint", label: "指纹", status: crawler.fingerprintGenerationStatus },
|
||||
{ key: "upload", label: "上传", status: crawler.uploadGenerationStatus },
|
||||
];
|
||||
}
|
||||
|
||||
function stageStateLabel(stage: StageInfo): string {
|
||||
const state = stage.status?.state || "idle";
|
||||
if (stage.key === "scan" && state === "scanning") return "抓取中";
|
||||
if (stage.key === "upload" && state === "uploading") return "上传中";
|
||||
return generationStateLabel(state);
|
||||
}
|
||||
|
||||
function CrawlerRow({
|
||||
crawler,
|
||||
expanded,
|
||||
running,
|
||||
uploading,
|
||||
stopping,
|
||||
togglingTeaser,
|
||||
onToggle,
|
||||
onRun,
|
||||
onUpload,
|
||||
onStop,
|
||||
onToggleTeaser,
|
||||
onEdit,
|
||||
onDelete,
|
||||
}: {
|
||||
crawler: api.AdminCrawler;
|
||||
expanded: boolean;
|
||||
running: boolean;
|
||||
uploading: boolean;
|
||||
stopping: boolean;
|
||||
togglingTeaser: boolean;
|
||||
onToggle: () => void;
|
||||
onRun: () => void;
|
||||
onUpload: () => void;
|
||||
onStop: () => void;
|
||||
onToggleTeaser: () => void;
|
||||
onEdit: () => void;
|
||||
onDelete: () => void;
|
||||
}) {
|
||||
const busy = crawlerBusy(crawler);
|
||||
const uploadButtonTitle = uploading ? "上传请求处理中" : "上传本地爬虫视频到已配置的上传网盘";
|
||||
return (
|
||||
<div className={`admin-crawler-row ${expanded ? "is-expanded" : ""}`}>
|
||||
<div className="admin-crawler-row__line">
|
||||
@@ -320,29 +348,20 @@ function CrawlerRow({
|
||||
上次抓取 {formatLastCrawl(crawler.lastCrawlAt)} · 每次新增 {crawler.targetNew || "10"} 条 · 累计爬取 {crawler.totalCrawledCount ?? 0} 条
|
||||
</span>
|
||||
</span>
|
||||
<span className="admin-crawler-pipeline">
|
||||
{crawlerStages(crawler).map((stage) => {
|
||||
const state = stage.status?.state || "idle";
|
||||
const active = BUSY_STATES.has(state) || state === "cooling";
|
||||
return (
|
||||
<span
|
||||
key={stage.key}
|
||||
className={`admin-crawler-stage is-${generationStateClass(state)}`}
|
||||
title={`${stage.label}:${stageStateLabel(stage)}`}
|
||||
>
|
||||
<span className="admin-crawler-stage__dot" />
|
||||
{stage.label}
|
||||
{active && <em>{stageStateLabel(stage)}</em>}
|
||||
</span>
|
||||
);
|
||||
})}
|
||||
</span>
|
||||
<span className={`admin-status is-${crawler.status === "ok" ? "ok" : crawler.status === "error" ? "error" : "pending"}`}>
|
||||
{crawlerStatusLabel(crawler)}
|
||||
</span>
|
||||
<ChevronDown size={16} className="admin-crawler-row__chevron" />
|
||||
</button>
|
||||
<div className="admin-crawler-row__actions">
|
||||
<button
|
||||
className="admin-btn admin-crawler-preview-card-toggle"
|
||||
type="button"
|
||||
onClick={onToggleTeaser}
|
||||
disabled={togglingTeaser}
|
||||
aria-pressed={crawler.teaserEnabled}
|
||||
title={crawler.teaserEnabled ? "关闭后,该爬虫新爬取的视频不再生成预览视频" : "开启后,该爬虫新爬取的视频会生成预览视频"}
|
||||
>
|
||||
{crawler.teaserEnabled ? <Power size={13} /> : <PowerOff size={13} />}
|
||||
<span>{crawler.teaserEnabled ? "预览:开" : "预览:关"}</span>
|
||||
</button>
|
||||
{busy ? (
|
||||
<button className="admin-btn is-stop" type="button" onClick={onStop} disabled={stopping}>
|
||||
<CircleStop size={13} /> {stopping ? "停止中..." : "停止"}
|
||||
@@ -352,6 +371,14 @@ function CrawlerRow({
|
||||
<Download size={13} /> {running ? "触发中..." : "立即抓取"}
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
className="admin-btn"
|
||||
type="button"
|
||||
onClick={onUpload}
|
||||
title={uploadButtonTitle}
|
||||
>
|
||||
<Upload size={13} /> {uploading ? "上传中..." : "上传视频"}
|
||||
</button>
|
||||
<button className="admin-btn" type="button" onClick={onEdit}>
|
||||
<Pencil size={13} /> 编辑
|
||||
</button>
|
||||
@@ -943,7 +970,7 @@ function CrawlerEditorModal({
|
||||
placeholder="http://127.0.0.1:7890"
|
||||
/>
|
||||
</div>
|
||||
<Spider91UploadTargetField
|
||||
<CrawlerUploadTargetField
|
||||
value={form.uploadDriveId}
|
||||
onChange={(value) => set("uploadDriveId", value)}
|
||||
uploadTargets={uploadTargets}
|
||||
@@ -1038,12 +1065,6 @@ function crawlerTestFailure(result: api.CrawlerDryRunResult) {
|
||||
return result.error || result.mediaCheck?.error || "";
|
||||
}
|
||||
|
||||
function crawlerStatusLabel(crawler: api.AdminCrawler) {
|
||||
if (crawler.status === "ok") return "已就绪";
|
||||
if (crawler.status === "error") return "错误";
|
||||
return "未连接";
|
||||
}
|
||||
|
||||
function formatLastCrawl(ts?: number) {
|
||||
if (!ts) return "从未";
|
||||
return new Date(ts * 1000).toLocaleString("zh-CN", {
|
||||
|
||||
@@ -4,7 +4,6 @@ import {
|
||||
ArrowLeft,
|
||||
ChevronRight,
|
||||
CircleStop,
|
||||
Download,
|
||||
FolderTree,
|
||||
HardDrive,
|
||||
PlayCircle,
|
||||
@@ -48,6 +47,7 @@ function isDriveBusy(d: api.AdminDrive) {
|
||||
d.thumbnailGenerationStatus,
|
||||
d.previewGenerationStatus,
|
||||
d.fingerprintGenerationStatus,
|
||||
d.transcodeGenerationStatus,
|
||||
].some((status) => {
|
||||
const state = status?.state || "idle";
|
||||
return state !== "idle";
|
||||
@@ -57,7 +57,6 @@ function isDriveBusy(d: api.AdminDrive) {
|
||||
export function DrivesPage() {
|
||||
const [list, setList] = useState<api.AdminDrive[]>([]);
|
||||
const [storage, setStorage] = useState<api.AdminDriveStorage | null>(null);
|
||||
const [settings, setSettings] = useState<api.Settings | null>(null);
|
||||
const [nightlyStatus, setNightlyStatus] =
|
||||
useState<api.NightlyJobStatus>(idleNightlyStatus);
|
||||
const [loading, setLoading] = useState(true);
|
||||
@@ -74,6 +73,7 @@ export function DrivesPage() {
|
||||
const [regenFailedThumbId, setRegenFailedThumbId] = useState("");
|
||||
const [regenFailedFingerprintId, setRegenFailedFingerprintId] = useState("");
|
||||
const [togglingTeaserId, setTogglingTeaserId] = useState("");
|
||||
const [togglingTranscodeId, setTogglingTranscodeId] = useState("");
|
||||
const [scanningAll, setScanningAll] = useState(false);
|
||||
const [stoppingAll, setStoppingAll] = useState(false);
|
||||
const [trackingNightly, setTrackingNightly] = useState(false);
|
||||
@@ -89,21 +89,7 @@ export function DrivesPage() {
|
||||
const nameError = nameTouched && nameMissing ? "请填写网盘名称" : "";
|
||||
const formDirty = form.id
|
||||
? !sameForm(form, initialForm)
|
||||
: hasCreateFormChanges(form, initialForm);
|
||||
|
||||
const uploadTargets = useMemo(
|
||||
() =>
|
||||
list.filter(
|
||||
(d) =>
|
||||
d.kind === "pikpak" ||
|
||||
d.kind === "p115" ||
|
||||
d.kind === "p123" ||
|
||||
d.kind === "onedrive" ||
|
||||
d.kind === "googledrive" ||
|
||||
d.kind === "wopan"
|
||||
),
|
||||
[list]
|
||||
);
|
||||
: hasCreateFormChanges(form);
|
||||
|
||||
function openDriveDetail(id: string) {
|
||||
setSearchParams((prev) => {
|
||||
@@ -125,15 +111,13 @@ export function DrivesPage() {
|
||||
setLoading(true);
|
||||
setLoadError("");
|
||||
try {
|
||||
const [data, storageData, settingsData, jobStatus] = await Promise.all([
|
||||
const [data, storageData, jobStatus] = await Promise.all([
|
||||
api.listDrives(),
|
||||
api.getDriveStorage(),
|
||||
api.getSettings().catch(() => null),
|
||||
api.getNightlyJobStatus().catch(() => null),
|
||||
]);
|
||||
setList(data ?? []);
|
||||
setStorage(storageData);
|
||||
if (settingsData) setSettings(settingsData);
|
||||
if (jobStatus) setNightlyStatus(jobStatus);
|
||||
} catch (e) {
|
||||
const message = e instanceof Error ? e.message : "加载失败";
|
||||
@@ -194,10 +178,7 @@ export function DrivesPage() {
|
||||
}, [trackingNightly]);
|
||||
|
||||
function openCreate() {
|
||||
const nextForm = {
|
||||
...emptyForm,
|
||||
spider91UploadDriveId: settings?.spider91UploadDriveId ?? "",
|
||||
};
|
||||
const nextForm = { ...emptyForm };
|
||||
setForm(nextForm);
|
||||
setInitialForm(nextForm);
|
||||
setNameTouched(false);
|
||||
@@ -211,12 +192,14 @@ export function DrivesPage() {
|
||||
name: d.name,
|
||||
rootId: d.rootId,
|
||||
creds:
|
||||
d.kind === "spider91"
|
||||
? { proxy: d.spider91Proxy ?? "" }
|
||||
: d.kind === "googledrive"
|
||||
? { use_online_api: (d.googleDriveUseOnlineAPI ?? true) ? "true" : "false" }
|
||||
d.kind === "googledrive"
|
||||
? {
|
||||
use_online_api: (d.googleDriveUseOnlineAPI ?? true) ? "true" : "false",
|
||||
api_url_address: d.googleDriveOpenListApiUrl ?? "",
|
||||
}
|
||||
: d.kind === "localstorage"
|
||||
? { strm_allow_outside_root: (d.strmAllowOutsideRoot ?? false) ? "true" : "false" }
|
||||
: {},
|
||||
spider91UploadDriveId: settings?.spider91UploadDriveId ?? "",
|
||||
};
|
||||
setForm(nextForm);
|
||||
setInitialForm(nextForm);
|
||||
@@ -242,7 +225,7 @@ export function DrivesPage() {
|
||||
|
||||
function handleCreateFormChange(nextForm: FormState) {
|
||||
setForm(nextForm);
|
||||
if (!nextForm.id && !hasCreateFormChanges(nextForm, initialForm)) {
|
||||
if (!nextForm.id && !hasCreateFormChanges(nextForm)) {
|
||||
setInitialForm(nextForm);
|
||||
}
|
||||
}
|
||||
@@ -271,26 +254,6 @@ export function DrivesPage() {
|
||||
credentials: form.creds,
|
||||
});
|
||||
|
||||
if (form.kind === "spider91" && form.spider91UploadDriveId !== (settings?.spider91UploadDriveId ?? "")) {
|
||||
try {
|
||||
const updated = await api.updateSettings({
|
||||
spider91UploadDriveId: form.spider91UploadDriveId,
|
||||
});
|
||||
setSettings(updated);
|
||||
} catch (settingsErr) {
|
||||
show(
|
||||
settingsErr instanceof Error
|
||||
? `Drive 已保存,但上传目标设置失败:${settingsErr.message}`
|
||||
: "上传目标设置失败",
|
||||
"error"
|
||||
);
|
||||
setModalOpen(false);
|
||||
setInitialForm(form);
|
||||
refresh();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (resp.warning) {
|
||||
show(`已保存,但 driver 初始化失败:${resp.warning}`, "error");
|
||||
} else {
|
||||
@@ -326,10 +289,6 @@ export function DrivesPage() {
|
||||
}
|
||||
|
||||
async function handleRescan(d: api.AdminDrive) {
|
||||
if (d.kind === "spider91") {
|
||||
show("91Spider 不再支持通过网盘运行,请到爬虫管理添加爬虫脚本", "info");
|
||||
return;
|
||||
}
|
||||
if (nightlyBusy) {
|
||||
show(nightlyBusyText(nightlyStatus) || NIGHTLY_BUSY_MESSAGE, "info");
|
||||
return;
|
||||
@@ -499,6 +458,41 @@ export function DrivesPage() {
|
||||
}
|
||||
}
|
||||
|
||||
async function handleStartTranscode(d: api.AdminDrive) {
|
||||
setTogglingTranscodeId(d.id);
|
||||
try {
|
||||
const resp = await api.startDriveTranscode(d.id);
|
||||
if (resp.accepted) {
|
||||
show(`已开始「${d.name || d.id}」的视频转码`, "success");
|
||||
} else {
|
||||
show(resp.message || "转码任务未能开启", "info");
|
||||
}
|
||||
refreshDriveList();
|
||||
} catch (e) {
|
||||
show(e instanceof Error ? e.message : "开启失败", "error");
|
||||
} finally {
|
||||
setTogglingTranscodeId("");
|
||||
}
|
||||
}
|
||||
|
||||
async function handleStopTranscode(d: api.AdminDrive) {
|
||||
setTogglingTranscodeId(d.id);
|
||||
try {
|
||||
const resp = await api.stopDriveTranscode(d.id);
|
||||
show(
|
||||
resp.stopped
|
||||
? `已停止「${d.name || d.id}」的视频转码`
|
||||
: `「${d.name || d.id}」没有正在运行的转码任务`,
|
||||
"success"
|
||||
);
|
||||
refreshDriveList();
|
||||
} catch (e) {
|
||||
show(e instanceof Error ? e.message : "停止失败", "error");
|
||||
} finally {
|
||||
setTogglingTranscodeId("");
|
||||
}
|
||||
}
|
||||
|
||||
const selectedDrive = useMemo(() => {
|
||||
return selectedDriveId ? list.find((d) => d.id === selectedDriveId) : null;
|
||||
}, [selectedDriveId, list]);
|
||||
@@ -524,7 +518,7 @@ export function DrivesPage() {
|
||||
</div>
|
||||
<div className="admin-drive-detail__header-right">
|
||||
<span className="admin-drive-detail__kind-chip">{kindLabel[d.kind] ?? d.kind}</span>
|
||||
<StatusTag kind={d.kind} status={d.status} error={d.lastError} hasCred={d.hasCredential} />
|
||||
<StatusTag status={d.status} error={d.lastError} hasCred={d.hasCredential} />
|
||||
</div>
|
||||
</header>
|
||||
|
||||
@@ -549,12 +543,6 @@ export function DrivesPage() {
|
||||
<span className="admin-detail-value admin-mono-cell">{d.rootId}</span>
|
||||
</div>
|
||||
)}
|
||||
{d.kind === "spider91" && (
|
||||
<div className="admin-detail-row">
|
||||
<span className="admin-detail-label">配置状态</span>
|
||||
<span className="admin-detail-value">已废弃,请到爬虫管理添加</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{d.lastError && (
|
||||
<div className="admin-detail-error">{d.lastError}</div>
|
||||
@@ -566,33 +554,21 @@ export function DrivesPage() {
|
||||
type="button"
|
||||
className="admin-btn is-primary"
|
||||
onClick={() => handleRescan(d)}
|
||||
disabled={d.kind === "spider91"}
|
||||
aria-disabled={d.kind === "spider91" || nightlyBusy || isDriveBusy(d) || !!scanningDriveIds[d.id]}
|
||||
aria-disabled={nightlyBusy || isDriveBusy(d) || !!scanningDriveIds[d.id]}
|
||||
title={
|
||||
d.kind === "spider91"
|
||||
? "91Spider 不再支持通过网盘运行,请到爬虫管理添加爬虫脚本"
|
||||
: nightlyBusy
|
||||
nightlyBusy
|
||||
? nightlyBusyText(nightlyStatus) || NIGHTLY_BUSY_MESSAGE
|
||||
: isDriveBusy(d) || scanningDriveIds[d.id]
|
||||
? DRIVE_BUSY_MESSAGE
|
||||
: undefined
|
||||
}
|
||||
>
|
||||
{d.kind === "spider91" ? (
|
||||
<>
|
||||
<Download size={13} className={scanningDriveIds[d.id] ? "admin-spin" : undefined} />
|
||||
已废弃
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<RefreshCw size={13} className={scanningDriveIds[d.id] ? "admin-spin" : undefined} />
|
||||
{scanningDriveIds[d.id] ? "触发中..." : "立即重扫"}
|
||||
</>
|
||||
)}
|
||||
<RefreshCw size={13} className={scanningDriveIds[d.id] ? "admin-spin" : undefined} />
|
||||
{scanningDriveIds[d.id] ? "触发中..." : "立即重扫"}
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
className="admin-btn is-stop"
|
||||
className="admin-btn is-primary"
|
||||
onClick={() => handleStopDriveTasks(d)}
|
||||
disabled={!!stoppingDriveId}
|
||||
title="停止此网盘当前的扫描、封面、预览视频和视频指纹生成任务。"
|
||||
@@ -601,30 +577,26 @@ export function DrivesPage() {
|
||||
{stoppingDriveId === d.id ? "停止中..." : "停止所有任务"}
|
||||
</button>
|
||||
</div>
|
||||
{d.kind !== "spider91" && (
|
||||
<button type="button" className="admin-btn" onClick={() => openEdit(d)}>
|
||||
编辑配置凭证
|
||||
</button>
|
||||
)}
|
||||
<button type="button" className="admin-btn is-primary" onClick={() => openEdit(d)}>
|
||||
编辑配置凭证
|
||||
</button>
|
||||
<button type="button" className="admin-btn is-danger admin-detail-actions__danger" onClick={() => setDeleteTarget(d)}>
|
||||
<Trash2 size={13} /> 删除网盘
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{d.kind !== "spider91" && (
|
||||
<SkipDirsPanel
|
||||
drive={d}
|
||||
onSaved={(saved) => {
|
||||
setList((prev) =>
|
||||
prev.map((item) =>
|
||||
item.id === saved.id ? { ...item, skipDirIds: saved.skipDirIds } : item
|
||||
)
|
||||
);
|
||||
refreshDriveList();
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
<SkipDirsPanel
|
||||
drive={d}
|
||||
onSaved={(saved) => {
|
||||
setList((prev) =>
|
||||
prev.map((item) =>
|
||||
item.id === saved.id ? { ...item, skipDirIds: saved.skipDirIds } : item
|
||||
)
|
||||
);
|
||||
refreshDriveList();
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
@@ -634,10 +606,13 @@ export function DrivesPage() {
|
||||
regenFailedThumbId={regenFailedThumbId}
|
||||
regenFailedFingerprintId={regenFailedFingerprintId}
|
||||
togglingTeaserId={togglingTeaserId}
|
||||
togglingTranscodeId={togglingTranscodeId}
|
||||
onToggleTeaser={() => handleToggleTeaser(d)}
|
||||
onRegenFailed={() => handleRegenFailed(d)}
|
||||
onRegenFailedThumbnails={() => handleRegenFailedThumbnails(d)}
|
||||
onRegenFailedFingerprints={() => handleRegenFailedFingerprints(d)}
|
||||
onStartTranscode={() => handleStartTranscode(d)}
|
||||
onStopTranscode={() => handleStopTranscode(d)}
|
||||
/>
|
||||
|
||||
<div className="admin-detail-card">
|
||||
@@ -689,7 +664,6 @@ export function DrivesPage() {
|
||||
form={form}
|
||||
onChange={setForm}
|
||||
isEdit={true}
|
||||
uploadTargets={uploadTargets}
|
||||
nameError={nameError}
|
||||
onNameBlur={() => setNameTouched(true)}
|
||||
/>
|
||||
@@ -787,7 +761,7 @@ export function DrivesPage() {
|
||||
</span>
|
||||
<span>{d.name || d.id}</span>
|
||||
</div>
|
||||
<StatusTag kind={d.kind} status={d.status} error={d.lastError} hasCred={d.hasCredential} />
|
||||
<StatusTag status={d.status} error={d.lastError} hasCred={d.hasCredential} />
|
||||
</div>
|
||||
|
||||
<DriveCardMetrics d={d} />
|
||||
@@ -827,7 +801,6 @@ export function DrivesPage() {
|
||||
form={form}
|
||||
onChange={handleCreateFormChange}
|
||||
isEdit={!!list.find((x) => x.id === form.id)}
|
||||
uploadTargets={uploadTargets}
|
||||
nameError={nameError}
|
||||
onNameBlur={() => setNameTouched(true)}
|
||||
onBack={() => setNameTouched(false)}
|
||||
@@ -864,7 +837,6 @@ function sameForm(a: FormState, b: FormState): boolean {
|
||||
a.kind === b.kind &&
|
||||
a.name === b.name &&
|
||||
a.rootId === b.rootId &&
|
||||
a.spider91UploadDriveId === b.spider91UploadDriveId &&
|
||||
sameRecord(a.creds, b.creds)
|
||||
);
|
||||
}
|
||||
@@ -877,9 +849,8 @@ function sameRecord(a: Record<string, string>, b: Record<string, string>): boole
|
||||
return true;
|
||||
}
|
||||
|
||||
function hasCreateFormChanges(form: FormState, initial: FormState): boolean {
|
||||
function hasCreateFormChanges(form: FormState): boolean {
|
||||
if (form.name.trim() !== "") return true;
|
||||
if (form.rootId.trim() !== "") return true;
|
||||
if (form.spider91UploadDriveId !== initial.spider91UploadDriveId) return true;
|
||||
return Object.values(form.creds).some((value) => value.trim() !== "");
|
||||
}
|
||||
|
||||
@@ -79,9 +79,11 @@ export function LoginPage() {
|
||||
return (
|
||||
<div className="admin-login">
|
||||
<form className="admin-login__card" onSubmit={handleSubmit}>
|
||||
<h1 className="admin-login__title">
|
||||
<Play size={18} fill="currentColor" /> {setupRequired ? "首次设置管理员" : "登录"}
|
||||
</h1>
|
||||
{setupRequired && (
|
||||
<h1 className="admin-login__title">
|
||||
<Play size={18} fill="currentColor" /> 首次设置管理员
|
||||
</h1>
|
||||
)}
|
||||
<div className="admin-form">
|
||||
<div className="admin-form__row">
|
||||
<label htmlFor="admin-login-username">用户名</label>
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
import { useEffect, useId, useRef, ReactNode } from "react";
|
||||
import { createPortal } from "react-dom";
|
||||
import { X } from "lucide-react";
|
||||
|
||||
type Props = {
|
||||
open: boolean;
|
||||
title: string;
|
||||
title?: string;
|
||||
ariaLabel?: string;
|
||||
onClose: () => void;
|
||||
children: ReactNode;
|
||||
footer?: ReactNode;
|
||||
className?: string;
|
||||
};
|
||||
|
||||
export function Modal({ open, title, onClose, children, footer, className = "" }: Props) {
|
||||
export function Modal({ open, title, ariaLabel, onClose, children, footer, className = "" }: Props) {
|
||||
const dialogRef = useRef<HTMLDivElement>(null);
|
||||
const onCloseRef = useRef(onClose);
|
||||
const titleId = useId();
|
||||
@@ -74,18 +76,19 @@ export function Modal({ open, title, onClose, children, footer, className = "" }
|
||||
}, [open]);
|
||||
|
||||
if (!open) return null;
|
||||
return (
|
||||
return createPortal(
|
||||
<div className="admin-modal-backdrop">
|
||||
<div
|
||||
ref={dialogRef}
|
||||
className={`admin-modal${className ? ` ${className}` : ""}`}
|
||||
role="dialog"
|
||||
aria-modal="true"
|
||||
aria-labelledby={titleId}
|
||||
aria-labelledby={title ? titleId : undefined}
|
||||
aria-label={title ? undefined : ariaLabel ?? "对话框"}
|
||||
tabIndex={-1}
|
||||
>
|
||||
<div className="admin-modal__header">
|
||||
<span id={titleId}>{title}</span>
|
||||
<div className={`admin-modal__header${title ? "" : " is-titleless"}`}>
|
||||
{title && <span id={titleId}>{title}</span>}
|
||||
<button
|
||||
type="button"
|
||||
className="admin-btn"
|
||||
@@ -99,7 +102,8 @@ export function Modal({ open, title, onClose, children, footer, className = "" }
|
||||
<div className="admin-modal__body">{children}</div>
|
||||
{footer && <div className="admin-modal__footer">{footer}</div>}
|
||||
</div>
|
||||
</div>
|
||||
</div>,
|
||||
document.body
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ export function TagsPage() {
|
||||
setSaving(true);
|
||||
try {
|
||||
const r = await api.createTag(cleanLabel, splitList(aliases));
|
||||
show(`已添加标签,自动归类 ${r.classified} 个视频`, "success");
|
||||
show(`已添加标签,自动匹配 ${r.classified} 个视频`, "success");
|
||||
setLabel("");
|
||||
setAliases("");
|
||||
await refresh();
|
||||
@@ -131,14 +131,12 @@ export function TagsPage() {
|
||||
let totalVideos = 0;
|
||||
let systemCount = 0;
|
||||
let userCount = 0;
|
||||
let collectionCount = 0;
|
||||
let legacyCount = 0;
|
||||
|
||||
tags.forEach((t) => {
|
||||
totalVideos += t.count ?? 0;
|
||||
if (t.source === "system") systemCount++;
|
||||
else if (t.source === "user") userCount++;
|
||||
else if (t.source === "collection") collectionCount++;
|
||||
else if (t.source === "legacy") legacyCount++;
|
||||
});
|
||||
|
||||
@@ -147,7 +145,6 @@ export function TagsPage() {
|
||||
totalVideos,
|
||||
systemCount,
|
||||
userCount,
|
||||
collectionCount,
|
||||
legacyCount,
|
||||
};
|
||||
}, [tags]);
|
||||
@@ -213,7 +210,7 @@ export function TagsPage() {
|
||||
<div>
|
||||
<div className="admin-card">
|
||||
<div className="admin-card__title">
|
||||
<Plus size={15} /> 新增分类标签
|
||||
<Plus size={15} /> 新增标签
|
||||
</div>
|
||||
<form
|
||||
className="admin-form"
|
||||
@@ -245,7 +242,7 @@ export function TagsPage() {
|
||||
className="admin-btn is-primary"
|
||||
disabled={saving || !label.trim()}
|
||||
>
|
||||
<Plus size={13} /> {saving ? "添加中..." : "添加并自动归类"}
|
||||
<Plus size={13} /> {saving ? "添加中..." : "添加并自动匹配"}
|
||||
</button>
|
||||
</form>
|
||||
</div>
|
||||
@@ -303,13 +300,6 @@ export function TagsPage() {
|
||||
>
|
||||
用户 ({stats.userCount})
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
className={`admin-tags-filter-tab ${filterSource === "collection" ? "is-active" : ""}`}
|
||||
onClick={() => setFilterSource("collection")}
|
||||
>
|
||||
合集 ({stats.collectionCount})
|
||||
</button>
|
||||
{stats.legacyCount > 0 && (
|
||||
<button
|
||||
type="button"
|
||||
@@ -538,7 +528,6 @@ function splitList(s: string): string[] {
|
||||
|
||||
function sourceLabel(source: string): string {
|
||||
if (source === "system") return "系统";
|
||||
if (source === "collection") return "合集";
|
||||
if (source === "legacy") return "旧数据";
|
||||
return "用户";
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import { useEffect, useState } from "react";
|
||||
import { Check, Loader2, Moon, Sparkles } from "lucide-react";
|
||||
import { Check, Loader2, Moon, Sparkles, Star } from "lucide-react";
|
||||
import * as api from "./api";
|
||||
import type { Theme } from "./api";
|
||||
import { useToast } from "./ToastContext";
|
||||
import { applyTheme, getCurrentTheme } from "@/lib/theme";
|
||||
|
||||
function isTheme(value: unknown): value is Theme {
|
||||
return value === "dark" || value === "pink";
|
||||
return value === "dark" || value === "pink" || value === "sky";
|
||||
}
|
||||
|
||||
type Option = {
|
||||
@@ -32,6 +32,13 @@ const OPTIONS: Option[] = [
|
||||
description: "柔和奶白底 + 樱花粉主色,清爽温柔,日间使用更舒适。",
|
||||
icon: Sparkles,
|
||||
},
|
||||
{
|
||||
id: "sky",
|
||||
title: "星空蓝 + 暖星黄",
|
||||
subtitle: "Starry Sky",
|
||||
description: "浅天空蓝底 + 暖星黄主色,配上淡淡的网格与点点星光,顶级美感。",
|
||||
icon: Star,
|
||||
},
|
||||
];
|
||||
|
||||
/**
|
||||
|
||||