Improve crawler asset stats and admin navigation

- Count crawler assets by crawler source ID prefix after cloud migration

- Add crawler API totals for cumulative, local, and migrated videos

- Let crawler thumbnail and preview readiness counts inherit from equivalent canonical videos

- Show cumulative crawl data in crawler management cards

- Remove low-value expanded crawler metadata fields from the card body

- Move return-to-site into the main admin navigation with grouped sections

- Rename the content admin group to management and adjust footer icon sizing

- Update backend and frontend tests for crawler/admin behavior
This commit is contained in:
nianzhibai
2026-06-10 23:41:13 +08:00
parent c1355385e1
commit 7ddf33d726
13 changed files with 2284 additions and 919 deletions
+190 -14
View File
@@ -164,7 +164,11 @@ func main() {
if err != nil {
return err
}
return app.attachDrive(ctx, d)
if err := app.attachDrive(ctx, d); err != nil {
return err
}
app.scheduleCrawlerUploadMigration(ctx, driveID)
return nil
},
OnDriveDeleteCleanup: func(cleanupCtx context.Context, driveID string) (int, error) {
return app.cleanupDriveVideosForDelete(cleanupCtx, driveID)
@@ -355,6 +359,10 @@ type App struct {
// reconcile 和扫盘结束同时为同一批 pending 视频启动多个长时间入队 goroutine。
fingerprintQueueMu sync.Mutex
fingerprintQueueing map[string]bool
// crawlerUploadRunning 去重"保存上传目标后检查本地未上传文件"的后台任务。
crawlerUploadMu sync.Mutex
crawlerUploadRunning map[string]bool
}
type driveScanProgress struct {
@@ -2419,22 +2427,26 @@ func (a *App) listSpider91DriveIDs(ctx context.Context) []string {
return out
}
// waitAllPreviewQueuesIdle 阻塞直到所有 drive 的封面 worker 和预览视频 worker
// waitAllPreviewQueuesIdle 阻塞直到所有 drive 的封面预览视频和指纹 worker
// 队列都为空且无 in-flight 任务。
//
// 顺序:先等所有 thumb worker,再等所有预览视频。两个队列生成时互不等待;
// nightly 只在 phase 边界统一等待它们都 drain。
// 顺序:先等所有 thumb worker,再等预览视频,最后等指纹。队列生成时互不等待;
// nightly 只在 phase 边界统一等待它们都 drain,保证爬虫视频迁移前本地资产已产出
// 若 ctx 在等待中被取消(软超时 / shutdown),立即返回 ctx.Err。
func (a *App) waitAllPreviewQueuesIdle(ctx context.Context) error {
a.mu.Lock()
thumbWorkers := make([]*preview.ThumbWorker, 0, len(a.thumbWorkers))
previewWorkers := make([]*preview.Worker, 0, len(a.workers))
fingerprintWorkers := make([]*fingerprint.Worker, 0, len(a.fingerprintWorkers))
for _, w := range a.thumbWorkers {
thumbWorkers = append(thumbWorkers, w)
}
for _, w := range a.workers {
previewWorkers = append(previewWorkers, w)
}
for _, w := range a.fingerprintWorkers {
fingerprintWorkers = append(fingerprintWorkers, w)
}
a.mu.Unlock()
for _, w := range thumbWorkers {
@@ -2447,9 +2459,65 @@ func (a *App) waitAllPreviewQueuesIdle(ctx context.Context) error {
return err
}
}
if err := a.waitFingerprintQueueingIdle(ctx, ""); err != nil {
return err
}
for _, w := range fingerprintWorkers {
if err := w.WaitIdle(ctx); err != nil {
return err
}
}
return nil
}
// waitDriveGenerationQueuesIdle blocks until the generation queues of a single
// drive have drained, or returns the first error reported by one of the waits
// (for example because ctx was cancelled).
//
// The waits run strictly in this order: thumbnail worker, preview worker,
// in-progress fingerprint enqueueing for the drive, fingerprint worker.
//
// A drive without a registered worker yields a nil pointer from the map.
// NOTE(review): this relies on every WaitIdle method tolerating a nil
// receiver — confirm for the preview package workers.
func (a *App) waitDriveGenerationQueuesIdle(ctx context.Context, driveID string) error {
	// Snapshot the workers under the lock so none of the waits hold a.mu.
	a.mu.Lock()
	thumbs, previews, prints := a.thumbWorkers[driveID], a.workers[driveID], a.fingerprintWorkers[driveID]
	a.mu.Unlock()

	steps := []func() error{
		func() error { return thumbs.WaitIdle(ctx) },
		func() error { return previews.WaitIdle(ctx) },
		func() error { return a.waitFingerprintQueueingIdle(ctx, driveID) },
		func() error { return prints.WaitIdle(ctx) },
	}
	for _, wait := range steps {
		if err := wait(); err != nil {
			return err
		}
	}
	return nil
}
// waitFingerprintQueueingIdle polls every 200ms until fingerprintQueueingBusy
// reports false for driveID (or for all drives when driveID is empty).
//
// It returns nil immediately when nothing is busy on entry, without looking at
// ctx; otherwise it returns ctx.Err() as soon as ctx is done.
func (a *App) waitFingerprintQueueingIdle(ctx context.Context, driveID string) error {
	busy := func() bool { return a.fingerprintQueueingBusy(driveID) }
	if !busy() {
		return nil
	}

	poll := time.NewTicker(200 * time.Millisecond)
	defer poll.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-poll.C:
		}
		if !busy() {
			return nil
		}
	}
}
// fingerprintQueueingBusy reports whether fingerprint enqueueing is marked as
// in progress. With an empty driveID it answers for any drive (the tracking
// map is non-empty); otherwise it returns the flag stored for that drive.
func (a *App) fingerprintQueueingBusy(driveID string) bool {
	a.fingerprintQueueMu.Lock()
	defer a.fingerprintQueueMu.Unlock()

	if driveID == "" {
		return len(a.fingerprintQueueing) != 0
	}
	return a.fingerprintQueueing[driveID]
}
func shouldScanDrive(d drives.Drive) bool {
if d == nil || d.ID() == localupload.DriveID {
return false
@@ -2502,7 +2570,9 @@ func (a *App) scheduleScriptCrawlerCrawl(ctx context.Context, driveID string) bo
a.endDriveScanOrCrawl(driveID)
done()
}()
a.runScriptCrawlerCrawlWithTaskContext(taskCtx, driveID)
if a.runScriptCrawlerCrawlWithTaskContext(taskCtx, driveID) {
a.runCrawlerMigrationAfterManualCrawl(taskCtx, driveID)
}
}()
return true
}
@@ -2606,30 +2676,136 @@ func (a *App) runScriptCrawlerCrawlWithTaskContext(ctx context.Context, driveID
}
// runSpider91MigrationAfterManualCrawl forwards to
// runCrawlerMigrationAfterManualCrawl with the same context and drive ID; it
// adds no behavior of its own.
func (a *App) runSpider91MigrationAfterManualCrawl(ctx context.Context, driveID string) {
a.runCrawlerMigrationAfterManualCrawl(ctx, driveID)
}
// scheduleCrawlerUploadMigration starts a background upload-migration check
// for a script crawler drive and reports whether a task was started.
//
// It returns false without starting anything when the trimmed driveID is
// empty, the app or catalog is missing, the drive cannot be loaded, the drive
// is not a script crawler, it has no "upload_drive_id" credential, no migrator
// is configured, or a task for the same drive is already running.
//
// On success the task runs runCrawlerUploadMigrationAfterSave in its own
// goroutine under a context obtained from registerDriveTaskContext; when it
// finishes, that context's done callback is invoked and the per-drive running
// flag is cleared.
func (a *App) scheduleCrawlerUploadMigration(ctx context.Context, driveID string) bool {
	driveID = strings.TrimSpace(driveID)
	if driveID == "" {
		return false
	}
	if a == nil || a.cat == nil {
		return false
	}

	row, err := a.cat.GetDrive(ctx, driveID)
	switch {
	case err != nil, row == nil:
		return false
	case row.Kind != scriptcrawler.Kind:
		return false
	case strings.TrimSpace(row.Credentials["upload_drive_id"]) == "":
		return false
	}
	if a.spider91Migrator == nil {
		log.Printf("[scriptcrawler] drive=%s skip saved upload migration: migrator not configured", driveID)
		return false
	}

	// claim marks the drive as running; it reports false when another task
	// already holds the slot. The mutex is released before any logging.
	claim := func() bool {
		a.crawlerUploadMu.Lock()
		defer a.crawlerUploadMu.Unlock()
		if a.crawlerUploadRunning == nil {
			a.crawlerUploadRunning = make(map[string]bool)
		}
		if a.crawlerUploadRunning[driveID] {
			return false
		}
		a.crawlerUploadRunning[driveID] = true
		return true
	}
	if !claim() {
		log.Printf("[scriptcrawler] drive=%s saved upload migration already running", driveID)
		return false
	}

	taskCtx, done := a.registerDriveTaskContext(ctx, driveID)
	// release ends the task context first, then frees the per-drive slot.
	release := func() {
		done()
		a.crawlerUploadMu.Lock()
		delete(a.crawlerUploadRunning, driveID)
		a.crawlerUploadMu.Unlock()
	}
	go func() {
		defer release()
		a.runCrawlerUploadMigrationAfterSave(taskCtx, driveID)
	}()
	return true
}
func (a *App) runCrawlerUploadMigrationAfterSave(ctx context.Context, driveID string) {
if err := ctx.Err(); err != nil {
log.Printf("[spider91] drive=%s skip post-crawl migration: %v", driveID, err)
log.Printf("[scriptcrawler] drive=%s skip saved upload migration: %v", driveID, err)
return
}
targetDriveID := a.Spider91UploadDriveID()
d, err := a.cat.GetDrive(ctx, driveID)
if err != nil || d == nil {
log.Printf("[scriptcrawler] drive=%s saved upload migration lookup: %v", driveID, err)
return
}
targetDriveID := strings.TrimSpace(d.Credentials["upload_drive_id"])
if d.Kind != scriptcrawler.Kind || targetDriveID == "" {
return
}
if err := a.ensureDriveAttached(ctx, driveID); err != nil {
log.Printf("[scriptcrawler] drive=%s saved upload migration attach: %v", driveID, err)
return
}
a.mu.Lock()
worker := a.workers[driveID]
thumbWorker := a.thumbWorkers[driveID]
fingerprintWorker := a.fingerprintWorkers[driveID]
a.mu.Unlock()
a.scheduleFingerprintBackfill(ctx, driveID, fingerprintWorker)
a.enqueueDriveGeneration(ctx, driveID, worker, thumbWorker)
log.Printf("[scriptcrawler] drive=%s checking local videos for upload target=%s", driveID, targetDriveID)
if err := a.waitDriveGenerationQueuesIdle(ctx, driveID); err != nil {
log.Printf("[scriptcrawler] drive=%s saved upload migration wait canceled: %v", driveID, err)
return
}
if err := ctx.Err(); err != nil {
log.Printf("[scriptcrawler] drive=%s skip saved upload migration after wait: %v", driveID, err)
return
}
if err := a.spider91Migrator.RunOnce(ctx); err != nil {
log.Printf("[scriptcrawler] drive=%s saved upload migration: %v", driveID, err)
}
}
func (a *App) runCrawlerMigrationAfterManualCrawl(ctx context.Context, driveID string) {
if err := ctx.Err(); err != nil {
log.Printf("[scriptcrawler] drive=%s skip post-crawl migration: %v", driveID, err)
return
}
if a.cat == nil {
targetDriveID := a.Spider91UploadDriveID()
if targetDriveID == "" || a.spider91Migrator == nil {
return
}
if err := a.waitDriveGenerationQueuesIdle(ctx, driveID); err != nil {
log.Printf("[scriptcrawler] drive=%s post-crawl migration wait canceled: %v", driveID, err)
return
}
if err := a.spider91Migrator.RunOnce(ctx); err != nil {
log.Printf("[scriptcrawler] drive=%s post-crawl migration: %v", driveID, err)
}
return
}
d, err := a.cat.GetDrive(ctx, driveID)
if err != nil || d == nil {
log.Printf("[scriptcrawler] drive=%s skip post-crawl migration lookup: %v", driveID, err)
return
}
targetDriveID := strings.TrimSpace(d.Credentials["upload_drive_id"])
if targetDriveID == "" && d.Kind == spider91.Kind {
targetDriveID = a.Spider91UploadDriveID()
}
if targetDriveID == "" {
return
}
if a.spider91Migrator == nil {
log.Printf("[spider91] drive=%s skip post-crawl migration: migrator not configured", driveID)
log.Printf("[scriptcrawler] drive=%s skip post-crawl migration: migrator not configured", driveID)
return
}
log.Printf("[spider91] drive=%s waiting for generation queues before post-crawl migration target=%s", driveID, targetDriveID)
if err := a.waitAllPreviewQueuesIdle(ctx); err != nil {
log.Printf("[spider91] drive=%s post-crawl migration wait canceled: %v", driveID, err)
log.Printf("[scriptcrawler] drive=%s waiting for generation queues before post-crawl migration target=%s", driveID, targetDriveID)
if err := a.waitDriveGenerationQueuesIdle(ctx, driveID); err != nil {
log.Printf("[scriptcrawler] drive=%s post-crawl migration wait canceled: %v", driveID, err)
return
}
if err := ctx.Err(); err != nil {
log.Printf("[spider91] drive=%s skip post-crawl migration after wait: %v", driveID, err)
log.Printf("[scriptcrawler] drive=%s skip post-crawl migration after wait: %v", driveID, err)
return
}
log.Printf("[spider91] drive=%s running post-crawl migration target=%s", driveID, targetDriveID)
log.Printf("[scriptcrawler] drive=%s running post-crawl migration target=%s", driveID, targetDriveID)
if err := a.spider91Migrator.RunOnce(ctx); err != nil {
log.Printf("[spider91] drive=%s post-crawl migration: %v", driveID, err)
log.Printf("[scriptcrawler] drive=%s post-crawl migration: %v", driveID, err)
}
}
+79
View File
@@ -421,6 +421,85 @@ func TestRunSpider91MigrationAfterManualCrawlRequiresConfiguredUploadTarget(t *t
}
}
// TestScheduleCrawlerUploadMigrationRunsForConfiguredCrawler seeds a script
// crawler drive that has an upload target, schedules the saved-upload
// migration, and expects the fake migrator to be called within one second.
func TestScheduleCrawlerUploadMigrationRunsForConfiguredCrawler(t *testing.T) {
	const crawlerID = "crawler-truvaze"
	ctx := context.Background()

	store, err := catalog.Open(t.TempDir() + "/catalog.db")
	if err != nil {
		t.Fatalf("open catalog: %v", err)
	}
	t.Cleanup(func() {
		if closeErr := store.Close(); closeErr != nil {
			t.Fatalf("close catalog: %v", closeErr)
		}
	})

	crawler := &catalog.Drive{
		ID:     crawlerID,
		Kind:   scriptcrawler.Kind,
		Name:   "Truvaze",
		RootID: "/",
		Credentials: map[string]string{
			"script_path":     "/tmp/Truvaze.py",
			"upload_drive_id": "pikpak",
		},
	}
	if err := store.UpsertDrive(ctx, crawler); err != nil {
		t.Fatalf("seed crawler: %v", err)
	}

	reg := proxy.NewRegistry()
	reg.Set(crawlerID, &serverFakeKindDrive{id: crawlerID, kind: scriptcrawler.Kind})
	runner := &serverFakeSpider91MigrationRunner{}
	// The worker maps are intentionally empty: no generation workers exist for
	// the crawler in this test.
	app := &App{
		cat:                store,
		registry:           reg,
		spider91Migrator:   runner,
		workers:            map[string]*preview.Worker{},
		thumbWorkers:       map[string]*preview.ThumbWorker{},
		fingerprintWorkers: map[string]*fingerprint.Worker{},
	}

	if started := app.scheduleCrawlerUploadMigration(ctx, crawlerID); !started {
		t.Fatal("scheduleCrawlerUploadMigration returned false, want true")
	}

	// Poll every 10ms until the background task has invoked the migrator.
	// NOTE(review): runner.called is read here while the scheduled goroutine
	// may be updating it — confirm the fake synchronizes access (run with -race).
	timeout := time.After(time.Second)
	for runner.called == 0 {
		select {
		case <-timeout:
			t.Fatalf("migration calls = %d, want 1", runner.called)
		case <-time.After(10 * time.Millisecond):
		}
	}
}
// TestScheduleCrawlerUploadMigrationSkipsWithoutUploadTarget seeds a script
// crawler drive without an "upload_drive_id" credential and expects scheduling
// to be refused and the fake migrator to stay untouched.
func TestScheduleCrawlerUploadMigrationSkipsWithoutUploadTarget(t *testing.T) {
	ctx := context.Background()

	store, err := catalog.Open(t.TempDir() + "/catalog.db")
	if err != nil {
		t.Fatalf("open catalog: %v", err)
	}
	t.Cleanup(func() {
		if closeErr := store.Close(); closeErr != nil {
			t.Fatalf("close catalog: %v", closeErr)
		}
	})

	localOnly := &catalog.Drive{
		ID:          "crawler-local",
		Kind:        scriptcrawler.Kind,
		Name:        "Local Only",
		RootID:      "/",
		Credentials: map[string]string{"script_path": "/tmp/local.py"},
	}
	if err := store.UpsertDrive(ctx, localOnly); err != nil {
		t.Fatalf("seed crawler: %v", err)
	}

	runner := &serverFakeSpider91MigrationRunner{}
	app := &App{cat: store, registry: proxy.NewRegistry(), spider91Migrator: runner}

	if started := app.scheduleCrawlerUploadMigration(ctx, "crawler-local"); started {
		t.Fatal("scheduleCrawlerUploadMigration returned true without upload target")
	}
	if calls := runner.called; calls != 0 {
		t.Fatalf("migration calls = %d, want 0", calls)
	}
}
func TestDriveGenerationStatusUsesWorkerQueueNotPendingCatalogRows(t *testing.T) {
ctx := context.Background()
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
+75 -33
View File
@@ -22,6 +22,7 @@ import (
"github.com/video-site/backend/internal/catalog"
"github.com/video-site/backend/internal/drives/p123"
"github.com/video-site/backend/internal/drives/scriptcrawler"
"github.com/video-site/backend/internal/drives/spider91"
)
type AdminServer struct {
@@ -630,6 +631,7 @@ type crawlerDTO struct {
ScriptPath string `json:"scriptPath"`
Proxy string `json:"proxy,omitempty"`
TargetNew string `json:"targetNew,omitempty"`
UploadDriveID string `json:"uploadDriveId,omitempty"`
LastCrawlAt int64 `json:"lastCrawlAt,omitempty"`
ScanGenerationStatus GenerationStatus `json:"scanGenerationStatus"`
ThumbnailGenerationStatus GenerationStatus `json:"thumbnailGenerationStatus"`
@@ -644,13 +646,17 @@ type crawlerDTO struct {
FingerprintReadyCount int `json:"fingerprintReadyCount"`
FingerprintPendingCount int `json:"fingerprintPendingCount"`
FingerprintFailedCount int `json:"fingerprintFailedCount"`
TotalCrawledCount int `json:"totalCrawledCount"`
LocalVideoCount int `json:"localVideoCount"`
MigratedVideoCount int `json:"migratedVideoCount"`
}
type upsertCrawlerReq struct {
ID string `json:"id"`
ScriptPath string `json:"scriptPath"`
Proxy string `json:"proxy"`
TargetNew string `json:"targetNew"`
ID string `json:"id"`
ScriptPath string `json:"scriptPath"`
Proxy string `json:"proxy"`
TargetNew string `json:"targetNew"`
UploadDriveID string `json:"uploadDriveId"`
}
func (a *AdminServer) handleListCrawlers(w http.ResponseWriter, r *http.Request) {
@@ -659,21 +665,6 @@ func (a *AdminServer) handleListCrawlers(w http.ResponseWriter, r *http.Request)
writeErr(w, http.StatusInternalServerError, err)
return
}
teaserCounts, err := a.Catalog.CountTeasersByDrive(r.Context())
if err != nil {
writeErr(w, http.StatusInternalServerError, err)
return
}
thumbnailCounts, err := a.Catalog.CountThumbnailsByDrive(r.Context())
if err != nil {
writeErr(w, http.StatusInternalServerError, err)
return
}
fingerprintCounts, err := a.Catalog.CountFingerprintsByDrive(r.Context())
if err != nil {
writeErr(w, http.StatusInternalServerError, err)
return
}
generationStatuses := map[string]DriveGenerationStatuses{}
if a.GetDriveGenerationStatuses != nil {
generationStatuses = a.GetDriveGenerationStatuses()
@@ -684,12 +675,17 @@ func (a *AdminServer) handleListCrawlers(w http.ResponseWriter, r *http.Request)
if d == nil || !isConfiguredCrawlerDrive(d) {
continue
}
out = append(out, a.crawlerDTOForDrive(d, teaserCounts[d.ID], thumbnailCounts[d.ID], fingerprintCounts[d.ID], generationStatuses[d.ID]))
assetCounts, err := a.Catalog.CountCrawlerAssets(r.Context(), d.ID, crawlerVideoIDPrefixes(d))
if err != nil {
writeErr(w, http.StatusInternalServerError, err)
return
}
out = append(out, a.crawlerDTOForDrive(d, assetCounts, generationStatuses[d.ID]))
}
writeJSON(w, http.StatusOK, out)
}
func (a *AdminServer) crawlerDTOForDrive(d *catalog.Drive, teaser catalog.DriveTeaserCounts, thumb catalog.DriveThumbnailCounts, fp catalog.DriveFingerprintCounts, generation DriveGenerationStatuses) crawlerDTO {
func (a *AdminServer) crawlerDTOForDrive(d *catalog.Drive, assets catalog.CrawlerAssetCounts, generation DriveGenerationStatuses) crawlerDTO {
if generation.Scan.State == "" {
generation.Scan.State = "idle"
}
@@ -717,20 +713,34 @@ func (a *AdminServer) crawlerDTOForDrive(d *catalog.Drive, teaser catalog.DriveT
ScriptPath: strings.TrimSpace(d.Credentials["script_path"]),
Proxy: strings.TrimSpace(d.Credentials["proxy"]),
TargetNew: strings.TrimSpace(d.Credentials["target_new"]),
UploadDriveID: strings.TrimSpace(d.Credentials["upload_drive_id"]),
LastCrawlAt: lastCrawlAt,
ScanGenerationStatus: generation.Scan,
ThumbnailGenerationStatus: generation.Thumbnail,
PreviewGenerationStatus: generation.Preview,
FingerprintGenerationStatus: generation.Fingerprint,
ThumbnailReadyCount: thumb.Ready,
ThumbnailPendingCount: thumb.Pending,
ThumbnailFailedCount: thumb.Failed,
TeaserReadyCount: teaser.Ready,
TeaserPendingCount: teaser.Pending,
TeaserFailedCount: teaser.Failed,
FingerprintReadyCount: fp.Ready,
FingerprintPendingCount: fp.Pending,
FingerprintFailedCount: fp.Failed,
ThumbnailReadyCount: assets.Thumbnail.Ready,
ThumbnailPendingCount: assets.Thumbnail.Pending,
ThumbnailFailedCount: assets.Thumbnail.Failed,
TeaserReadyCount: assets.Teaser.Ready,
TeaserPendingCount: assets.Teaser.Pending,
TeaserFailedCount: assets.Teaser.Failed,
FingerprintReadyCount: assets.Fingerprint.Ready,
FingerprintPendingCount: assets.Fingerprint.Pending,
FingerprintFailedCount: assets.Fingerprint.Failed,
TotalCrawledCount: assets.Total,
LocalVideoCount: assets.Local,
MigratedVideoCount: assets.Migrated,
}
}
// crawlerVideoIDPrefixes returns the video-ID prefixes used to attribute
// videos to crawler drive d: one per crawler kind, each of the form
// "<kind>-<drive ID>-", with the script crawler kind first and the spider91
// kind second. It returns nil for a nil drive.
func crawlerVideoIDPrefixes(d *catalog.Drive) []string {
	if d == nil {
		return nil
	}
	kinds := [...]string{scriptcrawler.Kind, spider91.Kind}
	prefixes := make([]string, 0, len(kinds))
	for _, kind := range kinds {
		prefixes = append(prefixes, kind+"-"+d.ID+"-")
	}
	return prefixes
}
@@ -765,13 +775,18 @@ func (a *AdminServer) handleUpsertCrawler(w http.ResponseWriter, r *http.Request
}
scriptPath := strings.TrimSpace(body.ScriptPath)
incoming := map[string]string{
"script_path": scriptPath,
"proxy": strings.TrimSpace(body.Proxy),
"target_new": strings.TrimSpace(body.TargetNew),
"script_path": scriptPath,
"proxy": strings.TrimSpace(body.Proxy),
"target_new": strings.TrimSpace(body.TargetNew),
"upload_drive_id": strings.TrimSpace(body.UploadDriveID),
}
for k, v := range incoming {
creds[k] = v
}
if err := a.validateCrawlerUploadDrive(r.Context(), creds["upload_drive_id"]); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
merged, err := mergeScriptCrawlerCredentials(existing, creds)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
@@ -843,6 +858,33 @@ func (a *AdminServer) generateCrawlerID(ctx context.Context, name string) (strin
return candidate, nil
}
// validateCrawlerUploadDrive checks that driveID names a drive a crawler may
// upload to.
//
// A blank driveID means "no upload target" and is always valid. Otherwise the
// drive must be found in the catalog and its kind must be accepted by
// isCrawlerUploadTargetKind. A lookup error and a missing drive are both
// reported as "does not exist". The returned error text is meant to be shown
// to the caller as-is.
func (a *AdminServer) validateCrawlerUploadDrive(ctx context.Context, driveID string) error {
	target := strings.TrimSpace(driveID)
	switch {
	case target == "":
		return nil
	case a == nil || a.Catalog == nil:
		return errors.New("crawler upload target validation unavailable")
	}

	row, err := a.Catalog.GetDrive(ctx, target)
	if err != nil || row == nil {
		return fmt.Errorf("上传目标网盘 %q 不存在", target)
	}
	if isCrawlerUploadTargetKind(row.Kind) {
		return nil
	}
	return fmt.Errorf("上传目标网盘 %q 类型为 %s,仅支持 115网盘、PikPak、123网盘、Google Drive、OneDrive", target, row.Kind)
}
// isCrawlerUploadTargetKind reports whether kind, after trimming surrounding
// whitespace, is one of the drive kinds accepted as a crawler upload target.
// The comparison is case-sensitive.
func isCrawlerUploadTargetKind(kind string) bool {
	normalized := strings.TrimSpace(kind)
	for _, supported := range [...]string{"p115", "pikpak", "p123", "googledrive", "onedrive"} {
		if normalized == supported {
			return true
		}
	}
	return false
}
func crawlerIDSlug(raw string) string {
var b strings.Builder
lastDash := false
+160 -18
View File
@@ -880,13 +880,22 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
Name: "91 Spider",
RootID: "/",
Credentials: map[string]string{
"builtin": "spider91",
"last_crawl_at": "1800000000",
"proxy": " http://127.0.0.1:7890 ",
"script_path": scriptPath,
"builtin": "spider91",
"last_crawl_at": "1800000000",
"proxy": " http://127.0.0.1:7890 ",
"script_path": scriptPath,
"upload_drive_id": "p115-target",
},
Status: "ok",
},
{
ID: "p115-target",
Kind: "p115",
Name: "115",
RootID: "0",
Credentials: map[string]string{"cookie": "x"},
Status: "ok",
},
{
ID: "onedrive-main",
Kind: "onedrive",
@@ -910,6 +919,41 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
t.Fatalf("seed drive %s: %v", d.ID, err)
}
}
for _, v := range []*catalog.Video{
{
ID: "spider91-crawler-spider91-local",
DriveID: "crawler-spider91",
FileID: "local.mp4",
FileName: "local.mp4",
Title: "Local",
Size: 123,
Ext: "mp4",
ThumbnailURL: "/p/thumb/spider91-crawler-spider91-local",
PreviewStatus: "ready",
DurationSeconds: 12,
PublishedAt: time.Now(),
},
{
ID: "scriptcrawler-crawler-spider91-migrated",
DriveID: "p115-target",
FileID: "uploaded-id",
FileName: "migrated.mp4",
Title: "Migrated",
Size: 456,
Ext: "mp4",
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-spider91-migrated",
PreviewStatus: "ready",
DurationSeconds: 34,
PublishedAt: time.Now(),
},
} {
if err := cat.UpsertVideo(ctx, v); err != nil {
t.Fatalf("seed crawler video %s: %v", v.ID, err)
}
if err := cat.UpdateVideoFingerprint(ctx, v.ID, "sha-"+v.ID, "ready", ""); err != nil {
t.Fatalf("seed crawler fingerprint %s: %v", v.ID, err)
}
}
req := httptest.NewRequest(http.MethodGet, "/admin/api/crawlers", nil)
rr := httptest.NewRecorder()
@@ -920,28 +964,61 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
}
var got []struct {
ID string `json:"id"`
Name string `json:"name"`
Kind string `json:"kind"`
Proxy string `json:"proxy"`
LastCrawlAt int64 `json:"lastCrawlAt"`
ID string `json:"id"`
Name string `json:"name"`
Kind string `json:"kind"`
Proxy string `json:"proxy"`
UploadDriveID string `json:"uploadDriveId"`
LastCrawlAt int64 `json:"lastCrawlAt"`
TotalCrawled int `json:"totalCrawledCount"`
LocalVideos int `json:"localVideoCount"`
MigratedVideo int `json:"migratedVideoCount"`
ThumbnailReady int `json:"thumbnailReadyCount"`
TeaserReady int `json:"teaserReadyCount"`
FingerprintReady int `json:"fingerprintReadyCount"`
}
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
t.Fatalf("decode: %v", err)
}
byID := map[string]struct {
Name string
Kind string
Proxy string
LastCrawlAt int64
Name string
Kind string
Proxy string
UploadDriveID string
LastCrawlAt int64
TotalCrawled int
LocalVideos int
MigratedVideo int
ThumbnailReady int
TeaserReady int
FingerprintReady int
}{}
for _, d := range got {
byID[d.ID] = struct {
Name string
Kind string
Proxy string
LastCrawlAt int64
}{Name: d.Name, Kind: d.Kind, Proxy: d.Proxy, LastCrawlAt: d.LastCrawlAt}
Name string
Kind string
Proxy string
UploadDriveID string
LastCrawlAt int64
TotalCrawled int
LocalVideos int
MigratedVideo int
ThumbnailReady int
TeaserReady int
FingerprintReady int
}{
Name: d.Name,
Kind: d.Kind,
Proxy: d.Proxy,
UploadDriveID: d.UploadDriveID,
LastCrawlAt: d.LastCrawlAt,
TotalCrawled: d.TotalCrawled,
LocalVideos: d.LocalVideos,
MigratedVideo: d.MigratedVideo,
ThumbnailReady: d.ThumbnailReady,
TeaserReady: d.TeaserReady,
FingerprintReady: d.FingerprintReady,
}
}
if _, ok := byID["spider91-main"]; ok {
t.Fatal("legacy spider91 drive should not be returned by crawler list")
@@ -958,9 +1035,18 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
if byID["crawler-spider91"].Proxy != "http://127.0.0.1:7890" {
t.Fatalf("crawler proxy = %q, want trimmed proxy", byID["crawler-spider91"].Proxy)
}
if byID["crawler-spider91"].UploadDriveID != "p115-target" {
t.Fatalf("uploadDriveId = %q, want p115-target", byID["crawler-spider91"].UploadDriveID)
}
if byID["crawler-spider91"].LastCrawlAt != 1800000000 {
t.Fatalf("lastCrawlAt = %d, want 1800000000", byID["crawler-spider91"].LastCrawlAt)
}
if byID["crawler-spider91"].TotalCrawled != 2 || byID["crawler-spider91"].LocalVideos != 1 || byID["crawler-spider91"].MigratedVideo != 1 {
t.Fatalf("crawler counts = total %d local %d migrated %d, want 2/1/1", byID["crawler-spider91"].TotalCrawled, byID["crawler-spider91"].LocalVideos, byID["crawler-spider91"].MigratedVideo)
}
if byID["crawler-spider91"].ThumbnailReady != 2 || byID["crawler-spider91"].TeaserReady != 2 || byID["crawler-spider91"].FingerprintReady != 2 {
t.Fatalf("asset ready counts = thumb %d teaser %d fingerprint %d, want 2/2/2", byID["crawler-spider91"].ThumbnailReady, byID["crawler-spider91"].TeaserReady, byID["crawler-spider91"].FingerprintReady)
}
if _, ok := byID["onedrive-main"]; ok {
t.Fatal("onedrive should not be returned by crawler list")
}
@@ -1108,6 +1194,62 @@ func TestHandleUpsertCrawlerGeneratesIDFromScriptName(t *testing.T) {
}
}
// TestHandleUpsertCrawlerPersistsAndValidatesUploadDrive verifies that
// handleUpsertCrawler stores a supported upload target in the crawler's
// "upload_drive_id" credential and rejects an unsupported target kind with
// HTTP 400.
func TestHandleUpsertCrawlerPersistsAndValidatesUploadDrive(t *testing.T) {
	ctx := context.Background()
	tmp := t.TempDir()
	cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
	if err != nil {
		t.Fatalf("open catalog: %v", err)
	}
	t.Cleanup(func() {
		if err := cat.Close(); err != nil {
			t.Fatalf("close catalog: %v", err)
		}
	})
	scriptPath := filepath.Join(tmp, "custom.py")
	if err := os.WriteFile(scriptPath, []byte("CRAWLER_NAME = \"Upload Spider\"\n"), 0o644); err != nil {
		t.Fatalf("write crawler script: %v", err)
	}
	// One supported target (p115) and one unsupported target (localstorage).
	for _, d := range []*catalog.Drive{
		{ID: "p115-target", Kind: "p115", Name: "115", RootID: "0", Credentials: map[string]string{"cookie": "x"}},
		{ID: "local-target", Kind: "localstorage", Name: "Local", RootID: "/", Credentials: map[string]string{"path": tmp}},
	} {
		if err := cat.UpsertDrive(ctx, d); err != nil {
			t.Fatalf("seed drive %s: %v", d.ID, err)
		}
	}
	srv := &AdminServer{Catalog: cat}

	// upsert posts the crawler with the given upload target. The body is
	// produced by encoding/json instead of string concatenation so that a temp
	// path containing backslashes or quotes (for example on Windows) still
	// yields valid JSON.
	upsert := func(uploadDriveID string) *httptest.ResponseRecorder {
		t.Helper()
		payload, err := json.Marshal(map[string]string{
			"id":            "crawler-upload",
			"scriptPath":    scriptPath,
			"uploadDriveId": uploadDriveID,
		})
		if err != nil {
			t.Fatalf("encode request: %v", err)
		}
		req := httptest.NewRequest(http.MethodPost, "/admin/api/crawlers", strings.NewReader(string(payload)))
		rr := httptest.NewRecorder()
		srv.handleUpsertCrawler(rr, req)
		return rr
	}

	rr := upsert("p115-target")
	if rr.Code != http.StatusOK {
		t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
	}
	got, err := cat.GetDrive(ctx, "crawler-upload")
	if err != nil {
		t.Fatalf("get crawler: %v", err)
	}
	if got.Credentials["upload_drive_id"] != "p115-target" {
		t.Fatalf("upload_drive_id = %q, want p115-target", got.Credentials["upload_drive_id"])
	}

	rr = upsert("local-target")
	if rr.Code != http.StatusBadRequest {
		t.Fatalf("invalid target status = %d, body = %s, want 400", rr.Code, rr.Body.String())
	}
}
func TestHandleImportCrawlerScriptFile(t *testing.T) {
tmp := t.TempDir()
script := "CRAWLER_NAME = \"Demo Crawler\"\nprint('crawler')\n"
+124
View File
@@ -20,6 +20,15 @@ type Catalog struct {
db *sql.DB
}
// CrawlerAssetCounts aggregates the per-crawler video statistics produced by
// Catalog.CountCrawlerAssets.
type CrawlerAssetCounts struct {
// Total is the number of non-hidden videos whose ID matches one of the
// crawler's ID prefixes, regardless of which drive currently holds them.
Total int
// Local is the subset of Total whose drive_id equals the crawler's own ID.
Local int
// Migrated is the subset of Total whose drive_id differs from the crawler's ID.
Migrated int
// Thumbnail, Teaser and Fingerprint hold the ready/pending/failed breakdown
// for the same set of videos.
Thumbnail DriveThumbnailCounts
Teaser DriveTeaserCounts
Fingerprint DriveFingerprintCounts
}
func Open(path string) (*Catalog, error) {
db, err := sql.Open("sqlite", path+"?_pragma=journal_mode(WAL)&_pragma=busy_timeout(5000)")
if err != nil {
@@ -1455,6 +1464,121 @@ func (c *Catalog) CountFingerprintsByDrive(ctx context.Context) (map[string]Driv
return out, nil
}
// CountCrawlerAssets counts the non-hidden videos attributed to a crawler by
// video-ID prefix rather than by current drive.
//
// A video belongs to the crawler when its id starts with any entry of
// prefixes; the prefixes are matched literally (LIKE wildcards in them are
// escaped). Total is split into Local (drive_id equals crawlerID) and Migrated
// (any other drive_id).
//
// Thumbnail and teaser readiness are inherited: a video counts as ready when
// any row in videos that is equivalent to it (see crawlerAssetEquivalentSQL,
// which includes the row itself) has a non-empty thumbnail_url, respectively a
// preview_status of 'ready'. Pending and failed are only counted when no such
// equivalent exists and are derived from the row's own status columns.
// Fingerprint counts use the row's own columns only.
//
// A blank crawlerID or an empty cleaned prefix list yields zero counts and a
// nil error without querying the database.
func (c *Catalog) CountCrawlerAssets(ctx context.Context, crawlerID string, prefixes []string) (CrawlerAssetCounts, error) {
var out CrawlerAssetCounts
crawlerID = strings.TrimSpace(crawlerID)
prefixes = cleanCrawlerIDPrefixes(prefixes)
if crawlerID == "" || len(prefixes) == 0 {
return out, nil
}
where := make([]string, 0, len(prefixes))
args := make([]any, 0, 2+len(prefixes))
// Argument order must follow placeholder order in the query text: the two
// drive_id comparisons in the SELECT list come first, then one LIKE pattern
// per prefix in the WHERE clause.
args = append(args, crawlerID, crawlerID)
for range prefixes {
where = append(where, "id LIKE ? ESCAPE '\\'")
}
for _, prefix := range prefixes {
args = append(args, escapeSQLLike(prefix)+"%")
}
query := `SELECT
COUNT(*) AS total_count,
COUNT(CASE WHEN drive_id = ? THEN 1 END) AS local_count,
COUNT(CASE WHEN drive_id != ? THEN 1 END) AS migrated_count,
COUNT(CASE WHEN EXISTS (
SELECT 1 FROM videos AS asset_dup
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
AND COALESCE(asset_dup.thumbnail_url, '') != ''
) THEN 1 END) AS thumbnail_ready_count,
COUNT(CASE WHEN NOT EXISTS (
SELECT 1 FROM videos AS asset_dup
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
AND COALESCE(asset_dup.thumbnail_url, '') != ''
)
AND COALESCE(thumbnail_url, '') = ''
AND COALESCE(thumbnail_status, 'pending') NOT IN ('failed', 'skipped') THEN 1 END) AS thumbnail_pending_count,
COUNT(CASE WHEN NOT EXISTS (
SELECT 1 FROM videos AS asset_dup
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
AND COALESCE(asset_dup.thumbnail_url, '') != ''
)
AND COALESCE(thumbnail_url, '') = ''
AND COALESCE(thumbnail_status, 'pending') = 'failed' THEN 1 END) AS thumbnail_failed_count,
COUNT(CASE WHEN EXISTS (
SELECT 1 FROM videos AS asset_dup
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
AND COALESCE(asset_dup.preview_status, 'pending') = 'ready'
) THEN 1 END) AS teaser_ready_count,
COUNT(CASE WHEN NOT EXISTS (
SELECT 1 FROM videos AS asset_dup
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
AND COALESCE(asset_dup.preview_status, 'pending') = 'ready'
)
AND COALESCE(preview_status, 'pending') = 'pending' THEN 1 END) AS teaser_pending_count,
COUNT(CASE WHEN NOT EXISTS (
SELECT 1 FROM videos AS asset_dup
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
AND COALESCE(asset_dup.preview_status, 'pending') = 'ready'
)
AND COALESCE(preview_status, 'pending') = 'failed' THEN 1 END) AS teaser_failed_count,
COUNT(CASE WHEN COALESCE(sampled_sha256, '') != ''
OR COALESCE(fingerprint_status, 'pending') = 'ready' THEN 1 END) AS fingerprint_ready_count,
COUNT(CASE WHEN size_bytes > 0
AND COALESCE(sampled_sha256, '') = ''
AND COALESCE(fingerprint_status, 'pending') = 'pending' THEN 1 END) AS fingerprint_pending_count,
COUNT(CASE WHEN COALESCE(sampled_sha256, '') = ''
AND COALESCE(fingerprint_status, 'pending') = 'failed' THEN 1 END) AS fingerprint_failed_count
FROM videos
WHERE COALESCE(hidden, 0) = 0
AND (` + strings.Join(where, " OR ") + `)`
// The Scan destinations are listed in the same order as the SELECT columns.
err := c.db.QueryRowContext(ctx, query, args...).Scan(
&out.Total,
&out.Local,
&out.Migrated,
&out.Thumbnail.Ready,
&out.Thumbnail.Pending,
&out.Thumbnail.Failed,
&out.Teaser.Ready,
&out.Teaser.Pending,
&out.Teaser.Failed,
&out.Fingerprint.Ready,
&out.Fingerprint.Pending,
&out.Fingerprint.Failed,
)
return out, err
}
// crawlerAssetEquivalentSQL returns a parenthesized SQL predicate that is true
// when the row under candidateAlias is "equivalent" to the row under
// sourceAlias. Two rows are equivalent when any of the following holds:
//   - they have the same id (so every row is equivalent to itself);
//   - the source has a non-empty content_hash and the candidate's matches it;
//   - the source has size_bytes > 0 and a non-empty sampled_sha256, and the
//     candidate matches on both size_bytes and sampled_sha256.
//
// Both aliases are spliced into the SQL text verbatim, so they must be trusted
// identifiers and never user input.
func crawlerAssetEquivalentSQL(candidateAlias, sourceAlias string) string {
return fmt.Sprintf(`(%[1]s.id = %[2]s.id
OR (COALESCE(%[2]s.content_hash, '') != ''
AND %[1]s.content_hash = %[2]s.content_hash)
OR (%[2]s.size_bytes > 0
AND COALESCE(%[2]s.sampled_sha256, '') != ''
AND %[1]s.size_bytes = %[2]s.size_bytes
AND %[1]s.sampled_sha256 = %[2]s.sampled_sha256))`, candidateAlias, sourceAlias)
}
// cleanCrawlerIDPrefixes trims surrounding whitespace from every prefix, drops
// blank entries, and removes duplicates while keeping first-seen order. The
// result is always a non-nil slice and the input slice is not modified.
func cleanCrawlerIDPrefixes(prefixes []string) []string {
	cleaned := make([]string, 0, len(prefixes))
	known := make(map[string]bool, len(prefixes))
	for _, raw := range prefixes {
		candidate := strings.TrimSpace(raw)
		if candidate != "" && !known[candidate] {
			known[candidate] = true
			cleaned = append(cleaned, candidate)
		}
	}
	return cleaned
}
// escapeSQLLike prefixes every backslash, percent sign, and underscore in raw
// with a backslash so the value can be used as a literal inside a LIKE pattern
// that declares backslash as its ESCAPE character.
func escapeSQLLike(raw string) string {
	var escaped strings.Builder
	escaped.Grow(len(raw))
	// Walking bytes is safe: all three special characters are ASCII and can
	// never occur inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(raw); i++ {
		switch raw[i] {
		case '\\', '%', '_':
			escaped.WriteByte('\\')
		}
		escaped.WriteByte(raw[i])
	}
	return escaped.String()
}
func (c *Catalog) CountVideosNeedingFingerprint(ctx context.Context, driveID string) (int, error) {
var count int
err := c.db.QueryRowContext(ctx,
+22
View File
@@ -149,6 +149,28 @@ func (w *Worker) Status() TaskStatus {
return status
}
// WaitIdle blocks until the worker's queue reports a length of zero, polling
// every 200ms, and returns ctx.Err() if ctx is done first.
//
// A nil worker is treated as idle. When the queue is already empty on entry
// the method returns nil without consulting ctx.
// NOTE(review): this assumes the queue length also covers the item currently
// being processed — confirm against the queue implementation.
func (w *Worker) WaitIdle(ctx context.Context) error {
	if w == nil {
		return nil
	}
	drained := func() bool { return w.queue.lengthExcluding("") == 0 }
	if drained() {
		return nil
	}

	poll := time.NewTicker(200 * time.Millisecond)
	defer poll.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-poll.C:
		}
		if drained() {
			return nil
		}
	}
}
func (w *Worker) processQueued(ctx context.Context, v *catalog.Video) {
defer w.queue.release(v.ID)
if w.Catalog == nil || w.Drive == nil || v == nil || v.ID == "" {
+231 -67
View File
@@ -81,7 +81,22 @@ type UploadResult struct {
Size int64
}
const spider91UploadDirName = "91 Spider"
// Upload directory names.
// NOTE(review): presumably these are the folder names on the upload target
// under which migrated videos are placed (a fixed folder for the built-in 91
// spider, a shared root for script crawlers) — confirm against their users.
const (
spider91UploadDirName = "91 Spider"
scriptCrawlerUploadRootDirName = "Script Crawlers"
)
// migrationPlan bundles what one migration pass needs for a single local
// crawler source: the source itself, where its videos go, and per-plan options.
type migrationPlan struct {
// source is the local crawler drive being migrated; its ID is used in logs.
source Spider91LocalSource
// row is the catalog record associated with the plan.
// NOTE(review): presumably the source drive's own row — confirm.
row *catalog.Drive
// NOTE(review): presumably the drive kinds whose videos this plan covers — confirm.
sourceKinds []string
// targetDriveID and target identify the upload destination; for legacy
// backfill plans they are collected as backfill targets keyed by targetDriveID.
targetDriveID string
target uploadTarget
// NOTE(review): presumably the destination directory on the target — confirm.
uploadDir string
// NOTE(review): presumably how many of the newest videos stay local — confirm.
keepLatestN int
// NOTE(review): presumably gates migration on generated assets being ready — confirm.
requireAssetsReady bool
// legacyBackfill marks plans whose target takes part in the file-name
// backfill step that follows migration.
legacyBackfill bool
}
// pikpakAdapter / p115Adapter / p123Adapter / onedriveAdapter / googledriveAdapter 把具体 driver 包装成 uploadTarget。
//
@@ -369,56 +384,62 @@ func (m *Migrator) runOnce(ctx context.Context) {
log.Printf("[spider91migrate] captcha cooldown ended at %s, resuming migration", until.Format(time.RFC3339))
}
target, pp, err := m.resolveTarget()
if err != nil {
// 没目标就静默 —— 用户选择了本地保存,或还没配 115/PikPak drive
plans := m.migrationPlans(ctx)
if len(plans) == 0 {
// 没目标就静默 —— 用户选择了本地保存,或目标盘还没挂载
return
}
migrated := 0
for _, src := range m.spider91Drives(ctx) {
backfillTargets := map[string]uploadTarget{}
for _, plan := range plans {
if err := ctx.Err(); err != nil {
return
}
n, err := m.migrateDrive(ctx, src, target, pp)
n, err := m.migrateDrive(ctx, plan)
if err != nil {
log.Printf("[spider91migrate] drive=%s migrate batch error: %v", src.ID(), err)
log.Printf("[spider91migrate] drive=%s migrate batch error: %v", plan.source.ID(), err)
}
migrated += n
if active, _ := m.inCooldown(); active {
if migrated > 0 {
log.Printf("[spider91migrate] migrated %d video(s) to drive=%s", migrated, target)
log.Printf("[spider91migrate] migrated %d video(s)", migrated)
}
return
}
if plan.legacyBackfill {
backfillTargets[plan.targetDriveID] = plan.target
}
}
if migrated > 0 {
log.Printf("[spider91migrate] migrated %d video(s) to drive=%s", migrated, target)
log.Printf("[spider91migrate] migrated %d video(s)", migrated)
}
// 收尾:扫每个 spider91 drive 的本地目录,把 catalog 已经迁到别处但本地
// 收尾:扫每个本地爬虫 drive 的 videos 目录,把 catalog 已经迁到别处但本地
// 仍有残留的孤儿文件清掉。这是纯防御性兜底——正常路径下 migrateDrive
// 已经在迁移成功后立刻 CleanupSpider91Local,不会留孤儿。
for _, src := range m.spider91Drives(ctx) {
for _, plan := range plans {
if err := ctx.Err(); err != nil {
return
}
deleted, err := m.cleanupOldLocalVideos(ctx, src)
deleted, err := m.cleanupOldLocalVideos(ctx, plan)
if err != nil {
log.Printf("[spider91migrate] cleanup drive=%s: %v", src.ID(), err)
log.Printf("[spider91migrate] cleanup drive=%s: %v", plan.source.ID(), err)
}
if deleted > 0 {
log.Printf("[spider91migrate] cleanup drive=%s deleted %d orphan local file(s)", src.ID(), deleted)
log.Printf("[spider91migrate] cleanup drive=%s deleted %d orphan local file(s)", plan.source.ID(), deleted)
}
}
// 回填:把已迁移到 PikPak 的 spider91-* 视频里文件名仍是旧格式
// (比如刚迁完没改、或人工导入)的统一改成方案 B 期望的格式。
// 这一步幂等:已经是期望格式的不会再调 Rename。
if renamed, err := m.backfillFileNames(ctx, target, pp); err != nil {
log.Printf("[spider91migrate] backfill names: %v", err)
} else if renamed > 0 {
log.Printf("[spider91migrate] backfilled %d %s file name(s) to desired format", renamed, m.targetKindForLog())
for targetDriveID, pp := range backfillTargets {
if renamed, err := m.backfillFileNames(ctx, targetDriveID, pp); err != nil {
log.Printf("[spider91migrate] backfill names: %v", err)
} else if renamed > 0 {
log.Printf("[spider91migrate] backfilled %d %s file name(s) to desired format", renamed, pp.Kind())
}
}
}
@@ -446,9 +467,17 @@ func (m *Migrator) resolveTarget() (string, uploadTarget, error) {
return "", nil, errors.New("no target getter")
}
id := m.cfg.GetTargetDriveID()
return m.resolveTargetID(id)
}
func (m *Migrator) resolveTargetID(id string) (string, uploadTarget, error) {
id = strings.TrimSpace(id)
if id == "" {
return "", nil, errors.New("target drive not configured")
}
if m.cfg.Registry == nil {
return "", nil, errors.New("registry not configured")
}
d, ok := m.cfg.Registry.Get(id)
if !ok {
return "", nil, fmt.Errorf("target drive %q not in registry", id)
@@ -460,6 +489,100 @@ func (m *Migrator) resolveTarget() (string, uploadTarget, error) {
return id, t, nil
}
// migrationPlans returns one migrationPlan for every registered drive that
// implements Spider91LocalSource and currently has a resolvable upload target.
//
// The catalog row of each drive decides how the plan is built:
//   - scriptcrawler rows upload to the drive named by their "upload_drive_id"
//     credential, into a per-crawler directory, keep no local files, and only
//     migrate videos whose generated assets are ready;
//   - spider91 rows upload to the globally configured target drive, honour
//     cfg.KeepLatestN, and take part in the legacy file-name backfill.
//
// A registered spider91 driver without a readable catalog row gets a
// synthesized row. Drives with no target configured are skipped silently;
// an unresolvable script crawler target is logged and skipped.
func (m *Migrator) migrationPlans(ctx context.Context) []migrationPlan {
	if m == nil || m.cfg.Catalog == nil || m.cfg.Registry == nil {
		return nil
	}
	registered := m.cfg.Registry.All()
	plans := make([]migrationPlan, 0, len(registered))
	for _, drv := range registered {
		if drv == nil {
			continue
		}
		local, isLocal := drv.(Spider91LocalSource)
		if !isLocal {
			continue
		}

		record, lookupErr := m.cfg.Catalog.GetDrive(ctx, drv.ID())
		missing := lookupErr != nil || record == nil
		if missing && drv.Kind() == spider91.Kind {
			// Fall back to a minimal row so a registered spider91 driver still migrates.
			record = &catalog.Drive{ID: drv.ID(), Kind: spider91.Kind, RootID: "/"}
		}
		if record == nil {
			continue
		}

		switch record.Kind {
		case scriptcrawler.Kind:
			wanted := strings.TrimSpace(record.Credentials["upload_drive_id"])
			if wanted == "" {
				continue
			}
			driveID, dest, resolveErr := m.resolveTargetID(wanted)
			if resolveErr != nil {
				log.Printf("[spider91migrate] crawler=%s upload target=%q unavailable: %v", record.ID, wanted, resolveErr)
				continue
			}
			plan := migrationPlan{
				source:             local,
				row:                record,
				sourceKinds:        crawlerSourceKindsForRow(record),
				targetDriveID:      driveID,
				target:             dest,
				uploadDir:          scriptCrawlerUploadDir(record.ID),
				keepLatestN:        0,
				requireAssetsReady: true,
			}
			plans = append(plans, plan)
		case spider91.Kind:
			if m.cfg.GetTargetDriveID == nil {
				continue
			}
			wanted := strings.TrimSpace(m.cfg.GetTargetDriveID())
			if wanted == "" {
				continue
			}
			driveID, dest, resolveErr := m.resolveTargetID(wanted)
			if resolveErr != nil {
				// Deliberately quiet: the global target may simply not be mounted yet.
				continue
			}
			plan := migrationPlan{
				source:         local,
				row:            record,
				sourceKinds:    []string{spider91.Kind},
				targetDriveID:  driveID,
				target:         dest,
				uploadDir:      spider91UploadDirName,
				keepLatestN:    m.cfg.KeepLatestN,
				legacyBackfill: true,
			}
			plans = append(plans, plan)
		}
	}
	return plans
}
// crawlerSourceKindsForRow returns the kinds to try when looking up catalog
// videos for a script crawler drive. The script crawler kind always comes
// first; the spider91 kind is added when the row's "builtin" credential
// names spider91 (case-insensitive, surrounding whitespace ignored).
func crawlerSourceKindsForRow(d *catalog.Drive) []string {
	if d == nil {
		return []string{scriptcrawler.Kind}
	}
	builtin := strings.TrimSpace(d.Credentials["builtin"])
	if !strings.EqualFold(builtin, spider91.Kind) {
		return []string{scriptcrawler.Kind}
	}
	return []string{scriptcrawler.Kind, spider91.Kind}
}
// scriptCrawlerUploadDir returns the upload directory for a script crawler
// drive: scriptCrawlerUploadRootDirName followed by the sanitized drive ID,
// or by "crawler" when sanitizing leaves nothing usable.
func scriptCrawlerUploadDir(driveID string) string {
	if segment := sanitizeUploadDirSegment(driveID); segment != "" {
		return scriptCrawlerUploadRootDirName + "/" + segment
	}
	return scriptCrawlerUploadRootDirName + "/" + "crawler"
}
// sanitizeUploadDirSegment runs raw through sanitizeTitle, strips leading and
// trailing slashes, and returns "" when the result is "." or ".." so those
// values are never used as a directory name.
func sanitizeUploadDirSegment(raw string) string {
	switch segment := strings.Trim(sanitizeTitle(raw), "/"); segment {
	case ".", "..":
		return ""
	default:
		return segment
	}
}
// spider91Drives 返回当前注册的所有 Spider91 来源本地爬虫 driver。
func (m *Migrator) spider91Drives(ctx context.Context) []Spider91LocalSource {
all := m.cfg.Registry.All()
@@ -495,18 +618,13 @@ func (m *Migrator) isSpider91SourceDrive(ctx context.Context, d drives.Drive) bo
return row.Kind == scriptcrawler.Kind && strings.EqualFold(strings.TrimSpace(row.Credentials["builtin"]), spider91.Kind)
}
// migrateDrive 对单个 spider91 drive 跑一批迁移;返回成功迁移的条数。
//
// 策略(与"本地缓存最新 N 个"语义一致):
// - 列出 spider91 drive 本地 videos/ 目录所有 mp4 文件,按 mtime 降序排
// - 跳过最新 KeepLatestN 个:这些是用户希望保留在本地的最新爬取
// - 对剩下的(更旧)逐个处理:
// - 还没迁移(drive_id 仍是 src.ID())→ 上传到目标盘 + 改 catalog + 删本地
// - 已经迁移过但本地还有残留 → 仅删本地(兜底)
//
// KeepLatestN < 0 时不保护任何本地文件,全部尝试迁移(旧行为,主要给测试用)。
func (m *Migrator) migrateDrive(ctx context.Context, src Spider91LocalSource, targetDriveID string, pp uploadTarget) (int, error) {
keepN := m.cfg.KeepLatestN
// migrateDrive 对单个本地爬虫 drive 跑一批迁移;返回成功迁移的条数。
func (m *Migrator) migrateDrive(ctx context.Context, plan migrationPlan) (int, error) {
src := plan.source
if src == nil || plan.target == nil || plan.targetDriveID == "" {
return 0, nil
}
keepN := plan.keepLatestN
if keepN < 0 {
keepN = 0
}
@@ -536,17 +654,14 @@ func (m *Migrator) migrateDrive(ctx context.Context, src Spider91LocalSource, ta
files = append(files, localFile{name: e.Name(), modTime: info.ModTime()})
}
// 本地数量没超过 keepN 时不动任何文件 —— 这条是 KeepLatestN 语义的核心
if m.cfg.KeepLatestN >= 0 && len(files) <= keepN {
if plan.keepLatestN >= 0 && len(files) <= keepN {
return 0, nil
}
// 按 mtime 降序:最新的排前面,保留前 keepN 个
sort.Slice(files, func(i, j int) bool { return files[i].modTime.After(files[j].modTime) })
// 候选 = 跳过最新 keepN 个之外的(更旧的)。KeepLatestN < 0 时 candidates=files。
skip := keepN
if m.cfg.KeepLatestN < 0 {
if plan.keepLatestN < 0 {
skip = 0
}
candidates := files
@@ -556,6 +671,17 @@ func (m *Migrator) migrateDrive(ctx context.Context, src Spider91LocalSource, ta
return 0, nil
}
localVideos, err := m.cfg.Catalog.ListVideosByDriveID(ctx, src.ID(), 100000)
if err != nil {
return 0, fmt.Errorf("list local catalog videos: %w", err)
}
byFileID := make(map[string]*catalog.Video, len(localVideos))
for _, v := range localVideos {
if v != nil && strings.TrimSpace(v.FileID) != "" {
byFileID[v.FileID] = v
}
}
migrated := 0
for _, f := range candidates {
if err := ctx.Err(); err != nil {
@@ -565,21 +691,21 @@ func (m *Migrator) migrateDrive(ctx context.Context, src Spider91LocalSource, ta
break
}
viewkey := stripExt(f.name)
videoID := "spider91-" + src.ID() + "-" + viewkey
v, err := m.cfg.Catalog.GetVideo(ctx, videoID)
if err != nil || v == nil {
// 找不到 catalog 行:保险起见保留本地,让管理员可见
v := m.findVideoForLocalFile(ctx, plan, f.name, byFileID)
if v == nil {
continue
}
if v.DriveID != src.ID() {
// catalog 已迁移到别的 drive,但本地还有残留 → 兜底删本地
CleanupSpider91Local(src, v.FileID)
CleanupSpider91Local(src, f.name)
continue
}
ok, err := m.migrateOne(ctx, v, src, targetDriveID, pp)
if plan.requireAssetsReady && !crawlerVideoAssetsReady(v) {
continue
}
ok, err := m.migrateOne(ctx, v, plan)
if err != nil {
log.Printf("[spider91migrate] %s: %v", v.ID, err)
// captcha 错误(4002 / 9)说明 PikPak 当前正拒绝我们;继续在
@@ -603,10 +729,39 @@ func (m *Migrator) migrateDrive(ctx context.Context, src Spider91LocalSource, ta
return migrated, nil
}
// migrateOne 把单条 spider91 视频上传到目标盘并改写 catalog。
// findVideoForLocalFile maps a file name in the local videos directory to its
// catalog row.
//
// The byFileID index (which may be nil) is consulted first, keyed by the file
// name. Otherwise a video ID is rebuilt from the extension-less file name for
// each of plan.sourceKinds in order, and the first catalog hit is returned.
// Lookup errors are treated the same as "not found"; nil is returned when no
// candidate matches.
func (m *Migrator) findVideoForLocalFile(ctx context.Context, plan migrationPlan, localFile string, byFileID map[string]*catalog.Video) *catalog.Video {
	if indexed := byFileID[localFile]; indexed != nil {
		return indexed
	}

	stem := stripExt(localFile)
	var ownerID string
	if plan.source != nil {
		ownerID = plan.source.ID()
	}

	for _, kind := range plan.sourceKinds {
		candidateID := scriptcrawler.BuildVideoIDForKind(kind, ownerID, stem)
		found, err := m.cfg.Catalog.GetVideo(ctx, candidateID)
		if err != nil || found == nil {
			continue
		}
		return found
	}
	return nil
}
// crawlerVideoAssetsReady reports whether both the preview status and the
// fingerprint status of v equal "ready" (case-insensitive, surrounding
// whitespace ignored). A nil video is never ready.
func crawlerVideoAssetsReady(v *catalog.Video) bool {
	if v == nil {
		return false
	}
	for _, status := range [...]string{v.PreviewStatus, v.FingerprintStatus} {
		if !strings.EqualFold(strings.TrimSpace(status), "ready") {
			return false
		}
	}
	return true
}
// migrateOne 把单条本地爬虫视频上传到目标盘并改写 catalog。
// 返回 (true, nil) 表示真的迁了一条;(false, nil) 表示跳过(本地文件已不在等);
// (false, err) 表示真出错。
func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src Spider91LocalSource, targetDriveID string, pp uploadTarget) (bool, error) {
func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, plan migrationPlan) (bool, error) {
src := plan.source
pp := plan.target
path, err := src.VideoPath(v.FileID)
if err != nil {
return false, fmt.Errorf("resolve local path: %w", err)
@@ -630,20 +785,11 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src Spider9
}
defer f.Close()
// 上传到目标盘 rootID 下的固定 "91 Spider" 子目录。若用户把目标盘 rootID
// 配成某个自定义目录,这里会在该自定义目录下查找/创建 "91 Spider"。
// 上传名走 desiredPikPakName 算出来的方案 B 格式:
//
// <sanitized title>-<viewkey 后 8 位>.<ext>
//
// 这样网盘 Web 端列出来的文件名能直接看出是哪个视频,
// 又用 viewkey 后 8 位避免同标题撞名。所有目标盘共用同一格式,
// 简化前端 / catalog 的认知。
parent, err := pp.EnsureDir(ctx, spider91UploadDirName)
parent, err := pp.EnsureDir(ctx, plan.uploadDir)
if err != nil {
return false, fmt.Errorf("%s ensure %q dir: %w", pp.Kind(), spider91UploadDirName, err)
return false, fmt.Errorf("%s ensure %q dir: %w", pp.Kind(), plan.uploadDir, err)
}
uploadName := desiredPikPakName(v.Title, extractViewKey(v.ID), v.Ext)
uploadName := desiredPikPakName(v.Title, sourceIDForUploadName(v, plan), v.Ext)
res, err := pp.UploadAndReportHash(ctx, parent, uploadName, f, info.Size())
if err != nil {
return false, fmt.Errorf("%s upload: %w", pp.Kind(), err)
@@ -653,7 +799,7 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src Spider9
}
// 事务性改写 catalog 行:drive_id / file_id / content_hash
if err := m.cfg.Catalog.MigrateVideoToDrive(ctx, v.ID, targetDriveID, res.FileID, res.Hash); err != nil {
if err := m.cfg.Catalog.MigrateVideoToDrive(ctx, v.ID, plan.targetDriveID, res.FileID, res.Hash); err != nil {
return false, fmt.Errorf("catalog migrate: %w", err)
}
m.preserveCrawledThumbnail(ctx, src, v)
@@ -665,10 +811,29 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src Spider9
// 删除本地 mp4 和源 thumb(公共 /p/thumb 副本已在 preserveCrawledThumbnail 中保留)。
CleanupSpider91Local(src, v.FileID)
log.Printf("[spider91migrate] %s migrated to drive=%s(kind=%s) file=%s name=%q", v.ID, targetDriveID, pp.Kind(), res.FileID, uploadName)
log.Printf("[spider91migrate] %s migrated to drive=%s(kind=%s) file=%s name=%q", v.ID, plan.targetDriveID, pp.Kind(), res.FileID, uploadName)
return true, nil
}
// sourceIDForUploadName picks the source identifier passed to the upload-name
// builder for v.
//
// Legacy-backfill plans use extractViewKey on the video ID. Other plans strip
// the first matching "<kind>-<source drive ID>-" prefix from the video ID;
// failing that they use the extension-less file ID, and as a last resort
// extractViewKey. A nil video yields "".
func sourceIDForUploadName(v *catalog.Video, plan migrationPlan) string {
	switch {
	case v == nil:
		return ""
	case plan.legacyBackfill:
		return extractViewKey(v.ID)
	}

	for _, kind := range plan.sourceKinds {
		idPrefix := kind + "-" + plan.source.ID() + "-"
		if strings.HasPrefix(v.ID, idPrefix) {
			return v.ID[len(idPrefix):]
		}
	}

	if v.FileID == "" {
		return extractViewKey(v.ID)
	}
	return stripExt(v.FileID)
}
func (m *Migrator) preserveCrawledThumbnail(ctx context.Context, src Spider91LocalSource, v *catalog.Video) {
if m == nil || m.cfg.Catalog == nil || src == nil || v == nil || v.ID == "" || v.FileID == "" {
return
@@ -791,7 +956,11 @@ func stripExt(name string) string {
// 找到孤儿。
//
// 返回实际删除的文件个数。
func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, src Spider91LocalSource) (int, error) {
func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, plan migrationPlan) (int, error) {
src := plan.source
if src == nil {
return 0, nil
}
entries, err := os.ReadDir(src.VideosDir())
if err != nil {
if os.IsNotExist(err) {
@@ -808,18 +977,13 @@ func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, src Spider91LocalS
if e.IsDir() {
continue
}
viewkey := stripExt(e.Name())
videoID := "spider91-" + src.ID() + "-" + viewkey
v, err := m.cfg.Catalog.GetVideo(ctx, videoID)
if err != nil || v == nil {
// 找不到 catalog 行:保险起见保留,等管理员处理
v := m.findVideoForLocalFile(ctx, plan, e.Name(), nil)
if v == nil {
continue
}
if v.DriveID == src.ID() {
// 还没迁移,归 migrateDrive 管,不在这里动
continue
}
// 已迁移到别的 drive 但本地还有 → 删
path, perr := src.VideoPath(e.Name())
if perr != nil {
continue
@@ -344,6 +344,81 @@ func writeSpider91Video(t *testing.T, cat *catalog.Catalog, d *spider91.Driver,
return id
}
// setupScriptCrawler creates a script crawler driver with the given ID rooted
// in a fresh temporary directory and initializes it, failing the test if
// initialization fails.
func setupScriptCrawler(t *testing.T, id string) *scriptcrawler.Driver {
	t.Helper()
	cfg := scriptcrawler.Config{ID: id, RootDir: t.TempDir()}
	drv := scriptcrawler.New(cfg)
	err := drv.Init(context.Background())
	if err != nil {
		t.Fatalf("scriptcrawler init: %v", err)
	}
	return drv
}
// seedScriptCrawlerDrive stores a catalog drive row for d whose credentials
// point its uploads at uploadDriveID, failing the test if the upsert fails.
func seedScriptCrawlerDrive(t *testing.T, cat *catalog.Catalog, d *scriptcrawler.Driver, uploadDriveID string) {
	t.Helper()
	creds := map[string]string{
		"script_path":     "/tmp/crawler.py",
		"upload_drive_id": uploadDriveID,
	}
	row := &catalog.Drive{
		ID:          d.ID(),
		Kind:        scriptcrawler.Kind,
		Name:        "Script Crawler",
		RootID:      "/",
		Credentials: creds,
	}
	if err := cat.UpsertDrive(context.Background(), row); err != nil {
		t.Fatalf("seed scriptcrawler drive: %v", err)
	}
}
// writeScriptCrawlerVideo creates a local video file and a thumbnail for
// sourceID inside d's directories, upserts the matching catalog row, and
// returns the catalog video ID.
//
// When readyAssets is true the row is stored with preview status "ready" and
// its fingerprint is then marked "ready"; otherwise the preview status is
// "pending" and the fingerprint is left untouched.
func writeScriptCrawlerVideo(t *testing.T, cat *catalog.Catalog, d *scriptcrawler.Driver, sourceID, ext string, content []byte, readyAssets bool) string {
	t.Helper()
	ctx := context.Background()
	fileID := sourceID + ext

	// Local video payload.
	videoFile, err := d.VideoPath(fileID)
	if err != nil {
		t.Fatalf("video path: %v", err)
	}
	if err = os.WriteFile(videoFile, content, 0o644); err != nil {
		t.Fatalf("write video: %v", err)
	}

	// Placeholder thumbnail next to it.
	thumbFile, err := d.ThumbPath(sourceID + ".jpg")
	if err != nil {
		t.Fatalf("thumb path: %v", err)
	}
	if err = os.WriteFile(thumbFile, []byte("thumb"), 0o644); err != nil {
		t.Fatalf("write thumb: %v", err)
	}

	stamp := time.Now()
	videoID := scriptcrawler.BuildVideoID(d.ID(), sourceID)
	preview := map[bool]string{true: "ready", false: "pending"}[readyAssets]

	record := &catalog.Video{
		ID:            videoID,
		DriveID:       d.ID(),
		FileID:        fileID,
		FileName:      fileID,
		Title:         "Crawler " + sourceID,
		Author:        "tester",
		Ext:           strings.TrimPrefix(ext, "."),
		Quality:       "HD",
		Size:          int64(len(content)),
		ThumbnailURL:  "/p/thumb/" + videoID,
		PreviewStatus: preview,
		PublishedAt:   stamp,
		CreatedAt:     stamp,
		UpdatedAt:     stamp,
	}
	if err = cat.UpsertVideo(ctx, record); err != nil {
		t.Fatalf("upsert scriptcrawler video: %v", err)
	}
	if !readyAssets {
		return videoID
	}
	if err = cat.UpdateVideoFingerprint(ctx, videoID, "sampled-"+sourceID, "ready", ""); err != nil {
		t.Fatalf("mark fingerprint ready: %v", err)
	}
	return videoID
}
func TestRunOnceMigratesSpider91VideosAndCleansLocalFiles(t *testing.T) {
cat := setupCatalog(t)
src, _ := setupSpider91(t)
@@ -419,6 +494,98 @@ func TestRunOnceMigratesSpider91VideosAndCleansLocalFiles(t *testing.T) {
}
}
// TestRunOnceMigratesReadyScriptCrawlerVideoToConfiguredUploadDrive checks the
// script crawler path of runOnce: a video whose preview and fingerprint are
// ready is uploaded to the drive named by the crawler's upload_drive_id, the
// catalog row is rewritten to point at the target, the local video and thumb
// are removed, and the crawled thumbnail is copied into CommonThumbDir.
func TestRunOnceMigratesReadyScriptCrawlerVideoToConfiguredUploadDrive(t *testing.T) {
cat := setupCatalog(t)
src := setupScriptCrawler(t, "crawler-alpha")
pp := newFakePikPak("pikpak-target", "pikpak-root-id")
seedScriptCrawlerDrive(t, cat, src, pp.ID())
reg := newFakeRegistry()
reg.Add(src)
reg.Add(pp)
// The source ID contains dashes on purpose; the expected upload name below must carry it unchanged.
id := writeScriptCrawlerVideo(t, cat, src, "source-with-dash-001", ".mp4", []byte("script video bytes"), true)
commonThumbDir := t.TempDir()
// No GetTargetDriveID is configured: the target comes from the crawler's own credentials.
m := New(Config{
Catalog: cat,
Registry: reg,
CommonThumbDir: commonThumbDir,
})
m.runOnce(context.Background())
if pp.uploadCalls != 1 {
t.Fatalf("upload calls = %d, want 1", pp.uploadCalls)
}
// Uploads go to a per-crawler directory under "Script Crawlers".
wantDir := "Script Crawlers/crawler-alpha"
if len(pp.ensureCalls) != 1 || pp.ensureCalls[0] != wantDir {
t.Fatalf("ensure calls = %#v, want %q", pp.ensureCalls, wantDir)
}
wantName := desiredPikPakName("Crawler source-with-dash-001", "source-with-dash-001", "mp4")
if gotParent := pp.gotParents[wantName]; gotParent != "pikpak-root-id/"+wantDir {
t.Fatalf("upload parent = %q, want root/%s", gotParent, wantDir)
}
// The catalog row must now point at the upload target.
got, err := cat.GetVideo(context.Background(), id)
if err != nil {
t.Fatalf("get migrated video: %v", err)
}
if got.DriveID != pp.ID() {
t.Fatalf("drive_id = %q, want %q", got.DriveID, pp.ID())
}
if got.FileID != "remote-"+wantName {
t.Fatalf("file_id = %q, want remote upload id", got.FileID)
}
if got.FileName != wantName {
t.Fatalf("file_name = %q, want %q", got.FileName, wantName)
}
// Assets generated before the migration must survive the catalog rewrite.
if got.PreviewStatus != "ready" || got.FingerprintStatus != "ready" || got.SampledSHA256 == "" {
t.Fatalf("generated assets not preserved after migration: preview=%q fingerprint=%q sampled=%q", got.PreviewStatus, got.FingerprintStatus, got.SampledSHA256)
}
// Local video and thumbnail are gone; the thumbnail lives on in the common thumb directory.
videoPath, _ := src.VideoPath("source-with-dash-001.mp4")
if _, err := os.Stat(videoPath); !os.IsNotExist(err) {
t.Fatalf("local scriptcrawler video still exists or stat error %v", err)
}
thumbPath, _ := src.ThumbPath("source-with-dash-001.jpg")
if _, err := os.Stat(thumbPath); !os.IsNotExist(err) {
t.Fatalf("local scriptcrawler thumb still exists or stat error %v", err)
}
commonThumbPath := filepath.Join(commonThumbDir, id+".jpg")
if data, err := os.ReadFile(commonThumbPath); err != nil || string(data) != "thumb" {
t.Fatalf("common thumb = %q, %v; want copied crawled thumb", string(data), err)
}
}
// TestRunOnceSkipsScriptCrawlerVideoUntilPreviewAndFingerprintReady checks that
// runOnce leaves a script crawler video alone while its generated assets are
// still pending: nothing is uploaded, the catalog row stays on the local
// crawler drive, and the local file remains on disk.
func TestRunOnceSkipsScriptCrawlerVideoUntilPreviewAndFingerprintReady(t *testing.T) {
cat := setupCatalog(t)
src := setupScriptCrawler(t, "crawler-beta")
pp := newFakePikPak("pikpak-target", "pikpak-root-id")
seedScriptCrawlerDrive(t, cat, src, pp.ID())
reg := newFakeRegistry()
reg.Add(src)
reg.Add(pp)
// readyAssets=false: preview stays "pending" and the fingerprint is never marked ready.
id := writeScriptCrawlerVideo(t, cat, src, "pending-assets", ".mp4", []byte("script video bytes"), false)
m := New(Config{Catalog: cat, Registry: reg})
m.runOnce(context.Background())
if pp.uploadCalls != 0 {
t.Fatalf("upload calls = %d, want 0 while generated assets are pending", pp.uploadCalls)
}
got, err := cat.GetVideo(context.Background(), id)
if err != nil {
t.Fatalf("get video: %v", err)
}
if got.DriveID != src.ID() {
t.Fatalf("drive_id = %q, want local crawler drive %q", got.DriveID, src.ID())
}
videoPath, _ := src.VideoPath("pending-assets.mp4")
if _, err := os.Stat(videoPath); err != nil {
t.Fatalf("local video should remain while assets pending: %v", err)
}
}
func TestRunOnceSkipsWhenLocalFileMissing(t *testing.T) {
cat := setupCatalog(t)
src, _ := setupSpider91(t)
@@ -578,7 +745,10 @@ func TestCleanupRemovesAllAlreadyMigratedOrphans(t *testing.T) {
GetTargetDriveID: func() string { return pp.ID() },
})
deleted, err := m.cleanupOldLocalVideos(context.Background(), src)
deleted, err := m.cleanupOldLocalVideos(context.Background(), migrationPlan{
source: src,
sourceKinds: []string{spider91.Kind},
})
if err != nil {
t.Fatalf("cleanup: %v", err)
}
@@ -608,10 +778,14 @@ func TestRunOnceMigratesBuiltInSpider91ScriptCrawlerSource(t *testing.T) {
t.Fatalf("scriptcrawler init: %v", err)
}
if err := cat.UpsertDrive(ctx, &catalog.Drive{
ID: src.ID(),
Kind: scriptcrawler.Kind,
Name: "Built-in Spider91",
Credentials: map[string]string{"builtin": "spider91"},
ID: src.ID(),
Kind: scriptcrawler.Kind,
Name: "Built-in Spider91",
Credentials: map[string]string{
"builtin": "spider91",
"script_path": "/tmp/spider91.py",
"upload_drive_id": "pikpak-target",
},
}); err != nil {
t.Fatalf("upsert source drive: %v", err)
}
@@ -647,13 +821,16 @@ func TestRunOnceMigratesBuiltInSpider91ScriptCrawlerSource(t *testing.T) {
Ext: "mp4",
Quality: "HD",
Size: int64(len("scriptcrawler spider91 video")),
PreviewStatus: "pending",
PreviewStatus: "ready",
PublishedAt: now,
CreatedAt: now,
UpdatedAt: now,
}); err != nil {
t.Fatalf("upsert video: %v", err)
}
if err := cat.UpdateVideoFingerprint(ctx, id, "sampled-vk-script", "ready", ""); err != nil {
t.Fatalf("mark fingerprint ready: %v", err)
}
m := New(Config{
Catalog: cat,
+76 -43
View File
@@ -78,49 +78,79 @@ export function AdminLayout() {
<span className="admin-sidebar__brand-text">91</span>
</div>
<nav className="admin-nav">
<NavLink to="/" className="admin-nav__link">
<Home size={16} />
</NavLink>
<NavLink
to="/admin/drives"
className={({ isActive }) =>
`admin-nav__link ${isActive ? "is-active" : ""}`
}
>
<HardDrive size={16} />
</NavLink>
<NavLink
to="/admin/crawlers"
className={({ isActive }) =>
`admin-nav__link ${isActive ? "is-active" : ""}`
}
>
<SpiderIcon size={16} />
</NavLink>
<NavLink
to="/admin/videos"
className={({ isActive }) =>
`admin-nav__link ${isActive ? "is-active" : ""}`
}
>
<Film size={16} />
</NavLink>
<NavLink
to="/admin/tags"
className={({ isActive }) =>
`admin-nav__link ${isActive ? "is-active" : ""}`
}
>
<Tags size={16} />
</NavLink>
<NavLink
to="/admin/theme"
className={({ isActive }) =>
`admin-nav__link ${isActive ? "is-active" : ""}`
}
>
<Palette size={16} />
</NavLink>
<div className="admin-nav__group admin-nav__group--home">
<span className="admin-nav__group-label"></span>
<NavLink to="/" className="admin-nav__link">
<span className="admin-nav__icon"><Home size={16} /></span>
<span className="admin-nav__text">
<span className="admin-nav__title"></span>
</span>
</NavLink>
</div>
<div className="admin-nav__group">
<span className="admin-nav__group-label"></span>
<NavLink
to="/admin/drives"
className={({ isActive }) =>
`admin-nav__link ${isActive ? "is-active" : ""}`
}
>
<span className="admin-nav__icon"><HardDrive size={16} /></span>
<span className="admin-nav__text">
<span className="admin-nav__title"></span>
</span>
</NavLink>
<NavLink
to="/admin/crawlers"
className={({ isActive }) =>
`admin-nav__link ${isActive ? "is-active" : ""}`
}
>
<span className="admin-nav__icon"><SpiderIcon size={16} /></span>
<span className="admin-nav__text">
<span className="admin-nav__title"></span>
</span>
</NavLink>
</div>
<div className="admin-nav__group">
<span className="admin-nav__group-label"></span>
<NavLink
to="/admin/videos"
className={({ isActive }) =>
`admin-nav__link ${isActive ? "is-active" : ""}`
}
>
<span className="admin-nav__icon"><Film size={16} /></span>
<span className="admin-nav__text">
<span className="admin-nav__title"></span>
</span>
</NavLink>
<NavLink
to="/admin/tags"
className={({ isActive }) =>
`admin-nav__link ${isActive ? "is-active" : ""}`
}
>
<span className="admin-nav__icon"><Tags size={16} /></span>
<span className="admin-nav__text">
<span className="admin-nav__title"></span>
</span>
</NavLink>
</div>
<div className="admin-nav__group">
<span className="admin-nav__group-label"></span>
<NavLink
to="/admin/theme"
className={({ isActive }) =>
`admin-nav__link ${isActive ? "is-active" : ""}`
}
>
<span className="admin-nav__icon"><Palette size={16} /></span>
<span className="admin-nav__text">
<span className="admin-nav__title"></span>
</span>
</NavLink>
</div>
</nav>
<div className="admin-sidebar__footer">
<button
@@ -148,6 +178,9 @@ export function AdminLayout() {
<div className="admin-sidebar__mobile-overlay" onClick={() => setMobileMenuOpen(false)} />
)}
<div className={`admin-sidebar__mobile-panel${mobileMenuOpen ? " is-open" : ""}`}>
<NavLink to="/" className="admin-sidebar__home" onClick={() => setMobileMenuOpen(false)}>
<Home size={14} />
</NavLink>
<button
className="admin-sidebar__check-update"
onClick={() => { handleCheckUpdate(); setMobileMenuOpen(false); }}
+616 -432
View File
File diff suppressed because it is too large Load Diff
+5
View File
@@ -200,6 +200,7 @@ export type AdminCrawler = {
scriptPath: string;
proxy?: string;
targetNew?: string;
uploadDriveId?: string;
lastCrawlAt?: number;
scanGenerationStatus?: DriveGenerationStatus;
thumbnailGenerationStatus?: DriveGenerationStatus;
@@ -214,6 +215,9 @@ export type AdminCrawler = {
fingerprintReadyCount: number;
fingerprintPendingCount: number;
fingerprintFailedCount: number;
totalCrawledCount: number;
localVideoCount: number;
migratedVideoCount: number;
};
export type UpsertCrawlerInput = {
@@ -221,6 +225,7 @@ export type UpsertCrawlerInput = {
scriptPath: string;
proxy?: string;
targetNew?: string;
uploadDriveId?: string;
};
export type ImportCrawlerScriptResult = {
+509 -300
View File
File diff suppressed because it is too large Load Diff
+14 -6
View File
@@ -205,16 +205,19 @@ test("drive type selector keeps primary source order", () => {
test("crawler management is a separate admin section", () => {
assert.match(adminLayoutSource, /to="\/admin\/crawlers"/);
assert.match(adminLayoutSource, /> 爬虫管理/);
assert.match(adminLayoutSource, /SpiderIcon size=\{16\} \/> 爬虫管理/);
assert.match(adminLayoutSource, /admin-nav__title">爬虫管理/);
assert.match(adminLayoutSource, /admin-nav__icon"><SpiderIcon size=\{16\} \/>/);
assert.match(appSource, /path="crawlers" element=\{<CrawlersPage \/>/);
assert.match(crawlerPageSource, /export function CrawlersPage/);
assert.match(crawlerPageSource, /SpiderIcon/);
assert.match(crawlerPageSource, /添加爬虫/);
assert.match(crawlerPageSource, /返回列表/);
assert.match(crawlerPageSource, /setMode\("detail"\)/);
assert.match(crawlerPageSource, /setMode\("list"\)/);
// 新设计:列表 + Modal 三步编辑器,删除确认走 ConfirmModal,任务进行中自动轮询
assert.match(crawlerPageSource, /CrawlerEditorModal/);
assert.match(crawlerPageSource, /ConfirmModal/);
assert.doesNotMatch(crawlerPageSource, /window\.confirm/);
assert.match(crawlerPageSource, /POLL_INTERVAL_MS/);
assert.match(crawlerPageSource, /api\.listCrawlers/);
assert.match(crawlerPageSource, /api\.listDrives/);
assert.match(crawlerPageSource, /api\.upsertCrawler/);
assert.match(crawlerPageSource, /api\.runCrawler/);
assert.match(crawlerPageSource, /api\.stopCrawlerTasks/);
@@ -226,11 +229,15 @@ test("crawler management is a separate admin section", () => {
assert.match(crawlerPageSource, /链接导入/);
assert.match(crawlerPageSource, /测试脚本/);
assert.match(crawlerPageSource, /测试通过/);
assert.match(crawlerPageSource, /Spider91UploadTargetField/);
assert.match(crawlerPageSource, /uploadDriveId/);
assert.match(crawlerPageSource, /UPLOAD_TARGET_KINDS/);
assert.doesNotMatch(crawlerPageSource, /新建脚本/);
assert.doesNotMatch(crawlerPageSource, /爬虫 ID/);
assert.doesNotMatch(crawlerPageSource, /crawler-id/);
assert.doesNotMatch(crawlerPageSource, /crawler-name/);
assert.doesNotMatch(crawlerPageSource, /脚本路径/);
// 脚本路径只读展示,不允许手动填写
assert.doesNotMatch(crawlerPageSource, /crawler-script-path/);
assert.doesNotMatch(crawlerPageSource, /Python 解释器/);
assert.doesNotMatch(crawlerPageSource, /自定义配置 JSON/);
assert.doesNotMatch(crawlerPageSource, /Bot/);
@@ -238,6 +245,7 @@ test("crawler management is a separate admin section", () => {
assert.doesNotMatch(crawlerPageSource, /builtin/);
assert.doesNotMatch(crawlerPageSource, /内置 91/);
assert.match(apiSource, /type AdminCrawler/);
assert.match(apiSource, /uploadDriveId\?: string/);
assert.match(apiSource, /"\/crawlers"/);
assert.match(apiSource, /"\/crawlers\/import-file"/);
assert.match(apiSource, /"\/crawlers\/import-url"/);