mirror of
https://github.com/nianzhibai/91.git
synced 2026-06-15 08:45:41 +08:00
Improve crawler asset stats and admin navigation
- Count crawler assets by crawler source ID prefix after cloud migration - Add crawler API totals for cumulative, local, and migrated videos - Let crawler thumbnail and preview readiness inherit equivalent canonical videos - Show cumulative crawl data in crawler management cards - Remove low-value expanded crawler metadata fields from the card body - Move return-to-site into the main admin navigation with grouped sections - Rename the content admin group to management and adjust footer icon sizing - Update backend and frontend tests for crawler/admin behavior
This commit is contained in:
+190
-14
@@ -164,7 +164,11 @@ func main() {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return app.attachDrive(ctx, d)
|
||||
if err := app.attachDrive(ctx, d); err != nil {
|
||||
return err
|
||||
}
|
||||
app.scheduleCrawlerUploadMigration(ctx, driveID)
|
||||
return nil
|
||||
},
|
||||
OnDriveDeleteCleanup: func(cleanupCtx context.Context, driveID string) (int, error) {
|
||||
return app.cleanupDriveVideosForDelete(cleanupCtx, driveID)
|
||||
@@ -355,6 +359,10 @@ type App struct {
|
||||
// reconcile 和扫盘结束同时为同一批 pending 视频启动多个长时间入队 goroutine。
|
||||
fingerprintQueueMu sync.Mutex
|
||||
fingerprintQueueing map[string]bool
|
||||
|
||||
// crawlerUploadRunning 去重"保存上传目标后检查本地未上传文件"的后台任务。
|
||||
crawlerUploadMu sync.Mutex
|
||||
crawlerUploadRunning map[string]bool
|
||||
}
|
||||
|
||||
type driveScanProgress struct {
|
||||
@@ -2419,22 +2427,26 @@ func (a *App) listSpider91DriveIDs(ctx context.Context) []string {
|
||||
return out
|
||||
}
|
||||
|
||||
// waitAllPreviewQueuesIdle 阻塞直到所有 drive 的封面 worker 和预览视频 worker
|
||||
// waitAllPreviewQueuesIdle 阻塞直到所有 drive 的封面、预览视频和指纹 worker
|
||||
// 队列都为空且无 in-flight 任务。
|
||||
//
|
||||
// 顺序:先等所有 thumb worker,再等所有预览视频。两个队列生成时互不等待;
|
||||
// nightly 只在 phase 边界统一等待它们都 drain。
|
||||
// 顺序:先等所有 thumb worker,再等预览视频,最后等指纹。队列生成时互不等待;
|
||||
// nightly 只在 phase 边界统一等待它们都 drain,保证爬虫视频迁移前本地资产已产出。
|
||||
// 若 ctx 在等待中被取消(软超时 / shutdown),立即返回 ctx.Err。
|
||||
func (a *App) waitAllPreviewQueuesIdle(ctx context.Context) error {
|
||||
a.mu.Lock()
|
||||
thumbWorkers := make([]*preview.ThumbWorker, 0, len(a.thumbWorkers))
|
||||
previewWorkers := make([]*preview.Worker, 0, len(a.workers))
|
||||
fingerprintWorkers := make([]*fingerprint.Worker, 0, len(a.fingerprintWorkers))
|
||||
for _, w := range a.thumbWorkers {
|
||||
thumbWorkers = append(thumbWorkers, w)
|
||||
}
|
||||
for _, w := range a.workers {
|
||||
previewWorkers = append(previewWorkers, w)
|
||||
}
|
||||
for _, w := range a.fingerprintWorkers {
|
||||
fingerprintWorkers = append(fingerprintWorkers, w)
|
||||
}
|
||||
a.mu.Unlock()
|
||||
|
||||
for _, w := range thumbWorkers {
|
||||
@@ -2447,9 +2459,65 @@ func (a *App) waitAllPreviewQueuesIdle(ctx context.Context) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := a.waitFingerprintQueueingIdle(ctx, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, w := range fingerprintWorkers {
|
||||
if err := w.WaitIdle(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *App) waitDriveGenerationQueuesIdle(ctx context.Context, driveID string) error {
|
||||
a.mu.Lock()
|
||||
thumbWorker := a.thumbWorkers[driveID]
|
||||
previewWorker := a.workers[driveID]
|
||||
fingerprintWorker := a.fingerprintWorkers[driveID]
|
||||
a.mu.Unlock()
|
||||
if err := thumbWorker.WaitIdle(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := previewWorker.WaitIdle(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := a.waitFingerprintQueueingIdle(ctx, driveID); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fingerprintWorker.WaitIdle(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *App) waitFingerprintQueueingIdle(ctx context.Context, driveID string) error {
|
||||
if !a.fingerprintQueueingBusy(driveID) {
|
||||
return nil
|
||||
}
|
||||
ticker := time.NewTicker(200 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-ticker.C:
|
||||
if !a.fingerprintQueueingBusy(driveID) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (a *App) fingerprintQueueingBusy(driveID string) bool {
|
||||
a.fingerprintQueueMu.Lock()
|
||||
defer a.fingerprintQueueMu.Unlock()
|
||||
if driveID != "" {
|
||||
return a.fingerprintQueueing[driveID]
|
||||
}
|
||||
return len(a.fingerprintQueueing) > 0
|
||||
}
|
||||
|
||||
func shouldScanDrive(d drives.Drive) bool {
|
||||
if d == nil || d.ID() == localupload.DriveID {
|
||||
return false
|
||||
@@ -2502,7 +2570,9 @@ func (a *App) scheduleScriptCrawlerCrawl(ctx context.Context, driveID string) bo
|
||||
a.endDriveScanOrCrawl(driveID)
|
||||
done()
|
||||
}()
|
||||
a.runScriptCrawlerCrawlWithTaskContext(taskCtx, driveID)
|
||||
if a.runScriptCrawlerCrawlWithTaskContext(taskCtx, driveID) {
|
||||
a.runCrawlerMigrationAfterManualCrawl(taskCtx, driveID)
|
||||
}
|
||||
}()
|
||||
return true
|
||||
}
|
||||
@@ -2606,30 +2676,136 @@ func (a *App) runScriptCrawlerCrawlWithTaskContext(ctx context.Context, driveID
|
||||
}
|
||||
|
||||
func (a *App) runSpider91MigrationAfterManualCrawl(ctx context.Context, driveID string) {
|
||||
a.runCrawlerMigrationAfterManualCrawl(ctx, driveID)
|
||||
}
|
||||
|
||||
func (a *App) scheduleCrawlerUploadMigration(ctx context.Context, driveID string) bool {
|
||||
driveID = strings.TrimSpace(driveID)
|
||||
if driveID == "" || a == nil || a.cat == nil {
|
||||
return false
|
||||
}
|
||||
d, err := a.cat.GetDrive(ctx, driveID)
|
||||
if err != nil || d == nil || d.Kind != scriptcrawler.Kind || strings.TrimSpace(d.Credentials["upload_drive_id"]) == "" {
|
||||
return false
|
||||
}
|
||||
if a.spider91Migrator == nil {
|
||||
log.Printf("[scriptcrawler] drive=%s skip saved upload migration: migrator not configured", driveID)
|
||||
return false
|
||||
}
|
||||
|
||||
a.crawlerUploadMu.Lock()
|
||||
if a.crawlerUploadRunning == nil {
|
||||
a.crawlerUploadRunning = make(map[string]bool)
|
||||
}
|
||||
if a.crawlerUploadRunning[driveID] {
|
||||
a.crawlerUploadMu.Unlock()
|
||||
log.Printf("[scriptcrawler] drive=%s saved upload migration already running", driveID)
|
||||
return false
|
||||
}
|
||||
a.crawlerUploadRunning[driveID] = true
|
||||
a.crawlerUploadMu.Unlock()
|
||||
|
||||
taskCtx, done := a.registerDriveTaskContext(ctx, driveID)
|
||||
go func() {
|
||||
defer func() {
|
||||
done()
|
||||
a.crawlerUploadMu.Lock()
|
||||
delete(a.crawlerUploadRunning, driveID)
|
||||
a.crawlerUploadMu.Unlock()
|
||||
}()
|
||||
a.runCrawlerUploadMigrationAfterSave(taskCtx, driveID)
|
||||
}()
|
||||
return true
|
||||
}
|
||||
|
||||
func (a *App) runCrawlerUploadMigrationAfterSave(ctx context.Context, driveID string) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
log.Printf("[spider91] drive=%s skip post-crawl migration: %v", driveID, err)
|
||||
log.Printf("[scriptcrawler] drive=%s skip saved upload migration: %v", driveID, err)
|
||||
return
|
||||
}
|
||||
targetDriveID := a.Spider91UploadDriveID()
|
||||
d, err := a.cat.GetDrive(ctx, driveID)
|
||||
if err != nil || d == nil {
|
||||
log.Printf("[scriptcrawler] drive=%s saved upload migration lookup: %v", driveID, err)
|
||||
return
|
||||
}
|
||||
targetDriveID := strings.TrimSpace(d.Credentials["upload_drive_id"])
|
||||
if d.Kind != scriptcrawler.Kind || targetDriveID == "" {
|
||||
return
|
||||
}
|
||||
if err := a.ensureDriveAttached(ctx, driveID); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s saved upload migration attach: %v", driveID, err)
|
||||
return
|
||||
}
|
||||
|
||||
a.mu.Lock()
|
||||
worker := a.workers[driveID]
|
||||
thumbWorker := a.thumbWorkers[driveID]
|
||||
fingerprintWorker := a.fingerprintWorkers[driveID]
|
||||
a.mu.Unlock()
|
||||
a.scheduleFingerprintBackfill(ctx, driveID, fingerprintWorker)
|
||||
a.enqueueDriveGeneration(ctx, driveID, worker, thumbWorker)
|
||||
|
||||
log.Printf("[scriptcrawler] drive=%s checking local videos for upload target=%s", driveID, targetDriveID)
|
||||
if err := a.waitDriveGenerationQueuesIdle(ctx, driveID); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s saved upload migration wait canceled: %v", driveID, err)
|
||||
return
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s skip saved upload migration after wait: %v", driveID, err)
|
||||
return
|
||||
}
|
||||
if err := a.spider91Migrator.RunOnce(ctx); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s saved upload migration: %v", driveID, err)
|
||||
}
|
||||
}
|
||||
|
||||
func (a *App) runCrawlerMigrationAfterManualCrawl(ctx context.Context, driveID string) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s skip post-crawl migration: %v", driveID, err)
|
||||
return
|
||||
}
|
||||
if a.cat == nil {
|
||||
targetDriveID := a.Spider91UploadDriveID()
|
||||
if targetDriveID == "" || a.spider91Migrator == nil {
|
||||
return
|
||||
}
|
||||
if err := a.waitDriveGenerationQueuesIdle(ctx, driveID); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s post-crawl migration wait canceled: %v", driveID, err)
|
||||
return
|
||||
}
|
||||
if err := a.spider91Migrator.RunOnce(ctx); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s post-crawl migration: %v", driveID, err)
|
||||
}
|
||||
return
|
||||
}
|
||||
d, err := a.cat.GetDrive(ctx, driveID)
|
||||
if err != nil || d == nil {
|
||||
log.Printf("[scriptcrawler] drive=%s skip post-crawl migration lookup: %v", driveID, err)
|
||||
return
|
||||
}
|
||||
targetDriveID := strings.TrimSpace(d.Credentials["upload_drive_id"])
|
||||
if targetDriveID == "" && d.Kind == spider91.Kind {
|
||||
targetDriveID = a.Spider91UploadDriveID()
|
||||
}
|
||||
if targetDriveID == "" {
|
||||
return
|
||||
}
|
||||
if a.spider91Migrator == nil {
|
||||
log.Printf("[spider91] drive=%s skip post-crawl migration: migrator not configured", driveID)
|
||||
log.Printf("[scriptcrawler] drive=%s skip post-crawl migration: migrator not configured", driveID)
|
||||
return
|
||||
}
|
||||
log.Printf("[spider91] drive=%s waiting for generation queues before post-crawl migration target=%s", driveID, targetDriveID)
|
||||
if err := a.waitAllPreviewQueuesIdle(ctx); err != nil {
|
||||
log.Printf("[spider91] drive=%s post-crawl migration wait canceled: %v", driveID, err)
|
||||
log.Printf("[scriptcrawler] drive=%s waiting for generation queues before post-crawl migration target=%s", driveID, targetDriveID)
|
||||
if err := a.waitDriveGenerationQueuesIdle(ctx, driveID); err != nil {
|
||||
log.Printf("[scriptcrawler] drive=%s post-crawl migration wait canceled: %v", driveID, err)
|
||||
return
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
log.Printf("[spider91] drive=%s skip post-crawl migration after wait: %v", driveID, err)
|
||||
log.Printf("[scriptcrawler] drive=%s skip post-crawl migration after wait: %v", driveID, err)
|
||||
return
|
||||
}
|
||||
log.Printf("[spider91] drive=%s running post-crawl migration target=%s", driveID, targetDriveID)
|
||||
log.Printf("[scriptcrawler] drive=%s running post-crawl migration target=%s", driveID, targetDriveID)
|
||||
if err := a.spider91Migrator.RunOnce(ctx); err != nil {
|
||||
log.Printf("[spider91] drive=%s post-crawl migration: %v", driveID, err)
|
||||
log.Printf("[scriptcrawler] drive=%s post-crawl migration: %v", driveID, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -421,6 +421,85 @@ func TestRunSpider91MigrationAfterManualCrawlRequiresConfiguredUploadTarget(t *t
|
||||
}
|
||||
}
|
||||
|
||||
func TestScheduleCrawlerUploadMigrationRunsForConfiguredCrawler(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: "crawler-truvaze",
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Truvaze",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{
|
||||
"script_path": "/tmp/Truvaze.py",
|
||||
"upload_drive_id": "pikpak",
|
||||
},
|
||||
}); err != nil {
|
||||
t.Fatalf("seed crawler: %v", err)
|
||||
}
|
||||
registry := proxy.NewRegistry()
|
||||
registry.Set("crawler-truvaze", &serverFakeKindDrive{id: "crawler-truvaze", kind: scriptcrawler.Kind})
|
||||
migrator := &serverFakeSpider91MigrationRunner{}
|
||||
app := &App{
|
||||
cat: cat,
|
||||
registry: registry,
|
||||
spider91Migrator: migrator,
|
||||
workers: map[string]*preview.Worker{},
|
||||
thumbWorkers: map[string]*preview.ThumbWorker{},
|
||||
fingerprintWorkers: map[string]*fingerprint.Worker{},
|
||||
}
|
||||
|
||||
if !app.scheduleCrawlerUploadMigration(ctx, "crawler-truvaze") {
|
||||
t.Fatal("scheduleCrawlerUploadMigration returned false, want true")
|
||||
}
|
||||
deadline := time.After(time.Second)
|
||||
for migrator.called == 0 {
|
||||
select {
|
||||
case <-deadline:
|
||||
t.Fatalf("migration calls = %d, want 1", migrator.called)
|
||||
case <-time.After(10 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestScheduleCrawlerUploadMigrationSkipsWithoutUploadTarget(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: "crawler-local",
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Local Only",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{"script_path": "/tmp/local.py"},
|
||||
}); err != nil {
|
||||
t.Fatalf("seed crawler: %v", err)
|
||||
}
|
||||
migrator := &serverFakeSpider91MigrationRunner{}
|
||||
app := &App{cat: cat, registry: proxy.NewRegistry(), spider91Migrator: migrator}
|
||||
|
||||
if app.scheduleCrawlerUploadMigration(ctx, "crawler-local") {
|
||||
t.Fatal("scheduleCrawlerUploadMigration returned true without upload target")
|
||||
}
|
||||
if migrator.called != 0 {
|
||||
t.Fatalf("migration calls = %d, want 0", migrator.called)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDriveGenerationStatusUsesWorkerQueueNotPendingCatalogRows(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cat, err := catalog.Open(t.TempDir() + "/catalog.db")
|
||||
|
||||
@@ -22,6 +22,7 @@ import (
|
||||
"github.com/video-site/backend/internal/catalog"
|
||||
"github.com/video-site/backend/internal/drives/p123"
|
||||
"github.com/video-site/backend/internal/drives/scriptcrawler"
|
||||
"github.com/video-site/backend/internal/drives/spider91"
|
||||
)
|
||||
|
||||
type AdminServer struct {
|
||||
@@ -630,6 +631,7 @@ type crawlerDTO struct {
|
||||
ScriptPath string `json:"scriptPath"`
|
||||
Proxy string `json:"proxy,omitempty"`
|
||||
TargetNew string `json:"targetNew,omitempty"`
|
||||
UploadDriveID string `json:"uploadDriveId,omitempty"`
|
||||
LastCrawlAt int64 `json:"lastCrawlAt,omitempty"`
|
||||
ScanGenerationStatus GenerationStatus `json:"scanGenerationStatus"`
|
||||
ThumbnailGenerationStatus GenerationStatus `json:"thumbnailGenerationStatus"`
|
||||
@@ -644,13 +646,17 @@ type crawlerDTO struct {
|
||||
FingerprintReadyCount int `json:"fingerprintReadyCount"`
|
||||
FingerprintPendingCount int `json:"fingerprintPendingCount"`
|
||||
FingerprintFailedCount int `json:"fingerprintFailedCount"`
|
||||
TotalCrawledCount int `json:"totalCrawledCount"`
|
||||
LocalVideoCount int `json:"localVideoCount"`
|
||||
MigratedVideoCount int `json:"migratedVideoCount"`
|
||||
}
|
||||
|
||||
// upsertCrawlerReq is the JSON request body accepted when creating or
// updating a crawler. Each field is declared once; UploadDriveID carries the
// optional upload target drive ID ("uploadDriveId").
type upsertCrawlerReq struct {
	ID            string `json:"id"`
	ScriptPath    string `json:"scriptPath"`
	Proxy         string `json:"proxy"`
	TargetNew     string `json:"targetNew"`
	UploadDriveID string `json:"uploadDriveId"`
}
|
||||
|
||||
func (a *AdminServer) handleListCrawlers(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -659,21 +665,6 @@ func (a *AdminServer) handleListCrawlers(w http.ResponseWriter, r *http.Request)
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
teaserCounts, err := a.Catalog.CountTeasersByDrive(r.Context())
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
thumbnailCounts, err := a.Catalog.CountThumbnailsByDrive(r.Context())
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
fingerprintCounts, err := a.Catalog.CountFingerprintsByDrive(r.Context())
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
generationStatuses := map[string]DriveGenerationStatuses{}
|
||||
if a.GetDriveGenerationStatuses != nil {
|
||||
generationStatuses = a.GetDriveGenerationStatuses()
|
||||
@@ -684,12 +675,17 @@ func (a *AdminServer) handleListCrawlers(w http.ResponseWriter, r *http.Request)
|
||||
if d == nil || !isConfiguredCrawlerDrive(d) {
|
||||
continue
|
||||
}
|
||||
out = append(out, a.crawlerDTOForDrive(d, teaserCounts[d.ID], thumbnailCounts[d.ID], fingerprintCounts[d.ID], generationStatuses[d.ID]))
|
||||
assetCounts, err := a.Catalog.CountCrawlerAssets(r.Context(), d.ID, crawlerVideoIDPrefixes(d))
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
}
|
||||
out = append(out, a.crawlerDTOForDrive(d, assetCounts, generationStatuses[d.ID]))
|
||||
}
|
||||
writeJSON(w, http.StatusOK, out)
|
||||
}
|
||||
|
||||
func (a *AdminServer) crawlerDTOForDrive(d *catalog.Drive, teaser catalog.DriveTeaserCounts, thumb catalog.DriveThumbnailCounts, fp catalog.DriveFingerprintCounts, generation DriveGenerationStatuses) crawlerDTO {
|
||||
func (a *AdminServer) crawlerDTOForDrive(d *catalog.Drive, assets catalog.CrawlerAssetCounts, generation DriveGenerationStatuses) crawlerDTO {
|
||||
if generation.Scan.State == "" {
|
||||
generation.Scan.State = "idle"
|
||||
}
|
||||
@@ -717,20 +713,34 @@ func (a *AdminServer) crawlerDTOForDrive(d *catalog.Drive, teaser catalog.DriveT
|
||||
ScriptPath: strings.TrimSpace(d.Credentials["script_path"]),
|
||||
Proxy: strings.TrimSpace(d.Credentials["proxy"]),
|
||||
TargetNew: strings.TrimSpace(d.Credentials["target_new"]),
|
||||
UploadDriveID: strings.TrimSpace(d.Credentials["upload_drive_id"]),
|
||||
LastCrawlAt: lastCrawlAt,
|
||||
ScanGenerationStatus: generation.Scan,
|
||||
ThumbnailGenerationStatus: generation.Thumbnail,
|
||||
PreviewGenerationStatus: generation.Preview,
|
||||
FingerprintGenerationStatus: generation.Fingerprint,
|
||||
ThumbnailReadyCount: thumb.Ready,
|
||||
ThumbnailPendingCount: thumb.Pending,
|
||||
ThumbnailFailedCount: thumb.Failed,
|
||||
TeaserReadyCount: teaser.Ready,
|
||||
TeaserPendingCount: teaser.Pending,
|
||||
TeaserFailedCount: teaser.Failed,
|
||||
FingerprintReadyCount: fp.Ready,
|
||||
FingerprintPendingCount: fp.Pending,
|
||||
FingerprintFailedCount: fp.Failed,
|
||||
ThumbnailReadyCount: assets.Thumbnail.Ready,
|
||||
ThumbnailPendingCount: assets.Thumbnail.Pending,
|
||||
ThumbnailFailedCount: assets.Thumbnail.Failed,
|
||||
TeaserReadyCount: assets.Teaser.Ready,
|
||||
TeaserPendingCount: assets.Teaser.Pending,
|
||||
TeaserFailedCount: assets.Teaser.Failed,
|
||||
FingerprintReadyCount: assets.Fingerprint.Ready,
|
||||
FingerprintPendingCount: assets.Fingerprint.Pending,
|
||||
FingerprintFailedCount: assets.Fingerprint.Failed,
|
||||
TotalCrawledCount: assets.Total,
|
||||
LocalVideoCount: assets.Local,
|
||||
MigratedVideoCount: assets.Migrated,
|
||||
}
|
||||
}
|
||||
|
||||
func crawlerVideoIDPrefixes(d *catalog.Drive) []string {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
return []string{
|
||||
scriptcrawler.Kind + "-" + d.ID + "-",
|
||||
spider91.Kind + "-" + d.ID + "-",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -765,13 +775,18 @@ func (a *AdminServer) handleUpsertCrawler(w http.ResponseWriter, r *http.Request
|
||||
}
|
||||
scriptPath := strings.TrimSpace(body.ScriptPath)
|
||||
incoming := map[string]string{
|
||||
"script_path": scriptPath,
|
||||
"proxy": strings.TrimSpace(body.Proxy),
|
||||
"target_new": strings.TrimSpace(body.TargetNew),
|
||||
"script_path": scriptPath,
|
||||
"proxy": strings.TrimSpace(body.Proxy),
|
||||
"target_new": strings.TrimSpace(body.TargetNew),
|
||||
"upload_drive_id": strings.TrimSpace(body.UploadDriveID),
|
||||
}
|
||||
for k, v := range incoming {
|
||||
creds[k] = v
|
||||
}
|
||||
if err := a.validateCrawlerUploadDrive(r.Context(), creds["upload_drive_id"]); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
merged, err := mergeScriptCrawlerCredentials(existing, creds)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
@@ -843,6 +858,33 @@ func (a *AdminServer) generateCrawlerID(ctx context.Context, name string) (strin
|
||||
return candidate, nil
|
||||
}
|
||||
|
||||
func (a *AdminServer) validateCrawlerUploadDrive(ctx context.Context, driveID string) error {
|
||||
driveID = strings.TrimSpace(driveID)
|
||||
if driveID == "" {
|
||||
return nil
|
||||
}
|
||||
if a == nil || a.Catalog == nil {
|
||||
return errors.New("crawler upload target validation unavailable")
|
||||
}
|
||||
d, err := a.Catalog.GetDrive(ctx, driveID)
|
||||
if err != nil || d == nil {
|
||||
return fmt.Errorf("上传目标网盘 %q 不存在", driveID)
|
||||
}
|
||||
if !isCrawlerUploadTargetKind(d.Kind) {
|
||||
return fmt.Errorf("上传目标网盘 %q 类型为 %s,仅支持 115网盘、PikPak、123网盘、Google Drive、OneDrive", driveID, d.Kind)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// isCrawlerUploadTargetKind reports whether kind, after trimming surrounding
// whitespace, is one of the drive kinds accepted as a crawler upload target:
// "p115", "pikpak", "p123", "googledrive" or "onedrive". The comparison is
// case-sensitive.
func isCrawlerUploadTargetKind(kind string) bool {
	normalized := strings.TrimSpace(kind)
	for _, supported := range [...]string{"p115", "pikpak", "p123", "googledrive", "onedrive"} {
		if normalized == supported {
			return true
		}
	}
	return false
}
|
||||
|
||||
func crawlerIDSlug(raw string) string {
|
||||
var b strings.Builder
|
||||
lastDash := false
|
||||
|
||||
@@ -880,13 +880,22 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
Name: "91 Spider",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{
|
||||
"builtin": "spider91",
|
||||
"last_crawl_at": "1800000000",
|
||||
"proxy": " http://127.0.0.1:7890 ",
|
||||
"script_path": scriptPath,
|
||||
"builtin": "spider91",
|
||||
"last_crawl_at": "1800000000",
|
||||
"proxy": " http://127.0.0.1:7890 ",
|
||||
"script_path": scriptPath,
|
||||
"upload_drive_id": "p115-target",
|
||||
},
|
||||
Status: "ok",
|
||||
},
|
||||
{
|
||||
ID: "p115-target",
|
||||
Kind: "p115",
|
||||
Name: "115",
|
||||
RootID: "0",
|
||||
Credentials: map[string]string{"cookie": "x"},
|
||||
Status: "ok",
|
||||
},
|
||||
{
|
||||
ID: "onedrive-main",
|
||||
Kind: "onedrive",
|
||||
@@ -910,6 +919,41 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
t.Fatalf("seed drive %s: %v", d.ID, err)
|
||||
}
|
||||
}
|
||||
for _, v := range []*catalog.Video{
|
||||
{
|
||||
ID: "spider91-crawler-spider91-local",
|
||||
DriveID: "crawler-spider91",
|
||||
FileID: "local.mp4",
|
||||
FileName: "local.mp4",
|
||||
Title: "Local",
|
||||
Size: 123,
|
||||
Ext: "mp4",
|
||||
ThumbnailURL: "/p/thumb/spider91-crawler-spider91-local",
|
||||
PreviewStatus: "ready",
|
||||
DurationSeconds: 12,
|
||||
PublishedAt: time.Now(),
|
||||
},
|
||||
{
|
||||
ID: "scriptcrawler-crawler-spider91-migrated",
|
||||
DriveID: "p115-target",
|
||||
FileID: "uploaded-id",
|
||||
FileName: "migrated.mp4",
|
||||
Title: "Migrated",
|
||||
Size: 456,
|
||||
Ext: "mp4",
|
||||
ThumbnailURL: "/p/thumb/scriptcrawler-crawler-spider91-migrated",
|
||||
PreviewStatus: "ready",
|
||||
DurationSeconds: 34,
|
||||
PublishedAt: time.Now(),
|
||||
},
|
||||
} {
|
||||
if err := cat.UpsertVideo(ctx, v); err != nil {
|
||||
t.Fatalf("seed crawler video %s: %v", v.ID, err)
|
||||
}
|
||||
if err := cat.UpdateVideoFingerprint(ctx, v.ID, "sha-"+v.ID, "ready", ""); err != nil {
|
||||
t.Fatalf("seed crawler fingerprint %s: %v", v.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/admin/api/crawlers", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
@@ -920,28 +964,61 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
}
|
||||
|
||||
var got []struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Kind string `json:"kind"`
|
||||
Proxy string `json:"proxy"`
|
||||
LastCrawlAt int64 `json:"lastCrawlAt"`
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Kind string `json:"kind"`
|
||||
Proxy string `json:"proxy"`
|
||||
UploadDriveID string `json:"uploadDriveId"`
|
||||
LastCrawlAt int64 `json:"lastCrawlAt"`
|
||||
TotalCrawled int `json:"totalCrawledCount"`
|
||||
LocalVideos int `json:"localVideoCount"`
|
||||
MigratedVideo int `json:"migratedVideoCount"`
|
||||
ThumbnailReady int `json:"thumbnailReadyCount"`
|
||||
TeaserReady int `json:"teaserReadyCount"`
|
||||
FingerprintReady int `json:"fingerprintReadyCount"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
byID := map[string]struct {
|
||||
Name string
|
||||
Kind string
|
||||
Proxy string
|
||||
LastCrawlAt int64
|
||||
Name string
|
||||
Kind string
|
||||
Proxy string
|
||||
UploadDriveID string
|
||||
LastCrawlAt int64
|
||||
TotalCrawled int
|
||||
LocalVideos int
|
||||
MigratedVideo int
|
||||
ThumbnailReady int
|
||||
TeaserReady int
|
||||
FingerprintReady int
|
||||
}{}
|
||||
for _, d := range got {
|
||||
byID[d.ID] = struct {
|
||||
Name string
|
||||
Kind string
|
||||
Proxy string
|
||||
LastCrawlAt int64
|
||||
}{Name: d.Name, Kind: d.Kind, Proxy: d.Proxy, LastCrawlAt: d.LastCrawlAt}
|
||||
Name string
|
||||
Kind string
|
||||
Proxy string
|
||||
UploadDriveID string
|
||||
LastCrawlAt int64
|
||||
TotalCrawled int
|
||||
LocalVideos int
|
||||
MigratedVideo int
|
||||
ThumbnailReady int
|
||||
TeaserReady int
|
||||
FingerprintReady int
|
||||
}{
|
||||
Name: d.Name,
|
||||
Kind: d.Kind,
|
||||
Proxy: d.Proxy,
|
||||
UploadDriveID: d.UploadDriveID,
|
||||
LastCrawlAt: d.LastCrawlAt,
|
||||
TotalCrawled: d.TotalCrawled,
|
||||
LocalVideos: d.LocalVideos,
|
||||
MigratedVideo: d.MigratedVideo,
|
||||
ThumbnailReady: d.ThumbnailReady,
|
||||
TeaserReady: d.TeaserReady,
|
||||
FingerprintReady: d.FingerprintReady,
|
||||
}
|
||||
}
|
||||
if _, ok := byID["spider91-main"]; ok {
|
||||
t.Fatal("legacy spider91 drive should not be returned by crawler list")
|
||||
@@ -958,9 +1035,18 @@ func TestHandleListCrawlersOnlyIncludesCrawlerPageScripts(t *testing.T) {
|
||||
if byID["crawler-spider91"].Proxy != "http://127.0.0.1:7890" {
|
||||
t.Fatalf("crawler proxy = %q, want trimmed proxy", byID["crawler-spider91"].Proxy)
|
||||
}
|
||||
if byID["crawler-spider91"].UploadDriveID != "p115-target" {
|
||||
t.Fatalf("uploadDriveId = %q, want p115-target", byID["crawler-spider91"].UploadDriveID)
|
||||
}
|
||||
if byID["crawler-spider91"].LastCrawlAt != 1800000000 {
|
||||
t.Fatalf("lastCrawlAt = %d, want 1800000000", byID["crawler-spider91"].LastCrawlAt)
|
||||
}
|
||||
if byID["crawler-spider91"].TotalCrawled != 2 || byID["crawler-spider91"].LocalVideos != 1 || byID["crawler-spider91"].MigratedVideo != 1 {
|
||||
t.Fatalf("crawler counts = total %d local %d migrated %d, want 2/1/1", byID["crawler-spider91"].TotalCrawled, byID["crawler-spider91"].LocalVideos, byID["crawler-spider91"].MigratedVideo)
|
||||
}
|
||||
if byID["crawler-spider91"].ThumbnailReady != 2 || byID["crawler-spider91"].TeaserReady != 2 || byID["crawler-spider91"].FingerprintReady != 2 {
|
||||
t.Fatalf("asset ready counts = thumb %d teaser %d fingerprint %d, want 2/2/2", byID["crawler-spider91"].ThumbnailReady, byID["crawler-spider91"].TeaserReady, byID["crawler-spider91"].FingerprintReady)
|
||||
}
|
||||
if _, ok := byID["onedrive-main"]; ok {
|
||||
t.Fatal("onedrive should not be returned by crawler list")
|
||||
}
|
||||
@@ -1108,6 +1194,62 @@ func TestHandleUpsertCrawlerGeneratesIDFromScriptName(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUpsertCrawlerPersistsAndValidatesUploadDrive(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
tmp := t.TempDir()
|
||||
cat, err := catalog.Open(filepath.Join(tmp, "catalog.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open catalog: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := cat.Close(); err != nil {
|
||||
t.Fatalf("close catalog: %v", err)
|
||||
}
|
||||
})
|
||||
scriptPath := filepath.Join(tmp, "custom.py")
|
||||
if err := os.WriteFile(scriptPath, []byte("CRAWLER_NAME = \"Upload Spider\"\n"), 0o644); err != nil {
|
||||
t.Fatalf("write crawler script: %v", err)
|
||||
}
|
||||
for _, d := range []*catalog.Drive{
|
||||
{ID: "p115-target", Kind: "p115", Name: "115", RootID: "0", Credentials: map[string]string{"cookie": "x"}},
|
||||
{ID: "local-target", Kind: "localstorage", Name: "Local", RootID: "/", Credentials: map[string]string{"path": tmp}},
|
||||
} {
|
||||
if err := cat.UpsertDrive(ctx, d); err != nil {
|
||||
t.Fatalf("seed drive %s: %v", d.ID, err)
|
||||
}
|
||||
}
|
||||
srv := &AdminServer{Catalog: cat}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/admin/api/crawlers", strings.NewReader(`{
|
||||
"id": "crawler-upload",
|
||||
"scriptPath": "`+scriptPath+`",
|
||||
"uploadDriveId": "p115-target"
|
||||
}`))
|
||||
rr := httptest.NewRecorder()
|
||||
srv.handleUpsertCrawler(rr, req)
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, body = %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
got, err := cat.GetDrive(ctx, "crawler-upload")
|
||||
if err != nil {
|
||||
t.Fatalf("get crawler: %v", err)
|
||||
}
|
||||
if got.Credentials["upload_drive_id"] != "p115-target" {
|
||||
t.Fatalf("upload_drive_id = %q, want p115-target", got.Credentials["upload_drive_id"])
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodPost, "/admin/api/crawlers", strings.NewReader(`{
|
||||
"id": "crawler-upload",
|
||||
"scriptPath": "`+scriptPath+`",
|
||||
"uploadDriveId": "local-target"
|
||||
}`))
|
||||
rr = httptest.NewRecorder()
|
||||
srv.handleUpsertCrawler(rr, req)
|
||||
if rr.Code != http.StatusBadRequest {
|
||||
t.Fatalf("invalid target status = %d, body = %s, want 400", rr.Code, rr.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleImportCrawlerScriptFile(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
script := "CRAWLER_NAME = \"Demo Crawler\"\nprint('crawler')\n"
|
||||
|
||||
@@ -20,6 +20,15 @@ type Catalog struct {
|
||||
db *sql.DB
|
||||
}
|
||||
|
||||
type CrawlerAssetCounts struct {
|
||||
Total int
|
||||
Local int
|
||||
Migrated int
|
||||
Thumbnail DriveThumbnailCounts
|
||||
Teaser DriveTeaserCounts
|
||||
Fingerprint DriveFingerprintCounts
|
||||
}
|
||||
|
||||
func Open(path string) (*Catalog, error) {
|
||||
db, err := sql.Open("sqlite", path+"?_pragma=journal_mode(WAL)&_pragma=busy_timeout(5000)")
|
||||
if err != nil {
|
||||
@@ -1455,6 +1464,121 @@ func (c *Catalog) CountFingerprintsByDrive(ctx context.Context) (map[string]Driv
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *Catalog) CountCrawlerAssets(ctx context.Context, crawlerID string, prefixes []string) (CrawlerAssetCounts, error) {
|
||||
var out CrawlerAssetCounts
|
||||
crawlerID = strings.TrimSpace(crawlerID)
|
||||
prefixes = cleanCrawlerIDPrefixes(prefixes)
|
||||
if crawlerID == "" || len(prefixes) == 0 {
|
||||
return out, nil
|
||||
}
|
||||
|
||||
where := make([]string, 0, len(prefixes))
|
||||
args := make([]any, 0, 2+len(prefixes))
|
||||
args = append(args, crawlerID, crawlerID)
|
||||
for range prefixes {
|
||||
where = append(where, "id LIKE ? ESCAPE '\\'")
|
||||
}
|
||||
for _, prefix := range prefixes {
|
||||
args = append(args, escapeSQLLike(prefix)+"%")
|
||||
}
|
||||
query := `SELECT
|
||||
COUNT(*) AS total_count,
|
||||
COUNT(CASE WHEN drive_id = ? THEN 1 END) AS local_count,
|
||||
COUNT(CASE WHEN drive_id != ? THEN 1 END) AS migrated_count,
|
||||
COUNT(CASE WHEN EXISTS (
|
||||
SELECT 1 FROM videos AS asset_dup
|
||||
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
|
||||
AND COALESCE(asset_dup.thumbnail_url, '') != ''
|
||||
) THEN 1 END) AS thumbnail_ready_count,
|
||||
COUNT(CASE WHEN NOT EXISTS (
|
||||
SELECT 1 FROM videos AS asset_dup
|
||||
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
|
||||
AND COALESCE(asset_dup.thumbnail_url, '') != ''
|
||||
)
|
||||
AND COALESCE(thumbnail_url, '') = ''
|
||||
AND COALESCE(thumbnail_status, 'pending') NOT IN ('failed', 'skipped') THEN 1 END) AS thumbnail_pending_count,
|
||||
COUNT(CASE WHEN NOT EXISTS (
|
||||
SELECT 1 FROM videos AS asset_dup
|
||||
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
|
||||
AND COALESCE(asset_dup.thumbnail_url, '') != ''
|
||||
)
|
||||
AND COALESCE(thumbnail_url, '') = ''
|
||||
AND COALESCE(thumbnail_status, 'pending') = 'failed' THEN 1 END) AS thumbnail_failed_count,
|
||||
COUNT(CASE WHEN EXISTS (
|
||||
SELECT 1 FROM videos AS asset_dup
|
||||
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
|
||||
AND COALESCE(asset_dup.preview_status, 'pending') = 'ready'
|
||||
) THEN 1 END) AS teaser_ready_count,
|
||||
COUNT(CASE WHEN NOT EXISTS (
|
||||
SELECT 1 FROM videos AS asset_dup
|
||||
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
|
||||
AND COALESCE(asset_dup.preview_status, 'pending') = 'ready'
|
||||
)
|
||||
AND COALESCE(preview_status, 'pending') = 'pending' THEN 1 END) AS teaser_pending_count,
|
||||
COUNT(CASE WHEN NOT EXISTS (
|
||||
SELECT 1 FROM videos AS asset_dup
|
||||
WHERE ` + crawlerAssetEquivalentSQL("asset_dup", "videos") + `
|
||||
AND COALESCE(asset_dup.preview_status, 'pending') = 'ready'
|
||||
)
|
||||
AND COALESCE(preview_status, 'pending') = 'failed' THEN 1 END) AS teaser_failed_count,
|
||||
COUNT(CASE WHEN COALESCE(sampled_sha256, '') != ''
|
||||
OR COALESCE(fingerprint_status, 'pending') = 'ready' THEN 1 END) AS fingerprint_ready_count,
|
||||
COUNT(CASE WHEN size_bytes > 0
|
||||
AND COALESCE(sampled_sha256, '') = ''
|
||||
AND COALESCE(fingerprint_status, 'pending') = 'pending' THEN 1 END) AS fingerprint_pending_count,
|
||||
COUNT(CASE WHEN COALESCE(sampled_sha256, '') = ''
|
||||
AND COALESCE(fingerprint_status, 'pending') = 'failed' THEN 1 END) AS fingerprint_failed_count
|
||||
FROM videos
|
||||
WHERE COALESCE(hidden, 0) = 0
|
||||
AND (` + strings.Join(where, " OR ") + `)`
|
||||
err := c.db.QueryRowContext(ctx, query, args...).Scan(
|
||||
&out.Total,
|
||||
&out.Local,
|
||||
&out.Migrated,
|
||||
&out.Thumbnail.Ready,
|
||||
&out.Thumbnail.Pending,
|
||||
&out.Thumbnail.Failed,
|
||||
&out.Teaser.Ready,
|
||||
&out.Teaser.Pending,
|
||||
&out.Teaser.Failed,
|
||||
&out.Fingerprint.Ready,
|
||||
&out.Fingerprint.Pending,
|
||||
&out.Fingerprint.Failed,
|
||||
)
|
||||
return out, err
|
||||
}
|
||||
|
||||
// crawlerAssetEquivalentSQL returns a parenthesised SQL predicate that is true
// when the row aliased candidateAlias represents the same asset as the row
// aliased sourceAlias. Two rows are equivalent when they share an id, share a
// non-empty content_hash, or share both a positive size_bytes and a non-empty
// sampled_sha256. The aliases are interpolated verbatim, so callers must pass
// trusted identifiers only.
func crawlerAssetEquivalentSQL(candidateAlias, sourceAlias string) string {
	// %[1]s is the candidate alias, %[2]s the source alias.
	const predicate = `(%[1]s.id = %[2]s.id
		OR (COALESCE(%[2]s.content_hash, '') != ''
			AND %[1]s.content_hash = %[2]s.content_hash)
		OR (%[2]s.size_bytes > 0
			AND COALESCE(%[2]s.sampled_sha256, '') != ''
			AND %[1]s.size_bytes = %[2]s.size_bytes
			AND %[1]s.sampled_sha256 = %[2]s.sampled_sha256))`
	return fmt.Sprintf(predicate, candidateAlias, sourceAlias)
}
|
||||
|
||||
// cleanCrawlerIDPrefixes trims whitespace from every prefix, drops blank
// entries and removes duplicates while keeping first-seen order. The result
// is always a non-nil slice.
func cleanCrawlerIDPrefixes(prefixes []string) []string {
	cleaned := make([]string, 0, len(prefixes))
	known := make(map[string]struct{}, len(prefixes))
	for _, raw := range prefixes {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		if _, dup := known[trimmed]; dup {
			continue
		}
		known[trimmed] = struct{}{}
		cleaned = append(cleaned, trimmed)
	}
	return cleaned
}
|
||||
|
||||
// escapeSQLLike escapes the LIKE metacharacters in raw so it matches literally
// in a pattern that declares ESCAPE '\': every backslash, percent sign and
// underscore is prefixed with a backslash.
func escapeSQLLike(raw string) string {
	// A single left-to-right pass is equivalent to escaping backslashes first
	// and then the wildcards, because replaced output is never rescanned.
	return strings.NewReplacer(
		`\`, `\\`,
		`%`, `\%`,
		`_`, `\_`,
	).Replace(raw)
}
|
||||
|
||||
func (c *Catalog) CountVideosNeedingFingerprint(ctx context.Context, driveID string) (int, error) {
|
||||
var count int
|
||||
err := c.db.QueryRowContext(ctx,
|
||||
|
||||
@@ -149,6 +149,28 @@ func (w *Worker) Status() TaskStatus {
|
||||
return status
|
||||
}
|
||||
|
||||
// WaitIdle blocks until the fingerprint queue is empty and no item is being processed.
|
||||
func (w *Worker) WaitIdle(ctx context.Context) error {
|
||||
if w == nil {
|
||||
return nil
|
||||
}
|
||||
if w.queue.lengthExcluding("") == 0 {
|
||||
return nil
|
||||
}
|
||||
ticker := time.NewTicker(200 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-ticker.C:
|
||||
if w.queue.lengthExcluding("") == 0 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Worker) processQueued(ctx context.Context, v *catalog.Video) {
|
||||
defer w.queue.release(v.ID)
|
||||
if w.Catalog == nil || w.Drive == nil || v == nil || v.ID == "" {
|
||||
|
||||
@@ -81,7 +81,22 @@ type UploadResult struct {
|
||||
Size int64
|
||||
}
|
||||
|
||||
const spider91UploadDirName = "91 Spider"
|
||||
const (
|
||||
spider91UploadDirName = "91 Spider"
|
||||
scriptCrawlerUploadRootDirName = "Script Crawlers"
|
||||
)
|
||||
|
||||
type migrationPlan struct {
|
||||
source Spider91LocalSource
|
||||
row *catalog.Drive
|
||||
sourceKinds []string
|
||||
targetDriveID string
|
||||
target uploadTarget
|
||||
uploadDir string
|
||||
keepLatestN int
|
||||
requireAssetsReady bool
|
||||
legacyBackfill bool
|
||||
}
|
||||
|
||||
// pikpakAdapter / p115Adapter / p123Adapter / onedriveAdapter / googledriveAdapter 把具体 driver 包装成 uploadTarget。
|
||||
//
|
||||
@@ -369,56 +384,62 @@ func (m *Migrator) runOnce(ctx context.Context) {
|
||||
log.Printf("[spider91migrate] captcha cooldown ended at %s, resuming migration", until.Format(time.RFC3339))
|
||||
}
|
||||
|
||||
target, pp, err := m.resolveTarget()
|
||||
if err != nil {
|
||||
// 没目标就静默 —— 用户选择了本地保存,或还没配 115/PikPak drive。
|
||||
plans := m.migrationPlans(ctx)
|
||||
if len(plans) == 0 {
|
||||
// 没目标就静默 —— 用户选择了本地保存,或目标盘还没挂载。
|
||||
return
|
||||
}
|
||||
|
||||
migrated := 0
|
||||
for _, src := range m.spider91Drives(ctx) {
|
||||
backfillTargets := map[string]uploadTarget{}
|
||||
for _, plan := range plans {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
n, err := m.migrateDrive(ctx, src, target, pp)
|
||||
n, err := m.migrateDrive(ctx, plan)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] drive=%s migrate batch error: %v", src.ID(), err)
|
||||
log.Printf("[spider91migrate] drive=%s migrate batch error: %v", plan.source.ID(), err)
|
||||
}
|
||||
migrated += n
|
||||
if active, _ := m.inCooldown(); active {
|
||||
if migrated > 0 {
|
||||
log.Printf("[spider91migrate] migrated %d video(s) to drive=%s", migrated, target)
|
||||
log.Printf("[spider91migrate] migrated %d video(s)", migrated)
|
||||
}
|
||||
return
|
||||
}
|
||||
if plan.legacyBackfill {
|
||||
backfillTargets[plan.targetDriveID] = plan.target
|
||||
}
|
||||
}
|
||||
if migrated > 0 {
|
||||
log.Printf("[spider91migrate] migrated %d video(s) to drive=%s", migrated, target)
|
||||
log.Printf("[spider91migrate] migrated %d video(s)", migrated)
|
||||
}
|
||||
|
||||
// 收尾:扫每个 spider91 drive 的本地目录,把 catalog 已经迁到别处但本地
|
||||
// 收尾:扫每个本地爬虫 drive 的 videos 目录,把 catalog 已经迁到别处但本地
|
||||
// 仍有残留的孤儿文件清掉。这是纯防御性兜底——正常路径下 migrateDrive
|
||||
// 已经在迁移成功后立刻 CleanupSpider91Local,不会留孤儿。
|
||||
for _, src := range m.spider91Drives(ctx) {
|
||||
for _, plan := range plans {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
deleted, err := m.cleanupOldLocalVideos(ctx, src)
|
||||
deleted, err := m.cleanupOldLocalVideos(ctx, plan)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] cleanup drive=%s: %v", src.ID(), err)
|
||||
log.Printf("[spider91migrate] cleanup drive=%s: %v", plan.source.ID(), err)
|
||||
}
|
||||
if deleted > 0 {
|
||||
log.Printf("[spider91migrate] cleanup drive=%s deleted %d orphan local file(s)", src.ID(), deleted)
|
||||
log.Printf("[spider91migrate] cleanup drive=%s deleted %d orphan local file(s)", plan.source.ID(), deleted)
|
||||
}
|
||||
}
|
||||
|
||||
// 回填:把已迁移到 PikPak 的 spider91-* 视频里文件名仍是旧格式
|
||||
// (比如刚迁完没改、或人工导入)的统一改成方案 B 期望的格式。
|
||||
// 这一步幂等:已经是期望格式的不会再调 Rename。
|
||||
if renamed, err := m.backfillFileNames(ctx, target, pp); err != nil {
|
||||
log.Printf("[spider91migrate] backfill names: %v", err)
|
||||
} else if renamed > 0 {
|
||||
log.Printf("[spider91migrate] backfilled %d %s file name(s) to desired format", renamed, m.targetKindForLog())
|
||||
for targetDriveID, pp := range backfillTargets {
|
||||
if renamed, err := m.backfillFileNames(ctx, targetDriveID, pp); err != nil {
|
||||
log.Printf("[spider91migrate] backfill names: %v", err)
|
||||
} else if renamed > 0 {
|
||||
log.Printf("[spider91migrate] backfilled %d %s file name(s) to desired format", renamed, pp.Kind())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -446,9 +467,17 @@ func (m *Migrator) resolveTarget() (string, uploadTarget, error) {
|
||||
return "", nil, errors.New("no target getter")
|
||||
}
|
||||
id := m.cfg.GetTargetDriveID()
|
||||
return m.resolveTargetID(id)
|
||||
}
|
||||
|
||||
func (m *Migrator) resolveTargetID(id string) (string, uploadTarget, error) {
|
||||
id = strings.TrimSpace(id)
|
||||
if id == "" {
|
||||
return "", nil, errors.New("target drive not configured")
|
||||
}
|
||||
if m.cfg.Registry == nil {
|
||||
return "", nil, errors.New("registry not configured")
|
||||
}
|
||||
d, ok := m.cfg.Registry.Get(id)
|
||||
if !ok {
|
||||
return "", nil, fmt.Errorf("target drive %q not in registry", id)
|
||||
@@ -460,6 +489,100 @@ func (m *Migrator) resolveTarget() (string, uploadTarget, error) {
|
||||
return id, t, nil
|
||||
}
|
||||
|
||||
func (m *Migrator) migrationPlans(ctx context.Context) []migrationPlan {
|
||||
if m == nil || m.cfg.Catalog == nil || m.cfg.Registry == nil {
|
||||
return nil
|
||||
}
|
||||
all := m.cfg.Registry.All()
|
||||
out := make([]migrationPlan, 0, len(all))
|
||||
for _, d := range all {
|
||||
if d == nil {
|
||||
continue
|
||||
}
|
||||
src, ok := d.(Spider91LocalSource)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
row, err := m.cfg.Catalog.GetDrive(ctx, d.ID())
|
||||
if (err != nil || row == nil) && d.Kind() == spider91.Kind {
|
||||
row = &catalog.Drive{ID: d.ID(), Kind: spider91.Kind, RootID: "/"}
|
||||
}
|
||||
if row == nil {
|
||||
continue
|
||||
}
|
||||
switch row.Kind {
|
||||
case scriptcrawler.Kind:
|
||||
targetID := strings.TrimSpace(row.Credentials["upload_drive_id"])
|
||||
if targetID == "" {
|
||||
continue
|
||||
}
|
||||
resolvedID, target, err := m.resolveTargetID(targetID)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] crawler=%s upload target=%q unavailable: %v", row.ID, targetID, err)
|
||||
continue
|
||||
}
|
||||
out = append(out, migrationPlan{
|
||||
source: src,
|
||||
row: row,
|
||||
sourceKinds: crawlerSourceKindsForRow(row),
|
||||
targetDriveID: resolvedID,
|
||||
target: target,
|
||||
uploadDir: scriptCrawlerUploadDir(row.ID),
|
||||
keepLatestN: 0,
|
||||
requireAssetsReady: true,
|
||||
})
|
||||
case spider91.Kind:
|
||||
if m.cfg.GetTargetDriveID == nil {
|
||||
continue
|
||||
}
|
||||
targetID := strings.TrimSpace(m.cfg.GetTargetDriveID())
|
||||
if targetID == "" {
|
||||
continue
|
||||
}
|
||||
resolvedID, target, err := m.resolveTargetID(targetID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
out = append(out, migrationPlan{
|
||||
source: src,
|
||||
row: row,
|
||||
sourceKinds: []string{spider91.Kind},
|
||||
targetDriveID: resolvedID,
|
||||
target: target,
|
||||
uploadDir: spider91UploadDirName,
|
||||
keepLatestN: m.cfg.KeepLatestN,
|
||||
legacyBackfill: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func crawlerSourceKindsForRow(d *catalog.Drive) []string {
|
||||
kinds := []string{scriptcrawler.Kind}
|
||||
if d != nil && strings.EqualFold(strings.TrimSpace(d.Credentials["builtin"]), spider91.Kind) {
|
||||
kinds = append(kinds, spider91.Kind)
|
||||
}
|
||||
return kinds
|
||||
}
|
||||
|
||||
func scriptCrawlerUploadDir(driveID string) string {
|
||||
driveID = sanitizeUploadDirSegment(driveID)
|
||||
if driveID == "" {
|
||||
driveID = "crawler"
|
||||
}
|
||||
return scriptCrawlerUploadRootDirName + "/" + driveID
|
||||
}
|
||||
|
||||
func sanitizeUploadDirSegment(raw string) string {
|
||||
clean := sanitizeTitle(raw)
|
||||
clean = strings.Trim(clean, "/")
|
||||
if clean == "." || clean == ".." {
|
||||
return ""
|
||||
}
|
||||
return clean
|
||||
}
|
||||
|
||||
// spider91Drives 返回当前注册的所有 Spider91 来源本地爬虫 driver。
|
||||
func (m *Migrator) spider91Drives(ctx context.Context) []Spider91LocalSource {
|
||||
all := m.cfg.Registry.All()
|
||||
@@ -495,18 +618,13 @@ func (m *Migrator) isSpider91SourceDrive(ctx context.Context, d drives.Drive) bo
|
||||
return row.Kind == scriptcrawler.Kind && strings.EqualFold(strings.TrimSpace(row.Credentials["builtin"]), spider91.Kind)
|
||||
}
|
||||
|
||||
// migrateDrive 对单个 spider91 drive 跑一批迁移;返回成功迁移的条数。
|
||||
//
|
||||
// 策略(与"本地缓存最新 N 个"语义一致):
|
||||
// - 列出 spider91 drive 本地 videos/ 目录所有 mp4 文件,按 mtime 降序排
|
||||
// - 跳过最新 KeepLatestN 个:这些是用户希望保留在本地的最新爬取
|
||||
// - 对剩下的(更旧)逐个处理:
|
||||
// - 还没迁移(drive_id 仍是 src.ID())→ 上传到目标盘 + 改 catalog + 删本地
|
||||
// - 已经迁移过但本地还有残留 → 仅删本地(兜底)
|
||||
//
|
||||
// KeepLatestN < 0 时不保护任何本地文件,全部尝试迁移(旧行为,主要给测试用)。
|
||||
func (m *Migrator) migrateDrive(ctx context.Context, src Spider91LocalSource, targetDriveID string, pp uploadTarget) (int, error) {
|
||||
keepN := m.cfg.KeepLatestN
|
||||
// migrateDrive 对单个本地爬虫 drive 跑一批迁移;返回成功迁移的条数。
|
||||
func (m *Migrator) migrateDrive(ctx context.Context, plan migrationPlan) (int, error) {
|
||||
src := plan.source
|
||||
if src == nil || plan.target == nil || plan.targetDriveID == "" {
|
||||
return 0, nil
|
||||
}
|
||||
keepN := plan.keepLatestN
|
||||
if keepN < 0 {
|
||||
keepN = 0
|
||||
}
|
||||
@@ -536,17 +654,14 @@ func (m *Migrator) migrateDrive(ctx context.Context, src Spider91LocalSource, ta
|
||||
files = append(files, localFile{name: e.Name(), modTime: info.ModTime()})
|
||||
}
|
||||
|
||||
// 本地数量没超过 keepN 时不动任何文件 —— 这条是 KeepLatestN 语义的核心
|
||||
if m.cfg.KeepLatestN >= 0 && len(files) <= keepN {
|
||||
if plan.keepLatestN >= 0 && len(files) <= keepN {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// 按 mtime 降序:最新的排前面,保留前 keepN 个
|
||||
sort.Slice(files, func(i, j int) bool { return files[i].modTime.After(files[j].modTime) })
|
||||
|
||||
// 候选 = 跳过最新 keepN 个之外的(更旧的)。KeepLatestN < 0 时 candidates=files。
|
||||
skip := keepN
|
||||
if m.cfg.KeepLatestN < 0 {
|
||||
if plan.keepLatestN < 0 {
|
||||
skip = 0
|
||||
}
|
||||
candidates := files
|
||||
@@ -556,6 +671,17 @@ func (m *Migrator) migrateDrive(ctx context.Context, src Spider91LocalSource, ta
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
localVideos, err := m.cfg.Catalog.ListVideosByDriveID(ctx, src.ID(), 100000)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("list local catalog videos: %w", err)
|
||||
}
|
||||
byFileID := make(map[string]*catalog.Video, len(localVideos))
|
||||
for _, v := range localVideos {
|
||||
if v != nil && strings.TrimSpace(v.FileID) != "" {
|
||||
byFileID[v.FileID] = v
|
||||
}
|
||||
}
|
||||
|
||||
migrated := 0
|
||||
for _, f := range candidates {
|
||||
if err := ctx.Err(); err != nil {
|
||||
@@ -565,21 +691,21 @@ func (m *Migrator) migrateDrive(ctx context.Context, src Spider91LocalSource, ta
|
||||
break
|
||||
}
|
||||
|
||||
viewkey := stripExt(f.name)
|
||||
videoID := "spider91-" + src.ID() + "-" + viewkey
|
||||
v, err := m.cfg.Catalog.GetVideo(ctx, videoID)
|
||||
if err != nil || v == nil {
|
||||
// 找不到 catalog 行:保险起见保留本地,让管理员可见
|
||||
v := m.findVideoForLocalFile(ctx, plan, f.name, byFileID)
|
||||
if v == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if v.DriveID != src.ID() {
|
||||
// catalog 已迁移到别的 drive,但本地还有残留 → 兜底删本地
|
||||
CleanupSpider91Local(src, v.FileID)
|
||||
CleanupSpider91Local(src, f.name)
|
||||
continue
|
||||
}
|
||||
|
||||
ok, err := m.migrateOne(ctx, v, src, targetDriveID, pp)
|
||||
if plan.requireAssetsReady && !crawlerVideoAssetsReady(v) {
|
||||
continue
|
||||
}
|
||||
|
||||
ok, err := m.migrateOne(ctx, v, plan)
|
||||
if err != nil {
|
||||
log.Printf("[spider91migrate] %s: %v", v.ID, err)
|
||||
// captcha 错误(4002 / 9)说明 PikPak 当前正拒绝我们;继续在
|
||||
@@ -603,10 +729,39 @@ func (m *Migrator) migrateDrive(ctx context.Context, src Spider91LocalSource, ta
|
||||
return migrated, nil
|
||||
}
|
||||
|
||||
// migrateOne 把单条 spider91 视频上传到目标盘并改写 catalog。
|
||||
func (m *Migrator) findVideoForLocalFile(ctx context.Context, plan migrationPlan, localFile string, byFileID map[string]*catalog.Video) *catalog.Video {
|
||||
if v := byFileID[localFile]; v != nil {
|
||||
return v
|
||||
}
|
||||
sourceID := stripExt(localFile)
|
||||
driveID := ""
|
||||
if plan.source != nil {
|
||||
driveID = plan.source.ID()
|
||||
}
|
||||
for _, kind := range plan.sourceKinds {
|
||||
id := scriptcrawler.BuildVideoIDForKind(kind, driveID, sourceID)
|
||||
v, err := m.cfg.Catalog.GetVideo(ctx, id)
|
||||
if err == nil && v != nil {
|
||||
return v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func crawlerVideoAssetsReady(v *catalog.Video) bool {
|
||||
if v == nil {
|
||||
return false
|
||||
}
|
||||
return strings.EqualFold(strings.TrimSpace(v.PreviewStatus), "ready") &&
|
||||
strings.EqualFold(strings.TrimSpace(v.FingerprintStatus), "ready")
|
||||
}
|
||||
|
||||
// migrateOne 把单条本地爬虫视频上传到目标盘并改写 catalog。
|
||||
// 返回 (true, nil) 表示真的迁了一条;(false, nil) 表示跳过(本地文件已不在等);
|
||||
// (false, err) 表示真出错。
|
||||
func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src Spider91LocalSource, targetDriveID string, pp uploadTarget) (bool, error) {
|
||||
func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, plan migrationPlan) (bool, error) {
|
||||
src := plan.source
|
||||
pp := plan.target
|
||||
path, err := src.VideoPath(v.FileID)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("resolve local path: %w", err)
|
||||
@@ -630,20 +785,11 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src Spider9
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// 上传到目标盘 rootID 下的固定 "91 Spider" 子目录。若用户把目标盘 rootID
|
||||
// 配成某个自定义目录,这里会在该自定义目录下查找/创建 "91 Spider"。
|
||||
// 上传名走 desiredPikPakName 算出来的方案 B 格式:
|
||||
//
|
||||
// <sanitized title>-<viewkey 后 8 位>.<ext>
|
||||
//
|
||||
// 这样网盘 Web 端列出来的文件名能直接看出是哪个视频,
|
||||
// 又用 viewkey 后 8 位避免同标题撞名。所有目标盘共用同一格式,
|
||||
// 简化前端 / catalog 的认知。
|
||||
parent, err := pp.EnsureDir(ctx, spider91UploadDirName)
|
||||
parent, err := pp.EnsureDir(ctx, plan.uploadDir)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("%s ensure %q dir: %w", pp.Kind(), spider91UploadDirName, err)
|
||||
return false, fmt.Errorf("%s ensure %q dir: %w", pp.Kind(), plan.uploadDir, err)
|
||||
}
|
||||
uploadName := desiredPikPakName(v.Title, extractViewKey(v.ID), v.Ext)
|
||||
uploadName := desiredPikPakName(v.Title, sourceIDForUploadName(v, plan), v.Ext)
|
||||
res, err := pp.UploadAndReportHash(ctx, parent, uploadName, f, info.Size())
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("%s upload: %w", pp.Kind(), err)
|
||||
@@ -653,7 +799,7 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src Spider9
|
||||
}
|
||||
|
||||
// 事务性改写 catalog 行:drive_id / file_id / content_hash
|
||||
if err := m.cfg.Catalog.MigrateVideoToDrive(ctx, v.ID, targetDriveID, res.FileID, res.Hash); err != nil {
|
||||
if err := m.cfg.Catalog.MigrateVideoToDrive(ctx, v.ID, plan.targetDriveID, res.FileID, res.Hash); err != nil {
|
||||
return false, fmt.Errorf("catalog migrate: %w", err)
|
||||
}
|
||||
m.preserveCrawledThumbnail(ctx, src, v)
|
||||
@@ -665,10 +811,29 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, src Spider9
|
||||
// 删除本地 mp4 和源 thumb(公共 /p/thumb 副本已在 preserveCrawledThumbnail 中保留)。
|
||||
CleanupSpider91Local(src, v.FileID)
|
||||
|
||||
log.Printf("[spider91migrate] %s migrated to drive=%s(kind=%s) file=%s name=%q", v.ID, targetDriveID, pp.Kind(), res.FileID, uploadName)
|
||||
log.Printf("[spider91migrate] %s migrated to drive=%s(kind=%s) file=%s name=%q", v.ID, plan.targetDriveID, pp.Kind(), res.FileID, uploadName)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func sourceIDForUploadName(v *catalog.Video, plan migrationPlan) string {
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
if plan.legacyBackfill {
|
||||
return extractViewKey(v.ID)
|
||||
}
|
||||
for _, kind := range plan.sourceKinds {
|
||||
prefix := kind + "-" + plan.source.ID() + "-"
|
||||
if strings.HasPrefix(v.ID, prefix) {
|
||||
return strings.TrimPrefix(v.ID, prefix)
|
||||
}
|
||||
}
|
||||
if v.FileID != "" {
|
||||
return stripExt(v.FileID)
|
||||
}
|
||||
return extractViewKey(v.ID)
|
||||
}
|
||||
|
||||
func (m *Migrator) preserveCrawledThumbnail(ctx context.Context, src Spider91LocalSource, v *catalog.Video) {
|
||||
if m == nil || m.cfg.Catalog == nil || src == nil || v == nil || v.ID == "" || v.FileID == "" {
|
||||
return
|
||||
@@ -791,7 +956,11 @@ func stripExt(name string) string {
|
||||
// 找到孤儿。
|
||||
//
|
||||
// 返回实际删除的文件个数。
|
||||
func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, src Spider91LocalSource) (int, error) {
|
||||
func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, plan migrationPlan) (int, error) {
|
||||
src := plan.source
|
||||
if src == nil {
|
||||
return 0, nil
|
||||
}
|
||||
entries, err := os.ReadDir(src.VideosDir())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
@@ -808,18 +977,13 @@ func (m *Migrator) cleanupOldLocalVideos(ctx context.Context, src Spider91LocalS
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
viewkey := stripExt(e.Name())
|
||||
videoID := "spider91-" + src.ID() + "-" + viewkey
|
||||
v, err := m.cfg.Catalog.GetVideo(ctx, videoID)
|
||||
if err != nil || v == nil {
|
||||
// 找不到 catalog 行:保险起见保留,等管理员处理
|
||||
v := m.findVideoForLocalFile(ctx, plan, e.Name(), nil)
|
||||
if v == nil {
|
||||
continue
|
||||
}
|
||||
if v.DriveID == src.ID() {
|
||||
// 还没迁移,归 migrateDrive 管,不在这里动
|
||||
continue
|
||||
}
|
||||
// 已迁移到别的 drive 但本地还有 → 删
|
||||
path, perr := src.VideoPath(e.Name())
|
||||
if perr != nil {
|
||||
continue
|
||||
|
||||
@@ -344,6 +344,81 @@ func writeSpider91Video(t *testing.T, cat *catalog.Catalog, d *spider91.Driver,
|
||||
return id
|
||||
}
|
||||
|
||||
func setupScriptCrawler(t *testing.T, id string) *scriptcrawler.Driver {
|
||||
t.Helper()
|
||||
d := scriptcrawler.New(scriptcrawler.Config{ID: id, RootDir: t.TempDir()})
|
||||
if err := d.Init(context.Background()); err != nil {
|
||||
t.Fatalf("scriptcrawler init: %v", err)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func seedScriptCrawlerDrive(t *testing.T, cat *catalog.Catalog, d *scriptcrawler.Driver, uploadDriveID string) {
|
||||
t.Helper()
|
||||
if err := cat.UpsertDrive(context.Background(), &catalog.Drive{
|
||||
ID: d.ID(),
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Script Crawler",
|
||||
RootID: "/",
|
||||
Credentials: map[string]string{
|
||||
"script_path": "/tmp/crawler.py",
|
||||
"upload_drive_id": uploadDriveID,
|
||||
},
|
||||
}); err != nil {
|
||||
t.Fatalf("seed scriptcrawler drive: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func writeScriptCrawlerVideo(t *testing.T, cat *catalog.Catalog, d *scriptcrawler.Driver, sourceID, ext string, content []byte, readyAssets bool) string {
|
||||
t.Helper()
|
||||
fileID := sourceID + ext
|
||||
path, err := d.VideoPath(fileID)
|
||||
if err != nil {
|
||||
t.Fatalf("video path: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(path, content, 0o644); err != nil {
|
||||
t.Fatalf("write video: %v", err)
|
||||
}
|
||||
thumbPath, err := d.ThumbPath(sourceID + ".jpg")
|
||||
if err != nil {
|
||||
t.Fatalf("thumb path: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(thumbPath, []byte("thumb"), 0o644); err != nil {
|
||||
t.Fatalf("write thumb: %v", err)
|
||||
}
|
||||
now := time.Now()
|
||||
id := scriptcrawler.BuildVideoID(d.ID(), sourceID)
|
||||
previewStatus := "pending"
|
||||
if readyAssets {
|
||||
previewStatus = "ready"
|
||||
}
|
||||
v := &catalog.Video{
|
||||
ID: id,
|
||||
DriveID: d.ID(),
|
||||
FileID: fileID,
|
||||
FileName: fileID,
|
||||
Title: "Crawler " + sourceID,
|
||||
Author: "tester",
|
||||
Ext: strings.TrimPrefix(ext, "."),
|
||||
Quality: "HD",
|
||||
Size: int64(len(content)),
|
||||
ThumbnailURL: "/p/thumb/" + id,
|
||||
PreviewStatus: previewStatus,
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}
|
||||
if err := cat.UpsertVideo(context.Background(), v); err != nil {
|
||||
t.Fatalf("upsert scriptcrawler video: %v", err)
|
||||
}
|
||||
if readyAssets {
|
||||
if err := cat.UpdateVideoFingerprint(context.Background(), id, "sampled-"+sourceID, "ready", ""); err != nil {
|
||||
t.Fatalf("mark fingerprint ready: %v", err)
|
||||
}
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
func TestRunOnceMigratesSpider91VideosAndCleansLocalFiles(t *testing.T) {
|
||||
cat := setupCatalog(t)
|
||||
src, _ := setupSpider91(t)
|
||||
@@ -419,6 +494,98 @@ func TestRunOnceMigratesSpider91VideosAndCleansLocalFiles(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOnceMigratesReadyScriptCrawlerVideoToConfiguredUploadDrive(t *testing.T) {
|
||||
cat := setupCatalog(t)
|
||||
src := setupScriptCrawler(t, "crawler-alpha")
|
||||
pp := newFakePikPak("pikpak-target", "pikpak-root-id")
|
||||
seedScriptCrawlerDrive(t, cat, src, pp.ID())
|
||||
|
||||
reg := newFakeRegistry()
|
||||
reg.Add(src)
|
||||
reg.Add(pp)
|
||||
|
||||
id := writeScriptCrawlerVideo(t, cat, src, "source-with-dash-001", ".mp4", []byte("script video bytes"), true)
|
||||
commonThumbDir := t.TempDir()
|
||||
|
||||
m := New(Config{
|
||||
Catalog: cat,
|
||||
Registry: reg,
|
||||
CommonThumbDir: commonThumbDir,
|
||||
})
|
||||
m.runOnce(context.Background())
|
||||
|
||||
if pp.uploadCalls != 1 {
|
||||
t.Fatalf("upload calls = %d, want 1", pp.uploadCalls)
|
||||
}
|
||||
wantDir := "Script Crawlers/crawler-alpha"
|
||||
if len(pp.ensureCalls) != 1 || pp.ensureCalls[0] != wantDir {
|
||||
t.Fatalf("ensure calls = %#v, want %q", pp.ensureCalls, wantDir)
|
||||
}
|
||||
wantName := desiredPikPakName("Crawler source-with-dash-001", "source-with-dash-001", "mp4")
|
||||
if gotParent := pp.gotParents[wantName]; gotParent != "pikpak-root-id/"+wantDir {
|
||||
t.Fatalf("upload parent = %q, want root/%s", gotParent, wantDir)
|
||||
}
|
||||
|
||||
got, err := cat.GetVideo(context.Background(), id)
|
||||
if err != nil {
|
||||
t.Fatalf("get migrated video: %v", err)
|
||||
}
|
||||
if got.DriveID != pp.ID() {
|
||||
t.Fatalf("drive_id = %q, want %q", got.DriveID, pp.ID())
|
||||
}
|
||||
if got.FileID != "remote-"+wantName {
|
||||
t.Fatalf("file_id = %q, want remote upload id", got.FileID)
|
||||
}
|
||||
if got.FileName != wantName {
|
||||
t.Fatalf("file_name = %q, want %q", got.FileName, wantName)
|
||||
}
|
||||
if got.PreviewStatus != "ready" || got.FingerprintStatus != "ready" || got.SampledSHA256 == "" {
|
||||
t.Fatalf("generated assets not preserved after migration: preview=%q fingerprint=%q sampled=%q", got.PreviewStatus, got.FingerprintStatus, got.SampledSHA256)
|
||||
}
|
||||
videoPath, _ := src.VideoPath("source-with-dash-001.mp4")
|
||||
if _, err := os.Stat(videoPath); !os.IsNotExist(err) {
|
||||
t.Fatalf("local scriptcrawler video still exists or stat error %v", err)
|
||||
}
|
||||
thumbPath, _ := src.ThumbPath("source-with-dash-001.jpg")
|
||||
if _, err := os.Stat(thumbPath); !os.IsNotExist(err) {
|
||||
t.Fatalf("local scriptcrawler thumb still exists or stat error %v", err)
|
||||
}
|
||||
commonThumbPath := filepath.Join(commonThumbDir, id+".jpg")
|
||||
if data, err := os.ReadFile(commonThumbPath); err != nil || string(data) != "thumb" {
|
||||
t.Fatalf("common thumb = %q, %v; want copied crawled thumb", string(data), err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOnceSkipsScriptCrawlerVideoUntilPreviewAndFingerprintReady(t *testing.T) {
|
||||
cat := setupCatalog(t)
|
||||
src := setupScriptCrawler(t, "crawler-beta")
|
||||
pp := newFakePikPak("pikpak-target", "pikpak-root-id")
|
||||
seedScriptCrawlerDrive(t, cat, src, pp.ID())
|
||||
|
||||
reg := newFakeRegistry()
|
||||
reg.Add(src)
|
||||
reg.Add(pp)
|
||||
|
||||
id := writeScriptCrawlerVideo(t, cat, src, "pending-assets", ".mp4", []byte("script video bytes"), false)
|
||||
m := New(Config{Catalog: cat, Registry: reg})
|
||||
m.runOnce(context.Background())
|
||||
|
||||
if pp.uploadCalls != 0 {
|
||||
t.Fatalf("upload calls = %d, want 0 while generated assets are pending", pp.uploadCalls)
|
||||
}
|
||||
got, err := cat.GetVideo(context.Background(), id)
|
||||
if err != nil {
|
||||
t.Fatalf("get video: %v", err)
|
||||
}
|
||||
if got.DriveID != src.ID() {
|
||||
t.Fatalf("drive_id = %q, want local crawler drive %q", got.DriveID, src.ID())
|
||||
}
|
||||
videoPath, _ := src.VideoPath("pending-assets.mp4")
|
||||
if _, err := os.Stat(videoPath); err != nil {
|
||||
t.Fatalf("local video should remain while assets pending: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOnceSkipsWhenLocalFileMissing(t *testing.T) {
|
||||
cat := setupCatalog(t)
|
||||
src, _ := setupSpider91(t)
|
||||
@@ -578,7 +745,10 @@ func TestCleanupRemovesAllAlreadyMigratedOrphans(t *testing.T) {
|
||||
GetTargetDriveID: func() string { return pp.ID() },
|
||||
})
|
||||
|
||||
deleted, err := m.cleanupOldLocalVideos(context.Background(), src)
|
||||
deleted, err := m.cleanupOldLocalVideos(context.Background(), migrationPlan{
|
||||
source: src,
|
||||
sourceKinds: []string{spider91.Kind},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("cleanup: %v", err)
|
||||
}
|
||||
@@ -608,10 +778,14 @@ func TestRunOnceMigratesBuiltInSpider91ScriptCrawlerSource(t *testing.T) {
|
||||
t.Fatalf("scriptcrawler init: %v", err)
|
||||
}
|
||||
if err := cat.UpsertDrive(ctx, &catalog.Drive{
|
||||
ID: src.ID(),
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Built-in Spider91",
|
||||
Credentials: map[string]string{"builtin": "spider91"},
|
||||
ID: src.ID(),
|
||||
Kind: scriptcrawler.Kind,
|
||||
Name: "Built-in Spider91",
|
||||
Credentials: map[string]string{
|
||||
"builtin": "spider91",
|
||||
"script_path": "/tmp/spider91.py",
|
||||
"upload_drive_id": "pikpak-target",
|
||||
},
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert source drive: %v", err)
|
||||
}
|
||||
@@ -647,13 +821,16 @@ func TestRunOnceMigratesBuiltInSpider91ScriptCrawlerSource(t *testing.T) {
|
||||
Ext: "mp4",
|
||||
Quality: "HD",
|
||||
Size: int64(len("scriptcrawler spider91 video")),
|
||||
PreviewStatus: "pending",
|
||||
PreviewStatus: "ready",
|
||||
PublishedAt: now,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("upsert video: %v", err)
|
||||
}
|
||||
if err := cat.UpdateVideoFingerprint(ctx, id, "sampled-vk-script", "ready", ""); err != nil {
|
||||
t.Fatalf("mark fingerprint ready: %v", err)
|
||||
}
|
||||
|
||||
m := New(Config{
|
||||
Catalog: cat,
|
||||
|
||||
+76
-43
@@ -78,49 +78,79 @@ export function AdminLayout() {
|
||||
<span className="admin-sidebar__brand-text">91后台</span>
|
||||
</div>
|
||||
<nav className="admin-nav">
|
||||
<NavLink to="/" className="admin-nav__link">
|
||||
<Home size={16} /> 返回主站
|
||||
</NavLink>
|
||||
<NavLink
|
||||
to="/admin/drives"
|
||||
className={({ isActive }) =>
|
||||
`admin-nav__link ${isActive ? "is-active" : ""}`
|
||||
}
|
||||
>
|
||||
<HardDrive size={16} /> 网盘管理
|
||||
</NavLink>
|
||||
<NavLink
|
||||
to="/admin/crawlers"
|
||||
className={({ isActive }) =>
|
||||
`admin-nav__link ${isActive ? "is-active" : ""}`
|
||||
}
|
||||
>
|
||||
<SpiderIcon size={16} /> 爬虫管理
|
||||
</NavLink>
|
||||
<NavLink
|
||||
to="/admin/videos"
|
||||
className={({ isActive }) =>
|
||||
`admin-nav__link ${isActive ? "is-active" : ""}`
|
||||
}
|
||||
>
|
||||
<Film size={16} /> 视频管理
|
||||
</NavLink>
|
||||
<NavLink
|
||||
to="/admin/tags"
|
||||
className={({ isActive }) =>
|
||||
`admin-nav__link ${isActive ? "is-active" : ""}`
|
||||
}
|
||||
>
|
||||
<Tags size={16} /> 标签管理
|
||||
</NavLink>
|
||||
<NavLink
|
||||
to="/admin/theme"
|
||||
className={({ isActive }) =>
|
||||
`admin-nav__link ${isActive ? "is-active" : ""}`
|
||||
}
|
||||
>
|
||||
<Palette size={16} /> 主题外观
|
||||
</NavLink>
|
||||
<div className="admin-nav__group admin-nav__group--home">
|
||||
<span className="admin-nav__group-label">主站</span>
|
||||
<NavLink to="/" className="admin-nav__link">
|
||||
<span className="admin-nav__icon"><Home size={16} /></span>
|
||||
<span className="admin-nav__text">
|
||||
<span className="admin-nav__title">返回主站</span>
|
||||
</span>
|
||||
</NavLink>
|
||||
</div>
|
||||
<div className="admin-nav__group">
|
||||
<span className="admin-nav__group-label">资源</span>
|
||||
<NavLink
|
||||
to="/admin/drives"
|
||||
className={({ isActive }) =>
|
||||
`admin-nav__link ${isActive ? "is-active" : ""}`
|
||||
}
|
||||
>
|
||||
<span className="admin-nav__icon"><HardDrive size={16} /></span>
|
||||
<span className="admin-nav__text">
|
||||
<span className="admin-nav__title">网盘管理</span>
|
||||
</span>
|
||||
</NavLink>
|
||||
<NavLink
|
||||
to="/admin/crawlers"
|
||||
className={({ isActive }) =>
|
||||
`admin-nav__link ${isActive ? "is-active" : ""}`
|
||||
}
|
||||
>
|
||||
<span className="admin-nav__icon"><SpiderIcon size={16} /></span>
|
||||
<span className="admin-nav__text">
|
||||
<span className="admin-nav__title">爬虫管理</span>
|
||||
</span>
|
||||
</NavLink>
|
||||
</div>
|
||||
<div className="admin-nav__group">
|
||||
<span className="admin-nav__group-label">管理</span>
|
||||
<NavLink
|
||||
to="/admin/videos"
|
||||
className={({ isActive }) =>
|
||||
`admin-nav__link ${isActive ? "is-active" : ""}`
|
||||
}
|
||||
>
|
||||
<span className="admin-nav__icon"><Film size={16} /></span>
|
||||
<span className="admin-nav__text">
|
||||
<span className="admin-nav__title">视频管理</span>
|
||||
</span>
|
||||
</NavLink>
|
||||
<NavLink
|
||||
to="/admin/tags"
|
||||
className={({ isActive }) =>
|
||||
`admin-nav__link ${isActive ? "is-active" : ""}`
|
||||
}
|
||||
>
|
||||
<span className="admin-nav__icon"><Tags size={16} /></span>
|
||||
<span className="admin-nav__text">
|
||||
<span className="admin-nav__title">标签管理</span>
|
||||
</span>
|
||||
</NavLink>
|
||||
</div>
|
||||
<div className="admin-nav__group">
|
||||
<span className="admin-nav__group-label">系统</span>
|
||||
<NavLink
|
||||
to="/admin/theme"
|
||||
className={({ isActive }) =>
|
||||
`admin-nav__link ${isActive ? "is-active" : ""}`
|
||||
}
|
||||
>
|
||||
<span className="admin-nav__icon"><Palette size={16} /></span>
|
||||
<span className="admin-nav__text">
|
||||
<span className="admin-nav__title">主题外观</span>
|
||||
</span>
|
||||
</NavLink>
|
||||
</div>
|
||||
</nav>
|
||||
<div className="admin-sidebar__footer">
|
||||
<button
|
||||
@@ -148,6 +178,9 @@ export function AdminLayout() {
|
||||
<div className="admin-sidebar__mobile-overlay" onClick={() => setMobileMenuOpen(false)} />
|
||||
)}
|
||||
<div className={`admin-sidebar__mobile-panel${mobileMenuOpen ? " is-open" : ""}`}>
|
||||
<NavLink to="/" className="admin-sidebar__home" onClick={() => setMobileMenuOpen(false)}>
|
||||
<Home size={14} /> 返回主站
|
||||
</NavLink>
|
||||
<button
|
||||
className="admin-sidebar__check-update"
|
||||
onClick={() => { handleCheckUpdate(); setMobileMenuOpen(false); }}
|
||||
|
||||
+616
-432
File diff suppressed because it is too large
Load Diff
@@ -200,6 +200,7 @@ export type AdminCrawler = {
|
||||
scriptPath: string;
|
||||
proxy?: string;
|
||||
targetNew?: string;
|
||||
uploadDriveId?: string;
|
||||
lastCrawlAt?: number;
|
||||
scanGenerationStatus?: DriveGenerationStatus;
|
||||
thumbnailGenerationStatus?: DriveGenerationStatus;
|
||||
@@ -214,6 +215,9 @@ export type AdminCrawler = {
|
||||
fingerprintReadyCount: number;
|
||||
fingerprintPendingCount: number;
|
||||
fingerprintFailedCount: number;
|
||||
totalCrawledCount: number;
|
||||
localVideoCount: number;
|
||||
migratedVideoCount: number;
|
||||
};
|
||||
|
||||
export type UpsertCrawlerInput = {
|
||||
@@ -221,6 +225,7 @@ export type UpsertCrawlerInput = {
|
||||
scriptPath: string;
|
||||
proxy?: string;
|
||||
targetNew?: string;
|
||||
uploadDriveId?: string;
|
||||
};
|
||||
|
||||
export type ImportCrawlerScriptResult = {
|
||||
|
||||
+509
-300
File diff suppressed because it is too large
Load Diff
@@ -205,16 +205,19 @@ test("drive type selector keeps primary source order", () => {
|
||||
|
||||
test("crawler management is a separate admin section", () => {
|
||||
assert.match(adminLayoutSource, /to="\/admin\/crawlers"/);
|
||||
assert.match(adminLayoutSource, /> 爬虫管理/);
|
||||
assert.match(adminLayoutSource, /SpiderIcon size=\{16\} \/> 爬虫管理/);
|
||||
assert.match(adminLayoutSource, /admin-nav__title">爬虫管理/);
|
||||
assert.match(adminLayoutSource, /admin-nav__icon"><SpiderIcon size=\{16\} \/>/);
|
||||
assert.match(appSource, /path="crawlers" element=\{<CrawlersPage \/>/);
|
||||
assert.match(crawlerPageSource, /export function CrawlersPage/);
|
||||
assert.match(crawlerPageSource, /SpiderIcon/);
|
||||
assert.match(crawlerPageSource, /添加爬虫/);
|
||||
assert.match(crawlerPageSource, /返回列表/);
|
||||
assert.match(crawlerPageSource, /setMode\("detail"\)/);
|
||||
assert.match(crawlerPageSource, /setMode\("list"\)/);
|
||||
// 新设计:列表 + Modal 三步编辑器,删除确认走 ConfirmModal,任务进行中自动轮询
|
||||
assert.match(crawlerPageSource, /CrawlerEditorModal/);
|
||||
assert.match(crawlerPageSource, /ConfirmModal/);
|
||||
assert.doesNotMatch(crawlerPageSource, /window\.confirm/);
|
||||
assert.match(crawlerPageSource, /POLL_INTERVAL_MS/);
|
||||
assert.match(crawlerPageSource, /api\.listCrawlers/);
|
||||
assert.match(crawlerPageSource, /api\.listDrives/);
|
||||
assert.match(crawlerPageSource, /api\.upsertCrawler/);
|
||||
assert.match(crawlerPageSource, /api\.runCrawler/);
|
||||
assert.match(crawlerPageSource, /api\.stopCrawlerTasks/);
|
||||
@@ -226,11 +229,15 @@ test("crawler management is a separate admin section", () => {
|
||||
assert.match(crawlerPageSource, /链接导入/);
|
||||
assert.match(crawlerPageSource, /测试脚本/);
|
||||
assert.match(crawlerPageSource, /测试通过/);
|
||||
assert.match(crawlerPageSource, /Spider91UploadTargetField/);
|
||||
assert.match(crawlerPageSource, /uploadDriveId/);
|
||||
assert.match(crawlerPageSource, /UPLOAD_TARGET_KINDS/);
|
||||
assert.doesNotMatch(crawlerPageSource, /新建脚本/);
|
||||
assert.doesNotMatch(crawlerPageSource, /爬虫 ID/);
|
||||
assert.doesNotMatch(crawlerPageSource, /crawler-id/);
|
||||
assert.doesNotMatch(crawlerPageSource, /crawler-name/);
|
||||
assert.doesNotMatch(crawlerPageSource, /脚本路径/);
|
||||
// 脚本路径只读展示,不允许手动填写
|
||||
assert.doesNotMatch(crawlerPageSource, /crawler-script-path/);
|
||||
assert.doesNotMatch(crawlerPageSource, /Python 解释器/);
|
||||
assert.doesNotMatch(crawlerPageSource, /自定义配置 JSON/);
|
||||
assert.doesNotMatch(crawlerPageSource, /Bot/);
|
||||
@@ -238,6 +245,7 @@ test("crawler management is a separate admin section", () => {
|
||||
assert.doesNotMatch(crawlerPageSource, /builtin/);
|
||||
assert.doesNotMatch(crawlerPageSource, /内置 91/);
|
||||
assert.match(apiSource, /type AdminCrawler/);
|
||||
assert.match(apiSource, /uploadDriveId\?: string/);
|
||||
assert.match(apiSource, /"\/crawlers"/);
|
||||
assert.match(apiSource, /"\/crawlers\/import-file"/);
|
||||
assert.match(apiSource, /"\/crawlers\/import-url"/);
|
||||
|
||||
Reference in New Issue
Block a user