Fix script crawler migration to PikPak

Handle already-migrated crawler assets by binding local script crawler rows to equivalent files that already exist on the configured target drive. This keeps thumbnail, preview, and fingerprint readiness stable while removing local crawler videos once an equivalent target object is available.

Harden PikPak uploads by retrying failed upload sessions, requesting fresh resumable upload metadata between attempts, and using CNAME-style OSS requests for PikPak upload endpoints so the SDK does not generate invalid bucket-prefixed hosts such as vip-lixian-07.upload-a10b.mypikpak.com.

Add focused tests for duplicate target binding, retrying failed PikPak OSS uploads with a fresh session, and preserving the expected PikPak upload endpoint URL shape.
This commit is contained in:
nianzhibai
2026-06-11 14:03:37 +08:00
parent 7ddf33d726
commit a8ccc19e9e
6 changed files with 460 additions and 17 deletions
+74
View File
@@ -922,6 +922,80 @@ func (c *Catalog) FindVideoByFileSignature(ctx context.Context, fileName string,
return scanVideo(row)
}
// FindEquivalentVideoOnDrive looks for a non-hidden row on driveID, other than
// source itself, that has a non-empty file_id and matches source through the
// condition built by equivalentVideoLookupWhere (content hash, or size plus
// sampled fingerprint). When several rows match, the one with the earliest
// created_at (ties broken by id) is selected.
//
// It returns sql.ErrNoRows without querying when source is nil, driveID is
// blank after trimming, or source carries no usable lookup key. Otherwise the
// result is whatever scanVideo produces for the selected row.
func (c *Catalog) FindEquivalentVideoOnDrive(ctx context.Context, source *Video, driveID string) (*Video, error) {
	if source == nil {
		return nil, sql.ErrNoRows
	}
	targetDrive := strings.TrimSpace(driveID)
	if targetDrive == "" {
		return nil, sql.ErrNoRows
	}
	cond, condArgs, usable := equivalentVideoLookupWhere(source)
	if !usable {
		return nil, sql.ErrNoRows
	}

	// Placeholder order: drive_id, id, then the equivalence condition.
	params := make([]any, 0, len(condArgs)+2)
	params = append(params, targetDrive, source.ID)
	params = append(params, condArgs...)

	query := `SELECT ` + allVideoCols + ` FROM videos
WHERE drive_id = ?
AND id != ?
AND COALESCE(hidden, 0) = 0
AND COALESCE(file_id, '') != ''
AND (` + cond + `)
ORDER BY created_at ASC, id ASC
LIMIT 1`
	return scanVideo(c.db.QueryRowContext(ctx, query, params...))
}
// HasReadyEquivalentPreview reports whether some non-hidden row other than
// source matches source through the condition built by
// equivalentVideoLookupWhere and has preview_status equal to 'ready' (a NULL
// status is treated as 'pending').
//
// A nil source, or a source without any usable lookup key, yields (false, nil)
// without touching the database. A query that finds no row also yields
// (false, nil); any other query error is returned as-is.
func (c *Catalog) HasReadyEquivalentPreview(ctx context.Context, source *Video) (bool, error) {
	if source == nil {
		return false, nil
	}
	cond, condArgs, usable := equivalentVideoLookupWhere(source)
	if !usable {
		return false, nil
	}

	// The id placeholder precedes the equivalence condition in the query.
	params := append([]any{source.ID}, condArgs...)
	query := `SELECT 1 FROM videos
WHERE id != ?
AND COALESCE(hidden, 0) = 0
AND COALESCE(preview_status, 'pending') = 'ready'
AND (` + cond + `)
LIMIT 1`

	var marker int
	switch err := c.db.QueryRowContext(ctx, query, params...).Scan(&marker); {
	case err == nil:
		return true, nil
	case errors.Is(err, sql.ErrNoRows):
		return false, nil
	default:
		return false, err
	}
}
// equivalentVideoLookupWhere builds the SQL condition (without surrounding
// parentheses) and its bind arguments used to find rows holding the same
// content as source.
//
// Up to two alternatives are joined with OR:
//   - content_hash equals source's normalized content hash, when non-empty;
//   - size_bytes and sampled_sha256 equal source's size and normalized sampled
//     fingerprint, when the size is positive and the fingerprint is non-empty.
//
// The final result is false when source is nil or neither alternative applies,
// in which case the string and arguments are empty.
func equivalentVideoLookupWhere(source *Video) (string, []any, bool) {
	if source == nil {
		return "", nil, false
	}

	var (
		clauses []string
		params  []any
	)

	contentHash := normalizeContentHash(source.ContentHash)
	if contentHash != "" {
		clauses = append(clauses, "(COALESCE(content_hash, '') != '' AND content_hash = ?)")
		params = append(params, contentHash)
	}

	// The sampled fingerprint is only trusted together with a known size.
	if source.Size > 0 {
		if fingerprint := normalizeContentHash(source.SampledSHA256); fingerprint != "" {
			clauses = append(clauses, "(size_bytes = ? AND COALESCE(sampled_sha256, '') != '' AND sampled_sha256 = ?)")
			params = append(params, source.Size, fingerprint)
		}
	}

	if len(clauses) == 0 {
		return "", nil, false
	}
	return strings.Join(clauses, " OR "), params, true
}
func (c *Catalog) ListVideosNeedingFingerprint(ctx context.Context, driveID string, limit int) ([]*Video, error) {
if limit <= 0 {
limit = 10000
+4 -2
View File
@@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"io"
"log"
"net/http"
"path"
@@ -43,8 +44,9 @@ type Driver struct {
algorithms []string
userAgent string
client *resty.Client
onTokenUpdate func(access, refresh, captcha, deviceID string)
client *resty.Client
onTokenUpdate func(access, refresh, captcha, deviceID string)
uploadToOSSFunc func(context.Context, *s3Params, io.Reader) error
// captchaMu serializes captcha-token refreshes triggered by 4002 / 9
// recovery in requestOnce. Without it, N concurrent callers all hitting
+146 -10
View File
@@ -6,7 +6,10 @@ import (
"errors"
"fmt"
"io"
"log"
"net"
"net/http"
"net/url"
"os"
"strings"
"time"
@@ -26,7 +29,7 @@ import (
// - 未命中:resumable.params 含 S3 兼容凭证(access_key / secret /
// bucket / endpoint / key / security_token)
//
// 3. 用 Aliyun OSS SDK PutObject 把字节传到 endpoint+bucket+key
// 3. 用 Aliyun OSS SDK PutObject 把字节传到 PikPak 返回的临时 OSS endpoint
//
// 4. PikPak 服务端轮询 OSS,发现完成后把 resp.File.ID 标记为可用;
// 所以 Upload 完成后直接返回 resp.File.ID 即可(一开始就有,
@@ -39,6 +42,9 @@ const (
// spider91 视频通常 ~100MiB,远低于该值。超过则需走 multipart,
// 当前未实现,遇到会显式报错。
maxSinglePutSize = 5*1024*1024*1024 - 1
// 首次上传失败后最多再重试 3 次。每次重试都会重新申请 PikPak
// upload session,以避开偶发不可解析/不可达的临时上传 endpoint。
pikpakUploadMaxAttempts = 4
)
// uploadTaskData 是 POST /drive/v1/files 的响应结构。
@@ -129,13 +135,49 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
_ = os.Remove(tmp.Name())
}()
// 2) 申请上传会话。
result := UploadResult{Hash: gcidHex, Size: actualSize}
var lastErr error
for attempt := 1; attempt <= pikpakUploadMaxAttempts; attempt++ {
if err := ctx.Err(); err != nil {
return UploadResult{}, err
}
resp, err := d.requestUploadSession(ctx, parentID, name, actualSize, gcidHex)
if err != nil {
lastErr = fmt.Errorf("pikpak upload: request session: %w", err)
if !shouldRetryPikPakUploadAttempt(lastErr, attempt) {
return UploadResult{}, lastErr
}
d.logUploadRetry(name, attempt, lastErr)
if err := pikpakSleepContext(ctx, pikpakUploadRetryDelay(attempt)); err != nil {
return UploadResult{}, err
}
continue
}
out, err := d.completeUploadAttempt(ctx, tmp, parentID, name, result, resp)
if err == nil {
return out, nil
}
lastErr = err
if !shouldRetryPikPakUploadAttempt(lastErr, attempt) {
return UploadResult{}, lastErr
}
d.logUploadRetry(name, attempt, lastErr)
if err := pikpakSleepContext(ctx, pikpakUploadRetryDelay(attempt)); err != nil {
return UploadResult{}, err
}
}
return UploadResult{}, lastErr
}
func (d *Driver) requestUploadSession(ctx context.Context, parentID, name string, size int64, gcidHex string) (uploadTaskData, error) {
var resp uploadTaskData
if err := d.request(ctx, filesURL, http.MethodPost, func(req *resty.Request) {
req.SetBody(map[string]any{
"kind": "drive#file",
"name": name,
"size": actualSize,
"size": size,
"hash": gcidHex,
"upload_type": "UPLOAD_TYPE_RESUMABLE",
"objProvider": map[string]any{"provider": "UPLOAD_TYPE_UNKNOWN"},
@@ -143,12 +185,13 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
"folder_type": "NORMAL",
})
}, &resp); err != nil {
return UploadResult{}, fmt.Errorf("pikpak upload: request session: %w", err)
return uploadTaskData{}, err
}
return resp, nil
}
result := UploadResult{Hash: gcidHex, Size: actualSize}
// 3) 命中秒传:服务端已经知道这个 hash,直接返回新文件 ID。
func (d *Driver) completeUploadAttempt(ctx context.Context, tmp *os.File, parentID, name string, result UploadResult, resp uploadTaskData) (UploadResult, error) {
// 命中秒传:服务端已经知道这个 hash,直接返回新文件 ID。
if resp.Resumable == nil {
if resp.File.ID != "" {
result.FileID = resp.File.ID
@@ -163,7 +206,7 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
return result, nil
}
// 4) 未命中秒传:把字节传到 S3 兼容存储。
// 未命中秒传:把字节传到 S3 兼容存储。
if _, err := tmp.Seek(0, io.SeekStart); err != nil {
return UploadResult{}, fmt.Errorf("pikpak upload: seek tmp: %w", err)
}
@@ -171,7 +214,7 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
return UploadResult{}, fmt.Errorf("pikpak upload: oss put: %w", err)
}
// 5) 拿到 fileID。优先走响应里的预分配 ID;为空就回查目录。
// 拿到 fileID。优先走响应里的预分配 ID;为空就回查目录。
if resp.File.ID != "" {
result.FileID = resp.File.ID
return result, nil
@@ -184,6 +227,58 @@ func (d *Driver) UploadAndReportHash(ctx context.Context, parentID, name string,
return result, nil
}
// shouldRetryPikPakUploadAttempt reports whether another upload attempt should
// follow the failed attempt number given: the attempt budget
// (pikpakUploadMaxAttempts) must not be exhausted and err must be classified
// as retryable by isRetryablePikPakUploadError.
func shouldRetryPikPakUploadAttempt(err error, attempt int) bool {
	if attempt >= pikpakUploadMaxAttempts {
		return false
	}
	return isRetryablePikPakUploadError(err)
}
// pikpakUploadRetryDelay returns the pause to take after the given attempt
// number: a linear backoff of attempt seconds. Zero or negative attempt
// numbers yield no delay.
func pikpakUploadRetryDelay(attempt int) time.Duration {
	if attempt > 0 {
		return time.Second * time.Duration(attempt)
	}
	return 0
}
// logUploadRetry writes one log line announcing that the upload of name will
// be retried. attempt is the number of the attempt that just failed; the line
// reports the upcoming attempt (attempt+1) out of pikpakUploadMaxAttempts
// together with the driver id and the triggering error.
func (d *Driver) logUploadRetry(name string, attempt int, err error) {
	upcoming := attempt + 1
	log.Printf(
		"[pikpak] upload retry drive=%s name=%q next_attempt=%d/%d err=%v",
		d.id, name, upcoming, pikpakUploadMaxAttempts, err,
	)
}
// isRetryablePikPakUploadError classifies err as transient (worth a retry) or
// permanent. Checks run in this order, and the first one that applies decides:
//
//  1. nil, context.Canceled and context.DeadlineExceeded are never retried.
//  2. An oss.ServiceError is retried only for HTTP 429 or any status >= 500.
//  3. Any error satisfying net.Error is retried.
//  4. Otherwise the lower-cased error text is searched for a fixed list of
//     transport and throttling markers.
func isRetryablePikPakUploadError(err error) bool {
	switch {
	case err == nil:
		return false
	case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded):
		// The caller gave up; retrying would only delay reporting that.
		return false
	}

	var ossErr oss.ServiceError
	if errors.As(err, &ossErr) {
		status := ossErr.StatusCode
		return status >= 500 || status == http.StatusTooManyRequests
	}

	var transportErr net.Error
	if errors.As(err, &transportErr) {
		return true
	}

	// Fallback for errors that only survive as text by the time they get here.
	message := strings.ToLower(err.Error())
	for _, marker := range [...]string{
		"no such host",
		"temporary failure in name resolution",
		"server misbehaving",
		"connection reset",
		"connection refused",
		"broken pipe",
		"eof",
		"i/o timeout",
		"tls handshake timeout",
		"http 429",
		"http 500",
		"http 502",
		"http 503",
		"http 504",
		"http 509",
		"too many requests",
		"temporarily unavailable",
		"service unavailable",
	} {
		if strings.Contains(message, marker) {
			return true
		}
	}
	return false
}
// bufferAndHashGCID 把 r 复制到一个临时文件,同时计算 GCID。
// 返回临时文件(位置在末尾,需要调用方 Seek 回 0)、GCID hex 大写、实际写入字节数。
//
@@ -215,10 +310,13 @@ func bufferAndHashGCID(r io.Reader, size int64) (*os.File, string, int64, error)
//
// 参数复用 PikPak 的临时凭证;必须带 Security Token 头部 + UserAgent,与 OpenList 一致。
func (d *Driver) uploadToOSS(ctx context.Context, p *s3Params, body io.Reader) error {
if d.uploadToOSSFunc != nil {
return d.uploadToOSSFunc(ctx, p, body)
}
if p == nil {
return errors.New("pikpak upload: nil s3 params")
}
client, err := oss.New(p.Endpoint, p.AccessKeyID, p.AccessKeySecret)
client, err := newPikPakOSSClient(p)
if err != nil {
return fmt.Errorf("oss client: %w", err)
}
@@ -235,6 +333,44 @@ func (d *Driver) uploadToOSS(ctx context.Context, p *s3Params, body io.Reader) e
)
}
// newPikPakOSSClient creates an OSS client from the temporary credentials in p.
//
// When p.Endpoint is a PikPak upload host (see isPikPakCNAMEEndpoint), the
// client is created with oss.UseCname(true) so that requests go to the
// endpoint host itself rather than to a bucket-prefixed host. Caller-supplied
// options are passed after that option, in the order given.
//
// It returns an error when p is nil, or whatever oss.New reports.
func newPikPakOSSClient(p *s3Params, options ...oss.ClientOption) (*oss.Client, error) {
	if p == nil {
		return nil, errors.New("pikpak upload: nil s3 params")
	}

	merged := make([]oss.ClientOption, 0, 1+len(options))
	if useCNAME := isPikPakCNAMEEndpoint(p.Endpoint); useCNAME {
		merged = append(merged, oss.UseCname(true))
	}
	merged = append(merged, options...)

	return oss.New(p.Endpoint, p.AccessKeyID, p.AccessKeySecret, merged...)
}
// isPikPakCNAMEEndpoint reports whether endpoint's host is a strict subdomain
// of mypikpak.com or mypikpak.net. The comparison is case-insensitive and
// ignores one trailing dot; the bare apex domains and endpoints without a
// recognizable host are rejected.
func isPikPakCNAMEEndpoint(endpoint string) bool {
	name := endpointHost(endpoint)
	if name == "" {
		return false
	}
	name = strings.TrimSuffix(strings.ToLower(name), ".")

	switch name {
	case "mypikpak.com", "mypikpak.net":
		// The apex itself is not an upload host.
		return false
	}
	for _, suffix := range [...]string{".mypikpak.com", ".mypikpak.net"} {
		if strings.HasSuffix(name, suffix) {
			return true
		}
	}
	return false
}
// endpointHost extracts the bare host name from an endpoint that may be a
// full URL ("https://host:443/path"), a scheme-less authority ("host:443"),
// or a host followed by a path ("host/path"). Any port is dropped and the
// square brackets around an IPv6 literal are removed. Blank input yields "".
func endpointHost(endpoint string) string {
	raw := strings.TrimSpace(endpoint)
	if raw == "" {
		return ""
	}

	authority := raw
	if parsed, err := url.Parse(raw); err == nil && parsed.Host != "" {
		authority = parsed.Host
	} else {
		// No scheme (or unparsable): keep only what precedes the first slash.
		authority, _, _ = strings.Cut(raw, "/")
	}

	if hostOnly, _, err := net.SplitHostPort(authority); err == nil {
		authority = hostOnly
	}
	return strings.Trim(authority, "[]")
}
type readerWithCtx struct {
ctx context.Context
r io.Reader
@@ -6,12 +6,15 @@ import (
"crypto/sha1"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/aliyun/aliyun-oss-go-sdk/oss"
"github.com/go-resty/resty/v2"
)
@@ -181,6 +184,95 @@ func TestUploadInstantSuccessFallsBackToListWhenFileIDMissing(t *testing.T) {
}
}
// TestUploadRetriesWithNewSessionWhenOSSEndpointDNSFails checks that a DNS
// failure on the first OSS upload makes Upload request a second upload session
// and that the returned file id and uploaded bytes come from that second
// attempt. Each session response embeds the request counter in its endpoint,
// key and file id so the two sessions can be told apart.
func TestUploadRetriesWithNewSessionWhenOSSEndpointDNSFails(t *testing.T) {
	const sessionTemplate = `{
"upload_type": "UPLOAD_TYPE_RESUMABLE",
"resumable": {
"kind": "drive#resumable",
"provider": "UPLOAD_TYPE_UNKNOWN",
"params": {
"access_key_id": "ak",
"access_key_secret": "sk",
"bucket": "bucket",
"endpoint": "https://vip-lixian-%02d.upload-a10b.mypikpak.com",
"key": "object-key-%02d",
"security_token": "token"
}
},
"file": {"id": "retry-file-%02d", "name": "retry.mp4", "kind": "drive#file"}
}`

	var sessionRequests int
	handleSession := func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			t.Errorf("method = %q, want POST", r.Method)
		}
		sessionRequests++
		w.Header().Set("Content-Type", "application/json")
		_, _ = fmt.Fprintf(w, sessionTemplate, sessionRequests, sessionRequests, sessionRequests)
	}

	mux := http.NewServeMux()
	mux.HandleFunc("/drive/v1/files", handleSession)
	srv := httptest.NewServer(mux)
	defer srv.Close()

	d := newTestDriver(t, srv)

	var (
		uploadAttempts int
		received       []byte
	)
	// Fail the first OSS upload with a DNS error, then capture the body.
	d.uploadToOSSFunc = func(_ context.Context, _ *s3Params, body io.Reader) error {
		uploadAttempts++
		if uploadAttempts == 1 {
			return &net.DNSError{Err: "no such host", Name: "vip-lixian-01.upload-a10b.mypikpak.com"}
		}
		data, readErr := io.ReadAll(body)
		received = data
		return readErr
	}

	want := []byte("retry payload body")
	fileID, err := d.Upload(context.Background(), "parent-id", "retry.mp4", bytes.NewReader(want), int64(len(want)))
	if err != nil {
		t.Fatalf("upload: %v", err)
	}

	if fileID != "retry-file-02" {
		t.Fatalf("file id = %q, want retry-file-02 from the second session", fileID)
	}
	if sessionRequests != 2 {
		t.Fatalf("session requests = %d, want 2", sessionRequests)
	}
	if uploadAttempts != 2 {
		t.Fatalf("upload attempts = %d, want 2", uploadAttempts)
	}
	if !bytes.Equal(received, want) {
		t.Fatalf("uploaded body = %q, want %q", string(received), string(want))
	}
}
// TestPikPakOSSClientUsesCNAMEForPikPakUploadEndpoint checks the URL shape
// produced by a client from newPikPakOSSClient for a PikPak upload endpoint:
// the signed URL must keep the endpoint host as-is (no bucket prefix) and
// carry the object key in the path.
func TestPikPakOSSClientUsesCNAMEForPikPakUploadEndpoint(t *testing.T) {
	const (
		bucketPrefixedHost = "vip-lixian-07.upload-a10b.mypikpak.com"
		wantURLPrefix      = "http://upload-a10b.mypikpak.com/upload_tmp%2Fobject-key"
	)

	p := &s3Params{
		AccessKeyID:     "ak",
		AccessKeySecret: "sk",
		Bucket:          "vip-lixian-07",
		Endpoint:        "http://upload-a10b.mypikpak.com",
		Key:             "upload_tmp/object-key",
	}

	ossClient, err := newPikPakOSSClient(p)
	if err != nil {
		t.Fatalf("new oss client: %v", err)
	}
	b, err := ossClient.Bucket(p.Bucket)
	if err != nil {
		t.Fatalf("bucket: %v", err)
	}
	signedURL, err := b.SignURL(p.Key, oss.HTTPPut, 60)
	if err != nil {
		t.Fatalf("sign url: %v", err)
	}

	if strings.Contains(signedURL, bucketPrefixedHost) {
		t.Fatalf("signed url uses invalid bucket-prefixed PikPak host: %s", signedURL)
	}
	if !strings.Contains(signedURL, wantURLPrefix) {
		t.Fatalf("signed url = %s, want PikPak endpoint host with object key path", signedURL)
	}
}
func TestUploadRejectsInvalidArguments(t *testing.T) {
d := New(Config{ID: "x", Username: "u", Password: "p", Platform: "web"})
cases := []struct {
+68 -5
View File
@@ -16,6 +16,7 @@ package spider91migrate
import (
"context"
"database/sql"
"errors"
"fmt"
"io"
@@ -701,10 +702,36 @@ func (m *Migrator) migrateDrive(ctx context.Context, plan migrationPlan) (int, e
continue
}
if plan.requireAssetsReady && !crawlerVideoAssetsReady(v) {
if targetDuplicate, err := m.cfg.Catalog.FindEquivalentVideoOnDrive(ctx, v, plan.targetDriveID); err != nil {
if !errors.Is(err, sql.ErrNoRows) {
log.Printf("[spider91migrate] %s find target duplicate: %v", v.ID, err)
}
} else if targetDuplicate != nil {
ok, err := m.bindToExistingTarget(ctx, v, targetDuplicate, plan)
if err != nil {
log.Printf("[spider91migrate] %s: %v", v.ID, err)
continue
}
if ok {
migrated++
if m.cfg.OnMigrated != nil {
m.cfg.OnMigrated(v.ID)
}
}
continue
}
if plan.requireAssetsReady {
ready, err := m.crawlerVideoAssetsReady(ctx, v)
if err != nil {
log.Printf("[spider91migrate] %s check generated assets: %v", v.ID, err)
continue
}
if !ready {
continue
}
}
ok, err := m.migrateOne(ctx, v, plan)
if err != nil {
log.Printf("[spider91migrate] %s: %v", v.ID, err)
@@ -748,12 +775,18 @@ func (m *Migrator) findVideoForLocalFile(ctx context.Context, plan migrationPlan
return nil
}
func crawlerVideoAssetsReady(v *catalog.Video) bool {
func (m *Migrator) crawlerVideoAssetsReady(ctx context.Context, v *catalog.Video) (bool, error) {
if v == nil {
return false
return false, nil
}
return strings.EqualFold(strings.TrimSpace(v.PreviewStatus), "ready") &&
strings.EqualFold(strings.TrimSpace(v.FingerprintStatus), "ready")
fingerprintReady := strings.EqualFold(strings.TrimSpace(v.FingerprintStatus), "ready") || strings.TrimSpace(v.SampledSHA256) != ""
if !fingerprintReady {
return false, nil
}
if strings.EqualFold(strings.TrimSpace(v.PreviewStatus), "ready") {
return true, nil
}
return m.cfg.Catalog.HasReadyEquivalentPreview(ctx, v)
}
// migrateOne 把单条本地爬虫视频上传到目标盘并改写 catalog。
@@ -815,6 +848,36 @@ func (m *Migrator) migrateOne(ctx context.Context, v *catalog.Video, plan migrat
return true, nil
}
// bindToExistingTarget re-points the local crawler row v at a file that
// already exists on the target drive instead of uploading it again.
//
// Steps, in order:
//  1. MigrateVideoToDrive rewrites v's catalog row to the target drive and
//     target.FileID, using target's content hash when set, else v's.
//  2. If target has a file name, it is copied onto v; a failure here is only
//     logged.
//  3. The crawled thumbnail is preserved and the local files belonging to v's
//     previous file id are cleaned up.
//
// It returns (false, nil) without doing anything when v, target or
// plan.source is nil, or when the target drive id or target file id is empty.
// An error is returned only when the catalog rewrite in step 1 fails.
func (m *Migrator) bindToExistingTarget(ctx context.Context, v, target *catalog.Video, plan migrationPlan) (bool, error) {
	switch {
	case v == nil, target == nil, plan.source == nil:
		return false, nil
	case plan.targetDriveID == "", target.FileID == "":
		return false, nil
	}

	boundHash := firstNonEmpty(target.ContentHash, v.ContentHash)
	if err := m.cfg.Catalog.MigrateVideoToDrive(ctx, v.ID, plan.targetDriveID, target.FileID, boundHash); err != nil {
		return false, fmt.Errorf("catalog bind existing target: %w", err)
	}

	if target.FileName != "" {
		patch := catalog.VideoMetaPatch{FileName: target.FileName}
		if err := m.cfg.Catalog.UpdateVideoMeta(ctx, v.ID, patch); err != nil {
			// Best effort: the bind itself has already succeeded.
			log.Printf("[spider91migrate] %s update file_name after duplicate bind: %v", v.ID, err)
		}
	}

	m.preserveCrawledThumbnail(ctx, plan.source, v)
	// v still holds the pre-bind (local) file id here.
	CleanupSpider91Local(plan.source, v.FileID)

	log.Printf(
		"[spider91migrate] %s bound to existing drive=%s(kind=%s) file=%s duplicate=%s",
		v.ID, plan.targetDriveID, plan.target.Kind(), target.FileID, target.ID,
	)
	return true, nil
}
// firstNonEmpty returns the first argument that is not blank (empty or
// whitespace only). The winning value is returned untrimmed. It returns ""
// when every argument is blank or none is given.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if candidate := values[i]; len(strings.TrimSpace(candidate)) > 0 {
			return candidate
		}
	}
	return ""
}
func sourceIDForUploadName(v *catalog.Video, plan migrationPlan) string {
if v == nil {
return ""
@@ -586,6 +586,82 @@ func TestRunOnceSkipsScriptCrawlerVideoUntilPreviewAndFingerprintReady(t *testin
}
}
// TestRunOnceBindsScriptCrawlerDuplicateToExistingTargetWithoutUpload checks
// that when the target drive already has a row with the same size and sampled
// fingerprint as a local crawler video, runOnce binds the local row to that
// existing file instead of uploading: no upload call is made, the row takes
// over the target's drive id, file id, file name and content hash, the local
// video and thumbnail are removed, and the thumbnail is copied to the common
// thumbnail directory.
func TestRunOnceBindsScriptCrawlerDuplicateToExistingTargetWithoutUpload(t *testing.T) {
	ctx := context.Background()

	cat := setupCatalog(t)
	crawler := setupScriptCrawler(t, "crawler-duplicate")
	pikpak := newFakePikPak("pikpak-target", "pikpak-root-id")
	seedScriptCrawlerDrive(t, cat, crawler, pikpak.ID())

	registry := newFakeRegistry()
	registry.Add(crawler)
	registry.Add(pikpak)

	// Local source video with a ready fingerprint.
	body := []byte("duplicate script video bytes")
	sourceID := writeScriptCrawlerVideo(t, cat, crawler, "duplicate-source", ".mp4", body, false)
	fingerprint := "same-sampled-fingerprint"
	if err := cat.UpdateVideoFingerprint(ctx, sourceID, fingerprint, "ready", ""); err != nil {
		t.Fatalf("mark source fingerprint ready: %v", err)
	}

	// Pre-existing row on the target drive with the same size and fingerprint.
	anHourAgo := time.Now().Add(-time.Hour)
	existing := &catalog.Video{
		ID:            "pikpak-existing-duplicate",
		DriveID:       pikpak.ID(),
		FileID:        "existing-target-file",
		FileName:      "existing-target-name.mp4",
		ContentHash:   "existing-content-hash",
		Title:         "Existing duplicate",
		Ext:           "mp4",
		Size:          int64(len(body)),
		PreviewStatus: "ready",
		PublishedAt:   anHourAgo,
		CreatedAt:     anHourAgo,
		UpdatedAt:     anHourAgo,
	}
	if err := cat.UpsertVideo(ctx, existing); err != nil {
		t.Fatalf("upsert existing target: %v", err)
	}
	if err := cat.UpdateVideoFingerprint(ctx, existing.ID, fingerprint, "ready", ""); err != nil {
		t.Fatalf("mark target fingerprint ready: %v", err)
	}

	thumbDir := t.TempDir()
	migrator := New(Config{Catalog: cat, Registry: registry, CommonThumbDir: thumbDir})
	migrator.runOnce(ctx)

	if pikpak.uploadCalls != 0 {
		t.Fatalf("upload calls = %d, want 0 when equivalent target file already exists", pikpak.uploadCalls)
	}

	bound, err := cat.GetVideo(ctx, sourceID)
	if err != nil {
		t.Fatalf("get bound video: %v", err)
	}
	if bound.DriveID != pikpak.ID() {
		t.Fatalf("drive_id = %q, want %q", bound.DriveID, pikpak.ID())
	}
	if bound.FileID != existing.FileID {
		t.Fatalf("file_id = %q, want existing target file %q", bound.FileID, existing.FileID)
	}
	if bound.FileName != existing.FileName {
		t.Fatalf("file_name = %q, want existing target name %q", bound.FileName, existing.FileName)
	}
	if bound.ContentHash != existing.ContentHash {
		t.Fatalf("content_hash = %q, want %q", bound.ContentHash, existing.ContentHash)
	}

	// Local artifacts must be gone after the bind.
	localVideo, _ := crawler.VideoPath("duplicate-source.mp4")
	if _, statErr := os.Stat(localVideo); !os.IsNotExist(statErr) {
		t.Fatalf("local duplicate video still exists or stat error %v", statErr)
	}
	localThumb, _ := crawler.ThumbPath("duplicate-source.jpg")
	if _, statErr := os.Stat(localThumb); !os.IsNotExist(statErr) {
		t.Fatalf("local duplicate thumb still exists or stat error %v", statErr)
	}

	// The crawled thumbnail must have been copied under the video's id.
	preserved := filepath.Join(thumbDir, sourceID+".jpg")
	if data, readErr := os.ReadFile(preserved); readErr != nil || string(data) != "thumb" {
		t.Fatalf("common thumb = %q, %v; want copied crawled thumb", string(data), readErr)
	}
}
func TestRunOnceSkipsWhenLocalFileMissing(t *testing.T) {
cat := setupCatalog(t)
src, _ := setupSpider91(t)