From f0187df4a1c4c94125dc6eb7d294c956ea84404a Mon Sep 17 00:00:00 2001
From: Michael Mayer
Date: Thu, 7 May 2020 19:42:04 +0200
Subject: [PATCH] Purge command to remove missing files from the index #234

Signed-off-by: Michael Mayer
---
 cmd/photoprism/photoprism.go         |   1 +
 internal/commands/purge.go           |  78 +++++++++++++
 internal/entity/file.go              |   9 ++
 internal/entity/photo.go             |  21 ++++
 internal/photoprism/index.go         |  10 +-
 internal/photoprism/purge.go         | 160 +++++++++++++++++++++++++++
 internal/photoprism/purge_options.go |   6 +
 internal/query/file.go               |   9 +-
 internal/query/photo.go              |  10 ++
 internal/service/purge.go            |  19 ++++
 internal/service/service.go          |   1 +
 internal/service/service_test.go     |   4 +
 12 files changed, 317 insertions(+), 11 deletions(-)
 create mode 100644 internal/commands/purge.go
 create mode 100644 internal/photoprism/purge.go
 create mode 100644 internal/photoprism/purge_options.go
 create mode 100644 internal/service/purge.go

diff --git a/cmd/photoprism/photoprism.go b/cmd/photoprism/photoprism.go
index 9d35bbe9b..61616ccf4 100644
--- a/cmd/photoprism/photoprism.go
+++ b/cmd/photoprism/photoprism.go
@@ -26,6 +26,7 @@ func main() {
 		commands.StopCommand,
 		commands.IndexCommand,
 		commands.ImportCommand,
+		commands.PurgeCommand,
 		commands.CopyCommand,
 		commands.ConvertCommand,
 		commands.ResampleCommand,
diff --git a/internal/commands/purge.go b/internal/commands/purge.go
new file mode 100644
index 000000000..709cd0250
--- /dev/null
+++ b/internal/commands/purge.go
@@ -0,0 +1,78 @@
+package commands
+
+import (
+	"context"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/photoprism/photoprism/internal/config"
+	"github.com/photoprism/photoprism/internal/photoprism"
+	"github.com/photoprism/photoprism/internal/service"
+	"github.com/photoprism/photoprism/pkg/fs"
+	"github.com/photoprism/photoprism/pkg/txt"
+	"github.com/urfave/cli"
+)
+
+// PurgeCommand registers the purge cli command.
+var PurgeCommand = cli.Command{
+	Name:   "purge",
+	Usage:  "Removes missing files from search results",
+	Flags:  purgeFlags,
+	Action: purgeAction,
+}
+
+// purgeFlags lists the flags supported by the purge command.
+var purgeFlags = []cli.Flag{
+	cli.BoolFlag{
+		Name:  "hard",
+		Usage: "delete all data and permanently remove from index",
+	},
+}
+
+// purgeAction removes missing files from search results. An optional first
+// argument is passed on as sub-path; it returns the config or purge error.
+func purgeAction(ctx *cli.Context) error {
+	start := time.Now()
+
+	conf := config.NewConfig(ctx)
+	service.SetConfig(conf)
+
+	cctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	if err := conf.Init(cctx); err != nil {
+		return err
+	}
+
+	conf.InitDb()
+
+	// Always shut down, even when the purge run below fails.
+	defer conf.Shutdown()
+
+	// get cli first argument
+	subPath := strings.TrimSpace(ctx.Args().First())
+
+	if subPath == "" {
+		log.Infof("purging missing files in %s", txt.Quote(filepath.Base(conf.OriginalsPath())))
+	} else {
+		log.Infof("purging missing files in %s", txt.Quote(fs.RelativeName(filepath.Join(conf.OriginalsPath(), subPath), filepath.Dir(conf.OriginalsPath()))))
+	}
+
+	if conf.ReadOnly() {
+		log.Infof("read-only mode enabled")
+	}
+
+	opt := photoprism.PurgeOptions{
+		Path: subPath,
+		Hard: ctx.Bool("hard"),
+	}
+
+	files, photos, err := service.Purge().Start(opt)
+	if err != nil {
+		return err
+	}
+
+	log.Infof("purged %d files and %d photos in %s", len(files), len(photos), time.Since(start))
+
+	return nil
+}
diff --git a/internal/entity/file.go b/internal/entity/file.go
index 572b0c469..a1f912785 100644
--- a/internal/entity/file.go
+++ b/internal/entity/file.go
@@ -103,3 +103,12 @@ func (m File) Changed(fileSize int64, fileModified time.Time) bool {
 
 	return true
 }
+
+// Purge removes a file from the index by marking it as missing.
+func (m *File) Purge() error {
+	// Unscoped() lifts gorm's default scope from the update; the row is only
+	// flagged as missing here and is not deleted.
+	result := Db().Unscoped().Model(m).Update("file_missing", true)
+
+	return result.Error
+}
diff --git a/internal/entity/photo.go b/internal/entity/photo.go
index 9bb9b2c56..a0af033cc 100644
--- a/internal/entity/photo.go
+++ b/internal/entity/photo.go
@@ -482,3 +482,42 @@ func (m *Photo) SetCoordinates(lat, lng float32, altitude int, source string) {
 	m.PhotoAltitude = altitude
 	m.LocationSrc = source
 }
+
+// Delete removes the photo together with its file records. If permanently is
+// true the rows are removed with DeletePermanently, otherwise the default
+// (scoped) delete is used. The first database error is returned and the
+// photo itself is only deleted once its files were deleted successfully.
+func (m *Photo) Delete(permanently bool) error {
+	if permanently {
+		return m.DeletePermanently()
+	}
+
+	if err := Db().Delete(File{}, "photo_id = ?", m.ID).Error; err != nil {
+		return err
+	}
+
+	return Db().Delete(m).Error
+}
+
+// DeletePermanently removes the photo and its related file, keyword, label
+// and album records using unscoped deletes. Related records are deleted
+// first; the first database error aborts the operation and is returned.
+func (m *Photo) DeletePermanently() error {
+	if err := Db().Unscoped().Delete(File{}, "photo_id = ?", m.ID).Error; err != nil {
+		return err
+	}
+
+	if err := Db().Unscoped().Delete(PhotoKeyword{}, "photo_id = ?", m.ID).Error; err != nil {
+		return err
+	}
+
+	if err := Db().Unscoped().Delete(PhotoLabel{}, "photo_id = ?", m.ID).Error; err != nil {
+		return err
+	}
+
+	if err := Db().Unscoped().Delete(PhotoAlbum{}, "photo_uuid = ?", m.PhotoUUID).Error; err != nil {
+		return err
+	}
+
+	return Db().Unscoped().Delete(m).Error
+}
diff --git a/internal/photoprism/index.go b/internal/photoprism/index.go
index 3bd6a702b..aa0bad631 100644
--- a/internal/photoprism/index.go
+++ b/internal/photoprism/index.go
@@ -16,6 +16,7 @@ import (
 	"github.com/photoprism/photoprism/internal/nsfw"
 	"github.com/photoprism/photoprism/internal/query"
 	"github.com/photoprism/photoprism/pkg/fs"
+	"github.com/photoprism/photoprism/pkg/txt"
 )
 
 // Index represents an indexer that indexes files in the originals directory.
@@ -59,9 +60,10 @@ func (ind *Index) Cancel() {
 func (ind *Index) Start(opt IndexOptions) map[string]bool {
 	done := make(map[string]bool)
 	originalsPath := ind.originalsPath()
+	optionsPath := filepath.Join(originalsPath, opt.Path)
 
-	if !fs.PathExists(originalsPath) {
-		event.Error(fmt.Sprintf("index: %s does not exist", originalsPath))
+	if !fs.PathExists(optionsPath) {
+		event.Error(fmt.Sprintf("index: %s does not exist", txt.Quote(optionsPath)))
 		return done
 	}
 
@@ -101,9 +103,7 @@ func (ind *Index) Start(opt IndexOptions) map[string]bool {
 		log.Infof(`index: ignored "%s"`, fs.RelativeName(fileName, originalsPath))
 	}
 
-	indexPath := filepath.Join(originalsPath, opt.Path)
-
-	err := godirwalk.Walk(indexPath, &godirwalk.Options{
+	err := godirwalk.Walk(optionsPath, &godirwalk.Options{
 		Callback: func(fileName string, info *godirwalk.Dirent) error {
 			defer func() {
 				if err := recover(); err != nil {
diff --git a/internal/photoprism/purge.go b/internal/photoprism/purge.go
new file mode 100644
index 000000000..f2be8f4e3
--- /dev/null
+++ b/internal/photoprism/purge.go
@@ -0,0 +1,197 @@
+package photoprism
+
+import (
+	"errors"
+	"fmt"
+	"path/filepath"
+	"runtime"
+
+	"github.com/photoprism/photoprism/internal/config"
+	"github.com/photoprism/photoprism/internal/event"
+	"github.com/photoprism/photoprism/internal/mutex"
+	"github.com/photoprism/photoprism/internal/query"
+	"github.com/photoprism/photoprism/pkg/fs"
+	"github.com/photoprism/photoprism/pkg/txt"
+)
+
+// purgeBatchSize is the number of rows fetched from the index per query.
+const purgeBatchSize = 250
+
+// errPurgeCanceled is returned when the worker is canceled while purging.
+var errPurgeCanceled = errors.New("purge canceled")
+
+// Purge represents a worker that removes missing files from search results.
+type Purge struct {
+	conf *config.Config
+}
+
+// NewPurge returns a new purge worker that uses the given config.
+func NewPurge(conf *config.Config) *Purge {
+	return &Purge{conf: conf}
+}
+
+// originalsPath returns the original media files path as string.
+func (prg *Purge) originalsPath() string {
+	return prg.conf.OriginalsPath()
+}
+
+// Start removes missing files from search results.
+//
+// It first flags every indexed file that does not exist in the originals
+// path as missing and then deletes the photos returned by MissingPhotos,
+// permanently if opt.Hard is set. The returned maps have the purged file
+// names and photo UUIDs as keys; they are never nil and may be partially
+// filled when an error is returned.
+//
+// NOTE(review): opt.Path is only checked for existence, the scan itself is
+// not restricted to that sub-directory. Confirm whether this is intended.
+func (prg *Purge) Start(opt PurgeOptions) (purgedFiles map[string]bool, purgedPhotos map[string]bool, err error) {
+	purgedFiles = make(map[string]bool)
+	purgedPhotos = make(map[string]bool)
+
+	defer func() {
+		// Store the panic in the named result. Declaring a new err here
+		// would shadow it and make a panicked run look successful.
+		if r := recover(); r != nil {
+			log.Errorf("purge: %s [panic]", r)
+			err = fmt.Errorf("purge: %v [panic]", r)
+		}
+	}()
+
+	optionsPath := filepath.Join(prg.originalsPath(), opt.Path)
+
+	if !fs.PathExists(optionsPath) {
+		err = fmt.Errorf("purge: %s does not exist", txt.Quote(optionsPath))
+		event.Error(err.Error())
+		return purgedFiles, purgedPhotos, err
+	}
+
+	if startErr := mutex.Worker.Start(); startErr != nil {
+		err = fmt.Errorf("purge: %s", startErr.Error())
+		event.Error(err.Error())
+		return purgedFiles, purgedPhotos, err
+	}
+
+	defer func() {
+		mutex.Worker.Stop()
+		runtime.GC()
+	}()
+
+	if err = prg.purgeFiles(purgedFiles); err != nil {
+		return purgedFiles, purgedPhotos, err
+	}
+
+	if err = prg.purgePhotos(opt.Hard, purgedPhotos); err != nil {
+		return purgedFiles, purgedPhotos, err
+	}
+
+	return purgedFiles, purgedPhotos, nil
+}
+
+// purgeFiles goes through all indexed files in batches and flags those that
+// do not exist in the originals path as missing. The names of purged files
+// are added to purged. It fails if a query fails or the worker is canceled.
+func (prg *Purge) purgeFiles(purged map[string]bool) error {
+	q := query.New(prg.conf.Db())
+	originalsPath := prg.originalsPath()
+
+	// Flagging a file does not remove it from the q.Files result set, so
+	// plain limit/offset paging is sufficient here.
+	for offset := 0; ; offset += purgeBatchSize {
+		files, err := q.Files(purgeBatchSize, offset)
+
+		if err != nil {
+			return err
+		}
+
+		if len(files) == 0 {
+			return nil
+		}
+
+		for _, file := range files {
+			if mutex.Worker.Canceled() {
+				return errPurgeCanceled
+			}
+
+			// Build an OS-specific path with filepath (not path) so that it
+			// is consistent with originalsPath.
+			fileName := filepath.Join(originalsPath, file.FileName)
+
+			if file.FileMissing || purged[fileName] || fs.FileExists(fileName) {
+				continue
+			}
+
+			if err := file.Purge(); err != nil {
+				log.Errorf("purge: %s", err)
+				continue
+			}
+
+			purged[fileName] = true
+			log.Infof("purge: removed %s", txt.Quote(fs.RelativeName(fileName, originalsPath)))
+		}
+
+		if mutex.Worker.Canceled() {
+			return errPurgeCanceled
+		}
+	}
+}
+
+// purgePhotos deletes the photos returned by MissingPhotos in batches,
+// permanently if hard is true. The UUIDs of deleted photos are added to
+// purged. It fails if a query fails or the worker is canceled.
+func (prg *Purge) purgePhotos(hard bool, purged map[string]bool) error {
+	q := query.New(prg.conf.Db())
+	offset := 0
+
+	for {
+		photos, err := q.MissingPhotos(purgeBatchSize, offset)
+
+		if err != nil {
+			return err
+		}
+
+		if len(photos) == 0 {
+			return nil
+		}
+
+		deleted := 0
+
+		for _, photo := range photos {
+			if mutex.Worker.Canceled() {
+				return errPurgeCanceled
+			}
+
+			if purged[photo.PhotoUUID] {
+				continue
+			}
+
+			if err := photo.Delete(hard); err != nil {
+				log.Errorf("purge: %s", err)
+				continue
+			}
+
+			purged[photo.PhotoUUID] = true
+			deleted++
+
+			if hard {
+				log.Infof("purge: permanently deleted photo %s", txt.Quote(photo.PhotoTitle))
+			} else {
+				log.Infof("purge: archived photo %s", txt.Quote(photo.PhotoTitle))
+			}
+		}
+
+		if mutex.Worker.Canceled() {
+			return errPurgeCanceled
+		}
+
+		// Deleted photos are expected to drop out of the MissingPhotos result
+		// set, so only the rows of this batch that were kept are skipped.
+		// Advancing by the batch size would pass over unprocessed photos.
+		offset += len(photos) - deleted
+	}
+}
+
+// Cancel stops the current purge operation.
+func (prg *Purge) Cancel() {
+	mutex.Worker.Cancel()
+}
diff --git a/internal/photoprism/purge_options.go b/internal/photoprism/purge_options.go
new file mode 100644
index 000000000..5034d484e
--- /dev/null
+++ b/internal/photoprism/purge_options.go
@@ -0,0 +1,7 @@
+package photoprism
+
+// PurgeOptions contains the settings for a purge run.
+type PurgeOptions struct {
+	Path string // path relative to the originals path, joined with it in Purge.Start
+	Hard bool   // passed to Photo.Delete: delete photos permanently if true
+}
diff --git a/internal/query/file.go b/internal/query/file.go
index b346e005b..d425c9187 100644
--- a/internal/query/file.go
+++ b/internal/query/file.go
@@ -4,14 +4,11 @@ import (
 	"github.com/photoprism/photoprism/internal/entity"
 )
 
-// Files finds files returning maximum results defined by limit
-// and finding them from an offest defined by offset.
+// Files returns file entities in the range of limit and offset sorted by id.
func (q *Query) Files(limit int, offset int) (files []entity.File, err error) { - if err := q.db.Where(&entity.File{}).Limit(limit).Offset(offset).Find(&files).Error; err != nil { - return files, err - } + err = q.db.Unscoped().Where(&entity.File{}).Order("id").Limit(limit).Offset(offset).Find(&files).Error - return files, nil + return files, err } // FilesByUUID diff --git a/internal/query/photo.go b/internal/query/photo.go index 594312cee..4ca750162 100644 --- a/internal/query/photo.go +++ b/internal/query/photo.go @@ -456,3 +456,13 @@ func (q *Query) PreloadPhotoByUUID(photoUUID string) (photo entity.Photo, err er return photo, nil } + +// MissingPhotos returns photo entities without existing files. +func (q *Query) MissingPhotos(limit int, offset int) (entities []entity.Photo, err error){ + err = q.db. + Joins("LEFT JOIN files ON photos.id = files.id AND files.file_missing = 0"). + Where("files.id IS NULL"). + Order("photos.id").Limit(limit).Offset(offset).Find(&entities).Error + + return entities, err +} diff --git a/internal/service/purge.go b/internal/service/purge.go new file mode 100644 index 000000000..0f1e23c11 --- /dev/null +++ b/internal/service/purge.go @@ -0,0 +1,19 @@ +package service + +import ( + "sync" + + "github.com/photoprism/photoprism/internal/photoprism" +) + +var oncePurge sync.Once + +func initPurge() { + services.Purge = photoprism.NewPurge(Config()) +} + +func Purge() *photoprism.Purge { + oncePurge.Do(initPurge) + + return services.Purge +} diff --git a/internal/service/service.go b/internal/service/service.go index edb9c3333..2c671ee8c 100644 --- a/internal/service/service.go +++ b/internal/service/service.go @@ -16,6 +16,7 @@ var services struct { Convert *photoprism.Convert Import *photoprism.Import Index *photoprism.Index + Purge *photoprism.Purge Nsfw *nsfw.Detector Query *query.Query Resample *photoprism.Resample diff --git a/internal/service/service_test.go b/internal/service/service_test.go index 1bbc570c3..fad66e9cd 100644 --- 
a/internal/service/service_test.go +++ b/internal/service/service_test.go @@ -44,6 +44,10 @@ func TestIndex(t *testing.T) { assert.IsType(t, &photoprism.Index{}, Index()) } +func TestPurge(t *testing.T) { + assert.IsType(t, &photoprism.Purge{}, Purge()) +} + func TestNsfwDetector(t *testing.T) { assert.IsType(t, &nsfw.Detector{}, NsfwDetector()) }