Purge command to remove missing files from the index #234

Signed-off-by: Michael Mayer <michael@liquidbytes.net>
This commit is contained in:
Michael Mayer 2020-05-07 19:42:04 +02:00
parent 5c0d12c4d9
commit f0187df4a1
12 changed files with 317 additions and 11 deletions

View file

@ -26,6 +26,7 @@ func main() {
commands.StopCommand,
commands.IndexCommand,
commands.ImportCommand,
commands.PurgeCommand,
commands.CopyCommand,
commands.ConvertCommand,
commands.ResampleCommand,

View file

@ -0,0 +1,78 @@
package commands
import (
"context"
"path/filepath"
"strings"
"time"
"github.com/photoprism/photoprism/internal/config"
"github.com/photoprism/photoprism/internal/photoprism"
"github.com/photoprism/photoprism/internal/service"
"github.com/photoprism/photoprism/pkg/fs"
"github.com/photoprism/photoprism/pkg/txt"
"github.com/urfave/cli"
)
// PurgeCommand registers the "purge" cli command, which removes missing
// files from search results. It wires the command name and usage text to
// purgeFlags and purgeAction.
var PurgeCommand = cli.Command{
Name: "purge",
Usage: "Removes missing files from search results",
Flags: purgeFlags,
Action: purgeAction,
}
// purgeFlags lists the command-line flags accepted by the purge command.
var purgeFlags = []cli.Flag{
// --hard is read by purgeAction and forwarded as PurgeOptions.Hard.
cli.BoolFlag{
Name: "hard",
Usage: "delete all data and permanently remove from index",
},
}
// purgeAction removes missing files from search results.
//
// It initializes the configuration and database from the cli context, reads
// an optional sub-path from the first positional argument and the "hard"
// flag, and then runs the purge worker with these options. On success the
// number of purged files and photos is logged together with the elapsed time.
//
// Any error returned by the config initialization or by the purge worker is
// returned to the caller unchanged.
func purgeAction(ctx *cli.Context) error {
	start := time.Now()

	conf := config.NewConfig(ctx)
	service.SetConfig(conf)

	cctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	if err := conf.Init(cctx); err != nil {
		return err
	}

	conf.InitDb()

	// Deferred so that the config is also shut down when purging fails;
	// a plain call at the end would be skipped by the early error return.
	defer conf.Shutdown()

	// The first cli argument optionally selects a sub-path of the originals.
	subPath := strings.TrimSpace(ctx.Args().First())

	if subPath == "" {
		log.Infof("purging missing files in %s", txt.Quote(filepath.Base(conf.OriginalsPath())))
	} else {
		log.Infof("purging missing files in %s", txt.Quote(fs.RelativeName(filepath.Join(conf.OriginalsPath(), subPath), filepath.Dir(conf.OriginalsPath()))))
	}

	if conf.ReadOnly() {
		log.Infof("read-only mode enabled")
	}

	prg := service.Purge()

	opt := photoprism.PurgeOptions{
		Path: subPath,
		Hard: ctx.Bool("hard"),
	}

	files, photos, err := prg.Start(opt)
	if err != nil {
		return err
	}

	log.Infof("purged %d files and %d photos in %s", len(files), len(photos), time.Since(start))

	return nil
}

View file

@ -103,3 +103,12 @@ func (m File) Changed(fileSize int64, fileModified time.Time) bool {
return true
}
// Purge removes a file from the index by marking it as missing.
//
// It sets the file_missing column of this file to true using an unscoped
// query and returns the database error, if any.
func (m *File) Purge() error {
	result := Db().Unscoped().Model(m).Update("file_missing", true)

	return result.Error
}

View file

@ -482,3 +482,24 @@ func (m *Photo) SetCoordinates(lat, lng float32, altitude int, source string) {
m.PhotoAltitude = altitude
m.LocationSrc = source
}
// Delete deletes the entity from the database.
//
// If permanently is true, the call is delegated to DeletePermanently.
// Otherwise the file rows referencing this photo are deleted first and then
// the photo itself, both through the regular (scoped) database handle.
//
// The first database error is returned; when deleting the files fails, the
// photo is left untouched so that the failure is not silently ignored.
func (m *Photo) Delete(permanently bool) error {
	if permanently {
		return m.DeletePermanently()
	}

	if err := Db().Delete(File{}, "photo_id = ?", m.ID).Error; err != nil {
		return err
	}

	return Db().Delete(m).Error
}
// DeletePermanently permanently deletes the entity from the database.
//
// Using unscoped queries, it removes the rows that reference this photo
// (files, keywords and labels by photo id, album entries by photo uuid)
// before removing the photo itself.
//
// The first database error is returned and aborts the remaining deletes, so
// the photo row is only removed after its dependent rows are gone.
func (m *Photo) DeletePermanently() error {
	if err := Db().Unscoped().Delete(File{}, "photo_id = ?", m.ID).Error; err != nil {
		return err
	}

	if err := Db().Unscoped().Delete(PhotoKeyword{}, "photo_id = ?", m.ID).Error; err != nil {
		return err
	}

	if err := Db().Unscoped().Delete(PhotoLabel{}, "photo_id = ?", m.ID).Error; err != nil {
		return err
	}

	if err := Db().Unscoped().Delete(PhotoAlbum{}, "photo_uuid = ?", m.PhotoUUID).Error; err != nil {
		return err
	}

	return Db().Unscoped().Delete(m).Error
}

View file

@ -16,6 +16,7 @@ import (
"github.com/photoprism/photoprism/internal/nsfw"
"github.com/photoprism/photoprism/internal/query"
"github.com/photoprism/photoprism/pkg/fs"
"github.com/photoprism/photoprism/pkg/txt"
)
// Index represents an indexer that indexes files in the originals directory.
@ -59,9 +60,10 @@ func (ind *Index) Cancel() {
func (ind *Index) Start(opt IndexOptions) map[string]bool {
done := make(map[string]bool)
originalsPath := ind.originalsPath()
optionsPath := filepath.Join(originalsPath, opt.Path)
if !fs.PathExists(originalsPath) {
event.Error(fmt.Sprintf("index: %s does not exist", originalsPath))
if !fs.PathExists(optionsPath) {
event.Error(fmt.Sprintf("index: %s does not exist", txt.Quote(optionsPath)))
return done
}
@ -101,9 +103,7 @@ func (ind *Index) Start(opt IndexOptions) map[string]bool {
log.Infof(`index: ignored "%s"`, fs.RelativeName(fileName, originalsPath))
}
indexPath := filepath.Join(originalsPath, opt.Path)
err := godirwalk.Walk(indexPath, &godirwalk.Options{
err := godirwalk.Walk(optionsPath, &godirwalk.Options{
Callback: func(fileName string, info *godirwalk.Dirent) error {
defer func() {
if err := recover(); err != nil {

View file

@ -0,0 +1,160 @@
package photoprism
import (
"errors"
"fmt"
"path"
"path/filepath"
"runtime"
"github.com/photoprism/photoprism/internal/config"
"github.com/photoprism/photoprism/internal/event"
"github.com/photoprism/photoprism/internal/mutex"
"github.com/photoprism/photoprism/internal/query"
"github.com/photoprism/photoprism/pkg/fs"
"github.com/photoprism/photoprism/pkg/txt"
)
// Purge represents a worker that removes missing files from search results.
// Instances are created with NewPurge.
type Purge struct {
// conf provides the originals path and the database handle used by Start.
conf *config.Config
}
// NewPurge creates a purge worker that operates on the given configuration.
func NewPurge(conf *config.Config) *Purge {
	return &Purge{conf: conf}
}
// originalsPath returns the original media files path as string,
// as reported by the worker's config.
func (prg *Purge) originalsPath() string {
return prg.conf.OriginalsPath()
}
// Start removes missing files from search results.
//
// It runs in two passes while holding the shared worker mutex:
//
//  1. All indexed files are read in pages. Every file that no longer exists
//     below the originals path, and is not yet flagged, is marked as missing.
//  2. All photos without any non-missing file are read in pages and deleted;
//     permanently if opt.Hard is set.
//
// NOTE(review): opt.Path is only used to verify that the directory exists;
// both passes still look at the entire index.
//
// The returned maps contain the names of the purged files and the UUIDs of
// the purged photos. They are also populated with the work done so far when
// an error is returned. Errors are returned if the path does not exist, the
// worker mutex cannot be acquired, a query fails, the operation is canceled,
// or a panic was recovered. Failures to purge an individual file or photo
// are logged and do not abort the run.
func (prg *Purge) Start(opt PurgeOptions) (purgedFiles map[string]bool, purgedPhotos map[string]bool, err error) {
	defer func() {
		if r := recover(); r != nil {
			// Report the panic through the named result as well, so that the
			// caller does not mistake an aborted run for a successful one.
			err = fmt.Errorf("purge: %v [panic]", r)
			log.Errorf("purge: %v [panic]", r)
		}
	}()

	purgedFiles = make(map[string]bool)
	purgedPhotos = make(map[string]bool)

	originalsPath := prg.originalsPath()
	optionsPath := filepath.Join(originalsPath, opt.Path)

	if !fs.PathExists(optionsPath) {
		err = fmt.Errorf("purge: %s does not exist", txt.Quote(optionsPath))
		event.Error(err.Error())
		return purgedFiles, purgedPhotos, err
	}

	if startErr := mutex.Worker.Start(); startErr != nil {
		err = fmt.Errorf("purge: %s", startErr.Error())
		event.Error(err.Error())
		return purgedFiles, purgedPhotos, err
	}

	defer func() {
		mutex.Worker.Stop()
		runtime.GC()
	}()

	q := query.New(prg.conf.Db())

	// Number of rows fetched per query in both passes.
	const limit = 250

	// Pass 1: flag indexed files that no longer exist on disk.
	offset := 0

	for {
		files, queryErr := q.Files(limit, offset)

		if queryErr != nil {
			return purgedFiles, purgedPhotos, queryErr
		}

		if len(files) == 0 {
			break
		}

		for _, file := range files {
			if mutex.Worker.Canceled() {
				return purgedFiles, purgedPhotos, errors.New("purge canceled")
			}

			fileName := path.Join(originalsPath, file.FileName)

			// Cheap in-memory checks first; only touch the file system for
			// files that are not already flagged or handled.
			if file.FileMissing || purgedFiles[fileName] || fs.FileExists(fileName) {
				continue
			}

			if purgeErr := file.Purge(); purgeErr != nil {
				log.Errorf("purge: %s", purgeErr)
				continue
			}

			purgedFiles[fileName] = true
			log.Infof("purge: removed %s", txt.Quote(fs.RelativeName(fileName, originalsPath)))
		}

		if mutex.Worker.Canceled() {
			return purgedFiles, purgedPhotos, errors.New("purge canceled")
		}

		// Flagging files does not remove them from the Files result set,
		// so plain offset pagination is safe here.
		offset += limit
	}

	// Pass 2: delete photos that have no existing files left.
	offset = 0

	for {
		photos, queryErr := q.MissingPhotos(limit, offset)

		if queryErr != nil {
			return purgedFiles, purgedPhotos, queryErr
		}

		if len(photos) == 0 {
			break
		}

		deleted := 0

		for _, photo := range photos {
			if mutex.Worker.Canceled() {
				return purgedFiles, purgedPhotos, errors.New("purge canceled")
			}

			if purgedPhotos[photo.PhotoUUID] {
				continue
			}

			if deleteErr := photo.Delete(opt.Hard); deleteErr != nil {
				log.Errorf("purge: %s", deleteErr)
				continue
			}

			deleted++
			purgedPhotos[photo.PhotoUUID] = true

			if opt.Hard {
				log.Infof("purge: permanently deleted photo %s", txt.Quote(photo.PhotoTitle))
			} else {
				log.Infof("purge: archived photo %s", txt.Quote(photo.PhotoTitle))
			}
		}

		if mutex.Worker.Canceled() {
			return purgedFiles, purgedPhotos, errors.New("purge canceled")
		}

		// Deleted photos are expected to drop out of the MissingPhotos result
		// set, which shifts the remaining rows forward. Only skip past rows
		// that were not deleted (failed or already handled); advancing by a
		// full page would skip photos that still need to be purged. The loop
		// terminates either way: each round deletes rows or moves the offset.
		offset += len(photos) - deleted
	}

	return purgedFiles, purgedPhotos, nil
}
// Cancel stops the current purge operation by signaling the shared worker
// mutex; Start polls mutex.Worker.Canceled() and returns a "purge canceled"
// error once it observes the request.
func (prg *Purge) Cancel() {
mutex.Worker.Cancel()
}

View file

@ -0,0 +1,6 @@
package photoprism
// PurgeOptions holds the settings passed to the purge worker's Start method.
type PurgeOptions struct {
// Path is joined onto the originals path by Start, which returns an error
// if the resulting directory does not exist.
Path string
// Hard is passed on to Photo.Delete; if true, photos are deleted
// permanently.
Hard bool
}

View file

@ -4,14 +4,11 @@ import (
"github.com/photoprism/photoprism/internal/entity"
)
// Files returns file entities in the range of limit and offset sorted by id.
//
// The query is unscoped and ordered by id so that repeated calls with an
// increasing offset page through the table in a stable order. The database
// error, if any, is returned together with the rows read so far.
func (q *Query) Files(limit int, offset int) (files []entity.File, err error) {
	err = q.db.Unscoped().Where(&entity.File{}).Order("id").Limit(limit).Offset(offset).Find(&files).Error

	return files, err
}
// FilesByUUID

View file

@ -456,3 +456,13 @@ func (q *Query) PreloadPhotoByUUID(photoUUID string) (photo entity.Photo, err er
return photo, nil
}
// MissingPhotos returns photo entities without existing files.
//
// Photos are left-joined with their files that are not flagged as missing;
// only photos for which no such file exists are returned, ordered by photo
// id and restricted to the page given by limit and offset.
func (q *Query) MissingPhotos(limit int, offset int) (entities []entity.Photo, err error) {
	// Files reference their photo through photo_id; joining on files.id would
	// compare two unrelated primary keys.
	err = q.db.
		Joins("LEFT JOIN files ON files.photo_id = photos.id AND files.file_missing = 0").
		Where("files.id IS NULL").
		Order("photos.id").Limit(limit).Offset(offset).Find(&entities).Error

	return entities, err
}

19
internal/service/purge.go Normal file
View file

@ -0,0 +1,19 @@
package service
import (
"sync"
"github.com/photoprism/photoprism/internal/photoprism"
)
// oncePurge guards the one-time initialization of the purge service in Purge.
var oncePurge sync.Once
// initPurge creates the purge worker from the current service config and
// stores it in the services registry.
func initPurge() {
services.Purge = photoprism.NewPurge(Config())
}
// Purge returns the purge worker stored in the services registry,
// initializing it via initPurge on the first call.
func Purge() *photoprism.Purge {
oncePurge.Do(initPurge)
return services.Purge
}

View file

@ -16,6 +16,7 @@ var services struct {
Convert *photoprism.Convert
Import *photoprism.Import
Index *photoprism.Index
Purge *photoprism.Purge
Nsfw *nsfw.Detector
Query *query.Query
Resample *photoprism.Resample

View file

@ -44,6 +44,10 @@ func TestIndex(t *testing.T) {
assert.IsType(t, &photoprism.Index{}, Index())
}
// TestPurge checks that Purge returns a *photoprism.Purge.
func TestPurge(t *testing.T) {
assert.IsType(t, &photoprism.Purge{}, Purge())
}
// TestNsfwDetector checks that NsfwDetector returns a *nsfw.Detector.
func TestNsfwDetector(t *testing.T) {
assert.IsType(t, &nsfw.Detector{}, NsfwDetector())
}