From fd785faf68c664130cc3cca1a7f092de6307acdb Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Wed, 1 Sep 2021 12:48:17 +0200 Subject: [PATCH] People: Automatically resolve face cluster collisions #22 --- internal/entity/face.go | 41 ++++++++++++--- internal/entity/face_test.go | 16 +++--- internal/entity/marker.go | 55 ++++++++++++-------- internal/entity/marker_test.go | 73 +++++++++++++++++++++++---- internal/entity/subject.go | 28 +++++----- internal/photoprism/faces.go | 9 ++++ internal/photoprism/faces_audit.go | 20 +++++--- internal/photoprism/faces_optimize.go | 64 ++++++++++++----------- internal/query/faces.go | 50 ++++++++++++++++++ internal/query/faces_test.go | 11 ++++ internal/query/like.go | 14 ++--- internal/query/like_test.go | 29 +++++++---- internal/query/markers.go | 4 +- internal/query/query.go | 3 ++ internal/query/subjects.go | 21 +++++--- internal/query/subjects_test.go | 2 +- scripts/sql/init-test-databases.sql | 5 ++ 17 files changed, 321 insertions(+), 124 deletions(-) diff --git a/internal/entity/face.go b/internal/entity/face.go index e9fb9198e..71b228d6a 100644 --- a/internal/entity/face.go +++ b/internal/entity/face.go @@ -149,8 +149,8 @@ func (m *Face) Match(embeddings Embeddings) (match bool, dist float64) { return true, dist } -// ReportCollision reports a collision with a different subject's face. -func (m *Face) ReportCollision(embeddings Embeddings) (reported bool, err error) { +// ResolveCollision resolves a collision with a different subject's face. +func (m *Face) ResolveCollision(embeddings Embeddings) (resolved bool, err error) { if m.SubjectUID == "" { // Ignore reports for anonymous faces. return false, nil @@ -168,14 +168,14 @@ func (m *Face) ReportCollision(embeddings Embeddings) (reported bool, err error) } else if dist < 0 { // Should never happen. 
return false, fmt.Errorf("collision distance must be positive") - } else if dist > 0.2 { + } else if dist >= 0.02 { m.MatchedAt = nil m.Collisions++ - m.CollisionRadius = dist - 0.1 + m.CollisionRadius = dist - 0.01 revise = true } else { - // Don't set a radius yet if distance is very small. - m.Collisions++ + // Ignore if distance is very small as faces may belong to the same person. + log.Warnf("faces: ignoring %s collision at dist %f, same person?", m.ID, dist) } err = m.Updates(Values{"Collisions": m.Collisions, "CollisionRadius": m.CollisionRadius, "MatchedAt": m.MatchedAt}) @@ -194,17 +194,21 @@ func (m *Face) ReportCollision(embeddings Embeddings) (reported bool, err error) // ReviseMatches updates marker matches after face parameters have been changed. func (m *Face) ReviseMatches() (revised Markers, err error) { + if m.ID == "" { + return revised, fmt.Errorf("empty face id") + } + var matches Markers if err := Db().Where("face_id = ?", m.ID).Where("marker_type = ?", MarkerFace). Find(&matches).Error; err != nil { - log.Debugf("faces: %s (find matching markers)", err) + log.Debugf("faces: %s (revise matches)", err) return revised, err } else { for _, marker := range matches { if ok, _ := m.Match(marker.Embeddings()); !ok { if updated, err := marker.ClearFace(); err != nil { - log.Debugf("faces: %s (revise match)", err) + log.Debugf("faces: %s (revise matches)", err) return revised, err } else if updated { revised = append(revised, marker) @@ -240,6 +244,27 @@ func (m *Face) MatchMarkers(faceIds []string) error { return nil } +// SetSubjectUID updates the face's subject uid and related markers. +func (m *Face) SetSubjectUID(uid string) (err error) { + // Update face. + if err = m.Update("SubjectUID", uid); err != nil { + return err + } else { + m.SubjectUID = uid + } + + // Update related markers. + if err = Db().Model(&Marker{}). + Where("face_id = ?", m.ID). + Where("subject_src = ?", SrcAuto). + Where("subject_uid <> ?", m.SubjectUID). 
+ Updates(Values{"SubjectUID": m.SubjectUID}).Error; err != nil { + return err + } + + return nil +} + // Save updates the existing or inserts a new face. func (m *Face) Save() error { faceMutex.Lock() diff --git a/internal/entity/face_test.go b/internal/entity/face_test.go index dbba5acb7..cf35bca54 100644 --- a/internal/entity/face_test.go +++ b/internal/entity/face_test.go @@ -52,14 +52,14 @@ func TestFace_Match(t *testing.T) { }) } -func TestFace_ReportCollision(t *testing.T) { +func TestFace_ResolveCollision(t *testing.T) { t.Run("collision", func(t *testing.T) { m := FaceFixtures.Get("joe-biden") assert.Zero(t, m.Collisions) assert.Zero(t, m.CollisionRadius) - if reported, err := m.ReportCollision(MarkerFixtures.Pointer("1000003-4").Embeddings()); err != nil { + if reported, err := m.ResolveCollision(MarkerFixtures.Pointer("1000003-4").Embeddings()); err != nil { t.Fatal(err) } else { assert.True(t, reported) @@ -72,14 +72,14 @@ func TestFace_ReportCollision(t *testing.T) { assert.Greater(t, m.CollisionRadius, 1.2) assert.Less(t, m.CollisionRadius, 1.314) - if reported, err := m.ReportCollision(MarkerFixtures.Pointer("1000003-6").Embeddings()); err != nil { + if reported, err := m.ResolveCollision(MarkerFixtures.Pointer("1000003-6").Embeddings()); err != nil { t.Fatal(err) } else { - assert.False(t, reported) + assert.True(t, reported) } // Number of collisions must not have increased. 
- assert.Equal(t, 1, m.Collisions) + assert.Equal(t, 2, m.Collisions) // Actual distance is ~1.272604 assert.Greater(t, m.CollisionRadius, 1.1) @@ -87,7 +87,7 @@ func TestFace_ReportCollision(t *testing.T) { }) t.Run("subject id empty", func(t *testing.T) { m := NewFace("", SrcAuto, Embeddings{}) - if reported, err := m.ReportCollision(MarkerFixtures.Pointer("1000003-4").Embeddings()); err != nil { + if reported, err := m.ResolveCollision(MarkerFixtures.Pointer("1000003-4").Embeddings()); err != nil { t.Fatal(err) } else { assert.False(t, reported) @@ -96,7 +96,7 @@ func TestFace_ReportCollision(t *testing.T) { t.Run("invalid face id", func(t *testing.T) { m := NewFace("123", SrcAuto, Embeddings{}) m.ID = "" - if reported, err := m.ReportCollision(MarkerFixtures.Pointer("1000003-4").Embeddings()); err == nil { + if reported, err := m.ResolveCollision(MarkerFixtures.Pointer("1000003-4").Embeddings()); err == nil { t.Fatal(err) } else { assert.False(t, reported) @@ -106,7 +106,7 @@ func TestFace_ReportCollision(t *testing.T) { t.Run("embedding empty", func(t *testing.T) { m := NewFace("123", SrcAuto, Embeddings{}) m.EmbeddingJSON = []byte("") - if reported, err := m.ReportCollision(MarkerFixtures.Pointer("1000003-4").Embeddings()); err == nil { + if reported, err := m.ResolveCollision(MarkerFixtures.Pointer("1000003-4").Embeddings()); err == nil { t.Fatal(err) } else { assert.False(t, reported) diff --git a/internal/entity/marker.go b/internal/entity/marker.go index 2c359b606..cd57adcf5 100644 --- a/internal/entity/marker.go +++ b/internal/entity/marker.go @@ -157,9 +157,9 @@ func (m *Marker) SetFace(f *Face, dist float64) (updated bool, err error) { } // Any reason we don't want to set a new face for this marker? 
- if m.SubjectSrc != SrcManual || f.SubjectUID == "" || m.SubjectUID == "" || f.SubjectUID == m.SubjectUID { + if m.SubjectSrc == SrcAuto || f.SubjectUID == "" || m.SubjectUID == "" || f.SubjectUID == m.SubjectUID { // Don't skip if subject wasn't set manually, or subjects match. - } else if reported, err := f.ReportCollision(m.Embeddings()); err != nil { + } else if reported, err := f.ResolveCollision(m.Embeddings()); err != nil { return false, err } else if reported { log.Infof("faces: marker %d (subject %s) collision with %s (subject %s), source %s", m.ID, m.SubjectUID, f.ID, f.SubjectUID, m.SubjectSrc) @@ -169,12 +169,15 @@ func (m *Marker) SetFace(f *Face, dist float64) (updated bool, err error) { } // Update face with known subject from marker? - if f.SubjectUID != "" || m.SubjectUID == "" { + if m.SubjectSrc == SrcAuto || m.SubjectUID == "" || f.SubjectUID != "" { // Don't update if face has a known subject, or marker subject is unknown. - } else if err := f.Update("SubjectUID", m.SubjectUID); err != nil { + } else if err = f.SetSubjectUID(m.SubjectUID); err != nil { return false, err } + // Set face. + m.Face = f + // Skip update if the same face is already set. if m.SubjectUID == f.SubjectUID && m.FaceID == f.ID { // Update matching timestamp. @@ -209,14 +212,14 @@ func (m *Marker) SetFace(f *Face, dist float64) (updated bool, err error) { m.SubjectUID = f.SubjectUID } - if err := m.SyncSubject(false); err != nil { + if err = m.SyncSubject(false); err != nil { return false, err } // Update face subject? - if m.SubjectUID == "" || f.SubjectUID != m.SubjectUID { + if m.SubjectSrc == SrcAuto || m.SubjectUID == "" || f.SubjectUID == m.SubjectUID { // Not needed. 
- } else if err := f.Update("SubjectUID", m.SubjectUID); err != nil { + } else if err = f.SetSubjectUID(m.SubjectUID); err != nil { return false, err } @@ -237,19 +240,19 @@ func (m *Marker) SyncSubject(updateRelated bool) error { subj := m.GetSubject() - if subj == nil { + if subj == nil || m.SubjectSrc == SrcAuto { return nil } // Update subject with marker name? - if m.MarkerName == "" || subj.SubjectName == m.MarkerName || (subj.SubjectName != "" && m.SubjectSrc != SrcManual) { + if m.MarkerName == "" || subj.SubjectName == m.MarkerName { // Do nothing. } else if err := subj.UpdateName(m.MarkerName); err != nil { return err } // Create known face for subject? - if m.FaceID != "" || m.SubjectSrc != SrcManual { + if m.FaceID != "" { // Do nothing. } else if f := m.GetFace(); f != nil { m.FaceID = f.ID @@ -310,21 +313,24 @@ func (m *Marker) Embeddings() Embeddings { // GetSubject returns a subject entity if possible. func (m *Marker) GetSubject() (subj *Subject) { if m.Subject != nil { - return m.Subject + if m.SubjectUID == m.Subject.SubjectUID { + return m.Subject + } } - if m.SubjectUID == "" && m.MarkerName != "" { - if subj = NewSubject(m.MarkerName, SubjectPerson, SrcMarker); subj == nil { + // Create subject? 
+ if m.SubjectSrc != SrcAuto && m.MarkerName != "" && m.SubjectUID == "" { + if subj = NewSubject(m.MarkerName, SubjectPerson, m.SubjectSrc); subj == nil { return nil } else if subj = FirstOrCreateSubject(subj); subj == nil { log.Debugf("marker: invalid subject %s", txt.Quote(m.MarkerName)) return nil + } else { + m.Subject = subj + m.SubjectUID = subj.SubjectUID } - m.SubjectUID = subj.SubjectUID - m.SubjectSrc = SrcManual - - return subj + return m.Subject } m.Subject = FindSubject(m.SubjectUID) @@ -340,7 +346,7 @@ func (m *Marker) ClearSubject(src string) error { if m.Face == nil { // Do nothing - } else if reported, err := m.Face.ReportCollision(m.Embeddings()); err != nil { + } else if reported, err := m.Face.ResolveCollision(m.Embeddings()); err != nil { return err } else if err := m.Updates(Values{"MarkerName": "", "FaceID": "", "FaceDist": -1.0, "SubjectUID": "", "SubjectSrc": src}); err != nil { return err @@ -362,18 +368,20 @@ func (m *Marker) ClearSubject(src string) error { // GetFace returns a matching face entity if possible. 
func (m *Marker) GetFace() (f *Face) { if m.Face != nil { - return m.Face + if m.FaceID == m.Face.ID { + return m.Face + } } // Add face if size - if m.FaceID == "" && m.SubjectSrc == SrcManual { + if m.SubjectSrc != SrcAuto && m.FaceID == "" { if m.Size < face.ClusterMinSize || m.Score < face.ClusterMinScore { log.Debugf("faces: skipped adding face for low-quality marker %d, size %d, score %d", m.ID, m.Size, m.Score) return nil } else if emb := m.Embeddings(); len(emb) == 0 { log.Warnf("marker: id %d has no embeddings", m.ID) return nil - } else if f = NewFace(m.SubjectUID, SrcManual, emb); f == nil { + } else if f = NewFace(m.SubjectUID, m.SubjectSrc, emb); f == nil { log.Warnf("marker: failed adding face for id %d", m.ID) return nil } else if f = FirstOrCreateFace(f); f == nil { @@ -385,6 +393,7 @@ func (m *Marker) GetFace() (f *Face) { m.Face = f m.FaceID = f.ID + m.FaceDist = 0 } else { m.Face = FindFace(m.FaceID) } @@ -452,14 +461,16 @@ func UpdateOrCreateMarker(m *Marker) (*Marker, error) { } err := result.Updates(map[string]interface{}{ + "MarkerType": m.MarkerType, + "MarkerSrc": m.MarkerSrc, "X": m.X, "Y": m.Y, "W": m.W, "H": m.H, "Score": m.Score, + "Size": m.Size, "LandmarksJSON": m.LandmarksJSON, "EmbeddingsJSON": m.EmbeddingsJSON, - "SubjectUID": m.SubjectUID, }) log.Debugf("faces: updated existing marker %d for file %d", result.ID, result.FileID) diff --git a/internal/entity/marker_test.go b/internal/entity/marker_test.go index c51565ad4..7dc992a6b 100644 --- a/internal/entity/marker_test.go +++ b/internal/entity/marker_test.go @@ -349,29 +349,80 @@ func TestMarker_HasFace(t *testing.T) { } func TestMarker_GetSubject(t *testing.T) { - t.Run("return subject", func(t *testing.T) { - m := Marker{Subject: &Subject{SubjectName: "Test Subject"}} + t.Run("EmptySubjectUID", func(t *testing.T) { + m := Marker{SubjectUID: "", Subject: &Subject{SubjectUID: "", SubjectName: "Test Subject"}} - assert.Equal(t, "Test Subject", m.GetSubject().SubjectName) + if s := 
m.GetSubject(); s == nil { + t.Fatal("return value must not be nil") + } else { + assert.Equal(t, "Test Subject", s.SubjectName) + assert.Equal(t, "", m.SubjectUID) + assert.Equal(t, "", s.SubjectUID) + } }) - t.Run("uid empty, marker name not empty", func(t *testing.T) { - m := Marker{SubjectUID: "", MarkerName: "Hans Mayer"} - assert.Equal(t, "Hans Mayer", m.GetSubject().SubjectName) + t.Run("ConflictingSubjectUID", func(t *testing.T) { + m := Marker{SubjectUID: "", Subject: &Subject{SubjectUID: "xyz", SubjectName: "Test Subject"}} + + if s := m.GetSubject(); s != nil { + t.Fatal("return value must be nil") + } + }) + t.Run("SubjectSrcAuto", func(t *testing.T) { + m := Marker{SubjectSrc: SrcAuto, SubjectUID: "", MarkerName: "Hans Mayer"} + + if s := m.GetSubject(); s != nil { + t.Fatal("return value must be nil") + } else { + assert.Equal(t, "Hans Mayer", m.MarkerName) + assert.Empty(t, m.SubjectUID) + assert.Equal(t, SrcAuto, m.SubjectSrc) + } + }) + t.Run("SubjectSrcManual", func(t *testing.T) { + m := Marker{SubjectSrc: SrcManual, SubjectUID: "", MarkerName: "Hans Mayer"} + + if s := m.GetSubject(); s == nil { + t.Fatal("return value must not be nil") + } else { + assert.Equal(t, "Hans Mayer", s.SubjectName) + assert.NotEmpty(t, s.SubjectUID) + } }) } func TestMarker_GetFace(t *testing.T) { - t.Run("return face", func(t *testing.T) { - m := Marker{Face: &Face{ID: "1234"}} + t.Run("ExistingFaceID", func(t *testing.T) { + m := Marker{Face: &Face{ID: "1234"}, FaceID: "1234"} - assert.Equal(t, "1234", m.GetFace().ID) + if f := m.GetFace(); f == nil { + t.Fatal("return value must not be nil") + } else { + assert.Equal(t, "1234", f.ID) + assert.Equal(t, "1234", m.FaceID) + } + }) + t.Run("ConflictingFaceID", func(t *testing.T) { + m := Marker{Face: &Face{ID: "1234"}, FaceID: "8888"} + + if f := m.GetFace(); f != nil { + t.Fatal("return value must be nil") + } else { + assert.Equal(t, "8888", m.FaceID) + assert.Nil(t, m.Face) + } }) t.Run("find face with ID", func(t 
*testing.T) { m := Marker{FaceID: "VF7ANLDET2BKZNT4VQWJMMC6HBEFDOG6"} - assert.Equal(t, "jqy3y652h8njw0sx", m.GetFace().SubjectUID) + + if f := m.GetFace(); f == nil { + t.Fatal("return value must not be nil") + } else { + assert.Equal(t, "jqy3y652h8njw0sx", f.SubjectUID) + } }) t.Run("low quality marker", func(t *testing.T) { m := Marker{FaceID: "", SubjectSrc: SrcManual, Size: 130} + assert.Nil(t, m.GetFace()) }) t.Run("create face", func(t *testing.T) { @@ -384,7 +435,7 @@ func TestMarker_GetFace(t *testing.T) { } if m.GetFace() == nil { - t.Fatal("face must not be nil") + t.Fatal("return value must not be nil") } else { assert.NotEmpty(t, m.GetFace().ID) } diff --git a/internal/entity/subject.go b/internal/entity/subject.go index 0594c20ae..08b00a1b9 100644 --- a/internal/entity/subject.go +++ b/internal/entity/subject.go @@ -29,12 +29,13 @@ type Subject struct { SubjectType string `gorm:"type:VARBINARY(8);default:''" json:"Type,omitempty" yaml:"Type,omitempty"` SubjectSrc string `gorm:"type:VARBINARY(8);default:''" json:"Src,omitempty" yaml:"Src,omitempty"` SubjectSlug string `gorm:"type:VARBINARY(255);index;default:''" json:"Slug" yaml:"-"` - SubjectName string `gorm:"type:VARCHAR(255);unique_index" json:"Name" yaml:"Name"` + SubjectName string `gorm:"type:VARCHAR(255);unique_index;default:''" json:"Name" yaml:"Name"` + SubjectAlias string `gorm:"type:VARCHAR(255);default:''" json:"Alias" yaml:"Alias"` SubjectBio string `gorm:"type:TEXT;default:''" json:"Bio" yaml:"Bio,omitempty"` SubjectNotes string `gorm:"type:TEXT;default:''" json:"Notes,omitempty" yaml:"Notes,omitempty"` - Favorite bool `json:"Favorite" yaml:"Favorite,omitempty"` - Private bool `json:"Private" yaml:"Private,omitempty"` - Excluded bool `json:"Excluded" yaml:"Excluded,omitempty"` + Favorite bool `gorm:"default:false" json:"Favorite" yaml:"Favorite,omitempty"` + Private bool `gorm:"default:false" json:"Private" yaml:"Private,omitempty"` + Excluded bool `gorm:"default:false" json:"Excluded" 
yaml:"Excluded,omitempty"` FileCount int `gorm:"default:0" json:"FileCount" yaml:"-"` MetadataJSON json.RawMessage `gorm:"type:MEDIUMBLOB;" json:"Metadata,omitempty" yaml:"Metadata,omitempty"` CreatedAt time.Time `json:"CreatedAt" yaml:"-"` @@ -44,15 +45,16 @@ type Subject struct { // UnknownPerson can be used as a placeholder for unknown people. var UnknownPerson = Subject{ - SubjectUID: "j000000000000000", - SubjectSlug: "", - SubjectName: "", - SubjectType: SubjectPerson, - SubjectSrc: SrcDefault, - Favorite: false, - Private: false, - Excluded: false, - FileCount: 0, + SubjectUID: "j000000000000000", + SubjectSlug: "", + SubjectName: "", + SubjectAlias: "", + SubjectType: SubjectPerson, + SubjectSrc: SrcDefault, + Favorite: false, + Private: false, + Excluded: false, + FileCount: 0, } // CreateUnknownPerson initializes the database with a placeholder for unknown people if not exists. diff --git a/internal/photoprism/faces.go b/internal/photoprism/faces.go index 9ba1c37e3..db2b34654 100644 --- a/internal/photoprism/faces.go +++ b/internal/photoprism/faces.go @@ -69,6 +69,15 @@ func (w *Faces) Start(opt FacesOptions) (err error) { log.Debugf("faces: marker subjects already exist") } + // Resolve collisions of different subject's faces. + if c, r, err := query.ResolveFaceCollisions(); err != nil { + log.Errorf("faces: %s (resolve collisions)", err) + } else if c > 0 { + log.Infof("faces: resolved %d / %d collisions", r, c) + } else { + log.Debugf("faces: no collisions detected") + } + // Optimize existing face clusters. 
if res, err := w.Optimize(); err != nil { return err diff --git a/internal/photoprism/faces_audit.go b/internal/photoprism/faces_audit.go index 37fbf42a2..931ea2ff8 100644 --- a/internal/photoprism/faces_audit.go +++ b/internal/photoprism/faces_audit.go @@ -50,6 +50,7 @@ func (w *Faces) Audit(fix bool) (err error) { } conflicts := 0 + resolved := 0 faces, err := query.Faces(true, false) @@ -63,7 +64,7 @@ func (w *Faces) Audit(fix bool) (err error) { faceMap[f1.ID] = f1 for _, f2 := range faces { - if ok, dist := f1.Match(entity.Embeddings{f2.Embedding()}); ok { + if matched, dist := f1.Match(entity.Embeddings{f2.Embedding()}); matched { if f1.SubjectUID == f2.SubjectUID { continue } @@ -72,7 +73,7 @@ func (w *Faces) Audit(fix bool) (err error) { r := f1.SampleRadius + face.ClusterRadius - log.Infof("face %s: ambiguous at dist %f, Ø %f from %d samples, collision Ø %f", f1.ID, dist, r, f1.Samples, f1.CollisionRadius) + log.Infof("face %s: conflict at dist %f, Ø %f from %d samples, collision Ø %f", f1.ID, dist, r, f1.Samples, f1.CollisionRadius) if f1.SubjectUID != "" { log.Infof("face %s: subject %s (%s %s)", f1.ID, txt.Quote(subj[f1.SubjectUID].SubjectName), f1.SubjectUID, entity.SrcString(f1.FaceSrc)) @@ -88,21 +89,24 @@ func (w *Faces) Audit(fix bool) (err error) { if !fix { // Do nothing. 
- } else if reported, err := f1.ReportCollision(entity.Embeddings{f2.Embedding()}); err != nil { + } else if ok, err := f1.ResolveCollision(entity.Embeddings{f2.Embedding()}); err != nil { log.Errorf("face %s: %s", f1.ID, err) - } else if reported { - log.Infof("face %s: collision has been reported", f1.ID) + } else if ok { + log.Infof("face %s: collision has been resolved", f1.ID) + resolved++ } else { - log.Infof("face %s: collision has not been reported", f1.ID) + log.Infof("face %s: collision could not be resolved", f1.ID) } } } } if conflicts == 0 { - log.Infof("found no ambiguous faces clusters") + log.Infof("found no conflicting face clusters") + } else if !fix { + log.Infof("%d conflicting face clusters", conflicts) } else { - log.Infof("%d ambiguous faces clusters", conflicts) + log.Infof("%d conflicting face clusters, %d resolved", conflicts, resolved) } if markers, err := query.MarkersWithSubjectConflict(); err != nil { diff --git a/internal/photoprism/faces_optimize.go b/internal/photoprism/faces_optimize.go index d253d8516..73fab4ad7 100644 --- a/internal/photoprism/faces_optimize.go +++ b/internal/photoprism/faces_optimize.go @@ -18,40 +18,46 @@ func (w *Faces) Optimize() (result FacesOptimizeResult, err error) { return result, fmt.Errorf("facial recognition is disabled") } - faces, err := query.ManuallyAddedFaces() + // Iterative merging of manually added face clusters. + for i := 0; i <= 10; i++ { + var n int + var c = result.Merged + var merge entity.Faces + var faces entity.Faces - if err != nil { - return result, err - } + // Fetch manually added faces from the database and store the max face index in n (assigned, not redeclared, so the merge loop below sees it). + if faces, err = query.ManuallyAddedFaces(); err != nil { + return result, err + } else if n = len(faces) - 1; n < 1 { + // Need at least 2 faces to optimize. + break + } - // Max face index. - n := len(faces) - 1 + // Find and merge matching faces. 
+ for j := 0; j <= n; j++ { + if len(merge) == 0 { + merge = entity.Faces{faces[j]} + } else if faces[j].SubjectUID != merge[len(merge)-1].SubjectUID || j == n { + if len(merge) < 2 { + // Nothing to merge. + } else if _, err := query.MergeFaces(merge); err != nil { + log.Errorf("%s (merge)", err) + } else { + result.Merged += len(merge) + } - // Need at least 2 faces to optimize. - if n < 1 { - return result, nil - } - - var merge entity.Faces - - for i := 0; i <= n; i++ { - if len(merge) == 0 { - merge = entity.Faces{faces[i]} - } else if faces[i].SubjectUID != merge[len(merge)-1].SubjectUID || i == n { - if len(merge) < 2 { - // Nothing to merge. - } else if _, err := query.MergeFaces(merge); err != nil { - log.Errorf("%s (merge)", err) - } else { - result.Merged += len(merge) + merge = nil + } else if ok, dist := merge[0].Match(entity.Embeddings{faces[j].Embedding()}); ok { + log.Debugf("faces: can merge %s with %s, subject %s, dist %f", merge[0].ID, faces[j].ID, merge[0].SubjectUID, dist) + merge = append(merge, faces[j]) + } else if len(merge) == 1 { + merge = nil } + } - merge = nil - } else if ok, dist := merge[0].Match(entity.Embeddings{faces[i].Embedding()}); ok { - log.Debugf("faces: can merge %s with %s, subject %s, dist %f", merge[0].ID, faces[i].ID, merge[0].SubjectUID, dist) - merge = append(merge, faces[i]) - } else if len(merge) == 1 { - merge = nil + // Done? + if result.Merged <= c { + break } } diff --git a/internal/query/faces.go b/internal/query/faces.go index f1c08578f..55a62a502 100644 --- a/internal/query/faces.go +++ b/internal/query/faces.go @@ -3,6 +3,8 @@ package query import ( "fmt" + "github.com/photoprism/photoprism/internal/face" + "github.com/photoprism/photoprism/pkg/txt" "github.com/photoprism/photoprism/internal/entity" @@ -158,3 +160,51 @@ func MergeFaces(merge entity.Faces) (merged *entity.Face, err error) { return merged, err } + +// ResolveFaceCollisions resolves collisions of different subject's faces. 
+func ResolveFaceCollisions() (conflicts, resolved int, err error) { + faces, err := Faces(true, false) + + if err != nil { + return conflicts, resolved, err + } + + for _, f1 := range faces { + for _, f2 := range faces { + if matched, dist := f1.Match(entity.Embeddings{f2.Embedding()}); matched { + if f1.SubjectUID == f2.SubjectUID { + continue + } + + conflicts++ + + r := f1.SampleRadius + face.ClusterRadius + + log.Infof("face %s: conflict at dist %f, Ø %f from %d samples, collision Ø %f", f1.ID, dist, r, f1.Samples, f1.CollisionRadius) + + if f1.SubjectUID != "" { + log.Debugf("face %s: subject %s (%s %s)", f1.ID, txt.Quote(f1.SubjectUID), f1.SubjectUID, entity.SrcString(f1.FaceSrc)) + } else { + log.Debugf("face %s: no subject (%s)", f1.ID, entity.SrcString(f1.FaceSrc)) + } + + if f2.SubjectUID != "" { + log.Debugf("face %s: subject %s (%s %s)", f2.ID, txt.Quote(f2.SubjectUID), f2.SubjectUID, entity.SrcString(f2.FaceSrc)) + } else { + log.Debugf("face %s: no subject (%s)", f2.ID, entity.SrcString(f2.FaceSrc)) + } + + if ok, err := f1.ResolveCollision(entity.Embeddings{f2.Embedding()}); err != nil { + log.Errorf("face %s: %s", f1.ID, err) + } else if ok { + log.Infof("face %s: collision has been resolved", f1.ID) + resolved++ + } else { + log.Debugf("face %s: collision could not be resolved", f1.ID) + } + } + } + } + + return conflicts, resolved, nil +} diff --git a/internal/query/faces_test.go b/internal/query/faces_test.go index 5c4b78208..66894a092 100644 --- a/internal/query/faces_test.go +++ b/internal/query/faces_test.go @@ -188,3 +188,14 @@ func TestMergeFaces(t *testing.T) { assert.Nil(t, result) }) } + +func TestResolveFaceCollisions(t *testing.T) { + c, r, err := ResolveFaceCollisions() + + if err != nil { + t.Fatal(err) + } + + assert.LessOrEqual(t, 3, c) + assert.LessOrEqual(t, 3, r) +} diff --git a/internal/query/like.go b/internal/query/like.go index 55873a57f..235cd9be1 100644 --- a/internal/query/like.go +++ b/internal/query/like.go @@ -125,8 
+125,8 @@ func LikeAllWords(col, s string) (wheres []string) { } // LikeAllNames returns a list of where conditions matching all names. -func LikeAllNames(col, s string) (wheres []string) { - if s == "" { +func LikeAllNames(cols Cols, s string) (wheres []string) { + if len(cols) == 0 || len(s) < 2 { return wheres } @@ -137,10 +137,12 @@ func LikeAllNames(col, s string) (wheres []string) { } for _, w := range words { - wheres = append(wheres, fmt.Sprintf("%s LIKE '%s'", col, w)) - - if len(w) >= 2 { - wheres = append(wheres, fmt.Sprintf("%s LIKE '%s %%'", col, w)) + for _, c := range cols { + if len(w) >= 5 { + wheres = append(wheres, fmt.Sprintf("%s LIKE '%s%%' OR %s LIKE '%% %s'", c, w, c, w)) + } else { + wheres = append(wheres, fmt.Sprintf("%s LIKE '%s' OR %s LIKE '%s %%' OR %s LIKE '%% %s'", c, w, c, w, c, w)) + } } } diff --git a/internal/query/like_test.go b/internal/query/like_test.go index b65ae71e2..51d9811ce 100644 --- a/internal/query/like_test.go +++ b/internal/query/like_test.go @@ -169,23 +169,32 @@ func TestLikeAllWords(t *testing.T) { } func TestLikeAllNames(t *testing.T) { - t.Run("keywords", func(t *testing.T) { - if w := LikeAllNames("k.name", "j Mander 王"); len(w) == 4 { - assert.Equal(t, "k.name LIKE 'mander'", w[0]) - assert.Equal(t, "k.name LIKE 'mander %'", w[1]) - assert.Equal(t, "k.name LIKE '王'", w[2]) - assert.Equal(t, "k.name LIKE '王 %'", w[3]) + t.Run("MultipleNames", func(t *testing.T) { + if w := LikeAllNames(Cols{"k.name"}, "j Mander 王"); len(w) == 2 { + assert.Equal(t, "k.name LIKE 'mander%' OR k.name LIKE '% mander'", w[0]) + assert.Equal(t, "k.name LIKE '王' OR k.name LIKE '王 %' OR k.name LIKE '% 王'", w[1]) + } else { + t.Logf("wheres: %#v", w) + t.Fatal("2 where conditions expected") + } + }) + t.Run("MultipleColumns", func(t *testing.T) { + if w := LikeAllNames(Cols{"a.col1", "b.col2"}, "Mo Mander"); len(w) == 4 { + assert.Equal(t, "a.col1 LIKE 'mander%' OR a.col1 LIKE '% mander'", w[0]) + assert.Equal(t, "b.col2 LIKE 'mander%' 
OR b.col2 LIKE '% mander'", w[1]) + assert.Equal(t, "a.col1 LIKE 'mo' OR a.col1 LIKE 'mo %' OR a.col1 LIKE '% mo'", w[2]) + assert.Equal(t, "b.col2 LIKE 'mo' OR b.col2 LIKE 'mo %' OR b.col2 LIKE '% mo'", w[3]) } else { t.Logf("wheres: %#v", w) t.Fatal("4 where conditions expected") } }) - t.Run("string empty", func(t *testing.T) { - w := LikeAllNames("k.name", "") + t.Run("EmptyName", func(t *testing.T) { + w := LikeAllNames(Cols{"k.name"}, "") assert.Empty(t, w) }) - t.Run("0 words", func(t *testing.T) { - w := LikeAllNames("k.name", "a") + t.Run("NoWords", func(t *testing.T) { + w := LikeAllNames(Cols{"k.name"}, "a") assert.Empty(t, w) }) } diff --git a/internal/query/markers.go b/internal/query/markers.go index c1abbcf92..e65b1f5f1 100644 --- a/internal/query/markers.go +++ b/internal/query/markers.go @@ -204,8 +204,8 @@ func MarkersWithSubjectConflict() (results entity.Markers, err error) { // ResetFaceMarkerMatches removes automatically added subject and face references from the markers table. func ResetFaceMarkerMatches() (removed int64, err error) { res := Db().Model(&entity.Marker{}). - Where("subject_src <> ? AND marker_type = ?", entity.SrcManual, entity.MarkerFace). - UpdateColumns(entity.Values{"subject_uid": "", "subject_src": "", "face_id": "", "face_dist": -1.0, "matched_at": nil}) + Where("subject_src = ? AND marker_type = ?", entity.SrcAuto, entity.MarkerFace). + UpdateColumns(entity.Values{"marker_name": "", "subject_uid": "", "subject_src": "", "face_id": "", "face_dist": -1.0, "matched_at": nil}) return res.RowsAffected, res.Error } diff --git a/internal/query/query.go b/internal/query/query.go index 01d03cf1e..7bb98c472 100644 --- a/internal/query/query.go +++ b/internal/query/query.go @@ -54,6 +54,9 @@ const MaxResults = 10000 // SearchRadius is about 1 km. const SearchRadius = 0.009 +// Cols represents a list of database columns. +type Cols []string + // Query searches given an originals path and a db instance. 
type Query struct { db *gorm.DB diff --git a/internal/query/subjects.go b/internal/query/subjects.go index 1f6b35ef9..54471de8f 100644 --- a/internal/query/subjects.go +++ b/internal/query/subjects.go @@ -55,7 +55,7 @@ func CreateMarkerSubjects() (affected int64, err error) { var markers entity.Markers if err := Db(). - Where("subject_uid = '' AND marker_name <> ''"). + Where("subject_uid = '' AND marker_name <> '' AND subject_src <> ?", entity.SrcAuto). Where("marker_invalid = 0 AND marker_type = ?", entity.MarkerFace). Order("marker_name"). Find(&markers).Error; err != nil { @@ -103,8 +103,9 @@ func SearchSubjectUIDs(s string) (result []string, names []string, remaining str } type Matches struct { - SubjectUID string - SubjectName string + SubjectUID string + SubjectName string + SubjectAlias string } var matches []Matches @@ -112,7 +113,7 @@ func SearchSubjectUIDs(s string) (result []string, names []string, remaining str stmt := Db().Model(entity.Subject{}) stmt = stmt.Where("subject_src <> ?", entity.SrcDefault) - if where := LikeAllNames("subject_name", s); len(where) == 0 { + if where := LikeAllNames(Cols{"subject_name", "subject_alias"}, s); len(where) == 0 { return result, names, s } else { stmt = stmt.Where("?", gorm.Expr(strings.Join(where, " OR "))) @@ -128,8 +129,16 @@ func SearchSubjectUIDs(s string) (result []string, names []string, remaining str result = append(result, m.SubjectUID) names = append(names, m.SubjectName) - for _, n := range strings.Split(strings.ToLower(m.SubjectName), " ") { - s = strings.ReplaceAll(s, n, "") + for _, r := range txt.Words(strings.ToLower(m.SubjectName)) { + if len(r) > 1 { + s = strings.ReplaceAll(s, r, "") + } + } + + for _, r := range txt.Words(strings.ToLower(m.SubjectAlias)) { + if len(r) > 1 { + s = strings.ReplaceAll(s, r, "") + } } } diff --git a/internal/query/subjects_test.go b/internal/query/subjects_test.go index 2955790fe..1f5a1d92e 100644 --- a/internal/query/subjects_test.go +++ 
b/internal/query/subjects_test.go @@ -51,7 +51,7 @@ func TestCreateMarkerSubjects(t *testing.T) { affected, err := CreateMarkerSubjects() assert.NoError(t, err) - assert.GreaterOrEqual(t, affected, int64(2)) + assert.LessOrEqual(t, int64(0), affected) } func TestSearchSubjectUIDs(t *testing.T) { diff --git a/scripts/sql/init-test-databases.sql b/scripts/sql/init-test-databases.sql index 713ff1212..d4406e272 100644 --- a/scripts/sql/init-test-databases.sql +++ b/scripts/sql/init-test-databases.sql @@ -1,3 +1,8 @@ +CREATE DATABASE IF NOT EXISTS alpha; +CREATE DATABASE IF NOT EXISTS beta; +CREATE DATABASE IF NOT EXISTS gamma; +CREATE DATABASE IF NOT EXISTS delta; +CREATE DATABASE IF NOT EXISTS epsilon; DROP DATABASE IF EXISTS acceptance; CREATE DATABASE IF NOT EXISTS acceptance; DROP DATABASE IF EXISTS api;