From 960fce2933fc6f0a04a97140601c1a604f4caf48 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Mon, 6 Sep 2021 14:16:46 +0200 Subject: [PATCH] People: Improve searching for names #22 --- internal/query/geo.go | 4 +-- internal/query/like.go | 43 ++++++++++++++++++++------ internal/query/like_test.go | 56 ++++++++++++++++++++++++++-------- internal/query/photo_search.go | 6 ++-- internal/query/query.go | 3 ++ internal/query/subjects.go | 55 ++++++++++++++++++--------------- 6 files changed, 116 insertions(+), 51 deletions(-) diff --git a/internal/query/geo.go b/internal/query/geo.go index 279236de0..587e9c97d 100644 --- a/internal/query/geo.go +++ b/internal/query/geo.go @@ -39,8 +39,8 @@ func Geo(f form.GeoSearch) (results GeoResults, err error) { Where("photos.deleted_at IS NULL"). Where("photos.photo_lat <> 0") - // Clip query to reasonable size if needed. - f.Query = txt.Clip(f.Query, txt.ClipQuery) + // Clip to reasonable size and normalize operators. + f.Query = NormalizeSearchQuery(f.Query) // Modify query if it contains subject names. if f.Query != "" && f.Subject == "" { diff --git a/internal/query/like.go b/internal/query/like.go index 235cd9be1..5ca4f5c65 100644 --- a/internal/query/like.go +++ b/internal/query/like.go @@ -10,6 +10,16 @@ import ( "github.com/jinzhu/inflection" ) +// NormalizeSearchQuery replaces search operator with default symbols. +func NormalizeSearchQuery(s string) string { + s = strings.ToLower(txt.Clip(s, txt.ClipQuery)) + s = strings.ReplaceAll(s, OrEn, Or) + s = strings.ReplaceAll(s, AndEn, And) + s = strings.ReplaceAll(s, Plus, And) + s = strings.ReplaceAll(s, "%", "*") + return strings.Trim(s, "+&|_-=!@$%^(){}\\<>,.;: ") +} + // LikeAny returns a single where condition matching the search words. func LikeAny(col, s string, keywords, exact bool) (wheres []string) { if s == "" { @@ -19,6 +29,7 @@ func LikeAny(col, s string, keywords, exact bool) (wheres []string) { s = strings.ReplaceAll(s, Or, " ") s = strings.ReplaceAll(s, OrEn, " ") s = strings.ReplaceAll(s, AndEn, And) + s = strings.ReplaceAll(s, Plus, And) var wildcardThreshold int @@ -130,19 +141,31 @@ func LikeAllNames(cols Cols, s string) (wheres []string) { return wheres } - words := txt.UniqueWords(txt.Words(s)) + for _, k := range strings.Split(s, And) { + var orWheres []string - if len(words) == 0 { - return wheres - } + for _, w := range strings.Split(k, Or) { + w = strings.TrimSpace(w) - for _, w := range words { - for _, c := range cols { - if len(w) >= 5 { - wheres = append(wheres, fmt.Sprintf("%s LIKE '%s%%' OR %s LIKE '%% %s'", c, w, c, w)) - } else { - wheres = append(wheres, fmt.Sprintf("%s LIKE '%s' OR %s LIKE '%s %%' OR %s LIKE '%% %s'", c, w, c, w, c, w)) + if w == Empty || len(w) < 2 && txt.IsLatin(w) { + continue } + + for _, c := range cols { + if len(w) > 4 { + if strings.Contains(w, Space) { + orWheres = append(orWheres, fmt.Sprintf("%s LIKE '%s%%'", c, w)) + } else { + orWheres = append(orWheres, fmt.Sprintf("%s LIKE '%%%s%%'", c, w)) + } + } else { + orWheres = append(orWheres, fmt.Sprintf("%s LIKE '%s' OR %s LIKE '%s %%'", c, w, c, w)) + } + } + } + + if len(orWheres) > 0 { + wheres = append(wheres, strings.Join(orWheres, " OR ")) } } diff --git a/internal/query/like_test.go b/internal/query/like_test.go index 51d9811ce..a8102f36a 100644 --- a/internal/query/like_test.go +++ b/internal/query/like_test.go @@ -6,6 +6,13 @@ import ( "github.com/stretchr/testify/assert" ) +func TestNormalizeSearchQuery(t *testing.T) { + t.Run("Replace", func(t *testing.T) { + q := NormalizeSearchQuery("table spoon & usa | img% json OR BILL!") + assert.Equal(t, "table spoon & usa | img* json|bill", q) + }) +} + func TestLikeAny(t *testing.T) { t.Run("and_or_search", func(t *testing.T) { if w := LikeAny("k.keyword", "table spoon & usa | img json", true, false); len(w) != 2 { @@ -170,23 +177,17 @@ func TestLikeAllWords(t *testing.T) { func TestLikeAllNames(t *testing.T) { t.Run("MultipleNames", func(t *testing.T) { - if w := LikeAllNames(Cols{"k.name"}, "j Mander 王"); len(w) == 2 { - assert.Equal(t, "k.name LIKE 'mander%' OR k.name LIKE '% mander'", w[0]) - assert.Equal(t, "k.name LIKE '王' OR k.name LIKE '王 %' OR k.name LIKE '% 王'", w[1]) + if w := LikeAllNames(Cols{"k.name"}, "j Mander 王"); len(w) == 1 { + assert.Equal(t, "k.name LIKE 'j Mander 王%'", w[0]) } else { - t.Logf("wheres: %#v", w) - t.Fatal("2 where conditions expected") + t.Fatalf("one where conditions expected: %#v", w) } }) t.Run("MultipleColumns", func(t *testing.T) { - if w := LikeAllNames(Cols{"a.col1", "b.col2"}, "Mo Mander"); len(w) == 4 { - assert.Equal(t, "a.col1 LIKE 'mander%' OR a.col1 LIKE '% mander'", w[0]) - assert.Equal(t, "b.col2 LIKE 'mander%' OR b.col2 LIKE '% mander'", w[1]) - assert.Equal(t, "a.col1 LIKE 'mo' OR a.col1 LIKE 'mo %' OR a.col1 LIKE '% mo'", w[2]) - assert.Equal(t, "b.col2 LIKE 'mo' OR b.col2 LIKE 'mo %' OR b.col2 LIKE '% mo'", w[3]) + if w := LikeAllNames(Cols{"a.col1", "b.col2"}, "Mo Mander"); len(w) == 1 { + assert.Equal(t, "a.col1 LIKE 'Mo Mander%' OR b.col2 LIKE 'Mo Mander%'", w[0]) } else { - t.Logf("wheres: %#v", w) - t.Fatal("4 where conditions expected") + t.Fatalf("one where conditions expected: %#v", w) } }) t.Run("EmptyName", func(t *testing.T) { @@ -197,6 +198,37 @@ func TestLikeAllNames(t *testing.T) { w := LikeAllNames(Cols{"k.name"}, "a") assert.Empty(t, w) }) + t.Run("FullNames", func(t *testing.T) { + if w := LikeAllNames(Cols{"j.name", "j.alias"}, "Bill & Melinda Gates"); len(w) == 2 { + assert.Equal(t, "j.name LIKE 'Bill' OR j.name LIKE 'Bill %' OR j.alias LIKE 'Bill' OR j.alias LIKE 'Bill %'", w[0]) + assert.Equal(t, "j.name LIKE 'Melinda Gates%' OR j.alias LIKE 'Melinda Gates%'", w[1]) + } else { + t.Fatalf("two where conditions expected: %#v", w) + } + }) + t.Run("Plus", func(t *testing.T) { + if w := LikeAllNames(Cols{"name"}, NormalizeSearchQuery("Paul + Paula")); len(w) == 2 { + assert.Equal(t, "name LIKE 'paul' OR name LIKE 'paul %'", w[0]) + assert.Equal(t, "name LIKE '%paula%'", w[1]) + } else { + t.Fatalf("two where conditions expected: %#v", w) + } + }) + t.Run("Ane", func(t *testing.T) { + if w := LikeAllNames(Cols{"name"}, NormalizeSearchQuery("Paul and Paula")); len(w) == 2 { + assert.Equal(t, "name LIKE 'paul' OR name LIKE 'paul %'", w[0]) + assert.Equal(t, "name LIKE '%paula%'", w[1]) + } else { + t.Fatalf("two where conditions expected: %#v", w) + } + }) + t.Run("Or", func(t *testing.T) { + if w := LikeAllNames(Cols{"name"}, NormalizeSearchQuery("Paul or Paula")); len(w) == 1 { + assert.Equal(t, "name LIKE 'paul' OR name LIKE 'paul %' OR name LIKE '%paula%'", w[0]) + } else { + t.Fatalf("one where conditions expected: %#v", w) + } + }) } func TestAnySlug(t *testing.T) { diff --git a/internal/query/photo_search.go b/internal/query/photo_search.go index 9ee2e6380..fd5211275 100644 --- a/internal/query/photo_search.go +++ b/internal/query/photo_search.go @@ -132,14 +132,14 @@ func PhotoSearch(f form.PhotoSearch) (results PhotoResults, count int, err error } } - // Clip query to reasonable size if needed. - f.Query = txt.Clip(f.Query, txt.ClipQuery) + // Clip to reasonable size and normalize operators. + f.Query = NormalizeSearchQuery(f.Query) // Modify query if it contains subject names. if f.Query != "" && f.Subject == "" { if subj, names, remaining := SearchSubjectUIDs(f.Query); len(subj) > 0 { f.Subject = strings.Join(subj, And) - log.Debugf("search: subject %s", txt.Quote(strings.Join(names, " & "))) + log.Debugf("people: searching for %s", txt.Quote(txt.JoinNames(names))) f.Query = remaining } } diff --git a/internal/query/query.go b/internal/query/query.go index 7bb98c472..029d1236b 100644 --- a/internal/query/query.go +++ b/internal/query/query.go @@ -44,8 +44,11 @@ const ( SQLite = "sqlite3" Or = "|" And = "&" + Plus = " + " OrEn = " or " AndEn = " and " + Space = " " + Empty = "" ) // MaxResults is max result limit for queries. diff --git a/internal/query/subjects.go b/internal/query/subjects.go index 6fa5f9ec5..c8ef12a60 100644 --- a/internal/query/subjects.go +++ b/internal/query/subjects.go @@ -134,38 +134,45 @@ func SearchSubjectUIDs(s string) (result []string, names []string, remaining str var matches []Matches - stmt := Db().Model(entity.Subject{}) + wheres := LikeAllNames(Cols{"subject_name", "subject_alias"}, s) - if where := LikeAllNames(Cols{"subject_name", "subject_alias"}, s); len(where) == 0 { - return result, names, s - } else { - stmt = stmt.Where("?", gorm.Expr(strings.Join(where, " OR "))) - } - - if err := stmt.Scan(&matches).Error; err != nil { - log.Errorf("search: %s while finding subjects", err) - } else if len(matches) == 0 { + if len(wheres) == 0 { return result, names, s } - for _, m := range matches { - result = append(result, m.SubjectUID) - names = append(names, m.SubjectName) + remaining = s - for _, r := range txt.Words(strings.ToLower(m.SubjectName)) { - if len(r) > 1 { - s = strings.ReplaceAll(s, r, "") + for _, where := range wheres { + var subj []string + + stmt := Db().Model(entity.Subject{}) + stmt = stmt.Where("?", gorm.Expr(where)) + + if err := stmt.Scan(&matches).Error; err != nil { + log.Errorf("search: %s while finding subjects", err) + } else if len(matches) == 0 { + continue + } + + for _, m := range matches { + subj = append(subj, m.SubjectUID) + names = append(names, m.SubjectName) + + for _, r := range txt.Words(strings.ToLower(m.SubjectName)) { + if len(r) > 1 { + remaining = strings.ReplaceAll(remaining, r, "") + } + } + + for _, r := range txt.Words(strings.ToLower(m.SubjectAlias)) { + if len(r) > 1 { + remaining = strings.ReplaceAll(remaining, r, "") + } } } - for _, r := range txt.Words(strings.ToLower(m.SubjectAlias)) { - if len(r) > 1 { - s = strings.ReplaceAll(s, r, "") - } - } + result = append(result, strings.Join(subj, Or)) } - s = strings.Trim(s, "&| ") - - return result, names, s + return result, names, NormalizeSearchQuery(remaining) }