People: Improve searching for names #22

This commit is contained in:
Michael Mayer 2021-09-06 14:16:46 +02:00
parent afcf78efe2
commit 960fce2933
6 changed files with 116 additions and 51 deletions

View file

@ -39,8 +39,8 @@ func Geo(f form.GeoSearch) (results GeoResults, err error) {
Where("photos.deleted_at IS NULL").
Where("photos.photo_lat <> 0")
// Clip query to reasonable size if needed.
f.Query = txt.Clip(f.Query, txt.ClipQuery)
// Clip to reasonable size and normalize operators.
f.Query = NormalizeSearchQuery(f.Query)
// Modify query if it contains subject names.
if f.Query != "" && f.Subject == "" {

View file

@ -10,6 +10,16 @@ import (
"github.com/jinzhu/inflection"
)
// NormalizeSearchQuery prepares a raw search query for further processing.
//
// The input is first passed through txt.Clip with the txt.ClipQuery limit and
// converted to lower case. Operator spellings are then rewritten, one after
// another and in this order, to their default symbols:
//
//   - OrEn (" or ") becomes Or ("|")
//   - AndEn (" and ") becomes And ("&")
//   - Plus (" + ") becomes And ("&")
//   - "%" becomes "*"
//
// Finally, operator, punctuation, and space characters are stripped from both
// ends of the result.
func NormalizeSearchQuery(s string) string {
	query := txt.Clip(s, txt.ClipQuery)
	query = strings.ToLower(query)

	// The substitutions are applied sequentially, so earlier replacements
	// can affect what later ones are able to match. Keep the order as is.
	substitutions := [][2]string{
		{OrEn, Or},
		{AndEn, And},
		{Plus, And},
		{"%", "*"},
	}

	for _, sub := range substitutions {
		query = strings.ReplaceAll(query, sub[0], sub[1])
	}

	// Drop dangling operators and punctuation at the start and end.
	const trimChars = "+&|_-=!@$%^(){}\\<>,.;: "

	return strings.Trim(query, trimChars)
}
// LikeAny returns a single where condition matching the search words.
func LikeAny(col, s string, keywords, exact bool) (wheres []string) {
if s == "" {
@ -19,6 +29,7 @@ func LikeAny(col, s string, keywords, exact bool) (wheres []string) {
s = strings.ReplaceAll(s, Or, " ")
s = strings.ReplaceAll(s, OrEn, " ")
s = strings.ReplaceAll(s, AndEn, And)
s = strings.ReplaceAll(s, Plus, And)
var wildcardThreshold int
@ -130,19 +141,31 @@ func LikeAllNames(cols Cols, s string) (wheres []string) {
return wheres
}
words := txt.UniqueWords(txt.Words(s))
for _, k := range strings.Split(s, And) {
var orWheres []string
if len(words) == 0 {
return wheres
}
for _, w := range strings.Split(k, Or) {
w = strings.TrimSpace(w)
for _, w := range words {
for _, c := range cols {
if len(w) >= 5 {
wheres = append(wheres, fmt.Sprintf("%s LIKE '%s%%' OR %s LIKE '%% %s'", c, w, c, w))
} else {
wheres = append(wheres, fmt.Sprintf("%s LIKE '%s' OR %s LIKE '%s %%' OR %s LIKE '%% %s'", c, w, c, w, c, w))
if w == Empty || len(w) < 2 && txt.IsLatin(w) {
continue
}
for _, c := range cols {
if len(w) > 4 {
if strings.Contains(w, Space) {
orWheres = append(orWheres, fmt.Sprintf("%s LIKE '%s%%'", c, w))
} else {
orWheres = append(orWheres, fmt.Sprintf("%s LIKE '%%%s%%'", c, w))
}
} else {
orWheres = append(orWheres, fmt.Sprintf("%s LIKE '%s' OR %s LIKE '%s %%'", c, w, c, w))
}
}
}
if len(orWheres) > 0 {
wheres = append(wheres, strings.Join(orWheres, " OR "))
}
}

View file

@ -6,6 +6,13 @@ import (
"github.com/stretchr/testify/assert"
)
func TestNormalizeSearchQuery(t *testing.T) {
t.Run("Replace", func(t *testing.T) {
q := NormalizeSearchQuery("table spoon & usa | img% json OR BILL!")
assert.Equal(t, "table spoon & usa | img* json|bill", q)
})
}
func TestLikeAny(t *testing.T) {
t.Run("and_or_search", func(t *testing.T) {
if w := LikeAny("k.keyword", "table spoon & usa | img json", true, false); len(w) != 2 {
@ -170,23 +177,17 @@ func TestLikeAllWords(t *testing.T) {
func TestLikeAllNames(t *testing.T) {
t.Run("MultipleNames", func(t *testing.T) {
if w := LikeAllNames(Cols{"k.name"}, "j Mander 王"); len(w) == 2 {
assert.Equal(t, "k.name LIKE 'mander%' OR k.name LIKE '% mander'", w[0])
assert.Equal(t, "k.name LIKE '王' OR k.name LIKE '王 %' OR k.name LIKE '% 王'", w[1])
if w := LikeAllNames(Cols{"k.name"}, "j Mander 王"); len(w) == 1 {
assert.Equal(t, "k.name LIKE 'j Mander 王%'", w[0])
} else {
t.Logf("wheres: %#v", w)
t.Fatal("2 where conditions expected")
t.Fatalf("one where conditions expected: %#v", w)
}
})
t.Run("MultipleColumns", func(t *testing.T) {
if w := LikeAllNames(Cols{"a.col1", "b.col2"}, "Mo Mander"); len(w) == 4 {
assert.Equal(t, "a.col1 LIKE 'mander%' OR a.col1 LIKE '% mander'", w[0])
assert.Equal(t, "b.col2 LIKE 'mander%' OR b.col2 LIKE '% mander'", w[1])
assert.Equal(t, "a.col1 LIKE 'mo' OR a.col1 LIKE 'mo %' OR a.col1 LIKE '% mo'", w[2])
assert.Equal(t, "b.col2 LIKE 'mo' OR b.col2 LIKE 'mo %' OR b.col2 LIKE '% mo'", w[3])
if w := LikeAllNames(Cols{"a.col1", "b.col2"}, "Mo Mander"); len(w) == 1 {
assert.Equal(t, "a.col1 LIKE 'Mo Mander%' OR b.col2 LIKE 'Mo Mander%'", w[0])
} else {
t.Logf("wheres: %#v", w)
t.Fatal("4 where conditions expected")
t.Fatalf("one where conditions expected: %#v", w)
}
})
t.Run("EmptyName", func(t *testing.T) {
@ -197,6 +198,37 @@ func TestLikeAllNames(t *testing.T) {
w := LikeAllNames(Cols{"k.name"}, "a")
assert.Empty(t, w)
})
t.Run("FullNames", func(t *testing.T) {
if w := LikeAllNames(Cols{"j.name", "j.alias"}, "Bill & Melinda Gates"); len(w) == 2 {
assert.Equal(t, "j.name LIKE 'Bill' OR j.name LIKE 'Bill %' OR j.alias LIKE 'Bill' OR j.alias LIKE 'Bill %'", w[0])
assert.Equal(t, "j.name LIKE 'Melinda Gates%' OR j.alias LIKE 'Melinda Gates%'", w[1])
} else {
t.Fatalf("two where conditions expected: %#v", w)
}
})
t.Run("Plus", func(t *testing.T) {
if w := LikeAllNames(Cols{"name"}, NormalizeSearchQuery("Paul + Paula")); len(w) == 2 {
assert.Equal(t, "name LIKE 'paul' OR name LIKE 'paul %'", w[0])
assert.Equal(t, "name LIKE '%paula%'", w[1])
} else {
t.Fatalf("two where conditions expected: %#v", w)
}
})
t.Run("Ane", func(t *testing.T) {
if w := LikeAllNames(Cols{"name"}, NormalizeSearchQuery("Paul and Paula")); len(w) == 2 {
assert.Equal(t, "name LIKE 'paul' OR name LIKE 'paul %'", w[0])
assert.Equal(t, "name LIKE '%paula%'", w[1])
} else {
t.Fatalf("two where conditions expected: %#v", w)
}
})
t.Run("Or", func(t *testing.T) {
if w := LikeAllNames(Cols{"name"}, NormalizeSearchQuery("Paul or Paula")); len(w) == 1 {
assert.Equal(t, "name LIKE 'paul' OR name LIKE 'paul %' OR name LIKE '%paula%'", w[0])
} else {
t.Fatalf("one where conditions expected: %#v", w)
}
})
}
func TestAnySlug(t *testing.T) {

View file

@ -132,14 +132,14 @@ func PhotoSearch(f form.PhotoSearch) (results PhotoResults, count int, err error
}
}
// Clip query to reasonable size if needed.
f.Query = txt.Clip(f.Query, txt.ClipQuery)
// Clip to reasonable size and normalize operators.
f.Query = NormalizeSearchQuery(f.Query)
// Modify query if it contains subject names.
if f.Query != "" && f.Subject == "" {
if subj, names, remaining := SearchSubjectUIDs(f.Query); len(subj) > 0 {
f.Subject = strings.Join(subj, And)
log.Debugf("search: subject %s", txt.Quote(strings.Join(names, " & ")))
log.Debugf("people: searching for %s", txt.Quote(txt.JoinNames(names)))
f.Query = remaining
}
}

View file

@ -44,8 +44,11 @@ const (
SQLite = "sqlite3"
Or = "|"
And = "&"
Plus = " + "
OrEn = " or "
AndEn = " and "
Space = " "
Empty = ""
)
// MaxResults is max result limit for queries.

View file

@ -134,38 +134,45 @@ func SearchSubjectUIDs(s string) (result []string, names []string, remaining str
var matches []Matches
stmt := Db().Model(entity.Subject{})
wheres := LikeAllNames(Cols{"subject_name", "subject_alias"}, s)
if where := LikeAllNames(Cols{"subject_name", "subject_alias"}, s); len(where) == 0 {
return result, names, s
} else {
stmt = stmt.Where("?", gorm.Expr(strings.Join(where, " OR ")))
}
if err := stmt.Scan(&matches).Error; err != nil {
log.Errorf("search: %s while finding subjects", err)
} else if len(matches) == 0 {
if len(wheres) == 0 {
return result, names, s
}
for _, m := range matches {
result = append(result, m.SubjectUID)
names = append(names, m.SubjectName)
remaining = s
for _, r := range txt.Words(strings.ToLower(m.SubjectName)) {
if len(r) > 1 {
s = strings.ReplaceAll(s, r, "")
for _, where := range wheres {
var subj []string
stmt := Db().Model(entity.Subject{})
stmt = stmt.Where("?", gorm.Expr(where))
if err := stmt.Scan(&matches).Error; err != nil {
log.Errorf("search: %s while finding subjects", err)
} else if len(matches) == 0 {
continue
}
for _, m := range matches {
subj = append(subj, m.SubjectUID)
names = append(names, m.SubjectName)
for _, r := range txt.Words(strings.ToLower(m.SubjectName)) {
if len(r) > 1 {
remaining = strings.ReplaceAll(remaining, r, "")
}
}
for _, r := range txt.Words(strings.ToLower(m.SubjectAlias)) {
if len(r) > 1 {
remaining = strings.ReplaceAll(remaining, r, "")
}
}
}
for _, r := range txt.Words(strings.ToLower(m.SubjectAlias)) {
if len(r) > 1 {
s = strings.ReplaceAll(s, r, "")
}
}
result = append(result, strings.Join(subj, Or))
}
s = strings.Trim(s, "&| ")
return result, names, s
return result, names, NormalizeSearchQuery(remaining)
}