From e858a34f3e50bc977e16ce943db0ec5484d02ce4 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Mon, 3 Jan 2022 13:30:34 +0100 Subject: [PATCH] Search: Return empty result if keywords contain stopwords only #1859 --- internal/search/geojson.go | 4 ++++ internal/search/photos.go | 4 ++++ pkg/txt/resources/stopwords.txt | 5 ----- pkg/txt/stopwords.go | 5 ----- pkg/txt/words.go | 23 +++++++++++++++++++++++ pkg/txt/words_test.go | 33 +++++++++++++++++++++++++++++++++ 6 files changed, 64 insertions(+), 10 deletions(-) diff --git a/internal/search/geojson.go b/internal/search/geojson.go index f60f77d9b..c5c7b1d23 100644 --- a/internal/search/geojson.go +++ b/internal/search/geojson.go @@ -132,6 +132,10 @@ func Geo(f form.SearchGeo) (results GeoResults, err error) { // Search for one or more keywords? if f.Keywords != "" { + if txt.StopwordsOnly(f.Keywords) { + return GeoResults{}, fmt.Errorf("keywords contain stopwords only") + } + for _, where := range LikeAnyKeyword("k.keyword", f.Keywords) { s = s.Where("photos.id IN (SELECT pk.photo_id FROM keywords k JOIN photos_keywords pk ON k.id = pk.keyword_id WHERE (?))", gorm.Expr(where)) } diff --git a/internal/search/photos.go b/internal/search/photos.go index 4de1fc525..c454266b1 100644 --- a/internal/search/photos.go +++ b/internal/search/photos.go @@ -226,6 +226,10 @@ func Photos(f form.SearchPhotos) (results PhotoResults, count int, err error) { // Search for one or more keywords? if f.Keywords != "" { + if txt.StopwordsOnly(f.Keywords) { + return PhotoResults{}, 0, fmt.Errorf("keywords contain stopwords only") + } + for _, where := range LikeAnyKeyword("k.keyword", f.Keywords) { s = s.Where("photos.id IN (SELECT pk.photo_id FROM keywords k JOIN photos_keywords pk ON k.id = pk.keyword_id WHERE (?))", gorm.Expr(where)) } diff --git a/pkg/txt/resources/stopwords.txt b/pkg/txt/resources/stopwords.txt index 6d860a105..20f87f7d9 100644 --- a/pkg/txt/resources/stopwords.txt +++ b/pkg/txt/resources/stopwords.txt @@ -13,8 +13,6 @@ handy tumblr bilder bild -films -filme foto fotos flickr @@ -1958,9 +1956,6 @@ mais malgre malgré maximale -meme -memes -merci mes mien mienne diff --git a/pkg/txt/stopwords.go b/pkg/txt/stopwords.go index 11ecd7197..083d12a34 100644 --- a/pkg/txt/stopwords.go +++ b/pkg/txt/stopwords.go @@ -18,8 +18,6 @@ var StopWords = map[string]bool{ "tumblr": true, "bilder": true, "bild": true, - "films": true, - "filme": true, "foto": true, "fotos": true, "flickr": true, @@ -1963,9 +1961,6 @@ var StopWords = map[string]bool{ "malgre": true, "malgré": true, "maximale": true, - "meme": true, - "memes": true, - "merci": true, "mes": true, "mien": true, "mienne": true, diff --git a/pkg/txt/words.go b/pkg/txt/words.go index 11988b047..544a72099 100644 --- a/pkg/txt/words.go +++ b/pkg/txt/words.go @@ -202,3 +202,26 @@ func UniqueKeywords(s string) (results []string) { func SortCaseInsensitive(words []string) { sort.Slice(words, func(i, j int) bool { return strings.ToLower(words[i]) < strings.ToLower(words[j]) }) } + +// StopwordsOnly tests if the string contains stopwords only. +func StopwordsOnly(s string) bool { + s = strings.TrimSpace(s) + + if s == "" { + return false + } + + for _, w := range Words(s) { + w = strings.ToLower(w) + + if UnknownWord(w) { + continue + } + + if _, ok := StopWords[w]; ok == false { + return false + } + } + + return true +} diff --git a/pkg/txt/words_test.go b/pkg/txt/words_test.go index 1a3539381..96ba52b58 100644 --- a/pkg/txt/words_test.go +++ b/pkg/txt/words_test.go @@ -239,3 +239,36 @@ func TestRemoveFromWords(t *testing.T) { assert.Equal(t, []string{"apple", "brown", "jpg", "lazy"}, result) }) } + +func TestStopwordsOnly(t *testing.T) { + t.Run("Empty", func(t *testing.T) { + assert.False(t, StopwordsOnly("")) + }) + t.Run("FoldersDateienFile", func(t *testing.T) { + assert.True(t, StopwordsOnly("Folders Dateien File")) + }) + t.Run("FoldersDateienFile", func(t *testing.T) { + assert.False(t, StopwordsOnly("Folders Dateien Meme File")) + }) + t.Run("qx", func(t *testing.T) { + assert.True(t, StopwordsOnly("qx")) + }) + t.Run("atz", func(t *testing.T) { + assert.True(t, StopwordsOnly("atz")) + }) + t.Run("xqx", func(t *testing.T) { + assert.True(t, StopwordsOnly("xqx")) + }) + t.Run("kuh", func(t *testing.T) { + assert.False(t, StopwordsOnly("kuh")) + }) + t.Run("muh", func(t *testing.T) { + assert.False(t, StopwordsOnly("muh")) + }) + t.Run("桥", func(t *testing.T) { + assert.False(t, StopwordsOnly("桥")) + }) + t.Run("桥船", func(t *testing.T) { + assert.False(t, StopwordsOnly("桥船")) + }) +}