Search: Improve query string normalization and sanitization #1814 #1820

This commit is contained in:
Michael Mayer 2021-12-16 15:26:54 +01:00
parent 2636255c95
commit e8a1e85f14
22 changed files with 92 additions and 67 deletions

4
go.mod
View file

@ -54,9 +54,9 @@ require (
github.com/ulule/deepcopier v0.0.0-20200430083143-45decc6639b6
github.com/urfave/cli v1.22.5
go4.org v0.0.0-20201209231011-d4a079459e60 // indirect
golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b
golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3
golang.org/x/image v0.0.0-20211028202545-6944b10bf410 // indirect
golang.org/x/net v0.0.0-20211215060638-4ddde0e984e9
golang.org/x/net v0.0.0-20211216030914-fe4d6282115f
golang.org/x/sys v0.0.0-20211213223007-03aa0b5f6827 // indirect
golang.org/x/text v0.3.7 // indirect
gonum.org/v1/gonum v0.9.3

8
go.sum
View file

@ -310,8 +310,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
golang.org/x/crypto v0.0.0-20191205180655-e7c4368fe9dd/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b h1:QAqMVf3pSa6eeTsuklijukjXBlj7Es2QQplab+/RbQ4=
golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3 h1:0es+/5331RGQPcXlMfP+WrnIIS6dNnNRe0WB02W0F4M=
golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@ -377,8 +377,8 @@ golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211215060638-4ddde0e984e9 h1:kmreh1vGI63l2FxOAYS3Yv6ATsi7lSTuwNSVbGfJV9I=
golang.org/x/net v0.0.0-20211215060638-4ddde0e984e9/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211216030914-fe4d6282115f h1:hEYJvxw1lSnWIl8X9ofsYMklzaDs90JI2az5YMd4fPM=
golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=

View file

@ -51,6 +51,7 @@ func SearchPhotos(router *gin.RouterGroup) {
return
}
f.UID = ""
f.Public = true
f.Private = false
f.Hidden = false

View file

@ -126,7 +126,7 @@ func Unserialize(f SearchForm, q string) (result error) {
field.SetUint(uint64(intValue))
}
case string:
field.SetString(stringValue)
field.SetString(sanitize.Query(stringValue))
case bool:
field.SetBool(txt.Bool(stringValue))
default:
@ -155,11 +155,11 @@ func Unserialize(f SearchForm, q string) (result error) {
}
if len(queryStrings) > 0 {
f.SetQuery(strings.Join(queryStrings, " "))
f.SetQuery(sanitize.Query(strings.Join(queryStrings, " ")))
}
if result != nil {
log.Warnf("form: failed parsing values")
log.Warnf("form: failed parsing search query")
}
return result

View file

@ -3,8 +3,6 @@ package search
import (
"strings"
"github.com/photoprism/photoprism/pkg/sanitize"
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/internal/form"
"github.com/photoprism/photoprism/pkg/txt"
@ -16,9 +14,6 @@ func Albums(f form.SearchAlbums) (results AlbumResults, err error) {
return results, err
}
// Clip and normalize search query.
f.Query = sanitize.Query(f.Query)
// Base query.
s := UnscopedDb().Table("albums").
Select("albums.*, cp.photo_count, cl.link_count, CASE WHEN albums.album_year = 0 THEN 0 ELSE 1 END AS has_year").

View file

@ -6,8 +6,6 @@ import (
"strings"
"time"
"github.com/photoprism/photoprism/pkg/sanitize"
"github.com/dustin/go-humanize/english"
"github.com/jinzhu/gorm"
@ -58,9 +56,6 @@ func Geo(f form.SearchGeo) (results GeoResults, err error) {
Where("photos.deleted_at IS NULL").
Where("photos.photo_lat <> 0")
// Clip and normalize search query.
f.Query = sanitize.Query(f.Query)
// Set search filters based on search terms.
if terms := txt.SearchTerms(f.Query); f.Query != "" && len(terms) == 0 {
if f.Title == "" {
@ -368,7 +363,7 @@ func Geo(f form.SearchGeo) (results GeoResults, err error) {
return results, result.Error
}
log.Infof("geo: found %s for %s [%s]", english.Plural(len(results), "result", "results"), f.SerializeAll(), time.Since(start))
log.Debugf("geo: found %s for %s [%s]", english.Plural(len(results), "result", "results"), f.SerializeAll(), time.Since(start))
return results, nil
}

View file

@ -208,23 +208,23 @@ func TestLikeAllNames(t *testing.T) {
})
t.Run("Plus", func(t *testing.T) {
if w := LikeAllNames(Cols{"name"}, sanitize.Query("Paul + Paula")); len(w) == 2 {
assert.Equal(t, "name LIKE '%paul%'", w[0])
assert.Equal(t, "name LIKE '%paula%'", w[1])
assert.Equal(t, "name LIKE '%Paul%'", w[0])
assert.Equal(t, "name LIKE '%Paula%'", w[1])
} else {
t.Fatalf("unexpected result: %#v", w)
}
})
t.Run("And", func(t *testing.T) {
if w := LikeAllNames(Cols{"name"}, sanitize.Query("P and Paula")); len(w) == 2 {
assert.Equal(t, "name LIKE '%p%'", w[0])
assert.Equal(t, "name LIKE '%paula%'", w[1])
assert.Equal(t, "name LIKE '%P%'", w[0])
assert.Equal(t, "name LIKE '%Paula%'", w[1])
} else {
t.Fatalf("unexpected result: %#v", w)
}
})
t.Run("Or", func(t *testing.T) {
if w := LikeAllNames(Cols{"name"}, sanitize.Query("Paul or Paula")); len(w) == 1 {
assert.Equal(t, "name LIKE '%paul%' OR name LIKE '%paula%'", w[0])
assert.Equal(t, "name LIKE '%Paul%' OR name LIKE '%Paula%'", w[0])
} else {
t.Fatalf("unexpected result: %#v", w)
}

View file

@ -6,8 +6,6 @@ import (
"strings"
"time"
"github.com/photoprism/photoprism/pkg/sanitize"
"github.com/dustin/go-humanize/english"
"github.com/jinzhu/gorm"
@ -97,14 +95,14 @@ func Photos(f form.SearchPhotos) (results PhotoResults, count int, err error) {
s = s.Where("photos.photo_uid IN (?)", strings.Split(strings.ToLower(f.UID), txt.Or))
// Take shortcut?
if f.Album == "" && f.Albums == "" && f.Label == "" && f.Query == "" {
if f.Album == "" && f.Query == "" {
s = s.Order("files.file_primary DESC")
if result := s.Scan(&results); result.Error != nil {
return results, 0, result.Error
}
log.Infof("photos: found %s for %s [%s]", english.Plural(len(results), "result", "results"), f.SerializeAll(), time.Since(start))
log.Debugf("photos: found %s for %s [%s]", english.Plural(len(results), "result", "results"), f.SerializeAll(), time.Since(start))
if f.Merged {
return results.Merged()
@ -141,9 +139,6 @@ func Photos(f form.SearchPhotos) (results PhotoResults, count int, err error) {
}
}
// Clip and normalize search query.
f.Query = sanitize.Query(f.Query)
// Set search filters based on search terms.
if terms := txt.SearchTerms(f.Query); f.Query != "" && len(terms) == 0 {
if f.Title == "" {
@ -517,7 +512,7 @@ func Photos(f form.SearchPhotos) (results PhotoResults, count int, err error) {
return results, 0, err
}
log.Infof("photos: found %s for %s [%s]", english.Plural(len(results), "result", "results"), f.SerializeAll(), time.Since(start))
log.Debugf("photos: found %s for %s [%s]", english.Plural(len(results), "result", "results"), f.SerializeAll(), time.Since(start))
if f.Merged {
return results.Merged()

View file

@ -56,9 +56,6 @@ func Subjects(f form.SearchSubjects) (results SubjectResults, err error) {
return results, nil
}
// Clip to reasonable size and normalize operators.
f.Query = sanitize.Query(f.Query)
if f.Query != "" {
for _, where := range LikeAllNames(Cols{"subj_name", "subj_alias"}, f.Query) {
s = s.Where("?", gorm.Expr(where))

View file

@ -7,7 +7,7 @@ import (
// FileName removes invalid characters from a filename string.
func FileName(s string) string {
if len(s) > 512 || strings.Contains(s, "${") || strings.Contains(s, "/") || strings.Contains(s, "..") {
if s == "" || reject(s, 512) || strings.Contains(s, "/") || strings.Contains(s, "..") {
return ""
}

View file

@ -6,11 +6,11 @@ import (
// Hex removes invalid characters from a hex string and makes it lowercase.
func Hex(s string) string {
if s == "" || len(s) > 1024 || strings.Contains(s, "${") {
if s == "" || reject(s, 1024) {
return ""
}
s = strings.ToLower(s)
s = strings.ToLower(strings.TrimSpace(s))
// Remove all invalid characters.
s = strings.Map(func(r rune) rune {

View file

@ -7,7 +7,7 @@ import (
// IdString removes invalid characters from an id string.
func IdString(s string) string {
if s == "" || len(s) > 256 || strings.Contains(s, "${") {
if s == "" || reject(s, 512) {
return ""
}
@ -30,10 +30,12 @@ func IdUint(s string) uint {
// Largest possible values:
// UInt64: 18446744073709551615 (20 digits)
// UInt32: 4294967295 (10 digits)
if s == "" || len(s) > 10 || strings.Contains(s, "${") {
if s == "" || reject(s, 10) {
return 0
}
s = strings.TrimSpace(s)
result, err := strconv.ParseUint(s, 10, 32)
if err != nil {

View file

@ -8,12 +8,12 @@ import (
// Log sanitizes strings created from user input in response to the log4j debacle.
func Log(s string) string {
if len(s) > 200 || strings.Contains(s, "${") {
if reject(s, 512) {
return "?"
}
// Trim quotes, tabs, and newline characters.
s = strings.Trim(s, "'\"“`\t\n\r")
s = strings.Trim(s, " '\"“`\t\n\r")
// Remove non-printable and other potentially problematic characters.
s = strings.Map(func(r rune) rune {

View file

@ -20,7 +20,7 @@ func TestLog(t *testing.T) {
assert.Equal(t, "?", Log("${https://<host>:<port>/<path>}"))
})
t.Run("Ldap", func(t *testing.T) {
assert.Equal(t, "'User-Agent: ?jndi:ldap://?host?:?port?/?path??'", Log("User-Agent: {jndi:ldap://<host>:<port>/<path>}"))
assert.Equal(t, "?", Log("User-Agent: {jndi:ldap://<host>:<port>/<path>}"))
})
}

View file

@ -8,7 +8,7 @@ import (
// Name sanitizes and capitalizes names.
func Name(name string) string {
if name == "" {
if name == "" || reject(name, txt.ClipDefault) {
return ""
}

View file

@ -7,7 +7,7 @@ import (
// Path removes invalid characters from a path string.
func Path(s string) string {
if len(s) > 512 || strings.Contains(s, "${") || strings.Contains(s, "..") || strings.Contains(s, "//") {
if s == "" || reject(s, 512) || strings.Contains(s, "..") || strings.Contains(s, "//") {
return ""
}

View file

@ -1,26 +1,36 @@
package sanitize
import "strings"
import (
"regexp"
"strings"
)
// spaced returns s with the Space constant prepended and appended.
func spaced(s string) string {
	padded := Space + s
	padded += Space

	return padded
}
// replace performs a case-insensitive, literal string replacement.
//
// Every occurrence of search in subject is substituted with replace, ignoring
// letter case while matching. The search string is escaped with
// regexp.QuoteMeta so that characters such as "+", "." or "|" are matched
// literally instead of being interpreted as regular expression syntax, which
// also rules out a MustCompile panic on an invalid pattern. The replacement
// is inserted verbatim, so "$" is never expanded as a capture group reference.
//
// An empty subject or an empty search string yields subject unchanged.
func replace(subject string, search string, replace string) string {
	if subject == "" || search == "" {
		return subject
	}

	re := regexp.MustCompile("(?i)" + regexp.QuoteMeta(search))

	return re.ReplaceAllLiteralString(subject, replace)
}
// Query replaces search operators with default symbols.
func Query(s string) string {
if s == "" || len(s) > 1024 || strings.Contains(s, "${") {
if s == "" || reject(s, MaxLength) {
return Empty
}
s = strings.ToLower(s)
s = strings.ReplaceAll(s, spaced(EnOr), Or)
s = strings.ReplaceAll(s, spaced(EnAnd), And)
s = strings.ReplaceAll(s, spaced(EnWith), And)
s = strings.ReplaceAll(s, spaced(EnIn), And)
s = strings.ReplaceAll(s, spaced(EnAt), And)
// Normalize.
s = replace(s, spaced(EnOr), Or)
s = replace(s, spaced(EnOr), Or)
s = replace(s, spaced(EnAnd), And)
s = replace(s, spaced(EnWith), And)
s = replace(s, spaced(EnIn), And)
s = replace(s, spaced(EnAt), And)
s = strings.ReplaceAll(s, SpacedPlus, And)
s = strings.ReplaceAll(s, "%", "*")
return strings.Trim(s, "+&|_-=!@$%^(){}\\<>,.;: ")
// Trim.
return strings.Trim(s, "+&|-=$^(){}\\<>,;: \n\r\t")
}

View file

@ -8,7 +8,11 @@ import (
func TestQuery(t *testing.T) {
t.Run("Replace", func(t *testing.T) {
q := Query("table spoon & usa | img% json OR BILL!")
assert.Equal(t, "table spoon & usa | img* json|bill", q)
q := Query("table spoon & usa | img% json OR BILL!\n")
assert.Equal(t, "table spoon & usa | img* json|BILL!", q)
})
t.Run("AndOr", func(t *testing.T) {
q := Query("Jens AND Mander and me Or Kitty AND ")
assert.Equal(t, "Jens&Mander&me|Kitty", q)
})
}

View file

@ -30,3 +30,19 @@ https://docs.photoprism.app/developer-guide/
*/
package sanitize
import "strings"
// MaxLength is a string length limit of 4096 that can be passed to reject as
// its maxLength argument; reject compares it against len(s), i.e. bytes.
const MaxLength = 4096
// reject reports whether s should be refused before any further processing.
//
// A string is rejected when maxLength is greater than zero and s is longer
// than maxLength bytes, or when it contains the "${" sequence or an "ldap://"
// URL scheme. A maxLength of zero or less disables the length check.
//
// The scheme is matched case-insensitively because URL schemes are not
// case-sensitive, so "LDAP://" must not slip past the filter.
func reject(s string, maxLength int) bool {
	if maxLength > 0 && len(s) > maxLength {
		return true
	}

	if strings.Contains(s, "${") {
		return true
	}

	return strings.Contains(strings.ToLower(s), "ldap://")
}

View file

@ -8,18 +8,22 @@ import (
)
// State returns the full, normalized state name.
func State(stateName, countryCode string) string {
func State(s, countryCode string) string {
if s == "" || reject(s, txt.ClipName) {
return Empty
}
// Remove whitespace from name.
stateName = strings.TrimSpace(stateName)
s = strings.TrimSpace(s)
// Empty?
if stateName == "" || stateName == txt.UnknownStateCode {
if s == "" || s == txt.UnknownStateCode {
// State doesn't have a name.
return ""
}
// Remove non-printable and other potentially problematic characters.
stateName = strings.Map(func(r rune) rune {
s = strings.Map(func(r rune) rune {
if !unicode.IsPrint(r) {
return -1
}
@ -30,7 +34,7 @@ func State(stateName, countryCode string) string {
default:
return r
}
}, stateName)
}, s)
// Normalize country code.
countryCode = strings.ToLower(strings.TrimSpace(countryCode))
@ -38,13 +42,13 @@ func State(stateName, countryCode string) string {
// Is the name an abbreviation that should be normalized?
if states, found := txt.StatesByCountry[countryCode]; !found {
// Unknown country.
} else if normalized, found := states[stateName]; !found {
} else if normalized, found := states[s]; !found {
// Unknown abbreviation.
} else if normalized != "" {
// Yes, use normalized name.
stateName = normalized
s = normalized
}
// Return normalized state name.
return stateName
return s
}

View file

@ -6,7 +6,7 @@ import (
// Token removes invalid characters from a token string.
func Token(s string) string {
if s == "" || len(s) > 200 || strings.Contains(s, "${") {
if s == "" || reject(s, 200) {
return ""
}

View file

@ -8,5 +8,11 @@ import (
// Username returns the normalized username (lowercase, whitespace trimmed).
func Username(s string) string {
return strings.ToLower(txt.Clip(s, txt.ClipUsername))
s = strings.TrimSpace(s)
if s == "" || reject(s, txt.ClipUsername) {
return ""
}
return strings.ToLower(s)
}