photoprism/internal/util/strings.go
Michael Mayer c796431e1b Faster keyword search (proof-of-concept)
Signed-off-by: Michael Mayer <michael@liquidbytes.net>
2019-12-08 15:05:35 +01:00

68 lines
1.2 KiB
Go

package util
import (
	"regexp"
	"strings"
	"unicode"
	"unicode/utf8"
)
// isSeparator reports whether r may act as a boundary between two words.
//
// Within the ASCII range every rune is a separator except letters, digits,
// the underscore and the apostrophe. Outside of ASCII, letters and digits are
// never separators, and of the remaining runes only white space is.
func isSeparator(r rune) bool {
	if r > unicode.MaxASCII {
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			return false
		}

		// Beyond letters and digits, only white space is recognized as a boundary.
		return unicode.IsSpace(r)
	}

	digit := r >= '0' && r <= '9'
	lower := r >= 'a' && r <= 'z'
	upper := r >= 'A' && r <= 'Z'
	joiner := r == '_' || r == '\''

	// Any other ASCII rune (punctuation, white space, control characters) separates words.
	return !(digit || lower || upper || joiner)
}
// UcFirst returns str with its first rune converted to upper case.
//
// The rest of the string is returned unchanged. An empty string yields an
// empty string.
func UcFirst(str string) string {
	if str == "" {
		return ""
	}

	// Decode the first rune together with its encoded width so that the
	// remainder is sliced on a rune boundary; the first rune may occupy up to
	// four bytes, not just one.
	first, size := utf8.DecodeRuneInString(str)

	return string(unicode.ToUpper(first)) + str[size:]
}
// Title returns a copy of s in which every rune that starts a word is mapped
// to its Unicode title case.
//
// A rune starts a word when it is the first rune of s or when the rune before
// it is a separator according to isSeparator. All other runes are copied
// unchanged.
func Title(s string) string {
	// last is the rune visited just before the current one. It is seeded with
	// a space so that the very first rune of s is treated as a word start.
	last := ' '

	titleCase := func(cur rune) rune {
		wordStart := isSeparator(last)
		last = cur

		if wordStart {
			return unicode.ToTitle(cur)
		}

		return cur
	}

	return strings.Map(titleCase, s)
}
// keywordPattern matches runs of three or more characters that are Unicode
// letters or ASCII digits. It is compiled once at package initialization so
// that Keywords does not recompile the expression on every call.
var keywordPattern = regexp.MustCompile(`[\p{L}\d]{3,}`)

// Keywords extracts the search keywords contained in s.
//
// Every match of keywordPattern is converted to lower case and appended to
// the result in order of appearance, unless the lowercased word is a key of
// Stopwords. Duplicates are kept. The result is nil when no keyword is found.
func Keywords(s string) (results []string) {
	for _, w := range keywordPattern.FindAllString(s, -1) {
		w = strings.ToLower(w)

		// Stopwords is consulted with the lowercased form of the word.
		if _, ok := Stopwords[w]; !ok {
			results = append(results, w)
		}
	}

	return results
}