photoprism/pkg/txt/file_title.go
2021-01-27 15:21:54 +01:00

70 lines
1.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package txt
import (
"regexp"
"strings"
"github.com/photoprism/photoprism/pkg/fs"
)
// FileTitleRegexp matches the word candidates FileTitle extracts from a file
// name: either a run of Unicode letters and the punctuation -,':&+!? or an
// optional " & " / " + " connector surrounded by spaces. Because the connector
// group is optional, the expression can also produce empty matches.
var FileTitleRegexp = regexp.MustCompile(`[\p{L}\-,':&+!?]{1,}|( [&+] )?`)
// FileTitle derives a title from a file name or path.
//
// The input is first reduced with fs.BasePrefix and then split into word
// candidates using FileTitleRegexp. Each candidate is lower-cased and
// filtered: one-byte ASCII words are always dropped, ASCII words shorter than
// three bytes and stop words are dropped while no word has been kept yet, and
// anything UnknownWord rejects is dropped as well. Collection stops once more
// than ten words have been kept.
//
// The kept words are joined with spaces, "--" becomes " / ", remaining dashes
// become spaces, runs of spaces are collapsed, and trailing small words are
// removed. The result is passed to Title, which converts the first character
// of each word to uppercase.
//
// An empty string is returned if the reduced name is ASCII and shorter than
// three bytes, if no word was kept, or if the assembled title is ASCII and at
// most four bytes long.
func FileTitle(s string) string {
	s = fs.BasePrefix(s, true)

	if len(s) < 3 && IsASCII(s) {
		return ""
	}

	// NOTE(review): the optional group in FileTitleRegexp can yield empty
	// matches; they are presumably discarded by the filters below - confirm
	// that UnknownWord("") reports true.
	words := FileTitleRegexp.FindAllString(s, -1)

	var result []string

	found := 0

	for _, w := range words {
		w = strings.ToLower(w)

		// Drop single-byte ASCII words everywhere, and ASCII words shorter
		// than three bytes as long as the title has not started yet.
		if IsASCII(w) && (len(w) < 3 && found == 0 || len(w) == 1) {
			continue
		}

		// Stop words are only skipped at the beginning of the title.
		if _, ok := StopWords[w]; ok && found == 0 {
			continue
		}

		if UnknownWord(w) {
			continue
		}

		result = append(result, w)
		found++

		if found > 10 {
			break
		}
	}

	if found == 0 {
		return ""
	}

	title := strings.Join(result, " ")

	title = strings.ReplaceAll(title, "--", " / ")
	title = strings.ReplaceAll(title, "-", " ")

	// Collapse runs of spaces left behind by the join and the replacements
	// above (e.g. a " & " match joined with " ", or a dash next to a space).
	for strings.Contains(title, "  ") {
		title = strings.ReplaceAll(title, "  ", " ")
	}

	// Remove small words from title ending. Map iteration order is not
	// specified, so a single pass would trim a title ending in several small
	// words differently from run to run; repeat until nothing changes so the
	// result is deterministic.
	for trimmed := true; trimmed; {
		trimmed = false

		for w := range SmallWords {
			if t := strings.TrimSuffix(title, " "+w); t != title {
				title = t
				trimmed = true
			}
		}
	}

	if len(title) <= 4 && IsASCII(title) {
		return ""
	}

	return Title(title)
}