Backend: Improve FileTitle() function
Signed-off-by: Michael Mayer <michael@liquidbytes.net>
This commit is contained in:
parent
a01ce5c439
commit
1284091b77
8 changed files with 193 additions and 127 deletions
|
@ -534,24 +534,24 @@ func (m *Photo) DetailsLoaded() bool {
|
|||
return m.Details.PhotoID == m.ID
|
||||
}
|
||||
|
||||
// TitleFromFileName returns a photo title based on the file name and/or path.
|
||||
func (m *Photo) TitleFromFileName() string {
|
||||
// FileTitle returns a photo title based on the file name and/or path.
|
||||
func (m *Photo) FileTitle() string {
|
||||
if !fs.IsID(m.PhotoName) {
|
||||
if title := txt.TitleFromFileName(m.PhotoName); title != "" {
|
||||
if title := txt.FileTitle(m.PhotoName); title != "" {
|
||||
return title
|
||||
}
|
||||
}
|
||||
|
||||
if m.OriginalName != "" && !fs.IsID(m.OriginalName) {
|
||||
if title := txt.TitleFromFileName(m.OriginalName); title != "" {
|
||||
if title := txt.FileTitle(m.OriginalName); title != "" {
|
||||
return title
|
||||
} else if title := txt.TitleFromFileName(path.Dir(m.OriginalName)); title != "" {
|
||||
} else if title := txt.FileTitle(path.Dir(m.OriginalName)); title != "" {
|
||||
return title
|
||||
}
|
||||
}
|
||||
|
||||
if m.PhotoPath != "" {
|
||||
return txt.TitleFromFileName(m.PhotoPath)
|
||||
return txt.FileTitle(m.PhotoPath)
|
||||
}
|
||||
|
||||
return ""
|
||||
|
@ -566,7 +566,7 @@ func (m *Photo) UpdateTitle(labels classify.Labels) error {
|
|||
var knownLocation bool
|
||||
|
||||
oldTitle := m.PhotoTitle
|
||||
fileTitle := m.TitleFromFileName()
|
||||
fileTitle := m.FileTitle()
|
||||
|
||||
if m.LocationLoaded() {
|
||||
knownLocation = true
|
||||
|
|
|
@ -291,10 +291,10 @@ func TestPhoto_DetailsLoaded(t *testing.T) {
|
|||
})
|
||||
}
|
||||
|
||||
func TestPhoto_TitleFromFileName(t *testing.T) {
|
||||
func TestPhoto_FileTitle(t *testing.T) {
|
||||
t.Run("changing-of-the-guard--buckingham-palace_7925318070_o.jpg", func(t *testing.T) {
|
||||
photo := Photo{PhotoName: "20200102_194030_9EFA9E5E", PhotoPath: "2000/05", OriginalName: "flickr import/changing-of-the-guard--buckingham-palace_7925318070_o.jpg"}
|
||||
result := photo.TitleFromFileName()
|
||||
result := photo.FileTitle()
|
||||
assert.Equal(t, "Changing of the Guard / Buckingham Palace", result)
|
||||
})
|
||||
}
|
||||
|
|
25
pkg/fs/id.go
25
pkg/fs/id.go
|
@ -9,12 +9,12 @@ import (
|
|||
var DscNameRegexp = regexp.MustCompile("\\D{3}[\\d_]\\d{4}(.JPG)?")
|
||||
|
||||
// IsInt tests if the file base is an integer number.
|
||||
func IsInt(base string) bool {
|
||||
if base == "" {
|
||||
func IsInt(s string) bool {
|
||||
if s == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, r := range base {
|
||||
for _, r := range s {
|
||||
if r < 48 || r > 57 {
|
||||
return false
|
||||
}
|
||||
|
@ -23,6 +23,21 @@ func IsInt(base string) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// IsAsciiID reports whether s is a non-empty string that consists solely of
// uppercase ASCII letters (A-Z) and ASCII digits (0-9), like "IQVG4929".
// Any other rune, including lowercase letters, punctuation, and non-ASCII
// characters, makes it return false.
func IsAsciiID(s string) bool {
	if s == "" {
		return false
	}

	for _, r := range s {
		isUpper := r >= 'A' && r <= 'Z'
		isDigit := r >= '0' && r <= '9'

		if !isUpper && !isDigit {
			return false
		}
	}

	return true
}
|
||||
|
||||
// IsID tests if the file name looks like an automatically created identifier.
|
||||
func IsID(fileName string) bool {
|
||||
if fileName == "" {
|
||||
|
@ -51,5 +66,9 @@ func IsID(fileName string) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
if IsAsciiID(base) {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -6,6 +6,30 @@ import (
|
|||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestIsAsciiID(t *testing.T) {
|
||||
assert.False(t, IsAsciiID("lt9k3pw1wowuy3c2"))
|
||||
assert.False(t, IsAsciiID("dafbfeb8-a129-4e7c-9cf0-e7996a701cdb"))
|
||||
assert.False(t, IsAsciiID("6ba7b810-9dad-11d1-80b4-00c04fd430c8"))
|
||||
assert.False(t, IsAsciiID("55785BAC-9A4B-4747-B090-EE123FFEE437"))
|
||||
assert.False(t, IsAsciiID("550e8400-e29b-11d4-a716-446655440000"))
|
||||
assert.False(t, IsAsciiID("IMG_0599.JPG"))
|
||||
assert.True(t, IsAsciiID("DSC10599"))
|
||||
assert.True(t, IsAsciiID("IQVG4929"))
|
||||
assert.False(t, IsAsciiID("DSC_0599"))
|
||||
assert.False(t, IsAsciiID("iqVG4929"))
|
||||
assert.False(t, IsAsciiID("20091117_203458_ERROR000"))
|
||||
assert.False(t, IsAsciiID("20091117_203458_12345678"))
|
||||
assert.True(t, IsAsciiID("4B1FEF2D1CF4A5BE38B263E0637EDEAD"))
|
||||
assert.True(t, IsAsciiID("123"))
|
||||
assert.False(t, IsAsciiID("_"))
|
||||
assert.False(t, IsAsciiID(""))
|
||||
assert.False(t, IsAsciiID("20191117-153400-Central-Park-New-York-2019-3qy.mov"))
|
||||
assert.False(t, IsAsciiID("e98eb86480a72bd585d228a709f0622f90e86cbc.jpg"))
|
||||
assert.False(t, IsAsciiID("IMG_8115.jpg"))
|
||||
assert.False(t, IsAsciiID("01 Introduction Businessmodel.pdf"))
|
||||
assert.False(t, IsAsciiID("A regular file name with 121345678643 numbers"))
|
||||
}
|
||||
|
||||
func TestIsID(t *testing.T) {
|
||||
assert.True(t, IsID("lt9k3pw1wowuy3c2"))
|
||||
assert.True(t, IsID("dafbfeb8-a129-4e7c-9cf0-e7996a701cdb"))
|
||||
|
@ -14,6 +38,7 @@ func TestIsID(t *testing.T) {
|
|||
assert.True(t, IsID("550e8400-e29b-11d4-a716-446655440000"))
|
||||
assert.True(t, IsID("IMG_0599.JPG"))
|
||||
assert.True(t, IsID("DSC10599"))
|
||||
assert.True(t, IsID("IQVG4929"))
|
||||
assert.True(t, IsID("20091117_203458_ERROR000"))
|
||||
assert.True(t, IsID("20091117_203458_12345678"))
|
||||
assert.True(t, IsID("4B1FEF2D1CF4A5BE38B263E0637EDEAD"))
|
||||
|
|
|
@ -1,15 +1,10 @@
|
|||
package txt
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/photoprism/photoprism/pkg/fs"
|
||||
)
|
||||
|
||||
var FileTitleRegexp = regexp.MustCompile("[\\p{L}\\-,':]{2,}")
|
||||
|
||||
// isSeparator reports whether the rune could mark a word boundary.
|
||||
func isSeparator(r rune) bool {
|
||||
// ASCII alphanumerics and underscore are not separators
|
||||
|
@ -83,57 +78,3 @@ func Title(s string) string {
|
|||
|
||||
return strings.Join(result, " / ")
|
||||
}
|
||||
|
||||
// TitleFromFileName returns the string with the first characters of each word converted to uppercase.
|
||||
func TitleFromFileName(s string) string {
|
||||
s = fs.Base(s, true)
|
||||
|
||||
if len(s) < 3 {
|
||||
return ""
|
||||
}
|
||||
|
||||
words := FileTitleRegexp.FindAllString(s, -1)
|
||||
var result []string
|
||||
|
||||
found := 0
|
||||
|
||||
for _, w := range words {
|
||||
w = strings.ToLower(w)
|
||||
|
||||
if len(w) < 3 && found == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := StopWords[w]; ok && found == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if UnknownWord(w) {
|
||||
continue
|
||||
}
|
||||
|
||||
result = append(result, w)
|
||||
|
||||
found++
|
||||
|
||||
if found > 10 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if found == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
title := strings.Join(result, " ")
|
||||
|
||||
title = strings.ReplaceAll(title, "--", " / ")
|
||||
title = strings.ReplaceAll(title, "-", " ")
|
||||
title = strings.ReplaceAll(title, " ", " ")
|
||||
|
||||
if len(title) < 3 {
|
||||
return ""
|
||||
}
|
||||
|
||||
return Title(title)
|
||||
}
|
||||
|
|
|
@ -83,59 +83,3 @@ func TestTitle(t *testing.T) {
|
|||
assert.Equal(t, "A Horse Is Not a Cow :-)", Title("a horse is not a cow :-)"))
|
||||
})
|
||||
}
|
||||
|
||||
func TestTitleFromFileName(t *testing.T) {
|
||||
t.Run("photoprism", func(t *testing.T) {
|
||||
assert.Equal(t, "PhotoPrism: Browse Your Life in Pictures", TitleFromFileName("photoprism: Browse your life in pictures"))
|
||||
})
|
||||
t.Run("dash", func(t *testing.T) {
|
||||
assert.Equal(t, "Photo Lover", TitleFromFileName("photo-lover"))
|
||||
})
|
||||
t.Run("nyc", func(t *testing.T) {
|
||||
assert.Equal(t, "Bridge in, or by, NYC", TitleFromFileName("BRIDGE in, or by, nyc"))
|
||||
})
|
||||
t.Run("apple", func(t *testing.T) {
|
||||
assert.Equal(t, "Phil Unveils iPhone, iPad, iPod, 'airpods', Airpod, AirPlay, iMac or MacBook", TitleFromFileName("phil unveils iphone, ipad, ipod, 'airpods', airpod, airplay, imac or macbook 11 pro and max"))
|
||||
})
|
||||
t.Run("IMG_4568", func(t *testing.T) {
|
||||
assert.Equal(t, "", TitleFromFileName("IMG_4568"))
|
||||
})
|
||||
t.Run("queen-city-yacht-club--toronto-island_7999432607_o.jpg", func(t *testing.T) {
|
||||
assert.Equal(t, "Queen City Yacht Club / Toronto Island", TitleFromFileName("queen-city-yacht-club--toronto-island_7999432607_o.jpg"))
|
||||
})
|
||||
t.Run("tim-robbins--tiff-2012_7999233420_o.jpg", func(t *testing.T) {
|
||||
assert.Equal(t, "Tim Robbins / TIFF", TitleFromFileName("tim-robbins--tiff-2012_7999233420_o.jpg"))
|
||||
})
|
||||
t.Run("20200102-204030-Berlin-Germany-2020-3h4.jpg", func(t *testing.T) {
|
||||
assert.Equal(t, "Berlin Germany", TitleFromFileName("20200102-204030-Berlin-Germany-2020-3h4.jpg"))
|
||||
})
|
||||
t.Run("changing-of-the-guard--buckingham-palace_7925318070_o.jpg", func(t *testing.T) {
|
||||
assert.Equal(t, "Changing of the Guard / Buckingham Palace", TitleFromFileName("changing-of-the-guard--buckingham-palace_7925318070_o.jpg"))
|
||||
})
|
||||
/*
|
||||
Additional tests for https://github.com/photoprism/photoprism/issues/361
|
||||
|
||||
-rw-r--r-- 1 root root 813009 Jun 8 23:42 えく - スカイフレア (82063926) .png
|
||||
-rw-r--r-- 1 root root 161749 Jun 6 15:48 紅シャケ@お仕事募集中 - モスティマ (81974640) .jpg
|
||||
[root@docker Pictures]# ls -l Originals/al
|
||||
total 1276
|
||||
-rw-r--r-- 1 root root 451062 Jun 18 19:00 Cyka - swappable mag (82405706) .jpg
|
||||
-rw-r--r-- 1 root root 662922 Jun 15 21:18 dishwasher1910 - Friedrich the smol (82201574) 1ページ.jpg
|
||||
-rw-r--r-- 1 root root 185971 Jun 19 21:07 EaycddvU0AAfuUR.jpg
|
||||
*/
|
||||
t.Run("issue_361_a", func(t *testing.T) {
|
||||
assert.Equal(t, "えく スカイフレア", TitleFromFileName("えく - スカイフレア (82063926) .png"))
|
||||
})
|
||||
t.Run("issue_361_b", func(t *testing.T) {
|
||||
assert.Equal(t, "紅シャケ お仕事募集中 モスティマ", TitleFromFileName("紅シャケ@お仕事募集中 - モスティマ (81974640) .jpg"))
|
||||
})
|
||||
t.Run("issue_361_c", func(t *testing.T) {
|
||||
assert.Equal(t, "Cyka Swappable Mag", TitleFromFileName("Cyka - swappable mag (82405706) .jpg"))
|
||||
})
|
||||
t.Run("issue_361_d", func(t *testing.T) {
|
||||
assert.Equal(t, "Dishwasher Friedrich the Smol", TitleFromFileName("dishwasher1910 - Friedrich the smol (82201574) 1ページ.jpg"))
|
||||
})
|
||||
t.Run("issue_361_e", func(t *testing.T) {
|
||||
assert.Equal(t, "Eaycddvu Aafuur", TitleFromFileName("EaycddvU0AAfuUR.jpg"))
|
||||
})
|
||||
}
|
||||
|
|
64
pkg/txt/file_title.go
Normal file
64
pkg/txt/file_title.go
Normal file
|
@ -0,0 +1,64 @@
|
|||
package txt
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/photoprism/photoprism/pkg/fs"
|
||||
)
|
||||
|
||||
var FileTitleRegexp = regexp.MustCompile("[\\p{L}\\-,':]{2,}")
|
||||
|
||||
// FileTitle returns the string with the first characters of each word converted to uppercase.
|
||||
func FileTitle(s string) string {
|
||||
s = fs.Base(s, true)
|
||||
|
||||
if len(s) < 3 {
|
||||
return ""
|
||||
}
|
||||
|
||||
words := FileTitleRegexp.FindAllString(s, -1)
|
||||
var result []string
|
||||
|
||||
found := 0
|
||||
|
||||
for _, w := range words {
|
||||
w = strings.ToLower(w)
|
||||
|
||||
if len(w) < 3 && found == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := StopWords[w]; ok && found == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if UnknownWord(w) {
|
||||
continue
|
||||
}
|
||||
|
||||
result = append(result, w)
|
||||
|
||||
found++
|
||||
|
||||
if found > 10 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if found == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
title := strings.Join(result, " ")
|
||||
|
||||
title = strings.ReplaceAll(title, "--", " / ")
|
||||
title = strings.ReplaceAll(title, "-", " ")
|
||||
title = strings.ReplaceAll(title, " ", " ")
|
||||
|
||||
if len(title) <= 4 {
|
||||
return ""
|
||||
}
|
||||
|
||||
return Title(title)
|
||||
}
|
73
pkg/txt/file_title_test.go
Normal file
73
pkg/txt/file_title_test.go
Normal file
|
@ -0,0 +1,73 @@
|
|||
package txt
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestFileTitle(t *testing.T) {
|
||||
t.Run("photoprism", func(t *testing.T) {
|
||||
assert.Equal(t, "PhotoPrism: Browse Your Life in Pictures", FileTitle("photoprism: Browse your life in pictures"))
|
||||
})
|
||||
t.Run("dash", func(t *testing.T) {
|
||||
assert.Equal(t, "Photo Lover", FileTitle("photo-lover"))
|
||||
})
|
||||
t.Run("nyc", func(t *testing.T) {
|
||||
assert.Equal(t, "Bridge in, or by, NYC", FileTitle("BRIDGE in, or by, nyc"))
|
||||
})
|
||||
t.Run("apple", func(t *testing.T) {
|
||||
assert.Equal(t, "Phil Unveils iPhone, iPad, iPod, 'airpods', Airpod, AirPlay, iMac or MacBook", FileTitle("phil unveils iphone, ipad, ipod, 'airpods', airpod, airplay, imac or macbook 11 pro and max"))
|
||||
})
|
||||
t.Run("IMG_4568", func(t *testing.T) {
|
||||
assert.Equal(t, "", FileTitle("IMG_4568"))
|
||||
})
|
||||
t.Run("queen-city-yacht-club--toronto-island_7999432607_o.jpg", func(t *testing.T) {
|
||||
assert.Equal(t, "Queen City Yacht Club / Toronto Island", FileTitle("queen-city-yacht-club--toronto-island_7999432607_o.jpg"))
|
||||
})
|
||||
t.Run("tim-robbins--tiff-2012_7999233420_o.jpg", func(t *testing.T) {
|
||||
assert.Equal(t, "Tim Robbins / TIFF", FileTitle("tim-robbins--tiff-2012_7999233420_o.jpg"))
|
||||
})
|
||||
t.Run("20200102-204030-Berlin-Germany-2020-3h4.jpg", func(t *testing.T) {
|
||||
assert.Equal(t, "Berlin Germany", FileTitle("20200102-204030-Berlin-Germany-2020-3h4.jpg"))
|
||||
})
|
||||
t.Run("changing-of-the-guard--buckingham-palace_7925318070_o.jpg", func(t *testing.T) {
|
||||
assert.Equal(t, "Changing of the Guard / Buckingham Palace", FileTitle("changing-of-the-guard--buckingham-palace_7925318070_o.jpg"))
|
||||
})
|
||||
/*
|
||||
Additional tests for https://github.com/photoprism/photoprism/issues/361
|
||||
|
||||
-rw-r--r-- 1 root root 813009 Jun 8 23:42 えく - スカイフレア (82063926) .png
|
||||
-rw-r--r-- 1 root root 161749 Jun 6 15:48 紅シャケ@お仕事募集中 - モスティマ (81974640) .jpg
|
||||
[root@docker Pictures]# ls -l Originals/al
|
||||
total 1276
|
||||
-rw-r--r-- 1 root root 451062 Jun 18 19:00 Cyka - swappable mag (82405706) .jpg
|
||||
-rw-r--r-- 1 root root 662922 Jun 15 21:18 dishwasher1910 - Friedrich the smol (82201574) 1ページ.jpg
|
||||
-rw-r--r-- 1 root root 185971 Jun 19 21:07 EaycddvU0AAfuUR.jpg
|
||||
*/
|
||||
t.Run("issue_361_a", func(t *testing.T) {
|
||||
assert.Equal(t, "えく スカイフレア", FileTitle("えく - スカイフレア (82063926) .png"))
|
||||
})
|
||||
t.Run("issue_361_b", func(t *testing.T) {
|
||||
assert.Equal(t, "紅シャケ お仕事募集中 モスティマ", FileTitle("紅シャケ@お仕事募集中 - モスティマ (81974640) .jpg"))
|
||||
})
|
||||
t.Run("issue_361_c", func(t *testing.T) {
|
||||
assert.Equal(t, "Cyka Swappable Mag", FileTitle("Cyka - swappable mag (82405706) .jpg"))
|
||||
})
|
||||
t.Run("issue_361_d", func(t *testing.T) {
|
||||
assert.Equal(t, "Dishwasher Friedrich the Smol", FileTitle("dishwasher1910 - Friedrich the smol (82201574) 1ページ.jpg"))
|
||||
})
|
||||
t.Run("issue_361_e", func(t *testing.T) {
|
||||
assert.Equal(t, "Eaycddvu Aafuur", FileTitle("EaycddvU0AAfuUR.jpg"))
|
||||
})
|
||||
t.Run("Eigene Bilder 1013/2007/oldies/neumühle", func(t *testing.T) {
|
||||
// TODO: Normalize strings, see https://godoc.org/golang.org/x/text/unicode/norm
|
||||
assert.Equal(t, "Neumu", FileTitle("Eigene Bilder 1013/2007/oldies/neumühle"))
|
||||
})
|
||||
t.Run("Neumühle", func(t *testing.T) {
|
||||
assert.Equal(t, "Neumühle", FileTitle("Neumühle"))
|
||||
})
|
||||
t.Run("IQVG4929", func(t *testing.T) {
|
||||
assert.Equal(t, "", FileTitle("IQVG4929.jpg"))
|
||||
})
|
||||
}
|
Loading…
Reference in a new issue