Backend: Improve FileTitle() function

Signed-off-by: Michael Mayer <michael@liquidbytes.net>
This commit is contained in:
Michael Mayer 2020-07-06 11:31:03 +02:00
parent a01ce5c439
commit 1284091b77
8 changed files with 193 additions and 127 deletions

View file

@ -534,24 +534,24 @@ func (m *Photo) DetailsLoaded() bool {
return m.Details.PhotoID == m.ID
}
// TitleFromFileName returns a photo title based on the file name and/or path.
func (m *Photo) TitleFromFileName() string {
// FileTitle returns a photo title based on the file name and/or path.
func (m *Photo) FileTitle() string {
if !fs.IsID(m.PhotoName) {
if title := txt.TitleFromFileName(m.PhotoName); title != "" {
if title := txt.FileTitle(m.PhotoName); title != "" {
return title
}
}
if m.OriginalName != "" && !fs.IsID(m.OriginalName) {
if title := txt.TitleFromFileName(m.OriginalName); title != "" {
if title := txt.FileTitle(m.OriginalName); title != "" {
return title
} else if title := txt.TitleFromFileName(path.Dir(m.OriginalName)); title != "" {
} else if title := txt.FileTitle(path.Dir(m.OriginalName)); title != "" {
return title
}
}
if m.PhotoPath != "" {
return txt.TitleFromFileName(m.PhotoPath)
return txt.FileTitle(m.PhotoPath)
}
return ""
@ -566,7 +566,7 @@ func (m *Photo) UpdateTitle(labels classify.Labels) error {
var knownLocation bool
oldTitle := m.PhotoTitle
fileTitle := m.TitleFromFileName()
fileTitle := m.FileTitle()
if m.LocationLoaded() {
knownLocation = true

View file

@ -291,10 +291,10 @@ func TestPhoto_DetailsLoaded(t *testing.T) {
})
}
func TestPhoto_TitleFromFileName(t *testing.T) {
func TestPhoto_FileTitle(t *testing.T) {
t.Run("changing-of-the-guard--buckingham-palace_7925318070_o.jpg", func(t *testing.T) {
photo := Photo{PhotoName: "20200102_194030_9EFA9E5E", PhotoPath: "2000/05", OriginalName: "flickr import/changing-of-the-guard--buckingham-palace_7925318070_o.jpg"}
result := photo.TitleFromFileName()
result := photo.FileTitle()
assert.Equal(t, "Changing of the Guard / Buckingham Palace", result)
})
}

View file

@ -9,12 +9,12 @@ import (
// DscNameRegexp matches three non-digit characters, followed by a digit or an
// underscore, followed by four digits, optionally followed by one arbitrary
// character and "JPG".
// NOTE(review): the "." in "(.JPG)?" is unescaped and therefore matches any
// character, not only a literal dot — confirm whether "\\." was intended.
var DscNameRegexp = regexp.MustCompile("\\D{3}[\\d_]\\d{4}(.JPG)?")
// IsInt tests if the file base is an integer number.
func IsInt(base string) bool {
if base == "" {
func IsInt(s string) bool {
if s == "" {
return false
}
for _, r := range base {
for _, r := range s {
if r < 48 || r > 57 {
return false
}
@ -23,6 +23,21 @@ func IsInt(base string) bool {
return true
}
// IsAsciiID reports whether s is made up exclusively of uppercase ASCII
// letters (A-Z) and digits (0-9), like the file name "IQVG4929".
// The empty string is not considered an ID.
func IsAsciiID(s string) bool {
	if len(s) == 0 {
		return false
	}

	for _, c := range s {
		switch {
		case c >= 'A' && c <= 'Z':
			// Uppercase ASCII letter, keep scanning.
		case c >= '0' && c <= '9':
			// ASCII digit, keep scanning.
		default:
			// Anything else (lowercase, punctuation, non-ASCII) disqualifies s.
			return false
		}
	}

	return true
}
// IsID tests if the file name looks like an automatically created identifier.
func IsID(fileName string) bool {
if fileName == "" {
@ -51,5 +66,9 @@ func IsID(fileName string) bool {
return true
}
if IsAsciiID(base) {
return true
}
return false
}

View file

@ -6,6 +6,30 @@ import (
"github.com/stretchr/testify/assert"
)
// TestIsAsciiID checks IsAsciiID against a table of identifiers and file names.
func TestIsAsciiID(t *testing.T) {
	cases := []struct {
		input string
		want  bool
	}{
		{"lt9k3pw1wowuy3c2", false},
		{"dafbfeb8-a129-4e7c-9cf0-e7996a701cdb", false},
		{"6ba7b810-9dad-11d1-80b4-00c04fd430c8", false},
		{"55785BAC-9A4B-4747-B090-EE123FFEE437", false},
		{"550e8400-e29b-11d4-a716-446655440000", false},
		{"IMG_0599.JPG", false},
		{"DSC10599", true},
		{"IQVG4929", true},
		{"DSC_0599", false},
		{"iqVG4929", false},
		{"20091117_203458_ERROR000", false},
		{"20091117_203458_12345678", false},
		{"4B1FEF2D1CF4A5BE38B263E0637EDEAD", true},
		{"123", true},
		{"_", false},
		{"", false},
		{"20191117-153400-Central-Park-New-York-2019-3qy.mov", false},
		{"e98eb86480a72bd585d228a709f0622f90e86cbc.jpg", false},
		{"IMG_8115.jpg", false},
		{"01 Introduction Businessmodel.pdf", false},
		{"A regular file name with 121345678643 numbers", false},
	}

	for _, tc := range cases {
		if tc.want {
			assert.True(t, IsAsciiID(tc.input))
		} else {
			assert.False(t, IsAsciiID(tc.input))
		}
	}
}
func TestIsID(t *testing.T) {
assert.True(t, IsID("lt9k3pw1wowuy3c2"))
assert.True(t, IsID("dafbfeb8-a129-4e7c-9cf0-e7996a701cdb"))
@ -14,6 +38,7 @@ func TestIsID(t *testing.T) {
assert.True(t, IsID("550e8400-e29b-11d4-a716-446655440000"))
assert.True(t, IsID("IMG_0599.JPG"))
assert.True(t, IsID("DSC10599"))
assert.True(t, IsID("IQVG4929"))
assert.True(t, IsID("20091117_203458_ERROR000"))
assert.True(t, IsID("20091117_203458_12345678"))
assert.True(t, IsID("4B1FEF2D1CF4A5BE38B263E0637EDEAD"))

View file

@ -1,15 +1,10 @@
package txt
import (
"regexp"
"strings"
"unicode"
"github.com/photoprism/photoprism/pkg/fs"
)
// FileTitleRegexp matches runs of two or more characters that are each a
// Unicode letter, a hyphen, a comma, an apostrophe, or a colon.
var FileTitleRegexp = regexp.MustCompile("[\\p{L}\\-,':]{2,}")
// isSeparator reports whether the rune could mark a word boundary.
func isSeparator(r rune) bool {
// ASCII alphanumerics and underscore are not separators
@ -83,57 +78,3 @@ func Title(s string) string {
return strings.Join(result, " / ")
}
// TitleFromFileName builds a title from the words found in a file name or path.
//
// s is first reduced with fs.Base(s, true) (presumably to the base name
// without extension — confirm in pkg/fs). Every FileTitleRegexp match is
// lowercased and then filtered:
//   - while no word has been accepted yet, words shorter than 3 bytes and
//     words that are keys of StopWords are skipped;
//   - words for which UnknownWord reports true are always skipped.
//
// Collection stops once 11 words have been accepted. The accepted words are
// joined with single spaces, "--" is turned into " / ", remaining "-" become
// spaces, double spaces are collapsed, and the result is passed to Title.
//
// It returns "" if the reduced name is shorter than 3 bytes, if no word was
// accepted, or if the joined title is shorter than 3 bytes.
func TitleFromFileName(s string) string {
	s = fs.Base(s, true)

	// len counts bytes, so short multi-byte names (e.g. two CJK characters) pass.
	if len(s) < 3 {
		return ""
	}

	words := FileTitleRegexp.FindAllString(s, -1)

	var result []string

	found := 0

	for _, w := range words {
		w = strings.ToLower(w)

		// Leading short words are noise; later ones ("in", "or") are kept.
		if len(w) < 3 && found == 0 {
			continue
		}

		// A title must not start with a stop word.
		if _, ok := StopWords[w]; ok && found == 0 {
			continue
		}

		if UnknownWord(w) {
			continue
		}

		result = append(result, w)

		found++

		if found > 10 {
			break
		}
	}

	if found == 0 {
		return ""
	}

	title := strings.Join(result, " ")

	title = strings.ReplaceAll(title, "--", " / ")
	title = strings.ReplaceAll(title, "-", " ")
	// The replacements above can leave two adjacent spaces (e.g. a word ending
	// in "-" followed by the join separator); collapse them to one.
	title = strings.ReplaceAll(title, "  ", " ")

	if len(title) < 3 {
		return ""
	}

	return Title(title)
}

View file

@ -83,59 +83,3 @@ func TestTitle(t *testing.T) {
assert.Equal(t, "A Horse Is Not a Cow :-)", Title("a horse is not a cow :-)"))
})
}
// TestTitleFromFileName runs TitleFromFileName against a table of file names,
// one subtest per entry.
func TestTitleFromFileName(t *testing.T) {
	cases := []struct {
		name  string
		input string
		want  string
	}{
		{"photoprism", "photoprism: Browse your life in pictures", "PhotoPrism: Browse Your Life in Pictures"},
		{"dash", "photo-lover", "Photo Lover"},
		{"nyc", "BRIDGE in, or by, nyc", "Bridge in, or by, NYC"},
		{"apple", "phil unveils iphone, ipad, ipod, 'airpods', airpod, airplay, imac or macbook 11 pro and max", "Phil Unveils iPhone, iPad, iPod, 'airpods', Airpod, AirPlay, iMac or MacBook"},
		{"IMG_4568", "IMG_4568", ""},
		{"queen-city-yacht-club--toronto-island_7999432607_o.jpg", "queen-city-yacht-club--toronto-island_7999432607_o.jpg", "Queen City Yacht Club / Toronto Island"},
		{"tim-robbins--tiff-2012_7999233420_o.jpg", "tim-robbins--tiff-2012_7999233420_o.jpg", "Tim Robbins / TIFF"},
		{"20200102-204030-Berlin-Germany-2020-3h4.jpg", "20200102-204030-Berlin-Germany-2020-3h4.jpg", "Berlin Germany"},
		{"changing-of-the-guard--buckingham-palace_7925318070_o.jpg", "changing-of-the-guard--buckingham-palace_7925318070_o.jpg", "Changing of the Guard / Buckingham Palace"},

		// File names reported in https://github.com/photoprism/photoprism/issues/361
		{"issue_361_a", "えく - スカイフレア (82063926) .png", "えく スカイフレア"},
		{"issue_361_b", "紅シャケ@お仕事募集中 - モスティマ (81974640) .jpg", "紅シャケ お仕事募集中 モスティマ"},
		{"issue_361_c", "Cyka - swappable mag (82405706) .jpg", "Cyka Swappable Mag"},
		{"issue_361_d", "dishwasher1910 - Friedrich the smol (82201574) 1ページ.jpg", "Dishwasher Friedrich the Smol"},
		{"issue_361_e", "EaycddvU0AAfuUR.jpg", "Eaycddvu Aafuur"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, TitleFromFileName(tc.input))
		})
	}
}

64
pkg/txt/file_title.go Normal file
View file

@ -0,0 +1,64 @@
package txt
import (
"regexp"
"strings"
"github.com/photoprism/photoprism/pkg/fs"
)
// FileTitleRegexp matches runs of two or more characters that are each a
// Unicode letter, a hyphen, a comma, an apostrophe, or a colon.
var FileTitleRegexp = regexp.MustCompile("[\\p{L}\\-,':]{2,}")
// FileTitle builds a title from the words found in a file name or path.
//
// s is first reduced with fs.Base(s, true) (presumably to the base name
// without extension — confirm in pkg/fs). Every FileTitleRegexp match is
// lowercased and then filtered:
//   - while no word has been accepted yet, words shorter than 3 bytes and
//     words that are keys of StopWords are skipped;
//   - words for which UnknownWord reports true are always skipped.
//
// Collection stops once 11 words have been accepted. The accepted words are
// joined with single spaces, "--" is turned into " / ", remaining "-" become
// spaces, double spaces are collapsed, and the result is passed to Title.
//
// It returns "" if the reduced name is shorter than 3 bytes, if no word was
// accepted, or if the joined title is 4 bytes or shorter.
func FileTitle(s string) string {
	s = fs.Base(s, true)

	// len counts bytes, so short multi-byte names (e.g. two CJK characters) pass.
	if len(s) < 3 {
		return ""
	}

	words := FileTitleRegexp.FindAllString(s, -1)

	var result []string

	found := 0

	for _, w := range words {
		w = strings.ToLower(w)

		// Leading short words are noise; later ones ("in", "or") are kept.
		if len(w) < 3 && found == 0 {
			continue
		}

		// A title must not start with a stop word.
		if _, ok := StopWords[w]; ok && found == 0 {
			continue
		}

		if UnknownWord(w) {
			continue
		}

		result = append(result, w)

		found++

		if found > 10 {
			break
		}
	}

	if found == 0 {
		return ""
	}

	title := strings.Join(result, " ")

	title = strings.ReplaceAll(title, "--", " / ")
	title = strings.ReplaceAll(title, "-", " ")
	// The replacements above can leave two adjacent spaces (e.g. a word ending
	// in "-" followed by the join separator); collapse them to one.
	title = strings.ReplaceAll(title, "  ", " ")

	// Very short results are not useful as titles.
	if len(title) <= 4 {
		return ""
	}

	return Title(title)
}

View file

@ -0,0 +1,73 @@
package txt
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestFileTitle runs FileTitle against a table of file names and paths,
// one subtest per entry.
func TestFileTitle(t *testing.T) {
	cases := []struct {
		name  string
		input string
		want  string
	}{
		{"photoprism", "photoprism: Browse your life in pictures", "PhotoPrism: Browse Your Life in Pictures"},
		{"dash", "photo-lover", "Photo Lover"},
		{"nyc", "BRIDGE in, or by, nyc", "Bridge in, or by, NYC"},
		{"apple", "phil unveils iphone, ipad, ipod, 'airpods', airpod, airplay, imac or macbook 11 pro and max", "Phil Unveils iPhone, iPad, iPod, 'airpods', Airpod, AirPlay, iMac or MacBook"},
		{"IMG_4568", "IMG_4568", ""},
		{"queen-city-yacht-club--toronto-island_7999432607_o.jpg", "queen-city-yacht-club--toronto-island_7999432607_o.jpg", "Queen City Yacht Club / Toronto Island"},
		{"tim-robbins--tiff-2012_7999233420_o.jpg", "tim-robbins--tiff-2012_7999233420_o.jpg", "Tim Robbins / TIFF"},
		{"20200102-204030-Berlin-Germany-2020-3h4.jpg", "20200102-204030-Berlin-Germany-2020-3h4.jpg", "Berlin Germany"},
		{"changing-of-the-guard--buckingham-palace_7925318070_o.jpg", "changing-of-the-guard--buckingham-palace_7925318070_o.jpg", "Changing of the Guard / Buckingham Palace"},

		// File names reported in https://github.com/photoprism/photoprism/issues/361
		{"issue_361_a", "えく - スカイフレア (82063926) .png", "えく スカイフレア"},
		{"issue_361_b", "紅シャケ@お仕事募集中 - モスティマ (81974640) .jpg", "紅シャケ お仕事募集中 モスティマ"},
		{"issue_361_c", "Cyka - swappable mag (82405706) .jpg", "Cyka Swappable Mag"},
		{"issue_361_d", "dishwasher1910 - Friedrich the smol (82201574) 1ページ.jpg", "Dishwasher Friedrich the Smol"},
		{"issue_361_e", "EaycddvU0AAfuUR.jpg", "Eaycddvu Aafuur"},

		// TODO: Normalize strings, see https://godoc.org/golang.org/x/text/unicode/norm
		{"Eigene Bilder 1013/2007/oldies/neumühle", "Eigene Bilder 1013/2007/oldies/neumühle", "Neumu"},
		{"Neumühle", "Neumühle", "Neumühle"},
		{"IQVG4929", "IQVG4929.jpg", ""},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, FileTitle(tc.input))
		})
	}
}