Backend: Refactor txt package #260

Signed-off-by: Michael Mayer <michael@liquidbytes.net>
2020-05-31 15:17:01 +02:00 · 2020-05-31 15:17:01 +02:00 · 5c2ae9e698
commit 5c2ae9e698
parent 97cab01e6d
5 changed files with 252 additions and 235 deletions
--- a/pkg/txt/capitalization.go
+++ b/pkg/txt/capitalization.go
@ -0,0 +1,167 @@
+package txt
+
+import (
+	"regexp"
+	"strings"
+	"unicode"
+
+	"github.com/photoprism/photoprism/pkg/fs"
+)
+
+// FileTitleRegexp matches runs of two or more characters that are Unicode
+// letters or hyphens; TitleFromFileName uses it to pick words out of a file name.
+var FileTitleRegexp = regexp.MustCompile("[\\p{L}\\-]{2,}")
+
+// TitleReplacements maps substrings, spelled the way they look after
+// title-casing, to their preferred spelling. Title applies every entry with
+// strings.ReplaceAll, so a key matches anywhere in the text, including inside
+// longer words. Keys that contain a leading or trailing space only match where
+// that space is actually present, e.g. not at the very end of the string.
+var TitleReplacements = map[string]string{
+	// Abbreviations that are written in upper case.
+	"Nyc":     "NYC",
+	"Ny ":     "NY ",
+	"Uae":     "UAE",
+	"Usa":     "USA",
+	"Amd ":    "AMD ",
+	"Tiff":    "TIFF",
+	"Ibm":     "IBM",
+	"Usd":     "USD",
+	"Gbp":     "GBP",
+	"Chf":     "CHF",
+	"Ceo":     "CEO",
+	"Cto":     "CTO",
+	"Cfo":     "CFO",
+	"Cia ":    "CIA ",
+	"Fbi":     "FBI",
+	"Bnd":     "BND",
+	"Fsb":     "FSB",
+	"Nsa":     "NSA",
+	"Lax ":    "LAX ",
+	"Sfx":     "SFX",
+	"Ber ":    "BER ",
+	"Sfo":     "SFO",
+	"Lh ":     "LH ",
+	"Lhr":     "LHR",
+	"Afl ":    "AFL ",
+	"Nrl":     "NRL",
+	"Nsw":     "NSW",
+	"Qld":     "QLD",
+	"Vic ":    "VIC ",
+	// Product names with their own mixed-case spelling.
+	"Iphone":  "iPhone",
+	"Imac":    "iMac",
+	"Ipad":    "iPad",
+	"Macbook": "MacBook",
+	// Short words that stay in lower case when surrounded by other words.
+	" And ":   " and ",
+	" Or ":    " or ",
+	" A ":     " a ",
+	" An ":    " an ",
+	" To ":    " to ",
+	" At ":    " at ",
+	" By ":    " by ",
+	" But ":   " but ",
+	" For ":   " for ",
+	" Of ":    " of ",
+	" The ":   " the ",
+	" On ":    " on ",
+	" From ":  " from ",
+	" With ":  " with ",
+}
+
+// isSeparator reports whether r may delimit two words.
+//
+// Within ASCII, every rune except letters, digits, the underscore and the
+// apostrophe counts as a separator. Beyond ASCII, letters and digits are never
+// separators, and of the remaining runes only white space is.
+func isSeparator(r rune) bool {
+	if r > 0x7F {
+		if unicode.IsLetter(r) || unicode.IsDigit(r) {
+			return false
+		}
+
+		// Anything else is only a separator when it is white space.
+		return unicode.IsSpace(r)
+	}
+
+	isDigit := r >= '0' && r <= '9'
+	isLower := r >= 'a' && r <= 'z'
+	isUpper := r >= 'A' && r <= 'Z'
+	isWordMark := r == '_' || r == '\''
+
+	return !(isDigit || isLower || isUpper || isWordMark)
+}
+
+// UcFirst returns str with its first character converted to upper case.
+//
+// The rest of the string is returned unchanged. The first character is treated
+// as a whole rune, so a multi-byte UTF-8 character such as "é" is converted
+// without leaving stray continuation bytes behind. An empty string yields an
+// empty string.
+func UcFirst(str string) string {
+	var first rune
+	seen := false
+
+	for i, r := range str {
+		if seen {
+			// i is the byte offset of the second rune, which equals the encoded
+			// width of the first one; slicing there keeps the tail intact.
+			return string(unicode.ToUpper(first)) + str[i:]
+		}
+
+		first, seen = r, true
+	}
+
+	if !seen {
+		return ""
+	}
+
+	// The string consists of a single rune.
+	return string(unicode.ToUpper(first))
+}
+
+// Title returns s formatted as a title.
+//
+// Surrounding white space is trimmed and underscores become spaces. Every rune
+// that follows a separator (see isSeparator), as well as the very first rune,
+// is converted to title case. Finally each entry of TitleReplacements is
+// applied to the result. Map iteration order is unspecified in Go, so the order
+// in which the replacements run is not fixed.
+func Title(s string) string {
+	s = strings.ReplaceAll(strings.TrimSpace(s), "_", " ")
+
+	// Start with a space so that the first rune counts as a word start.
+	last := ' '
+
+	capitalize := func(r rune) rune {
+		atWordStart := isSeparator(last)
+		last = r
+
+		if atWordStart {
+			return unicode.ToTitle(r)
+		}
+
+		return r
+	}
+
+	titled := strings.Map(capitalize, s)
+
+	for from, to := range TitleReplacements {
+		titled = strings.ReplaceAll(titled, from, to)
+	}
+
+	return titled
+}
+
+// TitleFromFileName derives a title from a file name and returns an empty
+// string if none can be found.
+//
+// The name is first reduced with fs.Base(s, true) (presumably the base name
+// without extension; confirm in pkg/fs). Words are then extracted with
+// FileTitleRegexp and lower-cased. Leading words that are shorter than three
+// bytes or listed in Stopwords are skipped, and at most ten words are kept.
+// In the joined words "--" becomes " / ", "-" becomes a space, and double
+// spaces are collapsed once before the result is passed to Title.
+func TitleFromFileName(s string) string {
+	s = fs.Base(s, true)
+
+	if len(s) < 3 {
+		return ""
+	}
+
+	var kept []string
+
+	for _, word := range FileTitleRegexp.FindAllString(s, -1) {
+		word = strings.ToLower(word)
+
+		// Only words in front of the first accepted word are filtered.
+		if len(kept) == 0 {
+			if len(word) < 3 {
+				continue
+			}
+
+			if _, isStopword := Stopwords[word]; isStopword {
+				continue
+			}
+		}
+
+		kept = append(kept, word)
+
+		if len(kept) >= 10 {
+			break
+		}
+	}
+
+	if len(kept) == 0 {
+		return ""
+	}
+
+	title := strings.Join(kept, " ")
+
+	// The order matters: double hyphens must be handled before single ones.
+	for _, sub := range [][2]string{{"--", " / "}, {"-", " "}, {"  ", " "}} {
+		title = strings.ReplaceAll(title, sub[0], sub[1])
+	}
+
+	if len(title) < 3 {
+		return ""
+	}
+
+	return Title(title)
+}
--- a/pkg/txt/capitalization_test.go
+++ b/pkg/txt/capitalization_test.go
@ -0,0 +1,85 @@
+package txt
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestIsSeparator checks isSeparator for ASCII letters and digits, punctuation,
+// white space, the apostrophe, and two non-ASCII runes.
+func TestIsSeparator(t *testing.T) {
+	t.Run("rune A", func(t *testing.T) {
+		assert.Equal(t, false, isSeparator('A'))
+	})
+	t.Run("rune 99", func(t *testing.T) {
+		assert.Equal(t, false, isSeparator('9'))
+	})
+	t.Run("rune /", func(t *testing.T) {
+		assert.Equal(t, true, isSeparator('/'))
+	})
+	t.Run("rune \\", func(t *testing.T) {
+		assert.Equal(t, true, isSeparator('\\'))
+	})
+	// A non-ASCII symbol that is neither letter, digit nor space is not a separator.
+	t.Run("rune ♥ ", func(t *testing.T) {
+		assert.Equal(t, false, isSeparator('♥'))
+	})
+	t.Run("rune  space", func(t *testing.T) {
+		assert.Equal(t, true, isSeparator(' '))
+	})
+	// The apostrophe is kept inside words.
+	t.Run("rune '", func(t *testing.T) {
+		assert.Equal(t, false, isSeparator('\''))
+	})
+	t.Run("rune ý", func(t *testing.T) {
+		assert.Equal(t, false, isSeparator('ý'))
+	})
+}
+
+// TestUcFirst checks that UcFirst upper-cases only the first character and
+// handles an already capitalized word and the empty string.
+func TestUcFirst(t *testing.T) {
+	t.Run("photo-lover", func(t *testing.T) {
+		assert.Equal(t, "Photo-lover", UcFirst("photo-lover"))
+	})
+	// The input is already capitalized and must be returned unchanged.
+	t.Run("cat", func(t *testing.T) {
+		assert.Equal(t, "Cat", UcFirst("Cat"))
+	})
+	t.Run("empty string", func(t *testing.T) {
+		assert.Equal(t, "", UcFirst(""))
+	})
+}
+
+// TestTitle checks that Title capitalizes each word of a sentence and the part
+// after a hyphen.
+func TestTitle(t *testing.T) {
+	t.Run("Browse your life in pictures", func(t *testing.T) {
+		assert.Equal(t, "Browse Your Life In Pictures", Title("Browse your life in pictures"))
+	})
+	// The hyphen is a separator, so the following letter is capitalized too.
+	t.Run("photo-lover", func(t *testing.T) {
+		assert.Equal(t, "Photo-Lover", Title("photo-lover"))
+	})
+}
+
+// TestTitleFromFileName checks titles derived from plain phrases and from
+// typical image file names with hyphens, numeric IDs and extensions.
+func TestTitleFromFileName(t *testing.T) {
+	t.Run("Browse your life in pictures", func(t *testing.T) {
+		assert.Equal(t, "Browse Your Life In Pictures", TitleFromFileName("Browse your life in pictures"))
+	})
+	// Single hyphens are turned into spaces.
+	t.Run("photo-lover", func(t *testing.T) {
+		assert.Equal(t, "Photo Lover", TitleFromFileName("photo-lover"))
+	})
+	t.Run("BRIDGE in nyc", func(t *testing.T) {
+		assert.Equal(t, "Bridge In NYC", TitleFromFileName("BRIDGE in nyc"))
+	})
+	// Numbers and punctuation are dropped; product names get their usual spelling.
+	t.Run("phil unveils iphone, ipad, imac or macbook 11 pro and max", func(t *testing.T) {
+		assert.Equal(t, "Phil Unveils iPhone iPad iMac or MacBook Pro and Max", TitleFromFileName("phil unveils iphone, ipad, imac or macbook 11 pro and max"))
+	})
+	// A camera-style name yields no title.
+	t.Run("IMG_4568", func(t *testing.T) {
+		assert.Equal(t, "", TitleFromFileName("IMG_4568"))
+	})
+	// Double hyphens are rendered as " / ".
+	t.Run("queen-city-yacht-club--toronto-island_7999432607_o.jpg", func(t *testing.T) {
+		assert.Equal(t, "Queen City Yacht Club / Toronto Island", TitleFromFileName("queen-city-yacht-club--toronto-island_7999432607_o.jpg"))
+	})
+	t.Run("tim-robbins--tiff-2012_7999233420_o.jpg", func(t *testing.T) {
+		assert.Equal(t, "Tim Robbins / TIFF", TitleFromFileName("tim-robbins--tiff-2012_7999233420_o.jpg"))
+	})
+	t.Run("20200102-204030-Berlin-Germany-2020-3h4.jpg", func(t *testing.T) {
+		assert.Equal(t, "Berlin Germany", TitleFromFileName("20200102-204030-Berlin-Germany-2020-3h4.jpg"))
+	})
+	t.Run("changing-of-the-guard--buckingham-palace_7925318070_o.jpg", func(t *testing.T) {
+		assert.Equal(t, "Changing of the Guard / Buckingham Palace", TitleFromFileName("changing-of-the-guard--buckingham-palace_7925318070_o.jpg"))
+	})
+}
--- a/pkg/txt/replacements.go
+++ b/pkg/txt/replacements.go
@ -1,48 +0,0 @@
-package txt
-
-var TitleReplacements = map[string]string{
-	"Nyc":     "NYC",
-	"Ny ":     "NY ",
-	"Uae":     "UAE",
-	"Usa":     "USA",
-	"Amd ":    "AMD ",
-	"Tiff":    "TIFF",
-	"Ibm":     "IBM",
-	"Usd":     "USD",
-	"Gbp":     "GBP",
-	"Chf":     "CHF",
-	"Ceo":     "CEO",
-	"Cto":     "CTO",
-	"Cfo":     "CFO",
-	"Cia ":    "CIA ",
-	"Fbi":     "FBI",
-	"Bnd":     "BND",
-	"Fsb":     "FSB",
-	"Nsa":     "NSA",
-	"Lax ":    "LAX ",
-	"Sfx":     "SFX",
-	"Ber ":    "BER ",
-	"Sfo":     "SFO",
-	"Lh ":     "LH ",
-	"Lhr":     "LHR",
-	"Afl ":    "AFL ",
-	"Nrl":     "NRL",
-	"Nsw":     "NSW",
-	"Qld":     "QLD",
-	"Vic ":    "VIC ",
-	"Iphone":  "iPhone",
-	"Imac":    "iMac",
-	"Ipad":    "iPad",
-	"Macbook": "MacBook",
-	" And ":   " and ",
-	" Or ":    " or ",
-	" A ":     " a ",
-	" An ":    " an ",
-	" To ":    " to ",
-	" At ":    " at ",
-	" By ":    " by ",
-	" But ":   " but ",
-	" For ":   " for ",
-	" Of ":    " of ",
-	" The ":   " the ",
-}
--- a/pkg/txt/strings.go
+++ b/pkg/txt/strings.go
@ -3,124 +3,15 @@ package txt
 import (
 	"regexp"
 	"strings"
-	"unicode"
-
-	"github.com/photoprism/photoprism/pkg/fs"
 )

 var ContainsNumberRegexp = regexp.MustCompile("\\d+")
-var FileTitleRegexp = regexp.MustCompile("[\\p{L}\\-]{2,}")

 // ContainsNumber returns true if string contains a number.
 func ContainsNumber(s string) bool {
 	return ContainsNumberRegexp.MatchString(s)
 }

-// isSeparator reports whether the rune could mark a word boundary.
-func isSeparator(r rune) bool {
-	// ASCII alphanumerics and underscore are not separators
-	if r <= 0x7F {
-		switch {
-		case '0' <= r && r <= '9':
-			return false
-		case 'a' <= r && r <= 'z':
-			return false
-		case 'A' <= r && r <= 'Z':
-			return false
-		case r == '_', r == '\'':
-			return false
-		}
-		return true
-	}
-	// Letters and digits are not separators
-	if unicode.IsLetter(r) || unicode.IsDigit(r) {
-		return false
-	}
-	// Otherwise, all we can do for now is treat spaces as separators.
-	return unicode.IsSpace(r)
-}
-
-// UcFirst returns the string with the first character converted to uppercase.
-func UcFirst(str string) string {
-	for i, v := range str {
-		return string(unicode.ToUpper(v)) + str[i+1:]
-	}
-	return ""
-}
-
-// Title returns the string with the first characters of each word converted to uppercase.
-func Title(s string) string {
-	s = strings.TrimSpace(s)
-	s = strings.ReplaceAll(s, "_", " ")
-
-	prev := ' '
-	result := strings.Map(
-		func(r rune) rune {
-			if isSeparator(prev) {
-				prev = r
-				return unicode.ToTitle(r)
-			}
-			prev = r
-			return r
-		},
-		s)
-
-	for match, abbr := range TitleReplacements {
-		result = strings.ReplaceAll(result, match, abbr)
-	}
-
-	return result
-}
-
-// TitleFromFileName returns the string with the first characters of each word converted to uppercase.
-func TitleFromFileName(s string) string {
-	s = fs.Base(s, true)
-
-	if len(s) < 3 {
-		return ""
-	}
-
-	words := FileTitleRegexp.FindAllString(s, -1)
-	var result []string
-
-	found := 0
-
-	for _, w := range words {
-		w = strings.ToLower(w)
-
-		if len(w) < 3 && found == 0 {
-			continue
-		}
-
-		if _, ok := Stopwords[w]; ok && found == 0 {
-			continue
-		}
-
-		result = append(result, w)
-
-		found++
-
-		if found >= 10 {
-			break
-		}
-	}
-
-	if found == 0 {
-		return ""
-	}
-
-	title := strings.Join(result, " ")
-
-	title = strings.ReplaceAll(title, "--", " / ")
-	title = strings.ReplaceAll(title, "-", " ")
-	title = strings.ReplaceAll(title, "  ", " ")
-
-	if len(title) < 3 {
-		return ""
-	}
-
-	return Title(title)
-}

 // Bool casts a string to bool.
 func Bool(s string) bool {
--- a/pkg/txt/strings_test.go
+++ b/pkg/txt/strings_test.go
@ -15,84 +15,6 @@ func TestContainsNumber(t *testing.T) {
 	})
 }

-func TestIsSeparator(t *testing.T) {
-	t.Run("rune A", func(t *testing.T) {
-		assert.Equal(t, false, isSeparator('A'))
-	})
-	t.Run("rune 99", func(t *testing.T) {
-		assert.Equal(t, false, isSeparator('9'))
-	})
-	t.Run("rune /", func(t *testing.T) {
-		assert.Equal(t, true, isSeparator('/'))
-	})
-	t.Run("rune \\", func(t *testing.T) {
-		assert.Equal(t, true, isSeparator('\\'))
-	})
-	t.Run("rune ♥ ", func(t *testing.T) {
-		assert.Equal(t, false, isSeparator('♥'))
-	})
-	t.Run("rune  space", func(t *testing.T) {
-		assert.Equal(t, true, isSeparator(' '))
-	})
-	t.Run("rune '", func(t *testing.T) {
-		assert.Equal(t, false, isSeparator('\''))
-	})
-	t.Run("rune ý", func(t *testing.T) {
-		assert.Equal(t, false, isSeparator('ý'))
-	})
-}
-
-func TestUcFirst(t *testing.T) {
-	t.Run("photo-lover", func(t *testing.T) {
-		assert.Equal(t, "Photo-lover", UcFirst("photo-lover"))
-	})
-	t.Run("cat", func(t *testing.T) {
-		assert.Equal(t, "Cat", UcFirst("Cat"))
-	})
-	t.Run("empty string", func(t *testing.T) {
-		assert.Equal(t, "", UcFirst(""))
-	})
-}
-
-func TestTitle(t *testing.T) {
-	t.Run("Browse your life in pictures", func(t *testing.T) {
-		assert.Equal(t, "Browse Your Life In Pictures", Title("Browse your life in pictures"))
-	})
-	t.Run("photo-lover", func(t *testing.T) {
-		assert.Equal(t, "Photo-Lover", Title("photo-lover"))
-	})
-}
-
-func TestTitleFromFileName(t *testing.T) {
-	t.Run("Browse your life in pictures", func(t *testing.T) {
-		assert.Equal(t, "Browse Your Life In Pictures", TitleFromFileName("Browse your life in pictures"))
-	})
-	t.Run("photo-lover", func(t *testing.T) {
-		assert.Equal(t, "Photo Lover", TitleFromFileName("photo-lover"))
-	})
-	t.Run("BRIDGE in nyc", func(t *testing.T) {
-		assert.Equal(t, "Bridge In NYC", TitleFromFileName("BRIDGE in nyc"))
-	})
-	t.Run("phil unveils iphone, ipad, imac or macbook 11 pro and max", func(t *testing.T) {
-		assert.Equal(t, "Phil Unveils iPhone iPad iMac or MacBook Pro and Max", TitleFromFileName("phil unveils iphone, ipad, imac or macbook 11 pro and max"))
-	})
-	t.Run("IMG_4568", func(t *testing.T) {
-		assert.Equal(t, "", TitleFromFileName("IMG_4568"))
-	})
-	t.Run("queen-city-yacht-club--toronto-island_7999432607_o.jpg", func(t *testing.T) {
-		assert.Equal(t, "Queen City Yacht Club / Toronto Island", TitleFromFileName("queen-city-yacht-club--toronto-island_7999432607_o.jpg"))
-	})
-	t.Run("tim-robbins--tiff-2012_7999233420_o.jpg", func(t *testing.T) {
-		assert.Equal(t, "Tim Robbins / TIFF", TitleFromFileName("tim-robbins--tiff-2012_7999233420_o.jpg"))
-	})
-	t.Run("20200102-204030-Berlin-Germany-2020-3h4.jpg", func(t *testing.T) {
-		assert.Equal(t, "Berlin Germany", TitleFromFileName("20200102-204030-Berlin-Germany-2020-3h4.jpg"))
-	})
-	t.Run("changing-of-the-guard--buckingham-palace_7925318070_o.jpg", func(t *testing.T) {
-		assert.Equal(t, "Changing of the Guard / Buckingham Palace", TitleFromFileName("changing-of-the-guard--buckingham-palace_7925318070_o.jpg"))
-	})
-}
-
 func TestBool(t *testing.T) {
 	t.Run("not empty", func(t *testing.T) {
 		assert.Equal(t, true, Bool("Browse your life in pictures"))