Metadata: Merge existing keywords #1153

This commit is contained in:
Michael Mayer 2021-04-25 14:17:34 +02:00
parent de6f61ecc7
commit 27c106863d
13 changed files with 121 additions and 71 deletions

View file

@ -20,7 +20,7 @@ type Data struct {
Codec string `meta:"CompressorID,Compression,FileType"`
Title string `meta:"Title"`
Subject string `meta:"Subject,PersonInImage,ObjectName,HierarchicalSubject,CatalogSets"`
Keywords string `meta:"Keywords"`
Keywords Keywords `meta:"Keywords"`
Notes string `meta:"-"`
Artist string `meta:"Artist,Creator,OwnerName"`
Description string `meta:"Description"`

View file

@ -270,7 +270,7 @@ func (data *Data) Exif(fileName string, fileType fs.FileFormat) (err error) {
if value, ok := tags["Flash"]; ok {
if i, err := strconv.Atoi(value); err == nil && i&1 == 1 {
data.AddKeyword(KeywordFlash)
data.AddKeywords(KeywordFlash)
data.Flash = true
}
}
@ -281,11 +281,10 @@ func (data *Data) Exif(fileName string, fileType fs.FileFormat) (err error) {
}
if value, ok := tags["ProjectionType"]; ok {
data.AddKeyword(KeywordPanorama)
data.AddKeywords(KeywordPanorama)
data.Projection = SanitizeString(value)
}
data.Keywords = SanitizeMeta(data.Keywords)
data.Subject = SanitizeMeta(data.Subject)
data.Artist = SanitizeMeta(data.Artist)

View file

@ -51,8 +51,8 @@ func TestExif(t *testing.T) {
assert.Equal(t, "Photographer: TMB", data.Artist)
assert.Equal(t, "2011-07-10T17:34:28Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "2011-07-10T19:34:28Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "", data.Title) // Should be "Ladybug"
assert.Equal(t, "", data.Keywords) // Should be "Ladybug"
assert.Equal(t, "", data.Title) // Should be "Ladybug"
assert.Equal(t, "", data.Keywords.String()) // Should be "Ladybug"
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 540, data.Height)
@ -84,7 +84,7 @@ func TestExif(t *testing.T) {
assert.Equal(t, "2017-12-21T05:17:28Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "2017-12-21T05:17:28Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "", data.Title)
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 180, data.Height)
@ -305,7 +305,7 @@ func TestExif(t *testing.T) {
assert.Equal(t, "2020-05-24T08:55:21Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "2020-05-24T11:55:21Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "", data.Title)
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 3600, data.Height)
@ -336,7 +336,7 @@ func TestExif(t *testing.T) {
assert.Equal(t, "0001-01-01T00:00:00Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "0001-01-01T00:00:00Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "", data.Title)
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 43, data.Height)
@ -367,7 +367,7 @@ func TestExif(t *testing.T) {
assert.Equal(t, "2017-04-09T18:33:44Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "2017-04-09T18:33:44Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "", data.Title)
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 2448, data.Height)
@ -400,7 +400,7 @@ func TestExif(t *testing.T) {
assert.Equal(t, "2020-10-17T17:48:24Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "Europe/Berlin", data.TimeZone)
assert.Equal(t, "", data.Title)
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 2736, data.Height)

View file

@ -64,12 +64,16 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
}
// Skip empty values.
if !jsonValue.Exists() || !fieldValue.IsZero() {
if !jsonValue.Exists() {
continue
}
switch t := fieldValue.Interface().(type) {
case time.Time:
if !fieldValue.IsZero() {
continue
}
s := strings.TrimSpace(jsonValue.String())
s = strings.ReplaceAll(s, "/", ":")
@ -79,16 +83,46 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
fieldValue.Set(reflect.ValueOf(tv.Round(time.Second)))
}
case time.Duration:
if !fieldValue.IsZero() {
continue
}
fieldValue.Set(reflect.ValueOf(StringToDuration(jsonValue.String())))
case int, int64:
if !fieldValue.IsZero() {
continue
}
fieldValue.SetInt(jsonValue.Int())
case float32, float64:
if !fieldValue.IsZero() {
continue
}
fieldValue.SetFloat(jsonValue.Float())
case uint, uint64:
if !fieldValue.IsZero() {
continue
}
fieldValue.SetUint(jsonValue.Uint())
case []string:
existing := fieldValue.Interface().([]string)
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String()))))
case Keywords:
existing := fieldValue.Interface().(Keywords)
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String()))))
case string:
if !fieldValue.IsZero() {
continue
}
fieldValue.SetString(strings.TrimSpace(jsonValue.String()))
case bool:
if !fieldValue.IsZero() {
continue
}
fieldValue.SetBool(jsonValue.Bool())
default:
log.Warnf("metadata: can't assign value of type %s to %s (exiftool)", t, tagValue)
@ -190,12 +224,11 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
}
if data.Projection == "equirectangular" {
data.AddKeyword(KeywordPanorama)
data.AddKeywords(KeywordPanorama)
}
data.Title = SanitizeTitle(data.Title)
data.Description = SanitizeDescription(data.Description)
data.Keywords = SanitizeMeta(data.Keywords)
data.Subject = SanitizeMeta(data.Subject)
data.Artist = SanitizeMeta(data.Artist)

View file

@ -446,7 +446,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "2020-05-24T08:55:21Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "2020-05-24T11:55:21Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "", data.Title)
assert.Equal(t, "panorama", data.Keywords)
assert.Equal(t, "panorama", data.Keywords.String())
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 3600, data.Height)
@ -477,7 +477,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "2018-07-25T11:18:42Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "2018-07-25T11:18:42Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "", data.Title)
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 1080, data.Height)
@ -535,7 +535,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "2020-10-17T17:48:24Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "Europe/Berlin", data.TimeZone)
assert.Equal(t, "", data.Title)
assert.Equal(t, "Berlin, Shop", data.Keywords)
assert.Equal(t, "berlin, shop", data.Keywords.String())
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 375, data.Height)
@ -693,7 +693,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "iPhone 6s", data.CameraModel)
assert.Equal(t, "iPhone 6s back camera 4.15mm f/2.2", data.LensModel)
assert.Equal(t, "holiday", data.Subject)
assert.Equal(t, "holiday", data.Keywords)
assert.Equal(t, "holiday", data.Keywords.String())
})
t.Run("subject-2.json", func(t *testing.T) {
@ -719,7 +719,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "iPhone 6s", data.CameraModel)
assert.Equal(t, "iPhone 6s back camera 4.15mm f/2.2", data.LensModel)
assert.Equal(t, "holiday, greetings", data.Subject)
assert.Equal(t, "holiday, greetings", data.Keywords)
assert.Equal(t, "greetings, holiday", data.Keywords.String())
})
t.Run("newline.json", func(t *testing.T) {
@ -735,7 +735,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "0001-01-01T00:00:00Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "0001-01-01T00:00:00Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "This is the title", data.Title)
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
assert.Equal(t, "This is a\n\ndescription!", data.Description)
assert.Equal(t, "This is the world.", data.Subject)
assert.Equal(t, "© 2011 PhotoPrism", data.Copyright)

View file

@ -1,6 +1,9 @@
package meta
import "strings"
import (
"github.com/photoprism/photoprism/pkg/txt"
"strings"
)
const (
KeywordFlash = "flash"
@ -10,36 +13,38 @@ const (
KeywordEquirectangular = "equirectangular"
)
// Keywords is a list of metadata keywords.
type Keywords []string

// String joins all keywords into a single string, using ", " as the separator.
// An empty or nil list yields an empty string.
func (w Keywords) String() string {
	const separator = ", "

	return strings.Join([]string(w), separator)
}
var AutoKeywords = []string{KeywordHdr, KeywordBurst, KeywordPanorama, KeywordEquirectangular}
// AddKeyword appends a keyword if not exists.
func (data *Data) AddKeyword(w string) {
w = strings.ToLower(SanitizeString(w))
// AddKeywords appends keywords.
func (data *Data) AddKeywords(w string) {
w = strings.ToLower(SanitizeMeta(w))
if len(w) < 3 {
if len(w) < 1 {
return
}
if !strings.Contains(data.Keywords, w) {
if data.Keywords == "" {
data.Keywords = w
} else {
data.Keywords += ", " + w
}
}
data.Keywords = txt.AddToWords(data.Keywords, w)
}
// AutoAddKeywords automatically adds relevant keywords from a string (e.g. description).
// AutoAddKeywords automatically appends relevant keywords from a string (e.g. description).
func (data *Data) AutoAddKeywords(s string) {
s = strings.ToLower(SanitizeString(s))
s = strings.ToLower(SanitizeMeta(s))
if len(s) < 3 {
if len(s) < 1 {
return
}
for _, w := range AutoKeywords {
if strings.Contains(s, w) {
data.AddKeyword(w)
data.AddKeywords(w)
}
}
}

View file

@ -6,29 +6,29 @@ import (
"github.com/stretchr/testify/assert"
)
func TestData_AddKeyword(t *testing.T) {
func TestData_AddKeywords(t *testing.T) {
t.Run("success", func(t *testing.T) {
data := NewData()
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
data.AddKeyword("FooBar")
data.AddKeywords("FooBar")
assert.Equal(t, "foobar", data.Keywords)
assert.Equal(t, "foobar", data.Keywords.String())
data.AddKeyword("BAZ")
data.AddKeywords("BAZ; pro")
assert.Equal(t, "foobar, baz", data.Keywords)
assert.Equal(t, "foobar, pro", data.Keywords.String())
})
t.Run("ignore", func(t *testing.T) {
data := NewData()
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
data.AddKeyword("Fo")
data.AddKeywords("Fo")
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
})
}
@ -36,30 +36,30 @@ func TestData_AutoAddKeywords(t *testing.T) {
t.Run("success", func(t *testing.T) {
data := NewData()
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
data.AutoAddKeywords("FooBar burst baz flash")
assert.Equal(t, "burst", data.Keywords)
assert.Equal(t, "burst", data.Keywords.String())
})
t.Run("ignore", func(t *testing.T) {
data := NewData()
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
data.AutoAddKeywords("FooBar go pro baz banana")
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
})
t.Run("ignore because too short", func(t *testing.T) {
data := NewData()
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
data.AutoAddKeywords("es")
assert.Equal(t, "", data.Keywords)
assert.Equal(t, "", data.Keywords.String())
})
}

View file

@ -338,7 +338,7 @@ func (ind *Index) MediaFile(m *MediaFile, o IndexOptions, originalName string) (
photo.SetCoordinates(metaData.Lat, metaData.Lng, metaData.Altitude, entity.SrcXmp)
// Update metadata details.
details.SetKeywords(metaData.Keywords, entity.SrcXmp)
details.SetKeywords(metaData.Keywords.String(), entity.SrcXmp)
details.SetNotes(metaData.Notes, entity.SrcXmp)
details.SetSubject(metaData.Subject, entity.SrcXmp)
details.SetArtist(metaData.Artist, entity.SrcXmp)
@ -356,7 +356,7 @@ func (ind *Index) MediaFile(m *MediaFile, o IndexOptions, originalName string) (
photo.SetCameraSerial(metaData.CameraSerial)
// Update metadata details.
details.SetKeywords(metaData.Keywords, entity.SrcMeta)
details.SetKeywords(metaData.Keywords.String(), entity.SrcMeta)
details.SetNotes(metaData.Notes, entity.SrcMeta)
details.SetSubject(metaData.Subject, entity.SrcMeta)
details.SetArtist(metaData.Artist, entity.SrcMeta)
@ -405,7 +405,7 @@ func (ind *Index) MediaFile(m *MediaFile, o IndexOptions, originalName string) (
photo.SetCameraSerial(metaData.CameraSerial)
// Update metadata details.
details.SetKeywords(metaData.Keywords, entity.SrcMeta)
details.SetKeywords(metaData.Keywords.String(), entity.SrcMeta)
details.SetNotes(metaData.Notes, entity.SrcMeta)
details.SetSubject(metaData.Subject, entity.SrcMeta)
details.SetArtist(metaData.Artist, entity.SrcMeta)
@ -504,7 +504,7 @@ func (ind *Index) MediaFile(m *MediaFile, o IndexOptions, originalName string) (
photo.SetCameraSerial(metaData.CameraSerial)
// Update metadata details.
details.SetKeywords(metaData.Keywords, entity.SrcMeta)
details.SetKeywords(metaData.Keywords.String(), entity.SrcMeta)
details.SetNotes(metaData.Notes, entity.SrcMeta)
details.SetSubject(metaData.Subject, entity.SrcMeta)
details.SetArtist(metaData.Artist, entity.SrcMeta)

View file

@ -14,8 +14,7 @@ func TestIndex_MediaFile(t *testing.T) {
t.Skip("skipping test in short mode.")
}
//TODO This test MUST run if https://github.com/photoprism/photoprism/issues/1153 is fixed
/*t.Run("/exifWithFlashDescriptionProjectionKeywordsSmall", func(t *testing.T) {
t.Run("flash.jpg", func(t *testing.T) {
conf := config.TestConfig()
conf.InitializeTestData(t)
@ -26,24 +25,27 @@ func TestIndex_MediaFile(t *testing.T) {
ind := NewIndex(conf, tf, nd, convert, NewFiles(), NewPhotos())
indexOpt := IndexOptionsAll()
mediaFile, err := NewMediaFile("../meta/testdata/exifWithFlashDescriptionProjectionKeywordsSmall.jpg")
mediaFile, err := NewMediaFile("testdata/flash.jpg")
if err != nil {
t.Fatal(err)
}
assert.Equal(t, "", mediaFile.metaData.Keywords)
result := ind.MediaFile(mediaFile, indexOpt, "exifWithFlashDescriptionProjectionKeywordsSmall.jpg")
assert.Equal(t, "", mediaFile.metaData.Keywords.String())
assert.Contains(t, mediaFile.metaData.Keywords, "Marienkäfer")
assert.Contains(t, mediaFile.metaData.Keywords, "burst")
assert.Contains(t, mediaFile.metaData.Keywords, "flash")
assert.Contains(t, mediaFile.metaData.Keywords, "panorama")
result := ind.MediaFile(mediaFile, indexOpt, "flash.jpg")
words := mediaFile.metaData.Keywords.String()
assert.Contains(t, words, "marienkäfer")
assert.Contains(t, words, "burst")
assert.Contains(t, words, "flash")
assert.Contains(t, words, "panorama")
assert.Equal(t, "Animal with green eyes on table burst", mediaFile.metaData.Description)
assert.Equal(t, IndexStatus("added"), result.Status)
})*/
})
t.Run("/blue-go-video.mp4", func(t *testing.T) {
t.Run("blue-go-video.mp4", func(t *testing.T) {
conf := config.TestConfig()
conf.InitializeTestData(t)

View file

@ -215,7 +215,7 @@ func TestMediaFile_Exif_JPEG(t *testing.T) {
assert.Equal(t, "2020-05-24T08:55:21Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "2020-05-24T11:55:21Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "", data.Title)
assert.Equal(t, "panorama", data.Keywords)
assert.Equal(t, "panorama", data.Keywords.String())
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 3600, data.Height)
@ -252,7 +252,7 @@ func TestMediaFile_Exif_JPEG(t *testing.T) {
assert.Equal(t, "2020-10-17T17:48:24Z", data.TakenAtLocal.Format("2006-01-02T15:04:05Z"))
assert.Equal(t, "Europe/Berlin", data.TimeZone)
assert.Equal(t, "", data.Title)
assert.Equal(t, "Berlin, Shop", data.Keywords)
assert.Equal(t, "berlin, shop", data.Keywords.String())
assert.Equal(t, "", data.Description)
assert.Equal(t, "", data.Copyright)
assert.Equal(t, 2736, data.Height)

View file

Before

Width:  |  Height:  |  Size: 151 KiB

After

Width:  |  Height:  |  Size: 151 KiB

View file

@ -1,7 +1,7 @@
[{
"SourceFile": "exifWithFlashDescriptionProjectionKeywordsSmall.jpg",
"SourceFile": "flash.jpg",
"ExifToolVersion": 11.88,
"FileName": "exifWithFlashDescriptionProjectionKeywordsSmall.jpg",
"FileName": "flash.jpg",
"Directory": ".",
"FileSize": "151 kB",
"FileModifyDate": "2021:03:25 11:13:55+01:00",

View file

@ -153,6 +153,17 @@ func RemoveFromWords(words []string, remove string) (results []string) {
return results
}
// AddToWords adds the keywords extracted from words to the existing slice and
// returns the combined list after passing it through UniqueWords.
// If no keywords can be extracted, existing is returned unchanged.
func AddToWords(existing []string, words string) []string {
	added := Keywords(words)

	// Nothing to merge: hand back the caller's slice as-is.
	if len(added) == 0 {
		return existing
	}

	merged := append(existing, added...)

	return UniqueWords(merged)
}
// UniqueKeywords returns a slice of unique and sorted keywords without stopwords.
func UniqueKeywords(s string) (results []string) {
if s == "" {
@ -180,7 +191,7 @@ func UniqueKeywords(s string) (results []string) {
return results
}
// Sorts string slice case insensitive.
// SortCaseInsensitive sorts the given slice in place, ordering the words by
// their lower-cased form so that case differences do not affect the order.
func SortCaseInsensitive(words []string) {
	// Compare the lower-cased variants of the two elements being ordered.
	lowerLess := func(a, b int) bool {
		return strings.ToLower(words[a]) < strings.ToLower(words[b])
	}

	sort.Slice(words, lowerLess)
}