Metadata: Improve data parsing and sanitization #2897

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer 2022-11-17 08:28:30 +01:00
parent ce66a77c38
commit 18f1d5a6f8
5 changed files with 43 additions and 23 deletions

View file

@ -16,6 +16,11 @@ func StringToDuration(s string) (d time.Duration) {
}
s = strings.TrimSpace(s)
if s == "" {
return d
}
sec := DurationSecondsRegexp.FindAllString(s, -1)
if len(sec) == 1 {

View file

@ -80,9 +80,11 @@ func (data *Data) Exif(fileName string, fileFormat fs.Type, bruteForce bool) (er
// Ignore IFD1 tags for which an IFD0 value already exists.
// see https://github.com/photoprism/photoprism/issues/2231
for _, tag := range entries {
s := strings.Split(tag.FormattedFirst, "\x00")[0]
if tag.TagName != "" && s != "" && (data.exif[tag.TagName] == "" || tag.IfdPath != exif.ThumbnailFqIfdPath) {
data.exif[tag.TagName] = s
s := strings.Split(tag.FormattedFirst, "\x00")
if tag.TagName == "" || len(s) == 0 {
// Do nothing.
} else if s[0] != "" && (data.exif[tag.TagName] == "" || tag.IfdPath != exif.ThumbnailFqIfdPath) {
data.exif[tag.TagName] = s[0]
}
}
@ -96,13 +98,13 @@ func (data *Data) Exif(fileName string, fileFormat fs.Type, bruteForce bool) (er
// Find and parse GPS coordinates.
if err != nil {
log.Debugf("metadata: %s in %s (exif)", err, logName)
log.Debugf("metadata: %s in %s (exif collect)", err, logName)
} else {
var ifd *exif.Ifd
if ifd, err = ifdIndex.RootIfd.ChildWithIfdPath(exifcommon.IfdGpsInfoStandardIfdIdentity); err == nil {
var gi *exif.GpsInfo
if gi, err = ifd.GpsInfo(); err != nil {
log.Infof("metadata: %s in %s (exif)", err, logName)
log.Debugf("metadata: %s in %s (exif gps-info)", err, logName)
} else {
if !math.IsNaN(gi.Latitude.Decimal()) && !math.IsNaN(gi.Longitude.Decimal()) {
data.Lat = float32(gi.Latitude.Decimal())
@ -201,8 +203,8 @@ func (data *Data) Exif(fileName string, fileFormat fs.Type, bruteForce bool) (er
if i, err := strconv.Atoi(value); err == nil {
data.FocalLength = i
}
} else if value, ok := data.exif["FocalLength"]; ok {
values := strings.Split(value, "/")
} else if v, ok := data.exif["FocalLength"]; ok {
values := strings.Split(v, "/")
if len(values) == 2 && values[1] != "0" && values[1] != "" {
number, _ := strconv.ParseFloat(values[0], 64)

View file

@ -46,7 +46,7 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
jsonValues := j.Map()
for key, val := range jsonValues {
data.json[key] = SanitizeUnicode(val.String())
data.json[key] = val.String()
}
if fileName, ok := data.json["FileName"]; ok && fileName != "" && originalName != "" && fileName != originalName {

View file

@ -78,19 +78,23 @@ func SanitizeString(s string) string {
}
// SanitizeUID normalizes unique IDs found in XMP or Exif metadata.
func SanitizeUID(value string) string {
value = SanitizeString(value)
func SanitizeUID(s string) string {
s = SanitizeString(s)
if start := strings.LastIndex(value, ":"); start != -1 {
value = value[start+1:]
if len(s) < 15 {
return ""
}
if start := strings.LastIndex(s, ":"); start != -1 {
s = s[start+1:]
}
// Not a unique ID?
if len(value) < 15 || len(value) > 36 {
value = ""
if len(s) < 15 || len(s) > 36 {
s = ""
}
return strings.ToLower(value)
return strings.ToLower(s)
}
// SanitizeTitle normalizes titles and removes unwanted information.
@ -122,22 +126,25 @@ func SanitizeTitle(title string) string {
func SanitizeDescription(s string) string {
s = SanitizeString(s)
if s == "" {
switch {
case s == "":
return ""
} else if remove := UnwantedDescriptions[s]; remove {
s = ""
} else if strings.HasPrefix(s, "DCIM\\") && !strings.Contains(s, " ") {
s = ""
case UnwantedDescriptions[s]:
return ""
case strings.HasPrefix(s, "DCIM\\") && !strings.Contains(s, " "):
return ""
default:
return s
}
return s
}
// SanitizeMeta normalizes metadata fields that may contain JSON arrays like keywords and subject.
func SanitizeMeta(s string) string {
if s == "" {
return ""
} else if strings.HasPrefix(s, "[") && strings.HasSuffix(s, "]") {
}
if strings.HasPrefix(s, "[") && strings.HasSuffix(s, "]") {
var words []string
if err := json.Unmarshal([]byte(s), &words); err != nil {

View file

@ -10,6 +10,12 @@ func TestUnicode(t *testing.T) {
t.Run("Valid", func(t *testing.T) {
assert.Equal(t, "Naïve bonds and futures surge as inflation eases 🚀🚀🚀", Unicode("Naïve bonds and futures surge as inflation eases 🚀🚀🚀"))
})
t.Run("Null", func(t *testing.T) {
assert.Equal(t, "\x00", Unicode("\x00"))
})
t.Run("FFFD", func(t *testing.T) {
assert.Equal(t, "", Unicode("\uFFFD"))
})
t.Run("Empty", func(t *testing.T) {
assert.Equal(t, "", Unicode(""))
})