Exiftool: Improve metadata extraction and support FocalDistance #2513

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer 2022-07-22 11:46:53 +02:00
parent 989f5c7917
commit d2086d5622
6 changed files with 293 additions and 78 deletions

View file

@ -14,56 +14,58 @@ const (
// Data represents image metadata.
type Data struct {
FileName string `meta:"FileName"`
DocumentID string `meta:"BurstUUID,MediaGroupUUID,ImageUniqueID,OriginalDocumentID,DocumentID,DigitalImageGUID"`
InstanceID string `meta:"InstanceID,DocumentID"`
TakenAt time.Time `meta:"SubSecDateTimeOriginal,SubSecDateTimeCreated,SubSecCreateDate,DateTimeOriginal,CreationDate,CreateDate,MediaCreateDate,ContentCreateDate,DateTimeCreated,DateTimeDigitized,DateTime" xmp:"DateCreated"`
TakenAtLocal time.Time `meta:"SubSecDateTimeOriginal,SubSecDateTimeCreated,SubSecCreateDate,DateTimeOriginal,CreationDate,CreateDate,MediaCreateDate,ContentCreateDate,DateTimeCreated,DateTimeDigitized,DateTime"`
TakenGps time.Time `meta:"GPSDateTime,GPSDateStamp"`
TakenNs int `meta:"-"`
TimeZone string `meta:"-"`
Duration time.Duration `meta:"Duration,MediaDuration,TrackDuration"`
FPS float64 `meta:"VideoFrameRate,VideoAvgFrameRate"`
Frames int `meta:"FrameCount"`
Codec string `meta:"CompressorID,VideoCodecID,CodecID,FileType"`
Title string `meta:"Headline,Title" xmp:"dc:title" dc:"title,title.Alt"`
Subject string `meta:"Subject,PersonInImage,ObjectName,HierarchicalSubject,CatalogSets" xmp:"Subject"`
Keywords Keywords `meta:"Keywords"`
Notes string `meta:"Comment"`
Artist string `meta:"Artist,Creator,By-line,OwnerName,Owner" xmp:"Creator"`
Description string `meta:"Description,Caption-Abstract" xmp:"Description,Description.Alt"`
Copyright string `meta:"Rights,Copyright,CopyrightNotice,WebStatement" xmp:"Rights,Rights.Alt"`
License string `meta:"UsageTerms,License"`
Projection string `meta:"ProjectionType"`
ColorProfile string `meta:"ICCProfileName,ProfileDescription"`
CameraMake string `meta:"CameraMake,Make" xmp:"Make"`
CameraModel string `meta:"CameraModel,Model" xmp:"Model"`
CameraOwner string `meta:"OwnerName"`
CameraSerial string `meta:"SerialNumber"`
LensMake string `meta:"LensMake"`
LensModel string `meta:"Lens,LensModel" xmp:"LensModel"`
Software string `meta:"Software,HistorySoftwareAgent,ProcessingSoftware"`
Flash bool `meta:"FlashFired"`
FocalLength int `meta:"FocalLength"`
Exposure string `meta:"ExposureTime,ShutterSpeedValue,ShutterSpeed,TargetExposureTime"`
Aperture float32 `meta:"ApertureValue,Aperture"`
FNumber float32 `meta:"FNumber"`
Iso int `meta:"ISO"`
ImageType int `meta:"HDRImageType"`
GPSPosition string `meta:"GPSPosition"`
GPSLatitude string `meta:"GPSLatitude"`
GPSLongitude string `meta:"GPSLongitude"`
Lat float32 `meta:"-"`
Lng float32 `meta:"-"`
Altitude int `meta:"GlobalAltitude,GPSAltitude"`
Width int `meta:"ImageWidth,PixelXDimension,ExifImageWidth,SourceImageWidth"`
Height int `meta:"ImageHeight,ImageLength,PixelYDimension,ExifImageHeight,SourceImageHeight"`
Orientation int `meta:"-"`
Rotation int `meta:"Rotation"`
Views int `meta:"-"`
Albums []string `meta:"-"`
Error error `meta:"-"`
exif map[string]string
FileName string `meta:"FileName"`
DocumentID string `meta:"BurstUUID,MediaGroupUUID,ImageUniqueID,OriginalDocumentID,DocumentID,DigitalImageGUID"`
InstanceID string `meta:"InstanceID,DocumentID"`
TakenAt time.Time `meta:"SubSecDateTimeOriginal,SubSecDateTimeCreated,SubSecCreateDate,DateTimeOriginal,CreationDate,CreateDate,MediaCreateDate,ContentCreateDate,DateTimeCreated,DateTime,DateTimeDigitized" xmp:"DateCreated"`
TakenAtLocal time.Time `meta:"SubSecDateTimeOriginal,SubSecDateTimeCreated,SubSecCreateDate,DateTimeOriginal,CreationDate,CreateDate,MediaCreateDate,ContentCreateDate,DateTimeCreated,DateTime,DateTimeDigitized"`
TakenGps time.Time `meta:"GPSDateTime,GPSDateStamp"`
TakenNs int `meta:"-"`
TimeZone string `meta:"-"`
Duration time.Duration `meta:"Duration,MediaDuration,TrackDuration"`
FPS float64 `meta:"VideoFrameRate,VideoAvgFrameRate"`
Frames int `meta:"FrameCount"`
Codec string `meta:"CompressorID,VideoCodecID,CodecID,FileType"`
Title string `meta:"Headline,Title" xmp:"dc:title" dc:"title,title.Alt"`
Subject string `meta:"Subject,PersonInImage,ObjectName,HierarchicalSubject,CatalogSets" xmp:"Subject"`
Keywords Keywords `meta:"Keywords"`
Notes string `meta:"Comment"`
Artist string `meta:"Artist,Creator,By-line,OwnerName,Owner" xmp:"Creator"`
Description string `meta:"Description,Caption-Abstract" xmp:"Description,Description.Alt"`
Copyright string `meta:"Rights,Copyright,CopyrightNotice,WebStatement" xmp:"Rights,Rights.Alt"`
License string `meta:"UsageTerms,License"`
Projection string `meta:"ProjectionType"`
ColorProfile string `meta:"ICCProfileName,ProfileDescription"`
CameraMake string `meta:"CameraMake,Make" xmp:"Make"`
CameraModel string `meta:"CameraModel,Model" xmp:"Model"`
CameraOwner string `meta:"OwnerName"`
CameraSerial string `meta:"SerialNumber"`
LensMake string `meta:"LensMake"`
LensModel string `meta:"Lens,LensModel" xmp:"LensModel"`
Software string `meta:"Software,HistorySoftwareAgent,ProcessingSoftware"`
Flash bool `meta:"FlashFired"`
FocalLength int `meta:"FocalLength,FocalLengthIn35mmFormat"`
FocalDistance float64 `meta:"HyperfocalDistance"`
Exposure string `meta:"ExposureTime,ShutterSpeedValue,ShutterSpeed,TargetExposureTime"`
Aperture float32 `meta:"ApertureValue,Aperture"`
FNumber float32 `meta:"FNumber"`
Iso int `meta:"ISO"`
ImageType int `meta:"HDRImageType"`
GPSPosition string `meta:"GPSPosition"`
GPSLatitude string `meta:"GPSLatitude"`
GPSLongitude string `meta:"GPSLongitude"`
Lat float32 `meta:"-"`
Lng float32 `meta:"-"`
Altitude int `meta:"GlobalAltitude,GPSAltitude"`
Width int `meta:"ImageWidth,PixelXDimension,ExifImageWidth,SourceImageWidth"`
Height int `meta:"ImageHeight,ImageLength,PixelYDimension,ExifImageHeight,SourceImageHeight"`
Orientation int `meta:"-"`
Rotation int `meta:"Rotation"`
Views int `meta:"-"`
Albums []string `meta:"-"`
Error error `meta:"-"`
json map[string]string
exif map[string]string
}
// New returns a new metadata struct.

View file

@ -25,8 +25,8 @@ import (
var exifIfdMapping *exifcommon.IfdMapping
var exifTagIndex = exif.NewTagIndex()
var exifMutex = sync.Mutex{}
var exifDateTimeTags = []string{"DateTimeOriginal", "DateTimeCreated", "CreateDate", "DateTimeDigitized", "DateTime"}
var exifSubSecTags = []string{"SubSecTimeOriginal", "SubSecTimeDigitized", "SubSecTime"}
var exifDateTimeTags = []string{"DateTimeOriginal", "DateTimeCreated", "CreateDate", "DateTime", "DateTimeDigitized"}
var exifSubSecTags = []string{"SubSecTimeOriginal", "SubSecTime", "SubSecTimeDigitized"}
func init() {
exifIfdMapping = exifcommon.NewIfdMapping()

View file

@ -37,14 +37,14 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
return fmt.Errorf("metadata: data is not an object in %s (exiftool)", clean.Log(filepath.Base(originalName)))
}
jsonStrings := make(map[string]string)
data.json = make(map[string]string)
jsonValues := j.Map()
for key, val := range jsonValues {
jsonStrings[key] = val.String()
data.json[key] = val.String()
}
if fileName, ok := jsonStrings["FileName"]; ok && fileName != "" && originalName != "" && fileName != originalName {
if fileName, ok := data.json["FileName"]; ok && fileName != "" && originalName != "" && fileName != originalName {
return fmt.Errorf("metadata: original name %s does not match %s (exiftool)", clean.Log(originalName), clean.Log(fileName))
}
@ -97,19 +97,31 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
continue
}
fieldValue.SetInt(jsonValue.Int())
if intVal := jsonValue.Int(); intVal != 0 {
fieldValue.SetInt(intVal)
} else if intVal = txt.Int64(jsonValue.String()); intVal != 0 {
fieldValue.SetInt(intVal)
}
case float32, float64:
if !fieldValue.IsZero() {
continue
}
fieldValue.SetFloat(jsonValue.Float())
if f := jsonValue.Float(); f != 0 {
fieldValue.SetFloat(f)
} else if f = txt.Float64(jsonValue.String()); f != 0 {
fieldValue.SetFloat(f)
}
case uint, uint64:
if !fieldValue.IsZero() {
continue
}
fieldValue.SetUint(jsonValue.Uint())
if uintVal := jsonValue.Uint(); uintVal > 0 {
fieldValue.SetUint(uintVal)
} else if intVal := txt.Int64(jsonValue.String()); intVal > 0 {
fieldValue.SetUint(uint64(intVal))
}
case []string:
existing := fieldValue.Interface().([]string)
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String()))))
@ -143,7 +155,7 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
// Nanoseconds.
if data.TakenNs <= 0 {
for _, name := range exifSubSecTags {
if s := jsonStrings[name]; txt.IsPosInt(s) {
if s := data.json[name]; txt.IsPosInt(s) {
data.TakenNs = txt.Int(s + strings.Repeat("0", 9-len(s)))
break
}
@ -162,7 +174,7 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
if data.Altitude == 0 {
// Parseable floating point number?
if fl := GpsFloatRegexp.FindAllString(jsonStrings["GPSAltitude"], -1); len(fl) != 1 {
if fl := GpsFloatRegexp.FindAllString(data.json["GPSAltitude"], -1); len(fl) != 1 {
// Ignore.
} else if alt, err := strconv.ParseFloat(fl[0], 64); err == nil && alt != 0 {
data.Altitude = int(alt)
@ -180,7 +192,7 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
if _, offset := data.TakenAtLocal.Zone(); offset != 0 && !data.TakenAtLocal.IsZero() {
hasTimeOffset = true
} else if mt, ok := jsonStrings["MIMEType"]; ok && (mt == MimeVideoMP4 || mt == MimeQuicktime) {
} else if mt, ok := data.json["MIMEType"]; ok && (mt == MimeVideoMP4 || mt == MimeQuicktime) {
// Assume default time zone for MP4 & Quicktime videos is UTC.
// see https://exiftool.org/TagNames/QuickTime.html
data.TimeZone = time.UTC.String()
@ -259,7 +271,7 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
}
// Image orientation, see https://www.daveperrett.com/articles/2012/07/28/exif-orientation-handling-is-a-ghetto/.
if orientation, ok := jsonStrings["Orientation"]; ok && orientation != "" {
if orientation, ok := data.json["Orientation"]; ok && orientation != "" {
switch orientation {
case "1", "Horizontal (normal)":
data.Orientation = 1

View file

@ -18,7 +18,7 @@ func TestJSON(t *testing.T) {
t.Fatal(err)
}
// t.Logf("DATA: %+v", data)
// t.Logf("DATA: %#v", data)
assert.Equal(t, "20170323-083538-Berlin-Zoologischer-Garten-2017-2u4.mov", data.FileName)
assert.Equal(t, CodecAvc1, data.Codec)
@ -528,7 +528,7 @@ func TestJSON(t *testing.T) {
t.Fatal(err)
}
// t.Logf("all: %+v", data.All)
// t.Logf("all: %+v", data.json)
assert.Equal(t, "", data.Artist)
assert.Equal(t, "2020-05-24T08:55:21Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
@ -547,7 +547,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "SM-C200", data.CameraModel)
assert.Equal(t, "", data.CameraOwner)
assert.Equal(t, "", data.CameraSerial)
assert.Equal(t, 0, data.FocalLength)
assert.Equal(t, 1, data.FocalLength)
assert.Equal(t, 1, data.Orientation)
assert.Equal(t, projection.Equirectangular.String(), data.Projection)
})
@ -559,7 +559,7 @@ func TestJSON(t *testing.T) {
t.Fatal(err)
}
// t.Logf("all: %+v", data.All)
// t.Logf("all: %+v", data.json)
assert.Equal(t, "", data.Artist)
assert.Equal(t, "2018-07-25T11:18:42Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
@ -590,7 +590,7 @@ func TestJSON(t *testing.T) {
t.Fatal(err)
}
// t.Logf("all: %+v", data.All)
// t.Logf("all: %+v", data.json)
assert.Equal(t, "OLYMPUS DIGITAL CAMERA", data.CameraMake)
assert.Equal(t, "E-M10MarkII", data.CameraModel)
@ -636,7 +636,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "", data.CameraSerial)
assert.Equal(t, "", data.LensMake)
assert.Equal(t, "", data.LensModel)
assert.Equal(t, 0, data.FocalLength)
assert.Equal(t, 5, data.FocalLength)
assert.Equal(t, 1, int(data.Orientation))
})
@ -817,7 +817,7 @@ func TestJSON(t *testing.T) {
t.Fatal(err)
}
// t.Logf("all: %+v", data.All)
// t.Logf("all: %+v", data.json)
assert.Equal(t, "Jens\r\tMander", data.Artist)
assert.Equal(t, "2004-09-23T10:57:57Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
@ -837,7 +837,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "Canon EOS-1DS", data.CameraModel)
assert.Equal(t, "", data.CameraOwner)
assert.Equal(t, "123456", data.CameraSerial)
assert.Equal(t, 0, data.FocalLength)
assert.Equal(t, 17, data.FocalLength)
assert.Equal(t, 1, data.Orientation)
assert.Equal(t, "", data.Projection)
})
@ -934,7 +934,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "Canon EOS 6D", data.CameraModel)
assert.Equal(t, "", data.CameraOwner)
assert.Equal(t, "012324001432", data.CameraSerial)
assert.Equal(t, 0, data.FocalLength)
assert.Equal(t, 35, data.FocalLength)
assert.Equal(t, 1, data.Orientation)
assert.Equal(t, "", data.Projection)
})
@ -1006,7 +1006,7 @@ func TestJSON(t *testing.T) {
t.Fatal(err)
}
// t.Logf("all: %+v", data.All)
t.Logf("Iceland-P3.jpg: %#v", data.json["HyperfocalDistance"])
assert.Equal(t, "Nicolas Cornet", data.Artist)
assert.Equal(t, "2012-08-08T22:07:18Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
@ -1025,7 +1025,8 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "NIKON D800E", data.CameraModel)
assert.Equal(t, "", data.CameraOwner)
assert.Equal(t, "6001440", data.CameraSerial)
assert.Equal(t, 0, data.FocalLength)
assert.Equal(t, 16, data.FocalLength)
assert.InEpsilon(t, 1.0650, data.FocalDistance, 0.01)
assert.Equal(t, 1, data.Orientation)
assert.Equal(t, "", data.Projection)
assert.Equal(t, "Display P3", data.ColorProfile)
@ -1038,7 +1039,7 @@ func TestJSON(t *testing.T) {
t.Fatal(err)
}
// t.Logf("all: %+v", data.All)
t.Logf("Iceland-P3-n.jpg: %#v", data.json["HyperfocalDistance"])
assert.Equal(t, "Nicolas Cornet", data.Artist)
assert.Equal(t, "2012-08-08T22:07:18Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
@ -1058,6 +1059,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "", data.CameraOwner)
assert.Equal(t, "6001440", data.CameraSerial)
assert.Equal(t, 16, data.FocalLength)
assert.InEpsilon(t, 1.0650, data.FocalDistance, 0.01)
assert.Equal(t, 1, data.Orientation)
assert.Equal(t, "", data.Projection)
assert.Equal(t, "Display P3", data.ColorProfile)
@ -1070,7 +1072,7 @@ func TestJSON(t *testing.T) {
t.Fatal(err)
}
// t.Logf("all: %+v", data.All)
// t.Logf("all: %+v", data.json)
assert.Equal(t, "Nicolas Cornet", data.Artist)
assert.Equal(t, "2012-08-08T22:07:18Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
@ -1089,7 +1091,7 @@ func TestJSON(t *testing.T) {
assert.Equal(t, "NIKON D800E", data.CameraModel)
assert.Equal(t, "", data.CameraOwner)
assert.Equal(t, "6001440", data.CameraSerial)
assert.Equal(t, 0, data.FocalLength)
assert.Equal(t, 16, data.FocalLength)
assert.Equal(t, 1, data.Orientation)
assert.Equal(t, "", data.Projection)
assert.Equal(t, "Display P3", data.ColorProfile)
@ -1102,7 +1104,7 @@ func TestJSON(t *testing.T) {
t.Fatal(err)
}
// t.Logf("all: %+v", data.All)
// t.Logf("all: %+v", data.json)
assert.Equal(t, "", data.Artist)
assert.Equal(t, "0001-01-01T00:00:00Z", data.TakenAt.Format("2006-01-02T15:04:05Z"))
@ -1135,7 +1137,7 @@ func TestJSON(t *testing.T) {
if err != nil {
t.Fatal(err)
}
//t.Logf("all: %+v", data.exif)
//t.Logf("all: %+v", data.json)
assert.Equal(t, "creator A, creator B", data.Artist)
assert.Equal(t, "my image headline", data.Title)

66
pkg/txt/numeric.go Normal file
View file

@ -0,0 +1,66 @@
package txt
import (
"strconv"
"strings"
)
// Numeric removes non-numeric characters from a string and returns the result.
//
// Digits and minus signs are kept, and at most one decimal separator is
// emitted, always written as ".":
//
//   - If s contains exactly one ".", that dot is the decimal separator and every
//     "," is dropped as a grouping character.
//   - Otherwise, if s contains exactly one ",", that comma is the decimal
//     separator and every "." is dropped.
//   - If neither "." nor "," occurs exactly once, both are dropped, so grouped
//     values such as "1,234,567" or "1.234.567" yield plain digits rather than
//     a string with several decimal points.
//
// The result is not guaranteed to be a parseable number: minus signs are
// preserved wherever they occur, and the result may be empty.
func Numeric(s string) string {
	if s == "" {
		return ""
	}

	// Pick the decimal separator: a lone dot wins, then a lone comma.
	// The initial value -1 never matches an input rune, so by default
	// no separator is emitted at all.
	sep := rune(-1)

	switch {
	case strings.Count(s, ".") == 1:
		sep = '.'
	case strings.Count(s, ",") == 1:
		sep = ','
	}

	// Remove invalid characters and normalize the separator.
	return strings.Map(func(r rune) rune {
		switch {
		case r == sep:
			return '.'
		case r == '-' || (r >= '0' && r <= '9'):
			return r
		default:
			return -1
		}
	}, s)
}
// Float64 parses the numeric content of s as a 64-bit floating point number.
// The input is sanitized with Numeric first; 0 is returned if s is empty or
// the sanitized value cannot be parsed.
func Float64(s string) float64 {
	if s == "" {
		return 0
	}

	if value, err := strconv.ParseFloat(Numeric(s), 64); err == nil {
		return value
	}

	// Any parse error, including out-of-range values, yields 0.
	return 0
}
// Int64 parses the numeric content of s as a signed 64-bit integer.
// The input is sanitized with Numeric first and any fractional part is
// discarded; 0 is returned if s is empty or the value cannot be parsed.
func Int64(s string) int64 {
	if s == "" {
		return 0
	}

	digits := Numeric(s)

	// Keep only what precedes the first decimal point, if there is one.
	if dot := strings.IndexByte(digits, '.'); dot >= 0 {
		digits = digits[:dot]
	}

	if value, err := strconv.ParseInt(digits, 10, 64); err == nil {
		return value
	}

	return 0
}

133
pkg/txt/numeric_test.go Normal file
View file

@ -0,0 +1,133 @@
package txt
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestNumeric checks the string returned by Numeric for a set of
// representative inputs, one subtest per case.
func TestNumeric(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		expected string
	}{
		{"Empty", "", ""},
		{"NonNumeric", " Screenshot ", ""},
		{"Zero", "0", "0"},
		{"LeadingZeros", " 000123", "000123"},
		{"WhitespacePadding", " 123,556\t ", "123.556"},
		{"PositiveFloat", "123,000.45245 ", "123000.45245"},
		{"NegativeFloat", " - 123,000.45245 ", "-123000.45245"},
		{"MultipleDots", "123.000.45245.44 m", "1230004524544"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			result := Numeric(tc.input)
			assert.Equal(t, tc.expected, result)
		})
	}
}
// TestInt64 checks the integer returned by Int64 for a set of
// representative inputs, one subtest per case.
func TestInt64(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		expected int64
	}{
		{"Empty", "", 0},
		{"NonNumeric", " Screenshot ", 0},
		{"Zero", "0", 0},
		{"LeadingZeros", " 000123", 123},
		{"WhitespacePadding", " 123,556\t ", 123},
		{"PositiveFloat", "123,000.45245 ", 123000},
		{"NegativeFloat", " - 123,000.45245 ", -123000},
		{"MultipleDots", "123.000.45245.44 m", 1230004524544},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			result := Int64(tc.input)
			assert.Equal(t, tc.expected, result)
		})
	}
}
// TestFloat64 checks the floating point number returned by Float64 for a
// set of representative inputs, one subtest per case.
func TestFloat64(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		expected float64
	}{
		{"Empty", "", 0.0},
		{"NonNumeric", " Screenshot ", 0.0},
		{"Zero", "0", 0.0},
		{"LeadingZeros", " 000123", 123.0},
		{"WhitespacePadding", " 123,556\t ", 123.556},
		{"PositiveFloat", "123,000.45245 ", 123000.45245},
		{"NegativeFloat", " - 123,000.45245 ", -123000.45245},
		{"MultipleDots", "123.000.45245.44 m", 1230004524544.0},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			result := Float64(tc.input)
			assert.Equal(t, tc.expected, result)
		})
	}
}