Metadata: Sanitize bad Unicode strings #2897
Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
parent
61b4be2c6f
commit
ae130dc500
4 changed files with 50 additions and 13 deletions
|
@ -9,15 +9,14 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/photoprism/photoprism/pkg/video"
|
|
||||||
|
|
||||||
"github.com/photoprism/photoprism/pkg/projection"
|
|
||||||
|
|
||||||
"github.com/photoprism/photoprism/pkg/clean"
|
|
||||||
"github.com/photoprism/photoprism/pkg/rnd"
|
|
||||||
"github.com/photoprism/photoprism/pkg/txt"
|
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
"gopkg.in/photoprism/go-tz.v2/tz"
|
"gopkg.in/photoprism/go-tz.v2/tz"
|
||||||
|
|
||||||
|
"github.com/photoprism/photoprism/pkg/clean"
|
||||||
|
"github.com/photoprism/photoprism/pkg/projection"
|
||||||
|
"github.com/photoprism/photoprism/pkg/rnd"
|
||||||
|
"github.com/photoprism/photoprism/pkg/txt"
|
||||||
|
"github.com/photoprism/photoprism/pkg/video"
|
||||||
)
|
)
|
||||||
|
|
||||||
const MimeVideoMP4 = "video/mp4"
|
const MimeVideoMP4 = "video/mp4"
|
||||||
|
@ -47,7 +46,7 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
|
||||||
jsonValues := j.Map()
|
jsonValues := j.Map()
|
||||||
|
|
||||||
for key, val := range jsonValues {
|
for key, val := range jsonValues {
|
||||||
data.json[key] = val.String()
|
data.json[key] = SanitizeString(val.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
if fileName, ok := data.json["FileName"]; ok && fileName != "" && originalName != "" && fileName != originalName {
|
if fileName, ok := data.json["FileName"]; ok && fileName != "" && originalName != "" && fileName != originalName {
|
||||||
|
@ -134,22 +133,22 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
|
||||||
}
|
}
|
||||||
case []string:
|
case []string:
|
||||||
existing := fieldValue.Interface().([]string)
|
existing := fieldValue.Interface().([]string)
|
||||||
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String()))))
|
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, SanitizeString(jsonValue.String()))))
|
||||||
case Keywords:
|
case Keywords:
|
||||||
existing := fieldValue.Interface().(Keywords)
|
existing := fieldValue.Interface().(Keywords)
|
||||||
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String()))))
|
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, SanitizeString(jsonValue.String()))))
|
||||||
case projection.Type:
|
case projection.Type:
|
||||||
if !fieldValue.IsZero() {
|
if !fieldValue.IsZero() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
fieldValue.Set(reflect.ValueOf(projection.Type(strings.TrimSpace(jsonValue.String()))))
|
fieldValue.Set(reflect.ValueOf(projection.Type(SanitizeString(jsonValue.String()))))
|
||||||
case string:
|
case string:
|
||||||
if !fieldValue.IsZero() {
|
if !fieldValue.IsZero() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
fieldValue.SetString(strings.TrimSpace(jsonValue.String()))
|
fieldValue.SetString(SanitizeString(jsonValue.String()))
|
||||||
case bool:
|
case bool:
|
||||||
if !fieldValue.IsZero() {
|
if !fieldValue.IsZero() {
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -5,6 +5,7 @@ import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/photoprism/photoprism/pkg/clean"
|
||||||
"github.com/photoprism/photoprism/pkg/fs"
|
"github.com/photoprism/photoprism/pkg/fs"
|
||||||
"github.com/photoprism/photoprism/pkg/txt"
|
"github.com/photoprism/photoprism/pkg/txt"
|
||||||
)
|
)
|
||||||
|
@ -64,7 +65,7 @@ func SanitizeString(s string) string {
|
||||||
|
|
||||||
s = strings.TrimSpace(s)
|
s = strings.TrimSpace(s)
|
||||||
|
|
||||||
return strings.Replace(s, "\"", "", -1)
|
return clean.Unicode(strings.Replace(s, "\"", "", -1))
|
||||||
}
|
}
|
||||||
|
|
||||||
// SanitizeUID normalizes unique IDs found in XMP or Exif metadata.
|
// SanitizeUID normalizes unique IDs found in XMP or Exif metadata.
|
||||||
|
|
21
pkg/clean/unicode.go
Normal file
21
pkg/clean/unicode.go
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
package clean
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
|
// Unicode returns s with every Unicode replacement character (U+FFFD) removed.
//
// Go decodes each invalid UTF-8 byte in a string as U+FFFD, so malformed byte
// sequences are dropped together with literal replacement characters and the
// result is always valid UTF-8. An empty string yields an empty string.
func Unicode(s string) string {
	// strings.Map drops a rune when the mapping returns a negative value and
	// hands back the input unchanged (no copy) when nothing had to be removed.
	return strings.Map(func(r rune) rune {
		if r == '\uFFFD' {
			return -1
		}

		return r
	}, s)
}
|
16
pkg/clean/unicode_test.go
Normal file
16
pkg/clean/unicode_test.go
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
package clean
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestUnicode checks that Unicode returns valid and empty input strings unchanged.
func TestUnicode(t *testing.T) {
|
||||||
|
t.Run("Valid", func(t *testing.T) {
|
||||||
|
assert.Equal(t, "Naïve bonds and futures surge as inflation eases 🚀🚀🚀", Unicode("Naïve bonds and futures surge as inflation eases 🚀🚀🚀"))
|
||||||
|
})
|
||||||
|
t.Run("Empty", func(t *testing.T) {
|
||||||
|
assert.Equal(t, "", Unicode(""))
|
||||||
|
})
|
||||||
|
}
|
Loading…
Reference in a new issue