Metadata: Sanitize bad Unicode strings #2897
Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
parent
61b4be2c6f
commit
ae130dc500
4 changed files with 50 additions and 13 deletions
|
@ -9,15 +9,14 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/photoprism/photoprism/pkg/video"
|
||||
|
||||
"github.com/photoprism/photoprism/pkg/projection"
|
||||
|
||||
"github.com/photoprism/photoprism/pkg/clean"
|
||||
"github.com/photoprism/photoprism/pkg/rnd"
|
||||
"github.com/photoprism/photoprism/pkg/txt"
|
||||
"github.com/tidwall/gjson"
|
||||
"gopkg.in/photoprism/go-tz.v2/tz"
|
||||
|
||||
"github.com/photoprism/photoprism/pkg/clean"
|
||||
"github.com/photoprism/photoprism/pkg/projection"
|
||||
"github.com/photoprism/photoprism/pkg/rnd"
|
||||
"github.com/photoprism/photoprism/pkg/txt"
|
||||
"github.com/photoprism/photoprism/pkg/video"
|
||||
)
|
||||
|
||||
const MimeVideoMP4 = "video/mp4"
|
||||
|
@ -47,7 +46,7 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
|
|||
jsonValues := j.Map()
|
||||
|
||||
for key, val := range jsonValues {
|
||||
data.json[key] = val.String()
|
||||
data.json[key] = SanitizeString(val.String())
|
||||
}
|
||||
|
||||
if fileName, ok := data.json["FileName"]; ok && fileName != "" && originalName != "" && fileName != originalName {
|
||||
|
@ -134,22 +133,22 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
|
|||
}
|
||||
case []string:
|
||||
existing := fieldValue.Interface().([]string)
|
||||
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String()))))
|
||||
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, SanitizeString(jsonValue.String()))))
|
||||
case Keywords:
|
||||
existing := fieldValue.Interface().(Keywords)
|
||||
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String()))))
|
||||
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, SanitizeString(jsonValue.String()))))
|
||||
case projection.Type:
|
||||
if !fieldValue.IsZero() {
|
||||
continue
|
||||
}
|
||||
|
||||
fieldValue.Set(reflect.ValueOf(projection.Type(strings.TrimSpace(jsonValue.String()))))
|
||||
fieldValue.Set(reflect.ValueOf(projection.Type(SanitizeString(jsonValue.String()))))
|
||||
case string:
|
||||
if !fieldValue.IsZero() {
|
||||
continue
|
||||
}
|
||||
|
||||
fieldValue.SetString(strings.TrimSpace(jsonValue.String()))
|
||||
fieldValue.SetString(SanitizeString(jsonValue.String()))
|
||||
case bool:
|
||||
if !fieldValue.IsZero() {
|
||||
continue
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/photoprism/photoprism/pkg/clean"
|
||||
"github.com/photoprism/photoprism/pkg/fs"
|
||||
"github.com/photoprism/photoprism/pkg/txt"
|
||||
)
|
||||
|
@ -64,7 +65,7 @@ func SanitizeString(s string) string {
|
|||
|
||||
s = strings.TrimSpace(s)
|
||||
|
||||
return strings.Replace(s, "\"", "", -1)
|
||||
return clean.Unicode(strings.Replace(s, "\"", "", -1))
|
||||
}
|
||||
|
||||
// SanitizeUID normalizes unique IDs found in XMP or Exif metadata.
|
||||
|
|
21
pkg/clean/unicode.go
Normal file
21
pkg/clean/unicode.go
Normal file
|
@ -0,0 +1,21 @@
|
|||
package clean
|
||||
|
||||
import "strings"
|
||||
|
||||
// Unicode returns s with every Unicode replacement character (U+FFFD) removed.
//
// Bytes in s that are not valid UTF-8 decode to U+FFFD when the string is
// iterated rune by rune, so they are removed as well. All other runes are
// kept in their original order. An empty input yields an empty string.
func Unicode(s string) string {
	// strings.Map drops each rune for which the mapping returns a negative value.
	return strings.Map(func(r rune) rune {
		if r == '\uFFFD' {
			return -1
		}

		return r
	}, s)
}
|
16
pkg/clean/unicode_test.go
Normal file
16
pkg/clean/unicode_test.go
Normal file
|
@ -0,0 +1,16 @@
|
|||
package clean
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestUnicode(t *testing.T) {
|
||||
t.Run("Valid", func(t *testing.T) {
|
||||
assert.Equal(t, "Naïve bonds and futures surge as inflation eases 🚀🚀🚀", Unicode("Naïve bonds and futures surge as inflation eases 🚀🚀🚀"))
|
||||
})
|
||||
t.Run("Empty", func(t *testing.T) {
|
||||
assert.Equal(t, "", Unicode(""))
|
||||
})
|
||||
}
|
Loading…
Reference in a new issue