From 5866ba307f260df2e31b59a78b2dbc2e611ce855 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Tue, 14 Jul 2020 15:00:14 +0200 Subject: [PATCH] Backend: Improve detection of generated file names Signed-off-by: Michael Mayer --- pkg/fs/id.go | 10 +++---- pkg/fs/id_test.go | 71 ++++++++++++++++++++++++++--------------------- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/pkg/fs/id.go b/pkg/fs/id.go index 43f29992c..65f0246ea 100644 --- a/pkg/fs/id.go +++ b/pkg/fs/id.go @@ -6,7 +6,7 @@ import ( "github.com/photoprism/photoprism/pkg/rnd" ) -var DscNameRegexp = regexp.MustCompile("\\D{3}[\\d_]\\d{4,8}_?\\d{0,6}_?\\d{0,6}(.JPG)?") +var DscNameRegexp = regexp.MustCompile("\\D{3}[\\d_]\\d{4,8}_?\\d{0,6}_?\\d{0,6}[\\.jpgJPGXx]{0,4}") var UniqueNameRegexp = regexp.MustCompile("[a-f0-9]{8,16}_[a-f0-9]{6,16}_[A-Za-z0-9]{1,20}_?[A-Za-z0-9]{0,4}") // Example: 8263987746_d0a6055c58_o var UUIDNameRegexp = regexp.MustCompile("[A-Fa-f0-9\\-]{16,36}_?[A-Za-z0-9_]{0,20}") // Example: 8263987746_d0a6055c58_o @@ -32,7 +32,7 @@ func IsAsciiID(s string) bool { } for _, r := range s { - if (r < 65 || r > 90) && (r < 48 || r > 57) { + if (r < 65 || r > 90) && (r < 48 || r > 57) && r != 45 && r != 95 { return false } } @@ -76,7 +76,9 @@ func IsGenerated(fileName string) bool { base := BasePrefix(fileName, false) - if IsHash(base) { + if IsAsciiID(base) { + return true + } else if IsHash(base) { return true } else if IsInt(base) { return true @@ -88,8 +90,6 @@ func IsGenerated(fileName string) bool { return true } else if IsCanonical(base) { return true - } else if IsAsciiID(base) { - return true } return false diff --git a/pkg/fs/id_test.go b/pkg/fs/id_test.go index b9793739b..bf2abc562 100644 --- a/pkg/fs/id_test.go +++ b/pkg/fs/id_test.go @@ -10,48 +10,25 @@ func TestIsAsciiID(t *testing.T) { assert.False(t, IsAsciiID("lt9k3pw1wowuy3c2")) assert.False(t, IsAsciiID("dafbfeb8-a129-4e7c-9cf0-e7996a701cdb")) assert.False(t, IsAsciiID("6ba7b810-9dad-11d1-80b4-00c04fd430c8")) - assert.False(t, IsAsciiID("55785BAC-9A4B-4747-B090-EE123FFEE437")) + assert.True(t, IsAsciiID("55785BAC-9A4B-4747-B090-EE123FFEE437")) assert.False(t, IsAsciiID("550e8400-e29b-11d4-a716-446655440000")) assert.False(t, IsAsciiID("IMG_0599.JPG")) assert.True(t, IsAsciiID("DSC10599")) assert.True(t, IsAsciiID("IQVG4929")) - assert.False(t, IsAsciiID("DSC_0599")) + assert.True(t, IsAsciiID("DSC_0599")) assert.False(t, IsAsciiID("iqVG4929")) - assert.False(t, IsAsciiID("20091117_203458_ERROR000")) - assert.False(t, IsAsciiID("20091117_203458_12345678")) + assert.True(t, IsAsciiID("20091117_203458_ERROR000")) + assert.True(t, IsAsciiID("20091117_203458_12345678")) assert.True(t, IsAsciiID("4B1FEF2D1CF4A5BE38B263E0637EDEAD")) assert.True(t, IsAsciiID("123")) - assert.False(t, IsAsciiID("_")) + assert.True(t, IsAsciiID("_")) assert.False(t, IsAsciiID("")) assert.False(t, IsAsciiID("20191117-153400-Central-Park-New-York-2019-3qy.mov")) assert.False(t, IsAsciiID("e98eb86480a72bd585d228a709f0622f90e86cbc.jpg")) assert.False(t, IsAsciiID("IMG_8115.jpg")) assert.False(t, IsAsciiID("01 Introduction Businessmodel.pdf")) assert.False(t, IsAsciiID("A regular file name with 121345678643 numbers")) -} - -func TestIsGenerated(t *testing.T) { - assert.True(t, IsGenerated("lt9k3pw1wowuy3c2")) - assert.True(t, IsGenerated("dafbfeb8-a129-4e7c-9cf0-e7996a701cdb")) - assert.True(t, IsGenerated("6ba7b810-9dad-11d1-80b4-00c04fd430c8")) - assert.True(t, IsGenerated("55785BAC-9A4B-4747-B090-EE123FFEE437")) - assert.True(t, IsGenerated("550e8400-e29b-11d4-a716-446655440000")) - assert.True(t, IsGenerated("IMG_0599.JPG")) - assert.True(t, IsGenerated("DSC10599")) - assert.True(t, IsGenerated("IQVG4929")) - assert.True(t, IsGenerated("49007520716_67ff0ce0ec_4k")) - assert.True(t, IsGenerated("8263987746_d0a6055c58_o")) - assert.True(t, IsGenerated("20091117_203458_ERROR000")) - assert.True(t, IsGenerated("20091117_203458_12345678")) - assert.True(t, IsGenerated("4B1FEF2D1CF4A5BE38B263E0637EDEAD")) - assert.True(t, IsGenerated("123")) - assert.False(t, IsGenerated("_")) - assert.False(t, IsGenerated("")) - assert.False(t, IsGenerated("20191117-153400-Central-Park-New-York-2019-3qy.mov")) - assert.True(t, IsGenerated("e98eb86480a72bd585d228a709f0622f90e86cbc.jpg")) - assert.True(t, IsGenerated("IMG_8115.jpg")) - assert.False(t, IsGenerated("01 Introduction Businessmodel.pdf")) - assert.False(t, IsGenerated("A regular file name with 121345678643 numbers")) + assert.True(t, IsGenerated("2013-06-06_DMC-LX5_P1080235")) } func TestIsInt(t *testing.T) { @@ -94,13 +71,17 @@ func TestIsDscName(t *testing.T) { assert.False(t, IsDscName("55785BAC-9A4B-4747-B090-EE123FFEE437")) assert.False(t, IsDscName("550e8400-e29b-11d4-a716-446655440000")) assert.True(t, IsDscName("IMG_0599.JPG")) + assert.True(t, IsDscName("IMG_0599.jpg")) + assert.True(t, IsDscName("IMG_0599.Jpg")) + assert.True(t, IsDscName("IMG_0599_100px")) + assert.True(t, IsDscName("IMG_8115.jpg")) assert.True(t, IsDscName("IMG_20190721_095643.JPG")) assert.True(t, IsDscName("IMG_20190119_135810_145")) - //TODO - //assert.True(t, IsDscName("IMG_1059_2048px")) + assert.True(t, IsDscName("IMG_1059_2048px")) assert.True(t, IsDscName("IMG_20190721_095643")) assert.True(t, IsDscName("DSC_20190721_095643")) assert.True(t, IsDscName("DSC10599")) + assert.True(t, IsDscName("DSC10599_3500px")) assert.False(t, IsDscName("IQVG4929")) assert.False(t, IsDscName("49007520716_67ff0ce0ec_4k")) assert.False(t, IsDscName("8263987746_d0a6055c58_o")) @@ -112,7 +93,33 @@ func TestIsDscName(t *testing.T) { assert.False(t, IsDscName("")) assert.False(t, IsDscName("20191117-153400-Central-Park-New-York-2019-3qy.mov")) assert.False(t, IsDscName("e98eb86480a72bd585d228a709f0622f90e86cbc.jpg")) - assert.False(t, IsDscName("IMG_8115.jpg")) assert.False(t, IsDscName("01 Introduction Businessmodel.pdf")) assert.False(t, IsDscName("A regular file name with 121345678643 numbers")) } + +func TestIsGenerated(t *testing.T) { + assert.True(t, IsGenerated("lt9k3pw1wowuy3c2")) + assert.True(t, IsGenerated("dafbfeb8-a129-4e7c-9cf0-e7996a701cdb")) + assert.True(t, IsGenerated("6ba7b810-9dad-11d1-80b4-00c04fd430c8")) + assert.True(t, IsGenerated("55785BAC-9A4B-4747-B090-EE123FFEE437")) + assert.True(t, IsGenerated("550e8400-e29b-11d4-a716-446655440000")) + assert.True(t, IsGenerated("IMG_0599.JPG")) + assert.True(t, IsGenerated("DSC10599")) + assert.True(t, IsGenerated("IQVG4929")) + assert.True(t, IsGenerated("49007520716_67ff0ce0ec_4k")) + assert.True(t, IsGenerated("8263987746_d0a6055c58_o")) + assert.True(t, IsGenerated("20091117_203458_ERROR000")) + assert.True(t, IsGenerated("20091117_203458_12345678")) + assert.True(t, IsGenerated("4B1FEF2D1CF4A5BE38B263E0637EDEAD")) + assert.True(t, IsGenerated("123")) + assert.True(t, IsGenerated("_")) + assert.False(t, IsGenerated("")) + assert.False(t, IsGenerated("20191117-153400-Central-Park-New-York-2019-3qy.mov")) + assert.True(t, IsGenerated("e98eb86480a72bd585d228a709f0622f90e86cbc.jpg")) + assert.True(t, IsGenerated("IMG_8115.jpg")) + assert.False(t, IsGenerated("01 Introduction Businessmodel.pdf")) + assert.False(t, IsGenerated("A regular file name with 121345678643 numbers")) + assert.True(t, IsGenerated("2013-06-06_DMC-LX5_P1080235.JPG")) + assert.False(t, IsGenerated("The quick brown fox jumps over the lazy dog")) + assert.False(t, IsGenerated("QUICK BROWN FOX")) +}