diff --git a/.dockerignore b/.dockerignore index c123b0794..dc21d2d81 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,6 +5,7 @@ /assets/resources/database/* /assets/resources/static/build/* /assets/resources/nasnet +/assets/resources/nsfw /assets/testdata /assets/backups Dockerfile diff --git a/.gitignore b/.gitignore index e3e06b17e..a7336367a 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ /assets/testdata /assets/backups /assets/resources/nasnet +/assets/resources/nsfw *.log # Binaries for programs and plugins @@ -50,4 +51,4 @@ Thumbs.db .tmp #Karma Coverage Report -frontend/coverage/ \ No newline at end of file +frontend/coverage/ diff --git a/Dockerfile b/Dockerfile index f34f7fbbe..76b98d77d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM photoprism/development:20191105 +FROM photoprism/development:20191214 # Set up project directory WORKDIR "/go/src/github.com/photoprism/photoprism" diff --git a/Makefile b/Makefile index 680201cce..d179bc04d 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,7 @@ install-assets: mkdir -p ~/Pictures/Import mkdir -p ~/Pictures/Export mkdir -p ~/.local/share/photoprism/resources/database - cp -r assets/resources/static assets/resources/templates assets/resources/nasnet ~/.local/share/photoprism/resources + cp -r assets/resources/static assets/resources/templates assets/resources/nasnet assets/resources/nsfw ~/.local/share/photoprism/resources rsync -a -v --ignore-existing assets/config/*.yml ~/.config/photoprism find ~/.local/share/photoprism -name '.*' -type f -delete dep-js: @@ -49,8 +49,11 @@ dep-go: go build -v ./... 
dep-tensorflow: scripts/download-nasnet.sh + scripts/download-nsfw.sh zip-nasnet: (cd assets/resources && zip -r nasnet.zip nasnet -x "*/.*" -x "*/version.txt") +zip-nsfw: + (cd assets/resources && zip -r nsfw.zip nsfw -x "*/.*" -x "*/version.txt") build-js: (cd frontend && env NODE_ENV=production npm run build) build-go: diff --git a/docker/development/Dockerfile b/docker/development/Dockerfile index 7d0d93979..ccd03cef0 100644 --- a/docker/development/Dockerfile +++ b/docker/development/Dockerfile @@ -82,12 +82,12 @@ RUN npm install --unsafe-perm=true --allow-root -g npm testcafe chromedriver RUN npm config set cache ~/.cache/npm # Install Go -ENV GOLANG_VERSION 1.13.4 +ENV GOLANG_VERSION 1.13.5 RUN set -eux; \ \ url="https://golang.org/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz"; \ wget -O go.tgz "$url"; \ - echo "692d17071736f74be04a72a06dab9cac1cd759377bd85316e52b2227604c004c *go.tgz" | sha256sum -c -; \ + echo "512103d7ad296467814a6e3f635631bd35574cab3369a97a323c9a585ccaa569 *go.tgz" | sha256sum -c -; \ tar -C /usr/local -xzf go.tgz; \ rm go.tgz; \ export PATH="/usr/local/go/bin:$PATH"; \ @@ -102,6 +102,7 @@ RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 777 "$GOPATH" # Download TensorFlow model and test files RUN rm -rf /tmp/* && mkdir -p /tmp/photoprism +RUN wget "https://dl.photoprism.org/tensorflow/nsfw.zip?${BUILD_TAG}" -O /tmp/photoprism/nsfw.zip RUN wget "https://dl.photoprism.org/tensorflow/nasnet.zip?${BUILD_TAG}" -O /tmp/photoprism/nasnet.zip RUN wget "https://dl.photoprism.org/fixtures/testdata.zip?${BUILD_TAG}" -O /tmp/photoprism/testdata.zip diff --git a/docker/photoprism/Dockerfile b/docker/photoprism/Dockerfile index bbd308e37..bea22bccb 100644 --- a/docker/photoprism/Dockerfile +++ b/docker/photoprism/Dockerfile @@ -1,4 +1,4 @@ -FROM photoprism/development:20191105 as build +FROM photoprism/development:20191214 as build # Set up project directory WORKDIR "/go/src/github.com/photoprism/photoprism" diff --git a/frontend/package-lock.json 
b/frontend/package-lock.json index c102753c4..0e6fc9f31 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -11925,9 +11925,9 @@ } }, "terser": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/terser/-/terser-4.4.0.tgz", - "integrity": "sha512-oDG16n2WKm27JO8h4y/w3iqBGAOSCtq7k8dRmrn4Wf9NouL0b2WpMHGChFGZq4nFAQy1FsNJrVQHfurXOSTmOA==", + "version": "4.4.2", + "resolved": "https://registry.npmjs.org/terser/-/terser-4.4.2.tgz", + "integrity": "sha512-Uufrsvhj9O1ikwgITGsZ5EZS6qPokUOkCegS7fYOdGTv+OA90vndUbU6PEjr5ePqHfNUbGyMO7xyIZv2MhsALQ==", "requires": { "commander": "^2.20.0", "source-map": "~0.6.1", @@ -11942,26 +11942,21 @@ } }, "terser-webpack-plugin": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.4.1.tgz", - "integrity": "sha512-ZXmmfiwtCLfz8WKZyYUuuHf3dMYEjg8NrjHMb0JqHVHVOSkzp3cW2/XG1fP3tRhqEqSzMwzzRQGtAPbs4Cncxg==", + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.4.3.tgz", + "integrity": "sha512-QMxecFz/gHQwteWwSo5nTc6UaICqN1bMedC5sMtUc7y3Ha3Q8y6ZO0iCR8pq4RJC8Hjf0FEPEHZqcMB/+DFCrA==", "requires": { "cacache": "^12.0.2", "find-cache-dir": "^2.1.0", "is-wsl": "^1.1.0", "schema-utils": "^1.0.0", - "serialize-javascript": "^1.7.0", + "serialize-javascript": "^2.1.2", "source-map": "^0.6.1", "terser": "^4.1.2", "webpack-sources": "^1.4.0", "worker-farm": "^1.7.0" }, "dependencies": { - "serialize-javascript": { - "version": "1.9.1", - "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-1.9.1.tgz", - "integrity": "sha512-0Vb/54WJ6k5v8sSWN09S0ora+Hnr+cX40r9F170nT+mSkaxltoE/7R3OrIdBSUv1OoiobH1QoWQbCnAO+e8J1A==" - }, "source-map": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", diff --git a/internal/nsfw/detector.go b/internal/nsfw/detector.go new file mode 100644 index 000000000..c6e25abcd --- /dev/null +++ 
b/internal/nsfw/detector.go
@@ -0,0 +1,221 @@
+package nsfw
+
+import (
+	"bufio"
+	"errors"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	tf "github.com/tensorflow/tensorflow/tensorflow/go"
+	"github.com/tensorflow/tensorflow/tensorflow/go/op"
+)
+
+// numLabels is the number of class scores the model is expected to return per image.
+const numLabels = 5
+
+// Detector uses TensorFlow to label drawing, hentai, neutral, porn and sexy images.
+type Detector struct {
+	model     *tf.SavedModel
+	modelPath string
+	modelTags []string
+	labels    []string
+}
+
+// NewDetector returns a new detector instance.
+func NewDetector(modelPath string) *Detector {
+	return &Detector{modelPath: modelPath, modelTags: []string{"serve"}}
+}
+
+// LabelsFromFile returns matching labels for a jpeg media file.
+func (t *Detector) LabelsFromFile(filename string) (result Labels, err error) {
+	imageBuffer, err := ioutil.ReadFile(filename)
+
+	if err != nil {
+		return result, err
+	}
+
+	return t.Labels(imageBuffer)
+}
+
+// Labels returns matching labels for a jpeg media string.
+func (t *Detector) Labels(img []byte) (result Labels, err error) {
+	if err := t.loadModel(); err != nil {
+		return result, err
+	}
+
+	// Make tensor
+	tensor, err := makeTensorFromImage(img, "jpeg")
+
+	if err != nil {
+		log.Error(err)
+		return result, errors.New("invalid image")
+	}
+
+	// Look up graph operations first; Operation returns nil for unknown names
+	input := t.model.Graph.Operation("input_tensor")
+	prediction := t.model.Graph.Operation("nsfw_cls_model/final_prediction")
+
+	if input == nil || prediction == nil {
+		return result, errors.New("model is missing required operations")
+	}
+
+	// Run inference
+	output, err := t.model.Session.Run(
+		map[tf.Output]*tf.Tensor{input.Output(0): tensor},
+		[]tf.Output{prediction.Output(0)},
+		nil)
+
+	if err != nil {
+		log.Error(err)
+		return result, errors.New("could not run inference")
+	}
+
+	if len(output) < 1 {
+		return result, errors.New("result is empty")
+	}
+
+	// Validate type and size before indexing to avoid a panic on unexpected model output
+	scores, ok := output[0].Value().([][]float32)
+
+	if !ok || len(scores) < 1 || len(scores[0]) < numLabels {
+		return result, errors.New("unexpected result format")
+	}
+
+	// Return best labels
+	result = t.getLabels(scores[0])
+
+	log.Debugf("tensorflow: image classified as %+v", result)
+
+	return result, nil
+}
+
+// loadLabels reads the newline separated labels from labels.txt in the given model directory.
+func (t *Detector) loadLabels(path string) error {
+	modelLabels := filepath.Join(path, "labels.txt")
+
+	log.Infof("tensorflow: loading classification labels from labels.txt")
+
+	// Load labels
+	f, err := os.Open(modelLabels)
+
+	if err != nil {
+		return err
+	}
+
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+
+	// Collect into a local slice so that a failed or repeated load never leaves partial or duplicate labels
+	var labels []string
+
+	// Labels are separated by newlines
+	for scanner.Scan() {
+		labels = append(labels, scanner.Text())
+	}
+
+	if err := scanner.Err(); err != nil {
+		return err
+	}
+
+	t.labels = labels
+
+	return nil
+}
+
+// loadModel loads the labels and the saved model on first use; later calls are no-ops.
+func (t *Detector) loadModel() error {
+	if t.model != nil {
+		// Already loaded
+		return nil
+	}
+
+	// Load labels first so that the model is only cached once everything is available
+	if err := t.loadLabels(t.modelPath); err != nil {
+		return err
+	}
+
+	log.Infof("tensorflow: loading image classification model from \"%s\"", filepath.Base(t.modelPath))
+
+	// Load model
+	model, err := tf.LoadSavedModel(t.modelPath, t.modelTags, nil)
+
+	if err != nil {
+		return err
+	}
+
+	t.model = model
+
+	return nil
+}
+
+// getLabels maps raw model scores to named labels.
+// p must contain at least numLabels values in the order drawing, hentai, neutral, porn, sexy.
+func (t *Detector) getLabels(p []float32) Labels {
+	return Labels{
+		Drawing: p[0],
+		Hentai:  p[1],
+		Neutral: p[2],
+		Porn:    p[3],
+		Sexy:    p[4],
+	}
+}
+
+// makeTransformImageGraph builds a graph that decodes a PNG or JPEG image, resizes it to 224x224 and normalizes the pixel values.
+func makeTransformImageGraph(imageFormat string) (graph *tf.Graph, input, output tf.Output, err error) {
+	const (
+		H, W  = 224, 224
+		Mean  = float32(117)
+		Scale = float32(1)
+	)
+	s := op.NewScope()
+	input = op.Placeholder(s, tf.String)
+	// Decode PNG or JPEG
+	var decode tf.Output
+	if imageFormat == "png" {
+		decode = op.DecodePng(s, input, op.DecodePngChannels(3))
+	} else {
+		decode = op.DecodeJpeg(s, input, op.DecodeJpegChannels(3))
+	}
+	// Div and Sub perform (value-Mean)/Scale for each pixel
+	output = op.Div(s,
+		op.Sub(s,
+			// Resize to 224x224 with bilinear interpolation
+			op.ResizeBilinear(s,
+				// Create a batch containing a single image
+				op.ExpandDims(s,
+					// Use decoded pixel values
+					op.Cast(s, decode, tf.Float),
+					op.Const(s.SubScope("make_batch"), int32(0))),
+				op.Const(s.SubScope("size"), []int32{H, W})),
+			op.Const(s.SubScope("mean"), Mean)),
+		op.Const(s.SubScope("scale"), Scale))
+	graph, err = s.Finalize()
+	return graph, input, output, err
+}
+
+// makeTensorFromImage runs the image bytes through the transform graph and returns the normalized tensor.
+func makeTensorFromImage(image []byte, imageFormat string) (*tf.Tensor, error) {
+	tensor, err := tf.NewTensor(string(image))
+	if err != nil {
+		return nil, err
+	}
+	graph, input, output, err := makeTransformImageGraph(imageFormat)
+	if err != nil {
+		return nil, err
+	}
+	session, err := tf.NewSession(graph, nil)
+	if err != nil {
+		return nil, err
+	}
+	defer session.Close()
+	normalized, err := session.Run(
+		map[tf.Output]*tf.Tensor{input: tensor},
+		[]tf.Output{output},
+		nil)
+	if err != nil {
+		return nil, err
+	}
+	return normalized[0], nil
+}
diff --git a/internal/nsfw/nsfw.go b/internal/nsfw/nsfw.go
new file mode 100644
index 000000000..300b1cec9
--- /dev/null
+++ b/internal/nsfw/nsfw.go
@@ -0,0 +1,51 @@
+/*
+Package nsfw detects porn images.
+
+Additional information can be found in our Developer Guide:
+
+https://github.com/photoprism/photoprism/wiki/Storage
+*/
+package nsfw
+
+import (
+	"github.com/photoprism/photoprism/internal/event"
+)
+
+var log = event.Log
+
+// Labels contains the score for each image class as returned by the detector.
+type Labels struct {
+	Drawing float32
+	Hentai  float32
+	Neutral float32
+	Porn    float32
+	Sexy    float32
+}
+
+// IsSafe returns true if the image is not classified as NSFW.
+func (l *Labels) IsSafe() bool {
+	return !l.NSFW()
+}
+
+// NSFW returns true if the scores indicate content that is not safe for work.
+func (l *Labels) NSFW() bool {
+	// Images with a neutral score above 0.25 are only flagged if the porn score is at least 0.75
+	if l.Neutral > 0.25 && l.Porn < 0.75 {
+		return false
+	}
+	if l.Porn > 0.4 {
+		return true
+	}
+	if l.Sexy > 0.5 {
+		return true
+	}
+	if l.Hentai > 0.75 {
+		return true
+	}
+	// NOTE(review): very high drawing scores are flagged as well; confirm this is intended
+	if l.Drawing > 0.9 {
+		return true
+	}
+
+	return false
+}
diff --git a/internal/nsfw/nsfw_test.go b/internal/nsfw/nsfw_test.go
new file mode 100644
index 000000000..f4e9a50fe
--- /dev/null
+++ b/internal/nsfw/nsfw_test.go
@@ -0,0 +1,104 @@
+package nsfw
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+var modelPath, _ = filepath.Abs("../../assets/resources/nsfw")
+
+var detector = NewDetector(modelPath)
+
+func TestNSFW(t *testing.T) {
+	// detect takes the *testing.T of the calling (sub)test so that Fatal stops the right test
+	detect := func(t *testing.T, filename string) Labels {
+		t.Helper()
+
+		result, err := detector.LabelsFromFile(filename)
+
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		assert.NotNil(t, result)
+		assert.IsType(t, Labels{}, result)
+
+		return result
+	}
+
+	expected := map[string]Labels{
+		"beach_sand.jpg":        {0, 0, 0.9, 0, 0},
+		"beach_wood.jpg":        {0, 0, 0.36, 0.59, 0},
+		"cat_brown.jpg":         {0, 0, 0.93, 0, 0},
+		"cat_yellow_grey.jpg":   {0, 0, 0, 0, 0.01},
+		"clock_purple.jpg":      {0.19, 0, 0.80, 0, 0},
+		"clowns_colorful.jpg":   {0.06, 0.02, 0.89, 0.01, 0},
+		"dog.jpg":               {0.86, 0, 0.12, 0, 0},
+		"hentai_1.jpg":          {0.15, 0.84, 0, 0, 0},
+		"hentai_2.jpg":          {0, 0.98, 0, 0, 0},
+		"hentai_3.jpg":          {0, 0.99, 0, 0, 0},
+		"hentai_4.jpg":          {0, 0.94, 0, 0.05, 0},
+		"hentai_5.jpg":          {0, 0.85, 0, 0.07, 0},
+		"jellyfish_blue.jpg":    {0.29, 0.09, 0.57, 0, 0},
+		"limes.jpg":             {0, 0.21, 0.78, 0, 0},
+		"ocean_cyan.jpg":        {0, 0, 0.95, 0.03, 0},
+		"peacock_blue.jpg":      {0.05, 0.05, 0.49, 0.37, 0},
+		"porn_1.jpg":            {0, 0, 0, 0.97, 0},
+		"porn_2.jpg":            {0, 0, 0.12, 0.77, 0},
+		"porn_3.jpg":            {0, 0, 0, 0.55, 0.41},
+		"porn_4.jpg":            {0, 0, 0, 0.99, 0},
+		"porn_5.jpg":            {0, 0, 0.11, 0.41, 0.43},
+		"porn_6.jpg":            {0, 0.1, 0.04, 0.22, 0.60},
+		"porn_7.jpg":            {0, 0.25, 0, 0.66, 0},
+		"porn_8.jpg":            {0, 0.12, 0, 0.86, 0.01},
+		"porn_9.jpg":            {0.95, 0.02, 0, 0.01, 0},
+		"porn_10.jpg":           {0, 0.05, 0, 0.79, 0.13},
+		"porn_11.jpg":           {0, 0, 0.09, 0.36, 0.53},
+		"sexy_1.jpg":            {0.02, 0.49, 0.01, 0, 0.46},
+		"sharks_blue.jpg":       {0.22, 0.007, 0.75, 0, 0},
+		"zebra_green_brown.jpg": {0.24, 0.01, 0.73, 0.004, 0.001},
+	}
+
+	err := filepath.Walk("testdata", func(filename string, fileInfo os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		if fileInfo.IsDir() || strings.HasPrefix(filepath.Base(filename), ".") {
+			return nil
+		}
+
+		t.Run(filename, func(t *testing.T) {
+			l := detect(t, filename)
+
+			basename := filepath.Base(filename)
+
+			t.Logf("labels: %+v", l)
+
+			if e, ok := expected[basename]; ok {
+				t.Logf("expected: %+v", e)
+
+				assert.GreaterOrEqual(t, l.Drawing, e.Drawing)
+				assert.GreaterOrEqual(t, l.Hentai, e.Hentai)
+				assert.GreaterOrEqual(t, l.Neutral, e.Neutral)
+				assert.GreaterOrEqual(t, l.Porn, e.Porn)
+				assert.GreaterOrEqual(t, l.Sexy, e.Sexy)
+			}
+
+			isNSFW := strings.Contains(basename, "porn") || strings.Contains(basename, "hentai")
+
+			assert.Equal(t, isNSFW, l.NSFW())
+			assert.Equal(t, !isNSFW, l.IsSafe())
+		})
+
+		return nil
+	})
+
+	if err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/internal/nsfw/testdata/beach_sand.jpg b/internal/nsfw/testdata/beach_sand.jpg
new file mode 100644
index 000000000..ec577fa01
Binary files /dev/null and b/internal/nsfw/testdata/beach_sand.jpg differ
diff --git a/internal/nsfw/testdata/beach_wood.jpg b/internal/nsfw/testdata/beach_wood.jpg
new file mode 100644
index 000000000..cd3caf987
Binary files /dev/null and b/internal/nsfw/testdata/beach_wood.jpg differ
diff --git a/internal/nsfw/testdata/cat_brown.jpg b/internal/nsfw/testdata/cat_brown.jpg
new file mode 100644
index 000000000..564abe4af
Binary files /dev/null and b/internal/nsfw/testdata/cat_brown.jpg differ
diff --git a/internal/nsfw/testdata/cat_yellow_grey.jpg b/internal/nsfw/testdata/cat_yellow_grey.jpg
new file mode 100644
index 000000000..afa057721
Binary files /dev/null and b/internal/nsfw/testdata/cat_yellow_grey.jpg differ
diff --git a/internal/nsfw/testdata/clock_purple.jpg b/internal/nsfw/testdata/clock_purple.jpg
new file mode 100644
index 000000000..bcf13b8f5
Binary files /dev/null and b/internal/nsfw/testdata/clock_purple.jpg differ
diff --git a/internal/nsfw/testdata/clowns_colorful.jpg b/internal/nsfw/testdata/clowns_colorful.jpg
new file mode 100644
index 000000000..355051b1f
Binary files /dev/null and b/internal/nsfw/testdata/clowns_colorful.jpg differ
diff --git a/internal/nsfw/testdata/dog.jpg b/internal/nsfw/testdata/dog.jpg
new file mode 100644
index 000000000..5b15d28a7
Binary files /dev/null and b/internal/nsfw/testdata/dog.jpg differ
diff --git a/internal/nsfw/testdata/hentai_2.jpg b/internal/nsfw/testdata/hentai_2.jpg
new file mode 100644
index 000000000..9e7f48a7b
Binary files /dev/null and b/internal/nsfw/testdata/hentai_2.jpg differ
diff --git a/internal/nsfw/testdata/jellyfish_blue.jpg b/internal/nsfw/testdata/jellyfish_blue.jpg
new file mode 100644
index 000000000..9030b0194
Binary files /dev/null and b/internal/nsfw/testdata/jellyfish_blue.jpg differ
diff --git a/internal/nsfw/testdata/limes.jpg b/internal/nsfw/testdata/limes.jpg
new file mode 100644
index 000000000..84d09de77
Binary files /dev/null and b/internal/nsfw/testdata/limes.jpg differ
diff --git a/internal/nsfw/testdata/ocean_cyan.jpg b/internal/nsfw/testdata/ocean_cyan.jpg
new file mode 100644
index 000000000..9c345e372
Binary files /dev/null and b/internal/nsfw/testdata/ocean_cyan.jpg differ
diff --git a/internal/nsfw/testdata/peacock_blue.jpg b/internal/nsfw/testdata/peacock_blue.jpg
new file mode 100644
index 000000000..e1ec0f052
Binary files /dev/null and b/internal/nsfw/testdata/peacock_blue.jpg differ
diff --git a/internal/nsfw/testdata/sexy_1.jpg b/internal/nsfw/testdata/sexy_1.jpg
new file mode 100755
index 000000000..dafd86956
Binary files /dev/null and b/internal/nsfw/testdata/sexy_1.jpg differ
diff --git a/internal/nsfw/testdata/sharks_blue.jpg b/internal/nsfw/testdata/sharks_blue.jpg
new file mode 100644
index 000000000..fcb04a4a8
Binary files /dev/null and b/internal/nsfw/testdata/sharks_blue.jpg differ
diff --git a/internal/nsfw/testdata/zebra_green_brown.jpg b/internal/nsfw/testdata/zebra_green_brown.jpg
new file mode 100644
index 000000000..db3be1a0f
Binary files /dev/null and b/internal/nsfw/testdata/zebra_green_brown.jpg differ
diff --git a/scripts/download-nasnet.sh b/scripts/download-nasnet.sh
index 94d05cbb2..8c3209ece 100755
--- a/scripts/download-nasnet.sh
+++ b/scripts/download-nasnet.sh
@@ -10,7 +10,7 @@ MODEL_HASH="f18b801354e95cade497b4f12e8d2537d04c04f6 $MODEL_ZIP"
 MODEL_VERSION="$MODEL_PATH/version.txt"
 MODEL_BACKUP="assets/backups/nasnet-$TODAY"
 
-echo "Installing $MODEL_NAME for TensorFlow..."
+echo "Installing $MODEL_NAME model for TensorFlow..."
 
 # Create directories
 mkdir -p /tmp/photoprism
diff --git a/scripts/download-nsfw.sh b/scripts/download-nsfw.sh
new file mode 100755
index 000000000..0b27f9977
--- /dev/null
+++ b/scripts/download-nsfw.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+
+# Downloads the NSFW model for TensorFlow and installs it in assets/resources/nsfw.
+# An existing model directory is moved to assets/backups before the new files are extracted.
+
+TODAY=$(date -u +%Y%m%d)
+
+MODEL_NAME="NSFW"
+MODEL_URL="https://dl.photoprism.org/tensorflow/nsfw.zip?$TODAY"
+MODEL_PATH="assets/resources/nsfw"
+MODEL_ZIP="/tmp/photoprism/nsfw.zip"
+MODEL_SHA1="2e03ad3c6aec27c270c650d0574ff2a6291d992b"
+MODEL_HASH="$MODEL_SHA1 $MODEL_ZIP"
+MODEL_VERSION="$MODEL_PATH/version.txt"
+MODEL_BACKUP="assets/backups/nsfw-$TODAY"
+
+echo "Installing $MODEL_NAME model for TensorFlow..."
+
+# Create directories
+mkdir -p /tmp/photoprism
+mkdir -p assets/backups
+
+# Check for update (compare the digest only, sha1sum output spacing must not matter)
+if [[ -f "${MODEL_ZIP}" ]] && [[ "$(sha1sum "${MODEL_ZIP}" | cut -d ' ' -f 1)" == "${MODEL_SHA1}" ]]; then
+    if [[ -f "${MODEL_VERSION}" ]]; then
+        echo "Already up to date."
+        exit 0
+    fi
+else
+    # Download model
+    echo "Downloading latest model from $MODEL_URL..."
+
+    # Stop here on failure so that a broken download never replaces a working model
+    if ! wget "${MODEL_URL}" -O "${MODEL_ZIP}"; then
+        echo "Download failed: $MODEL_URL" >&2
+        rm -f "${MODEL_ZIP}"
+        exit 1
+    fi
+
+    TMP_HASH=$(sha1sum "${MODEL_ZIP}")
+
+    echo "${TMP_HASH}"
+
+    if [[ "${TMP_HASH%% *}" != "${MODEL_SHA1}" ]]; then
+        echo "Warning: checksum differs from the expected value $MODEL_SHA1" >&2
+    fi
+fi
+
+# Test the archive before the existing model is moved away
+if ! unzip -tq "${MODEL_ZIP}" > /dev/null; then
+    echo "Invalid archive: $MODEL_ZIP" >&2
+    exit 1
+fi
+
+# Create backup
+if [[ -e "${MODEL_PATH}" ]]; then
+    echo "Creating backup of existing directory: $MODEL_BACKUP"
+    rm -rf "${MODEL_BACKUP}"
+    mv "${MODEL_PATH}" "${MODEL_BACKUP}"
+fi
+
+# Unzip model
+if ! unzip "${MODEL_ZIP}" -d assets/resources; then
+    echo "Could not extract $MODEL_ZIP" >&2
+    exit 1
+fi
+
+echo "$MODEL_NAME $TODAY $MODEL_HASH" > "${MODEL_VERSION}"
+
+echo "Latest $MODEL_NAME installed."