Backend: Add NSFW detector

Signed-off-by: Michael Mayer <michael@liquidbytes.net>
This commit is contained in:
Michael Mayer 2019-12-14 15:41:37 +01:00
parent 9987d65933
commit 78eae2f14e
27 changed files with 407 additions and 19 deletions

View file

@ -5,6 +5,7 @@
/assets/resources/database/*
/assets/resources/static/build/*
/assets/resources/nasnet
/assets/resources/nsfw
/assets/testdata
/assets/backups
Dockerfile

1
.gitignore vendored
View file

@ -14,6 +14,7 @@
/assets/testdata
/assets/backups
/assets/resources/nasnet
/assets/resources/nsfw
*.log
# Binaries for programs and plugins

View file

@ -1,4 +1,4 @@
FROM photoprism/development:20191105
FROM photoprism/development:20191214
# Set up project directory
WORKDIR "/go/src/github.com/photoprism/photoprism"

View file

@ -40,7 +40,7 @@ install-assets:
mkdir -p ~/Pictures/Import
mkdir -p ~/Pictures/Export
mkdir -p ~/.local/share/photoprism/resources/database
cp -r assets/resources/static assets/resources/templates assets/resources/nasnet ~/.local/share/photoprism/resources
cp -r assets/resources/static assets/resources/templates assets/resources/nasnet assets/resources/nsfw ~/.local/share/photoprism/resources
rsync -a -v --ignore-existing assets/config/*.yml ~/.config/photoprism
find ~/.local/share/photoprism -name '.*' -type f -delete
dep-js:
@ -49,8 +49,11 @@ dep-go:
go build -v ./...
dep-tensorflow:
scripts/download-nasnet.sh
scripts/download-nsfw.sh
zip-nasnet:
(cd assets/resources && zip -r nasnet.zip nasnet -x "*/.*" -x "*/version.txt")
zip-nsfw:
(cd assets/resources && zip -r nsfw.zip nsfw -x "*/.*" -x "*/version.txt")
build-js:
(cd frontend && env NODE_ENV=production npm run build)
build-go:

View file

@ -82,12 +82,12 @@ RUN npm install --unsafe-perm=true --allow-root -g npm testcafe chromedriver
RUN npm config set cache ~/.cache/npm
# Install Go
ENV GOLANG_VERSION 1.13.4
ENV GOLANG_VERSION 1.13.5
RUN set -eux; \
\
url="https://golang.org/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz"; \
wget -O go.tgz "$url"; \
echo "692d17071736f74be04a72a06dab9cac1cd759377bd85316e52b2227604c004c *go.tgz" | sha256sum -c -; \
echo "512103d7ad296467814a6e3f635631bd35574cab3369a97a323c9a585ccaa569 *go.tgz" | sha256sum -c -; \
tar -C /usr/local -xzf go.tgz; \
rm go.tgz; \
export PATH="/usr/local/go/bin:$PATH"; \
@ -102,6 +102,7 @@ RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 777 "$GOPATH"
# Download TensorFlow model and test files
RUN rm -rf /tmp/* && mkdir -p /tmp/photoprism
RUN wget "https://dl.photoprism.org/tensorflow/nsfw.zip?${BUILD_TAG}" -O /tmp/photoprism/nsfw.zip
RUN wget "https://dl.photoprism.org/tensorflow/nasnet.zip?${BUILD_TAG}" -O /tmp/photoprism/nasnet.zip
RUN wget "https://dl.photoprism.org/fixtures/testdata.zip?${BUILD_TAG}" -O /tmp/photoprism/testdata.zip

View file

@ -1,4 +1,4 @@
FROM photoprism/development:20191105 as build
FROM photoprism/development:20191214 as build
# Set up project directory
WORKDIR "/go/src/github.com/photoprism/photoprism"

View file

@ -11925,9 +11925,9 @@
}
},
"terser": {
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/terser/-/terser-4.4.0.tgz",
"integrity": "sha512-oDG16n2WKm27JO8h4y/w3iqBGAOSCtq7k8dRmrn4Wf9NouL0b2WpMHGChFGZq4nFAQy1FsNJrVQHfurXOSTmOA==",
"version": "4.4.2",
"resolved": "https://registry.npmjs.org/terser/-/terser-4.4.2.tgz",
"integrity": "sha512-Uufrsvhj9O1ikwgITGsZ5EZS6qPokUOkCegS7fYOdGTv+OA90vndUbU6PEjr5ePqHfNUbGyMO7xyIZv2MhsALQ==",
"requires": {
"commander": "^2.20.0",
"source-map": "~0.6.1",
@ -11942,26 +11942,21 @@
}
},
"terser-webpack-plugin": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.4.1.tgz",
"integrity": "sha512-ZXmmfiwtCLfz8WKZyYUuuHf3dMYEjg8NrjHMb0JqHVHVOSkzp3cW2/XG1fP3tRhqEqSzMwzzRQGtAPbs4Cncxg==",
"version": "1.4.3",
"resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.4.3.tgz",
"integrity": "sha512-QMxecFz/gHQwteWwSo5nTc6UaICqN1bMedC5sMtUc7y3Ha3Q8y6ZO0iCR8pq4RJC8Hjf0FEPEHZqcMB/+DFCrA==",
"requires": {
"cacache": "^12.0.2",
"find-cache-dir": "^2.1.0",
"is-wsl": "^1.1.0",
"schema-utils": "^1.0.0",
"serialize-javascript": "^1.7.0",
"serialize-javascript": "^2.1.2",
"source-map": "^0.6.1",
"terser": "^4.1.2",
"webpack-sources": "^1.4.0",
"worker-farm": "^1.7.0"
},
"dependencies": {
"serialize-javascript": {
"version": "1.9.1",
"resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-1.9.1.tgz",
"integrity": "sha512-0Vb/54WJ6k5v8sSWN09S0ora+Hnr+cX40r9F170nT+mSkaxltoE/7R3OrIdBSUv1OoiobH1QoWQbCnAO+e8J1A=="
},
"source-map": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",

193
internal/nsfw/detector.go Normal file
View file

@ -0,0 +1,193 @@
package nsfw
import (
"bufio"
"errors"
"io/ioutil"
"os"
"path/filepath"
tf "github.com/tensorflow/tensorflow/tensorflow/go"
"github.com/tensorflow/tensorflow/tensorflow/go/op"
)
// Detector uses TensorFlow to label drawing, hentai, neutral, porn and sexy images.
// The model is loaded lazily by loadModel on first use.
type Detector struct {
	// model is the loaded TensorFlow saved model; nil until loadModel succeeded.
	model *tf.SavedModel
	// modelPath is the directory the saved model and labels.txt are read from.
	modelPath string
	// modelTags are the tags passed to tf.LoadSavedModel.
	modelTags []string
	// labels holds the lines read from labels.txt in modelPath.
	labels []string
}
// NewDetector creates a detector for the saved model stored in modelPath.
// The model is tagged "serve" and is not loaded until it is first needed.
func NewDetector(modelPath string) *Detector {
	d := new(Detector)
	d.modelPath = modelPath
	d.modelTags = []string{"serve"}

	return d
}
// LabelsFromFile reads the JPEG file with the given name and returns its labels.
// A read error is returned unchanged together with zero-value labels.
func (t *Detector) LabelsFromFile(filename string) (result Labels, err error) {
	var data []byte

	if data, err = ioutil.ReadFile(filename); err != nil {
		return result, err
	}

	return t.Labels(data)
}
// Labels returns matching labels for a JPEG-encoded image passed as raw bytes.
//
// The model is loaded on first use. An error is returned if the model cannot be
// loaded, the image cannot be decoded, inference fails, or the model output does
// not have the expected shape; the returned labels are zero-valued in that case.
func (t *Detector) Labels(img []byte) (result Labels, err error) {
	if err := t.loadModel(); err != nil {
		return result, err
	}

	// Make tensor
	tensor, err := makeTensorFromImage(img, "jpeg")

	if err != nil {
		log.Error(err)
		return result, errors.New("invalid image")
	}

	// Graph.Operation returns nil for unknown names, so check before dereferencing.
	inputOp := t.model.Graph.Operation("input_tensor")
	outputOp := t.model.Graph.Operation("nsfw_cls_model/final_prediction")

	if inputOp == nil || outputOp == nil {
		return result, errors.New("model is missing the expected input or output operation")
	}

	// Run inference
	output, err := t.model.Session.Run(
		map[tf.Output]*tf.Tensor{
			inputOp.Output(0): tensor,
		},
		[]tf.Output{
			outputOp.Output(0),
		},
		nil)

	if err != nil {
		log.Error(err)
		return result, errors.New("could not run inference")
	}

	if len(output) < 1 {
		return result, errors.New("result is empty")
	}

	log.Debugf("output: %+v", output[0].Value())

	// getLabels reads five scores from the first row, so verify the shape
	// here instead of risking a panic on an unexpected model output.
	predictions, ok := output[0].Value().([][]float32)

	if !ok || len(predictions) < 1 || len(predictions[0]) < 5 {
		return result, errors.New("unexpected result format")
	}

	// Return best labels
	result = t.getLabels(predictions[0])

	log.Debugf("tensorflow: image classified as %+v", result)

	return result, nil
}
// loadLabels reads labels.txt from the given directory, one label per line,
// and stores the result in t.labels. The labels are only replaced once the
// whole file has been read successfully.
func (t *Detector) loadLabels(path string) error {
	modelLabels := filepath.Join(path, "labels.txt")

	log.Infof("tensorflow: loading classification labels from labels.txt")

	// Load labels
	f, err := os.Open(modelLabels)

	if err != nil {
		return err
	}

	defer f.Close()

	// Collect into a local slice so a failed or repeated load never leaves
	// partial or duplicated entries behind.
	var labels []string

	scanner := bufio.NewScanner(f)

	// Labels are separated by newlines
	for scanner.Scan() {
		labels = append(labels, scanner.Text())
	}

	if err := scanner.Err(); err != nil {
		return err
	}

	t.labels = labels

	return nil
}
// loadModel loads the saved model and its labels from t.modelPath unless a
// model is already present. The model is only stored on the detector after the
// labels were loaded as well, so a failed attempt is retried on the next call
// instead of being reported as success.
func (t *Detector) loadModel() error {
	if t.model != nil {
		// Already loaded
		return nil
	}

	log.Infof("tensorflow: loading image classification model from \"%s\"", filepath.Base(t.modelPath))

	// Load model
	model, err := tf.LoadSavedModel(t.modelPath, t.modelTags, nil)

	if err != nil {
		return err
	}

	if err := t.loadLabels(t.modelPath); err != nil {
		// Release the session of the model that is being discarded.
		if closeErr := model.Session.Close(); closeErr != nil {
			log.Error(closeErr)
		}

		return err
	}

	t.model = model

	return nil
}
// getLabels maps the first five values of p, in order, to the Drawing, Hentai,
// Neutral, Porn and Sexy scores. p must contain at least five values.
func (t *Detector) getLabels(p []float32) Labels {
	var scores Labels

	scores.Drawing = p[0]
	scores.Hentai = p[1]
	scores.Neutral = p[2]
	scores.Porn = p[3]
	scores.Sexy = p[4]

	return scores
}
// makeTransformImageGraph builds a graph that decodes an encoded image and
// turns it into a normalized single-image batch of 224x224 pixels.
//
// imageFormat "png" selects the PNG decoder; any other value selects the JPEG
// decoder. The returned input is a string placeholder for the encoded image,
// output is the normalized float tensor.
func makeTransformImageGraph(imageFormat string) (graph *tf.Graph, input, output tf.Output, err error) {
	const (
		height, width = 224, 224
		mean          = float32(117)
		scale         = float32(1)
	)

	scope := op.NewScope()
	input = op.Placeholder(scope, tf.String)

	// Decode PNG or JPEG into three channels
	var pixels tf.Output

	switch imageFormat {
	case "png":
		pixels = op.DecodePng(scope, input, op.DecodePngChannels(3))
	default:
		pixels = op.DecodeJpeg(scope, input, op.DecodeJpegChannels(3))
	}

	// Use decoded pixel values as floats
	floats := op.Cast(scope, pixels, tf.Float)

	// Create a batch containing a single image
	batch := op.ExpandDims(scope, floats, op.Const(scope.SubScope("make_batch"), int32(0)))

	// Resize to 224x224 with bilinear interpolation
	resized := op.ResizeBilinear(scope, batch, op.Const(scope.SubScope("size"), []int32{height, width}))

	// Sub and Div perform (value-mean)/scale for each pixel
	centered := op.Sub(scope, resized, op.Const(scope.SubScope("mean"), mean))
	output = op.Div(scope, centered, op.Const(scope.SubScope("scale"), scale))

	graph, err = scope.Finalize()

	return graph, input, output, err
}
// makeTensorFromImage converts an encoded image into the normalized tensor
// produced by the graph from makeTransformImageGraph. The graph is run in a
// temporary session that is closed before returning.
func makeTensorFromImage(image []byte, imageFormat string) (*tf.Tensor, error) {
	encoded, err := tf.NewTensor(string(image))

	if err != nil {
		return nil, err
	}

	g, in, out, err := makeTransformImageGraph(imageFormat)

	if err != nil {
		return nil, err
	}

	sess, err := tf.NewSession(g, nil)

	if err != nil {
		return nil, err
	}

	defer sess.Close()

	feeds := map[tf.Output]*tf.Tensor{in: encoded}
	fetches := []tf.Output{out}

	results, err := sess.Run(feeds, fetches, nil)

	if err != nil {
		return nil, err
	}

	return results[0], nil
}

47
internal/nsfw/nsfw.go Normal file
View file

@ -0,0 +1,47 @@
/*
Package nsfw classifies images with TensorFlow to detect content that is not safe for work (NSFW).
Additional information can be found in our Developer Guide:
https://github.com/photoprism/photoprism/wiki/Storage
*/
package nsfw
import (
"github.com/photoprism/photoprism/internal/event"
)
// log is the logger used throughout this package; it is the one provided by the event package.
var log = event.Log
// Labels holds one score per image category.
type Labels struct {
	Drawing float32
	Hentai  float32
	Neutral float32
	Porn    float32
	Sexy    float32
}

// IsSafe reports whether the scores do not classify the image as NSFW.
func (l *Labels) IsSafe() bool {
	return !l.NSFW()
}

// NSFW reports whether the scores classify the image as not safe for work.
//
// A sufficiently neutral image is considered safe unless its porn score is
// very high. Otherwise any single category exceeding its threshold marks the
// image as NSFW.
func (l *Labels) NSFW() bool {
	switch {
	case l.Neutral > 0.25 && l.Porn < 0.75:
		return false
	case l.Porn > 0.4,
		l.Sexy > 0.5,
		l.Hentai > 0.75,
		l.Drawing > 0.9:
		return true
	default:
		return false
	}
}

101
internal/nsfw/nsfw_test.go Normal file
View file

@ -0,0 +1,101 @@
package nsfw
import (
"os"
"path/filepath"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// Shared fixtures: the detector is created once and reused by all tests,
// reading the model from the repository's assets directory.
var (
	modelPath, _ = filepath.Abs("../../assets/resources/nsfw")
	detector     = NewDetector(modelPath)
)
// TestNSFW classifies every file below testdata in its own subtest.
//
// For files listed in expected, each score must be at least the listed value.
// In addition, a file is expected to be NSFW exactly when its name contains
// "porn" or "hentai".
func TestNSFW(t *testing.T) {
	// detect reports failures on the t it is given, so a subtest never calls
	// FailNow on its parent from the subtest's goroutine.
	detect := func(t *testing.T, filename string) Labels {
		t.Helper()

		result, err := detector.LabelsFromFile(filename)

		if err != nil {
			t.Fatal(err)
		}

		assert.NotNil(t, result)
		assert.IsType(t, Labels{}, result)

		return result
	}

	expected := map[string]Labels{
		"beach_sand.jpg":        {0, 0, 0.9, 0, 0},
		"beach_wood.jpg":        {0, 0, 0.36, 0.59, 0},
		"cat_brown.jpg":         {0, 0, 0.93, 0, 0},
		"cat_yellow_grey.jpg":   {0, 0, 0, 0, 0.01},
		"clock_purple.jpg":      {0.19, 0, 0.80, 0, 0},
		"clowns_colorful.jpg":   {0.06, 0.02, 0.89, 0.01, 0},
		"dog.jpg":               {0.86, 0, 0.12, 0, 0},
		"hentai_1.jpg":          {0.15, 0.84, 0, 0, 0},
		"hentai_2.jpg":          {0, 0.98, 0, 0, 0},
		"hentai_3.jpg":          {0, 0.99, 0, 0, 0},
		"hentai_4.jpg":          {0, 0.94, 0, 0.05, 0},
		"hentai_5.jpg":          {0, 0.85, 0, 0.07, 0},
		"jellyfish_blue.jpg":    {0.29, 0.09, 0.57, 0, 0},
		"limes.jpg":             {0, 0.21, 0.78, 0, 0},
		"ocean_cyan.jpg":        {0, 0, 0.95, 0.03, 0},
		"peacock_blue.jpg":      {0.05, 0.05, 0.49, 0.37, 0},
		"porn_1.jpg":            {0, 0, 0, 0.97, 0},
		"porn_2.jpg":            {0, 0, 0.12, 0.77, 0},
		"porn_3.jpg":            {0, 0, 0, 0.55, 0.41},
		"porn_4.jpg":            {0, 0, 0, 0.99, 0},
		"porn_5.jpg":            {0, 0, 0.11, 0.41, 0.43},
		"porn_6.jpg":            {0, 0.1, 0.04, 0.22, 0.60},
		"porn_7.jpg":            {0, 0.25, 0, 0.66, 0},
		"porn_8.jpg":            {0, 0.12, 0, 0.86, 0.01},
		"porn_9.jpg":            {0.95, 0.02, 0, 0.01, 0},
		"porn_10.jpg":           {0, 0.05, 0, 0.79, 0.13},
		"porn_11.jpg":           {0, 0, 0.09, 0.36, 0.53},
		"sexy_1.jpg":            {0.02, 0.49, 0.01, 0, 0.46},
		"sharks_blue.jpg":       {0.22, 0.007, 0.75, 0, 0},
		"zebra_green_brown.jpg": {0.24, 0.01, 0.73, 0.004, 0.001},
	}

	err := filepath.Walk("testdata", func(filename string, fileInfo os.FileInfo, err error) error {
		if err != nil {
			// Propagate instead of skipping, otherwise a missing testdata
			// directory would make the test pass without checking anything.
			return err
		}

		if fileInfo.IsDir() || strings.HasPrefix(filepath.Base(filename), ".") {
			return nil
		}

		t.Run(filename, func(t *testing.T) {
			l := detect(t, filename)

			basename := filepath.Base(filename)

			t.Logf("labels: %+v", l)

			if e, ok := expected[basename]; ok {
				t.Logf("expected: %+v", e)

				assert.GreaterOrEqual(t, l.Drawing, e.Drawing)
				assert.GreaterOrEqual(t, l.Hentai, e.Hentai)
				assert.GreaterOrEqual(t, l.Neutral, e.Neutral)
				assert.GreaterOrEqual(t, l.Porn, e.Porn)
				assert.GreaterOrEqual(t, l.Sexy, e.Sexy)
			}

			isNSFW := strings.Contains(basename, "porn") || strings.Contains(basename, "hentai")

			assert.Equal(t, isNSFW, l.NSFW())
			assert.Equal(t, !isNSFW, l.IsSafe())
		})

		return nil
	})

	if err != nil {
		t.Fatal(err)
	}
}

BIN
internal/nsfw/testdata/beach_sand.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 109 KiB

BIN
internal/nsfw/testdata/beach_wood.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

BIN
internal/nsfw/testdata/cat_brown.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

BIN
internal/nsfw/testdata/clock_purple.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 96 KiB

BIN
internal/nsfw/testdata/dog.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 350 KiB

BIN
internal/nsfw/testdata/hentai_2.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
internal/nsfw/testdata/limes.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

BIN
internal/nsfw/testdata/ocean_cyan.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

BIN
internal/nsfw/testdata/peacock_blue.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

BIN
internal/nsfw/testdata/sexy_1.jpg vendored Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 178 KiB

BIN
internal/nsfw/testdata/sharks_blue.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

View file

@ -10,7 +10,7 @@ MODEL_HASH="f18b801354e95cade497b4f12e8d2537d04c04f6 $MODEL_ZIP"
MODEL_VERSION="$MODEL_PATH/version.txt"
MODEL_BACKUP="assets/backups/nasnet-$TODAY"
echo "Installing $MODEL_NAME for TensorFlow..."
echo "Installing $MODEL_NAME model for TensorFlow..."
# Create directories
mkdir -p /tmp/photoprism

46
scripts/download-nsfw.sh Executable file
View file

@ -0,0 +1,46 @@
#!/usr/bin/env bash

# Downloads the NSFW TensorFlow model archive and installs it to
# assets/resources/nsfw. An existing model directory is moved to
# assets/backups before the new archive is unpacked.

TODAY=$(date -u +%Y%m%d)

MODEL_NAME="NSFW"
MODEL_URL="https://dl.photoprism.org/tensorflow/nsfw.zip?$TODAY"
MODEL_PATH="assets/resources/nsfw"
MODEL_ZIP="/tmp/photoprism/nsfw.zip"
MODEL_HASH="2e03ad3c6aec27c270c650d0574ff2a6291d992b $MODEL_ZIP"
MODEL_VERSION="$MODEL_PATH/version.txt"
MODEL_BACKUP="assets/backups/nsfw-$TODAY"

# Compare digests only, so the check does not depend on how sha1sum
# separates the digest from the file name in its output.
EXPECTED_SHA1="${MODEL_HASH%% *}"

# Prints the SHA-1 digest of the downloaded archive.
zip_sha1() {
  sha1sum "$MODEL_ZIP" | cut -d ' ' -f 1
}

echo "Installing $MODEL_NAME model for TensorFlow..."

# Create directories
mkdir -p /tmp/photoprism
mkdir -p assets/backups

# Check for update
if [[ -f "$MODEL_ZIP" ]] && [[ "$(zip_sha1)" == "$EXPECTED_SHA1" ]]; then
  if [[ -f "$MODEL_VERSION" ]]; then
    echo "Already up to date."
    exit
  fi
else
  # Download model
  echo "Downloading latest model from $MODEL_URL..."

  if ! wget "$MODEL_URL" -O "$MODEL_ZIP"; then
    echo "Download failed: $MODEL_URL" >&2
    rm -f "$MODEL_ZIP"
    exit 1
  fi

  TMP_HASH=$(sha1sum "$MODEL_ZIP")

  echo "$TMP_HASH"

  # A mismatch is reported but not fatal, so a newly published archive can
  # still be installed before the hash in this script has been updated.
  if [[ "${TMP_HASH%% *}" != "$EXPECTED_SHA1" ]]; then
    echo "Warning: checksum of $MODEL_ZIP does not match expected $EXPECTED_SHA1" >&2
  fi
fi

# Verify the archive before touching the installed model, otherwise a broken
# download would leave the project without any model.
if ! unzip -tq "$MODEL_ZIP" > /dev/null; then
  echo "Archive is corrupt: $MODEL_ZIP" >&2
  exit 1
fi

# Create backup
if [[ -e "$MODEL_PATH" ]]; then
  echo "Creating backup of existing directory: $MODEL_BACKUP"
  rm -rf "$MODEL_BACKUP"
  mv "$MODEL_PATH" "$MODEL_BACKUP"
fi

# Unzip model
if ! unzip "$MODEL_ZIP" -d assets/resources; then
  echo "Could not unpack $MODEL_ZIP" >&2
  exit 1
fi

echo "$MODEL_NAME $TODAY $MODEL_HASH" > "$MODEL_VERSION"

echo "Latest $MODEL_NAME installed."