2020-01-09 01:21:09 +01:00
|
|
|
package classify
|
2018-09-14 12:44:15 +02:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
2019-04-30 13:17:01 +02:00
|
|
|
"bytes"
|
2020-07-14 18:00:32 +02:00
|
|
|
"fmt"
|
2019-04-30 13:17:01 +02:00
|
|
|
"image"
|
|
|
|
"math"
|
2018-09-14 12:44:15 +02:00
|
|
|
"os"
|
2019-12-13 16:25:47 +01:00
|
|
|
"path"
|
2019-12-11 04:12:54 +01:00
|
|
|
"path/filepath"
|
2021-05-06 12:45:38 +02:00
|
|
|
"runtime/debug"
|
2018-09-14 12:44:15 +02:00
|
|
|
"sort"
|
2019-05-16 08:41:16 +02:00
|
|
|
"strings"
|
2018-10-31 07:14:33 +01:00
|
|
|
|
2019-04-30 13:17:01 +02:00
|
|
|
"github.com/disintegration/imaging"
|
2020-05-03 18:00:50 +02:00
|
|
|
"github.com/photoprism/photoprism/pkg/txt"
|
2018-10-31 07:14:33 +01:00
|
|
|
tf "github.com/tensorflow/tensorflow/tensorflow/go"
|
2018-09-14 12:44:15 +02:00
|
|
|
)
|
|
|
|
|
2020-02-21 01:14:45 +01:00
|
|
|
// TensorFlow is a wrapper for tensorflow low-level API.
|
2018-09-14 12:44:15 +02:00
|
|
|
type TensorFlow struct {
|
2019-05-16 08:41:16 +02:00
|
|
|
model *tf.SavedModel
|
2020-01-09 01:21:09 +01:00
|
|
|
modelsPath string
|
|
|
|
disabled bool
|
2019-12-13 16:25:47 +01:00
|
|
|
modelName string
|
|
|
|
modelTags []string
|
2019-05-16 08:41:16 +02:00
|
|
|
labels []string
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
|
|
|
|
2020-01-09 01:21:09 +01:00
|
|
|
// New returns new TensorFlow instance with Nasnet model.
|
|
|
|
func New(modelsPath string, disabled bool) *TensorFlow {
|
|
|
|
return &TensorFlow{modelsPath: modelsPath, disabled: disabled, modelName: "nasnet", modelTags: []string{"photoprism"}}
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
|
|
|
|
2020-02-21 01:14:45 +01:00
|
|
|
// Init initialises tensorflow models if not disabled
|
2020-01-02 02:58:26 +01:00
|
|
|
func (t *TensorFlow) Init() (err error) {
|
2020-01-09 01:21:09 +01:00
|
|
|
if t.disabled {
|
2020-01-06 06:59:35 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-01-09 01:21:09 +01:00
|
|
|
return t.loadModel()
|
2020-01-02 02:58:26 +01:00
|
|
|
}
|
|
|
|
|
2020-01-09 01:21:09 +01:00
|
|
|
// File returns matching labels for a jpeg media file.
|
|
|
|
func (t *TensorFlow) File(filename string) (result Labels, err error) {
|
|
|
|
if t.disabled {
|
2020-01-06 06:59:35 +01:00
|
|
|
return result, nil
|
|
|
|
}
|
|
|
|
|
2021-10-06 07:10:50 +02:00
|
|
|
imageBuffer, err := os.ReadFile(filename)
|
2018-09-14 12:44:15 +02:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2019-06-04 18:26:35 +02:00
|
|
|
return t.Labels(imageBuffer)
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
|
|
|
|
2019-06-05 18:25:20 +02:00
|
|
|
// Labels returns matching labels for a jpeg media string.
|
2019-06-04 18:26:35 +02:00
|
|
|
func (t *TensorFlow) Labels(img []byte) (result Labels, err error) {
|
2021-05-06 12:45:38 +02:00
|
|
|
defer func() {
|
|
|
|
if r := recover(); r != nil {
|
2021-05-06 12:50:53 +02:00
|
|
|
err = fmt.Errorf("classify: %s (inference panic)\nstack: %s", r, debug.Stack())
|
2021-05-06 12:45:38 +02:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2020-01-09 01:21:09 +01:00
|
|
|
if t.disabled {
|
2020-01-06 06:59:35 +01:00
|
|
|
return result, nil
|
|
|
|
}
|
|
|
|
|
2018-09-14 12:44:15 +02:00
|
|
|
if err := t.loadModel(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2020-07-14 18:00:32 +02:00
|
|
|
// Create tensor from image.
|
|
|
|
tensor, err := t.createTensor(img, "jpeg")
|
2018-09-14 12:44:15 +02:00
|
|
|
|
|
|
|
if err != nil {
|
2021-05-06 12:50:53 +02:00
|
|
|
return nil, err
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
|
|
|
|
2020-07-14 18:00:32 +02:00
|
|
|
// Run inference.
|
2019-04-30 13:17:01 +02:00
|
|
|
output, err := t.model.Session.Run(
|
2018-09-14 12:44:15 +02:00
|
|
|
map[tf.Output]*tf.Tensor{
|
2019-04-30 13:17:01 +02:00
|
|
|
t.model.Graph.Operation("input_1").Output(0): tensor,
|
2018-09-14 12:44:15 +02:00
|
|
|
},
|
|
|
|
[]tf.Output{
|
2019-04-30 13:17:01 +02:00
|
|
|
t.model.Graph.Operation("predictions/Softmax").Output(0),
|
2018-09-14 12:44:15 +02:00
|
|
|
},
|
|
|
|
nil)
|
|
|
|
|
|
|
|
if err != nil {
|
2020-07-14 18:00:32 +02:00
|
|
|
return result, fmt.Errorf("classify: %s (run inference)", err.Error())
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
|
|
|
|
2019-04-30 13:17:01 +02:00
|
|
|
if len(output) < 1 {
|
2020-07-14 18:00:32 +02:00
|
|
|
return result, fmt.Errorf("classify: inference failed, no output")
|
2019-04-30 13:17:01 +02:00
|
|
|
}
|
|
|
|
|
2018-09-14 12:44:15 +02:00
|
|
|
// Return best labels
|
2019-06-04 18:26:35 +02:00
|
|
|
result = t.bestLabels(output[0].Value().([][]float32)[0])
|
2019-05-04 17:34:51 +02:00
|
|
|
|
2019-12-11 04:12:54 +01:00
|
|
|
if len(result) > 0 {
|
2020-07-14 18:00:32 +02:00
|
|
|
log.Tracef("classify: image classified as %+v", result)
|
2019-12-11 04:12:54 +01:00
|
|
|
}
|
2019-05-04 17:34:51 +02:00
|
|
|
|
|
|
|
return result, nil
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
|
|
|
|
2019-07-17 11:53:33 +02:00
|
|
|
func (t *TensorFlow) loadLabels(path string) error {
|
|
|
|
modelLabels := path + "/labels.txt"
|
2018-09-14 12:44:15 +02:00
|
|
|
|
2020-07-14 18:00:32 +02:00
|
|
|
log.Infof("classify: loading labels from labels.txt")
|
2019-05-04 17:34:51 +02:00
|
|
|
|
2018-09-14 12:44:15 +02:00
|
|
|
// Load labels
|
2019-05-04 17:34:51 +02:00
|
|
|
f, err := os.Open(modelLabels)
|
|
|
|
|
2018-09-14 12:44:15 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2019-04-30 13:17:01 +02:00
|
|
|
|
2019-05-04 17:34:51 +02:00
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
scanner := bufio.NewScanner(f)
|
2018-09-14 12:44:15 +02:00
|
|
|
|
|
|
|
// Labels are separated by newlines
|
|
|
|
for scanner.Scan() {
|
|
|
|
t.labels = append(t.labels, scanner.Text())
|
|
|
|
}
|
2019-05-04 17:34:51 +02:00
|
|
|
|
2018-09-14 12:44:15 +02:00
|
|
|
if err := scanner.Err(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2019-05-04 17:34:51 +02:00
|
|
|
|
2018-09-14 12:44:15 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-05-06 12:45:38 +02:00
|
|
|
// ModelLoaded tests if the TensorFlow model is loaded.
|
2020-04-30 16:11:27 +02:00
|
|
|
func (t *TensorFlow) ModelLoaded() bool {
|
|
|
|
return t.model != nil
|
|
|
|
}
|
|
|
|
|
2019-07-17 11:53:33 +02:00
|
|
|
func (t *TensorFlow) loadModel() error {
|
2020-04-30 16:11:27 +02:00
|
|
|
if t.ModelLoaded() {
|
2019-07-17 11:53:33 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-01-09 01:21:09 +01:00
|
|
|
modelPath := path.Join(t.modelsPath, t.modelName)
|
2019-07-17 11:53:33 +02:00
|
|
|
|
2020-07-17 16:09:55 +02:00
|
|
|
log.Infof("classify: loading %s", txt.Quote(filepath.Base(modelPath)))
|
2019-07-17 11:53:33 +02:00
|
|
|
|
|
|
|
// Load model
|
2019-12-13 16:25:47 +01:00
|
|
|
model, err := tf.LoadSavedModel(modelPath, t.modelTags, nil)
|
2019-07-17 11:53:33 +02:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
t.model = model
|
|
|
|
|
2019-12-13 16:25:47 +01:00
|
|
|
return t.loadLabels(modelPath)
|
2019-07-17 11:53:33 +02:00
|
|
|
}
|
|
|
|
|
2020-02-21 01:14:45 +01:00
|
|
|
// bestLabels returns the best 5 labels (if enough high probability labels) from the prediction of the model
|
2019-06-04 18:26:35 +02:00
|
|
|
func (t *TensorFlow) bestLabels(probabilities []float32) Labels {
|
|
|
|
var result Labels
|
2019-05-16 08:41:16 +02:00
|
|
|
|
2018-09-14 12:44:15 +02:00
|
|
|
for i, p := range probabilities {
|
|
|
|
if i >= len(t.labels) {
|
2020-02-21 01:14:45 +01:00
|
|
|
// break if probabilities and labels does not match
|
2018-09-14 12:44:15 +02:00
|
|
|
break
|
|
|
|
}
|
2019-05-04 17:34:51 +02:00
|
|
|
|
2020-02-21 01:14:45 +01:00
|
|
|
// discard labels with low probabilities
|
2019-12-16 20:22:46 +01:00
|
|
|
if p < 0.1 {
|
2019-05-06 23:18:10 +02:00
|
|
|
continue
|
|
|
|
}
|
2019-05-04 17:34:51 +02:00
|
|
|
|
2019-05-16 08:41:16 +02:00
|
|
|
labelText := strings.ToLower(t.labels[i])
|
|
|
|
|
2021-09-23 23:46:17 +02:00
|
|
|
rule, _ := Rules.Find(labelText)
|
2019-05-16 08:41:16 +02:00
|
|
|
|
2020-02-21 01:14:45 +01:00
|
|
|
// discard labels that don't met the threshold
|
2019-05-16 08:41:16 +02:00
|
|
|
if p < rule.Threshold {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-02-21 01:14:45 +01:00
|
|
|
// Get rule label name instead of t.labels name if it exists
|
2019-06-05 10:18:03 +02:00
|
|
|
if rule.Label != "" {
|
|
|
|
labelText = rule.Label
|
2019-05-16 08:41:16 +02:00
|
|
|
}
|
|
|
|
|
2019-06-05 10:18:03 +02:00
|
|
|
labelText = strings.TrimSpace(labelText)
|
|
|
|
|
2019-06-09 05:22:53 +02:00
|
|
|
uncertainty := 100 - int(math.Round(float64(p*100)))
|
2019-06-04 18:26:35 +02:00
|
|
|
|
2020-05-14 11:57:26 +02:00
|
|
|
result = append(result, Label{Name: labelText, Source: SrcImage, Uncertainty: uncertainty, Priority: rule.Priority, Categories: rule.Categories})
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Sort by probability
|
2019-12-14 14:38:43 +01:00
|
|
|
sort.Sort(result)
|
2019-05-04 17:34:51 +02:00
|
|
|
|
2021-05-06 12:45:38 +02:00
|
|
|
// Return the best labels only.
|
2019-05-16 08:41:16 +02:00
|
|
|
if l := len(result); l < 5 {
|
2019-05-04 17:34:51 +02:00
|
|
|
return result[:l]
|
2019-05-16 08:41:16 +02:00
|
|
|
} else {
|
|
|
|
return result[:5]
|
2019-05-04 17:34:51 +02:00
|
|
|
}
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
|
|
|
|
2020-07-14 18:00:32 +02:00
|
|
|
// createTensor converts bytes jpeg image in a tensor object required as tensorflow model input
|
|
|
|
func (t *TensorFlow) createTensor(image []byte, imageFormat string) (*tf.Tensor, error) {
|
2019-05-01 14:54:11 +02:00
|
|
|
img, err := imaging.Decode(bytes.NewReader(image), imaging.AutoOrientation(true))
|
2019-04-30 13:17:01 +02:00
|
|
|
|
2018-09-14 12:44:15 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2019-04-30 13:17:01 +02:00
|
|
|
|
|
|
|
width, height := 224, 224
|
|
|
|
|
2019-05-13 18:01:50 +02:00
|
|
|
img = imaging.Fill(img, width, height, imaging.Center, imaging.Lanczos)
|
2019-04-30 13:17:01 +02:00
|
|
|
|
2021-05-06 12:45:38 +02:00
|
|
|
return imageToTensor(img, width, height)
|
2019-04-30 13:17:01 +02:00
|
|
|
}
|
|
|
|
|
2021-05-06 12:45:38 +02:00
|
|
|
func imageToTensor(img image.Image, imageHeight, imageWidth int) (tfTensor *tf.Tensor, err error) {
|
|
|
|
defer func() {
|
|
|
|
if r := recover(); r != nil {
|
|
|
|
err = fmt.Errorf("classify: %s (panic)\nstack: %s", r, debug.Stack())
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
if imageHeight <= 0 || imageWidth <= 0 {
|
|
|
|
return tfTensor, fmt.Errorf("classify: image width and height must be > 0")
|
|
|
|
}
|
|
|
|
|
2019-04-30 13:17:01 +02:00
|
|
|
var tfImage [1][][][3]float32
|
|
|
|
|
|
|
|
for j := 0; j < imageHeight; j++ {
|
|
|
|
tfImage[0] = append(tfImage[0], make([][3]float32, imageWidth))
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
2019-04-30 13:17:01 +02:00
|
|
|
|
|
|
|
for i := 0; i < imageWidth; i++ {
|
|
|
|
for j := 0; j < imageHeight; j++ {
|
|
|
|
r, g, b, _ := img.At(i, j).RGBA()
|
2021-05-06 12:45:38 +02:00
|
|
|
tfImage[0][j][i][0] = convertValue(r)
|
|
|
|
tfImage[0][j][i][1] = convertValue(g)
|
|
|
|
tfImage[0][j][i][2] = convertValue(b)
|
2019-04-30 13:17:01 +02:00
|
|
|
}
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
2019-04-30 13:17:01 +02:00
|
|
|
|
|
|
|
return tf.NewTensor(tfImage)
|
2018-09-14 12:44:15 +02:00
|
|
|
}
|
|
|
|
|
2021-05-06 12:45:38 +02:00
|
|
|
// convertValue maps a color channel value to a float32 by dropping its lower
// eight bits and rescaling the remainder so that 0 becomes -1 and 255
// becomes 1.
func convertValue(value uint32) float32 {
	const half float32 = 127.5

	channel := float32(value >> 8)

	return (channel - half) / half
}
|