photoprism/internal/face/net.go
Michael Mayer d198a056a7 People: Improve face matching performance and accuracy #22
By default, matching is now limited to unmatched faces and markers.
2021-08-29 13:26:05 +02:00

209 lines
4.9 KiB
Go

package face
import (
"bytes"
"fmt"
"image"
"io/ioutil"
"os"
"path"
"path/filepath"
"runtime/debug"
"sync"
"github.com/photoprism/photoprism/pkg/fs"
"github.com/disintegration/imaging"
"github.com/photoprism/photoprism/pkg/txt"
tf "github.com/tensorflow/tensorflow/tensorflow/go"
)
// Net wraps a TensorFlow saved model that computes Facenet face embeddings.
type Net struct {
	// model is the loaded saved model; it stays nil until loadModel succeeds.
	model *tf.SavedModel
	// modelPath is the saved model location; cachePath is the base folder
	// below which face crops are cached in a "faces" subfolder.
	modelPath, cachePath string
	// disabled makes Detect return the detected faces without embeddings.
	disabled bool
	// NOTE(review): modelName is neither read nor written in this file —
	// confirm it is still needed.
	modelName string
	// modelTags are the tags passed to tf.LoadSavedModel.
	modelTags []string
	// mutex serializes loadModel calls.
	mutex sync.Mutex
}
// NewNet returns a new TensorFlow Facenet instance.
//
// modelPath is the saved model location, cachePath is the base folder below
// which face crops are cached in a "faces" subfolder, and disabled turns off
// the embedding computation in Detect. The model is not loaded here; Detect
// loads it on demand.
//
// Creating the cache folder is best-effort: a failure is logged together with
// its cause, and the instance is returned regardless.
func NewNet(modelPath, cachePath string, disabled bool) *Net {
	if err := os.MkdirAll(filepath.Join(cachePath, "faces"), os.ModePerm); err != nil {
		log.Errorf("faces: failed creating cache folder: %v", err)
	}

	return &Net{
		modelPath: modelPath,
		cachePath: cachePath,
		disabled:  disabled,
		modelTags: []string{"serve"},
	}
}
// Detect finds the faces in the image fileName and, unless the instance is
// disabled, attaches Facenet embeddings to each of them.
//
// Errors from face detection or model loading are returned together with the
// faces found so far. A face whose crop cannot be created is logged and left
// without embeddings; faces positioned at row 0 and column 0 are skipped.
func (t *Net) Detect(fileName string) (faces Faces, err error) {
	if faces, err = Detect(fileName); err != nil {
		return faces, err
	}

	if t.disabled {
		return faces, nil
	}

	if err = t.loadModel(); err != nil {
		return faces, err
	}

	// The hash is part of the cached crop file names, see getFaceCrop.
	hash := fs.Hash(fileName)

	for idx := range faces {
		area := faces[idx].Face

		if area.Col == 0 && area.Row == 0 {
			continue
		}

		crop, cropErr := t.getFaceCrop(fileName, hash, area)

		if cropErr != nil {
			log.Errorf("faces: failed to decode image: %v", cropErr)
			continue
		}

		// Only overwrite existing embeddings when inference produced a result.
		if result := t.getEmbeddings(crop); len(result) > 0 {
			faces[idx].Embeddings = result
		}
	}

	return faces, nil
}
// ModelLoaded reports whether the TensorFlow model has been loaded, i.e.
// whether the model field is set.
//
// NOTE(review): the field is read without holding the mutex — confirm that
// callers other than loadModel tolerate this.
func (t *Net) ModelLoaded() bool {
	isLoaded := t.model != nil

	return isLoaded
}
// loadModel loads the TensorFlow saved model from modelPath using modelTags,
// unless a model is already set. The mutex is held for the whole call so that
// concurrent callers do not load the model more than once.
//
// It returns the error reported by tf.LoadSavedModel, if any.
func (t *Net) loadModel() error {
	t.mutex.Lock()
	defer t.mutex.Unlock()

	// Nothing to do if a previous call already succeeded.
	if t.model != nil {
		return nil
	}

	dir := path.Join(t.modelPath)

	log.Infof("faces: loading %s", txt.Quote(filepath.Base(dir)))

	loaded, loadErr := tf.LoadSavedModel(dir, t.modelTags, nil)

	if loadErr != nil {
		return loadErr
	}

	t.model = loaded

	return nil
}
// getFaceCrop returns the square crop of the face at position f in the image
// fileName, resized to CropSize x CropSize pixels.
//
// Crops are cached as JPEG files in a folder below <cachePath>/faces that is
// named after the first three characters of fileHash. A cached crop is returned
// if it exists and can be opened; otherwise the source image is decoded with
// auto-orientation enabled, cropped, resized, and then saved to the cache.
// Caching is best-effort: cache errors are logged, not returned.
//
// An error is returned if fileHash has fewer than three characters, or if the
// source image cannot be read or decoded.
func (t *Net) getFaceCrop(fileName, fileHash string, f Point) (img image.Image, err error) {
	// The first three hash characters are used as nested folder names; without
	// this check a shorter (e.g. empty) hash would cause an index out of range
	// panic below.
	if len(fileHash) < 3 {
		return nil, fmt.Errorf("faces: invalid file hash %q", fileHash)
	}

	cacheFolder := filepath.Join(t.cachePath, "faces", string(fileHash[0]), string(fileHash[1]), string(fileHash[2]))

	if mkErr := os.MkdirAll(cacheFolder, os.ModePerm); mkErr != nil {
		log.Errorf("faces: failed creating cache folder: %v", mkErr)
	}

	cacheFile := filepath.Join(cacheFolder, fmt.Sprintf("%s-%s%s", fileHash, f.String(), fs.JpegExt))

	// Prefer the cached crop; fall through to re-creating it if it is unreadable.
	if fs.FileExists(cacheFile) {
		cached, openErr := imaging.Open(cacheFile)

		if openErr == nil {
			log.Debugf("faces: using cached crop %s", filepath.Base(cacheFile))
			return cached, nil
		}

		log.Errorf("faces: failed loading cached crop %s: %v", filepath.Base(cacheFile), openErr)
	}

	// Report read errors directly instead of letting the decoder fail on an
	// empty buffer, which would hide the actual cause.
	imageBuffer, readErr := ioutil.ReadFile(fileName)

	if readErr != nil {
		return nil, readErr
	}

	if img, err = imaging.Decode(bytes.NewReader(imageBuffer), imaging.AutoOrientation(true)); err != nil {
		return nil, err
	}

	// NOTE(review): the first value returned by TopLeft is used as the vertical
	// and the second as the horizontal coordinate — confirm TopLeft returns
	// (row, col).
	x, y := f.TopLeft()

	img = imaging.Crop(img, image.Rect(y, x, y+f.Scale, x+f.Scale))
	img = imaging.Fill(img, CropSize, CropSize, imaging.Center, imaging.Lanczos)

	if saveErr := imaging.Save(img, cacheFile); saveErr != nil {
		log.Errorf("faces: failed caching crop %s: %v", filepath.Base(cacheFile), saveErr)
	} else {
		log.Debugf("faces: saved crop %s", filepath.Base(cacheFile))
	}

	return img, nil
}
// getEmbeddings runs the loaded model on img and returns the value of its
// "embeddings" output.
//
// The image is converted to a CropSize x CropSize tensor and fed to the
// "input" operation, with "phase_train" set to false. The model must have been
// loaded before this method is called.
//
// Failures are logged and reported as a nil result: tensor conversion errors,
// session errors, missing output, and output of an unexpected type.
func (t *Net) getEmbeddings(img image.Image) [][]float32 {
	tensor, err := imageToTensor(img, CropSize, CropSize)

	if err != nil {
		log.Errorf("faces: failed to convert image to tensor: %v", err)
		// Do not run the session without a valid input tensor.
		return nil
	}

	// TODO: pre-whiten image as in facenet

	trainPhaseBoolTensor, err := tf.NewTensor(false)

	if err != nil {
		log.Errorf("faces: %s", err)
		return nil
	}

	output, err := t.model.Session.Run(
		map[tf.Output]*tf.Tensor{
			t.model.Graph.Operation("input").Output(0):       tensor,
			t.model.Graph.Operation("phase_train").Output(0): trainPhaseBoolTensor,
		},
		[]tf.Output{
			t.model.Graph.Operation("embeddings").Output(0),
		},
		nil)

	if err != nil {
		log.Errorf("faces: %s", err)
		return nil
	}

	if len(output) < 1 {
		log.Errorf("faces: inference failed, no output")
		return nil
	}

	// Checked assertion: an unexpected output type must not crash the caller.
	embeddings, ok := output[0].Value().([][]float32)

	if !ok {
		log.Errorf("faces: inference failed, unexpected output type %T", output[0].Value())
		return nil
	}

	return embeddings
}
// imageToTensor converts img into a float32 tensor laid out as
// [1][imageHeight][imageWidth][3], holding the red, green and blue channel of
// each pixel after scaling with convertValue.
//
// Pixels are read relative to the top-left corner of img.Bounds(), so images
// whose bounds do not start at (0, 0), such as sub-images, are converted
// correctly as well.
//
// An error is returned if a dimension is not positive, if the tensor cannot be
// created, or if a panic occurs during conversion (for example because img is
// nil); a recovered panic is reported together with a stack trace.
func imageToTensor(img image.Image, imageHeight, imageWidth int) (tfTensor *tf.Tensor, err error) {
	defer func() {
		if r := recover(); r != nil {
			// %v formats any panic value, not just strings and errors.
			err = fmt.Errorf("faces: %v (panic)\nstack: %s", r, debug.Stack())
		}
	}()

	if imageHeight <= 0 || imageWidth <= 0 {
		return tfTensor, fmt.Errorf("faces: image width and height must be > 0")
	}

	origin := img.Bounds().Min

	var tfImage [1][][][3]float32

	// The final size is known, so allocate the rows once instead of appending.
	tfImage[0] = make([][][3]float32, imageHeight)

	// Iterate row by row to match the memory layout of the tensor data.
	for j := range tfImage[0] {
		row := make([][3]float32, imageWidth)

		for i := range row {
			r, g, b, _ := img.At(origin.X+i, origin.Y+j).RGBA()
			row[i] = [3]float32{convertValue(r), convertValue(g), convertValue(b)}
		}

		tfImage[0][j] = row
	}

	return tf.NewTensor(tfImage)
}
// convertValue maps a color channel value to a float32 centered on zero: the
// value is shifted right by 8 bits, then 127.5 is subtracted and the result is
// divided by 127.5. For 16-bit input (0 to 0xFFFF) this yields -1 to 1.
func convertValue(value uint32) float32 {
	const half float32 = 127.5

	level := float32(value >> 8)

	return (level - half) / half
}