People: Detect number of faces (experimental) #22

This commit is contained in:
Michael Mayer 2021-05-25 18:01:21 +02:00
parent f5a1cc6231
commit a6bf89d104
11 changed files with 267 additions and 169 deletions

View file

@ -241,7 +241,7 @@
</v-checkbox>
</v-flex>
<v-flex xs12 sm6 lg3 class="px-2 pb-2 pt-2">
<v-flex v-if="config.experimental" xs12 sm6 lg3 class="px-2 pb-2 pt-2">
<v-checkbox
v-model="settings.features.people"
:disabled="busy"

View file

@ -47,3 +47,24 @@ func LocationLabel(name string, uncertainty int) Label {
func (l Label) Title() string {
	// Clip the name to the default length first, then format the result as a title.
	clipped := txt.Clip(l.Name, txt.ClipDefault)

	return txt.Title(clipped)
}
// FaceLabels returns a single label based on the number of faces found in an image.
//
// A count below one yields an empty result, exactly one face uses the "portrait"
// rule, and any larger count uses the "people" rule. The given source and
// uncertainty are copied to the label unchanged.
func FaceLabels(count int, src string, uncertainty int) Labels {
	var rule LabelRule

	switch {
	case count < 1:
		// No faces, no labels.
		return Labels{}
	case count == 1:
		rule = rules["portrait"]
	default:
		rule = rules["people"]
	}

	label := Label{
		Name:        rule.Label,
		Source:      src,
		Uncertainty: uncertainty,
		Priority:    rule.Priority,
		Categories:  rule.Categories,
	}

	return Labels{label}
}

View file

@ -12,13 +12,13 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"academic gown": {
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"accordion": {
Label: "instrument",
@ -516,7 +516,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"bathtub": {
Label: "living",
@ -630,7 +630,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"bernese mountain dog": {
Label: "dog",
@ -672,7 +672,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"binder": {
Label: "office",
@ -804,13 +804,13 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"bonnet": {
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"book jacket": {
Label: "book",
@ -882,7 +882,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"box turtle": {
Label: "turtle",
@ -924,7 +924,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"breakwater": {
Label: "water",
@ -936,7 +936,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"briard dog": {
Label: "dog",
@ -1134,7 +1134,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"cardigan dog": {
Label: "dog",
@ -1242,7 +1242,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"chain saw": {
Label: "outdoor",
@ -1392,7 +1392,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"clock": {
Label: "display",
@ -1686,7 +1686,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"cup": {
Label: "",
@ -2088,7 +2088,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"feather boa": {
Label: "",
@ -2298,7 +2298,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"gallery": {
Label: "",
@ -2466,7 +2466,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"grand piano": {
Label: "instrument",
@ -2616,7 +2616,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"hair spray": {
Label: "bottle",
@ -2820,7 +2820,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"horizontal bar": {
Label: "",
@ -3054,7 +3054,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"jeep": {
Label: "",
@ -3072,7 +3072,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"jigsaw puzzle": {
Label: "puzzle",
@ -3126,7 +3126,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"king crab": {
Label: "crab",
@ -3198,7 +3198,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"labrador retriever dog": {
Label: "dog",
@ -3516,7 +3516,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"malamute dog": {
Label: "dog",
@ -3660,7 +3660,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"milk can": {
Label: "",
@ -3696,7 +3696,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"minivan": {
Label: "car",
@ -3792,7 +3792,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"mosque": {
Label: "tower",
@ -3870,19 +3870,19 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"neck brace": {
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"necklace": {
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"nematode": {
Label: "worm",
@ -4020,7 +4020,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"ox": {
Label: "cow",
@ -4080,7 +4080,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"palace": {
Label: "historic",
@ -4220,6 +4220,12 @@ var rules = LabelRules{
Priority: 0,
Categories: []string{},
},
"people": {
Label: "people",
Threshold: 0.300000,
Priority: 0,
Categories: []string{},
},
"perfume": {
Label: "bottle",
Threshold: 0.700000,
@ -4428,7 +4434,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"pool table": {
Label: "",
@ -4896,7 +4902,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"sax": {
Label: "instrument",
@ -5256,7 +5262,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"soft-coated wheaten terrier dog": {
Label: "dog",
@ -5274,7 +5280,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"sorrel": {
Label: "",
@ -5538,7 +5544,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"sulphur butterfly": {
Label: "butterfly",
@ -5598,7 +5604,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"swimming trunks": {
Label: "portrait",
@ -6060,7 +6066,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"viaduct": {
Label: "building",
@ -6282,7 +6288,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"white stork": {
Label: "bird",
@ -6330,7 +6336,7 @@ var rules = LabelRules{
Label: "portrait",
Threshold: 0.500000,
Priority: 0,
Categories: []string{"portrait"},
Categories: []string{},
},
"wine bottle": {
Label: "bottle",

View file

@ -143,8 +143,6 @@ rapeseed:
fashion:
label: portrait
threshold: 0.5
categories:
- portrait
vestment:
see: fashion
@ -4099,6 +4097,10 @@ portrait:
categories:
- people
people:
label: people
threshold: 0.3
shower cap:
label: portrait
categories:

View file

@ -17,9 +17,9 @@ type Marker struct {
RefUID string `gorm:"type:VARBINARY(42);index;" json:"UID" yaml:"UID,omitempty"`
MarkerSrc string `gorm:"type:VARBINARY(8);default:'';" json:"Src" yaml:"Src,omitempty"`
MarkerType string `gorm:"type:VARBINARY(8);default:'';" json:"Type" yaml:"Type"`
MarkerScore int `gorm:"type:SMALLINT"`
MarkerLabel string `gorm:"type:VARCHAR(255);" json:"Label" yaml:"Label,omitempty"`
MarkerMeta string `gorm:"type:TEXT;" json:"Meta" yaml:"Meta,omitempty"`
Uncertainty int `gorm:"type:SMALLINT"`
X float32 `gorm:"type:FLOAT;" json:"X" yaml:"X,omitempty"`
Y float32 `gorm:"type:FLOAT;" json:"Y" yaml:"Y,omitempty"`
W float32 `gorm:"type:FLOAT;" json:"W" yaml:"W,omitempty"`

View file

@ -1,22 +1,18 @@
package face
import (
"embed"
_ "embed"
"fmt"
pigo "github.com/esimov/pigo/core"
"github.com/photoprism/photoprism/pkg/fs"
"github.com/photoprism/photoprism/pkg/txt"
_ "image/jpeg"
"io"
"os"
"time"
"github.com/photoprism/photoprism/pkg/fs"
"github.com/photoprism/photoprism/pkg/txt"
pigo "github.com/esimov/pigo/core"
"path/filepath"
"runtime/debug"
)
//go:embed cascade/lps/*
var efs embed.FS
//go:embed cascade/facefinder
var cascadeFile []byte
@ -68,43 +64,52 @@ type Detector struct {
shiftFactor float64
scaleFactor float64
iouThreshold float64
scoreThreshold float32
perturb int
}
func DefaultDetector() *Detector {
return &Detector{
// Detect runs the detection algorithm over the provided source image.
func Detect(fileName string) (faces Faces, err error) {
defer func() {
if r := recover(); r != nil {
log.Errorf("face: %s (panic)\nstack: %s", r, debug.Stack())
}
}()
fd := &Detector{
minSize: 20,
maxSize: 1000,
angle: 0.0,
shiftFactor: 0.1,
scaleFactor: 1.1,
iouThreshold: 0.2,
}
scoreThreshold: 10.0,
perturb: 63,
}
// Detect runs the detection algorithm over the provided source image.
func Detect(fileName string, fd *Detector) (det Faces, err error) {
if !fs.FileExists(fileName) {
return det, fmt.Errorf("face: file '%s' not found", fileName)
return faces, fmt.Errorf("face: file '%s' not found", txt.Quote(filepath.Base(fileName)))
}
start := time.Now()
log.Debugf("face: detecting faces in %s", txt.Quote(filepath.Base(fileName)))
log.Debugf("\nface: detecting faces in %s", txt.Quote(fileName))
faces, params, err := fd.Detect(fileName)
if err != nil {
return det, fmt.Errorf("face: %v (detect faces)", err)
}
det, err = fd.Results(faces, params)
det, params, err := fd.Detect(fileName)
if err != nil {
return det, fmt.Errorf("face: %s (Faces)", err)
return faces, fmt.Errorf("face: %v (detect faces)", err)
}
log.Debugf("\nface: %s done in \x1b[92m%.2fs\n", txt.Quote(fileName), time.Since(start).Seconds())
if det == nil {
return faces, fmt.Errorf("face: no result")
}
return det, nil
faces, err = fd.Faces(det, params)
if err != nil {
return faces, fmt.Errorf("face: %s (faces)", err)
}
return faces, nil
}
// Detect runs the detection algorithm over the provided source image.
@ -117,9 +122,7 @@ func (fd *Detector) Detect(fileName string) (faces []pigo.Detection, params pigo
return faces, params, err
}
defer func(file *os.File) {
_ = file.Close()
}(file)
defer file.Close()
srcFile = file
@ -157,21 +160,19 @@ func (fd *Detector) Detect(fileName string) (faces []pigo.Detection, params pigo
}
// Faces adds landmark coordinates to detected faces and returns the results.
func (fd *Detector) Results(faces []pigo.Detection, params pigo.CascadeParams) (Faces, error) {
func (fd *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams) (Faces, error) {
var (
qThresh float32 = 5.0
perturb = 63
)
var (
detections Faces
results Faces
eyesCoords []Point
landmarkCoords []Point
puploc *pigo.Puploc
)
for _, face := range faces {
if face.Q > qThresh {
for _, face := range det {
if face.Q < fd.scoreThreshold {
continue
}
faceCoord := NewPoint(
"face",
face.Row-face.Scale/2,
@ -185,7 +186,7 @@ func (fd *Detector) Results(faces []pigo.Detection, params pigo.CascadeParams) (
Row: face.Row - int(0.075*float32(face.Scale)),
Col: face.Col - int(0.175*float32(face.Scale)),
Scale: float32(face.Scale) * 0.25,
Perturbs: perturb,
Perturbs: fd.perturb,
}
leftEye := plc.RunDetector(*puploc, params.ImageParams, fd.angle, false)
@ -204,7 +205,7 @@ func (fd *Detector) Results(faces []pigo.Detection, params pigo.CascadeParams) (
Row: face.Row - int(0.075*float32(face.Scale)),
Col: face.Col + int(0.185*float32(face.Scale)),
Scale: float32(face.Scale) * 0.25,
Perturbs: perturb,
Perturbs: fd.perturb,
}
rightEye := plc.RunDetector(*puploc, params.ImageParams, fd.angle, false)
@ -218,9 +219,14 @@ func (fd *Detector) Results(faces []pigo.Detection, params pigo.CascadeParams) (
))
}
if leftEye != nil && rightEye != nil {
for _, eye := range eyeCascades {
for _, flpc := range flpcs[eye] {
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, perturb, false)
if flpc == nil {
continue
}
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, fd.perturb, false)
if flp.Row > 0 && flp.Col > 0 {
landmarkCoords = append(landmarkCoords, NewPoint(
eye,
@ -230,7 +236,7 @@ func (fd *Detector) Results(faces []pigo.Detection, params pigo.CascadeParams) (
))
}
flp = flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, perturb, true)
flp = flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, fd.perturb, true)
if flp.Row > 0 && flp.Col > 0 {
landmarkCoords = append(landmarkCoords, NewPoint(
eye+"_v",
@ -241,11 +247,16 @@ func (fd *Detector) Results(faces []pigo.Detection, params pigo.CascadeParams) (
}
}
}
}
// Find mouth.
for _, mouth := range mouthCascades {
for _, flpc := range flpcs[mouth] {
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, perturb, false)
if flpc == nil {
continue
}
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, fd.perturb, false)
if flp.Row > 0 && flp.Col > 0 {
landmarkCoords = append(landmarkCoords, NewPoint(
"mouth_"+mouth,
@ -256,7 +267,11 @@ func (fd *Detector) Results(faces []pigo.Detection, params pigo.CascadeParams) (
}
}
}
flp := flpcs["lp84"][0].GetLandmarkPoint(leftEye, rightEye, params.ImageParams, perturb, true)
flpc := flpcs["lp84"][0]
if flpc != nil {
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, fd.perturb, true)
if flp.Row > 0 && flp.Col > 0 {
landmarkCoords = append(landmarkCoords, NewPoint(
"lp84",
@ -266,16 +281,18 @@ func (fd *Detector) Results(faces []pigo.Detection, params pigo.CascadeParams) (
))
}
}
}
detections = append(detections, Face{
results = append(results, Face{
Rows: params.ImageParams.Rows,
Cols: params.ImageParams.Cols,
Score: int(face.Q),
Face: faceCoord,
Eyes: eyesCoords,
Landmarks: landmarkCoords,
})
}
}
return detections, nil
return results, nil
}

View file

@ -47,6 +47,7 @@ type Faces []Face
type Face struct {
Rows int `json:"rows,omitempty"`
Cols int `json:"cols,omitempty"`
Score int `json:"score,omitempty"`
Face Point `json:"face,omitempty"`
Eyes Points `json:"eyes,omitempty"`
Landmarks Points `json:"landmarks,omitempty"`

View file

@ -16,7 +16,7 @@ func TestDetect(t *testing.T) {
"2.jpg": 1,
"3.jpg": 1,
"4.jpg": 1,
"5.jpg": 2,
"5.jpg": 1,
"6.jpg": 1,
"7.jpg": 0,
"8.jpg": 0,
@ -30,6 +30,7 @@ func TestDetect(t *testing.T) {
"16.jpg": 1,
"17.jpg": 1,
"18.jpg": 2,
"19.jpg": 0,
}
if err := fastwalk.Walk("testdata", func(fileName string, info os.FileMode) error {
@ -40,7 +41,7 @@ func TestDetect(t *testing.T) {
t.Run(fileName, func(t *testing.T) {
baseName := filepath.Base(fileName)
faces, err := Detect(fileName, DefaultDetector())
faces, err := Detect(fileName)
if err != nil {
t.Fatal(err)
@ -58,7 +59,7 @@ func TestDetect(t *testing.T) {
}
if i, ok := expected[baseName]; ok {
assert.Equal(t, len(faces), i)
assert.Equal(t, i, len(faces))
} else {
t.Errorf("unknown test result for %s", baseName)
}

View file

@ -1,12 +1,16 @@
package face
import (
"embed"
"errors"
"path/filepath"
pigo "github.com/esimov/pigo/core"
)
//go:embed cascade/lps
var efs embed.FS
// FlpCascade holds the binary representation of the facial landmark points cascade files
type FlpCascade struct {
*pigo.PuplocCascade
@ -19,7 +23,7 @@ func ReadCascadeDir(plc *pigo.PuplocCascade, path string) (result map[string][]*
cascades, err := efs.ReadDir(path)
if len(cascades) == 0 {
return nil, errors.New("the cascade directory is empty")
return result, errors.New("the cascade directory is empty")
}
if err != nil {
@ -27,11 +31,20 @@ func ReadCascadeDir(plc *pigo.PuplocCascade, path string) (result map[string][]*
}
for _, cascade := range cascades {
cf, err := filepath.Abs(path + "/" + cascade.Name())
cf := filepath.Join(path, cascade.Name())
f, err := efs.ReadFile(cf)
if err != nil {
return nil, err
return result, err
}
flpc, err := plc.UnpackFlp(cf)
flpc, err := plc.UnpackCascade(f)
if err != nil {
return result, err
}
result[cascade.Name()] = append(result[cascade.Name()], &FlpCascade{flpc, err})
}

BIN
internal/face/testdata/19.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

View file

@ -9,9 +9,11 @@ import (
"time"
"github.com/jinzhu/gorm"
"github.com/photoprism/photoprism/internal/classify"
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/internal/event"
"github.com/photoprism/photoprism/internal/face"
"github.com/photoprism/photoprism/internal/meta"
"github.com/photoprism/photoprism/internal/nsfw"
"github.com/photoprism/photoprism/internal/query"
@ -600,6 +602,13 @@ func (ind *Index) MediaFile(m *MediaFile, o IndexOptions, originalName string) (
if file.FilePrimary {
labels := photo.ClassifyLabels()
if Config().Experimental() && Config().Settings().Features.People {
faces := ind.detectFaces(m)
photo.AddLabels(classify.FaceLabels(len(faces), entity.SrcImage, 10))
photo.PhotoPeople = len(faces)
}
if err := photo.UpdateTitle(labels); err != nil {
log.Debugf("%s in %s (update title)", err, logName)
}
@ -759,7 +768,7 @@ func (ind *Index) NSFW(jpeg *MediaFile) bool {
return false
}
// classifyImage returns all matching labels for a media file.
// classifyImage classifies a JPEG image and returns matching labels.
func (ind *Index) classifyImage(jpeg *MediaFile) (results classify.Labels) {
start := time.Now()
@ -812,3 +821,31 @@ func (ind *Index) classifyImage(jpeg *MediaFile) (results classify.Labels) {
return results
}
// detectFaces runs face detection on the "fit_720" thumbnail of the given
// media file and returns the detected faces.
//
// An empty result is returned when jpeg is nil or the thumbnail cannot be
// obtained. A detection error is only logged at debug level; whatever
// face.Detect returned is still passed back to the caller.
func (ind *Index) detectFaces(jpeg *MediaFile) face.Faces {
	if jpeg == nil {
		return face.Faces{}
	}

	thumb, thumbErr := jpeg.Thumbnail(Config().ThumbPath(), "fit_720")

	if thumbErr != nil {
		log.Debugf("%s in %s", thumbErr, txt.Quote(jpeg.BaseName()))
		return face.Faces{}
	}

	// Only the detection itself is timed, not the thumbnail lookup.
	began := time.Now()

	found, detectErr := face.Detect(thumb)

	if detectErr != nil {
		log.Debugf("%s in %s", detectErr, txt.Quote(jpeg.BaseName()))
	}

	log.Debugf("index: face detection took %s", time.Since(began))

	return found
}