2021-06-26 17:23:15 +02:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace BookStack\Entities\Tools;
|
2020-11-22 01:17:45 +01:00
|
|
|
|
|
|
|
use BookStack\Entities\EntityProvider;
|
|
|
|
use BookStack\Entities\Models\Entity;
|
|
|
|
use BookStack\Entities\Models\SearchTerm;
|
2020-11-28 17:42:12 +01:00
|
|
|
use Illuminate\Support\Collection;
|
2020-11-22 01:17:45 +01:00
|
|
|
|
|
|
|
class SearchIndex
{
    /**
     * Provider used to obtain an instance of each entity model
     * when re-indexing everything.
     *
     * @var EntityProvider
     */
    protected $entityProvider;

    public function __construct(EntityProvider $entityProvider)
    {
        $this->entityProvider = $entityProvider;
    }

    /**
     * Index the given entity.
     * Any search terms already stored for the entity are deleted first,
     * then a fresh set is generated and inserted.
     */
    public function indexEntity(Entity $entity)
    {
        $this->deleteEntityTerms($entity);
        $terms = $this->entityToTermDataArray($entity);
        SearchTerm::query()->insert($terms);
    }

    /**
     * Index multiple Entities at once.
     * Existing terms are NOT removed here; callers wanting a clean re-index
     * must clear them beforehand (as indexAllEntities() does via truncate).
     *
     * @param Entity[] $entities
     */
    public function indexEntities(array $entities)
    {
        $terms = [];
        foreach ($entities as $entity) {
            // Append row by row rather than unpacking a whole term list
            // into a single function call's argument list.
            foreach ($this->entityToTermDataArray($entity) as $termRow) {
                $terms[] = $termRow;
            }
        }

        // Insert in batches of 500 rows to keep each insert statement bounded.
        foreach (array_chunk($terms, 500) as $termChunk) {
            SearchTerm::query()->insert($termChunk);
        }
    }

    /**
     * Delete and re-index the terms for all entities in the system.
     */
    public function indexAllEntities()
    {
        SearchTerm::query()->truncate();

        foreach ($this->entityProvider->all() as $entityModel) {
            // Only load the columns read while generating terms.
            $selectFields = ['id', 'name', $entityModel->textField];
            $entityModel->newQuery()
                ->withTrashed()
                ->select($selectFields)
                ->chunk(1000, function (Collection $entities) {
                    $this->indexEntities($entities->all());
                });
        }
    }

    /**
     * Delete related Entity search terms.
     */
    public function deleteEntityTerms(Entity $entity)
    {
        $entity->searchTerms()->delete();
    }

    /**
     * Create a scored term array from the given text.
     * The text is split on whitespace and common punctuation; each distinct
     * token produces one row whose score is its occurrence count multiplied
     * by the given adjustment.
     *
     * The adjustment is a float so that a fractional multiplier is not
     * truncated to an integer on the way in.
     *
     * @param string $text            Text to tokenise.
     * @param float  $scoreAdjustment Multiplier applied to each occurrence count.
     *
     * @return array{term: string, score: float}[]
     */
    protected function generateTermArrayFromText(string $text, float $scoreAdjustment = 1.0): array
    {
        $tokenMap = []; // {TextToken => OccurrenceCount}
        $splitChars = " \n\t.,!?:;()[]{}<>`'\"";
        $token = strtok($text, $splitChars);

        while ($token !== false) {
            $tokenMap[$token] = ($tokenMap[$token] ?? 0) + 1;
            $token = strtok($splitChars);
        }

        $terms = [];
        foreach ($tokenMap as $token => $count) {
            $terms[] = [
                // PHP turns purely numeric string keys into integers,
                // so cast back to keep the term a string.
                'term'  => (string) $token,
                'score' => $count * $scoreAdjustment,
            ];
        }

        return $terms;
    }

    /**
     * For the given entity, Generate an array of term data details.
     * Is the raw term data, not instances of SearchTerm models.
     * Terms from the entity name are weighted 40x relative to terms from
     * its text, and both are scaled by the entity's searchFactor.
     *
     * @return array{term: string, score: float, entity_type: string, entity_id: mixed}[]
     */
    protected function entityToTermDataArray(Entity $entity): array
    {
        $nameTerms = $this->generateTermArrayFromText($entity->name, 40 * $entity->searchFactor);
        $bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor);
        $termData = array_merge($nameTerms, $bodyTerms);

        // Resolve once instead of once per term.
        $entityType = $entity->getMorphClass();
        $entityId = $entity->id;

        foreach ($termData as $index => $term) {
            $termData[$index]['entity_type'] = $entityType;
            $termData[$index]['entity_id'] = $entityId;
        }

        return $termData;
    }
}
|