Includes: Developed to get new system working with inline includes
Adds logic for locating and splitting text nodes. Adds dedicated classes to offload tag-specific and content-specific logic.
This commit is contained in:
parent
04d21c8a97
commit
75936454cc
5 changed files with 192 additions and 8 deletions
68
app/Entities/Tools/PageIncludeContent.php
Normal file
68
app/Entities/Tools/PageIncludeContent.php
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace BookStack\Entities\Tools;
|
||||||
|
|
||||||
|
use BookStack\Util\HtmlDocument;
|
||||||
|
use DOMNode;
|
||||||
|
|
||||||
|
/**
 * Holds the DOM nodes that should stand in for a single include tag,
 * extracted from the HTML of the page which that tag references.
 */
class PageIncludeContent
{
    /**
     * Element names which, when targeted directly by a section ID, are used
     * whole instead of being unwrapped down to their child nodes.
     */
    protected static array $topLevelTags = ['table', 'ul', 'ol', 'pre'];

    /**
     * @var DOMNode[]
     */
    protected array $contents = [];

    /**
     * Defaults to false so that the property is always initialised, even on
     * the early-return paths of parseHtml() (empty HTML, or a section ID that
     * does not exist). Without a default, isInline() would throw an Error
     * for accessing an uninitialised typed property in those cases.
     */
    protected bool $isTopLevel = false;

    /**
     * @param string         $html HTML of the page referenced by the tag.
     * @param PageIncludeTag $tag  The include tag this content is resolved for.
     */
    public function __construct(
        string $html,
        PageIncludeTag $tag,
    ) {
        $this->parseHtml($html, $tag);
    }

    /**
     * Parse the given HTML, storing the nodes relevant to the given tag.
     * Leaves the content empty when there is no HTML, or when the tag targets
     * a section ID that cannot be found.
     */
    protected function parseHtml(string $html, PageIncludeTag $tag): void
    {
        // Explicit comparison, since empty() would also discard the valid content "0".
        if ($html === '') {
            return;
        }

        $doc = new HtmlDocument($html);

        $sectionId = $tag->getSectionId();
        if (!$sectionId) {
            // No section specified, so the whole page body is used.
            $this->contents = [...$doc->getBodyChildren()];
            $this->isTopLevel = true;

            return;
        }

        $section = $doc->getElementById($sectionId);
        if (!$section) {
            return;
        }

        $isTopLevel = in_array(strtolower($section->nodeName), static::$topLevelTags, true);
        $this->isTopLevel = $isTopLevel;
        $this->contents = $isTopLevel ? [$section] : [...$section->childNodes];
    }

    /**
     * Check if this content should be placed inline, in place of the tag,
     * as opposed to being treated as top-level content.
     */
    public function isInline(): bool
    {
        return !$this->isTopLevel;
    }

    /**
     * Check if no content nodes were resolved.
     */
    public function isEmpty(): bool
    {
        return empty($this->contents);
    }

    /**
     * Get the resolved content nodes. These belong to the document they were
     * parsed in, not to any target document.
     *
     * @return DOMNode[]
     */
    public function toDomNodes(): array
    {
        return $this->contents;
    }
}
|
|
@ -4,6 +4,8 @@ namespace BookStack\Entities\Tools;
|
||||||
|
|
||||||
use BookStack\Util\HtmlDocument;
|
use BookStack\Util\HtmlDocument;
|
||||||
use Closure;
|
use Closure;
|
||||||
|
use DOMNode;
|
||||||
|
use DOMText;
|
||||||
|
|
||||||
class PageIncludeParser
|
class PageIncludeParser
|
||||||
{
|
{
|
||||||
|
@ -17,14 +19,25 @@ class PageIncludeParser
|
||||||
|
|
||||||
public function parse(): string
|
public function parse(): string
|
||||||
{
|
{
|
||||||
$html = new HtmlDocument($this->pageHtml);
|
$doc = new HtmlDocument($this->pageHtml);
|
||||||
|
|
||||||
$includeHosts = $html->queryXPath("//body//*[contains(text(), '{{@')]");
|
$tags = $this->locateAndIsolateIncludeTags($doc);
|
||||||
$node = $includeHosts->item(0);
|
|
||||||
|
|
||||||
// One of the direct child textnodes of the "$includeHosts" should be
|
foreach ($tags as $tag) {
|
||||||
// the one with the include tag within.
|
$htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
|
||||||
$textNode = $node->childNodes->item(0);
|
$content = new PageIncludeContent($htmlContent, $tag);
|
||||||
|
|
||||||
|
if ($content->isInline()) {
|
||||||
|
$adopted = $doc->adoptNodes($content->toDomNodes());
|
||||||
|
foreach ($adopted as $adoptedContentNode) {
|
||||||
|
$tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode);
|
||||||
|
}
|
||||||
|
$tag->domNode->parentNode->removeChild($tag->domNode);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO - Non-inline
|
||||||
|
}
|
||||||
|
|
||||||
// TODO:
|
// TODO:
|
||||||
// Hunt down the specific text nodes with matches
|
// Hunt down the specific text nodes with matches
|
||||||
|
@ -52,6 +65,64 @@ class PageIncludeParser
|
||||||
// in changes affecting the next tag, where tags may be in the same/adjacent nodes.
|
// in changes affecting the next tag, where tags may be in the same/adjacent nodes.
|
||||||
|
|
||||||
|
|
||||||
return $html->getBodyInnerHtml();
|
return $doc->getBodyInnerHtml();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Locate include tags within the given document, isolating them to their
 * own nodes in the DOM for future targeted manipulation.
 *
 * @param HtmlDocument $doc Document to search; its text nodes are split in place.
 *
 * @return PageIncludeTag[]
 */
protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
{
    // The predicate tests every direct text node of an element. The simpler
    // "contains(text(), ...)" form only inspects the FIRST text node, which
    // misses tags that follow an inline child element.
    $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
    $includeTags = [];

    /** @var DOMNode $node */
    foreach ($includeHosts as $node) {
        // Iterate over a snapshot of the children, since splitting a text node
        // inserts new siblings into this same parent while we are looping.
        /** @var DOMNode $childNode */
        foreach ([...$node->childNodes] as $childNode) {
            if ($childNode->nodeName === '#text') {
                array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
            }
        }
    }

    return $includeTags;
}
|
||||||
|
|
||||||
|
/**
 * Break up the given text node wherever an include tag occurs, so that each
 * tag ends up alone in its own text node under the same parent.
 * Text found ahead of a tag is moved into a new preceding text node, and the
 * original node is left holding whatever text follows the final tag.
 * A node without any tags is left untouched.
 *
 * @return PageIncludeTag[] References to the tags found, in order of appearance.
 */
protected function splitTextNodesAtTags(DOMNode $textNode): array
{
    $source = $textNode->textContent;
    $parent = $textNode->parentNode;
    preg_match_all(static::$includeTagRegex, $source, $found, PREG_OFFSET_CAPTURE);

    $tags = [];
    $cursor = 0;
    foreach ($found[0] as $position => [$outer, $start]) {
        // Carry any plain text sitting between the last tag and this one.
        $gap = $start - $cursor;
        if ($gap > 0) {
            $parent->insertBefore(new DOMText(substr($source, $cursor, $gap)), $textNode);
        }

        $isolated = $parent->insertBefore(new DOMText($outer), $textNode);
        $tags[] = new PageIncludeTag($found[1][$position][0], $isolated);

        // Offsets from PREG_OFFSET_CAPTURE are in bytes, matching strlen/substr.
        $cursor = $start + strlen($outer);
    }

    if ($cursor > 0) {
        $textNode->textContent = substr($source, $cursor);
    }

    return $tags;
}
|
||||||
}
|
}
|
||||||
|
|
30
app/Entities/Tools/PageIncludeTag.php
Normal file
30
app/Entities/Tools/PageIncludeTag.php
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace BookStack\Entities\Tools;
|
||||||
|
|
||||||
|
use DOMNode;
|
||||||
|
|
||||||
|
/**
 * Pairs the inner content of an include tag, in "pageId" or "pageId#sectionId"
 * form, with the DOM node which holds that tag.
 */
class PageIncludeTag
{
    public function __construct(
        public string $tagContent,
        public DOMNode $domNode,
    ) {
    }

    /**
     * Read the page ID from the tag content: the part ahead of the first "#",
     * trimmed and converted to an integer.
     */
    public function getPageId(): int
    {
        [$pageReference] = explode('#', $this->tagContent, 2);

        return (int) trim($pageReference);
    }

    /**
     * Read the section ID from the tag content: everything after the first "#",
     * trimmed. Provides an empty string where no section is specified.
     */
    public function getSectionId(): string
    {
        $parts = explode('#', $this->tagContent, 2);
        $sectionReference = $parts[1] ?? '';

        return trim($sectionReference);
    }
}
|
|
@ -149,4 +149,19 @@ class HtmlDocument
|
||||||
{
|
{
|
||||||
return $this->document->saveHTML($node);
|
return $this->document->saveHTML($node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Import each of the given nodes into this document via a deep
 * DOMDocument::importNode() call, keeping the original order.
 * The imported nodes are returned but not attached anywhere in the tree.
 *
 * @param DOMNode[] $nodes
 *
 * @return DOMNode[]
 */
public function adoptNodes(array $nodes): array
{
    $importer = fn ($node) => $this->document->importNode($node, true);

    // array_map() keeps the input keys, so re-index to a plain list.
    return array_values(array_map($importer, $nodes));
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ class PageIncludeParserTest extends TestCase
|
||||||
protected function runParserTest(string $html, array $contentById, string $expected)
|
protected function runParserTest(string $html, array $contentById, string $expected)
|
||||||
{
|
{
|
||||||
$parser = new PageIncludeParser($html, function (int $id) use ($contentById) {
|
$parser = new PageIncludeParser($html, function (int $id) use ($contentById) {
|
||||||
return $contentById[strval($id)] ?? null;
|
return $contentById[strval($id)] ?? '';
|
||||||
});
|
});
|
||||||
|
|
||||||
$result = $parser->parse();
|
$result = $parser->parse();
|
||||||
|
|
Loading…
Reference in a new issue