2023-11-14 16:46:32 +01:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace BookStack\Util;
|
|
|
|
|
|
|
|
use DOMDocument;
|
|
|
|
use DOMElement;
|
|
|
|
use DOMNode;
|
|
|
|
use DOMNodeList;
|
|
|
|
use DOMXPath;
|
|
|
|
|
|
|
|
/**
 * HtmlDocument is a thin wrapper around DOMDocument built
 * specifically for loading, querying and generating HTML content.
 *
 * Content can be loaded either as a fragment (wrapped in a body element)
 * or as a complete page. XPath queries run through a DOMXPath instance
 * that is created on first use and discarded whenever new content is loaded.
 */
class HtmlDocument
{
    /**
     * The wrapped DOM document which holds all loaded content.
     */
    protected DOMDocument $document;

    /**
     * XPath helper for the current document contents.
     * Null until the first query, and reset to null on each load.
     */
    protected ?DOMXPath $xpath = null;

    /**
     * Option flags handed to DOMDocument::loadHTML() on every load.
     */
    protected int $loadOptions;

    /**
     * @param string $partialHtml Optional HTML fragment to load straight away.
     * @param int    $loadOptions Option flags passed through to DOMDocument::loadHTML().
     */
    public function __construct(string $partialHtml = '', int $loadOptions = 0)
    {
        // Switch libxml to internal error handling so that markup problems are
        // collected by libxml instead of being raised as PHP warnings.
        // Note: this is a global libxml setting and it is not restored here.
        libxml_use_internal_errors(true);
        $this->document = new DOMDocument();
        $this->loadOptions = $loadOptions;

        // Compare against the empty string explicitly. A plain truthiness
        // check would treat the valid content "0" as empty and skip loading it.
        if ($partialHtml !== '') {
            $this->loadPartialHtml($partialHtml);
        }
    }

    /**
     * Load some HTML content that's part of a document (e.g. body content)
     * into the current document.
     *
     * The content is wrapped in a body element, and prefixed with an XML
     * encoding declaration stating UTF-8, before being parsed.
     */
    public function loadPartialHtml(string $html): void
    {
        $wrapped = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
        $this->document->loadHTML($wrapped, $this->loadOptions);

        // The previous XPath instance was built for the old contents.
        $this->xpath = null;
    }

    /**
     * Load a complete page of HTML content into the document.
     *
     * The page is prefixed with an XML encoding declaration stating UTF-8
     * before being parsed.
     */
    public function loadCompleteHtml(string $html): void
    {
        $prefixed = '<?xml encoding="utf-8" ?>' . $html;
        $this->document->loadHTML($prefixed, $this->loadOptions);

        // The previous XPath instance was built for the old contents.
        $this->xpath = null;
    }

    /**
     * Start an XPath query on the current document.
     *
     * @throws \InvalidArgumentException If the expression could not be executed.
     */
    public function queryXPath(string $expression): DOMNodeList
    {
        // Create the XPath helper on first use and keep it for later queries.
        $this->xpath ??= new DOMXPath($this->document);

        $matches = $this->xpath->query($expression);
        if ($matches === false) {
            throw new \InvalidArgumentException("XPath query for expression [$expression] failed to execute");
        }

        return $matches;
    }

    /**
     * Create a new DOMElement instance within the document.
     * The element is created by the document but is not attached to the tree.
     *
     * @throws \InvalidArgumentException If the element could not be created.
     */
    public function createElement(string $localName, string $value = ''): DOMElement
    {
        $created = $this->document->createElement($localName, $value);

        if ($created === false) {
            throw new \InvalidArgumentException("Failed to create element of name [$localName] and value [$value]");
        }

        return $created;
    }

    /**
     * Get an element within the document of the given ID.
     * Returns null where no matching element is found.
     */
    public function getElementById(string $elementId): ?DOMElement
    {
        return $this->document->getElementById($elementId);
    }

    /**
     * Get the DOMNode that represents the HTML body.
     *
     * This is the first body element found in the document. If the document
     * holds no body element (for example, nothing has been loaded yet) the
     * lookup yields null, which does not satisfy the declared return type.
     */
    public function getBody(): DOMNode
    {
        return $this->document->getElementsByTagName('body')->item(0);
    }

    /**
     * Get the nodes that are a direct child of the body.
     * This is usually all the content nodes if loaded partially.
     */
    public function getBodyChildren(): DOMNodeList
    {
        return $this->getBody()->childNodes;
    }

    /**
     * Get the inner HTML content of the body.
     * This is usually all the content if loaded partially.
     */
    public function getBodyInnerHtml(): string
    {
        return $this->serializeNodes($this->getBodyChildren());
    }

    /**
     * Get the HTML content of the whole document.
     * Output is generated from the document's root element.
     */
    public function getHtml(): string
    {
        return $this->document->saveHTML($this->document->documentElement);
    }

    /**
     * Get the inner HTML for the given node.
     * This is the HTML of each child node, joined in document order.
     */
    public function getNodeInnerHtml(DOMNode $node): string
    {
        return $this->serializeNodes($node->childNodes);
    }

    /**
     * Get the outer HTML for the given node.
     * This includes the node itself in addition to its contents.
     */
    public function getNodeOuterHtml(DOMNode $node): string
    {
        return $this->document->saveHTML($node);
    }

    /**
     * Convert each of the given nodes to HTML and join the results in order.
     */
    private function serializeNodes(DOMNodeList $nodes): string
    {
        $html = '';

        foreach ($nodes as $node) {
            $html .= $this->document->saveHTML($node);
        }

        return $html;
    }
}
|