From 5e6092aaf8fd420202016038286554860bf8ea64 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Thu, 2 Sep 2021 22:02:30 +0100 Subject: [PATCH] Added extra HTML filtering of dangerous content In particular, That around the casing of dangerous values within attributes. This uses some xpath translation to handle different casing in contains searching. --- app/Util/HtmlContentFilter.php | 19 +++++++++++++++---- tests/Entity/PageContentTest.php | 25 +++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/app/Util/HtmlContentFilter.php b/app/Util/HtmlContentFilter.php index f251a22fd..729b80474 100644 --- a/app/Util/HtmlContentFilter.php +++ b/app/Util/HtmlContentFilter.php @@ -28,19 +28,19 @@ class HtmlContentFilter static::removeNodes($scriptElems); // Remove clickable links to JavaScript URI - $badLinks = $xPath->query('//*[contains(@href, \'javascript:\')]'); + $badLinks = $xPath->query('//*[' . static::xpathContains('@href', 'javascript:') . ']'); static::removeNodes($badLinks); // Remove forms with calls to JavaScript URI - $badForms = $xPath->query('//*[contains(@action, \'javascript:\')] | //*[contains(@formaction, \'javascript:\')]'); + $badForms = $xPath->query('//*[' . static::xpathContains('@action', 'javascript:') . '] | //*[' . static::xpathContains('@formaction', 'javascript:') . ']'); static::removeNodes($badForms); // Remove meta tag to prevent external redirects - $metaTags = $xPath->query('//meta[contains(@content, \'url\')]'); + $metaTags = $xPath->query('//meta[' . static::xpathContains('@content', 'url') . ']'); static::removeNodes($metaTags); // Remove data or JavaScript iFrames - $badIframes = $xPath->query('//*[contains(@src, \'data:\')] | //*[contains(@src, \'javascript:\')] | //*[@srcdoc]'); + $badIframes = $xPath->query('//*[' . static::xpathContains('@src', 'data:') . '] | //*[' . static::xpathContains('@src', 'javascript:') . '] | //*[@srcdoc]'); static::removeNodes($badIframes); // Remove 'on*' attributes @@ -60,6 +60,17 @@ class HtmlContentFilter return $html; } + /** + * Create a xpath contains statement with a translation automatically built within + * to affectively search in a cases-insensitive manner. + */ + protected static function xpathContains(string $property, string $value): string + { + $value = strtolower($value); + $upperVal = strtoupper($value); + return 'contains(translate(' . $property . ', \'' . $upperVal . '\', \'' . $value . '\'), \'' . $value . '\')'; + } + /** * Removed all of the given DOMNodes. */ diff --git a/tests/Entity/PageContentTest.php b/tests/Entity/PageContentTest.php index 5aee97887..193f81400 100644 --- a/tests/Entity/PageContentTest.php +++ b/tests/Entity/PageContentTest.php @@ -135,14 +135,26 @@ class PageContentTest extends TestCase } } - public function test_iframe_js_and_base64_urls_are_removed() + public function test_js_and_base64_src_urls_are_removed() { $checks = [ '', + '', + '', '', '', + '', '', + '', + '', + '', + '', + '', + '', + '', '', + '', + '', ]; $this->asEditor(); @@ -155,6 +167,7 @@ class PageContentTest extends TestCase $pageView = $this->get($page->getUrl()); $pageView->assertStatus(200); $pageView->assertElementNotContains('.page-content', ''); $pageView->assertElementNotContains('.page-content', 'src='); $pageView->assertElementNotContains('.page-content', 'javascript:'); @@ -168,6 +181,8 @@ class PageContentTest extends TestCase $checks = [ ''); + $pageView->assertElementNotContains('.page-content', 'assertElementNotContains('.page-content', 'href=javascript:'); } } @@ -188,8 +203,10 @@ class PageContentTest extends TestCase { $checks = [ '
', + '
', '
', '
', + '
', ]; $this->asEditor(); @@ -213,6 +230,8 @@ class PageContentTest extends TestCase { $checks = [ '', + '', + '', ]; $this->asEditor(); @@ -249,11 +268,13 @@ class PageContentTest extends TestCase { $checks = [ '

Hello

', + '

Hello

', '
Lorem ipsum dolor sit amet.

Hello

', '
Lorem ipsum dolor sit amet.

Hello

', '
Lorem ipsum dolor sit amet.

Hello

', '
Lorem ipsum dolor sit amet.

Hello

', '
xss link\', ]; $this->asEditor();