From 2ac99a0f2adce23ddc4e1284dbd2f318aa1fb9e1 Mon Sep 17 00:00:00 2001 From: Adrien Prokopowicz Date: Sun, 13 May 2018 15:49:00 +0200 Subject: [PATCH] [GB.XML] * BUG: Fix reading XML comments and CDATA tags when they contain characters that belong to the end tag while not forming a complete one. * BUG: Fix returning value of XML comments. [GB.XML.HTML] * BUG: Fix reading lowercase HTML doctypes. --- gb.xml/src/CReader.cpp | 6 ++--- gb.xml/src/document.cpp | 1 + gb.xml/src/reader.cpp | 57 +++++++++++++-------------------------- gb.xml/src/serializer.cpp | 2 ++ gb.xml/src/textnode.cpp | 8 ++++++ gb.xml/src/textnode.h | 1 + 6 files changed, 33 insertions(+), 42 deletions(-) diff --git a/gb.xml/src/CReader.cpp b/gb.xml/src/CReader.cpp index e83f92505..222b83f3e 100644 --- a/gb.xml/src/CReader.cpp +++ b/gb.xml/src/CReader.cpp @@ -115,6 +115,8 @@ else if(attr == 0) {GB.StopEnum(); THIS->curAttrEnum = 0; (THIS->depth)--; return;} +THIS->curAttrEnum = attr; + if((attr->attrValue && attr->lenAttrValue)) { GB.ReturnNewString(attr->attrValue, attr->lenAttrValue); @@ -124,10 +126,6 @@ else GB.ReturnNewZeroString(0); } - -THIS->curAttrEnum = attr; - - END_METHOD BEGIN_METHOD(CReaderNodeAttr_get, GB_STRING name) diff --git a/gb.xml/src/document.cpp b/gb.xml/src/document.cpp index 3d8f028f9..53789a24f 100644 --- a/gb.xml/src/document.cpp +++ b/gb.xml/src/document.cpp @@ -126,6 +126,7 @@ void XMLDocument_SetContent(Document *doc, const char *content, const size_t len { //On cherche le début du prologue XML posStart = (char*)memchrs(content, len, "content; - size_t &lenTextContent = node->lenContent; - textContent = (char*)realloc(textContent, lenTextContent + num); - for (unsigned int i = 0; i < num; ++i) - textContent[lenTextContent + i] = car; - lenTextContent += num; -} - int Reader::ReadChar(char car) { #define APPEND(elmt) if(curElmt == 0){}\ else {XMLNode_appendChild(curElmt, elmt);} - ++(this->pos); if(waitClosingElmt) @@ -147,26 +136,6 @@ int Reader::ReadChar(char car) // this->state = READ_END_CUR_ELEMENT; return 0; } - - /* [T. Boege, 02 Apr 2017]: Reset specialTagLevel, which tries to recognise a sequence - * of characters, when this sequence is interrupted, so that e.g. - * does *not* finish the CDATA tag at ]x]>. We want a literal ]]>. */ - if (inCDATA) - { - if (specialTagLevel > CDATA_TAG_STARTCHAR_8 && car != ']' && car != '>') - { - addchars((TextNode *) curNode, ']', specialTagLevel - CDATA_TAG_STARTCHAR_8); - specialTagLevel = CDATA_TAG_STARTCHAR_8; - } - } - if (inComment) - { - if (specialTagLevel > COMMENT_TAG_STARTCHAR_3 && car != '-' && car != '-') - { - addchars((TextNode *) curNode, '-', specialTagLevel - COMMENT_TAG_STARTCHAR_3); - specialTagLevel = COMMENT_TAG_STARTCHAR_3; - } - } if(car == '<' && !inComment && !inCDATA)//Début de tag { @@ -504,13 +473,25 @@ int Reader::ReadChar(char car) else if(XML_isTextNode(curNode)) { - char *&textContent = ((TextNode*)curNode)->content; - size_t &lenTextContent = ((TextNode*)curNode)->lenContent; - textContent = (char*)realloc(textContent, lenTextContent + 1); - textContent[lenTextContent] = car; - ++lenTextContent; - if(curNode->type == Node::Comment) specialTagLevel = COMMENT_TAG_STARTCHAR_3; //En cas de "-" non significatifs - else if(inXMLProlog) specialTagLevel = 0;//En cas de "?" non significatifs + if(curNode->type == Node::Comment) { //In case of extra "-" + switch (specialTagLevel) { + case COMMENT_TAG_ENDCHAR_1: XMLTextNode_appendTextContent((TextNode*)curNode, "-", 1); break; + case COMMENT_TAG_ENDCHAR_2: XMLTextNode_appendTextContent((TextNode*)curNode, "--", 2); break; + default: break; + } + specialTagLevel = COMMENT_TAG_STARTCHAR_3; + } + else if(curNode->type == Node::CDATA) {//In case of exra CDATA chars + switch (specialTagLevel) { + case CDATA_TAG_ENDCHAR_1: XMLTextNode_appendTextContent((TextNode*)curNode, "]", 1); break; + case CDATA_TAG_ENDCHAR_2: XMLTextNode_appendTextContent((TextNode*)curNode, "]]", 2); break; + default: break; + } + specialTagLevel = CDATA_TAG_STARTCHAR_8; + } + else if(inXMLProlog) specialTagLevel = 0;//In case of extra "?" + + XMLTextNode_appendTextContent((TextNode*)curNode, &car, 1); } } diff --git a/gb.xml/src/serializer.cpp b/gb.xml/src/serializer.cpp index b08844898..4916f5f3e 100644 --- a/gb.xml/src/serializer.cpp +++ b/gb.xml/src/serializer.cpp @@ -310,6 +310,7 @@ void addTextContentLen(Node *node, size_t &len) } break; case Node::NodeText: + case Node::Comment: case Node::CDATA: XMLTextNode_checkContent((TextNode*)node); len += ((TextNode*)node)->lenContent; @@ -335,6 +336,7 @@ void addTextContent(Node *node, char *&data) } break; case Node::NodeText: + case Node::Comment: case Node::CDATA: memcpy(data, ((TextNode*)node)->content, ((TextNode*)node)->lenContent); data += ((TextNode*)node)->lenContent; diff --git a/gb.xml/src/textnode.cpp b/gb.xml/src/textnode.cpp index d31b12da1..5d5fa344b 100644 --- a/gb.xml/src/textnode.cpp +++ b/gb.xml/src/textnode.cpp @@ -280,6 +280,14 @@ void XMLTextNode_setTextContent(TextNode *node, const char *ncontent, const size node->content[node->lenContent] = 0; } +void XMLTextNode_appendTextContent(TextNode *node, const char *content, const size_t len) +{ + size_t newLen = node->lenContent + len; + node->content = (char*)realloc(node->content, newLen); + memcpy(node->content + node->lenContent, content, len); + node->lenContent = newLen; +} + /*************************************** Comment ***************************************/ CommentNode* XMLComment_New() diff --git a/gb.xml/src/textnode.h b/gb.xml/src/textnode.h index e80e32e17..cc7c33dc5 100644 --- a/gb.xml/src/textnode.h +++ b/gb.xml/src/textnode.h @@ -32,6 +32,7 @@ TextNode* XMLTextNode_New(); TextNode* XMLTextNode_New(const char *ncontent, const size_t nlen); void XMLTextNode_Free(TextNode *node); +void XMLTextNode_appendTextContent(TextNode *node, const char *content, const size_t len); void XMLTextNode_checkEscapedContent(TextNode *node); void XMLTextNode_checkContent(TextNode *node); void XMLTextNode_setEscapedTextContent(TextNode *node, const char *ncontent, const size_t nlen);