* BUG: Fix reading XML comments and CDATA tags when they contain
characters that belong to the end tag while not forming a complete one.
* BUG: Fix the value returned for XML comments.

[GB.XML.HTML]
* BUG: Fix reading lowercase HTML doctypes.
This commit is contained in:
Adrien Prokopowicz 2018-05-13 15:49:00 +02:00
parent dce6496cd9
commit 2ac99a0f2a
No known key found for this signature in database
GPG key ID: 752A4E1D70CABBE3
6 changed files with 33 additions and 42 deletions

View file

@ -115,6 +115,8 @@ else
if(attr == 0) {GB.StopEnum(); THIS->curAttrEnum = 0; (THIS->depth)--; return;}
THIS->curAttrEnum = attr;
if((attr->attrValue && attr->lenAttrValue))
{
GB.ReturnNewString(attr->attrValue, attr->lenAttrValue);
@ -124,10 +126,6 @@ else
GB.ReturnNewZeroString(0);
}
THIS->curAttrEnum = attr;
END_METHOD
BEGIN_METHOD(CReaderNodeAttr_get, GB_STRING name)

View file

@ -126,6 +126,7 @@ void XMLDocument_SetContent(Document *doc, const char *content, const size_t len
{
//On cherche le début du prologue XML
posStart = (char*)memchrs(content, len, "<!DOCTYPE ", 10);
if(!posStart) posStart = (char*)memchrs(content, len, "<!doctype ", 10);
//On cherche la fin du prologue XML
if(posStart)

View file

@ -121,22 +121,11 @@ void Reader::DestroyReader()
ClearReader();
}
/**
 * Appends `num` copies of the character `car` to the content of a text node.
 *
 * The node's buffer is grown with realloc() and lenContent is increased by
 * `num`. No terminating NUL byte is written.
 *
 * @param node Text node whose content / lenContent are extended.
 * @param car  Character to repeat.
 * @param num  Number of copies to append; 0 leaves the node untouched.
 */
static void addchars(TextNode *node, char car, size_t num)
{
    // Nothing to append; this also avoids realloc(ptr, 0), whose result is
    // implementation-defined.
    if (num == 0) return;

    const size_t oldLen = node->lenContent;
    char *grown = (char*)realloc(node->content, oldLen + num);
    // On allocation failure keep the existing buffer instead of overwriting
    // the only pointer to it with NULL and then writing through it.
    if (!grown) return;

    // size_t index so the counter has the same width as `num`.
    for (size_t i = 0; i < num; ++i)
        grown[oldLen + i] = car;

    node->content = grown;
    node->lenContent = oldLen + num;
}
int Reader::ReadChar(char car)
{
#define APPEND(elmt) if(curElmt == 0){}\
else {XMLNode_appendChild(curElmt, elmt);}
++(this->pos);
if(waitClosingElmt)
@ -147,26 +136,6 @@ int Reader::ReadChar(char car)
// this->state = READ_END_CUR_ELEMENT;
return 0;
}
/* [T. Boege, 02 Apr 2017]: Reset specialTagLevel, which tries to recognise a sequence
* of characters, when this sequence is interrupted, so that e.g. <![CDATA[ab]x]>
* does *not* finish the CDATA tag at ]x]>. We want a literal ]]>. */
if (inCDATA)
{
if (specialTagLevel > CDATA_TAG_STARTCHAR_8 && car != ']' && car != '>')
{
addchars((TextNode *) curNode, ']', specialTagLevel - CDATA_TAG_STARTCHAR_8);
specialTagLevel = CDATA_TAG_STARTCHAR_8;
}
}
if (inComment)
{
if (specialTagLevel > COMMENT_TAG_STARTCHAR_3 && car != '-' && car != '-')
{
addchars((TextNode *) curNode, '-', specialTagLevel - COMMENT_TAG_STARTCHAR_3);
specialTagLevel = COMMENT_TAG_STARTCHAR_3;
}
}
if(car == '<' && !inComment && !inCDATA)//Début de tag
{
@ -504,13 +473,25 @@ int Reader::ReadChar(char car)
else if(XML_isTextNode(curNode))
{
char *&textContent = ((TextNode*)curNode)->content;
size_t &lenTextContent = ((TextNode*)curNode)->lenContent;
textContent = (char*)realloc(textContent, lenTextContent + 1);
textContent[lenTextContent] = car;
++lenTextContent;
if(curNode->type == Node::Comment) specialTagLevel = COMMENT_TAG_STARTCHAR_3; //En cas de "-" non significatifs
else if(inXMLProlog) specialTagLevel = 0;//En cas de "?" non significatifs
if(curNode->type == Node::Comment) { //In case of extra "-"
switch (specialTagLevel) {
case COMMENT_TAG_ENDCHAR_1: XMLTextNode_appendTextContent((TextNode*)curNode, "-", 1); break;
case COMMENT_TAG_ENDCHAR_2: XMLTextNode_appendTextContent((TextNode*)curNode, "--", 2); break;
default: break;
}
specialTagLevel = COMMENT_TAG_STARTCHAR_3;
}
else if(curNode->type == Node::CDATA) {//In case of exra CDATA chars
switch (specialTagLevel) {
case CDATA_TAG_ENDCHAR_1: XMLTextNode_appendTextContent((TextNode*)curNode, "]", 1); break;
case CDATA_TAG_ENDCHAR_2: XMLTextNode_appendTextContent((TextNode*)curNode, "]]", 2); break;
default: break;
}
specialTagLevel = CDATA_TAG_STARTCHAR_8;
}
else if(inXMLProlog) specialTagLevel = 0;//In case of extra "?"
XMLTextNode_appendTextContent((TextNode*)curNode, &car, 1);
}
}

View file

@ -310,6 +310,7 @@ void addTextContentLen(Node *node, size_t &len)
}
break;
case Node::NodeText:
case Node::Comment:
case Node::CDATA:
XMLTextNode_checkContent((TextNode*)node);
len += ((TextNode*)node)->lenContent;
@ -335,6 +336,7 @@ void addTextContent(Node *node, char *&data)
}
break;
case Node::NodeText:
case Node::Comment:
case Node::CDATA:
memcpy(data, ((TextNode*)node)->content, ((TextNode*)node)->lenContent);
data += ((TextNode*)node)->lenContent;

View file

@ -280,6 +280,14 @@ void XMLTextNode_setTextContent(TextNode *node, const char *ncontent, const size
node->content[node->lenContent] = 0;
}
/**
 * Appends `len` bytes from `content` to the end of a text node's content.
 *
 * The node's buffer is grown with realloc() and lenContent is increased by
 * `len`. One extra byte is allocated and set to 0 right after the content so
 * the buffer stays NUL-terminated, as XMLTextNode_setTextContent leaves it;
 * that byte is not counted in lenContent.
 *
 * @param node    Text node to extend.
 * @param content Bytes to append; not required to be NUL-terminated.
 * @param len     Number of bytes to append; 0 leaves the node untouched.
 */
void XMLTextNode_appendTextContent(TextNode *node, const char *content, const size_t len)
{
    // Nothing to append; also avoids memcpy with a possibly null destination.
    if (len == 0) return;

    const size_t oldLen = node->lenContent;
    const size_t newLen = oldLen + len;

    char *grown = (char*)realloc(node->content, newLen + 1);
    // On allocation failure keep the existing buffer instead of overwriting
    // the only pointer to it with NULL and then copying into it.
    if (!grown) return;

    memcpy(grown + oldLen, content, len);
    grown[newLen] = 0;

    node->content = grown;
    node->lenContent = newLen;
}
/*************************************** Comment ***************************************/
CommentNode* XMLComment_New()

View file

@ -32,6 +32,7 @@ TextNode* XMLTextNode_New();
TextNode* XMLTextNode_New(const char *ncontent, const size_t nlen);
void XMLTextNode_Free(TextNode *node);
// Appends `len` bytes from `content` to the end of the node's content buffer
// and increases the node's lenContent by `len`.
void XMLTextNode_appendTextContent(TextNode *node, const char *content, const size_t len);
void XMLTextNode_checkEscapedContent(TextNode *node);
void XMLTextNode_checkContent(TextNode *node);
void XMLTextNode_setEscapedTextContent(TextNode *node, const char *ncontent, const size_t nlen);