518 lines
16 KiB
C++
518 lines
16 KiB
C++
/***************************************************************************

  (c) 2012 Adrien Prokopowicz <prokopy@users.sourceforge.net>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  MA 02110-1301, USA.

***************************************************************************/
|
|
|
|
#include "reader.h"
|
|
|
|
#include "node.h"
|
|
#include "utils.h"
|
|
#include "element.h"
|
|
#include "document.h"
|
|
#include "textnode.h"
|
|
|
|
#include <memory.h>
|
|
#include <stdlib.h>
|
|
|
|
// Release-and-reset helpers: each one releases the object designated by _ob
// (only when it is non-null) and, except for UNREF, sets the pointer back to 0.
//
// Every macro is wrapped in do { ... } while(0) so that it expands to exactly
// one statement. With a bare `if(...) {...}` expansion, code such as
//     if(cond) FREE(p); else other();
// does not compile (or silently binds the `else` to the macro's own `if`).
// Call sites must terminate the macro invocation with a semicolon.
#define DELETE(_ob) do { if(_ob) {delete _ob; _ob = 0;} } while(0)
#define FREE(_ob) do { if(_ob) {free(_ob); _ob = 0;} } while(0)
#define UNREF(_ob) do { if(_ob) GB.Unref(POINTER(&(_ob))); } while(0)
#define DESTROYPARENT(_ob) do { if(_ob) {XMLNode_DestroyParent(_ob); _ob = 0;} } while(0)
|
|
|
|
void Reader::ClearReader()
|
|
{
|
|
//UNREF(foundNode);
|
|
//UNREF(curNode);
|
|
this->keepMemory = false;
|
|
this->pos = 0;
|
|
this->depth = -1;
|
|
this->inTag = false;
|
|
this->inTagName = false;
|
|
this->inAttr = false;
|
|
this->inAttrName = false;
|
|
this->inAttrVal = false;
|
|
this->inNewTag = false;
|
|
this->specialTagLevel = 0;
|
|
this->inEndTag = false;
|
|
this->inXMLProlog = false;
|
|
this->inCommentTag = false;
|
|
this->inCDATATag = false;
|
|
this->inComment = false;
|
|
this->inCDATA = false;
|
|
this->waitClosingElmt = false;
|
|
this->specialTagLevel = 0;
|
|
this->state = 0;
|
|
|
|
if(curNode != foundNode)
|
|
{
|
|
DESTROYPARENT(curNode);
|
|
}
|
|
else
|
|
{
|
|
curNode = 0;
|
|
}
|
|
DESTROYPARENT(foundNode);
|
|
curElmt = 0;
|
|
storedDocument = 0;
|
|
FREE(attrName);
|
|
lenAttrName = 0;
|
|
FREE(attrVal);
|
|
lenAttrVal = 0;
|
|
FREE(content);
|
|
lenContent = 0;
|
|
|
|
if(storedElements)
|
|
{
|
|
|
|
/*for(vector<Node*>::iterator it = storedElements->begin(); it != storedElements->end(); ++it)
|
|
{
|
|
GB.Unref(POINTER(&(*it)));
|
|
}
|
|
this->storedElements->clear();*/
|
|
}
|
|
|
|
curAttrEnum = 0;
|
|
|
|
}
|
|
|
|
void Reader::InitReader()
|
|
{
|
|
attrName = 0;
|
|
attrVal = 0;
|
|
content = 0;
|
|
storedDocument = 0;
|
|
storedElements = 0;
|
|
curNode = 0;
|
|
foundNode = 0;
|
|
|
|
ClearReader();
|
|
|
|
this->flags[NODE_ELEMENT] = true;
|
|
this->flags[NODE_TEXT] = true;
|
|
this->flags[NODE_COMMENT] = true;
|
|
this->flags[NODE_CDATA] = true;
|
|
this->flags[NODE_ATTRIBUTE] = false;
|
|
this->flags[READ_ATTRIBUTE] = false;
|
|
|
|
this->flags[READ_END_CUR_ELEMENT] = true;
|
|
this->flags[READ_ERR_EOF] = true;
|
|
FREE(storedElements);
|
|
|
|
}
|
|
|
|
void Reader::DestroyReader()
|
|
{
|
|
ClearReader();
|
|
}
|
|
|
|
/**
 * Appends `num` copies of the character `car` to the content buffer of a text
 * node, growing the buffer with realloc() and updating the stored length.
 *
 * @param node text node whose content / lenContent fields are extended
 * @param car  character to append
 * @param num  number of copies to append
 */
static void addchars(TextNode *node, char car, size_t num)
{
    const size_t oldLen = node->lenContent;

    node->content = (char*)realloc(node->content, oldLen + num);

    for (size_t k = 0; k != num; ++k)
        node->content[oldLen + k] = car;

    node->lenContent = oldLen + num;
}
|
|
|
|
int Reader::ReadChar(char car)
|
|
{
|
|
#define APPEND(elmt) if(curElmt == 0){}\
|
|
else {XMLNode_appendChild(curElmt, elmt);}
|
|
|
|
|
|
++(this->pos);
|
|
|
|
if(waitClosingElmt)
|
|
{
|
|
if(car != '>') return 0;
|
|
waitClosingElmt = false;
|
|
depth--;
|
|
// this->state = READ_END_CUR_ELEMENT;
|
|
return 0;
|
|
}
|
|
|
|
/* [T. Boege, 02 Apr 2017]: Reset specialTagLevel, which tries to recognise a sequence
|
|
* of characters, when this sequence is interrupted, so that e.g. <![CDATA[ab]x]>
|
|
* does *not* finish the CDATA tag at ]x]>. We want a literal ]]>. */
|
|
if (inCDATA)
|
|
{
|
|
if (specialTagLevel > CDATA_TAG_STARTCHAR_8 && car != ']' && car != '>')
|
|
{
|
|
addchars((TextNode *) curNode, ']', specialTagLevel - CDATA_TAG_STARTCHAR_8);
|
|
specialTagLevel = CDATA_TAG_STARTCHAR_8;
|
|
}
|
|
}
|
|
if (inComment)
|
|
{
|
|
if (specialTagLevel > COMMENT_TAG_STARTCHAR_3 && car != '-' && car != '-')
|
|
{
|
|
addchars((TextNode *) curNode, '-', specialTagLevel - COMMENT_TAG_STARTCHAR_3);
|
|
specialTagLevel = COMMENT_TAG_STARTCHAR_3;
|
|
}
|
|
}
|
|
|
|
if(car == '<' && !inComment && !inCDATA)//Début de tag
|
|
{
|
|
if(inTag)//Si on est déjà dans un tag
|
|
{
|
|
throw XMLParseException_New("Invalid tag Name", pos);
|
|
}
|
|
inNewTag = true;
|
|
inTagName = true;
|
|
if(curNode && curNode->type == Node::NodeText) //Si il y avait du texte avant
|
|
{
|
|
DESTROYPARENT(foundNode);
|
|
foundNode = curNode;
|
|
if(keepMemory)
|
|
{
|
|
APPEND(foundNode);
|
|
}
|
|
//const char *trimmedText = curNode->toTextNode()->content;
|
|
//size_t lenTrimmedText = curNode->toTextNode()->lenContent;
|
|
|
|
//Trim(trimmedText, lenTrimmedText);
|
|
|
|
XMLTextNode_TrimContent((TextNode*)curNode);
|
|
|
|
curNode = 0;
|
|
this->state = NODE_TEXT;
|
|
return NODE_TEXT;
|
|
}
|
|
}
|
|
else if(car == '>' && inTag && !inEndTag && !inComment && !inCDATA)//Fin de tag (de nouvel élément)
|
|
{
|
|
DESTROYPARENT(foundNode);
|
|
//UNREF(foundNode);
|
|
foundNode = curNode;//On a trouvé un élément complet
|
|
//curNode = 0;
|
|
//GB.Ref(foundNode);
|
|
inTag = false;
|
|
depth++;
|
|
if(keepMemory)
|
|
{
|
|
APPEND(foundNode);
|
|
curElmt = ((Element*)foundNode);
|
|
}
|
|
if(attrName && attrVal)
|
|
{
|
|
XMLElement_AddAttribute(((Element*)curNode), attrName, lenAttrName,
|
|
attrVal, lenAttrVal);
|
|
FREE(attrName); lenAttrName = 0; inAttrName = false; inAttr = false;
|
|
FREE(attrVal); lenAttrVal = 0; inAttrVal = false;
|
|
}
|
|
else if(attrName)
|
|
{
|
|
XMLElement_AddAttribute(((Element*)curNode), attrName, lenAttrName, "", 0);
|
|
FREE(attrName); lenAttrName = 0; inAttrName = false; inAttr = false;
|
|
}
|
|
this->state = NODE_ELEMENT;
|
|
return NODE_ELEMENT;
|
|
}
|
|
else if(isWhiteSpace(car) && inTag && inTagName && !inComment && !inCDATA)// Fin de tagName
|
|
{
|
|
inTagName = false;
|
|
XMLElement_RefreshPrefix((Element*)curNode);
|
|
}
|
|
else if(isNameStartChar(car) && inTag && !inTagName && !inEndTag && !inAttrVal && !inAttrName && !inComment && !inCDATA)//Début de nom d'attribut
|
|
{
|
|
if(attrName && attrVal)
|
|
{
|
|
XMLElement_AddAttribute(((Element*)curNode), attrName, lenAttrName,
|
|
attrVal, lenAttrVal);
|
|
FREE(attrName); lenAttrName = 0; inAttrName = false; inAttr = false;
|
|
FREE(attrVal); lenAttrVal = 0; inAttrVal = false;
|
|
}
|
|
else if(attrName)
|
|
{
|
|
XMLElement_AddAttribute(((Element*)curNode), attrName, lenAttrName, "", 0);
|
|
FREE(attrName); lenAttrName = 0; inAttrName = false; inAttr = false;
|
|
}
|
|
inAttr = true;
|
|
inAttrName = true;
|
|
attrName = (char*)malloc(1);
|
|
*attrName = car;
|
|
lenAttrName = 1;
|
|
}
|
|
else if(car == '=' && inAttrName && !inComment && !inCDATA)//Fin du nom d'attribut
|
|
{
|
|
inAttrName = false;
|
|
}
|
|
else if((car == '\'' || car == '"') && inAttr && !inAttrVal && !inComment && !inCDATA)//Début de valeur d'attribut
|
|
{
|
|
inAttrVal = true;
|
|
attrVal = 0;
|
|
}
|
|
else if((car == '\'' || car == '"') && inAttr && inAttrVal && !inComment && !inCDATA)//Fin de valeur d'attribut
|
|
{
|
|
XMLElement_AddAttribute(((Element*)curNode), attrName, lenAttrName,
|
|
attrVal, lenAttrVal);
|
|
FREE(attrName); lenAttrName = 0;
|
|
FREE(attrVal); lenAttrVal = 0;
|
|
inAttr = false;
|
|
inAttrVal = false;
|
|
this->state = READ_ATTRIBUTE;
|
|
return READ_ATTRIBUTE;
|
|
}
|
|
else if(car == '/' && inTag && !inAttrVal && !inComment && !inCDATA)//Self-closed element
|
|
{
|
|
inTag = false;
|
|
inTagName = false;
|
|
inEndTag = false;
|
|
if(curElmt) curElmt = (Element*)(curElmt->parent);
|
|
FREE(content); lenContent = 0;
|
|
//depth--;
|
|
waitClosingElmt = true;
|
|
DESTROYPARENT(foundNode);
|
|
foundNode = curNode;
|
|
XMLElement_RefreshPrefix((Element*)curNode);
|
|
this->state = NODE_ELEMENT;
|
|
depth++;
|
|
return NODE_ELEMENT;
|
|
}
|
|
else if(car == '/' && inNewTag && !inComment && !inCDATA)//C'est un tag de fin
|
|
{
|
|
inEndTag = true;
|
|
inNewTag = false;
|
|
inTag = true;
|
|
}
|
|
else if(car == '>' && inEndTag && !inComment && !inCDATA)//La fin d'un tag de fin
|
|
{
|
|
inTag = false;
|
|
inEndTag = false;
|
|
if(curElmt && lenContent == curElmt->lenTagName)
|
|
{
|
|
if(memcmp(curElmt->tagName, content, lenContent))
|
|
curElmt = (Element*)(curElmt->parent);
|
|
}
|
|
FREE(content); lenContent = 0;
|
|
depth--;
|
|
this->state = READ_END_CUR_ELEMENT;
|
|
return READ_END_CUR_ELEMENT;
|
|
}
|
|
else if(inEndTag)//Tag de fin
|
|
{
|
|
if(!content)
|
|
{
|
|
content = (char*)malloc(1);
|
|
content[0] = car;
|
|
lenContent = 1;
|
|
}
|
|
else
|
|
{
|
|
content = (char*)realloc(content, lenContent + 1);
|
|
content[lenContent] = car;
|
|
++lenContent;
|
|
}
|
|
|
|
}
|
|
else if(inNewTag && car == '!' )//Premier caractère de commentaire
|
|
{
|
|
specialTagLevel = COMMENT_TAG_STARTCHAR_1;
|
|
inCommentTag = true;
|
|
inNewTag = false;
|
|
inTag = false;
|
|
}
|
|
//Caractère de début de CDATA
|
|
else if(inCommentTag && car == '[' && specialTagLevel == COMMENT_TAG_STARTCHAR_1)
|
|
{
|
|
specialTagLevel = CDATA_TAG_STARTCHAR_2;
|
|
inCommentTag = false;
|
|
inCDATATag = true;
|
|
}
|
|
//Caractère de CDATA
|
|
else if(inCDATATag && specialTagLevel >= CDATA_TAG_STARTCHAR_2 && specialTagLevel < CDATA_TAG_STARTCHAR_8
|
|
&& (car == '[' || car == 'C' || car == 'D' || car == 'A' || car == 'T'))
|
|
{
|
|
++specialTagLevel;
|
|
if(specialTagLevel == CDATA_TAG_STARTCHAR_8)
|
|
{
|
|
inCDATATag = false;
|
|
inCDATA = true;
|
|
curNode = XMLCDATA_New();
|
|
}
|
|
}
|
|
//Caractère "]" de fin de CDATA
|
|
else if(curNode && curNode->type == Node::CDATA && car == ']')
|
|
{
|
|
++specialTagLevel;
|
|
if(specialTagLevel > CDATA_TAG_ENDCHAR_2)//On est allés un peu trop loin, il y a des ] en trop
|
|
{
|
|
--specialTagLevel;
|
|
char *&textContent = ((TextNode*)curNode)->content;
|
|
size_t &lenTextContent = ((TextNode*)curNode)->lenContent;
|
|
textContent = (char*)realloc(textContent, lenTextContent + 1);
|
|
textContent[lenTextContent] = car;
|
|
++lenTextContent;
|
|
}
|
|
}
|
|
//Fin du CDATA
|
|
else if(curNode && curNode->type == Node::CDATA && car == '>' && specialTagLevel == CDATA_TAG_ENDCHAR_2)
|
|
{
|
|
specialTagLevel = 0;
|
|
inTag = false;
|
|
DESTROYPARENT(foundNode);
|
|
//UNREF(foundNode);
|
|
foundNode = curNode;
|
|
inCDATA = false;
|
|
if(keepMemory)
|
|
{
|
|
APPEND(foundNode);
|
|
}
|
|
curNode = 0;
|
|
this->state = NODE_CDATA;
|
|
return NODE_CDATA;
|
|
}
|
|
//Caractère "-" de début de commentaire
|
|
else if(inCommentTag && car == '-' && specialTagLevel >= COMMENT_TAG_STARTCHAR_1 && specialTagLevel < COMMENT_TAG_STARTCHAR_3 && !inComment && !inCDATA)
|
|
{
|
|
++specialTagLevel;
|
|
if (specialTagLevel == COMMENT_TAG_STARTCHAR_3)//Le tag <!-- est complet, on crée un nouveau node
|
|
{
|
|
inCommentTag = false;
|
|
inComment = true;
|
|
//DESTROYPARENT(curNode);
|
|
//UNREF(curNode);
|
|
curNode = XMLComment_New();
|
|
//GB.Ref(curNode);
|
|
}
|
|
}
|
|
//Caractère "-" de fin de commentaire
|
|
else if(curNode && curNode->type == Node::Comment && car == '-')
|
|
{
|
|
++specialTagLevel;
|
|
if(specialTagLevel > COMMENT_TAG_ENDCHAR_2)//On est allés un peu trop loin, il y a des - en trop
|
|
{
|
|
--specialTagLevel;
|
|
char *&textContent = ((TextNode*)curNode)->content;
|
|
size_t &lenTextContent = ((TextNode*)curNode)->lenContent;
|
|
textContent = (char*)realloc(textContent, lenTextContent + 1);
|
|
textContent[lenTextContent] = car;
|
|
++lenTextContent;
|
|
}
|
|
}
|
|
//Fin du commentaire
|
|
else if(curNode && curNode->type == Node::Comment && car == '>' && specialTagLevel == COMMENT_TAG_ENDCHAR_2)
|
|
{
|
|
specialTagLevel = 0;
|
|
inTag = false;
|
|
|
|
DESTROYPARENT(foundNode);
|
|
//UNREF(foundNode);
|
|
foundNode = curNode;
|
|
inComment = false;
|
|
if(keepMemory)
|
|
{
|
|
APPEND(foundNode);
|
|
}
|
|
curNode = 0;
|
|
this->state = NODE_COMMENT;
|
|
return NODE_COMMENT;
|
|
}
|
|
//Début de prologue XML
|
|
else if(car == '?' && inNewTag && !inComment && !inCDATA)
|
|
{
|
|
inXMLProlog = true;
|
|
inNewTag = false;
|
|
inTag = false;
|
|
}
|
|
else if(car == '?' && inXMLProlog && !inComment && !inCDATA)
|
|
{
|
|
specialTagLevel = PROLOG_TAG_ENDCHAR;
|
|
}
|
|
else if(car == '>' && inXMLProlog && specialTagLevel == PROLOG_TAG_ENDCHAR && !inComment && !inCDATA)
|
|
{
|
|
specialTagLevel = 0;
|
|
inXMLProlog = 0;
|
|
}
|
|
else//Texte
|
|
{
|
|
if(inXMLProlog) return 0;
|
|
if(inNewTag && !inEndTag)//On est dans un tag avec contenu -> on crée l'élément
|
|
{
|
|
Element* newNode = XMLElement_New(&car, 1);
|
|
inTag = true;
|
|
inNewTag = false;
|
|
//DESTROYPARENT(curNode);
|
|
//UNREF(curNode);
|
|
curNode = newNode;
|
|
//GB.Ref(curNode);
|
|
}
|
|
else if(!curNode || (!XML_isTextNode(curNode) && !inTag))//Pas de nœud courant -> nœud texte
|
|
{
|
|
if(isWhiteSpace(car)) return 0;
|
|
TextNode* newNode = XMLTextNode_New(&car, 1);
|
|
//DESTROYPARENT(curNode);
|
|
curNode = newNode;
|
|
//GB.Ref(curNode);
|
|
}
|
|
else if(curNode->type == Node::ElementNode && inTag && !inAttr)//Si on est dans le tag d'un élément
|
|
{
|
|
if(!isNameChar(car)) return 0;
|
|
char *&textContent = ((Element*)curNode)->tagName;
|
|
size_t &lenTextContent = ((Element*)curNode)->lenTagName;
|
|
textContent = (char*)realloc(textContent, lenTextContent + 1);
|
|
textContent[lenTextContent] = car;
|
|
++lenTextContent;
|
|
}
|
|
else if(inAttrName && inAttr)//Nom d'attribut
|
|
{
|
|
if(!attrName)
|
|
{
|
|
attrName = (char*)malloc(1);
|
|
*attrName = car;
|
|
lenAttrName = 1;
|
|
}
|
|
else
|
|
{
|
|
attrName = (char*)realloc(attrName, lenAttrName + 1);
|
|
attrName[lenAttrName] = car;
|
|
++lenAttrName;
|
|
}
|
|
}
|
|
else if(inAttrVal && inAttr)//Valeur d'attribut
|
|
{
|
|
if(!attrVal)
|
|
{
|
|
attrVal = (char*)malloc(1);
|
|
*attrVal = car;
|
|
lenAttrVal = 1;
|
|
}
|
|
else
|
|
{
|
|
attrVal = (char*)realloc(attrVal, lenAttrVal + 1);
|
|
attrVal[lenAttrVal] = car;
|
|
++lenAttrVal;
|
|
}
|
|
}
|
|
|
|
else if(XML_isTextNode(curNode))
|
|
{
|
|
char *&textContent = ((TextNode*)curNode)->content;
|
|
size_t &lenTextContent = ((TextNode*)curNode)->lenContent;
|
|
textContent = (char*)realloc(textContent, lenTextContent + 1);
|
|
textContent[lenTextContent] = car;
|
|
++lenTextContent;
|
|
if(curNode->type == Node::Comment) specialTagLevel = COMMENT_TAG_STARTCHAR_3; //En cas de "-" non significatifs
|
|
else if(inXMLProlog) specialTagLevel = 0;//En cas de "?" non significatifs
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|