Patch summary (gb.xml):
  * element.cpp: Element::fromText accepts tag and attribute names through
    isNameStartChar()/isNameChar() instead of isLetter() plus explicit ':', '-', '_' tests.
  * main.cpp: WStringToString()/StringToWString() now convert through UTF-8
    (wstrToUtf8/utf8toWStr) instead of copying elements one-to-one with std::copy;
    adds isNameStartChar()/isNameChar() following the XML NameStartChar/NameChar productions.
  * main.h: declares isNameStartChar()/isNameChar().

Review amendments made to the added lines (hunk line counts are unchanged):
  * isNameStartChar: the range INTER(0xC200, 0x2FEF) was empty (min > max); corrected to
    INTER(0x2C00, 0x2FEF), matching the grammar quoted in the comment above the function.
  * isNameStartChar: removed CAR("Ø"), which compared the wide character with the first
    narrow byte of a string literal; U+00D8 is already covered by INTER(0xD8, 0xF6).
  * utf8toWStr: the comment on the "c <= 0xf7" branch now reads "4byte sequence start".
  * Comments were added on added lines only.

NOTE(review): this patch was recovered from a whitespace-collapsed copy. Indentation and the
position of blank context lines are reconstructed, so apply with whitespace-tolerant matching
and confirm against the tree. The "vector*" hunk heading and the "reinterpret_cast(" context
line look like they lost template arguments - confirm. The main.cpp index line still carries
the post-image hash of the unamended patch.

diff --git a/gb.xml/src/element.cpp b/gb.xml/src/element.cpp
index 746cb7448..2ff588bf1 100644
--- a/gb.xml/src/element.cpp
+++ b/gb.xml/src/element.cpp
@@ -280,7 +280,7 @@ vector* Element::fromText(wstring data, wstring::size_type i, uint c, uin
 
         for(INC; i < data.length(); INC) //On cherche le tagName
         {
-            if(!isLetter(s) && s != L":" && s != L"-"&& s != L"_") break;
+            if(!isNameChar(s)) break;
             tag += s;
         }
 
@@ -357,9 +357,9 @@ vector* Element::fromText(wstring data, wstring::size_type i, uint c, uin
 
         wstring attr, sVal = L"";
 
-        if(isLetter(s))
+        if(isNameStartChar(s))
         {
-            while(i < data.length() && (isLetter(s) || s == L"-" || s == L":" || s == L"_"))
+            while(i < data.length() && isNameChar(s))
             {
                 attr += s;
                 INC;
diff --git a/gb.xml/src/main.cpp b/gb.xml/src/main.cpp
index f1323dc26..d040c6808 100644
--- a/gb.xml/src/main.cpp
+++ b/gb.xml/src/main.cpp
@@ -9,18 +9,91 @@
 #include "CExplorer.h"
 #endif
 
-std::string WStringToString(const std::wstring& s)
-{
-std::string temp(s.length(), ' ');
-std::copy(s.begin(), s.end(), temp.begin());
-return temp;
+// Decodes UTF-8 from src into dest (cleared first). Stray continuation bytes, bytes above 0xf7, and sequences cut short by an ASCII byte or by end of input produce 'err'.
+void utf8toWStr(wstring& dest, const string& src){
+    dest.clear();
+    wchar_t w = 0;
+    int bytes = 0; // continuation bytes still expected for the sequence being decoded
+    wchar_t err = L'�';
+    for (size_t i = 0; i < src.size(); i++){
+        unsigned char c = (unsigned char)src[i];
+        if (c <= 0x7f){//first byte
+            if (bytes){
+                dest.push_back(err);
+                bytes = 0;
+            }
+            dest.push_back((wchar_t)c);
+        }
+        else if (c <= 0xbf){//second/third/etc byte
+            if (bytes){
+                w = ((w << 6)|(c & 0x3f));
+                bytes--;
+                if (bytes == 0)
+                    dest.push_back(w);
+            }
+            else
+                dest.push_back(err);
+        }
+        else if (c <= 0xdf){//2byte sequence start
+            bytes = 1;
+            w = c & 0x1f;
+        }
+        else if (c <= 0xef){//3byte sequence start
+            bytes = 2;
+            w = c & 0x0f;
+        }
+        else if (c <= 0xf7){//4byte sequence start
+            bytes = 3;
+            w = c & 0x07;
+        }
+        else{
+            dest.push_back(err);
+            bytes = 0;
+        }
+    }
+    if (bytes)
+        dest.push_back(err);
 }
 
-std::wstring StringToWString(const std::string& s)
+void wstrToUtf8(string& dest, const wstring& src){ // encodes each element of src as UTF-8 into dest (cleared first); values above 0x10ffff become '?'
+    dest.clear();
+    for (size_t i = 0; i < src.size(); i++){
+        wchar_t w = src[i];
+        if (w <= 0x7f)
+            dest.push_back((char)w);
+        else if (w <= 0x7ff){
+            dest.push_back(0xc0 | ((w >> 6)& 0x1f));
+            dest.push_back(0x80| (w & 0x3f));
+        }
+        else if (w <= 0xffff){
+            dest.push_back(0xe0 | ((w >> 12)& 0x0f));
+            dest.push_back(0x80| ((w >> 6) & 0x3f));
+            dest.push_back(0x80| (w & 0x3f));
+        }
+        else if (w <= 0x10ffff){
+            dest.push_back(0xf0 | ((w >> 18)& 0x07));
+            dest.push_back(0x80| ((w >> 12) & 0x3f));
+            dest.push_back(0x80| ((w >> 6) & 0x3f));
+            dest.push_back(0x80| (w & 0x3f));
+        }
+        else
+            dest.push_back('?');
+    }
+}
+
+// Returns str converted with wstrToUtf8().
+std::string WStringToString(const std::wstring& str)
 {
-std::wstring temp(s.length(),L' ');
-std::copy(s.begin(), s.end(), temp.begin());
-return temp;
+    string result;
+    wstrToUtf8(result, str);
+    return result;
+}
+
+std::wstring StringToWString(const std::string& str) // returns str converted with utf8toWStr()
+{
+    wstring result;
+    utf8toWStr(result, str);
+    return result;
 }
 
 wstring Html$(wstring text)
@@ -69,6 +142,44 @@ ostream &operator<<( ostream &out, std::wstring *str )
     return out;
 }
 
+/* http://www.w3.org/TR/REC-xml/#NT-NameStartChar
+
+   NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
+                     [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
+                     [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+ */
+
+#define INTER(min, max) (car >= min && car <= max)
+#define INTERCAR(min, max) (car >= *(min) && car <= *(max))
+#define CAR(c) (car == *(c))
+
+bool isNameStartChar(wstring &s)
+{
+    const wchar_t car = (s.at(0)); // only the first character of s is tested
+
+    return CAR(":") || INTERCAR("A", "Z") || CAR("_") || INTERCAR("a", "z") ||
+            INTER(0xC0, 0xD6) || INTER(0xD8, 0xF6) || INTER(0xF8, 0x2FF) ||
+            INTER(0x370, 0x37D) || INTER(0x37F, 0x1FFF) || INTER(0x200C, 0x200D) ||
+            INTER(0x2070, 0x218F) || INTER(0x2C00, 0x2FEF) || INTER(0x3001, 0xD7FF) ||
+            INTER(0xF900, 0xFDCF) || INTER(0xFDF0, 0xFFFD) || INTER(0x10000, 0xEFFFF);
+
+
+}
+
+/* http://www.w3.org/TR/REC-xml/#NT-NameChar
+
+   NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
+
+ */
+
+bool isNameChar(wstring &s)
+{
+    const wchar_t car = (s.at(0)); // only the first character of s is tested
+
+    return isNameStartChar(s) || CAR("-") || CAR(".") || INTERCAR("0", "9") ||
+            (car == 0xB7) || INTER(0x0300, 0x036F) || INTER(0x203F, 0x2040);
+}
+
 #ifndef __HMAIN_CPP
 GB_INTERFACE GB EXPORT;
 
diff --git a/gb.xml/src/main.h b/gb.xml/src/main.h
index d523936c5..1c56a582e 100644
--- a/gb.xml/src/main.h
+++ b/gb.xml/src/main.h
@@ -64,6 +64,9 @@ extern "C" GB_INTERFACE GB;
 
 #define VARGOBJ(type, arg) reinterpret_cast(VARG(arg))
 
+bool isNameStartChar(wstring &s);
+bool isNameChar(wstring &s);
+
 #endif // MAIN_H
 
 