[osg-users] osgDB XmlParser and localization

Trajce Nikolov NICK trajce.nikolov.nick at gmail.com
Mon Jul 9 03:40:54 PDT 2018


Hi Robert,

I am working with some localized XML files (containing characters outside
the 0-255 range), and at present the XmlNode parsing cannot handle them. I
think support for this is important - in my case the data is OpenStreetMap,
with street names that contain these characters.

Attached is the modified source (based on master) that fixes it, if
you have the time and are willing to review it. I can open a PR if you want.

Thanks and cheers!

Nick

p.s. welcome back

-- 
trajce nikolov nick
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openscenegraph.org/pipermail/osg-users-openscenegraph.org/attachments/20180709/a5fb28aa/attachment.html>
-------------- next part --------------
/* -*-c++-*- OpenSceneGraph - Copyright (C) 1998-2009 Robert Osfield
 *
 * This library is open source and may be redistributed and/or modified under
 * the terms of the OpenSceneGraph Public License (OSGPL) version 0.0 or
 * (at your option) any later version.  The full license is in LICENSE file
 * included with this distribution, and on the openscenegraph.org website.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * OpenSceneGraph Public License for more details.
*/

#include <osgDB/XmlParser>
#include <osgDB/FileUtils>

#include <osg/Notify>

using namespace osgDB;

// Locate 'filename' via osgDB::findDataFile(), buffer its contents and parse
// them into a new XmlNode tree.
//
// Returns the root node (handed over through ref_ptr::release()), or 0 when
// the file cannot be found or the Input reports failure after buffering.
// The result of XmlNode::read() itself is not checked.
XmlNode* osgDB::readXmlFile(const std::string& filename,const Options* options)
{
    const std::string resolvedPath = osgDB::findDataFile(filename, options);
    if (resolvedPath.empty())
    {
        OSG_NOTICE<<"Could not find XML file: "<<filename<<std::endl;
        return 0;
    }

    XmlNode::Input xmlInput;
    xmlInput.open(resolvedPath);
    xmlInput.readAllDataIntoBuffer();

    // The Input is only tested after the whole file has been buffered.
    if (!xmlInput)
    {
        OSG_NOTICE<<"Could not open XML file: "<<filename<<std::endl;
        return 0;
    }

    osg::ref_ptr<XmlNode> document = new XmlNode;
    document->read(xmlInput);

    return document.release();
}

// Return a copy of 'str' with leading and trailing whitespace removed, where
// whitespace is any of: space, tab, form feed, vertical tab, newline,
// carriage return. An empty or all-whitespace input yields an empty string.
std::string osgDB::trimEnclosingSpaces(const std::string& str)
{
    static const char* const blanks = " \t\f\v\n\r";

    const std::string::size_type first = str.find_first_not_of(blanks);
    if (first==std::string::npos) return std::string();

    // A non-blank character exists, so the backwards search cannot fail.
    const std::string::size_type last = str.find_last_not_of(blanks);

    return str.substr(first, (last-first)+1);
}


// Buffer everything available from 'fin' and parse it into a new XmlNode
// tree.
//
// Returns the root node (handed over through ref_ptr::release()), or 0 when
// the Input reports failure after buffering. The result of XmlNode::read()
// itself is not checked.
XmlNode* osgDB::readXmlStream(std::istream& fin)
{
    XmlNode::Input xmlInput;
    xmlInput.attach(fin);
    xmlInput.readAllDataIntoBuffer();

    if (xmlInput)
    {
        osg::ref_ptr<XmlNode> document = new XmlNode;
        document->read(xmlInput);
        return document.release();
    }

    OSG_NOTICE<<"Could not attach to XML stream."<<std::endl;
    return 0;
}

// Construct a ControlMap pre-populated with the default control mappings
// registered by setUpControlMappings().
XmlNode::ControlMap::ControlMap()
{
    setUpControlMappings();
}

// Register a two-way mapping between the control sequence 'control' and the
// character code 'c'. An existing entry for either key is overwritten.
void XmlNode::ControlMap::addControlToCharacter(const std::string& control, int c)
{
    // The two tables are independent; both directions are always updated.
    _characterToControlMap[c] = control;
    _controlToCharacterMap[control] = c;
}

// Register the default control sequences: the five predefined XML entities
// plus the parser-specific "&nl;" sequence for a newline.
//
// Each key is the complete sequence as it appears in the XML text, including
// the leading '&' and the trailing ';', because readAndReplaceControl() looks
// up the text it has read up to and including the ';'.
void XmlNode::ControlMap::setUpControlMappings()
{
    addControlToCharacter("&amp;",'&');
    addControlToCharacter("&lt;",'<');
    addControlToCharacter("&gt;",'>');
    addControlToCharacter("&quot;",'"');
    addControlToCharacter("&apos;",'\'');
    addControlToCharacter("&nl;",'\n');
}

// Construct an Input with the read position at the start of the buffer.
XmlNode::Input::Input():
    _currentPos(0)
{
}

// Copy constructor that deliberately ignores its argument: the new Input gets
// a default-constructed ControlMap base and a read position of 0; no member
// is copied from the source object here.
XmlNode::Input::Input(const Input&):
    ControlMap(),
    _currentPos(0)
{
}

// Nothing to release explicitly; members clean up after themselves.
XmlNode::Input::~Input()
{
}
// Open 'filename' on the internal file stream using the stream's default open
// mode. No error is reported from here; callers in this file test the Input
// after readAllDataIntoBuffer().
void XmlNode::Input::open(const std::string& filename)
{
    _fin.open(filename.c_str());
}

// Make the internal stream read from the stream buffer of 'fin'. The buffer
// is shared, not copied.
void XmlNode::Input::attach(std::istream& fin)
{
    // The buffer is replaced through the std::ios base interface
    // (presumably because _fin's own type only exposes the rdbuf() getter).
    static_cast<std::ios&>(_fin).rdbuf(fin.rdbuf());
}

// Append every remaining character of the internal stream to _buffer.
//
// All byte values are kept, so multi-byte (e.g. UTF-8) text passes through
// untouched. Only the end-of-file sentinel returned by the final, failed
// get() is excluded: storing it would add a stray byte to the end of the
// data and leave a one-byte buffer for an empty or unopened stream.
void XmlNode::Input::readAllDataIntoBuffer()
{
    for(;;)
    {
        const int c = _fin.get();

        // get() puts the stream into a failed state when no character could
        // be extracted; in that case 'c' is not a real character.
        if (!_fin) break;

        _buffer.push_back(c);
    }
}

// Advance the read position past any run of spaces, tabs, newlines and
// carriage returns, stopping at the first other character or at the end of
// the buffer.
void XmlNode::Input::skipWhiteSpace()
{
    while(_currentPos<_buffer.size())
    {
        switch(_buffer[_currentPos])
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                ++_currentPos;
                break;
            default:
                return;
        }
    }
}

// Construct a node with no type assigned yet. read() turns an UNASSIGNED
// node into a ROOT node when it is called on it.
XmlNode::XmlNode()
{
    type = UNASSIGNED;
}

// Parse XML text from 'input' into this node.
//
// Recognised constructs, in the order they are tested:
//   <!-- ... -->      stored as a COMMENT child
//   </name>           ends this node; returns true (a name mismatch is only logged)
//   <!DOCTYPE ... >   stored as an INFORMATION child
//   <![CDATA[ ... ]]> stored as an INFORMATION child
//   <? ... ?>         stored as an INFORMATION child
//   <name attr=...>   stored as a child; parsed recursively unless closed with "/>" (ATOM)
//   anything else     appended to 'contents', with control sequences ("&...;") replaced
//
// An UNASSIGNED node becomes ROOT. A NODE that gains children becomes a GROUP.
//
// Returns true when a closing tag was consumed. Returns false when the input
// is exhausted, when an opening tag is not closed, or when parsing a child
// returned false. For the ROOT node, running out of input is the normal way
// to finish, so false is expected there.
//
// Characters are compared only against ASCII delimiters, so bytes of
// multi-byte encodings pass through unchanged. Every inner scanning loop also
// tests 'input' so that truncated input ends the loop instead of spinning.
bool XmlNode::read(Input& input)
{
    if (type == UNASSIGNED) type = ROOT;

    while(input)
    {
        //input.skipWhiteSpace();
        if (input.match("<!--"))
        {
            XmlNode* commentNode = new XmlNode;
            commentNode->type = XmlNode::COMMENT;
            children.push_back(commentNode);

            input += 4;
            XmlNode::Input::size_type end = input.find("-->");
            commentNode->contents = input.substr(0, end);
            if (end!=std::string::npos)
            {
                OSG_INFO<<"Valid Comment record ["<<commentNode->contents<<"]"<<std::endl;
                input += (end+3);
            }
            else
            {
                OSG_NOTICE<<"Error: Unclosed Comment record ["<<commentNode->contents<<"]"<<std::endl;
                input += end;
            }
        }
        else if (input.match("</"))
        {
            input += 2;
            XmlNode::Input::size_type end = input.find(">");
            std::string comment = input.substr(0, end);
            if (end!=std::string::npos)
            {
                OSG_INFO<<"Valid end tag ["<<comment<<"]"<<std::endl;
                input += (end+1);
            }
            else
            {
                OSG_NOTICE<<"Error: Unclosed end tag ["<<comment<<"]"<<std::endl;
                input += end;
            }

            if (comment==name) { OSG_INFO<<"end tag is matched correctly"<<std::endl; }
            else { OSG_NOTICE<<"Error: end tag is not matched correctly"<<std::endl; }

            return true;
        }
        else if (input.match("<!DOCTYPE"))
        {
            XmlNode* commentNode = new XmlNode;
            commentNode->type = XmlNode::INFORMATION;
            children.push_back(commentNode);

            // Only the '<' is skipped, so the stored contents start with "!DOCTYPE".
            ++input;
            XmlNode::Input::size_type end = input.find(">");
            commentNode->contents = input.substr(0, end);
            if (end!=std::string::npos)
            {
                OSG_INFO<<"Valid information record ["<<commentNode->contents<<"]"<<std::endl;
                // Step over the contents and the single closing '>' only, so
                // that whatever follows it (e.g. the next '<') is not lost.
                input += (end+1);
            }
            else
            {
                OSG_NOTICE<<"Error: Unclosed information record ["<<commentNode->contents<<"]"<<std::endl;
                input += end;
            }
        }
        else if (input.match("<![CDATA["))
        {
            XmlNode* commentNode = new XmlNode;
            commentNode->type = XmlNode::INFORMATION;
            children.push_back(commentNode);

            input += 9;
            XmlNode::Input::size_type end = input.find("]]>");
            commentNode->contents = input.substr(0, end);
            if (end!=std::string::npos)
            {
                OSG_INFO<<"Valid information record ["<<commentNode->contents<<"]"<<std::endl;
                // The terminator "]]>" is three characters long; skip all of it
                // so the '>' does not end up in the text contents.
                input += (end+3);
            }
            else
            {
                OSG_NOTICE<<"Error: Unclosed information record ["<<commentNode->contents<<"]"<<std::endl;
                input += end;
            }
        }
        else if (input.match("<?"))
        {
            XmlNode* commentNode = new XmlNode;
            commentNode->type = XmlNode::INFORMATION;
            children.push_back(commentNode);

            input += 2;
            XmlNode::Input::size_type end = input.find("?>");
            commentNode->contents = input.substr(0, end);
            if (end!=std::string::npos)
            {
                OSG_INFO<<"Valid information record ["<<commentNode->contents<<"]"<<std::endl;
                input += (end+2);
            }
            else
            {
                OSG_NOTICE<<"Error: Unclosed information record ["<<commentNode->contents<<"]"<<std::endl;
                input += end;
            }
        }
        else if (input.match("<"))
        {
            XmlNode* childNode = new XmlNode;
            childNode->type = XmlNode::NODE;
            children.push_back(childNode);

            input += 1;

            input.skipWhiteSpace();

            // Tag name: everything up to whitespace, '>' or '/'.
            int c = input[0];
            while (input && c!=' ' && c!='\n' && c!='\r' && c!='>' && c!='/')
            {
                childNode->name.push_back(c);
                ++input;

                c = input[0];
            }

            // Attributes: one "option[=value]" pair per iteration.
            c = input[0];
            while (input && c!='>' && c!='/')
            {
                Input::size_type prev_pos = input.currentPosition();

                input.skipWhiteSpace();
                std::string option;
                std::string value;

                if (input[0]=='"')
                {
                    // Quoted option name: the surrounding quotes are kept in the name.
                    option.push_back(input[0]);
                    ++input;
                    c = input[0];
                    while(input && c!='"')
                    {
                        if (c=='&')
                            readAndReplaceControl(option, input);
                        else
                        {
                            option.push_back(c);
                            ++input;
                        }
                        c = input[0];
                    }
                    option.push_back(input[0]);
                    ++input;
                    c = input[0];
                }
                else
                {
                    c = input[0];
                    while(input && c!='>' && c!='/' && c!='"' && c!='\'' && c!='=' && c!=' ' && c!='\n' && c!='\r')
                    {
                        option.push_back(c);
                        ++input;
                        c = input[0];
                    }
                }

                input.skipWhiteSpace();
                if (input[0]=='=')
                {
                    ++input;

                    input.skipWhiteSpace();

                    if (input[0]=='"')
                    {
                        // Double-quoted value.
                        ++input;
                        c = input[0];
                        while(input && c!='"')
                        {
                            if (c=='&')
                                readAndReplaceControl(value, input);
                            else
                            {
                                value.push_back(c);
                                ++input;
                            }
                            c = input[0];
                        }
                        ++input;
                        c = input[0];
                    }
                    else if (input[0]=='\'')
                    {
                        // Single-quoted value.
                        ++input;
                        c = input[0];
                        while(input && c!='\'')
                        {
                            if (c=='&')
                                readAndReplaceControl(value, input);
                            else
                            {
                                value.push_back(c);
                                ++input;
                            }
                            c = input[0];
                        }
                        ++input;
                        c = input[0];
                    }
                    else
                    {
                        // Unquoted value.
                        // NOTE(review): this increment skips the first character of
                        // an unquoted value; behaviour kept as-is, confirm intent.
                        ++input;
                        c = input[0];
                        while(input && c!=' ' && c!='\n' && c!='\r' && c!='"' && c!='\'' && c!='>')
                        {
                            value.push_back(c);
                            ++input;
                            c = input[0];
                        }
                    }
                }

                // Safety net against a stuck parser: force progress by one character.
                if (prev_pos == input.currentPosition())
                {
                    OSG_NOTICE<<"Error, parser iterator not advanced, position: "<<input.substr(0,50)<<std::endl;
                    ++input;
                }

                if (!option.empty())
                {
                    OSG_INFO<<"Assigning option "<<option<<" with value "<<value<<std::endl;
                    childNode->properties[option] = value;
                }
                c = input[0];
            }

            c = input[0];
            if ((c=='>' || c=='/'))
            {
                ++input;

                OSG_INFO<<"Valid tag ["<<childNode->name<<"]"<<std::endl;

                if (c=='/')
                {
                    // Self-closing tag: expect the '>' of "/>".
                    if (input[0]=='>')
                    {
                        ++input;
                        OSG_INFO<<"tag is closed correctly"<<std::endl;
                        childNode->type = ATOM;
                    }
                    else
                        OSG_NOTICE<<"Error: tag is not closed correctly"<<std::endl;
                }
                else
                {
                    // Opening tag: the child parses its own body up to its end tag.
                    bool result = childNode->read(input);
                    if (!result) return false;
                }

                if (type==NODE && !children.empty()) type = GROUP;
            }
            else
            {
                OSG_NOTICE<<"Unclosed tag ["<<childNode->name<<"]"<<std::endl;
                return false;
            }

        }
        else
        {
            // Plain text belonging to this node.
            int c = input[0];

            if (c=='&')
            {
                readAndReplaceControl(contents, input);
            }
            else
            {
                contents.push_back( c );
                ++input;
            }

        }
    }

    if (type==NODE && !children.empty()) type = GROUP;
    return false;
}

// Convenience overload: write this node to 'fout' using a default-constructed
// ControlMap for escaping. Returns the result of the ControlMap overload.
bool XmlNode::write(std::ostream& fout, const std::string& indent) const
{
    return write(ControlMap(), fout, indent);
}

// Write this node to 'fout', prefixing each emitted line with 'indent'.
//
//   UNASSIGNED   logs a notice, writes nothing, returns false
//   ROOT         writes the children only
//   COMMENT      writes "<!--contents-->"
//   INFORMATION  writes "<?contents?>"
//   ATOM         writes "<name props />"
//   NODE         writes "<name props>contents</name>" with contents escaped
//   GROUP        writes the open tag, the children indented by two extra
//                spaces, then the close tag
//
// Returns true for every type except UNASSIGNED (and any unrecognised type).
// The results of writeChildren(), writeProperties() and writeString() are
// not propagated.
bool XmlNode::write(const ControlMap& controlMap, std::ostream& fout, const std::string& indent) const
{
    if (type==UNASSIGNED)
    {
        OSG_NOTICE<<"UNASSIGNED"<<std::endl;
        return false;
    }

    if (type==ROOT)
    {
        writeChildren(controlMap, fout, indent);
        return true;
    }

    if (type==COMMENT)
    {
        fout<<indent<<"<!--"<<contents<<"-->"<<std::endl;
        return true;
    }

    if (type==INFORMATION)
    {
        fout<<indent<<"<?"<<contents<<"?>"<<std::endl;
        return true;
    }

    // Only element-like nodes remain; anything else is not writable.
    if (type!=ATOM && type!=NODE && type!=GROUP) return false;

    // All element-like nodes start with the same "<name props" prefix.
    fout<<indent<<"<"<<name;
    writeProperties(controlMap, fout);

    if (type==ATOM)
    {
        fout<<" />"<<std::endl;
    }
    else if (type==NODE)
    {
        fout<<">";
        writeString(controlMap, fout, contents);
        fout<<"</"<<name<<">"<<std::endl;
    }
    else
    {
        fout<<">"<<std::endl;

        writeChildren(controlMap, fout, indent + "  ");

        fout<<indent<<"</"<<name<<">"<<std::endl;
    }

    return true;
}

bool XmlNode::writeString(const ControlMap& controlMap, std::ostream& fout, const std::string& str) const
{
    for(std::string::const_iterator itr = str.begin();
        itr != str.end();
        ++itr)
    {
        int c = *itr;
        ControlMap::CharacterToControlMap::const_iterator citr = controlMap._characterToControlMap.find(c);
        if (citr != controlMap._characterToControlMap.end()) fout << citr->second;
        else fout.put(c);
    }
    return true;
}

// Write every child node to 'fout' in order, each with the given 'indent'.
//
// The caller's 'controlMap' is forwarded to each child so that the whole
// subtree is escaped with the same mappings, and no new ControlMap has to be
// built per child.
//
// Returns false as soon as a child fails to write, true otherwise.
bool XmlNode::writeChildren(const ControlMap& controlMap, std::ostream& fout, const std::string& indent) const
{
    for(Children::const_iterator citr = children.begin();
        citr != children.end();
        ++citr)
    {
        if (!(*citr)->write(controlMap, fout, indent))
            return false;
    }

    return true;
}

bool XmlNode::writeProperties(const ControlMap& controlMap, std::ostream& fout) const
{
    for(Properties::const_iterator oitr = properties.begin();
        oitr != properties.end();
        ++oitr)
    {
        fout<<" "<<oitr->first<<"=\"";
        if (!writeString(controlMap,fout,oitr->second))
            return false;
        fout<<"\"";
    }

    return true;
}

// Read one control sequence from 'input' and append the character it maps to
// onto 'in_contents'.
//
// Characters are consumed from the current position up to and including the
// next ';' (or to the end of the input if there is none). The consumed text
// is then looked up in the input's control-to-character table.
//
// Returns true if a mapping was found and its character appended. Returns
// false, after logging a warning, if there is no mapping; in that case the
// consumed text is dropped and 'in_contents' is left untouched.
bool XmlNode::readAndReplaceControl(std::string& in_contents, XmlNode::Input& input) const
{
    std::string value;

    // Each character is appended exactly once, including the terminating ';'.
    // If the input runs out first, 'value' holds just what was actually read.
    while(input)
    {
        const int ch = input.get();
        value.push_back(ch);
        if (ch==';') break;
    }

    if (input._controlToCharacterMap.count(value)!=0)
    {
        const int c = input._controlToCharacterMap[value];
        OSG_INFO<<"Read control character "<<value<<" converted to "<<char(c)<<std::endl;
        in_contents.push_back(c);
        return true;
    }
    else
    {
        OSG_NOTICE<<"Warning: read control character "<<value<<", but have no mapping to convert it to."<<std::endl;
        return false;
    }
}


More information about the osg-users mailing list