libxml:从 xmlTextReaderPtr 构建 DOM 树,处理命名空间

发布于 2024-12-27 01:54:49 字数 5226 浏览 0 评论 0 原文

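我正在尝试从 xmlTextReaderPtr(参见 http://xmlsoft.org/html/libxml-xmlreader.html)构建 DOM 树。在我的最终程序中,它将用于通过 XSLT 处理来自大型 XML 文件的小型 DOM 树,就像我在 Java 中做过的那样。我找不到处理命名空间/前缀的正确方法(应该如何创建元素和属性?)。下面是一段非常基础的 C 代码:我从流中构建 DOM,然后将其输出。我应该如何修改代码来处理命名空间/前缀?非常感谢!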

#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlreader.h>


/*
 * Dump an XML subtree to stdout.
 *
 * Starting at a_node, visits that node and then each following element
 * sibling. For every element it prints the element name, the URI of the
 * element's namespace when one is set, and each attribute as " @name=value ".
 * The children of every visited node are dumped recursively.
 *
 * a_node may be NULL, in which case nothing is printed.
 */
static void print_element_names(xmlNode * a_node)
{
    xmlNode *walker = a_node;

    while (walker != NULL) {
        if (walker->type == XML_ELEMENT_NODE) {
            xmlAttrPtr prop = walker->properties;

            printf("node type: Element, name: %s", walker->name);
            if (walker->ns != NULL && walker->ns->href != NULL) {
                printf(" with namespace: %s", walker->ns->href);
            }
            printf("\n");

            /* Attribute values are fetched by name and released after printing. */
            while (prop != NULL) {
                xmlChar *value = xmlGetProp(walker, prop->name);

                printf(" @%s=%s ", prop->name, value);
                xmlFree(value);
                prop = prop->next;
            }
        }

        /* Descend first, then move on to the next element sibling. */
        print_element_names(walker->children);
        walker = xmlNextElementSibling(walker);
    }
}


/*
 * Reads an XML document from stdin with the streaming xmlTextReader API,
 * rebuilds it node by node as a DOM tree, prints the tree with
 * print_element_names() and dumps the document to stderr.
 *
 * argc and argv are not used. The function always returns 0.
 *
 * NOTE(review): the namespace handling below is the open problem this code
 * was posted to ask about; the review notes mark the suspicious calls.
 */
int main(int argc,char** argv)
    {
    LIBXML_TEST_VERSION;
    xmlTextReaderPtr reader;
    xmlDocPtr doc = NULL;
    /* Element that newly read nodes are appended to; NULL until the root exists. */
    xmlNodePtr current=NULL;
    /* NOTE(review): never used; shadowed by the declarations inside the loop. */
    xmlNsPtr ns=NULL;
    /* read from stdin */
    /* NOTE(review): the result is not checked before it is used below. */
    reader=xmlReaderForFd(fileno(stdin),0,"UTF-8",0);


    for(;;)
        {
        int nodeType;
        /* Advance to the next node; stop on end of input or on error (ret <= 0). */
        int ret = xmlTextReaderRead(reader);
        if(ret<=0) break;
        nodeType=xmlTextReaderNodeType(reader);

        switch(nodeType)
            {
            case XML_READER_TYPE_ELEMENT:
                {
                /* NOTE(review): unused; shadowed by the `ns` declared in the branch below. */
                xmlNsPtr ns=0;
                xmlNodePtr node;
                /* The document is created lazily, on the first element. */
                if(doc==NULL)
                    {
                    doc=xmlNewDoc( BAD_CAST "1.0");
                    }
                if(xmlTextReaderConstNamespaceUri(reader)!=0)
                    {
                    /** how should I handle the attributes' namespaces & prefix here ? */
                    /*
                     * NOTE(review): the namespace URI is passed as the third
                     * argument; the libxml2 reference describes that argument
                     * of xmlSearchNs() as a prefix (xmlSearchNsByHref() takes
                     * a URI) - confirm.
                     */
                    xmlNsPtr ns=xmlSearchNs(doc,current,xmlTextReaderConstNamespaceUri(reader));
                    /* The element name comes from xmlTextReaderConstName(); presumably the qualified name - confirm. */
                    node=xmlNewNode(ns, xmlTextReaderConstName(reader));
                    if(ns==0)
                        {
                        /*
                         * NOTE(review): arguments are given as (node, prefix,
                         * URI) while libxml2 documents
                         * xmlNewNs(node, href, prefix) - looks swapped, confirm.
                         * The result is only stored in `ns`; node was created
                         * above with a null namespace and is not updated.
                         */
                        ns=xmlNewNs(node,
                            xmlTextReaderConstPrefix(reader),
                            xmlTextReaderConstNamespaceUri(reader)
                            );
                        }
                    }
                else
                    {
                    node=xmlNewNode(0, xmlTextReaderConstName(reader));
                    }

                /* The first element becomes the root; later ones are appended to the open element. */
                if(current==NULL)
                    {
                    xmlDocSetRootElement(doc,node);
                    }
                else
                    {

                    xmlAddChild(current,node);
                    }

                current=node;

                /* An empty element (<x/>) produces no END_ELEMENT event, so step back to the parent now. */
                if(xmlTextReaderIsEmptyElement(reader))
                     {
                     current= current->parent;
                     }


                /* Copy every attribute onto the new element, without any namespace. */
                if(xmlTextReaderHasAttributes(reader))
                     {
                     int i;
                     int n_att=xmlTextReaderAttributeCount(reader);
                     for(i=0;i< n_att;++i)
                     {
                     const xmlChar* k;
                     xmlChar* v;
                     xmlTextReaderMoveToAttributeNo(reader,i);
                     k = xmlTextReaderConstName(reader);
                     /* v is released with xmlFree() once the property has been created. */
                     v = xmlTextReaderValue(reader);
                    /** how should I handle the attributes' namespaces & prefix here ? */
                     xmlNewProp(node,k, v);
                     xmlFree(v);
                     }
                     /* Put the reader back on the element that owns the attributes. */
                     xmlTextReaderMoveToElement(reader);
                     }

                break;
                }
             case XML_READER_TYPE_END_ELEMENT:
                 {
                 /* Closing tag: continue appending to the parent. */
                 current= current->parent;
                 break;
                 }
             case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
             case XML_READER_TYPE_TEXT:
                 {
                 /* Text is appended as a text node under the open element. */
                 const xmlChar* v = xmlTextReaderConstValue(reader);
                 xmlNodePtr node= xmlNewDocText(doc,v);
                 xmlAddChild(current,node);
                 break;
                 }
            default:
                {
                /* Every other node type is reported on stderr and skipped. */
                fprintf(stderr,"Ignoring node Type %d\n",nodeType);
                break;
                }
            }
        }
    /* Print and serialise whatever was built, then release it. */
    if(doc!=NULL)
        {
        print_element_names(xmlDocGetRootElement(doc));
        xmlDocDump(stderr,doc);
        xmlFreeDoc(doc);
        }
    xmlFreeTextReader(reader);
    xmlCleanupParser();
    xmlMemoryDump();
    return 0;
    }

这是我的测试文件:

<?xml version="1.0"?>
<a xmlns="http://urn1.org" xmlns:ns1="http://urn2.org" ns1:test="ok">azdazd
    <b xmlns:ns2="http://urn3.org" xmlns:ns3="http://urn4.org" ns3:test="OK"/>
    azd
    <ns1:b test="ok"/>
    xaz
</a>

非常感谢,

I'm trying to build a DOM tree from an xmlTextReaderPtr. In my final program, it will be used to process a small DOM tree with XSLT from a large XML file, like I did in Java. I cannot find the proper way to work with the namespaces/prefixes (how should I create the elements and the attributes?). Here is a very basic C program: I'm building the DOM from the stream and then dumping the DOM. How should I modify my code to handle the namespaces/prefixes? Many thanks!

#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlreader.h>


/*
 * Dump an XML subtree to stdout.
 *
 * Starting at a_node, visits that node and then each following element
 * sibling. For every element it prints the element name, the URI of the
 * element's namespace when one is set, and each attribute as " @name=value ".
 * The children of every visited node are dumped recursively.
 *
 * a_node may be NULL, in which case nothing is printed.
 */
static void print_element_names(xmlNode * a_node)
{
    xmlNode *walker = a_node;

    while (walker != NULL) {
        if (walker->type == XML_ELEMENT_NODE) {
            xmlAttrPtr prop = walker->properties;

            printf("node type: Element, name: %s", walker->name);
            if (walker->ns != NULL && walker->ns->href != NULL) {
                printf(" with namespace: %s", walker->ns->href);
            }
            printf("\n");

            /* Attribute values are fetched by name and released after printing. */
            while (prop != NULL) {
                xmlChar *value = xmlGetProp(walker, prop->name);

                printf(" @%s=%s ", prop->name, value);
                xmlFree(value);
                prop = prop->next;
            }
        }

        /* Descend first, then move on to the next element sibling. */
        print_element_names(walker->children);
        walker = xmlNextElementSibling(walker);
    }
}


/*
 * Reads an XML document from stdin with the streaming xmlTextReader API,
 * rebuilds it node by node as a DOM tree, prints the tree with
 * print_element_names() and dumps the document to stderr.
 *
 * argc and argv are not used. The function always returns 0.
 *
 * NOTE(review): the namespace handling below is the open problem this code
 * was posted to ask about; the review notes mark the suspicious calls.
 */
int main(int argc,char** argv)
    {
    LIBXML_TEST_VERSION;
    xmlTextReaderPtr reader;
    xmlDocPtr doc = NULL;
    /* Element that newly read nodes are appended to; NULL until the root exists. */
    xmlNodePtr current=NULL;
    /* NOTE(review): never used; shadowed by the declarations inside the loop. */
    xmlNsPtr ns=NULL;
    /* read from stdin */
    /* NOTE(review): the result is not checked before it is used below. */
    reader=xmlReaderForFd(fileno(stdin),0,"UTF-8",0);


    for(;;)
        {
        int nodeType;
        /* Advance to the next node; stop on end of input or on error (ret <= 0). */
        int ret = xmlTextReaderRead(reader);
        if(ret<=0) break;
        nodeType=xmlTextReaderNodeType(reader);

        switch(nodeType)
            {
            case XML_READER_TYPE_ELEMENT:
                {
                /* NOTE(review): unused; shadowed by the `ns` declared in the branch below. */
                xmlNsPtr ns=0;
                xmlNodePtr node;
                /* The document is created lazily, on the first element. */
                if(doc==NULL)
                    {
                    doc=xmlNewDoc( BAD_CAST "1.0");
                    }
                if(xmlTextReaderConstNamespaceUri(reader)!=0)
                    {
                    /** how should I handle the attributes' namespaces & prefix here ? */
                    /*
                     * NOTE(review): the namespace URI is passed as the third
                     * argument; the libxml2 reference describes that argument
                     * of xmlSearchNs() as a prefix (xmlSearchNsByHref() takes
                     * a URI) - confirm.
                     */
                    xmlNsPtr ns=xmlSearchNs(doc,current,xmlTextReaderConstNamespaceUri(reader));
                    /* The element name comes from xmlTextReaderConstName(); presumably the qualified name - confirm. */
                    node=xmlNewNode(ns, xmlTextReaderConstName(reader));
                    if(ns==0)
                        {
                        /*
                         * NOTE(review): arguments are given as (node, prefix,
                         * URI) while libxml2 documents
                         * xmlNewNs(node, href, prefix) - looks swapped, confirm.
                         * The result is only stored in `ns`; node was created
                         * above with a null namespace and is not updated.
                         */
                        ns=xmlNewNs(node,
                            xmlTextReaderConstPrefix(reader),
                            xmlTextReaderConstNamespaceUri(reader)
                            );
                        }
                    }
                else
                    {
                    node=xmlNewNode(0, xmlTextReaderConstName(reader));
                    }

                /* The first element becomes the root; later ones are appended to the open element. */
                if(current==NULL)
                    {
                    xmlDocSetRootElement(doc,node);
                    }
                else
                    {

                    xmlAddChild(current,node);
                    }

                current=node;

                /* An empty element (<x/>) produces no END_ELEMENT event, so step back to the parent now. */
                if(xmlTextReaderIsEmptyElement(reader))
                     {
                     current= current->parent;
                     }


                /* Copy every attribute onto the new element, without any namespace. */
                if(xmlTextReaderHasAttributes(reader))
                     {
                     int i;
                     int n_att=xmlTextReaderAttributeCount(reader);
                     for(i=0;i< n_att;++i)
                     {
                     const xmlChar* k;
                     xmlChar* v;
                     xmlTextReaderMoveToAttributeNo(reader,i);
                     k = xmlTextReaderConstName(reader);
                     /* v is released with xmlFree() once the property has been created. */
                     v = xmlTextReaderValue(reader);
                    /** how should I handle the attributes' namespaces & prefix here ? */
                     xmlNewProp(node,k, v);
                     xmlFree(v);
                     }
                     /* Put the reader back on the element that owns the attributes. */
                     xmlTextReaderMoveToElement(reader);
                     }

                break;
                }
             case XML_READER_TYPE_END_ELEMENT:
                 {
                 /* Closing tag: continue appending to the parent. */
                 current= current->parent;
                 break;
                 }
             case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
             case XML_READER_TYPE_TEXT:
                 {
                 /* Text is appended as a text node under the open element. */
                 const xmlChar* v = xmlTextReaderConstValue(reader);
                 xmlNodePtr node= xmlNewDocText(doc,v);
                 xmlAddChild(current,node);
                 break;
                 }
            default:
                {
                /* Every other node type is reported on stderr and skipped. */
                fprintf(stderr,"Ignoring node Type %d\n",nodeType);
                break;
                }
            }
        }
    /* Print and serialise whatever was built, then release it. */
    if(doc!=NULL)
        {
        print_element_names(xmlDocGetRootElement(doc));
        xmlDocDump(stderr,doc);
        xmlFreeDoc(doc);
        }
    xmlFreeTextReader(reader);
    xmlCleanupParser();
    xmlMemoryDump();
    return 0;
    }

and here is my test file:

<?xml version="1.0"?>
<a xmlns="http://urn1.org" xmlns:ns1="http://urn2.org" ns1:test="ok">azdazd
    <b xmlns:ns2="http://urn3.org" xmlns:ns3="http://urn4.org" ns3:test="OK"/>
    azd
    <ns1:b test="ok"/>
    xaz
</a>

Many thanks,

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

小红帽 2025-01-03 01:54:49

好的,我已经找到了我的错误以及如何使用 API。这是代码:

#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlreader.h>

#define WHERE fprintf(stderr,"[DEBUG]line:%d\n",__LINE__)

/*
 * Dump an XML subtree to stdout.
 *
 * Starting at a_node, visits that node and then each following element
 * sibling. For every element it prints:
 *   - the element name,
 *   - one line per namespace declared on the element (URI and prefix),
 *   - each attribute as " @name=value ", preceded by the attribute's
 *     namespace (URI and prefix) when it has one.
 * The children of every visited node are dumped recursively.
 *
 * The namespace prefix is printed as text ("(default)" when the declaration
 * has no prefix) rather than as a pointer value, and attribute values are
 * looked up namespace-aware so that two attributes sharing a local name are
 * not confused. NULL strings are printed as "".
 *
 * a_node may be NULL, in which case nothing is printed.
 */
static void print_element_names(xmlNode * a_node)
{
    xmlNode *cur_node;

    for (cur_node = a_node; cur_node != NULL; cur_node = xmlNextElementSibling(cur_node)) {
        if (cur_node->type == XML_ELEMENT_NODE) {
            xmlNsPtr ns;
            xmlAttrPtr attr;

            printf("node type: Element, name: %s", (const char *)cur_node->name);
            for (ns = cur_node->nsDef; ns != NULL; ns = ns->next) {
                printf(" with namespace: %s %s\n",
                       ns->href != NULL ? (const char *)ns->href : "",
                       ns->prefix != NULL ? (const char *)ns->prefix : "(default)");
            }
            printf("\n");

            for (attr = cur_node->properties; attr != NULL; attr = attr->next) {
                xmlChar *v;

                if (attr->ns != NULL) {
                    /* Fetch the value of exactly this attribute, not of a namesake. */
                    v = xmlGetNsProp(cur_node, attr->name, attr->ns->href);
                    printf(" with namespace: %s %s\n",
                           attr->ns->href != NULL ? (const char *)attr->ns->href : "",
                           attr->ns->prefix != NULL ? (const char *)attr->ns->prefix : "(default)");
                } else {
                    v = xmlGetNoNsProp(cur_node, attr->name);
                }

                printf(" @%s=%s ", (const char *)attr->name,
                       v != NULL ? (const char *)v : "");
                if (v != NULL) {
                    xmlFree(v);
                }
            }
            printf("\n");
        }

        print_element_names(cur_node->children);
    }
}


/*
 * Return the namespace that binds `prefix` to `href` as seen from `node`,
 * declaring it on `node` when no such binding is in scope.
 *
 * `node` must already be linked into the tree so that declarations on its
 * ancestors are visible. `prefix` may be NULL for the default namespace.
 * Returns NULL only if a new declaration could not be created.
 */
static xmlNsPtr resolve_ns(xmlDocPtr doc, xmlNodePtr node,
                           const xmlChar *href, const xmlChar *prefix)
{
    /* xmlSearchNs() looks a namespace up by prefix, so the URI is checked separately. */
    xmlNsPtr ns = xmlSearchNs(doc, node, prefix);

    if (ns != NULL && xmlStrEqual(ns->href, href)) {
        return ns;
    }
    /* Argument order is (node, href, prefix). */
    return xmlNewNs(node, href, prefix);
}

/*
 * Copy the attributes of the reader's current element onto `node`.
 *
 * xmlns declarations are skipped; namespaces are declared on demand by
 * resolve_ns() instead. The reader is moved back to the element before
 * returning. Returns 1 on success, 0 if an attribute could not be created.
 */
static int copy_attributes(xmlTextReaderPtr reader, xmlDocPtr doc, xmlNodePtr node)
{
    int count = xmlTextReaderAttributeCount(reader);
    int ok = 1;
    int i;

    for (i = 0; ok && i < count; ++i) {
        const xmlChar *uri;
        xmlChar *value;
        xmlAttrPtr attr = NULL;

        if (xmlTextReaderMoveToAttributeNo(reader, i) != 1) {
            continue;
        }
        uri = xmlTextReaderConstNamespaceUri(reader);
        if (uri != NULL && xmlStrEqual(uri, BAD_CAST "http://www.w3.org/2000/xmlns/")) {
            continue;
        }

        value = xmlTextReaderValue(reader);
        if (uri == NULL) {
            attr = xmlNewProp(node, xmlTextReaderConstName(reader), value);
        } else {
            xmlNsPtr ns = resolve_ns(doc, node, uri, xmlTextReaderConstPrefix(reader));

            if (ns != NULL) {
                attr = xmlNewNsProp(node, ns, xmlTextReaderConstLocalName(reader), value);
            }
        }
        if (value != NULL) {
            xmlFree(value);
        }
        if (attr == NULL) {
            ok = 0;
        }
    }

    xmlTextReaderMoveToElement(reader);
    return ok;
}

/*
 * Read an XML document from stdin with the streaming xmlTextReader API,
 * rebuild it as a DOM tree with element and attribute namespaces preserved,
 * print the tree with print_element_names() and dump the document to stderr.
 *
 * Elements are created with their local name and bound to their namespace
 * with xmlSetNs(). Node types other than elements, end tags, text and
 * significant whitespace are reported on stderr and skipped.
 *
 * argc and argv are not used.
 * Returns 0 on success, 1 if the reader could not be created, the input
 * failed to parse, or a node could not be created. Whatever part of the tree
 * was built is still dumped in the last two cases.
 */
int main(int argc,char** argv)
{
    xmlTextReaderPtr reader;
    xmlDocPtr doc = NULL;
    xmlNodePtr current = NULL;  /* innermost open element; NULL outside the root */
    int ret = 0;
    int failed = 0;

    (void)argc;
    (void)argv;

    LIBXML_TEST_VERSION;

    /* read from stdin */
    reader = xmlReaderForFd(fileno(stdin), NULL, "UTF-8", 0);
    if (reader == NULL) {
        fprintf(stderr, "Unable to create an XML reader on stdin\n");
        return 1;
    }

    while (!failed && (ret = xmlTextReaderRead(reader)) == 1) {
        int nodeType = xmlTextReaderNodeType(reader);

        switch (nodeType) {
        case XML_READER_TYPE_ELEMENT: {
            const xmlChar *uri = xmlTextReaderConstNamespaceUri(reader);
            const xmlChar *prefix = xmlTextReaderConstPrefix(reader);
            /* Must be read while the reader is still positioned on the element. */
            int is_empty = xmlTextReaderIsEmptyElement(reader);
            xmlNodePtr node;

            if (doc == NULL) {
                doc = xmlNewDoc(BAD_CAST "1.0");
                if (doc == NULL) {
                    failed = 1;
                    break;
                }
            }

            node = xmlNewNode(NULL, xmlTextReaderConstLocalName(reader));
            if (node == NULL) {
                failed = 1;
                break;
            }

            /* Link the node first so that namespace lookups can see its ancestors. */
            if (current == NULL) {
                xmlDocSetRootElement(doc, node);
            } else {
                xmlAddChild(current, node);
            }

            if (uri != NULL) {
                xmlNsPtr ns = resolve_ns(doc, node, uri, prefix);

                if (ns == NULL) {
                    failed = 1;
                    break;
                }
                xmlSetNs(node, ns);
            }

            if (xmlTextReaderHasAttributes(reader) == 1
                && !copy_attributes(reader, doc, node)) {
                failed = 1;
                break;
            }

            /* An empty element (<x/>) has no END_ELEMENT event, so it is never opened. */
            if (!is_empty) {
                current = node;
            }
            break;
        }
        case XML_READER_TYPE_END_ELEMENT: {
            if (current != NULL) {
                xmlNodePtr parent = current->parent;

                /* The root's parent is the document, which is not an open element. */
                current = (parent != NULL && parent->type == XML_ELEMENT_NODE) ? parent : NULL;
            }
            break;
        }
        case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
        case XML_READER_TYPE_TEXT: {
            if (current != NULL) {
                xmlNodePtr text = xmlNewDocText(doc, xmlTextReaderConstValue(reader));

                if (text == NULL) {
                    failed = 1;
                } else {
                    xmlAddChild(current, text);
                }
            }
            break;
        }
        default: {
            fprintf(stderr,"Ignoring node Type %d\n",nodeType);
            break;
        }
        }
    }

    if (failed) {
        fprintf(stderr, "Failed to build the DOM tree\n");
    } else if (ret < 0) {
        fprintf(stderr, "Failed to parse the XML input\n");
    }

    if (doc != NULL) {
        print_element_names(xmlDocGetRootElement(doc));
        xmlDocDump(stderr, doc);
        xmlFreeDoc(doc);
    }
    xmlFreeTextReader(reader);
    xmlCleanupParser();
    xmlMemoryDump();
    return (failed || ret < 0) ? 1 : 0;
}

OK, I've found my errors and worked out how to use the API. Here is the code:

#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlreader.h>

#define WHERE fprintf(stderr,"[DEBUG]line:%d\n",__LINE__)

/*
 * Dump an XML subtree to stdout.
 *
 * Starting at a_node, visits that node and then each following element
 * sibling. For every element it prints:
 *   - the element name,
 *   - one line per namespace declared on the element (URI and prefix),
 *   - each attribute as " @name=value ", preceded by the attribute's
 *     namespace (URI and prefix) when it has one.
 * The children of every visited node are dumped recursively.
 *
 * The namespace prefix is printed as text ("(default)" when the declaration
 * has no prefix) rather than as a pointer value, and attribute values are
 * looked up namespace-aware so that two attributes sharing a local name are
 * not confused. NULL strings are printed as "".
 *
 * a_node may be NULL, in which case nothing is printed.
 */
static void print_element_names(xmlNode * a_node)
{
    xmlNode *cur_node;

    for (cur_node = a_node; cur_node != NULL; cur_node = xmlNextElementSibling(cur_node)) {
        if (cur_node->type == XML_ELEMENT_NODE) {
            xmlNsPtr ns;
            xmlAttrPtr attr;

            printf("node type: Element, name: %s", (const char *)cur_node->name);
            for (ns = cur_node->nsDef; ns != NULL; ns = ns->next) {
                printf(" with namespace: %s %s\n",
                       ns->href != NULL ? (const char *)ns->href : "",
                       ns->prefix != NULL ? (const char *)ns->prefix : "(default)");
            }
            printf("\n");

            for (attr = cur_node->properties; attr != NULL; attr = attr->next) {
                xmlChar *v;

                if (attr->ns != NULL) {
                    /* Fetch the value of exactly this attribute, not of a namesake. */
                    v = xmlGetNsProp(cur_node, attr->name, attr->ns->href);
                    printf(" with namespace: %s %s\n",
                           attr->ns->href != NULL ? (const char *)attr->ns->href : "",
                           attr->ns->prefix != NULL ? (const char *)attr->ns->prefix : "(default)");
                } else {
                    v = xmlGetNoNsProp(cur_node, attr->name);
                }

                printf(" @%s=%s ", (const char *)attr->name,
                       v != NULL ? (const char *)v : "");
                if (v != NULL) {
                    xmlFree(v);
                }
            }
            printf("\n");
        }

        print_element_names(cur_node->children);
    }
}


/*
 * Return the namespace that binds `prefix` to `href` as seen from `node`,
 * declaring it on `node` when no such binding is in scope.
 *
 * `node` must already be linked into the tree so that declarations on its
 * ancestors are visible. `prefix` may be NULL for the default namespace.
 * Returns NULL only if a new declaration could not be created.
 */
static xmlNsPtr resolve_ns(xmlDocPtr doc, xmlNodePtr node,
                           const xmlChar *href, const xmlChar *prefix)
{
    /* xmlSearchNs() looks a namespace up by prefix, so the URI is checked separately. */
    xmlNsPtr ns = xmlSearchNs(doc, node, prefix);

    if (ns != NULL && xmlStrEqual(ns->href, href)) {
        return ns;
    }
    /* Argument order is (node, href, prefix). */
    return xmlNewNs(node, href, prefix);
}

/*
 * Copy the attributes of the reader's current element onto `node`.
 *
 * xmlns declarations are skipped; namespaces are declared on demand by
 * resolve_ns() instead. The reader is moved back to the element before
 * returning. Returns 1 on success, 0 if an attribute could not be created.
 */
static int copy_attributes(xmlTextReaderPtr reader, xmlDocPtr doc, xmlNodePtr node)
{
    int count = xmlTextReaderAttributeCount(reader);
    int ok = 1;
    int i;

    for (i = 0; ok && i < count; ++i) {
        const xmlChar *uri;
        xmlChar *value;
        xmlAttrPtr attr = NULL;

        if (xmlTextReaderMoveToAttributeNo(reader, i) != 1) {
            continue;
        }
        uri = xmlTextReaderConstNamespaceUri(reader);
        if (uri != NULL && xmlStrEqual(uri, BAD_CAST "http://www.w3.org/2000/xmlns/")) {
            continue;
        }

        value = xmlTextReaderValue(reader);
        if (uri == NULL) {
            attr = xmlNewProp(node, xmlTextReaderConstName(reader), value);
        } else {
            xmlNsPtr ns = resolve_ns(doc, node, uri, xmlTextReaderConstPrefix(reader));

            if (ns != NULL) {
                attr = xmlNewNsProp(node, ns, xmlTextReaderConstLocalName(reader), value);
            }
        }
        if (value != NULL) {
            xmlFree(value);
        }
        if (attr == NULL) {
            ok = 0;
        }
    }

    xmlTextReaderMoveToElement(reader);
    return ok;
}

/*
 * Read an XML document from stdin with the streaming xmlTextReader API,
 * rebuild it as a DOM tree with element and attribute namespaces preserved,
 * print the tree with print_element_names() and dump the document to stderr.
 *
 * Elements are created with their local name and bound to their namespace
 * with xmlSetNs(). Node types other than elements, end tags, text and
 * significant whitespace are reported on stderr and skipped.
 *
 * argc and argv are not used.
 * Returns 0 on success, 1 if the reader could not be created, the input
 * failed to parse, or a node could not be created. Whatever part of the tree
 * was built is still dumped in the last two cases.
 */
int main(int argc,char** argv)
{
    xmlTextReaderPtr reader;
    xmlDocPtr doc = NULL;
    xmlNodePtr current = NULL;  /* innermost open element; NULL outside the root */
    int ret = 0;
    int failed = 0;

    (void)argc;
    (void)argv;

    LIBXML_TEST_VERSION;

    /* read from stdin */
    reader = xmlReaderForFd(fileno(stdin), NULL, "UTF-8", 0);
    if (reader == NULL) {
        fprintf(stderr, "Unable to create an XML reader on stdin\n");
        return 1;
    }

    while (!failed && (ret = xmlTextReaderRead(reader)) == 1) {
        int nodeType = xmlTextReaderNodeType(reader);

        switch (nodeType) {
        case XML_READER_TYPE_ELEMENT: {
            const xmlChar *uri = xmlTextReaderConstNamespaceUri(reader);
            const xmlChar *prefix = xmlTextReaderConstPrefix(reader);
            /* Must be read while the reader is still positioned on the element. */
            int is_empty = xmlTextReaderIsEmptyElement(reader);
            xmlNodePtr node;

            if (doc == NULL) {
                doc = xmlNewDoc(BAD_CAST "1.0");
                if (doc == NULL) {
                    failed = 1;
                    break;
                }
            }

            node = xmlNewNode(NULL, xmlTextReaderConstLocalName(reader));
            if (node == NULL) {
                failed = 1;
                break;
            }

            /* Link the node first so that namespace lookups can see its ancestors. */
            if (current == NULL) {
                xmlDocSetRootElement(doc, node);
            } else {
                xmlAddChild(current, node);
            }

            if (uri != NULL) {
                xmlNsPtr ns = resolve_ns(doc, node, uri, prefix);

                if (ns == NULL) {
                    failed = 1;
                    break;
                }
                xmlSetNs(node, ns);
            }

            if (xmlTextReaderHasAttributes(reader) == 1
                && !copy_attributes(reader, doc, node)) {
                failed = 1;
                break;
            }

            /* An empty element (<x/>) has no END_ELEMENT event, so it is never opened. */
            if (!is_empty) {
                current = node;
            }
            break;
        }
        case XML_READER_TYPE_END_ELEMENT: {
            if (current != NULL) {
                xmlNodePtr parent = current->parent;

                /* The root's parent is the document, which is not an open element. */
                current = (parent != NULL && parent->type == XML_ELEMENT_NODE) ? parent : NULL;
            }
            break;
        }
        case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
        case XML_READER_TYPE_TEXT: {
            if (current != NULL) {
                xmlNodePtr text = xmlNewDocText(doc, xmlTextReaderConstValue(reader));

                if (text == NULL) {
                    failed = 1;
                } else {
                    xmlAddChild(current, text);
                }
            }
            break;
        }
        default: {
            fprintf(stderr,"Ignoring node Type %d\n",nodeType);
            break;
        }
        }
    }

    if (failed) {
        fprintf(stderr, "Failed to build the DOM tree\n");
    } else if (ret < 0) {
        fprintf(stderr, "Failed to parse the XML input\n");
    }

    if (doc != NULL) {
        print_element_names(xmlDocGetRootElement(doc));
        xmlDocDump(stderr, doc);
        xmlFreeDoc(doc);
    }
    xmlFreeTextReader(reader);
    xmlCleanupParser();
    xmlMemoryDump();
    return (failed || ret < 0) ? 1 : 0;
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文