python解析xml模块封装代码

程序员文章站 2023-11-23 17:24:40

有如下的xml文件：复制代码代码如下: ...

有如下的xml文件：

<?xml version="1.0" encoding="utf-8" ?>  
<root>  
<childs>  
<child name='first' >1</child>  
<child value="2">2</child>  
</childs>  
</root>

下面介绍python解析xml文件的几种方法，使用python模块实现。

方式1，python模块实现自动遍历所有节点：

复制代码代码如下:

#!/usr/bin/env python  
# -*- coding: utf-8 -*-  
from xml.sax.handler import ContentHandler
from xml.sax import parse


class testhandle(ContentHandler):
    """SAX handler that traces parse events and collects character data.

    Every start tag, end tag and character chunk reported by the parser is
    printed, and each character chunk is also appended to ``inlist``.

    Args:
        inlist: A caller-owned list that receives every chunk passed to
            ``characters`` (whitespace between tags included).
    """

    def __init__(self, inlist):
        # Initialise the base handler as well so its own state is set up.
        ContentHandler.__init__(self)
        self.inlist = inlist

    # The SAX driver only invokes the camelCase callbacks; all-lowercase
    # method names are never called.
    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        print('name: %s attrs: %s' % (name, list(attrs.keys())))

    def endElement(self, name):
        """Print the name of the element being closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and record it in ``inlist``."""
        print('chars %s' % chars)
        self.inlist.append(chars)

    # Aliases for code that still refers to the lowercase spellings.
    startelement = startElement
    endelement = endElement

              
if __name__ == '__main__':
    # Parse test.xml with the tracing handler, then show every character
    # chunk that the handler collected along the way.
    collected = []
    handler = testhandle(collected)
    parse('test.xml', handler)
    print(collected)

结果：
[html] view plaincopy
name: root attrs: []
chars

name: childs attrs: []
chars

name: child attrs: [u'name']
chars 1
endname child
chars

name: child attrs: [u'value']
chars 2
endname child
chars

endname childs
chars

endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

方式2，python模块实现获取根节点，按需查找指定节点：

复制代码代码如下:

#!/usr/bin/env python    
# -*- coding: utf-8 -*-    
from xml.dom import minidom    
# Sample document used by the demo. The literal has to begin directly with
# the XML declaration: any character in front of it makes the parser reject
# the document.
xmlstr = '''<?xml version="1.0" encoding="utf-8"?> 
<hash> 
    <request name='first'>/2/photos/square/type.xml</request> 
    <error_code>21301</error_code> 
    <error>auth faild!</error> 
</hash> 
'''
def doxml(xmlstr, show_help=True):
    """Parse ``xmlstr`` with minidom and print a walk over the root's children.

    The whole document and the root element are printed first. Then every
    direct child of the root is printed; for element children the ``name``
    attribute, tag name, number of child nodes and leading text are printed
    as well.

    Args:
        xmlstr: XML document text to parse.
        show_help: When true (the default), also print ``help()`` for every
            callable member of each element child. Pass ``False`` to skip
            this very long dump.
    """
    dom = minidom.parseString(xmlstr)
    print('dom:')
    print(dom.toxml())

    # documentElement is the root element even when a comment or doctype
    # precedes it in the document.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements have attributes; skip text, comments and the like.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))
        # An empty element has no first child, and an element child has no
        # ``data``; fall back to an empty string in both cases.
        print('child data: %s' % getattr(child.firstChild, 'data', ''))
        print('=======================================')
        print('more help info to see:')
        if show_help:
            for med in dir(child):
                # help() needs the member itself; given the bare name string
                # it would look for a module or topic of that name.
                member = getattr(child, med)
                if callable(member):
                    help(member)

                
if __name__ == '__main__':
    # Run the demo on the sample document only when executed as a script.
    doxml(xmlstr)

结果：
[html] view plaincopy
dom:
<?xml version="1.0" ?><hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>
root:
<hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>

<request name="first">/2/photos/square/type.xml</request>
child node attribute name: first
child node name: request
child node len: 1
child data: /2/photos/square/type.xml
=======================================
more help info to see:
两种方法各有其优点，python的xml处理模块太多，目前只用到这2个。

=====补充分割线================
实际工作中发现python的minidom无法解析其它编码的xml，只能解析utf-8的编码，而且xml文件的头部声明也必须是utf-8，为其它编码会报错误。
网上的解决办法都是替换xml文件头部的编码声明，然后转换编码为utf-8再用minidom解析，实际测试为可行，不过有点累赘的感觉。

本节是 python解析xml模块封装代码的第二部分。
====写xml内容的分割线=========

复制代码代码如下:

#!/usr/bin/env python
#encoding: utf-8  
from xml.dom import minidom  

class xmlwrite(object):
    """Small minidom wrapper that builds a result document and saves it.

    The document root is an ``<api>`` element. Nodes are added through
    ``write_node`` using a simple slash-separated path in which each step may
    carry a zero-based index, e.g. ``/case[1]/`` for the second ``<case>``
    child of the root. Attribute predicates are not supported.

    Args:
        resultfile: Path of the file written by ``save_xml``.
    """

    def __init__(self, resultfile):
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the empty document and remember its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Resolve ``xpath`` relative to the root element.

        Args:
            xpath: Slash-separated path; empty steps are ignored, so ``'/'``
                resolves to the root itself. A step is ``name`` (first match)
                or ``name[i]`` (i-th match, zero-based).

        Returns:
            The matching node, or ``None`` when any step has no match.

        Raises:
            ValueError: If the text inside ``[...]`` is not an integer.
        """
        parentnode = self.root
        for step in xpath.split('/'):
            step = step.strip()
            if not step:
                continue
            spcindex = step.find('[')
            if spcindex > -1:
                tagname = step[:spcindex]
                index = int(step[spcindex + 1:-1])
            else:
                # No index given: use the full step as the tag name (slicing
                # with -1 here would cut off its last character).
                tagname = step
                index = 0
            matches = [child for child in parentnode.childNodes
                       if child.nodeName == tagname]
            if index < 0 or index >= len(matches):
                return None
            parentnode = matches[index]
        return parentnode

    def write_node(self, parent, nodename, value, attribute=None, cdata=False):
        """Append a new ``nodename`` element under the node at ``parent``.

        Args:
            parent: Path of the parent node (see ``__get_spec_node``). When it
                cannot be resolved, a message is printed and the root element
                is used instead.
            nodename: Tag name of the new element.
            value: Text content. ``None`` or ``''`` creates an element without
                text; non-string values are converted with ``str``.
            attribute: Optional dict of attribute names to values.
            cdata: When true, store the text as a CDATA section.
        """
        node = self.dom.createElement(nodename)
        if value is not None and value != '':
            # minidom only accepts strings as node content.
            if not isinstance(value, (type(''), type(u''))):
                value = str(value)
            if cdata:
                nodedata = self.dom.createCDATASection(value)
            else:
                nodedata = self.dom.createTextNode(value)
            node.appendChild(nodedata)
        # Attributes are independent of whether the element has text.
        if attribute and isinstance(attribute, dict):
            for key, attrvalue in attribute.items():
                node.setAttribute(key, attrvalue)
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, TypeError, ValueError):
            parentnode = None
        if parentnode is None:
            print('get parent node fail, use the root as parent node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append ``<starttime>`` to the root."""
        self.write_node('/', 'starttime', time)

    def write_end_time(self, time):
        """Append ``<endtime>`` to the root."""
        self.write_node('/', 'endtime', time)

    def write_pass_count(self, count):
        """Append ``<passcount>`` to the root."""
        self.write_node('/', 'passcount', count)

    def write_fail_count(self, count):
        """Append ``<failcount>`` to the root."""
        self.write_node('/', 'failcount', count)

    def write_case(self):
        """Append an empty ``<case>`` element to the root."""
        self.write_node('/', 'case', None)

    def write_case_no(self, index, value):
        """Append ``<no>`` to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'no', value)

    def write_case_url(self, index, value):
        """Append ``<url>`` to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'url', value)

    def write_case_dbdata(self, index, value):
        """Append ``<dbdata>`` to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'dbdata', value)

    def write_case_apidata(self, index, value):
        """Append ``<apidata>`` to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'apidata', value)

    def write_case_dbsql(self, index, value):
        """Append ``<dbsql>`` (as CDATA) to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'dbsql', value, cdata=True)

    def write_case_apixpath(self, index, value):
        """Append ``<apixpath>`` to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'apixpath', value)

    def save_xml(self):
        """Serialise the document as UTF-8 and write it to ``resultfile``."""
        # Serialise to bytes first so the file content always matches the
        # declared encoding; ``with`` closes the file even if writing fails.
        data = self.dom.toxml(encoding='utf-8')
        with open(self.resultfile, 'wb') as myfile:
            myfile.write(data)

if __name__ == '__main__':
    # Demo: build a report with summary fields and two <case> entries, then
    # save it. The url of each case is intentionally written twice.
    xr = xmlwrite(r'd:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')

    # One tuple per case: (no, url, dbsql, dbdata, apixpath, apidata).
    samples = (
        ('0', 'http://www.google.com', 'select * from ', 'dbtata', '/xpath', 'apidata'),
        ('1', 'http://www.baidu.com', 'select 1 from ', 'dbtata1', '/xpath1', 'apidata1'),
    )

    # All <case> containers are created before any of them is filled in.
    for _ in samples:
        xr.write_case()

    for idx, (no, url, dbsql, dbdata, apixpath, apidata) in enumerate(samples):
        xr.write_case_no(idx, no)
        xr.write_case_url(idx, url)
        xr.write_case_url(idx, url)
        xr.write_case_dbsql(idx, dbsql)
        xr.write_case_dbdata(idx, dbdata)
        xr.write_case_apixpath(idx, apixpath)
        xr.write_case_apidata(idx, apidata)

    xr.save_xml()

以上封装了minidom，支持通过xpath来写节点，不支持xpath带属性的匹配，但支持带索引的匹配。
比如：/root/child[1], 表示root的第2个child节点。

上一篇： Laravel框架自定义验证过程实例分析

下一篇： python合并文本文件示例

python解析xml模块封装代码

简单介绍使用Python解析并修改XML文档的方法

Python中functools模块的常用函数解析

Python解析m3u8拼接下载mp4视频文件的示例代码

深入分析在Python模块顶层运行的代码引起的一个Bug

Android使用Pull解析器解析xml文件的实现代码

python实现的解析crontab配置文件代码

Python openpyxl模块原理及用法解析

Python通过DOM和SAX方式解析XML的应用实例分享

Python csv模块使用方法代码实例

详解在Python程序中解析并修改XML内容的方法