python解析xml模塊封裝代碼
有如下的xml文件:
<?xml version="1.0" encoding="utf-8" ?>
<root>
<childs>
<child name='first' >1</child>
<child value="2">2</child>
</childs>
</root>
下面介紹python解析xml文件的幾種方法,使用python模塊實現。
方式1,python模塊實現自動遍歷所有節點:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.sax.handler import ContentHandler
from xml.sax import parse
class TestHandle(ContentHandler):
    """SAX content handler that traces every event and collects text.

    Each start tag, end tag and character chunk is printed as it is
    reported by the parser, and every character chunk is also appended to
    the caller-supplied list so it can be inspected after parsing.

    Args:
        inlist: A mutable list; character data chunks are appended to it
            in document order (whitespace between elements included).
    """

    def __init__(self, inlist):
        # Explicit base-class call (rather than super()) keeps this working
        # on both Python 2 and Python 3.
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element being closed."""
        print('endname %s' % (name,))

    def characters(self, chars):
        """Print a chunk of character data and record it in ``inlist``."""
        print('chars %s' % (chars,))
        self.inlist.append(chars)
if __name__ == '__main__':
    # Parse test.xml, tracing every SAX event, then show all the character
    # data chunks that the handler collected.
    lt = []
    parse('test.xml', TestHandle(lt))
    print(lt)
結果:
[html] view plaincopy
name: root attrs: []
chars
name: childs attrs: []
chars
name: child attrs: [u'name']
chars 1
endname child
chars
name: child attrs: [u'value']
chars 2
endname child
chars
endname childs
chars
endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']
方式2,python模塊實現獲取根節點,按需查找指定節點:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.dom import minidom
# Sample document used by doxml().  The XML declaration must be the very
# first characters of the string, so the literal opens with exactly three
# quotes and no stray characters before "<?xml".
xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
<hash>
<request name='first'>/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
'''
def doxml(xmlstr):
    """Parse *xmlstr* with minidom and print a walk over the root's children.

    Prints the whole document, then the root element, then every direct
    child of the root.  For each child that is an element it also prints
    its ``name`` attribute, its node name, the number of its child nodes,
    the text of its first child (empty when there is none), and the help
    text of each public method the node offers.

    Args:
        xmlstr: The XML document as a string.

    Raises:
        xml.parsers.expat.ExpatError: If *xmlstr* is not well-formed XML.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is the root element even when a comment, doctype or
    # processing instruction comes before it (firstChild would not be).
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements have attributes; skip whitespace text, comments, etc.
        if child.nodeType != child.ELEMENT_NODE:
            continue

        print('child node attribute name: %s' % (child.getAttribute('name'),))
        print('child node name: %s' % (child.nodeName,))
        print('child node len: %s' % (len(child.childNodes),))

        # An element may be empty or may start with a nested element, in
        # which case there is no text payload to show.
        first = child.firstChild
        has_text = first is not None and first.nodeType in (
            first.TEXT_NODE, first.CDATA_SECTION_NODE)
        print('child data: %s' % (first.data if has_text else '',))

        print('=======================================')
        print('more help info to see:')
        for med in dir(child):
            if med.startswith('_'):
                continue
            member = getattr(child, med)
            # help() must receive the attribute itself, not its name string;
            # restrict to methods so plain data values are not looked up.
            if callable(member):
                help(member)
if __name__ == '__main__':
    # Run the minidom walk-through on the sample document defined above.
    doxml(xmlstr)
結果:
[html] view plaincopy
Dom:
<?xml version="1.0" ?><hash>
<request name="first">/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
root:
<hash>
<request name="first">/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
<request name="first">/2/photos/square/type.xml</request>
child node attribute name: first
child node name: request
child node len: 1
child data: /2/photos/square/type.xml
=======================================
more help info to see:
兩種方法各有其優點,python的xml處理模塊太多,目前只用到這2個。
=====補充分割線================
實際工作中發現python的minidom無法解析其它編碼的xml,只能解析utf-8的編碼,而其xml文件的頭部聲明也必須是utf-8,為其它編碼會報錯誤。
網上的解決辦法都是替換xml文件頭部的編碼聲明,然後轉換編碼為utf-8再用minidom解析,實際測試為可行,不過有點累贅的感覺。
本節是 python解析xml模塊封裝代碼 的第二部分。
====寫xml內容的分割線=========
#!/usr/bin/env python
#encoding: utf-8
from xml.dom import minidom
class xmlwrite:
    """Build a small XML result document with minidom and save it to disk.

    The document root is an ``api`` element.  Child nodes are added with
    :meth:`write_node`, which locates the parent through a simplified,
    slash-separated path resolved from the root element.  Each segment is
    either ``Name`` (first child called ``Name``) or ``Name[i]`` (the
    child called ``Name`` at 0-based position ``i`` among same-named
    siblings).  Attribute predicates are not supported.

    Args:
        resultfile: Path of the file that :meth:`save_xml` writes.
    """

    def __init__(self, resultfile):
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the empty document and remember its root element."""
        impl = minidom.getDOMImplementation()
        self.dom = impl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Return the node addressed by *xpath*, or None if it is missing.

        Empty segments are ignored, so ``'/'`` addresses the root element.

        Raises:
            ValueError: If an index such as ``[x]`` is not an integer.
        """
        node = self.root
        for segment in xpath.split('/'):
            segment = segment.strip()
            if not segment:
                continue
            bracket = segment.find('[')
            if bracket > -1:
                name = segment[:bracket]
                index = int(segment[bracket + 1:-1])
            else:
                # No "[i]" suffix: the whole segment is the name.
                name = segment
                index = 0
            matches = [c for c in node.childNodes if c.nodeName == name]
            if not 0 <= index < len(matches):
                return None
            node = matches[index]
        return node

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Append a new element under the node addressed by *parent*.

        Args:
            parent: Path of the parent node (see the class description).
                If it cannot be resolved, a message is printed and the
                root element is used instead.
            nodename: Tag name of the element to create.
            value: Text content.  ``None`` or an empty string creates an
                element without text; numbers are converted to text.
            attribute: Optional dict of attribute names to values.
            CDATA: When true, store *value* in a CDATA section instead of
                a plain text node.
        """
        node = self.dom.createElement(nodename)

        # Counts and similar values are naturally numeric; minidom only
        # accepts strings, and a plain truth test would drop 0.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            value = str(value)
        if value:
            if CDATA:
                payload = self.dom.createCDATASection(value)
            else:
                payload = self.dom.createTextNode(value)
            node.appendChild(payload)

        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, attr_value)

        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, ValueError):
            # Non-string path or malformed index: treat as "not found".
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append a ``StartTime`` element to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append an ``EndTime`` element to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append a ``PassCount`` element to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append a ``FailCount`` element to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append a new, empty ``Case`` element to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append a ``No`` element to the ``Case`` at 0-based *index*."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append a ``URL`` element to the ``Case`` at 0-based *index*."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append a ``DBData`` element to the ``Case`` at 0-based *index*."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append an ``APIData`` element to the ``Case`` at 0-based *index*."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append a CDATA ``DBSQL`` element to the ``Case`` at *index*."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append an ``APIXPath`` element to the ``Case`` at *index*."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Write the document to ``self.resultfile`` encoded as UTF-8."""
        import codecs

        # The file must really be UTF-8 because the XML declaration says so;
        # the context manager closes it even if serialisation fails.
        with codecs.open(self.resultfile, 'w', encoding='utf-8') as out:
            self.dom.writexml(out, encoding='utf-8')
if __name__ == '__main__':
    # Demo: build a result document with two cases and save it.
    xr = xmlwrite(r'D:\test.xml')

    # Summary fields directly under the root element.
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')

    # Create both Case elements first so they can be addressed by index.
    xr.write_case()
    xr.write_case()

    # First case (index 0).
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')

    # Second case (index 1).
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')

    xr.save_xml()
以上封裝了minidom,支持通過xpath來寫節點,不支持xpath帶屬性的匹配,但支持帶索引的匹配。
比如:/root/child[1], 表示root的第2個child節點。
- 用Python解析XML的幾種常見方法的介紹
- 深入解讀Python解析XML的幾種方式
- Python中使用ElementTree解析XML示例
- 詳細解讀Python中解析XML數據的方法
- python 解析XML python模塊xml.dom解析xml實例代碼
- 使用python解析xml成對應的html示例分享
- Python中使用SAX解析xml實例
- 橫向對比分析Python解析XML的四種方式
- Python解析xml中dom元素的方法
- Python通過DOM和SAX方式解析XML的應用實例分享
- Python3基于sax解析xml操作示例
- python解析xml簡單示例
相關文章
windows10系統中安裝python3.x+scrapy教程
本文給大家主要介紹了在windows10系統中安裝python3以及scrapy框架的教程以及有可能會遇到的問題的解決辦法,希望大家能夠喜歡 2016-11-11 Python的Flask框架中實現簡單的登錄功能的教程
這篇文章主要介紹了Python的Flask框架中實現簡單的登錄功能的教程,登錄是各個web框架中的基礎功能,需要的朋友可以參考下 2015-04-04 Python使用pickle模塊存儲數據報錯解決示例代碼
這篇文章主要介紹了Python使用pickle模塊存儲數據報錯解決示例代碼,小編覺得還是挺不錯的,具有一定借鑒價值,需要的朋友可以參考下 2018-01-01 python 實現得到當前時間偏移day天后的日期方法
今天小編就為大家分享一篇python 實現得到當前時間偏移day天后的日期方法,具有很好的參考價值,希望對大家有所幫助。一起跟隨小編過來看看吧 2018-12-12