在阅读本文之前,你需要先了解xml.dom的一些理论知识;读完这些内容之后,你就可以对xml.dom有一定的了解。
下面是我做的demo
运行效果:
解析的XML文件位置:C:\test\hongten.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <students>
3 <student no="2009081097">
4 <name>Hongten</name>
5 <gender>M</gender>
6 <age>20</age>
7 <score subject="math">97</score>
8 <score subject="chinese">90</score>
9 </student>
10 <student no="2009081098">
11 <name>DuDu</name>
12 <gender>W</gender>
13 <age>21</age>
14 <score subject="math">87</score>
15 <score subject="chinese">96</score>
16 </student>
17 <student no="2009081099">
18 <name>Sum</name>
19 <gender>M</gender>
20 <age>19</age>
21 <score subject="math">64</score>
22 <score subject="chinese">98</score>
23 </student>
24 </students>
====================================================
代码部分:
====================================================
1 #python xml.dom
2
3 #Author : Hongten
4 #Mailto : hongtenzone@foxmail.com
5 #Blog : http://www.cnblogs.com/hongten
6 #QQ : 648719819
7 #Version : 1.0
8 #Create : 2013-09-03
9
10 import os
11 from xml.dom import minidom
12
# Module-level settings; init() assigns the values used at run time.
SHOW_LOG = True  # when true, the parsing helpers print progress messages
XML_PATH = None  # path of the XML file to parse; assigned by init()
16
def get_dom_by_parse(path):
    """Parse the XML file at ``path`` and return its DOM document.

    Args:
        path: File system path of the XML file.

    Returns:
        The document returned by ``minidom.parse``, or ``None`` (after
        printing a message) when ``path`` does not exist.
    """
    if not os.path.exists(path):
        print('the path [{}] does not exist!'.format(path))
        return None
    if SHOW_LOG:
        print('开始解析XML文件:[{}]'.format(path))
    return minidom.parse(path)
25
def get_dom_by_file(path):
    """Open the XML file at ``path`` and parse it from the file object.

    Args:
        path: File system path of the XML file.

    Returns:
        The document returned by ``minidom.parse``, or ``None`` (after
        printing a message) when ``path`` does not exist.
    """
    if not os.path.exists(path):
        print('the path [{}] does not exist!'.format(path))
        return None
    if SHOW_LOG:
        print('开始打开XML文件:[{}]'.format(path))
    # Binary mode hands the raw bytes to the parser, so the encoding named
    # in the XML declaration is used instead of the platform's default
    # text encoding.
    with open(path, 'rb') as pf:
        if SHOW_LOG:
            print('开始解析XML文件:[{}]'.format(path))
        return minidom.parse(pf)
37
def get_dom_by_string(s):
    """Parse XML data held in a string and return its DOM document.

    Args:
        s: The XML text to parse.

    Returns:
        The document returned by ``minidom.parseString``, or ``None``
        (after printing a message) when ``s`` is ``None`` or ``''``.
    """
    if s is None or s == '':
        print('the input string is None or equals \'\'.')
        return None
    if SHOW_LOG:
        print('开始解析字符串形式的XML数据:[{}]'.format(s))
    return minidom.parseString(s)
46
def get_root(dom):
    """Return the root element of a DOM document.

    Args:
        dom: A DOM document, or ``None``.

    Returns:
        ``dom.documentElement``, or ``None`` (after printing a message)
        when ``dom`` is ``None``.
    """
    if dom is None:
        print('the dom is None!')
        return None
    return dom.documentElement
53
def get_element_children(fatherElement, subNodeName):
    """Return the elements named ``subNodeName`` found under ``fatherElement``.

    The lookup uses ``getElementsByTagName``, which matches descendants at
    any depth, not only direct children.

    Args:
        fatherElement: The element to search under, or ``None``.
        subNodeName: Tag name to look for.

    Returns:
        The node list of matching elements, or ``None`` (after printing a
        message) when ``fatherElement`` is ``None`` or ``subNodeName`` is
        ``None`` or ``''``.
    """
    if fatherElement is None:
        print('the father node is None!')
        return None
    if subNodeName is None or subNodeName == '':
        print('the sub node name is None or equals \'\'.')
        return None
    return fatherElement.getElementsByTagName(subNodeName)
63
def get_element_value(element, index=0):
    """Return the ``nodeValue`` of one child node of ``element``.

    Args:
        element: The element whose child is read, or ``None``.
        index: Position of the child in ``element.childNodes``
            (default 0, the first child).

    Returns:
        The child's ``nodeValue``, or ``None`` (after printing a message)
        when ``element`` is ``None`` or has no child at ``index`` -- for
        example an empty element such as ``<name/>``.
    """
    if element is None:
        print('the element is None!')
        return None
    try:
        return element.childNodes[index].nodeValue
    except IndexError:
        # An empty element has no child nodes; report it in the same way
        # as the other failure cases instead of raising.
        print('the element has no child node at index [{}]!'.format(index))
        return None
70
def get_element_attrib_value(element, name):
    """Return the value of attribute ``name`` on ``element``.

    Args:
        element: The element to read from, or ``None``.
        name: Name of the attribute.

    Returns:
        The result of ``element.getAttribute(name)``, or ``None`` (after
        printing a message) when ``element`` is ``None`` or ``name`` is
        ``None`` or ``''``.
    """
    if element is None:
        print('the element is None!')
        return None
    if name is None or name == '':
        print('the name is None or equals \'\'.')
        return None
    return element.getAttribute(name)
80
def get_info(root_children):
    """Collect name, gender, age and scores from each student element.

    Args:
        root_children: Iterable of ``student`` elements.

    Returns:
        A flat list holding four entries per student, in order: name,
        gender, age, and a dict mapping each ``score`` element's
        ``subject`` attribute to that element's text. The ``no`` attribute
        of the student is not part of the result.
    """
    info = []
    for item in root_children:
        name_nodes = get_element_children(item, 'name')
        gender_nodes = get_element_children(item, 'gender')
        age_nodes = get_element_children(item, 'age')
        score_nodes = get_element_children(item, 'score')

        # Pair each subject with the text of its own <score> element (the
        # earlier loop stored the loop index here instead of the score).
        scores = {
            get_element_attrib_value(node, 'subject'): get_element_value(node)
            for node in score_nodes
        }

        info.append(get_element_value(name_nodes[0]))
        info.append(get_element_value(gender_nodes[0]))
        info.append(get_element_value(age_nodes[0]))
        info.append(scores)
    return info
107
108
def init(xml_path='C:\\test\\hongten.xml', show_log=True):
    """Set the module-level ``XML_PATH`` and ``SHOW_LOG`` settings.

    Args:
        xml_path: Path of the XML file to parse. Defaults to the sample
            file location used by this demo.
        show_log: Value stored in ``SHOW_LOG``, the flag the parsing
            helpers check before printing progress messages.
    """
    global SHOW_LOG
    global XML_PATH
    SHOW_LOG = show_log
    XML_PATH = xml_path
114
def main():
    """Parse the configured XML file and print its root, students and info."""
    init()
    dom = get_dom_by_parse(XML_PATH)
    if dom is None:
        # get_dom_by_parse has already reported the missing file; stop
        # here rather than fail on attribute access of None.
        return
    root = get_root(dom)
    print(root)
    root_children = get_element_children(root, 'student')
    print(root_children)
    info = get_info(root_children)
    print(info)


if __name__ == '__main__':
    main()