把英文单词及其含义存放在一个 XML 文件 Dictionary.xml 中,然后让 Python 读取该文件。
from xml.dom import minidom
import numpy as np
# Parse the XML dictionary file by name.
mydoc = minidom.parse('./data/Dictionary.xml')
# Collect every <word> element; the code below reads each element's
# 'meaning' attribute and its text content (the word itself).
words = mydoc.getElementsByTagName('word')
# Print every (meaning, word) pair.
for word in words:
    print(word.attributes['meaning'].value, word.firstChild.data)
# Take the first 30 words and print each one as a list of its characters.
for word in words[0:30]:
    s = word.firstChild.data
    ss = list(s)
    print(ss)
打印结果如下:
['e', 'l', 'u', 'd', 'e']['i', 'm', 'm', 'a', 'c', 'u', 'l', 'a', 't', 'e']['t', 'a', 'c', 'i', 't', 'u', 'r', 'n']['e', 'n', 'c', 'o', 'r', 'e']['r', 'e', 'v', 'e', 'l']['s', 'l', 'a', 'n', 'd', 'e', 'r']['e', 'v', 'a', 'n', 'e', 's', 'c', 'e', 'n', 't']['s', 'a', 't', 'i', 'a', 't', 'e']['r', 'e', 'l', 'e', 'n', 't']['c', 'o', 'h', 'o', 'r', 't']['p', 'r', 'e', 'd', 'a', 'c', 'i', 'o', 'u', 's']['i', 'n', 'v', 'e', 'r', 's', 'e']['o', 'b', 's', 'e', 'q', 'u', 'i', 'o', 'u', 's']['i', 'm', 'p', 'o', 'r', 't']['u', 'n', 'b', 'r', 'i', 'd', 'l', 'e', 'd']['m', 'o', 'n', 'o', 'l', 'i', 't', 'h', 'i', 'c']['p', 'l', 'a', 'u', 'd', 'i', 't']['r', 'a', 'b', 'b', 'l', 'e']['c', 'a', 'p', 'i', 't', 'a', 'l']['c', 'o', 'm', 'm', 'i', 's', 's', 'a', 'r', 'y']['t', 'u', 't', 'e', 'l', 'a', 'g', 'e']['t', 'e', 'n', 's', 'i', 'l', 'e']['s', 'i', 'd', 'e', 'r', 'e', 'a', 'l']['f', 'e', 's', 't', 'o', 'o', 'n']['p', 'e', 's', 't', 'i', 'l', 'e', 'n', 'c', 'e']['e', 's', 'p', 'o', 'u', 's', 'e']['r', 'e', 'c', 'e', 'p', 't', 'a', 'c', 'l', 'e']['i', 'n', 't', 'e', 'g', 'u', 'm', 'e', 'n', 't']['s', 'o', 'v', 'e', 'r', 'e', 'i', 'g', 'n']['d', 'i', 'c', 't', 'u', 'm']
创建一个将字符转化为 ASCII 码的方法
# Convert a string to its character codes so that it can be used in
# matrix-style calculations.
def convertStr2Asciicode(str):
    """Return the list of ``ord`` values of the characters in *str*.

    For plain ASCII text these are the ASCII codes; an empty string yields
    an empty list.

    The parameter keeps its original name ``str`` so existing keyword
    callers continue to work, even though it shadows the builtin inside
    this function.
    """
    return [ord(ch) for ch in str]
将前30个字符串转换为ASCII码值,存为一个二维数组:
# Build a ragged 2-D list: one row of character codes for each of the
# first 30 words (rows have different lengths at this stage).
asciilist = [
    convertStr2Asciicode(word.firstChild.data) for word in words[0:30]
]
# Bare expression: displays the result when run in a notebook/REPL cell.
asciilist
结果输出为:
[[101, 108, 117, 100, 101], [105, 109, 109, 97, 99, 117, 108, 97, 116, 101], [116, 97, 99, 105, 116, 117, 114, 110], [101, 110, 99, 111, 114, 101], [114, 101, 118, 101, 108], [115, 108, 97, 110, 100, 101, 114], [101, 118, 97, 110, 101, 115, 99, 101, 110, 116], [115, 97, 116, 105, 97, 116, 101], [114, 101, 108, 101, 110, 116], [99, 111, 104, 111, 114, 116], [112, 114, 101, 100, 97, 99, 105, 111, 117, 115], [105, 110, 118, 101, 114, 115, 101], [111, 98, 115, 101, 113, 117, 105, 111, 117, 115], [105, 109, 112, 111, 114, 116], [117, 110, 98, 114, 105, 100, 108, 101, 100], [109, 111, 110, 111, 108, 105, 116, 104, 105, 99], [112, 108, 97, 117, 100, 105, 116], [114, 97, 98, 98, 108, 101], [99, 97, 112, 105, 116, 97, 108], [99, 111, 109, 109, 105, 115, 115, 97, 114, 121], [116, 117, 116, 101, 108, 97, 103, 101], [116, 101, 110, 115, 105, 108, 101], [115, 105, 100, 101, 114, 101, 97, 108], [102, 101, 115, 116, 111, 111, 110], [112, 101, 115, 116, 105, 108, 101, 110, 99, 101], [101, 115, 112, 111, 117, 115, 101], [114, 101, 99, 101, 112, 116, 97, 99, 108, 101], [105, 110, 116, 101, 103, 117, 109, 101, 110, 116], [115, 111, 118, 101, 114, 101, 105, 103, 110], [100, 105, 99, 116, 117, 109]]
将前30个字符串存为一个二维数组,每行长度等于最长字符串的长度,长度不足的行在末尾用0补齐。
# Length of each of the first 30 words.
lenlist = [len(word.firstChild.data) for word in words[0:30]]
# Longest word length; default=0 keeps this from raising ValueError when
# the document contains no <word> elements.
maxlen = max(lenlist, default=0)
# Build a rectangular 2-D list: each row holds a word's character codes,
# right-padded with 0 up to maxlen so that all rows have the same length.
asciilist = []
for word in words[0:30]:
    codes = convertStr2Asciicode(word.firstChild.data)
    # [0] * 0 is empty, so words already at maxlen are appended unchanged.
    asciilist.append(codes + [0] * (maxlen - len(codes)))
# Bare expression: displays the result when run in a notebook/REPL cell.
asciilist
其结果输出为
[[101, 108, 117, 100, 101, 0, 0, 0, 0, 0], [105, 109, 109, 97, 99, 117, 108, 97, 116, 101], [116, 97, 99, 105, 116, 117, 114, 110, 0, 0], [101, 110, 99, 111, 114, 101, 0, 0, 0, 0], [114, 101, 118, 101, 108, 0, 0, 0, 0, 0], [115, 108, 97, 110, 100, 101, 114, 0, 0, 0], [101, 118, 97, 110, 101, 115, 99, 101, 110, 116], [115, 97, 116, 105, 97, 116, 101, 0, 0, 0], [114, 101, 108, 101, 110, 116, 0, 0, 0, 0], [99, 111, 104, 111, 114, 116, 0, 0, 0, 0], [112, 114, 101, 100, 97, 99, 105, 111, 117, 115], [105, 110, 118, 101, 114, 115, 101, 0, 0, 0], [111, 98, 115, 101, 113, 117, 105, 111, 117, 115], [105, 109, 112, 111, 114, 116, 0, 0, 0, 0], [117, 110, 98, 114, 105, 100, 108, 101, 100, 0], [109, 111, 110, 111, 108, 105, 116, 104, 105, 99], [112, 108, 97, 117, 100, 105, 116, 0, 0, 0], [114, 97, 98, 98, 108, 101, 0, 0, 0, 0], [99, 97, 112, 105, 116, 97, 108, 0, 0, 0], [99, 111, 109, 109, 105, 115, 115, 97, 114, 121], [116, 117, 116, 101, 108, 97, 103, 101, 0, 0], [116, 101, 110, 115, 105, 108, 101, 0, 0, 0], [115, 105, 100, 101, 114, 101, 97, 108, 0, 0], [102, 101, 115, 116, 111, 111, 110, 0, 0, 0], [112, 101, 115, 116, 105, 108, 101, 110, 99, 101], [101, 115, 112, 111, 117, 115, 101, 0, 0, 0], [114, 101, 99, 101, 112, 116, 97, 99, 108, 101], [105, 110, 116, 101, 103, 117, 109, 101, 110, 116], [115, 111, 118, 101, 114, 101, 105, 103, 110, 0], [100, 105, 99, 116, 117, 109, 0, 0, 0, 0]]
这样一个二维数组可以用来干什么呢?
领取专属 10元无门槛券
私享最新 技术干货