我一直在用 VB.NET 开发一个 Windows 应用程序,用来解析一个大型 XML 文档——一本日英词典。我最初构建了一个基于字符串的解析器,它读取所有 XML 标签并手动提取其内容。完整解析该 XML 文件平均需要 30 秒。那时,我还从未听说过反序列化。
后来,多亏了本站几位用户的帮助,我得到了一个合适的 XML 反序列化器,它将 XML 文件中的所有数据加载到一个类对象中。效果非常出色,但平均加载时间仍有 20 秒。
虽然这已经把耗时缩短了 33%,但我想知道是否还有我不知道或没有考虑到的更好方法;或者,我现有的代码/类定义中是否存在可以至少部分消除的低效之处。
该文件长达 470 万行,包含超过 18 万个词典条目的信息,因此我已准备好接受这样一个事实:解析 XML 可能没有更快的方法了。我也想知道,除了 XML 文本文件之外,是否有其他存储数据的格式,能让 Windows 窗体应用程序读取得更快。
下面是我目前用于反序列化的代码:
''' <summary>
''' Reads the XML file found at <c>jmdictpath</c> (resolved against
''' <c>Application.StartupPath</c>) and stores the deserialised
''' <c>JMdict</c> object in <c>dict</c>.
''' </summary>
''' <remarks>
''' <c>dict</c> and <c>jmdictpath</c> are declared outside this procedure.
''' </remarks>
Sub Deserialise()
    ' The reader is configured to parse the document's DTD.
    Dim readerOptions As New XmlReaderSettings With {.DtdProcessing = DtdProcessing.Parse}

    Dim sourceFile As String = Path.Combine(Application.StartupPath, jmdictpath)
    Dim jmdictSerialiser As New XmlSerializer(GetType(JMdict))

    ' Using guarantees the reader (and the underlying file) is closed.
    Using xmlSource As Xml.XmlReader = Xml.XmlReader.Create(sourceFile, readerOptions)
        Dim loaded As Object = jmdictSerialiser.Deserialize(xmlSource)
        dict = CType(loaded, JMdict)
    End Using
End Sub
下面是类的定义:
''' <summary>
''' Root object produced by the XML serialiser; collects every
''' <c>entry</c> element found directly beneath the document root.
''' </summary>
<XmlRoot()>
Public Class JMdict

    ''' <summary>One item per <c>entry</c> child element.</summary>
    <XmlElement(ElementName:="entry")> Public Property entrylist As List(Of Entry)

End Class
''' <summary>
''' A single <c>entry</c> element: its sequence number plus the repeated
''' <c>k_ele</c>, <c>r_ele</c> and <c>sense</c> children.
''' </summary>
<Serializable, XmlType("entry")>
Public Class Entry

    ''' <summary>Integer content of the <c>ent_seq</c> child element.</summary>
    <XmlElement(ElementName:="ent_seq")> Public Property EntrySequence As Integer

    ''' <summary>One item per <c>k_ele</c> child element.</summary>
    <XmlElement(ElementName:="k_ele")> Public Property Keywords As List(Of KeywordElement)

    ''' <summary>One item per <c>r_ele</c> child element.</summary>
    <XmlElement(ElementName:="r_ele")> Public Property Readings As List(Of ReadingElement)

    ''' <summary>One item per <c>sense</c> child element.</summary>
    <XmlElement(ElementName:="sense")> Public Property Senses As List(Of SenseElement)

End Class
''' <summary>
''' A <c>k_ele</c> element: the <c>keb</c> text together with any
''' <c>ke_pri</c> and <c>ke_inf</c> values attached to it.
''' </summary>
<Serializable, XmlType("k_ele")>
Public Class KeywordElement

    ''' <summary>Text of the <c>keb</c> child element.</summary>
    <XmlElement(ElementName:="keb")> Public Property Keyword As String

    ''' <summary>Text of each <c>ke_pri</c> child element.</summary>
    <XmlElement(ElementName:="ke_pri")> Public Property KeywordPriority As List(Of String)

    ''' <summary>Text of each <c>ke_inf</c> child element.</summary>
    <XmlElement(ElementName:="ke_inf")> Public Property KeywordOrthography As List(Of String)

End Class
''' <summary>
''' An <c>r_ele</c> element: the <c>reb</c> text, its <c>re_pri</c>,
''' <c>re_inf</c> and <c>re_restr</c> values, and a flag recording whether a
''' <c>re_nokanji</c> child was supplied.
''' </summary>
<Serializable()>
<XmlType("r_ele")>
Public Class ReadingElement
    ''' <summary>Text of the <c>reb</c> child element.</summary>
    <XmlElement("reb")>
    Public Property Reading As String

    ''' <summary>Text of each <c>re_pri</c> child element.</summary>
    <XmlElement("re_pri")>
    Public Property ReadingPriority As List(Of String)

    ''' <summary>Text of each <c>re_inf</c> child element.</summary>
    <XmlElement("re_inf")>
    Public Property ReadingOrthography As List(Of String)

    ''' <summary>Text of each <c>re_restr</c> child element.</summary>
    <XmlElement("re_restr")>
    Public Property ReadingToKeywordRestriction As List(Of String)

    ' Backing flag: True once a non-Nothing value has been assigned.
    Private _NotTrueReading As Boolean

    ''' <summary>
    ''' Bound to the <c>re_nokanji</c> child element. Assigning any
    ''' non-Nothing string (including an empty one) sets the flag; assigning
    ''' Nothing clears it. Reads back as "True" or "False".
    ''' </summary>
    <XmlElement("re_nokanji")>
    Public Property NotTrueReading As String
        Get
            ' Explicit conversion keeps the String-typed interface while
            ' compiling under Option Strict On as well.
            Return CStr(_NotTrueReading)
        End Get
        Set(value As String)
            ' Test the incoming value. Testing the property itself calls the
            ' getter, which never returns Nothing, so the flag would become
            ' True on every assignment regardless of what was assigned.
            _NotTrueReading = value IsNot Nothing
        End Set
    End Property
End Class
''' <summary>
''' A <c>sense</c> element. Every member is a list bound to one repeated
''' child element; the element name for each is given by its attribute.
''' </summary>
<Serializable, XmlType("sense")>
Public Class SenseElement

    ''' <summary>Text of each <c>pos</c> child element.</summary>
    <XmlElement(ElementName:="pos")> Public Property PartOfSpeech As List(Of String)

    ''' <summary>One item per <c>gloss</c> child element.</summary>
    <XmlElement(ElementName:="gloss")> Public Property Gloss As List(Of GlossElement)

    ''' <summary>Text of each <c>stagk</c> child element.</summary>
    <XmlElement(ElementName:="stagk")> Public Property SenseRestrictedToKeyword As List(Of String)

    ''' <summary>Text of each <c>stagr</c> child element.</summary>
    <XmlElement(ElementName:="stagr")> Public Property SenseRestrictedToReading As List(Of String)

    ''' <summary>Text of each <c>xref</c> child element.</summary>
    <XmlElement(ElementName:="xref")> Public Property CrossReference As List(Of String)

    ''' <summary>Text of each <c>ant</c> child element.</summary>
    <XmlElement(ElementName:="ant")> Public Property Antonym As List(Of String)

    ''' <summary>Text of each <c>field</c> child element.</summary>
    <XmlElement(ElementName:="field")> Public Property Field As List(Of String)

    ''' <summary>Text of each <c>dial</c> child element.</summary>
    <XmlElement(ElementName:="dial")> Public Property Dialect As List(Of String)

    ''' <summary>Text of each <c>s_inf</c> child element.</summary>
    <XmlElement(ElementName:="s_inf")> Public Property SenseInformation As List(Of String)

    ''' <summary>Text of each <c>misc</c> child element.</summary>
    <XmlElement(ElementName:="misc")> Public Property Misc As List(Of String)

    ''' <summary>One item per <c>lsource</c> child element.</summary>
    <XmlElement(ElementName:="lsource")> Public Property LanguageSource As List(Of LanguageSourceElement)

End Class
''' <summary>
''' A <c>gloss</c> element: its text content plus the optional
''' <c>xml:lang</c> and <c>g_type</c> attributes.
''' </summary>
<Serializable, XmlType("gloss")>
Public Class GlossElement

    ''' <summary>Value of the <c>xml:lang</c> attribute.</summary>
    <XmlAttribute(AttributeName:="xml:lang")> Public Property Language As String

    ''' <summary>Value of the <c>g_type</c> attribute.</summary>
    <XmlAttribute(AttributeName:="g_type")> Public Property GlossType As String

    ''' <summary>Text content of the element.</summary>
    <XmlText> Public Property Text As String

    ''' <summary>Returns the gloss text, so the object displays as its content.</summary>
    Public Overrides Function ToString() As String
        Return Me.Text
    End Function

End Class
''' <summary>
''' An <c>lsource</c> element: its text content plus the <c>xml:lang</c>,
''' <c>ls_type</c> and <c>ls_wasei</c> attributes.
''' </summary>
<Serializable()>
<XmlType("lsource")>
Public Class LanguageSourceElement
    ''' <summary>Value of the <c>xml:lang</c> attribute.</summary>
    <XmlAttribute("xml:lang")>
    Public Property Language As String

    ''' <summary>Value of the <c>ls_type</c> attribute.</summary>
    <XmlAttribute("ls_type")>
    Public Property LanguageSourceType As String

    ' Backing flag: True when the last assigned attribute value was "y".
    Private _IsWaseieigo As Boolean

    ''' <summary>
    ''' Bound to the <c>ls_wasei</c> attribute. Assigning "y" sets the flag;
    ''' any other value (including Nothing) clears it. Reads back as "True"
    ''' or "False".
    ''' </summary>
    <XmlAttribute("ls_wasei")>
    Public Property IsWaseieigo As String
        Get
            ' Explicit conversion keeps the String-typed interface while
            ' compiling under Option Strict On as well.
            Return CStr(_IsWaseieigo)
        End Get
        Set(value As String)
            ' Compare the incoming value. Comparing the property itself calls
            ' the getter, which only ever yields "True" or "False", so the
            ' flag could never be set.
            _IsWaseieigo = (value = "y")
        End Set
    End Property

    ''' <summary>Text content of the element.</summary>
    <XmlText>
    Public Property Text As String

    ''' <summary>Returns the element text.</summary>
    Public Overrides Function ToString() As String
        Return Text
    End Function
End Class
我们怎么想?有没有更快的解决办法的希望?
编辑-以下是XML的示例:
<entry>
<ent_seq>1486440</ent_seq>
<k_ele>
<keb>美術</keb>
<ke_pri>ichi1</ke_pri>
<ke_pri>news1</ke_pri>
<ke_pri>nf02</ke_pri>
</k_ele>
<r_ele>
<reb>びじゅつ</reb>
<re_pri>ichi1</re_pri>
<re_pri>news1</re_pri>
<re_pri>nf02</re_pri>
</r_ele>
<sense>
<pos>&n;</pos>
<pos>&adj-no;</pos>
<gloss>art</gloss>
<gloss>fine arts</gloss>
</sense>
<sense>
<gloss xml:lang="dut">kunst</gloss>
<gloss xml:lang="dut">schone kunsten</gloss>
</sense>
<sense>
<gloss xml:lang="fre">art</gloss>
<gloss xml:lang="fre">beaux-arts</gloss>
</sense>
<sense>
<gloss xml:lang="ger">Kunst</gloss>
<gloss xml:lang="ger">die schönen Künste</gloss>
<gloss xml:lang="ger">bildende Kunst</gloss>
</sense>
<sense>
<gloss xml:lang="ger">Produktionsdesign</gloss>
<gloss xml:lang="ger">Szenographie</gloss>
</sense>
<sense>
<gloss xml:lang="hun">művészet</gloss>
<gloss xml:lang="hun">művészeti</gloss>
<gloss xml:lang="hun">művészi</gloss>
<gloss xml:lang="hun">rajzóra</gloss>
<gloss xml:lang="hun">szépművészet</gloss>
</sense>
<sense>
<gloss xml:lang="rus">изящные искусства; искусство</gloss>
<gloss xml:lang="rus">{~{的}} художественный, артистический</gloss>
</sense>
<sense>
<gloss xml:lang="slv">umetnost</gloss>
<gloss xml:lang="slv">likovna umetnost</gloss>
</sense>
<sense>
<gloss xml:lang="spa">bellas artes</gloss>
</sense>
</entry>
整个内容都包裹在 <JMdict> 标签中
发布于 2020-03-26 06:01:22
试试下面的代码。它没有经过测试,因为您没有发布任何 XML 示例:
Imports System.Xml
Imports System.Xml.Linq
Imports System.IO
Module Module1
' Path of the XML file that Main parses.
Const FILENAME As String = "c:\temp\test.xml"

''' <summary>
''' Entry point: parses the file named by FILENAME into a JMdict object graph.
''' The result is held only in a local variable.
''' </summary>
Sub Main()
    Dim parsedDictionary As JMdict = JMdict.Parse(FILENAME)
End Sub
''' <summary>
''' Root of the object graph built with LINQ to XML: one <c>Entry</c> per
''' <c>entry</c> element under the document root.
''' </summary>
Public Class JMdict
    ''' <summary>All parsed <c>entry</c> elements, in document order.</summary>
    Public Property entrylist As List(Of Entry)

    ''' <summary>
    ''' Loads the XML file at <paramref name="filename"/> and converts every
    ''' <c>entry</c> child of the root element into an <c>Entry</c>.
    ''' </summary>
    ''' <param name="filename">Path of the XML file to read.</param>
    ''' <returns>A new <c>JMdict</c> whose <c>entrylist</c> is populated.</returns>
    Public Shared Function Parse(filename As String) As JMdict
        ' Using closes the file even when loading or parsing throws; without
        ' it the StreamReader was never disposed.
        Using reader As New StreamReader(filename)
            ' The first line is discarded before the rest is handed to
            ' XDocument (presumably the XML declaration; confirm against the
            ' real file).
            reader.ReadLine()
            Dim doc As XDocument = XDocument.Load(reader)

            Dim newJMdict As New JMdict()
            newJMdict.entrylist = doc.Root.Elements("entry").Select(Function(x) Entry.Parse(x)).ToList()
            Return newJMdict
        End Using
    End Function
End Class
''' <summary>
''' One <c>entry</c> element: its sequence number and the parsed
''' <c>k_ele</c>, <c>r_ele</c> and <c>sense</c> children.
''' </summary>
Public Class Entry
    ''' <summary>Integer value of the <c>ent_seq</c> child element.</summary>
    Public Property EntrySequence As Integer
    ''' <summary>One item per <c>k_ele</c> child element.</summary>
    Public Property Keywords As List(Of KeywordElement)
    ''' <summary>One item per <c>r_ele</c> child element.</summary>
    Public Property Readings As List(Of ReadingElement)
    ''' <summary>One item per <c>sense</c> child element.</summary>
    Public Property Senses As List(Of SenseElement)

    ''' <summary>Builds an <c>Entry</c> from an <c>entry</c> element.</summary>
    ''' <param name="xEntry">The <c>entry</c> element to read.</param>
    ''' <returns>The populated <c>Entry</c>.</returns>
    Public Shared Function Parse(xEntry As XElement) As Entry
        ' Members are initialised in the listed order: sequence number first,
        ' then each group of repeated children.
        Return New Entry With {
            .EntrySequence = CType(xEntry.Element("ent_seq"), Integer),
            .Keywords = (From k In xEntry.Elements("k_ele") Select KeywordElement.Parse(k)).ToList(),
            .Readings = (From r In xEntry.Elements("r_ele") Select ReadingElement.Parse(r)).ToList(),
            .Senses = (From s In xEntry.Elements("sense") Select SenseElement.Parse(s)).ToList()
        }
    End Function
End Class
''' <summary>
''' A <c>k_ele</c> element: the <c>keb</c> text plus its <c>ke_pri</c> and
''' <c>ke_inf</c> values.
''' </summary>
Public Class KeywordElement
    ''' <summary>Text of the <c>keb</c> child element, or Nothing when absent.</summary>
    Public Property Keyword As String
    ''' <summary>Text of each <c>ke_pri</c> child element.</summary>
    Public Property KeywordPriority As List(Of String)
    ''' <summary>Text of each <c>ke_inf</c> child element.</summary>
    Public Property KeywordOrthography As List(Of String)

    ''' <summary>Builds a <c>KeywordElement</c> from a <c>k_ele</c> element.</summary>
    ''' <param name="xKeywordElement">The <c>k_ele</c> element to read.</param>
    ''' <returns>The populated <c>KeywordElement</c>.</returns>
    Public Shared Function Parse(xKeywordElement As XElement) As KeywordElement
        Dim newKeywordElement As New KeywordElement()
        ' The child element is named "keb". Looking up "key" matched nothing,
        ' which left Keyword as Nothing for every entry.
        newKeywordElement.Keyword = CType(xKeywordElement.Element("keb"), String)
        newKeywordElement.KeywordPriority = xKeywordElement.Elements("ke_pri").Select(Function(x) CType(x, String)).ToList()
        newKeywordElement.KeywordOrthography = xKeywordElement.Elements("ke_inf").Select(Function(x) CType(x, String)).ToList()
        Return newKeywordElement
    End Function
End Class
''' <summary>
''' An <c>r_ele</c> element: the <c>reb</c> text, its <c>re_pri</c>,
''' <c>re_inf</c> and <c>re_restr</c> values, and a flag recording whether a
''' <c>re_nokanji</c> child is present.
''' </summary>
Public Class ReadingElement
    ''' <summary>Text of the <c>reb</c> child element, or Nothing when absent.</summary>
    Public Property Reading As String
    ''' <summary>Text of each <c>re_pri</c> child element.</summary>
    Public Property ReadingPriority As List(Of String)
    ''' <summary>Text of each <c>re_inf</c> child element.</summary>
    Public Property ReadingOrthography As List(Of String)
    ''' <summary>Text of each <c>re_restr</c> child element.</summary>
    Public Property ReadingToKeywordRestriction As List(Of String)

    ' Backing flag: True once a non-Nothing value has been assigned.
    Private _NotTrueReading As Boolean

    ''' <summary>
    ''' Assigning any non-Nothing string (including an empty one) sets the
    ''' flag; assigning Nothing clears it. Reads back as "True" or "False".
    ''' </summary>
    Public Property NotTrueReading As String
        Get
            ' Explicit conversion keeps the String-typed interface while
            ' compiling under Option Strict On as well.
            Return CStr(_NotTrueReading)
        End Get
        Set(value As String)
            ' Test the incoming value. Testing the property itself calls the
            ' getter, which never returns Nothing; since Parse always assigns
            ' this property, every reading ended up flagged True.
            _NotTrueReading = value IsNot Nothing
        End Set
    End Property

    ''' <summary>Builds a <c>ReadingElement</c> from an <c>r_ele</c> element.</summary>
    ''' <param name="xReadingElement">The <c>r_ele</c> element to read.</param>
    ''' <returns>The populated <c>ReadingElement</c>.</returns>
    Public Shared Function Parse(xReadingElement As XElement) As ReadingElement
        Dim newReadingElement As New ReadingElement
        newReadingElement.Reading = CType(xReadingElement.Element("reb"), String)
        newReadingElement.ReadingPriority = xReadingElement.Elements("re_pri").Select(Function(x) CType(x, String)).ToList()
        newReadingElement.ReadingOrthography = xReadingElement.Elements("re_inf").Select(Function(x) CType(x, String)).ToList()
        newReadingElement.ReadingToKeywordRestriction = xReadingElement.Elements("re_restr").Select(Function(x) CType(x, String)).ToList()
        ' The conversion yields Nothing when re_nokanji is absent and the
        ' element's (possibly empty) text when it is present.
        newReadingElement.NotTrueReading = CType(xReadingElement.Element("re_nokanji"), String)
        Return newReadingElement
    End Function
End Class
''' <summary>
''' A <c>sense</c> element. Each member holds the values of one repeated
''' child element, in document order.
''' </summary>
Public Class SenseElement
    ''' <summary>Text of each <c>pos</c> child element.</summary>
    Public Property PartOfSpeech As List(Of String)
    ''' <summary>One item per <c>gloss</c> child element.</summary>
    Public Property Gloss As List(Of GlossElement)
    ''' <summary>Text of each <c>stagk</c> child element.</summary>
    Public Property SenseRestrictedToKeyword As List(Of String)
    ''' <summary>Text of each <c>stagr</c> child element.</summary>
    Public Property SenseRestrictedToReading As List(Of String)
    ''' <summary>Text of each <c>xref</c> child element.</summary>
    Public Property CrossReference As List(Of String)
    ''' <summary>Text of each <c>ant</c> child element.</summary>
    Public Property Antonym As List(Of String)
    ''' <summary>Text of each <c>field</c> child element.</summary>
    Public Property Field As List(Of String)
    ''' <summary>Text of each <c>dial</c> child element.</summary>
    Public Property Dialect As List(Of String)
    ''' <summary>Text of each <c>s_inf</c> child element.</summary>
    Public Property SenseInformation As List(Of String)
    ''' <summary>Text of each <c>misc</c> child element.</summary>
    Public Property Misc As List(Of String)
    ''' <summary>One item per <c>lsource</c> child element.</summary>
    Public Property LanguageSource As List(Of LanguageSourceElement)

    ''' <summary>Builds a <c>SenseElement</c> from a <c>sense</c> element.</summary>
    ''' <param name="xSenseElement">The <c>sense</c> element to read.</param>
    ''' <returns>The populated <c>SenseElement</c>.</returns>
    Public Shared Function Parse(xSenseElement As XElement) As SenseElement
        ' Members are initialised in the listed order.
        Return New SenseElement With {
            .PartOfSpeech = ChildTexts(xSenseElement, "pos"),
            .Gloss = (From g In xSenseElement.Elements("gloss") Select GlossElement.Parse(g)).ToList(),
            .SenseRestrictedToKeyword = ChildTexts(xSenseElement, "stagk"),
            .SenseRestrictedToReading = ChildTexts(xSenseElement, "stagr"),
            .CrossReference = ChildTexts(xSenseElement, "xref"),
            .Antonym = ChildTexts(xSenseElement, "ant"),
            .Field = ChildTexts(xSenseElement, "field"),
            .Dialect = ChildTexts(xSenseElement, "dial"),
            .SenseInformation = ChildTexts(xSenseElement, "s_inf"),
            .Misc = ChildTexts(xSenseElement, "misc"),
            .LanguageSource = (From l In xSenseElement.Elements("lsource") Select LanguageSourceElement.Parse(l)).ToList()
        }
    End Function

    ' Collects the string value of every direct child named childName.
    Private Shared Function ChildTexts(parent As XElement, childName As String) As List(Of String)
        Return parent.Elements(childName).Select(Function(child) CType(child, String)).ToList()
    End Function
End Class
''' <summary>
''' A <c>gloss</c> element: its text plus the optional <c>lang</c> and
''' <c>g_type</c> attributes.
''' </summary>
Public Class GlossElement
    ''' <summary>Value of the first attribute whose local name is <c>lang</c>, or Nothing.</summary>
    Public Property Language As String
    ''' <summary>Value of the <c>g_type</c> attribute, or Nothing.</summary>
    Public Property GlossType As String
    ''' <summary>String value of the element.</summary>
    Public Property Text As String

    ''' <summary>Builds a <c>GlossElement</c> from a <c>gloss</c> element.</summary>
    ''' <param name="xGlossElement">The <c>gloss</c> element to read.</param>
    ''' <returns>The populated <c>GlossElement</c>.</returns>
    Public Shared Function Parse(xGlossElement As XElement) As GlossElement
        ' Match on the local name only, so the namespace-qualified xml:lang
        ' attribute is found without spelling out its namespace.
        Dim langAttribute As XAttribute =
            xGlossElement.Attributes().FirstOrDefault(Function(a) a.Name.LocalName = "lang")

        Return New GlossElement With {
            .Language = CType(langAttribute, String),
            .GlossType = CType(xGlossElement.Attribute("g_type"), String),
            .Text = CType(xGlossElement, String)
        }
    End Function
End Class
''' <summary>
''' An <c>lsource</c> element: its text plus the <c>lang</c>,
''' <c>ls_type</c> and <c>ls_wasei</c> attributes.
''' </summary>
Public Class LanguageSourceElement
    ''' <summary>Value of the first attribute whose local name is <c>lang</c>, or Nothing.</summary>
    Public Property Language As String
    ''' <summary>Value of the <c>ls_type</c> attribute, or Nothing.</summary>
    Public Property LanguageSourceType As String
    ' True when the ls_wasei attribute equals "y".
    Public IsWaseieigo As Boolean
    ''' <summary>String value of the element.</summary>
    Public Property Text As String

    ''' <summary>Builds a <c>LanguageSourceElement</c> from an <c>lsource</c> element.</summary>
    ''' <param name="xLanguageSourceElement">The <c>lsource</c> element to read.</param>
    ''' <returns>The populated <c>LanguageSourceElement</c>.</returns>
    Public Shared Function Parse(xLanguageSourceElement As XElement) As LanguageSourceElement
        Dim newLanguageSourceElement As New LanguageSourceElement
        ' Match on the local name only, so the namespace-qualified xml:lang
        ' attribute is found without spelling out its namespace.
        newLanguageSourceElement.Language = CType(xLanguageSourceElement.Attributes().Where(Function(x) x.Name.LocalName = "lang").FirstOrDefault(), String)
        newLanguageSourceElement.LanguageSourceType = CType(xLanguageSourceElement.Attribute("ls_type"), String)
        ' The comparison already yields a Boolean. IIf(cond, True, False)
        ' returned Object, which needs an implicit narrowing conversion and
        ' does not compile under Option Strict On. A missing attribute
        ' converts to Nothing, which compares unequal to "y".
        newLanguageSourceElement.IsWaseieigo = (CType(xLanguageSourceElement.Attribute("ls_wasei"), String) = "y")
        newLanguageSourceElement.Text = CType(xLanguageSourceElement, String)
        Return newLanguageSourceElement
    End Function
End Class
End Module
https://stackoverflow.com/questions/60854811
复制相似问题