我想先对 dat 数据进行转换,再用 XML 相关函数提取其中的内容,从而把它转换成数据框(data frame)。然而,运行时遇到了“行数不同”的错误。下面附上的数据是先从 dat 文件中还原、再转换为字符串得到的。我是 R 的初学者,非常感谢您的帮助。
数据:
# Sample input: a single XML document (root element d2lm:d2LogicalModel, in
# the datex2.eu namespace) held as one multi-line character string.
# Single quotes delimit the R string so the double-quoted XML attributes need
# no escaping.
# NOTE(review): the line ending in "TYPE : GDP</d2lm:value><" carries a
# trailing "<" directly before the next <d2lm:value> tag. It looks like a
# stray character and makes the text ill-formed XML -- confirm against the
# original feed before handing this string to an XML parser.
dat <- '<d2lm:d2LogicalModel extensionVersion="2.0" extensionName="NTIS Published Services"
modelBaseVersion="2" xmlns:ns4="http://www.thalesgroup.com/NTIS/Datex2Extensions/1.0Beta1"
xmlns:ns3="http://datex2.eu/schema/2/2_0/inrix" xmlns:d2lm="http://datex2.eu/schema/2/2_0">
<d2lm:exchange><d2lm:supplierIdentification><d2lm:country>gb</d2lm:country>
<d2lm:nationalIdentifier>NTIS</d2lm:nationalIdentifier></d2lm:supplierIdentification></d2lm:exchange>
<d2lm:payloadPublication xsi:type="d2lm:SituationPublication" lang="en"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><d2lm:feedType>Event Data</d2lm:feedType>
<d2lm:publicationTime>2020-05-10T00:00:44.778+01:00</d2lm:publicationTime><d2lm:publicationCreator>
<d2lm:country>gb</d2lm:country><d2lm:nationalIdentifier>NTIS</d2lm:nationalIdentifier>
</d2lm:publicationCreator><d2lm:situation version="" id="2922904"><d2lm:headerInformation>
<d2lm:areaOfInterest>national</d2lm:areaOfInterest>
<d2lm:confidentiality>restrictedToAuthoritiesTrafficOperatorsAndPublishers</d2lm:confidentiality>
<d2lm:informationStatus>real</d2lm:informationStatus></d2lm:headerInformation><d2lm:situationRecord
xsi:type="d2lm:RoadOrCarriagewayOrLaneManagement" version="" id="2922904">
<d2lm:situationRecordCreationReference>UF-20-05-09-600215</d2lm:situationRecordCreationReference>
<d2lm:situationRecordCreationTime>2020-05-09T21:04:28.000+01:00</d2lm:situationRecordCreationTime>
<d2lm:situationRecordVersionTime>2020-05-10T00:00:39.677+01:00</d2lm:situationRecordVersionTime>
<d2lm:probabilityOfOccurrence>certain</d2lm:probabilityOfOccurrence>
<d2lm:severity>highest</d2lm:severity><d2lm:source>
<d2lm:sourceIdentification>RCCs</d2lm:sourceIdentification><d2lm:sourceExtension>
<d2lm:sourceSituation><d2lm:sourceSituationId>SL</d2lm:sourceSituationId>
<d2lm:sourceSituationCreationTime>2020-05-09T20:48:00.000+01:00</d2lm:sourceSituationCreationTime>
</d2lm:sourceSituation></d2lm:sourceExtension></d2lm:source><d2lm:validity>
<d2lm:validityStatus>active</d2lm:validityStatus><d2lm:validityTimeSpecification>
<d2lm:overallStartTime>2020-05-09T21:03:19.755+01:00</d2lm:overallStartTime>
<d2lm:overallEndTime>2020-05-10T02:41:00.000+01:00</d2lm:overallEndTime>
</d2lm:validityTimeSpecification></d2lm:validity><d2lm:impact>
<d2lm:capacityRemaining>0.0</d2lm:capacityRemaining>
<d2lm:numberOfLanesRestricted>0</d2lm:numberOfLanesRestricted>
<d2lm:numberOfOperationalLanes>0</d2lm:numberOfOperationalLanes>
<d2lm:originalNumberOfLanes>3</d2lm:originalNumberOfLanes>
<d2lm:trafficConstrictionType>carriagewayBlocked</d2lm:trafficConstrictionType><d2lm:impactExtension>
<d2lm:impactDetails><d2lm:changeInCapacity>-100.0</d2lm:changeInCapacity><d2lm:individualLanesStatus>
<d2lm:individualLane><d2lm:laneIdentifier>hardShoulder</d2lm:laneIdentifier>
<d2lm:laneStatus>closed</d2lm:laneStatus></d2lm:individualLane><d2lm:individualLane>
<d2lm:laneIdentifier>lane1</d2lm:laneIdentifier><d2lm:laneStatus>closed</d2lm:laneStatus>
</d2lm:individualLane><d2lm:individualLane><d2lm:laneIdentifier>lane2</d2lm:laneIdentifier>
<d2lm:laneStatus>closed</d2lm:laneStatus></d2lm:individualLane><d2lm:individualLane>
<d2lm:laneIdentifier>lane3</d2lm:laneIdentifier><d2lm:laneStatus>closed</d2lm:laneStatus>
</d2lm:individualLane></d2lm:individualLanesStatus><d2lm:returnToNormalStatus>
<d2lm:predictedTimeToClear>2020-05-10T02:41:03.536+01:00</d2lm:predictedTimeToClear>
<d2lm:predictedReturnToProfile>2020-05-10T02:41:00.000+01:00</d2lm:predictedReturnToProfile>
</d2lm:returnToNormalStatus></d2lm:impactDetails></d2lm:impactExtension></d2lm:impact>
<d2lm:generalPublicComment><d2lm:comment><d2lm:values><d2lm:value>Traffic is being diverted via the
exit and entry slips</d2lm:value></d2lm:values></d2lm:comment>
<d2lm:commentType>internalNote</d2lm:commentType></d2lm:generalPublicComment>
<d2lm:generalPublicComment><d2lm:comment><d2lm:values><d2lm:value>TYPE : GDP</d2lm:value><
<d2lm:value>Location : The M5 southbound at junction J27 </d2lm:value><d2lm:value>Reason : Road
Management</d2lm:value><d2lm:value>Status : Currently Active</d2lm:value><d2lm:value>Time To Clear :
The event is expected to clear between 02:30 and 02:45 on 10 May 2020</d2lm:value><d2lm:value>Return
To Normal : Normal traffic conditions are expected between 02:30 and 02:45 on 10 May
2020</d2lm:value><d2lm:value>Lanes Closed : All lanes are closed</d2lm:value></d2lm:values>
</d2lm:comment></d2lm:generalPublicComment><d2lm:groupOfLocations
xsi:type="d2lm:NonOrderedLocationGroupByList"><d2lm:locationContainedInGroup xsi:type="d2lm:Point">
<d2lm:locationForDisplay><d2lm:latitude>50.9196</d2lm:latitude>
<d2lm:longitude>-3.3536696</d2lm:longitude></d2lm:locationForDisplay></d2lm:locationContainedInGroup>
<d2lm:locationContainedInGroup xsi:type="d2lm:LocationByReference"><d2lm:predefinedLocationReference
targetClass="PredefinedLocation" version="12.10" id="101002501"/></d2lm:locationContainedInGroup>
</d2lm:groupOfLocations><d2lm:management><d2lm:lifeCycleManagement><d2lm:cancel>false</d2lm:cancel>
<d2lm:end>false</d2lm:end></d2lm:lifeCycleManagement><d2lm:managementExtension>
<d2lm:situationManagement>
<d2lm:confirmedBy>RCCs</d2lm:confirmedBy><d2lm:areaTeamInvolved>Area 2</d2lm:areaTeamInvolved>
<d2lm:rccInformation><d2lm:allocatedRcc>South West RCC</d2lm:allocatedRcc>
<d2lm:rccSituationId>1004</d2lm:rccSituationId></d2lm:rccInformation><d2lm:emergencyServicesInvolved>
<d2lm:emergencyServiceStatus><d2lm:serviceType>police</d2lm:serviceType>
<d2lm:serviceStatus>attending</d2lm:serviceStatus></d2lm:emergencyServiceStatus>
<d2lm:emergencyServiceStatus><d2lm:serviceType>fire</d2lm:serviceType>
<d2lm:serviceStatus>none</d2lm:serviceStatus></d2lm:emergencyServiceStatus>
<d2lm:emergencyServiceStatus>
<d2lm:serviceType>ambulance</d2lm:serviceType><d2lm:serviceStatus>none</d2lm:serviceStatus>
</d2lm:emergencyServiceStatus><d2lm:emergencyServiceStatus>
<d2lm:serviceType>airAmbulance</d2lm:serviceType><d2lm:serviceStatus>none</d2lm:serviceStatus>
</d2lm:emergencyServiceStatus></d2lm:emergencyServicesInvolved><d2lm:peopleAndVehiclesInvolved>
<d2lm:totalNumberOfPeopleInvolved>0</d2lm:totalNumberOfPeopleInvolved>
<d2lm:totalNumberOfVehiclesInvolved>0</d2lm:totalNumberOfVehiclesInvolved>
<d2lm:groupOfVehiclesInvolved>
<d2lm:numberOfVehicles>0</d2lm:numberOfVehicles><d2lm:vehicleCharacteristics>
<d2lm:vehicleType>articulatedVehicle</d2lm:vehicleType></d2lm:vehicleCharacteristics>
</d2lm:groupOfVehiclesInvolved><d2lm:groupOfVehiclesInvolved>
<d2lm:numberOfVehicles>0</d2lm:numberOfVehicles><d2lm:vehicleCharacteristics>
<d2lm:vehicleType>lorry</d2lm:vehicleType></d2lm:vehicleCharacteristics>
</d2lm:groupOfVehiclesInvolved><d2lm:groupOfVehiclesInvolved>
<d2lm:numberOfVehicles>0</d2lm:numberOfVehicles><d2lm:vehicleCharacteristics>
<d2lm:vehicleType>car</d2lm:vehicleType></d2lm:vehicleCharacteristics>
</d2lm:groupOfVehiclesInvolved><d2lm:groupOfVehiclesInvolved>
<d2lm:numberOfVehicles>0</d2lm:numberOfVehicles><d2lm:vehicleCharacteristics>
<d2lm:vehicleType>motorcycle</d2lm:vehicleType></d2lm:vehicleCharacteristics>
</d2lm:groupOfVehiclesInvolved><d2lm:groupOfVehiclesInvolved>
<d2lm:numberOfVehicles>0</d2lm:numberOfVehicles><d2lm:vehicleCharacteristics>
<d2lm:vehicleType>other</d2lm:vehicleType></d2lm:vehicleCharacteristics>
</d2lm:groupOfVehiclesInvolved><d2lm:groupOfVehiclesInvolved>
<d2lm:numberOfVehicles>0</d2lm:numberOfVehicles>
<d2lm:vehicleCharacteristics><d2lm:vehicleType>trailer</d2lm:vehicleType>
</d2lm:vehicleCharacteristics></d2lm:groupOfVehiclesInvolved><d2lm:groupOfPeopleInvolved>
<d2lm:numberOfPeople>0</d2lm:numberOfPeople><d2lm:injuryStatus>slightlyInjured</d2lm:injuryStatus>
</d2lm:groupOfPeopleInvolved><d2lm:groupOfPeopleInvolved><d2lm:numberOfPeople>0</d2lm:numberOfPeople>
<d2lm:injuryStatus>seriouslyInjured</d2lm:injuryStatus></d2lm:groupOfPeopleInvolved>
</d2lm:peopleAndVehiclesInvolved></d2lm:situationManagement></d2lm:managementExtension>
</d2lm:management>
<d2lm:complianceOption>mandatory</d2lm:complianceOption>
<d2lm:roadOrCarriagewayOrLaneManagementType>other</d2lm:roadOrCarriagewayOrLaneManagementType>
</d2lm:situationRecord></d2lm:situation></d2lm:payloadPublication></d2lm:d2LogicalModel>
'
到目前为止的代码:
library(XML)
require(plyr)
library(stringr)
# Attempt to build a two-column data frame (tags, values) from the XML text in
# `dat`. str_extract_all() returns a list; unlist() flattens it to a character
# vector. The two columns are extracted by two independent regex passes.
datDF <- data.frame(
# tags: each opening tag "<name>" whose matching "</name>" follows with no
# ">" in between (enforced by the look-ahead), i.e. the closing tag is the
# very next tag in the text.
tags = unlist(str_extract_all(dat, "<([^>]*)>(?=[^>]*</\\1>)")),
# values: the text sitting between "<name>" (look-behind, name limited to
# 1-100 characters) and the matching "</name>" (look-ahead).
# NOTE(review): "." presumably does not match a newline with the default
# regex options, so element text that wraps onto another line would be
# skipped here while its tag is still captured above -- confirm.
values = unlist(str_extract_all(dat, "(?<=<([^>]{1,100})>).*(?=</\\1>)"))
)
# As reported in the question, the call above stops with a "differing number
# of rows" error because the two vectors do not have the same length.
datDF
非常感谢
发布于 2020-06-05 01:40:16
`data.frame` 中的所有变量(列)都必须具有相同的长度,而这里 `tags` 和 `values` 的长度不同。您可以给较短的向量补上 `NA`,使两者长度相同,然后再把它们组合在一起:
library(XML)
require(plyr)
library(stringr)
library(xml2)
# Extract the leaf tags and the element texts from `dat` with two independent
# regular expressions. str_extract_all() returns a list, hence unlist().
tags <- unlist(str_extract_all(dat, "<([^>]*)>(?=[^>]*</\\1>)"))
values <- unlist(str_extract_all(dat, "(?<=<([^>]{1,100})>).*(?=</\\1>)"))

# data.frame() needs columns of equal length, so extend whichever vector is
# shorter. `length<-` pads the tail with NA. Unlike
# c(values, rep(NA, length(tags) - length(values))), it does not fail with a
# negative repeat count when `values` turns out to be the longer vector.
n_rows <- max(length(tags), length(values))
length(tags) <- n_rows
length(values) <- n_rows

# NOTE: padding only equalises the lengths. Because the two vectors come from
# separate regex passes, row i of `tags` is not guaranteed to belong to row i
# of `values`; check the alignment before relying on the result.
datDF <- data.frame(
  tags = tags,
  values = values
)
不过我建议谨慎使用这种做法,因为它假设文档已经被正确解析。我认为实际情况并非如此:标记 `roadOrCarriagewayOrLaneManagementType` 出现在倒数第二行,而与它对应的值 "other" 却出现在 `values` 的最后一行。
https://stackoverflow.com/questions/62200535
复制相似问题