在《Node.js the Right Way》一书中有一个解析RDF文件的例子,但我无法让这个例子像书中描述的那样工作。
RDF解析器如下:
"use strict";
const fs = require("fs");
const cheerio = require("cheerio");
/**
 * Reads the file at `filename`, loads its text into cheerio, and reports the
 * extracted fields through a Node-style callback.
 *
 * @param {string} filename - Path passed to fs.readFile.
 * @param {function} callback - Called as callback(err) when the read fails,
 *   otherwise as callback(null, record), where record has the keys
 *   _id, title, authors and subjects.
 */
module.exports = function(filename, callback) {
fs.readFile(filename, function(err, data) {
if (err) {
// Read failed: hand the error to the caller and stop here.
return callback(err);
}
// Load the file contents into cheerio so they can be queried with selectors.
let $ = cheerio.load(data.toString());
// Mapping callback: returns the text content of one matched element.
let collect = function(index, elem) {
return $(elem).text();
};
callback(null, {
// rdf:about attribute of the pgterms:ebook element, with 'ebooks/' removed.
_id: $('pgterms\\:ebook').attr('rdf:about').replace('ebooks/', ''),
// Text content of the dcterms:title selection.
title: $('dcterms\\:title').text(),
// NOTE(review): the value returned by .map(collect) is passed through as-is.
// The console output quoted later on this page shows an object carrying
// options/_root/prevObject here rather than a plain array — confirm against
// the cheerio version in use.
authors: $('pgterms\\:agent pgterms\\:name').map(collect),
// NOTE(review): selects rdf:value elements via the `~` combinator after an
// element whose rdf:resource ends in "/LCSH". The console output quoted
// later on this page shows length 0 for this selection — confirm the selector.
subjects: $('[rdf\\:resource$="/LCSH"] ~ rdf\\:value').map(collect)
});
});
};输入RDF文件如下所示:
<rdf:RDF>
<pgterms:ebook rdf:about="ebooks/132">
<dcterms:subject>
<rdf:Description>
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
<rdf:value>Military art and science -- Early works to 1800</rdf:value>
<rdf:value>War -- Early works to 1800</rdf:value>
</rdf:Description>
</dcterms:subject>
<dcterms:title>The Art of War</dcterms:title>
</pgterms:ebook>
<pgterms:agent rdf:about="2009/agents/4349">
<pgterms:name>Sunzi (6th cent. BC)</pgterms:name>
</pgterms:agent>
<pgterms:agent rdf:about="2009/agents/5101">
<pgterms:name>Giles, Lionel</pgterms:name>
</pgterms:agent>
</rdf:RDF>

我相信解析器的输出应该是这样的:
{
"_id": "132",
"title": "The Art of War",
"authors": [
"Sunzi (6th cent. BC)",
"Giles, Lionel"
],
"subjects": [
"Military art and science -- Early works to 1800",
"War -- Early works to 1800"
]
}

代码可以运行并解析RDF文件,但是输出中有很多我不熟悉的额外“东西”。我不知道我是否使用了正确的示例代码,或者本书的源代码中是否存在错误。
我用来运行解析器的命令如下:
node -e 'require("./lib/rdf-parser.js")("test/pg132.rdf", console.log)'

我得到了以下输出,而不是上面预期的输出:
null { _id: '132',
title: 'The Art of War',
authors:
{ '0': 'Sunzi (6th cent. BC)',
'1': 'Giles, Lionel',
options:
{ withDomLvl1: true,
normalizeWhitespace: false,
xmlMode: false,
decodeEntities: true },
_root: { '0': [Object], options: [Object], length: 1, _root: [Circular] },
length: 2,
prevObject:
{ '0': [Object],
'1': [Object],
options: [Object],
_root: [Object],
length: 2,
prevObject: [Object] } },
subjects:
{ options:
{ withDomLvl1: true,
normalizeWhitespace: false,
xmlMode: false,
decodeEntities: true },
_root: { '0': [Object], options: [Object], length: 1, _root: [Circular] },
length: 0,
prevObject:
{ options: [Object],
_root: [Object],
length: 0,
prevObject: [Object] } } }

有什么问题吗?
发布于 2015-10-26 13:06:13
npm install cheerio@0.12.4

发布于 2017-04-16 22:05:27
我刚刚对它进行了调试,发现对于 authors,需要在 .map(collect) 之后再调用 .get():
.map(collect).get()
对于 subjects,需要调用 .siblings() 而不是使用 ~ 选择器:
.siblings('rdf\\:value').map(collect).get()
我希望这能帮到你
干杯伊恩
发布于 2017-07-20 06:09:47
rdf-parser.js最后两行的新代码应为:
authors: $('pgterms\\:agent pgterms\\:name').map(collect).get(),
subjects : $('[rdf\\:resource$="/LCSH"]').siblings('rdf\\:value').map(collect).toArray()

此外,根据作者在 https://forums.pragprog.com/forums/301/topics/12439 的说明,新的示例JSON文件应该是这样的(请注意subjects部分中的更改):
{
"id": "132",
"title": "",
"authors": [
"Sunzi, active 6th century B.C.",
"Giles, Lionel"
],
"subjects": [
"Military art and science -- Early works to 1800"
]}
https://stackoverflow.com/questions/33322284
复制相似问题