我最近写了一个面试问题的解决方案,但被拒绝了。请分析我的解决方案和建议?处理大型文件不是很有效吗?
Problem给出了一个包含得分记录的数据文件,在您最喜欢的编程语言中,编写一个程序输出按下降分数排序的N个最高记录ID。输出应该是格式良好的JSON。考虑考虑解决方案的资源效率。
数据文件输入数据文件是一系列键值对(每一行一个),格式如下:
3830591998918656:{"id":"2208ef95-355c-53a6-96bc-206a4831f2fe","data":"Tu"} 548113328635904:{“id”:“d 5887987-bf5d-5813-B524-722 ff11882”,"data":"Vubiteh hone na dabupkof tetnut“,} 4322085113430016:{"id":"8f5330b9-0279-5a67-aee3-d6acf221a53a","data":"Losu nihpe aee3”
最多可达100 k或更多
成功运行后,您的解决方案应该退出,退出代码为0。如果输入数据文件无效,则解决方案应使用代码2退出,而如果未找到输入文件,则应使用代码1退出解决方案。应忽略输入文件中的空行,而不是将其视为无效输入。
我的解决方案
highest.js入口点
{
let argv = require("yargs").argv;
const yargs = require("yargs");
const handler = require("./handler");
const fileName = argv._[0];
const numberOfEntries = argv._[1];
if (!fileName || !numberOfEntries) {
console.log("Command Incorrect : --use node highest <filename> <count>");
} else {
handler.getHighestScore(fileName, numberOfEntries);
}
}
Handler.js {
const path = require("path");
const fs = require("fs");
const es = require("event-stream");
let ln = 0;
const scoreArray = [];
exports.getHighestScore = (fileName, numberOfEntries) => {
const filePath = path.join(path.dirname(require.main.filename), fileName);
fs.stat(filePath, function (err, stat) {
if (stat && stat.isFile()) {
let s = fs
.createReadStream(filePath)
.pipe(es.split())
.pipe(
es
.mapSync(function (line) {
s.pause();
if (ln < parseInt(numberOfEntries)) {
if (line.trim().length > 0) {
let score = line.slice(0, line.indexOf(":"));
let secondPart = line.slice(line.indexOf(":") + 1);
let jsonObject = JSON.parse(secondPart);
if (
jsonObject.hasOwnProperty("id") &&
jsonObject.id.trim().length > 0
) {
let outputObject = { score: score, id: jsonObject.id };
scoreArray.push(outputObject);
} else {
process.stdout.write(
"***********File Invalid - ID Mssing************* code(2)"
);
process.exit(2);
}
}
}
if (ln == parseInt(numberOfEntries)) {
s.end();
} else {
ln += 1;
}
s.resume();
})
.on("error", function (err) {
process.stdout.write(
"***********Error while reading file************* code(2)"
);
process.exit(2);
})
.on("end", function () {
let arr = scoreArray.sort((a, b) => (b.score > a.score ? 1 : -1));
process.stdout.write(
"TOTAL LINES READ = " +
ln +
" TOTAL OUTPUT = " +
arr.length +
"\n"
);
process.stdout.write(JSON.stringify(arr));
process.stdout.write("\n");
process.exit(0);
})
);
} else {
process.stdout.write("***********FILE NOT FOUND************* code(1)");
process.exit(1);
}
});
};
}
欢迎任何有关解决方案的建议和批评。
发布于 2022-10-26 04:23:10
您正在将整个输入文件读入内存并存储所有内容。一种空间高效的方法是一次流一行输入,解析它,检查它是否包含N-最高分数。如果是,将其添加到N-最高数据结构中。如果不是,跳过它。当您浏览整个文件时,只在内存中保留N个最高的数据。,这似乎是您的代码遗漏的要点。
出于效率原因,这会在最高数组中进行插入排序,所以每次添加值时,它都不会不断地使用整个数组。
以下是nodejs中的一个实现:
const readline = require('node:readline');
const fs = require('node:fs');
// sample input data per line
// 3830591998918656: { "id": "2208ef95-355c-53a6-96bc-206a4831f2fe", "data": "Tu" }
// 548113328635904: { "id": "d5887987-bf5d-5813-b524-722ffff11882", "data": "Vubiteh hone na dabupkof tetnut." }
// 4322085113430016: { "id": "8f5330b9-0279-5a67-aee3-d6acf221a53a", "data": "Losu nihpe upaveitpse teblujnis." }
// 6348702421614592: { "id": "1fef0dbc-c75b-5959-835f-80e5f15b6da1", "data": "Riliv kaliku laiza zen daze ." }
const scoreRegex = /^\s*(\d+):\s*/;
function parseLine(line) {
// ok to skip empty lines
if (line.trim() === "") return null;
const result = {};
// parse leading digits
const scoreMatch = line.match(scoreRegex);
if (!scoreMatch) throw new Error("Missing score at beginning of line");
result.score = BigInt(scoreMatch[1], 10);
const remainingLine = line.slice(scoreMatch[0].length);
result.info = JSON.parse(remainingLine);
if (typeof result.info.id !== "string" || result.info.id === "") {
throw new Error("Missing valid id value");
}
return result;
}
// input and output files
const fileInput = "input.txt";
const fileOutput = "output.txt";
const howManyHighest = 2;
const highestScores = [];
function getLowest() {
return highestScores[highestScores.length - 1].score;
}
// do an insertion sort into the highestScores array
// highest score record first
// maintain length at no more than howManyHighest
function insertHighest(val) {
let inserted = false;
// for performance reasons, only search through the highestScores
// list if this new score is higher than the lowest score we have in the list so far
if (highestScores.length && val.score > getLowest()) {
for (let [index, item] of highestScores.entries()) {
if (val.score > item.score) {
// insert it into this position in the array, pushing the others up
highestScores.splice(index, 0, val);
inserted = true;
break;
}
}
}
if (inserted) {
// trim overflow, if any
if (highestScores.length > howManyHighest) {
highestScores.pop();
}
} else {
// didn't insert it, see if we aren't full yet
if (highestScores.length < howManyHighest) {
highestScores.push(val);
}
}
}
const rl = readline.createInterface({
input: fs.createReadStream(fileInput),
crlfDelay: Infinity
});
rl.on('error', err => {
if (err.code === 'ENOENT') {
process.exit(1);
} else {
console.log(err);
// some sort of read error
process.exit(2);
}
}).on('line', line => {
try {
const data = parseLine(line);
if (data) {
insertHighest(data);
}
} catch (e) {
console.log(e);
console.log(`Invalid line: ${line}`);
process.exit(2);
}
}).on('close', () => {
// generate array of highest scores
const output = highestScores.map(item => item.info.id)
console.log(output);
fs.writeFile(fileOutput, JSON.stringify(output), err => {
if (err) {
console.log(err);
// output write error
process.exit(3);
} else {
// all done, successfully
process.exit(0);
}
});
});
我遇到了一些未具体说明的实现问题,如果您在回答时提出这些问题,就会显示您完全理解这个问题。
BigInt
来处理任意大的数字(代价是一些运行时性能)。由于没有指定这一点,所以我在这里使用了BigInt
解析和比较,使分数值具有无限整数。id
属性的有效结构。这段代码只是测试一个非空字符串值.,
https://stackoverflow.com/questions/74201237
复制相似问题