所以我有一个100000字的文件。我想知道怎样才能创建一个文件,上面写着:"the: 10282次"、"和: 322次"、"sadfas222: 1次"
文本文件如下所示:
asdf
jkasdf
the
sadf
asdn
23kl
asdf
qer
f
asdf
r
2
r
fsd
fa
sdf
asd
far2
sdv
as
df
asdf
asdf

发布于 2016-08-15 12:05:54
在 Node.js 中,执行 npm i split2 through2 -S 之后:
// Count word frequencies from words.txt (one word per line) and print
// them sorted by descending count. Requires: npm i split2 through2 -S
const fs = require('fs')
const split = require('split2')
const through = require('through2')

// Null-prototype accumulator: with a plain {}, the word "__proto__" is
// silently dropped (numeric assignment to __proto__ is a no-op) and words
// like "constructor" collide with inherited Object.prototype members.
const words = Object.create(null)

fs.createReadStream('words.txt')
.pipe(split())
.pipe(through(write, end))

// Transform step: tally each line (word) that split() emits.
function write (buf, enc, next) {
  const word = buf.toString()
  // Explicit default instead of the `++undefined → NaN, NaN || 1` trick.
  words[word] = (words[word] || 0) + 1
  next()
}

// Flush step: print every word, most frequent first.
function end () {
  Object.keys(words)
    .sort((a, b) => words[b] - words[a])
    .forEach(word => {
      console.log(`${word}: ${words[word]} times`)
    })
}
// Posted 2016-08-15 11:58:15
您可以对此稍作修改,使其接受来自文本文件的输入:
// Demo input: one "word" per line, as it would come from the text file.
const letterArray = "asdf\njkasdf\nthe\nsadf".split('\n');

/**
 * Count how many times each entry appears in the given array.
 *
 * A Map is used instead of a plain {} accumulator: with
 * `mapping[word] !== undefined`, inputs such as "constructor" match
 * inherited Object.prototype members and produce NaN counts, and a
 * "__proto__" entry is silently dropped.
 *
 * @param {string[]} letterArray - words to tally
 * @returns {Object<string, number>} word -> occurrence count
 */
function count(letterArray) {
  const tally = new Map();
  for (const word of letterArray) {
    tally.set(word, (tally.get(word) ?? 0) + 1);
  }
  // Object.fromEntries defines own data properties, so even a
  // "__proto__" key round-trips into the returned object correctly.
  return Object.fromEntries(tally);
}
console.log("count: ", count(letterArray)); // Posted 2016-08-15 12:00:32
我使用 Python 的方法得到了您所期望的结果。
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer


def wordcount(data):
    """Print (word, count) pairs for ``data``, most frequent first.

    Pass the file contents to this function as a single string.

    :param data: raw text to tokenize and count
    """
    cv = CountVectorizer(min_df=0, strip_accents="ascii", decode_error="ignore")
    counts = cv.fit_transform([data]).toarray().ravel()
    # get_feature_names() was removed in scikit-learn 1.2;
    # get_feature_names_out() is the supported replacement.
    words = [str(w) for w in cv.get_feature_names_out()]
    counts = [int(c) for c in counts]
    unsorted_result = list(zip(words, counts))
    # Python 3 print function (the original used the Python 2 print statement
    # and byte-encoded each word with .encode('UTF8'), which would print
    # b'...' values under Python 3).
    print(sorted(unsorted_result, key=lambda x: x[1], reverse=True))
https://stackoverflow.com/questions/38948988
复制相似问题