#!/usr/bin/env python
import random
# Lowercase ASCII alphabet 'abc..z' that the random words are drawn from.
alphaStr = "".join(map(chr, range(97, 123)))

# Number of words written when the script is run directly.
maxIter = 100000


def random_word(min_len=1, max_len=5):
    """Return one random word made of lowercase ASCII letters.

    The word length is drawn uniformly from ``min_len``..``max_len``
    (both inclusive) and every letter is drawn uniformly from ``alphaStr``.
    """
    # Named ``length`` so the builtin ``len`` is not shadowed.
    length = random.randint(min_len, max_len)
    return "".join(random.choice(alphaStr) for _ in range(length))


def write_words(path="word.txt", count=maxIter):
    """Write ``count`` random words to ``path``, one word per line.

    An existing file at ``path`` is overwritten. With ``count`` equal to 0
    an empty file is created.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(path, "w") as fp:
        for _ in range(count):
            fp.write(random_word() + '\n')


if __name__ == "__main__":
    write_words()
cat word.txt | ./wordcount_mapper.py | ./wordcount_reducer.py
Word count reducer, Python
filename: wordcount_reducer.py
from operator import itemgetter
import sys
def count_words(lines):
    """Sum the counts per word from tab-separated ``word<TAB>count`` lines.

    ``lines`` is any iterable of strings (``sys.stdin`` when run as a
    script). Lines without a tab separator, and lines whose count is not an
    integer, are skipped. Returns a dict mapping each word to its total.
    """
    wordcount = {}
    for line in lines:
        fields = line.strip().split('\t', 1)
        if len(fields) != 2:
            # Blank or tab-less line: nothing to count, and unpacking it
            # into (word, count) would raise.
            continue
        word, count = fields
        try:
            count = int(count)
        except ValueError:
            # Non-numeric count: ignore this record and keep going.
            continue
        wordcount[word] = wordcount.get(word, 0) + count
    return wordcount


def format_counts(wordcount):
    """Return ``word<TAB>count`` strings for ``wordcount``, sorted by word."""
    sorted_wordcount = sorted(wordcount.items(), key=itemgetter(0))
    return ["%s\t%s" % (word, count) for word, count in sorted_wordcount]


if __name__ == "__main__":
    for row in format_counts(count_words(sys.stdin)):
        # The whole formatted string is passed to print; applying ``%`` to
        # the result of print() fails on Python 3.
        print(row)
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。