编写用于分析 Apache 访问日志的脚本,主要要求如下:统计每个客户端 IP 地址的访问次数,以及 Firefox、MSIE 两种浏览器的访问次数。
简单实现
# vim countweb.py
#!/usr/bin/env python
import re
# Count how many access-log lines begin with each dotted-decimal address.
logfile = '/var/log/httpd/access_log'
cDict = {}  # matched address text -> number of lines starting with it
# Four dot-separated digit groups anchored at the start of the line.
# Raw string so the backslashes reach the regex engine untouched.
patt_ip = r'^\d+\.\d+\.\d+\.\d+'
with open(logfile) as f:
    for eachLine in f:
        m = re.search(patt_ip, eachLine)
        if m is not None:
            ipaddr = m.group()
            # If the address is already in the dict add 1 to its value,
            # otherwise start from 0 so the first sighting becomes 1.
            cDict[ipaddr] = cDict.get(ipaddr, 0) + 1
# Single-argument print(...) produces the same output on Python 2 and 3.
print(cDict)
使用函数封装的方式实现
# vim countweb2.py
#!/usr/bin/env python
import re
def countPatt(patt, fname):
    """Count the distinct regex matches found in the lines of a file.

    Each line is searched once with ``search``, so only the first match
    on a line is counted; lines without a match are ignored.

    Args:
        patt: Regular expression (pattern string or compiled pattern).
        fname: Path of the text file to scan.

    Returns:
        A dict mapping each matched text to the number of lines whose
        first match was that text. Empty if nothing matched.
    """
    cDict = {}
    # Compile once up front instead of resolving the pattern per line.
    search = re.compile(patt).search
    with open(fname) as f:
        for eachLine in f:
            m = search(eachLine)
            if m is not None:
                k = m.group()
                cDict[k] = cDict.get(k, 0) + 1
    return cDict
def test():
    """Print two match-count dicts for the Apache access log.

    First the counts keyed by the dotted-decimal address at the start of
    each line, then the counts keyed by the browser token (``Firefox`` or
    ``MSIE``) found on each line.
    """
    logfile = '/var/log/httpd/access_log'
    # Raw string: same pattern text, without relying on unknown escapes.
    patt_ip = r'^\d+\.\d+\.\d+\.\d+'
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(countPatt(patt_ip, logfile))
    patt_br = 'Firefox|MSIE'
    print(countPatt(patt_br, logfile))
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    test()