BitTorrent文件使用bencode编码,其中包括了4种数据类型:
'd' 开头表示是dict类型,'e'表示结束
'l' (小写字母L)开头表示是list类型,'e'表示结束
'i'开头表示是integer类型,'e'表示结束,可以表示负数
以数字开头表示string类型,数字为string长度,长度与string内容以':'分隔
默认所有text类型的属性为utf-8编码,但是大多数BitTorrent文件包含codepage和encoding属性,用于指定text的编码格式
BitTorrent的标准参见:http://www.bittorrent.org/beps/bep_0003.html
以下是自己写的Python实现,初学Python,代码写起来还都是C/C++风格,慢慢改进吧。
torrent_file.py
import os
from datetime import tzinfo
from datetime import datetime
import bcodec
# Size argument passed to file.read() in TorrentFile.read_file; a negative
# size makes read() return everything up to end of file.
_READ_MAX_LEN = -1
class BTFormatError(Exception):
    """Raised when the content of a .torrent file is not a bencoded dictionary.

    Derives from Exception (not BaseException) so that ordinary
    ``except Exception`` handlers see it; BaseException is reserved for
    interpreter-level events such as KeyboardInterrupt and SystemExit.
    """
class TorrentFile(object):
    """Read-only accessor for the metainfo dictionary of a .torrent file.

    Call read_file() first; until then every getter reports "nothing"
    (None or an empty list).  Text values are returned decoded when a
    getter goes through the internal text decoder, otherwise exactly as
    the bencode decoder produced them.
    """

    # Length in bytes of one entry in the 'pieces' string (a SHA-1 digest).
    _HASH_LEN = 20

    def __init__(self):
        # Kept per instance so that two TorrentFile objects never share
        # the same dictionary.
        self.__metainfo = {}
        self.__file_name = ''

    def read_file(self, filename):
        """Load and decode the torrent file at *filename*.

        Raises:
            IOError: if the file cannot be opened or read.
            BTFormatError: if the content does not decode to a non-empty
                dictionary.
        """
        # 'with' guarantees the handle is closed even when read() fails.
        with open(filename, 'rb') as torrent_file:
            data = torrent_file.read(_READ_MAX_LEN)
        metainfo = bcodec.bdcode(data)
        if not metainfo or not isinstance(metainfo, dict):
            raise BTFormatError('%s is not a valid torrent file' % filename)
        self.__file_name = filename
        self.__metainfo = metainfo

    def __is_singlefile(self):
        """Return True when the 'info' dictionary carries a 'length' key."""
        # 'length' lives inside 'info' (next to 'name' and 'pieces'), not
        # at the top level of the metainfo dictionary.
        return self.__get_meta_info('length') is not None

    def __decode_text(self, text):
        """Decode *text* using the torrent's declared encoding.

        The 'encoding' key wins over 'codepage'; utf-8 is the fallback.
        Returns None for empty/missing text and the undecoded input when
        it cannot be decoded (bad bytes or unknown codec name).
        """
        if not text:
            return None
        encoding = self.get_encoding()
        if not encoding:
            codepage = self.get_codepage()
            encoding = 'cp' + str(codepage) if codepage else 'utf-8'
        try:
            return text.decode(encoding)
        except (ValueError, LookupError):
            # ValueError covers UnicodeDecodeError; LookupError is raised
            # for a codec name Python does not know.
            return text

    def __get_meta_top(self, key):
        """Return the top-level metainfo value for *key*, or None."""
        return self.__metainfo.get(key)

    def __get_meta_info(self, key):
        """Return the value for *key* inside the 'info' dictionary, or None."""
        meta_info = self.__get_meta_top('info')
        if isinstance(meta_info, dict):
            return meta_info.get(key)
        return None

    def __piece_hashes(self, pieces, first, count):
        """Return *count* consecutive hashes from *pieces*, starting at index *first*."""
        size = self._HASH_LEN
        return [pieces[i * size:(i + 1) * size]
                for i in range(first, first + count)]

    def get_codepage(self):
        """Return the top-level 'codepage' value, or None."""
        return self.__get_meta_top('codepage')

    def get_encoding(self):
        """Return the top-level 'encoding' value, or None."""
        return self.__get_meta_top('encoding')

    def get_announces(self):
        """Return the tracker tiers as a list of lists of URLs.

        The single 'announce' URL, when present, forms the first tier;
        the tiers of 'announce-list' follow.  Returns [] when the torrent
        names no tracker.
        """
        announces = []
        ann = self.__get_meta_top('announce')
        if ann:
            announces.append([ann])
        tiers = self.__get_meta_top('announce-list')
        # 'announce-list' is already a list of tiers, so it is merged in
        # rather than nested one level deeper; a missing key adds nothing.
        if isinstance(tiers, list):
            announces.extend(tiers)
        return announces

    def get_publisher(self):
        """Return the decoded 'publisher' value, or None."""
        return self.__decode_text(self.__get_meta_top('publisher'))

    def get_publisher_url(self):
        """Return the decoded 'publisher-url' value, or None."""
        return self.__decode_text(self.__get_meta_top('publisher-url'))

    def get_creater(self):
        """Return the decoded 'created by' value, or None."""
        return self.__decode_text(self.__get_meta_top('created by'))

    def get_creation_date(self):
        """Return 'creation date' as a naive UTC datetime, or None if absent."""
        utc_date = self.__get_meta_top('creation date')
        if utc_date is None:
            return None
        return datetime.utcfromtimestamp(utc_date)

    def get_comment(self):
        """Return the 'comment' value as stored (not decoded), or None."""
        return self.__get_meta_top('comment')

    def get_nodes(self):
        """Return the top-level 'nodes' value, or None."""
        return self.__get_meta_top('nodes')

    def get_piece_length(self):
        """Return the 'piece length' value from 'info', or None."""
        return self.__get_meta_info('piece length')

    def get_files(self):
        """Describe every file in the torrent.

        Returns a list of dicts with the keys:
            'name':   path components (for multi-file torrents the first
                      component is the torrent's 'name' directory),
            'length': file size as stored in the torrent,
            'pieces': the 20-byte hashes of every piece that holds part
                      of the file (adjacent files may share a piece).

        Returns [] when 'pieces', 'name' or 'piece length' is missing.
        In a multi-file torrent a malformed entry stops the scan and the
        files collected so far are returned.
        """
        files = []
        pieces = self.__get_meta_info('pieces')
        name = self.__decode_text(self.__get_meta_info('name'))
        piece_length = self.get_piece_length()
        if not pieces or not name or not piece_length:
            return files

        if self.__is_singlefile():
            file_length = self.__get_meta_info('length')
            if not file_length:
                return files
            # Ceiling division: a trailing partial piece still has a hash.
            pieces_num = (file_length + piece_length - 1) // piece_length
            if pieces_num * self._HASH_LEN > len(pieces):
                return files
            files.append({
                'name': [name],
                'length': file_length,
                'pieces': self.__piece_hashes(pieces, 0, pieces_num),
            })
            return files

        meta_files = self.__get_meta_info('files')
        if not meta_files:
            return files

        # Byte offset of the current file within the concatenation of all
        # files, which is what the piece hashes are computed over.
        offset = 0
        for one_file in meta_files:
            if 'path' not in one_file or 'length' not in one_file:
                break
            file_length = one_file['length']
            first_piece = offset // piece_length
            offset += file_length
            if file_length > 0:
                last_piece = (offset - 1) // piece_length
                pieces_num = last_piece - first_piece + 1
            else:
                pieces_num = 0
            if (first_piece + pieces_num) * self._HASH_LEN > len(pieces):
                break
            path_list = [name]
            path_list.extend(self.__decode_text(part)
                             for part in one_file['path'])
            files.append({
                'name': path_list,
                'length': file_length,
                'pieces': self.__piece_hashes(pieces, first_piece, pieces_num),
            })
        return files
if __name__ == '__main__':
    # Manual smoke test: load one torrent and dump what the getters report.
    # Alternative sample inputs; swap the active line to try another one.
    #filename = r".\huapi2.torrent"
    #filename = r".\mh5t3tJ0EC.torrent"
    filename = r".\huapi2.1.torrent"
    torrent = TorrentFile()
    print("begin to read file")
    try:
        torrent.read_file(filename)
    except (IOError, BTFormatError) as reason:
        print("Read bittorrent file error! Error:%s" % reason)
        loaded = False
    else:
        loaded = True
    print("end to read file")
    # Nothing was loaded after a failed read, so skip the summary instead
    # of printing a page of None values.
    if loaded:
        # Each print takes one pre-formatted string so the output is the
        # same whether print is a statement or a function.
        print("announces:  %s" % (torrent.get_announces(),))
        print("peace length: %s" % (torrent.get_piece_length(),))
        print("code page: %s" % (torrent.get_codepage(),))
        print("encoding: %s" % (torrent.get_encoding(),))
        print("publisher: %s" % (torrent.get_publisher(),))
        print("publisher url: %s" % (torrent.get_publisher_url(),))
        print("creater: %s" % (torrent.get_creater(),))
        print("creation date: %s" % (torrent.get_creation_date(),))
        print("commnent: %s" % (torrent.get_comment(),))
        print("nodes: %s" % (torrent.get_nodes(),))
        for one_file in torrent.get_files():
            print('file name: %s' % ('\\'.join(one_file['name']),))
            print('file length: %s' % (one_file['length'],))
            print('pieces: %s' % (list(one_file['pieces']),))
bcodec.py
'''
Created on 2012-9-30

@author: ddt
'''
def bdcode(data):
    '''Decode the bencoded value at the start of data.

    data is copied into a new list of its elements, and that list is what
    the readers consume, so the caller's object is left untouched.
    Returns whatever _read_chunk returns for that list.
    '''
    return _read_chunk(list(data))

def _read_chunk(data):
    '''Decode the next bencoded value found at the front of data.

    The first element selects the reader: a digit -> _read_string,
    'd' -> _read_dict, 'i' -> _read_integer, 'l' -> _read_list.
    Returns that reader's result, or None when data is empty or starts
    with anything else (nothing is consumed in those two cases).
    '''
    if not data:
        return None

    head = data[0]
    if head.isdigit():
        return _read_string(data)
    if head == 'd':
        return _read_dict(data)
    if head == 'i':
        return _read_integer(data)
    if head == 'l':
        return _read_list(data)
    return None

def _read_dict(data):
    '''Decode a bencoded dictionary ('d' ... 'e') from the front of data.

    Consumes the elements it reads from data.  Returns the dict, or None
    when the leading 'd' or closing 'e' is missing, a key is not a
    string, or a key/value fails to decode.
    '''
    if len(data) == 0 or data.pop(0) != 'd':
        return None

    chunk = {}
    while len(data) > 0 and data[0] != 'e':
        key = _read_chunk(data)
        if not isinstance(key, str):
            return None
        value = _read_chunk(data)
        # Only None signals a decode failure.  0, '', [] and {} are valid
        # bencoded values and must not be mistaken for one.
        if value is None:
            return None
        chunk[key] = value

    if len(data) == 0 or data.pop(0) != 'e':
        return None

    return chunk

def _read_list(data):
    '''Decode a bencoded list ('l' ... 'e') from the front of data.

    Consumes the elements it reads from data.  Returns the list, or None
    when the leading 'l' or closing 'e' is missing or an item fails to
    decode.
    '''
    if len(data) == 0 or data.pop(0) != 'l':
        return None

    chunk = []
    while len(data) > 0 and data[0] != 'e':
        value = _read_chunk(data)
        # Only None signals a decode failure.  0, '', [] and {} are valid
        # list items.
        if value is None:
            return None
        chunk.append(value)

    if len(data) == 0 or data.pop(0) != 'e':
        return None

    return chunk

def _read_string(data):
    '''Decode a bencoded string (<length>:<content>) from the front of data.

    Consumes the elements it reads from data.  Returns the content as a
    string, or None when the length prefix or the ':' is missing or fewer
    than <length> elements remain.
    '''
    digits = []
    while len(data) > 0 and data[0].isdigit():
        digits.append(data.pop(0))

    if len(data) == 0 or data.pop(0) != ':':
        return None

    # A bare ':' has no length prefix; report it like any other malformed
    # input instead of letting int('') raise ValueError.
    if not digits:
        return None

    str_len = int(''.join(digits))
    if str_len > len(data):
        return None

    value = data[0:str_len]
    del data[0:str_len]
    return ''.join(value)

def _read_integer(data):
    '''Decode a bencoded integer ('i' <digits> 'e') from the front of data.

    An optional leading '-' marks a negative number.  Consumes the
    elements it reads from data.  Returns the int, or None when data is
    shorter than 'i2e', the 'i'/'e' markers are missing, or there are no
    digits.  Leading zeros and '-0' are accepted leniently.
    '''
    if len(data) < len('i2e') or data.pop(0) != 'i':
        return None

    sign = data.pop(0)
    if sign != '-' and not sign.isdigit():
        return None
    digits = [sign]

    while len(data) > 0 and data[0].isdigit():
        digits.append(data.pop(0))

    if len(data) == 0 or data.pop(0) != 'e':
        return None

    integer = ''.join(digits)
    # 'i-e' leaves only the sign; int('-') would raise ValueError.
    if integer == '-':
        return None

    return int(integer)