在之前的blog中,曾经写到过关于搜索本地文件的技术文章
下面说说如何在 Python 中使用线程来搜索本地文件。
利用多个线程来处理搜索问题,我们可以发现它很快……
========================================================
下面是代码部分:
========================================================
1 # A parallelized "find(1)" using the thread module.
2
3 # This demonstrates the use of a work queue and worker threads.
4 # It really does do more stats/sec when using multiple threads,
5 # although the improvement is only about 20-30 percent.
6 # (That was 8 years ago. In 2002, on Linux, I can't measure
7 # a speedup. :-( )
8
9 # I'm too lazy to write a command line parser for the full find(1)
10 # command line syntax, so the predicate it searches for is wired-in,
11 # see function selector() below. (It currently searches for files with
12 # world write permission.)
13
14 # Usage: parfind.py [-w nworkers] [directory] ...
15 # Default nworkers is 4
16
17
18 import sys
19 import getopt
20 import time
21 import os
22 from stat import *
23 import _thread as thread
24
25
26 # Work queue class. Usage:
27 # wq = WorkQ()
28 # wq.addwork(func, (arg1, arg2, ...)) # one or more calls
29 # wq.run(nworkers)
30 # The work is done when wq.run() completes.
31 # The function calls executed by the workers may add more work.
32 # Don't use keyboard interrupts!
33
class WorkQ:
    """A queue of (func, args) jobs drained by a pool of worker threads.

    Usage::

        wq = WorkQ()
        wq.addwork(func, (arg1, arg2, ...))   # one or more calls
        wq.run(nworkers)

    The work is done when ``run()`` returns.  Jobs executed by the workers
    may themselves call ``addwork()`` to queue more work.  Don't use
    keyboard interrupts while the queue is running.

    Invariants:

    - ``busy`` and ``work`` are only read or modified while ``mutex`` is held
    - ``len(work)`` is the number of jobs ready to be taken
    - ``busy`` is the number of jobs being done
    - ``todo`` is kept locked while there is no job to take and some job is
      still running, so idle workers block inside ``_getwork()``
    """

    def __init__(self):
        self.mutex = thread.allocate_lock()
        self.todo = thread.allocate_lock()
        # Nothing is queued yet, so start with todo held; the first
        # addwork() call releases it.
        self.todo.acquire()
        self.work = []
        self.busy = 0

    def addwork(self, func, args):
        """Queue the call ``func(*args)`` for execution by a worker."""
        job = (func, args)
        with self.mutex:
            self.work.append(job)
            # Decide while still holding the mutex.  Testing the length
            # after releasing it lets a concurrent _getwork() observe the
            # same state and release todo a second time.
            if len(self.work) == 1:
                self.todo.release()

    def _getwork(self):
        """Block until a job is available and return it.

        Returns None once the queue is empty and no job is running, which
        tells the calling worker to exit.
        """
        self.todo.acquire()
        with self.mutex:
            if self.busy == 0 and not self.work:
                # Everything is finished: unlock todo again so the other
                # workers (and run()) can get past it as well.
                self.todo.release()
                return None
            job = self.work.pop(0)
            self.busy += 1
            # More jobs waiting: let the next worker in.  Checked under the
            # mutex so it cannot race with addwork().
            if self.work:
                self.todo.release()
        return job

    def _donework(self):
        """Record that one job finished; unlock todo when all work is done."""
        with self.mutex:
            self.busy -= 1
            if self.busy == 0 and not self.work:
                self.todo.release()

    def _worker(self):
        """Take and execute jobs until ``_getwork()`` reports completion."""
        time.sleep(0.00001)     # Let other threads run
        while True:
            job = self._getwork()
            if job is None:
                break
            func, args = job
            try:
                func(*args)
            finally:
                # Always balance the busy count, even when the job raises;
                # otherwise the remaining workers would wait forever.
                self._donework()

    def run(self, nworkers):
        """Process all queued jobs using ``nworkers`` workers.

        Starts ``nworkers - 1`` extra threads and uses the calling thread as
        one more worker.  Returns immediately when no work is queued.
        """
        if not self.work:
            return              # Nothing to do
        for _ in range(nworkers - 1):
            thread.start_new_thread(self._worker, ())
        self._worker()
        # Take todo back so it is held again, as it was after __init__.
        self.todo.acquire()
97
98
99 # Main program
100
def main():
    """Parse the command line, run the parallel search and report the time.

    Accepts ``-w nworkers`` (default 4) followed by the directories to
    search; with no directories given, the current directory is searched.
    The elapsed wall-clock time of the run is written to stderr.
    """
    worker_count = 4
    options, roots = getopt.getopt(sys.argv[1:], '-w:')
    for flag, value in options:
        if flag == '-w':
            worker_count = int(value)
    # Fall back to the current directory when none were named.
    roots = roots or [os.curdir]

    queue = WorkQ()
    for root in roots:
        queue.addwork(find, (root, selector, queue))

    started = time.time()
    queue.run(worker_count)
    elapsed = time.time() - started

    sys.stderr.write('Total time %r sec.\n' % elapsed)
121
122
123 # The predicate -- defines what files we look for.
124 # Feel free to change this to suit your purpose
125
def selector(dir, name, fullname, stat):
    """Return True for world-writable entries that are not symlinks.

    Only ``stat`` is examined; ``dir``, ``name`` and ``fullname`` are part
    of the predicate signature but are not used here.
    """
    mode = stat[ST_MODE]
    world_writable = bool(mode & S_IWOTH)
    return world_writable and not S_ISLNK(mode)
129
130
131 # The find procedure -- calls wq.addwork() for subdirectories
132
def find(dir, pred, wq):
    """Scan one directory, print matching entries and queue subdirectories.

    Each entry of ``dir`` is lstat()ed and passed to
    ``pred(dir, name, fullname, stat)``; the full name is printed when the
    predicate is true.  Every subdirectory that is not a mount point is
    queued on ``wq`` as a further ``find`` job.  Errors from listing the
    directory or stat-ing an entry are printed and that item is skipped.
    """
    try:
        entries = os.listdir(dir)
    except OSError as err:
        print(repr(dir), ':', err)
        return

    skipped = (os.curdir, os.pardir)
    for entry in entries:
        if entry in skipped:
            continue
        path = os.path.join(dir, entry)
        try:
            info = os.lstat(path)
        except OSError as err:
            print(repr(path), ':', err)
            continue
        if pred(dir, entry, path, info):
            print(path)
        # Descend into real subdirectories only; mount points are not queued.
        if S_ISDIR(info[ST_MODE]) and not os.path.ismount(path):
            wq.addwork(find, (path, pred, wq))
152
153
# Call the main program.  This call is unconditional, so it also runs when
# the file is imported as a module rather than executed as a script.

main()
更多信息:http://www.oschina.net/code/explore/Python-3.1.3/Demo/threads/find.py
E | hongtenzone@foxmail.com B | http://www.cnblogs.com/hongten