Python迭代器与平常的可迭代对象相比,拥有占用内存少等优点,在处理大量数据的时候应该优先考虑用迭代器实现,如下面的例子:
a = [x for x in range(100)]
a.__sizeof__()
888
gen=iter(a)
gen.__sizeof__()
32
如果数据达到上百万级别,这里的优化还是蛮可观的(注意:iter(a)得到的迭代器仍然引用着原列表a,只有像生成器那样惰性地产生数据时,才真正节省内存)。iter()和next()是迭代器常用的两个方法,迭代器是一个可以记住遍历的位置的对象。
迭代器对象从集合的第一个元素开始访问,直到所有的元素被访问完结束。迭代器只能往前不会后退。
lst = [1, 2, 3]
glst = iter(lst)
type(glst)
list_iterator
next(glst)
1
next(glst)
2
next(glst)
3
next(glst)
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
<ipython-input-18-41016c5b5795> in <module>()
----> 1 next(glst)
StopIteration:
itertools是python标准库,目的是实现高效循环的迭代器。
import itertools
# count(start, [step=1]) 返回起始值和步进值,持续迭代
gen = itertools.count(100)
next(gen)
100
next(gen)
101
## 生成器实现
def count(start=0, step=1):
    """Yield ``start``, ``start + step``, ``start + 2 * step``, ... without end.

    Generator sketch of ``itertools.count``.

    Args:
        start: First value produced.
        step: Amount added after each yielded value.
    """
    while True:
        yield start
        start += step
import itertools
# cycle(iterable) 重复循环迭代iterable,iterable为可迭代对象
gen = itertools.cycle([1, 2])
next(gen)
1
next(gen)
2
next(gen)
1
## 生成器实现
def cycle(iterable):
    """Yield the items of ``iterable`` over and over.

    Generator sketch of ``itertools.cycle``. Items are remembered during the
    first pass so that one-shot iterators (generators, file objects, ...) can
    be replayed as well; re-iterating the argument itself would yield nothing
    on the second pass for such inputs.

    Args:
        iterable: Any iterable. If it is empty the generator simply finishes.
    """
    saved = []
    for item in iterable:
        yield item
        saved.append(item)
    # `while saved` (not `while True`) so an empty input terminates instead
    # of spinning forever without ever yielding.
    while saved:
        for item in saved:
            yield item
import itertools
# repeat(elem[, times]) 如果未指定times,将一直迭代elem
gen = itertools.repeat('n')
next(gen)
n
## 生成器实现
def repeat(elem, times=None):
    """Yield ``elem`` repeatedly.

    Generator sketch of ``itertools.repeat``.

    Args:
        elem: The object to yield.
        times: Number of repetitions; ``None`` (the default) repeats forever.
    """
    if times is None:
        while True:
            yield elem
    else:
        for _ in range(times):
            yield elem
import itertools
# accumulate(iterable[, func]) 迭代输出循环的和
gen = itertools.accumulate([1, 2], lambda x, y: x+y)
next(gen)
1
next(gen)
3
## 生成器实现
def func(total, element):
    """Default combiner for ``accumulate``: return ``total + element``."""
    return total + element


def accumulate(iterable, func=func):
    """Yield running results of applying ``func`` across ``iterable``.

    Generator sketch of ``itertools.accumulate``. The first item is yielded
    unchanged; every later output is ``func(previous_output, item)``.

    Args:
        iterable: Items to accumulate. An empty iterable yields nothing.
        func: Two-argument callable combining the running total with the
            next item. Defaults to addition.
    """
    it = iter(iterable)
    try:
        total = next(it)
    except StopIteration:
        # Empty input: finish quietly. Letting StopIteration escape a
        # generator body is turned into RuntimeError (PEP 479).
        return
    yield total
    for element in it:
        total = func(total, element)
        yield total
import itertools
# chain(iterable, iterable...) 将多个可迭代对象合并,以迭代器依次输出每个可迭代对象的要素
gen=itertools.chain([1, 2], "a")
next(gen)
1
next(gen)
2
next(gen)
a
## 生成器实现
def chain(*args):
    """Yield every item of each iterable in ``args``, one iterable after another.

    Generator sketch of ``itertools.chain``.

    Args:
        *args: Zero or more iterables, consumed in the order given.
    """
    for it in args:
        yield from it
import itertools
# from_iterable(iterable) 将一个可迭代对象内的要素,以chain的方法重新迭代输出
gen=itertools.chain.from_iterable([[1, 2], [3]])
next(gen)
1
next(gen)
2
next(gen)
3
## 生成器实现
def from_iterable(iterable):
    """Flatten one level: yield each item of each element of ``iterable``.

    Generator sketch of ``itertools.chain.from_iterable``.

    Args:
        iterable: An iterable whose elements are themselves iterable.
    """
    for it in iterable:
        yield from it
import itertools
# compress(iterable, selectors) 根据selectors返回相应的iterable对应索引的值
gen=itertools.compress('abcd', [1, 0, 1, 0])
next(gen)
a
next(gen)
c
## 生成器实现
def compress(iterable, selectors):
    """Return an iterator over the items of ``iterable`` whose selector is truthy.

    Sketch of ``itertools.compress``. Items and selectors are paired
    positionally with ``zip``, so ``iterable`` may be any iterable (not only
    an indexable sequence) and iteration stops at the shorter of the two
    inputs instead of raising ``IndexError``.

    Args:
        iterable: Source items.
        selectors: Iterable of flags; an item is kept when its flag is truthy.

    Returns:
        A generator producing the selected items in order.
    """
    return (item for item, keep in zip(iterable, selectors) if keep)
import itertools
# dropwhile(fun, seq) 迭代seq在fun中返回为假后的全部要素
gen=itertools.dropwhile(lambda x: x < 5, [1, 2, 3, 4, 5, 1])
next(gen)
5
next(gen)
1
## 生成器实现
def dropwhile(predicate, iterable):
    """Skip leading items while ``predicate`` is true, then yield all the rest.

    Generator sketch of ``itertools.dropwhile``. Once one item fails the
    predicate, that item and every later item are yielded without being
    tested again.

    Args:
        predicate: One-argument callable applied to the leading items.
        iterable: Source items.
    """
    iterable = iter(iterable)
    # First loop: discard the leading run; stop at the first failing item.
    for x in iterable:
        if not predicate(x):
            yield x
            break
    # Second loop continues on the same iterator, so nothing is replayed.
    for x in iterable:
        yield x
import itertools
# filterfalse(predicate, iterable) 当predicate为假,迭代假的要素
gen=itertools.filterfalse(lambda x: x<5, [1, 2, 3, 4, 5, 1])
next(gen)
5
## 生成器实现
def filterfalse(predicate, iterable):
    """Yield the items of ``iterable`` for which ``predicate`` is false.

    Generator sketch of ``itertools.filterfalse``.

    Args:
        predicate: One-argument callable, or ``None`` to test the truthiness
            of the items themselves (as ``itertools.filterfalse`` does).
        iterable: Source items.
    """
    if predicate is None:
        predicate = bool
    for x in iterable:
        if not predicate(x):
            yield x
import itertools
# groupby(iterable[, key]) 按照分组的子迭代器
# Print each group key followed by the items in that group.
# NOTE(review): `data` is not defined in this snippet; it is assumed to be an
# iterable supplied by the surrounding context.
for key, items in itertools.groupby(data, key=lambda u: u):
    print(key)
    for item in items:
        print(item)
groupby只检查相邻的项:只有连续出现且key相同的项才会被分到同一组,因此分组前通常需要先按同一个key排序。
按客户ip分组nginx日志
from itertools import groupby


def client_ip(line):
    """Return the first whitespace-separated field of a log line."""
    return line.split()[0]


# Count the lines per client IP in access.log and print each IP followed by
# its count.
with open("access.log") as f:
    # Blank lines have no first field and would raise IndexError in
    # client_ip, so they are skipped while reading.
    data = [line for line in f if line.strip()]

# groupby only merges adjacent items, so sort with the same key it groups by.
data.sort(key=client_ip)
for key, items in groupby(data, client_ip):
    print(key)
    count = sum(1 for _ in items)
    print(count)
import itertools
# islice(seq[, start], stop[, step]) 切割并迭代
gen=itertools.islice('abcdf', 0, None, 2)
next(gen)
a
next(gen)
c
## 生成器实现
def islice(seq, start=0, stop=None, step=1):
    """Yield items ``start, start + step, ...`` of ``seq`` up to (not including) ``stop``.

    Generator sketch of ``itertools.islice``. Works on any iterable, not only
    on sequences that support slicing, and finishes cleanly when the input
    runs out or ``stop`` is reached.

    Args:
        seq: Source iterable.
        start: Index of the first item to yield; ``None`` means 0.
        stop: Index at which to stop (exclusive); ``None`` means "until the
            input is exhausted".
        step: Distance between yielded indices; ``None`` means 1.

    Raises:
        ValueError: If ``start`` or ``stop`` is negative or ``step`` is less
            than 1. Because this is a generator function, the error surfaces
            on the first ``next()`` call rather than when ``islice`` is called.
    """
    start = 0 if start is None else start
    step = 1 if step is None else step
    if start < 0 or step < 1 or (stop is not None and stop < 0):
        raise ValueError(
            "start and stop must be None or >= 0, and step must be None or >= 1"
        )

    next_index = start
    if stop is not None and next_index >= stop:
        return
    for index, item in enumerate(seq):
        if index == next_index:
            yield item
            next_index += step
            # Return right after the last selected item so that nothing
            # further is pulled from the source.
            if stop is not None and next_index >= stop:
                return
import itertools
# starmap 类似map的作用,但是此方法可以传递多个参数
def fun(x, y):
    """Return ``x + y``; the two-argument sample function used with starmap."""
    return x + y
gen=itertools.starmap(fun, [(1, 2), (3, 4)])
list(gen)
[3, 7]
## 生成器实现
def starmap(fun, iterable):
    """Yield ``fun(*args)`` for each argument tuple ``args`` in ``iterable``.

    Generator sketch of ``itertools.starmap``.

    Args:
        fun: Callable invoked with each element unpacked as positional
            arguments.
        iterable: Iterable whose elements are themselves iterables of
            arguments.
    """
    for it in iterable:
        yield fun(*it)
import itertools
# takewhile(pred, seq) 迭代直到pred返回false,和dropwhile相反
gen=itertools.takewhile(lambda x:x<3, [1, 2, 3, 4, 5, 1])
next(gen)
1
next(gen)
2
## 生成器实现
def takewhile(predicate, iterable):
    """Yield leading items while ``predicate`` is true; stop at the first failure.

    Generator sketch of ``itertools.takewhile``.

    Args:
        predicate: One-argument callable tested against each item.
        iterable: Source items.
    """
    for x in iterable:
        if not predicate(x):
            break
        yield x
import itertools
# tee(it, n) 将一份迭代器分割成多份
gen = (x for x in range(10))
a, b = itertools.tee(gen, 2)
list(a)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
list(b)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# zip_longest(p, q) zip的扩展函数
list(zip(['x', 'y'], [1, 2, 3, 4]))
[('x', 1), ('y', 2)]
list(itertools.zip_longest(['x', 'y'], [1, 2, 3, 4], fillvalue='*'))
[('x', 1), ('y', 2), ('*', 3), ('*', 4)]
import itertools
# product(p, q, ....[repeat=1]) 笛卡尔积
list(itertools.product(['a', 'b', 'c'], [1, 2, 3]))
[('a', 1),
('a', 2),
('a', 3),
('b', 1),
('b', 2),
('b', 3),
('c', 1),
('c', 2),
('c', 3)]
list(itertools.product(['a', 'b', 'c'], repeat=2))
[('a', 'a'),
('a', 'b'),
('a', 'c'),
('b', 'a'),
('b', 'b'),
('b', 'c'),
('c', 'a'),
('c', 'b'),
('c', 'c')]
# permutations(p[, r]) 返回r长度的元组,全部可能排序,没有重复元素
list(itertools.permutations('abc'))
[('a', 'b', 'c'),
('a', 'c', 'b'),
('b', 'a', 'c'),
('b', 'c', 'a'),
('c', 'a', 'b'),
('c', 'b', 'a')]
list(itertools.permutations('abc', 2))
[('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'c'), ('c', 'a'), ('c', 'b')]
# combinations(p, r) 返回r长度的元组,元组内元素保持输入中的先后顺序,同一元素不会重复出现
list(itertools.combinations('abc', 3))
[('a', 'b', 'c')]
list(itertools.combinations('abc', 2))
[('a', 'b'), ('a', 'c'), ('b', 'c')]
# combinations_with_replacement(p, r) 返回r长度的元组,元组内元素保持输入中的先后顺序,同一元素可以重复出现
list(itertools.combinations_with_replacement('abc', 3))
[('a', 'a', 'a'),
('a', 'a', 'b'),
('a', 'a', 'c'),
('a', 'b', 'b'),
('a', 'b', 'c'),
('a', 'c', 'c'),
('b', 'b', 'b'),
('b', 'b', 'c'),
('b', 'c', 'c'),
('c', 'c', 'c')]
list(itertools.combinations_with_replacement('abc', 2))
[('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')]
生成器就是迭代器,生成器不会把结果保存在一个列表中,而是保存为生成器状态,通过关键字yield来实现
(x for x in range(10))
<generator object <genexpr> at 0x10e6a5830> 返回迭代器对象
yield必须在函数中使用,使用了yield的函数就是生成器函数
# 实现一个阶乘生成器
def factorial():
    """Yield 1!, 2!, 3!, ... (1, 2, 6, 24, ...) without end."""
    ret = 1
    incr = 1
    while True:
        yield ret
        # Advance to the next factor and fold it into the running product.
        incr += 1
        ret *= incr
gen=factorial()
next(gen)
1
next(gen)
2
next(gen)
6
# 从列表中读取生成器数据
lst = [1, 2, 3, 4, 5]


def gen():
    """Yield the items of the module-level list ``lst`` via ``yield from``."""
    yield from lst
g=gen()
next(g)
1
next(g)
2
yield可以理解为不停地暂停函数的执行,而return则是直接退出函数的执行,下面的例子对比了yield和return的效果。
def incr():
    """Yield 0, 1, 2, 3 and then return the string "more than 3".

    The ``return`` ends the generator; its value is carried on the resulting
    ``StopIteration`` (as ``.value``) and is not yielded to the consumer.
    """
    for i in range(100):
        yield i
        if i >= 3:
            return "more than 3"
gen=incr()
for i in gen:
print(i)
0
1
2
3
# 这里被函数中的return退出了生成器函数,但是我们并没有看见return回的字符,而且这里我们也可以得到结论,只要有yield,整个函数就是生成器函数,返回的就是迭代器。
gen=incr()
next(gen)
0
next(gen)
1
next(gen)
2
next(gen)
3
next(gen)
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
<ipython-input-56-8a6233884a6c> in <module>()
----> 1 next(gen)
StopIteration: more than 3
gen=incr()
next(gen)
0
gen.close()
next(gen)
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
<ipython-input-60-8a6233884a6c> in <module>()
----> 1 next(gen)
StopIteration:
def g1():
    """Print a marker line showing that g1 was invoked."""
    print("g1 is running")


def g2():
    """Echo generator demonstrating ``send``.

    Yields "start" first. Each value later sent in (``None`` when advanced
    with ``next``) is answered with 'get val <value>'; sending the string
    'g1' additionally calls ``g1()`` before answering.
    """
    val = "start"
    while True:
        # `yield val` hands `val` out and evaluates to whatever is sent in.
        recv = yield val
        if recv == 'g1':
            g1()
        val = 'get val {}'.format(recv)
gen=g2()
next(gen)
start
next(gen)
get val None
print(gen.send('test'))
get val test
print(gen.send('g1'))
g1 is running
get val g1
可以得知next()函数就是向yield发送了None,等于gen.send(None)
def th():
    """Yield "test 1" forever, surviving ValueError and IOError thrown into it.

    When ``throw`` raises ValueError or IOError at the suspended ``yield``,
    the matching handler prints the exception name and the loop yields
    "test 1" again. The generator must already be suspended at the ``yield``
    (advanced at least once) for the handlers to apply.
    """
    while True:
        try:
            yield "test 1"
        except ValueError:
            print("ValueError")
        except IOError:
            print("IOError")
gen=th()
next(gen)
'test 1'
gen.throw(ValueError)
ValueError
'test 1'
gen.throw(IOError)
IOError
'test 1'
以梦为马 不负韶华 归来仍是少年
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。