class SafeUncompressor(object):
    """Read-only, file-like proxy around an external file object.

    The wrapped file may be uncompressed or gzip-compressed; the format is
    detected from the first two bytes and reads return the decoded payload
    either way.  At most ``maxsize`` bytes of output are ever produced, which
    protects callers against decompression bombs.

    bzip2 input is recognised but rejected with ``IOError``: the plain
    ``BZ2Decompressor.decompress(data)`` call can expand a tiny input chunk
    into an arbitrarily large result before any size check can run.  See
    http://stackoverflow.com/questions/13622706/how-to-protect-myself-from-a-gzip-or-bzip2-bomb
    """
    # Number of bytes requested from the underlying reader per iteration.
    blocksize = 16 * 1024

    class FileTooLarge(Exception):
        """Raised when the decoded stream grows beyond ``maxsize`` bytes."""

    def __init__(self, fileobj, maxsize=10*1024*1024):
        """Wrap *fileobj* (must support ``read`` and ``seek``).

        *maxsize* is the largest number of decoded bytes that may be read
        before ``FileTooLarge`` is raised.
        """
        self.fileobj = fileobj
        self.name = getattr(self.fileobj, "name", None)
        self.maxsize = maxsize
        self.init()

    def init(self):
        """Rewind the wrapped file and (re)detect its format.

        Raises ``IOError`` when the file starts with the bzip2 magic.
        """
        import gzip
        self.pos = 0
        self.format = "plain"

        self.fileobj.seek(0)
        magic = self.fileobj.read(2)
        self.fileobj.seek(0)
        # Empty value of whatever type the file object yields (bytes for
        # binary files, str for text-mode files) so joins never mix types.
        self.buf = magic[:0]

        if magic == b'\037\213':
            self.format = "gzip"
            self.gzipobj = gzip.GzipFile(fileobj=self.fileobj, mode='r')
        elif magic == b'BZ':
            raise IOError("bzip2 support in SafeUncompressor disabled, as self.bz2obj.decompress is not safe")

    def read(self, size=-1):
        """Return up to *size* decoded bytes (fewer at end of file).

        A *size* of ``None`` or a negative number reads until end of file.
        Raises ``FileTooLarge`` as soon as the total decoded output would
        exceed ``maxsize``; the proxy is rewound to the start in that case so
        its position and the underlying stream stay consistent.
        """
        read_all = size is None or size < 0
        if self.format == 'gzip':
            source = self.gzipobj
        else:
            source = self.fileobj

        chunks = [self.buf]
        buffered = len(self.buf)
        while read_all or buffered < size:
            data = source.read(self.blocksize)
            if not data:
                break
            chunks.append(data)
            buffered += len(data)

            # pos counts bytes already handed out, buffered counts bytes
            # decoded but not yet returned: together the total output so far.
            if self.pos + buffered > self.maxsize:
                self.init()
                raise SafeUncompressor.FileTooLarge("Compressed file too large")
        self.buf = self.buf[:0].join(chunks)

        if read_all:
            size = len(self.buf)
        result = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(result)
        return result

    def seek(self, pos, whence=0):
        """Move to absolute offset *pos* in the decoded stream.

        Only ``whence=0`` is supported.  Seeking backwards restarts from the
        beginning; seeking forwards reads and discards the bytes in between,
        so it is subject to the ``maxsize`` limit as well.
        """
        if whence != 0:
            raise IOError("SafeUncompressor only supports whence=0")
        if pos < self.pos:
            self.init()
        remaining = pos - self.pos
        if remaining > 0:
            self.read(remaining)

    def tell(self):
        """Return the current offset in the decoded stream."""
        return self.pos

它不能很好地用于bzip2，因此部分代码被禁用。原因是bz2.BZ2Decompressor.decompress可能已经产生了大量不需要的数据。

票数 3

Stack Overflow用户

发布于 2012-12-25 01:39:02

您可以使用resource module来限制您的进程及其子进程可用的资源。

如果你需要在内存中解压，那么你可以设置resource.RLIMIT_AS (或RLIMIT_DATA，RLIMIT_STACK)，例如，使用上下文管理器自动将其恢复为以前的值：

import contextlib
import resource

@contextlib.contextmanager
def limit(limit, type=resource.RLIMIT_AS):
    """Temporarily set the soft limit of resource *type* to *limit*.

    The hard limit is left as it was, and the previous soft limit is put
    back when the ``with`` block exits, whether normally or by exception.
    """
    previous = resource.getrlimit(type)  # (soft, hard) pair in effect on entry
    hard_cap = previous[1]
    resource.setrlimit(type, (limit, hard_cap))
    try:
        yield
    finally:
        # Always undo the change so later code runs under the old limit.
        resource.setrlimit(type, (previous[0], hard_cap))

with limit(1 << 30): # soft address-space limit of 1 GiB while the block runs
    # do the thing that might try to consume all memory

如果达到限制，则会引发MemoryError。

票数 13

Stack Overflow用户

发布于 2012-12-23 23:42:45

这将确定gzip流的解压缩大小，同时使用有限的内存：

#!/usr/bin/python
import sys
import zlib
def gzip_uncompressed_size(path, in_chunk=1024, out_chunk=4096):
    """Measure the decompressed size of the gzip file at *path*.

    The data is decoded in pieces of at most *out_chunk* bytes and thrown
    away immediately, so memory use stays small however large the output is.

    Returns a ``(total, trailing)`` tuple: *total* is the number of
    decompressed bytes, *trailing* is true when input remains after the end
    of the gzip stream.
    """
    # wbits 15 + 16: largest window, and expect a gzip header and trailer.
    z = zlib.decompressobj(15 + 16)
    total = 0
    with open(path, "rb") as f:
        while True:
            # Input that did not fit under the previous output cap must be
            # fed back in before any new data is read from the file.
            buf = z.unconsumed_tail
            if not buf:
                buf = f.read(in_chunk)
                if not buf:
                    break
            got = z.decompress(buf, out_chunk)
            if not got:
                break
            total += len(got)
        trailing = bool(z.unused_data or f.read(in_chunk))
    return total, trailing


if __name__ == "__main__":
    total, trailing = gzip_uncompressed_size(sys.argv[1])
    print(total)
    if trailing:
        print("warning: more input after end of gzip stream")

解压时，它将返回对tar文件中所有文件所需空间的略微高估。长度包括这些文件，以及tar目录信息。

gzip.py代码不控制解压缩的数据量，除非依靠输入数据的大小。在gzip.py中，它一次读取1024个压缩字节。因此，如果您可以接受使用高达1056768字节的内存来存储未压缩的数据(1032×1024，其中1032:1是deflate的最大压缩比)，则可以使用gzip.py。这里的解决方案使用带有第二个参数的zlib.decompress，该参数限制了未压缩数据的数量；gzip.py则没有使用这一参数。

这将通过解码tar格式准确地确定提取的tar条目的总大小：

#!/usr/bin/python

import sys
import zlib

def decompn(f, z, n):
    """Return n uncompressed bytes, or fewer if at the end of the compressed
       stream.  This only decompresses as much as necessary, in order to
       avoid excessive memory usage for highly compressed input.

       f is a binary file object supplying compressed data and z is the
       zlib decompression object that carries state between calls.
    """
    parts = []
    have = 0
    while have < n:
        # Input left over from a previous size-capped call goes first;
        # only read more from the file once that has been used up.
        buf = z.unconsumed_tail
        if not buf:
            buf = f.read(1024)
        got = z.decompress(buf, n - have)
        if not got:
            break
        parts.append(got)
        have += len(got)
    return b"".join(parts)

def tar_entry_size(header):
    """Return the data size in bytes recorded in a 512-byte tar header.

    Entries whose type flag is "1" to "6" are reported as 0, because no data
    blocks are skipped for them.  A size field whose first byte is 0x80 is
    read as a big-endian binary number taken from the remaining 11 bytes;
    anything else is parsed as octal text.  An empty size field counts as 0.
    """
    # Slices (not indexing) keep the comparisons valid for both Python 2
    # strings and Python 3 bytes.
    if header[156:157] in (b"1", b"2", b"3", b"4", b"5", b"6"):
        return 0
    field = header[124:136]
    raw = bytearray(field)  # integer view of the field on any Python version
    if raw and raw[0] == 0x80:
        size = 0
        for byte in raw[1:]:
            size = (size << 8) | byte
        return size
    tokens = field.split()
    digits = tokens[0].split(b"\0")[0] if tokens else b""
    return int(digits, 8) if digits else 0


def tar_gz_total_size(f, z):
    """Add up the entry sizes in a gzip-compressed tar stream.

    f is the compressed binary file and z its zlib decompression object.
    Only header blocks are interpreted; data blocks are decompressed 512
    bytes at a time and discarded.

    Returns ``(total, last_block, blocks_left)``: the summed size of all
    entries except those with type flag x, g, X, L or K, the final (short)
    block that ended the scan, and the number of data blocks that were still
    expected when the input ran out.
    """
    zero_block = b"\0" * 512
    total = 0
    left = 0
    while True:
        blk = decompn(f, z, 512)
        if len(blk) < 512:
            break
        if left:
            # Still inside the data blocks of the current entry.
            left -= 1
            continue
        if blk == zero_block:
            continue
        size = tar_entry_size(blk)
        if blk[156:157] not in (b"x", b"g", b"X", b"L", b"K"):
            total += size
        left = (size + 511) // 512  # data is padded to whole 512-byte blocks
    return total, blk, left


if __name__ == "__main__":
    with open(sys.argv[1], "rb") as f:
        z = zlib.decompressobj(15 + 16)
        total, blk, left = tar_gz_total_size(f, z)
        print(total)
        if blk:
            print("warning: partial final block")
        if left != 0:
            print("warning: tar file ended in the middle of an entry")
        if z.unused_data or f.read(1024):
            print("warning: more input after end of gzip stream")

您可以使用它的一个变体来扫描tar文件中的炸弹。这样做的好处是，在解压数据之前，就可以在头信息中找到较大的大小。

至于.tar.bz2归档，Python bz2库(至少在3.3版本中)对于消耗太多内存的bz2炸弹来说不可避免地是不安全的。与zlib.decompress不同，bz2.decompress函数不提供用于限制输出大小的第二个参数。更糟糕的是，由于游程编码，bz2格式的最大压缩比比zlib要高得多。bzip2将1 GB的0压缩为722字节。因此，无法像使用zlib.decompress时那样（zlib即使不用第二个参数也能做到）通过限制输入量来限制bz2.decompress的输出量。缺乏对解压缩输出大小的限制是Python接口中的一个根本缺陷。

我查看了3.3中的_bz2module.c，看看是否有未记录的方法来使用它来避免这个问题。这是无计可施的。其中的decompress函数只是不断增加结果缓冲区，直到它可以解压缩所有提供的输入。_bz2module.c需要修复。

票数 6

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/13622706

复制

相似问题

问如何保护自己免受gzip或bzip2炸弹的攻击？
EN

回答 5

Stack Overflow用户

Stack Overflow用户

Stack Overflow用户

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问如何保护自己免受gzip或bzip2炸弹的攻击？EN

回答 5

Stack Overflow用户

Stack Overflow用户

Stack Overflow用户

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问如何保护自己免受gzip或bzip2炸弹的攻击？
EN