前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >python 比较两个目录 脚本

python 比较两个目录 脚本

作者头像
用户5760343
发布2022-05-13 11:14:11
7390
发布2022-05-13 11:14:11
举报
文章被收录于专栏:sktjsktj

""" ################################################################################ Usage: "python diffall.py dir1 dir2". Recursive directory tree comparison: report unique files that exist in only dir1 or dir2, report files of the same name in dir1 and dir2 with differing contents, report instances of same name but different type in dir1 and dir2, and do the same for all subdirectories of the same names in and below dir1 and dir2. A summary of diffs appears at end of output, but search redirected output for "DIFF" and "unique" strings for further details. New: (3E) limit reads to 1M for large files, (3E) catch same name=file/dir, (4E) avoid extra os.listdir() calls in dirdiff.comparedirs() by passing results here along. ################################################################################ """

import os
import dirdiff

# Maximum number of bytes fetched by a single read() while comparing file
# contents, so that very large files are never loaded into memory at once.
blocksize = 1024 * 1024              # up to 1M per read

def intersect(seq1, seq2):
    """
    Return all items in both seq1 and seq2, in seq1's order.

    A plain set(seq1) & set(seq2) would work too, but sets are randomly
    ordered, so any platform-dependent directory order would be lost.
    A set is used here only for the membership test, so the order (and
    any duplicates) of seq1 are preserved in the result.
    """
    try:
        # build the lookup once: O(1) membership instead of a list scan
        lookup = set(seq2)
        return [item for item in seq1 if item in lookup]
    except TypeError:
        # unhashable items: fall back to the linear membership scan
        return [item for item in seq1 if item in seq2]

def comparetrees(dir1, dir2, diffs, verbose=False):
    """
    Compare all subdirectories and files in two directory trees.

    Files are read in binary mode to prevent Unicode decoding and endline
    transforms, as trees might contain arbitrary binary files as well as
    arbitrary text; may need a bytes listdir arg for undecodable filenames
    on some platforms.

    dir1, dir2 -- paths of the two directories to compare
    diffs      -- list to which a message is appended for every difference
                  found (changed in place; nothing is returned)
    verbose    -- when true, also print a line for each matching file
    """
    # compare file name lists
    print('-' * 20)
    names1 = os.listdir(dir1)
    names2 = os.listdir(dir2)
    if not dirdiff.comparedirs(dir1, dir2, names1, names2):
        diffs.append('unique files at %s - %s' % (dir1, dir2))

    print('Comparing contents')
    common = intersect(names1, names2)
    missed = common[:]

    # compare contents of files in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isfile(path1) and os.path.isfile(path2):
            missed.remove(name)
            # context managers close both files on every exit path, so
            # handles are not leaked while walking large trees
            with open(path1, 'rb') as file1, open(path2, 'rb') as file2:
                while True:
                    bytes1 = file1.read(blocksize)
                    bytes2 = file2.read(blocksize)
                    if (not bytes1) and (not bytes2):
                        # both files hit end-of-file together: same content
                        if verbose: print(name, 'matches')
                        break
                    if bytes1 != bytes2:
                        diffs.append('files differ at %s - %s' % (path1, path2))
                        print(name, 'DIFFERS')
                        break

    # recur to compare directories in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isdir(path1) and os.path.isdir(path2):
            missed.remove(name)
            comparetrees(path1, path2, diffs, verbose)

    # same name but not both files or dirs?
    for name in missed:
        diffs.append('files missed at %s - %s: %s' % (dir1, dir2, name))
        print(name, 'DIFFERS')

# Script entry point: compare the two trees named on the command line and
# print a summary of every difference collected along the way.
if __name__ == '__main__':
    dir1, dir2 = dirdiff.getargs()
    diffs = []
    comparetrees(dir1, dir2, diffs, True)    # changes diffs in-place
    print('=' * 40)                          # walk, report diffs list
    if not diffs:
        print('No diffs found.')
    else:
        print('Diffs found:', len(diffs))
        for diff in diffs:
            print('-', diff)


""" ################################################################################ Usage: python dirdiff.py dir1-path dir2-path Compare two directories to find files that exist in one but not the other. This version uses the os.listdir function and list difference. Note that this script checks only filenames, not file contents--see diffall.py for an extension that does the latter by comparing .read() results. ################################################################################ """

import os, sys

def reportdiffs(unique1, unique2, dir1, dir2):
    """
    Generate diffs report for one dir: part of comparedirs output.

    unique1, unique2 -- names found only in dir1 / only in dir2
    dir1, dir2       -- directory labels used in the printed headings
    """
    if not (unique1 or unique2):
        print('Directory lists are identical')
        return

    # same report format for each side, dir1 first
    for unique, dirname in ((unique1, dir1), (unique2, dir2)):
        if unique:
            print('Files unique to', dirname)
            for name in unique:
                print('...', name)

def difference(seq1, seq2):
    """
    Return all items in seq1 only, in seq1's order.

    A plain set(seq1) - set(seq2) would work too, but sets are randomly
    ordered, so any platform-dependent directory order would be lost.
    A set is used here only for the membership test, so the order (and
    any duplicates) of seq1 are preserved in the result.
    """
    try:
        # build the lookup once: O(1) membership instead of a list scan
        lookup = set(seq2)
        return [item for item in seq1 if item not in lookup]
    except TypeError:
        # unhashable items: fall back to the linear membership scan
        return [item for item in seq1 if item not in seq2]

def comparedirs(dir1, dir2, files1=None, files2=None):
    """
    Compare directory contents, but not actual files; may need a bytes
    listdir arg for undecodable filenames on some platforms.

    dir1, dir2     -- paths of the two directories to compare
    files1, files2 -- optional name lists for dir1 / dir2; when omitted
                      (None) they are fetched with os.listdir

    Prints a report of the names unique to each side and returns True
    when neither side has unique names, False otherwise.
    """
    print('Comparing', dir1, 'to', dir2)
    # only list a directory when the caller did not pass its names along
    files1 = os.listdir(dir1) if files1 is None else files1
    files2 = os.listdir(dir2) if files2 is None else files2
    unique1 = difference(files1, files2)
    unique2 = difference(files2, files1)
    reportdiffs(unique1, unique2, dir1, dir2)
    return not (unique1 or unique2)              # true if no diffs

def getargs():
    """
    Args for command-line mode.

    Returns the tuple (dir1, dir2) taken from the command line; prints a
    usage message and exits with status 1 unless exactly two arguments
    were given.
    """
    try:
        dir1, dir2 = sys.argv[1:]                # 2 command-line args
    except ValueError:
        # wrong number of arguments; a bare except would also have
        # swallowed KeyboardInterrupt and SystemExit
        print('Usage: dirdiff.py dir1 dir2')
        sys.exit(1)
    else:
        return (dir1, dir2)

# Script entry point: report name-level differences between the two
# directories given on the command line.
if __name__ == '__main__':
    dir1, dir2 = getargs()
    comparedirs(dir1, dir2)

本文参与 腾讯云自媒体分享计划,分享自作者个人站点/博客。
原始发表:2022-05-13,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档