Python: 通过Ghostscript压缩pdf文件

Exploring

发布于 2022-09-20 14:03:38

2.4K0

发布于 2022-09-20 14:03:38

文章背景：为了方便文件传输，有时需要对pdf文件进行压缩。针对pdf压缩，一种方法是借助第三方库PyMuPDF提取并压缩其中的图片，最后合并生成一个新的PDF。该方法并非对所有pdf文件都适用，且压缩后的pdf画质可能会变差。

本文借助Ghostscript对pdf文件进行压缩。因此，电脑上需要事先安装Ghostscript软件。

Ghostscript是一套基于Adobe PostScript及可移植文档格式（PDF）等页面描述语言的解释器，属于自由软件。许可协议有两个：GNU通用公共许可协议（免费，GPL Ghostscript）和Aladdin Free Public License（收费）。

代码1：(pdf_compressor.py)

#!/usr/bin/env python3
# Author: Theeko74
# Contributor(s): skjerns
# Oct, 2021
# MIT license -- free to use as you want, cheers.

"""
Simple Python wrapper script that uses Ghostscript to compress PDF files.
Compression levels:
    0: default
    1: prepress
    2: printer
    3: ebook
    4: screen
Dependency: Ghostscript.
On MacOSX install via command line `brew install ghostscript`.
"""

import argparse
import subprocess
import os.path
import sys
import shutil

def compress(input_file_path, output_file_path, power=0):
    """Compress a PDF file through the Ghostscript command line interface.

    Args:
        input_file_path: Path of the PDF file to compress.
        output_file_path: Path Ghostscript writes the compressed PDF to.
        power: Compression level from 0 to 4, mapped to the Ghostscript
            ``-dPDFSETTINGS`` presets (0 default, 1 prepress, 2 printer,
            3 ebook, 4 screen).

    Raises:
        SystemExit: With status 1 (after printing an error message) when the
            input path is not an existing file, does not end in ``.pdf``,
            ``power`` is not a known level, or Ghostscript exits with a
            non-zero status.
        FileNotFoundError: When no Ghostscript executable is found on PATH.
    """
    quality = {
        0: '/default',
        1: '/prepress',
        2: '/printer',
        3: '/ebook',
        4: '/screen'
    }

    # Basic controls
    # Check if valid path
    if not os.path.isfile(input_file_path):
        print("Error: invalid path for input PDF file")
        sys.exit(1)

    # Check if file is a PDF by extension (the dot is required, so a file
    # that is merely *named* "pdf" is rejected)
    if not input_file_path.lower().endswith('.pdf'):
        print("Error: input file is not a PDF")
        sys.exit(1)

    # Validate the level up front instead of failing with a bare KeyError
    # after Ghostscript has already been located
    if power not in quality:
        print("Error: invalid compression level {!r}, expected 0 to 4".format(power))
        sys.exit(1)

    gs = get_ghostscript_path()
    print("Compress PDF...")
    initial_size = os.path.getsize(input_file_path)
    return_code = subprocess.call([
        gs, '-sDEVICE=pdfwrite', '-dCompatibilityLevel=1.4',
        '-dPDFSETTINGS={}'.format(quality[power]),
        '-dNOPAUSE', '-dQUIET', '-dBATCH',
        '-sOutputFile={}'.format(output_file_path),
        input_file_path,
    ])
    # A failed run may leave a missing or truncated output file; stop here so
    # callers never treat it as a valid compressed PDF
    if return_code != 0:
        print("Error: Ghostscript exited with status {}".format(return_code))
        sys.exit(1)

    final_size = os.path.getsize(output_file_path)
    # Guard against a zero-byte input, which would otherwise divide by zero
    ratio = 1 - (final_size / initial_size) if initial_size else 0.0
    print("Compression by {0:.0%}.".format(ratio))
    print("Final file size is {0:.1f}MB".format(final_size / 1024/1024))
    print("Done.")


def get_ghostscript_path():
    """Return the full path of the first Ghostscript executable found on PATH.

    The candidate names are tried in order: ``gs`` first, then the Windows
    executables ``gswin32`` / ``gswin64``, and finally their console variants
    ``gswin32c`` / ``gswin64c``.

    Returns:
        The path reported by ``shutil.which`` for the first name that resolves.

    Raises:
        FileNotFoundError: When none of the candidate names is on PATH.
    """
    gs_names = ['gs', 'gswin32', 'gswin64', 'gswin32c', 'gswin64c']
    for name in gs_names:
        # Look each name up once and reuse the result
        path = shutil.which(name)
        if path:
            return path
    raise FileNotFoundError(f'No GhostScript executable was found on path ({"/".join(gs_names)})')


def _open_with_default_app(path):
    """Open ``path`` with the default application of the current platform."""
    if sys.platform.startswith('win'):
        os.startfile(path)
    elif sys.platform == 'darwin':
        subprocess.call(['open', path])
    else:
        subprocess.call(['xdg-open', path])


def main():
    """Command-line entry point: parse the arguments and compress one PDF.

    Without ``-o/--out`` the input file is compressed in place: the result is
    written to ``temp.pdf`` in the current directory, copied over the input
    and removed. With ``-b/--backup`` the original is first copied next to
    the input with a ``_BACKUP`` suffix in its name.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('input', help='Relative or absolute path of the input PDF file')
    parser.add_argument('-o', '--out', help='Relative or absolute path of the output PDF file')
    parser.add_argument('-c', '--compress', type=int, help='Compression level from 0 to 4')
    parser.add_argument('-b', '--backup', action='store_true', help="Backup the old PDF file")
    parser.add_argument('--open', action='store_true', default=False,
                        help='Open PDF after compression')
    args = parser.parse_args()

    # In case no compression level is specified, default is 2 '/printer'.
    # Compare against None: level 0 is valid and must not be replaced.
    if args.compress is None:
        args.compress = 2

    # Remember whether the user asked for an output file, instead of
    # comparing the name against 'temp.pdf' later: an explicit
    # "-o temp.pdf" must not overwrite the input.
    in_place = not args.out
    if in_place:
        args.out = 'temp.pdf'

    # Run
    compress(args.input, args.out, power=args.compress)

    # In case no output file is specified, replace the original file
    if in_place:
        if args.backup:
            # splitext keeps the extension's case and only touches the suffix
            stem, ext = os.path.splitext(args.input)
            shutil.copyfile(args.input, stem + "_BACKUP" + ext)
        shutil.copyfile(args.out, args.input)
        os.remove(args.out)

    # In case we want to open the file after compression: in in-place mode
    # the temporary file is already gone, so open the input instead
    if args.open:
        _open_with_default_app(args.input if in_place else args.out)

# Run the command-line interface only when this file is executed as a script,
# so that importing the module does not trigger argument parsing.
if __name__ == '__main__':
    main()

代码2：（compressPDF_tkinter.py）

# -*- coding: UTF-8 -*-
# Tkinter界面，压缩PDF文件

from tkinter import *
import os

from tkinter import filedialog
from tkinter import messagebox

from pdf_compressor import compress

def getPDF():
    """Ask the user for a file and put its path into the path entry box.

    The entry is cleared first; if the dialog is cancelled, a notice is shown
    and the entry stays empty.
    """
    # Get the file path through a file dialog
    file_path = filedialog.askopenfilename()

    FilePath_result.delete(0,END)
    # Truthiness covers every "nothing selected" value: the dialog may return
    # an empty tuple instead of "" on cancel, which `!= ""` would not catch
    if file_path:
        FilePath_result.insert(0,file_path)
    else:
        messagebox.showinfo("提示","未选中pdf文件！")
        
def comPDF(PDFpath, power=0):
    """Compress the PDF at ``PDFpath`` into ``compressed.pdf`` and show sizes.

    The result is written to ``compressed.pdf`` in the current working
    directory; an existing file of that name is never overwritten. The sizes
    before and after compression (in KB) and the output path are shown in the
    result labels.

    Args:
        PDFpath: Path of the PDF file to compress, as typed or selected.
        power: Compression level passed on to ``compress`` (default 0).
    """
    if PDFpath == "":
        messagebox.showinfo("提示","文件路径为空！")
        return
    if not os.path.exists(PDFpath):
        messagebox.showinfo("提示","该路径不存在，请确认！")
        return
    # Same rule as compress(): a regular file whose name ends in ".pdf",
    # compared case-insensitively so "X.PDF" is accepted as well
    if not (os.path.isfile(PDFpath) and PDFpath.lower().endswith('.pdf')):
        messagebox.showinfo("提示","不是pdf文件，请确认！")
        return

    # File size before compression
    content = int(os.path.getsize(PDFpath)/1024)   #kb
    Size_rusult.config(text = str(content))

    # Path of the compressed file
    new_path = os.path.join(os.getcwd(),"compressed.pdf")
    if os.path.exists(new_path):
        messagebox.showinfo("提示","该路径已存在，请确认！\n" + new_path)
        return

    # Compress the file. compress() reports validation problems through
    # sys.exit(), which would otherwise end the Tk main loop, and a missing
    # Ghostscript surfaces as FileNotFoundError (an OSError).
    try:
        compress(PDFpath, new_path, power=power)
        content_compressed = int(os.path.getsize(new_path)/1024)   #kb
    except SystemExit:
        messagebox.showerror("提示","压缩失败，详情请查看控制台输出！")
        return
    except OSError as err:
        messagebox.showerror("提示","压缩失败：\n" + str(err))
        return

    Size_comp_result.config(text = str(content_compressed))
    path_comp_result.config(text = new_path)

    messagebox.showinfo("提示","Done！")
        
# Build the main window. The widgets are module-level globals because the
# getPDF/comPDF callbacks read and update them by name.
root = Tk()
root.title("pdf compressor")
# Give column 1 (the result column) the extra horizontal space on resize
root.columnconfigure(1,weight=1)

# Buttons: btn1 picks the input file, btn2 compresses the path currently
# typed into the entry box (read lazily through the lambda at click time)
btn1 = Button(root, text="获取待压缩的pdf文件",command=getPDF,bg="AliceBlue")
btn2 = Button(root, text="压缩文件",command=lambda: comPDF(FilePath_result.get()),bg="Beige")
# Caption and entry box for the input file path
FilePath = Label(root,text="压缩前的文件路径",relief = "raised")
FilePath_result = Entry(root,width=50)

# Caption and value label for the original file size (KB)
Size = Label(root,text="PDF文件大小（kb）",relief = "raised")
Size_rusult = Label(root,text="",relief = "raised")

# Caption and value label for the compressed file size (KB)
Size_comp = Label(root,text="压缩后的文件大小（kb）",relief = "raised")
Size_comp_result = Label(root,text="",relief = "raised")

# Caption and value label for the path of the compressed file
path_comp = Label(root,text="压缩后文件的路径：",relief = "raised")
path_comp_result = Label(root,text="",relief = "raised")

# GUI layout: one grid row per caption/value pair, captions in column 0
btn1.grid(row=0,column=0,padx=5,pady=5,stick=W+E)
btn2.grid(row=0,column=1,padx=5,pady=5,stick=E)

FilePath.grid(row=1,column=0,padx=5,pady=5,stick=W+E)
FilePath_result.grid(row=1,column=1,padx=5,stick=W+E)

Size.grid(row=2,column=0,padx=5,pady=5,stick=W+E)
Size_rusult.grid(row=2,column=1,padx=5,stick=W+E)

Size_comp.grid(row=3,column=0,padx=5,pady=5,stick=W+E)
Size_comp_result.grid(row=3,column=1,padx=5,stick=W+E)

path_comp.grid(row=4,column=0,padx=5,pady=5,stick=W+E)
path_comp_result.grid(row=4,column=1,padx=5,stick=W+E)

# Hand control to Tk; this call blocks until the window is closed
root.mainloop()

运行界面：

效果演示：

http://mpvideo.qpic.cn/0b2e5aaagaaaiuadzxx6urqvb2gdapuaaaya.f10002.mp4?dis_k=a1fc7ee8875901f98cd70b5773b566c8&dis_t=1663653776&vid=wxv_2255866590980374534&format_id=10002&support_redirect=0&mmversion=false

参考资料：

[1] python实现PDF压缩(https://blog.csdn.net/qq_40507857/article/details/116501856)

[2] Ghostscript 9.55.0 for Windows (64 bit)(https://www.ghostscript.com/releases/gsdnld.html)

[3] Ghostscript(https://zh.wikipedia.org/wiki/Ghostscript)

[4] windows下ghostscript （gs）安装(https://blog.csdn.net/jasmine______001/article/details/105433002)

[5] Pdfc -- PDF Compressor(https://github.com/theeko74/pdfc)

[6] Python GUI设计：tkinter菜鸟编程(https://item.jd.com/12667860.html)

本文参与腾讯云自媒体同步曝光计划，分享自微信公众号。

原始发表：2022-02-05，如有侵权请联系 cloudcommunity@tencent.com 删除

https