前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >Python: 通过Ghostscript压缩pdf文件

Python: 通过Ghostscript压缩pdf文件

作者头像
Exploring
发布2022-09-20 14:03:38
2.4K0
发布2022-09-20 14:03:38
举报
文章被收录于专栏:数据处理与编程实践

文章背景:为了方便文件传输,有时需要对pdf文件进行压缩。针对pdf压缩,一种方法是借助第三方库PyMuPDF提取并压缩pdf中的图片,最后合并生成一个新的PDF。该方法并非对所有pdf文件都适用,且压缩后的pdf画质可能会变差。

本文借助Ghostscript对pdf文件进行压缩。因此,电脑上需要事先安装Ghostscript软件。

Ghostscript是一套建基于Adobe PostScript及可移植文档格式(PDF)的页面描述语言等而编译成的自由软件。许可协议有两个:GNU通用公共许可协议(免费,GPL Ghostscript), Aladdin Free Public License(收费)。

代码1:(pdf_compressor.py)

代码语言:python
复制
#!/usr/bin/env python3
# Author: Theeko74
# Contributor(s): skjerns
# Oct, 2021
# MIT license -- free to use as you want, cheers.

"""
Simple Python wrapper script that uses Ghostscript to compress PDF files.
Compression levels:
    0: default
    1: prepress
    2: printer
    3: ebook
    4: screen
Dependency: Ghostscript.
On MacOSX install via command line `brew install ghostscript`.
"""

import argparse
import subprocess
import os.path
import sys
import shutil

def compress(input_file_path, output_file_path, power=0):
    """Compress a PDF by running it through Ghostscript's ``pdfwrite`` device.

    Progress, the achieved compression ratio and the final size are printed
    to stdout.

    Args:
        input_file_path: Path of an existing file with a ``.pdf`` extension
            (case-insensitive).
        output_file_path: Path the compressed PDF is written to.
        power: Compression level from 0 to 4, mapped to the Ghostscript
            ``-dPDFSETTINGS`` presets ``/default``, ``/prepress``,
            ``/printer``, ``/ebook`` and ``/screen`` respectively.

    Raises:
        SystemExit: With code 1 when the input is missing, is not a PDF by
            extension, ``power`` is not a known level, or Ghostscript fails.
        FileNotFoundError: Propagated from ``get_ghostscript_path()`` when no
            Ghostscript executable is found.
    """
    quality = {
        0: '/default',
        1: '/prepress',
        2: '/printer',
        3: '/ebook',
        4: '/screen'
    }

    # Basic controls
    # Check if valid path
    if not os.path.isfile(input_file_path):
        print("Error: invalid path for input PDF file")
        sys.exit(1)

    # Check if file is a PDF by extension. splitext only looks at the real
    # extension, so an extension-less file named "pdf" is rejected.
    if os.path.splitext(input_file_path)[1].lower() != '.pdf':
        print("Error: input file is not a PDF")
        sys.exit(1)

    # Reject unknown levels up front instead of failing later with KeyError.
    if power not in quality:
        print("Error: compression level must be an integer from 0 to 4")
        sys.exit(1)

    gs = get_ghostscript_path()
    print("Compress PDF...")
    initial_size = os.path.getsize(input_file_path)
    return_code = subprocess.call([
        gs, '-sDEVICE=pdfwrite', '-dCompatibilityLevel=1.4',
        '-dPDFSETTINGS={}'.format(quality[power]),
        '-dNOPAUSE', '-dQUIET', '-dBATCH',
        '-sOutputFile={}'.format(output_file_path),
        input_file_path,
    ])

    # Do not report sizes for a run that failed or produced no output.
    if return_code != 0 or not os.path.isfile(output_file_path):
        print("Error: Ghostscript failed (exit code {})".format(return_code))
        sys.exit(1)

    final_size = os.path.getsize(output_file_path)
    # Guard against a zero-byte input so the ratio never divides by zero.
    ratio = 1 - (final_size / initial_size) if initial_size else 0.0
    print("Compression by {0:.0%}.".format(ratio))
    print("Final file size is {0:.1f}MB".format(final_size / 1024/1024))
    print("Done.")


def get_ghostscript_path():
    """Return the full path of the first Ghostscript executable on PATH.

    The names ``gs``, ``gswin32`` and ``gswin64`` are tried in that order
    with ``shutil.which``.

    Raises:
        FileNotFoundError: If none of the names can be resolved.
    """
    candidates = ['gs', 'gswin32', 'gswin64']
    # Lazily resolve each candidate and keep the first non-empty hit.
    located = next(filter(None, map(shutil.which, candidates)), None)
    if located is None:
        raise FileNotFoundError(f'No GhostScript executable was found on path ({"/".join(candidates)})')
    return located


def _open_with_default_app(path):
    """Open *path* in the platform's default viewer."""
    if sys.platform.startswith('win'):
        os.startfile(path)
    elif sys.platform == 'darwin':
        subprocess.call(['open', path])
    else:
        subprocess.call(['xdg-open', path])


def main():
    """Command-line entry point: parse arguments and compress one PDF.

    Without ``-o/--out`` the input file is compressed in place: the result is
    written to a private temporary directory and then copied over the input,
    optionally after saving a ``<name>_BACKUP<ext>`` copy (``-b/--backup``).
    With ``-o/--out`` the input is left untouched and the result is written
    to the given path. ``--open`` opens the resulting file afterwards.
    """
    # Local import: only the in-place path needs it.
    import tempfile

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('input', help='Relative or absolute path of the input PDF file')
    parser.add_argument('-o', '--out', help='Relative or absolute path of the output PDF file')
    parser.add_argument('-c', '--compress', type=int, choices=range(5),
                        help='Compression level from 0 to 4')
    parser.add_argument('-b', '--backup', action='store_true', help="Backup the old PDF file")
    parser.add_argument('--open', action='store_true', default=False,
                        help='Open PDF after compression')
    args = parser.parse_args()

    # In case no compression level is specified, default is 2 '/printer'.
    # Compare against None: level 0 ('/default') is valid but falsy.
    if args.compress is None:
        args.compress = 2

    # Remember explicitly whether we work in place instead of comparing the
    # output name with a magic value the user could also pass via -o.
    in_place = not args.out

    if in_place:
        # A private temporary directory avoids clobbering an unrelated
        # 'temp.pdf' in the working directory and is removed even when
        # compress() exits or raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, 'compressed.pdf')
            compress(args.input, tmp_path, power=args.compress)

            # Never replace the original with an empty result.
            if os.path.getsize(tmp_path) == 0:
                print("Error: compression produced an empty file; original left untouched")
                sys.exit(1)

            if args.backup:
                # splitext touches only the real extension, whatever its
                # case, and never other '.pdf' fragments in the path.
                stem, ext = os.path.splitext(args.input)
                shutil.copyfile(args.input, stem + '_BACKUP' + ext)
            shutil.copyfile(tmp_path, args.input)
        result_path = args.input
    else:
        compress(args.input, args.out, power=args.compress)
        result_path = args.out

    # In case we want to open the file after compression
    if args.open:
        _open_with_default_app(result_path)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

代码2:(compressPDF_tkinter.py)

代码语言:python
复制
# -*- coding: UTF-8 -*-
# Tkinter界面,压缩PDF文件

from tkinter import *
import os

from tkinter import filedialog
from tkinter import messagebox

from pdf_compressor import compress

def getPDF():
    """Ask the user to pick a file and show its path in the path entry.

    The entry is cleared first. If the dialog is cancelled, an information
    box is shown and the entry stays empty.
    """
    # Get the file path through a file dialog.
    file_path = filedialog.askopenfilename()

    FilePath_result.delete(0,END)
    # Truthiness instead of `!= ""`: a cancelled dialog may return an empty
    # tuple rather than an empty string, depending on platform/Tk version.
    if file_path:
        FilePath_result.insert(0,file_path)
    else:
        messagebox.showinfo("提示","未选中pdf文件!")
        
def comPDF(PDFpath, power=0):
    """Compress the PDF at *PDFpath* into ``compressed.pdf`` in the working directory.

    The original size, the compressed size (both in KB) and the output path
    are written to the result labels. Every problem is reported through a
    message box; nothing is raised to the caller.

    Args:
        PDFpath: Path of the PDF to compress, as typed or selected by the user.
        power: Compression level passed on to ``compress`` (default 0).
    """
    if PDFpath == "":
        messagebox.showinfo("提示","文件路径为空!")
        return
    if not os.path.exists(PDFpath):
        messagebox.showinfo("提示","该路径不存在,请确认!")
        return
    # Must be a regular file whose extension is .pdf in any letter case; this
    # matches the case-insensitive check in compress() and rejects directories
    # and names that merely end in the letters "pdf".
    if not (os.path.isfile(PDFpath)
            and os.path.splitext(PDFpath)[1].lower() == '.pdf'):
        messagebox.showinfo("提示","不是pdf文件,请确认!")
        return

    # Size of the original file in KB.
    content = int(os.path.getsize(PDFpath)/1024)
    Size_rusult.config(text = str(content))

    # Path of the compressed file; never overwrite an existing one.
    new_path = os.path.join(os.getcwd(),"compressed.pdf")
    if os.path.exists(new_path):
        messagebox.showinfo("提示","该路径已存在,请确认!\n" + new_path)
        return

    try:
        compress(PDFpath, new_path, power=power)
    except (SystemExit, OSError) as exc:
        # compress() signals failures with sys.exit(), and a missing
        # Ghostscript executable or output file surfaces as an OSError.
        # Report it in the GUI instead of letting the callback abort.
        messagebox.showerror("错误","压缩失败,请确认已安装Ghostscript且文件有效!\n" + str(exc))
        return

    # Size of the compressed file in KB.
    content_compressed = int(os.path.getsize(new_path)/1024)

    Size_comp_result.config(text = str(content_compressed))
    path_comp_result.config(text = new_path)

    messagebox.showinfo("提示","Done!")
        
# Main window; column 1 takes up any extra width when the window is resized.
root = Tk()
root.title("pdf compressor")
root.columnconfigure(1, weight=1)

# Row 0: button to pick a file and button to compress the path in the entry.
btn1 = Button(root, text="获取待压缩的pdf文件", command=getPDF, bg="AliceBlue")
btn2 = Button(
    root,
    text="压缩文件",
    command=lambda: comPDF(FilePath_result.get()),
    bg="Beige",
)

# Rows 1-4: a raised caption on the left, the matching value on the right.
FilePath = Label(root, text="压缩前的文件路径", relief="raised")
FilePath_result = Entry(root, width=50)

Size = Label(root, text="PDF文件大小(kb)", relief="raised")
Size_rusult = Label(root, text="", relief="raised")

Size_comp = Label(root, text="压缩后的文件大小(kb)", relief="raised")
Size_comp_result = Label(root, text="", relief="raised")

path_comp = Label(root, text="压缩后文件的路径:", relief="raised")
path_comp_result = Label(root, text="", relief="raised")

# Lay out the widgets on the grid.
btn1.grid(row=0, column=0, padx=5, pady=5, sticky=W+E)
btn2.grid(row=0, column=1, padx=5, pady=5, sticky=E)

_caption_value_rows = (
    (FilePath, FilePath_result),
    (Size, Size_rusult),
    (Size_comp, Size_comp_result),
    (path_comp, path_comp_result),
)
for _row, (_caption, _value) in enumerate(_caption_value_rows, start=1):
    _caption.grid(row=_row, column=0, padx=5, pady=5, sticky=W+E)
    _value.grid(row=_row, column=1, padx=5, sticky=W+E)

# Hand control to Tk until the window is closed.
root.mainloop()

运行界面:

效果演示:

http://mpvideo.qpic.cn/0b2e5aaagaaaiuadzxx6urqvb2gdapuaaaya.f10002.mp4?dis_k=a1fc7ee8875901f98cd70b5773b566c8&dis_t=1663653776&vid=wxv_2255866590980374534&format_id=10002&support_redirect=0&mmversion=false

参考资料:

[1] python实现PDF压缩(https://blog.csdn.net/qq_40507857/article/details/116501856)

[2] Ghostscript 9.55.0 for Windows (64 bit)(https://www.ghostscript.com/releases/gsdnld.html)

[3] Ghostscript(https://zh.wikipedia.org/wiki/Ghostscript)

[4] windows下ghostscript (gs)安装(https://blog.csdn.net/jasmine______001/article/details/105433002)

[5] Pdfc -- PDF Compressor(https://github.com/theeko74/pdfc)

[6] Python GUI设计:tkinter菜鸟编程(https://item.jd.com/12667860.html)

本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。
原始发表:2022-02-05,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 数据处理与编程实践 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
相关产品与服务
文件存储
文件存储(Cloud File Storage,CFS)为您提供安全可靠、可扩展的共享文件存储服务。文件存储可与腾讯云服务器、容器服务、批量计算等服务搭配使用,为多个计算节点提供容量和性能可弹性扩展的高性能共享存储。腾讯云文件存储的管理界面简单、易使用,可实现对现有应用的无缝集成;按实际用量付费,为您节约成本,简化 IT 运维工作。
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档